/trunk/kernel26-xen/patches-2.6.25-r1/1020-2.6.25-xen-patch-2.6.19.patch

Revision 609 - Fri May 23 17:35:37 2008 UTC by niro
File size: 319559 byte(s)
- using opensuse xen patchset, updated kernel configs

From: www.kernel.org
Subject: Linux 2.6.19
Patch-mainline: 2.6.19

Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py

Acked-by: jbeulich@novell.com

---
arch/x86/Kconfig | 1
arch/x86/ia32/ia32entry-xen.S | 9
arch/x86/kernel/Makefile | 5
arch/x86/kernel/apic_32-xen.c | 9
arch/x86/kernel/apic_64-xen.c | 20
arch/x86/kernel/cpu/common-xen.c | 20
arch/x86/kernel/e820_64-xen.c | 320 +++---
arch/x86/kernel/early_printk-xen.c | 20
arch/x86/kernel/entry_32-xen.S | 139 +-
arch/x86/kernel/entry_64-xen.S | 106 --
arch/x86/kernel/genapic_xen_64.c | 9
arch/x86/kernel/head64-xen.c | 44
arch/x86/kernel/head_32-xen.S | 2
arch/x86/kernel/head_64-xen.S | 5
arch/x86/kernel/io_apic_32-xen.c | 750 +++++++++------
arch/x86/kernel/io_apic_64-xen.c | 1250 +++++++++++---------------
arch/x86/kernel/ioport_64-xen.c | 1
arch/x86/kernel/irq_32-xen.c | 19
arch/x86/kernel/irq_64-xen.c | 35
arch/x86/kernel/ldt_32-xen.c | 2
arch/x86/kernel/microcode-xen.c | 85 +
arch/x86/kernel/mpparse_32-xen.c | 70 -
arch/x86/kernel/mpparse_64-xen.c | 313 +-----
arch/x86/kernel/pci-dma_32-xen.c | 16
arch/x86/kernel/pci-swiotlb_64-xen.c | 3
arch/x86/kernel/process_32-xen.c | 29
arch/x86/kernel/process_64-xen.c | 90 +
arch/x86/kernel/setup64-xen.c | 41
arch/x86/kernel/setup_32-xen.c | 430 +++-----
arch/x86/kernel/setup_64-xen.c | 271 +----
arch/x86/kernel/smp_32-xen.c | 75 +
arch/x86/kernel/smp_64-xen.c | 35
arch/x86/kernel/time_32-xen.c | 86 -
arch/x86/kernel/traps_32-xen.c | 238 +++-
arch/x86/kernel/traps_64-xen.c | 220 +++-
arch/x86/kernel/vsyscall_64-xen.c | 117 ++
arch/x86/mach-xen/setup.c | 6
arch/x86/mm/fault_32-xen.c | 29
arch/x86/mm/fault_64-xen.c | 34
arch/x86/mm/highmem_32-xen.c | 31
arch/x86/mm/hypervisor.c | 9
arch/x86/mm/init_32-xen.c | 89 +
arch/x86/mm/init_64-xen.c | 184 +--
arch/x86/mm/ioremap_32-xen.c | 10
arch/x86/mm/pageattr_64-xen.c | 24
arch/x86/mm/pgtable_32-xen.c | 31
arch/x86/pci/irq-xen.c | 38
drivers/char/tpm/tpm_xen.c | 5
drivers/pci/Kconfig | 2
drivers/xen/Kconfig | 3
drivers/xen/balloon/balloon.c | 2
drivers/xen/blkback/blkback.c | 2
drivers/xen/blkback/common.h | 2
drivers/xen/blkfront/blkfront.c | 4
drivers/xen/blktap/blktap.c | 2
drivers/xen/blktap/common.h | 2
drivers/xen/console/console.c | 10
drivers/xen/console/xencons_ring.c | 4
drivers/xen/core/evtchn.c | 50 -
drivers/xen/core/reboot.c | 3
drivers/xen/core/smpboot.c | 6
drivers/xen/fbfront/xenfb.c | 3
drivers/xen/fbfront/xenkbd.c | 2
drivers/xen/gntdev/gntdev.c | 11
drivers/xen/netback/accel.c | 2
drivers/xen/netback/common.h | 2
drivers/xen/netback/loopback.c | 2
drivers/xen/netback/netback.c | 6
drivers/xen/netfront/netfront.c | 8
drivers/xen/pciback/pciback.h | 2
drivers/xen/pciback/pciback_ops.c | 2
drivers/xen/pcifront/pci_op.c | 8
drivers/xen/privcmd/compat_privcmd.c | 1
drivers/xen/privcmd/privcmd.c | 2
drivers/xen/sfc_netback/accel_xenbus.c | 6
drivers/xen/sfc_netfront/accel.h | 6
drivers/xen/sfc_netfront/accel_msg.c | 6
drivers/xen/sfc_netfront/accel_tso.c | 2
drivers/xen/sfc_netfront/accel_vi.c | 4
drivers/xen/tpmback/common.h | 2
drivers/xen/tpmback/tpmback.c | 4
drivers/xen/xenbus/xenbus_comms.c | 2
drivers/xen/xenoprof/xenoprofile.c | 2
include/asm-generic/pgtable.h | 2
include/asm-x86/mach-xen/asm/desc_32.h | 127 +-
include/asm-x86/mach-xen/asm/dma-mapping_64.h | 7
include/asm-x86/mach-xen/asm/e820_64.h | 15
include/asm-x86/mach-xen/asm/fixmap_32.h | 5
include/asm-x86/mach-xen/asm/fixmap_64.h | 2
include/asm-x86/mach-xen/asm/hw_irq_32.h | 8
include/asm-x86/mach-xen/asm/hw_irq_64.h | 10
include/asm-x86/mach-xen/asm/io_32.h | 27
include/asm-x86/mach-xen/asm/io_64.h | 27
include/asm-x86/mach-xen/asm/pgtable-2level.h | 12
include/asm-x86/mach-xen/asm/pgtable-3level.h | 14
include/asm-x86/mach-xen/asm/pgtable_32.h | 143 +-
include/asm-x86/mach-xen/asm/pgtable_64.h | 86 +
include/asm-x86/mach-xen/asm/processor_32.h | 62 -
include/asm-x86/mach-xen/asm/processor_64.h | 2
include/asm-x86/mach-xen/asm/segment_32.h | 19
include/asm-x86/mach-xen/asm/smp_32.h | 25
include/asm-x86/mach-xen/asm/smp_64.h | 27
include/asm-x86/mach-xen/asm/system_32.h | 36
include/asm-x86/mach-xen/asm/system_64.h | 1
include/asm-x86/mach-xen/asm/tlbflush_32.h | 2
include/asm-x86/mach-xen/asm/tlbflush_64.h | 3
include/asm-x86/thread_info_64.h | 4
include/linux/skbuff.h | 7
include/xen/evtchn.h | 10
include/xen/xencons.h | 2
mm/mprotect.c | 2
net/core/dev.c | 8
112 files changed, 3102 insertions(+), 3145 deletions(-)

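[Note, not part of the upstream patch: several hunks below (apic_32-xen.c, apic_64-xen.c) track the 2.6.19 interrupt rework in which the struct pt_regs pointer is no longer threaded through every handler but parked in a per-CPU slot at interrupt entry, hence the set_irq_regs()/get_irq_regs() pairs. A minimal sketch of that interface, modelled on include/asm-generic/irq_regs.h but simplified to a single static variable instead of the kernel's per-CPU storage:

	struct pt_regs;

	/* per-CPU in the real kernel; one static slot in this sketch */
	static struct pt_regs *__irq_regs;

	static inline struct pt_regs *get_irq_regs(void)
	{
		return __irq_regs;	/* regs of the interrupt being handled */
	}

	static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
	{
		struct pt_regs *old_regs = __irq_regs;

		__irq_regs = new_regs;	/* entry installs, exit restores */
		return old_regs;
	}

An entry point brackets its body with old_regs = set_irq_regs(regs) on entry and set_irq_regs(old_regs) on exit, exactly as the smp_apic_timer_interrupt() hunk does. The command-line hunks (mem=, memmap=, earlyprintk=) likewise move from __setup() to the 2.6.19 early_param() mechanism, whose handlers run during early boot and must tolerate a NULL argument.]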
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -390,6 +390,7 @@

menuconfig PARAVIRT_GUEST
bool "Paravirtualized guest support"
+ depends on !X86_XEN && !X86_64_XEN
help
Say Y here to get to see options related to running Linux under
various hypervisors. This option alone does not add any kernel code.
--- a/arch/x86/ia32/ia32entry-xen.S
+++ b/arch/x86/ia32/ia32entry-xen.S
@@ -83,6 +83,7 @@
*/
ENTRY(ia32_sysenter_target)
CFI_STARTPROC32 simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP+16
/*CFI_REL_OFFSET ss,SS-RIP+16*/
CFI_REL_OFFSET rsp,RSP-RIP+16
@@ -164,6 +165,7 @@
*/
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP+16
/*CFI_REL_OFFSET ss,SS-RIP+16*/
CFI_REL_OFFSET rsp,RSP-RIP+16
@@ -243,6 +245,7 @@

ENTRY(ia32_syscall)
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP+16
/*CFI_REL_OFFSET ss,SS-RIP+16*/
CFI_REL_OFFSET rsp,RSP-RIP+16
@@ -320,6 +323,7 @@
popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
CFI_REL_OFFSET rax,RAX-ARGOFFSET
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
@@ -653,8 +657,8 @@
.quad sys_readlinkat /* 305 */
.quad sys_fchmodat
.quad sys_faccessat
- .quad quiet_ni_syscall /* pselect6 for now */
- .quad quiet_ni_syscall /* ppoll for now */
+ .quad compat_sys_pselect6
+ .quad compat_sys_ppoll
.quad sys_unshare /* 310 */
.quad compat_sys_set_robust_list
.quad compat_sys_get_robust_list
@@ -663,4 +667,5 @@
.quad sys_tee
.quad compat_sys_vmsplice
.quad compat_sys_move_pages
+ .quad sys_getcpu
ia32_syscall_end:
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -91,7 +91,7 @@
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-y += genapic_64.o genapic_flat_64.o
+ obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o
obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
@@ -104,5 +104,6 @@
pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
endif

-disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
+disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
+ smpboot_$(BITS).o tsc_$(BITS).o
%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
--- a/arch/x86/kernel/apic_32-xen.c
+++ b/arch/x86/kernel/apic_32-xen.c
@@ -54,7 +54,6 @@
/*
* Knob to control our willingness to enable the local APIC.
*/
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */

/*
* Debug level
@@ -102,7 +101,7 @@

#ifndef CONFIG_XEN
#ifndef CONFIG_SMP
-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
+static void up_apic_timer_interrupt_call(void)
{
int cpu = smp_processor_id();

@@ -111,11 +110,11 @@
*/
per_cpu(irq_stat, cpu).apic_timer_irqs++;

- smp_local_timer_interrupt(regs);
+ smp_local_timer_interrupt();
}
#endif

-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
+void smp_send_timer_broadcast_ipi(void)
{
cpumask_t mask;

@@ -128,7 +127,7 @@
* We can directly call the apic timer interrupt handler
* in UP case. Minus all irq related functions
*/
- up_apic_timer_interrupt_call(regs);
+ up_apic_timer_interrupt_call();
#endif
}
}
--- a/arch/x86/kernel/apic_64-xen.c
+++ b/arch/x86/kernel/apic_64-xen.c
@@ -43,7 +43,7 @@
*/
void ack_bad_irq(unsigned int irq)
{
- printk("unexpected IRQ trap at vector %02x\n", irq);
+ printk("unexpected IRQ trap at irq %02x\n", irq);
/*
* Currently unexpected vectors happen only on SMP and APIC.
* We _must_ ack these because every local APIC has only N
@@ -62,19 +62,19 @@
return -EINVAL;
}

-void smp_local_timer_interrupt(struct pt_regs *regs)
+void smp_local_timer_interrupt(void)
{
- profile_tick(CPU_PROFILING, regs);
+ profile_tick(CPU_PROFILING);
#ifndef CONFIG_XEN
#ifdef CONFIG_SMP
- update_process_times(user_mode(regs));
+ update_process_times(user_mode(get_irq_regs()));
#endif
#endif
/*
* We take the 'long' return path, and there every subsystem
* grabs the appropriate locks (kernel lock/ irq lock).
*
- * we might want to decouple profiling from the 'long path',
+ * We might want to decouple profiling from the 'long path',
* and do the profiling totally in assembly.
*
* Currently this isn't too much of an issue (performance wise),
@@ -92,6 +92,8 @@
*/
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
/*
* the NMI deadlock-detector uses this.
*/
@@ -109,8 +111,9 @@
*/
exit_idle();
irq_enter();
- smp_local_timer_interrupt(regs);
+ smp_local_timer_interrupt();
irq_exit();
+ set_irq_regs(old_regs);
}

/*
@@ -188,9 +191,8 @@
int __init APIC_init_uniprocessor (void)
{
#ifdef CONFIG_X86_IO_APIC
- if (smp_found_config)
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+ setup_IO_APIC();
#endif

return 1;
--- a/arch/x86/kernel/cpu/common-xen.c
+++ b/arch/x86/kernel/cpu/common-xen.c
@@ -43,7 +43,7 @@

extern int disable_pse;

-static void default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
{
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -56,7 +56,7 @@
}
}

-static struct cpu_dev default_cpu = {
+static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
};
@@ -191,7 +191,16 @@

static int __init x86_fxsr_setup(char * s)
{
+ /* Tell all the other CPU's to not use it... */
disable_x86_fxsr = 1;
+
+ /*
+ * ... and clear the bits early in the boot_cpu_data
+ * so that the bootup process doesn't try to do this
+ * either.
+ */
+ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
+ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
return 1;
}
__setup("nofxsr", x86_fxsr_setup);
@@ -272,7 +281,7 @@
}
}

-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
int ebx;
@@ -698,8 +707,7 @@
*/
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
enter_lazy_tlb(&init_mm, current);

load_esp0(t, thread);
@@ -712,7 +720,7 @@
#endif

/* Clear %fs and %gs. */
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));

/* Clear all 6 debug registers: */
set_debugreg(0, 0);
--- a/arch/x86/kernel/e820_64-xen.c
+++ b/arch/x86/kernel/e820_64-xen.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/kexec.h>
#include <linux/module.h>
+#include <linux/mm.h>

#include <asm/pgtable.h>
#include <asm/page.h>
@@ -25,6 +26,11 @@
#include <asm/sections.h>
#include <xen/interface/memory.h>

+struct e820map e820 __initdata;
+#ifdef CONFIG_XEN
+struct e820map machine_e820 __initdata;
+#endif
+
/*
* PFN of last memory page.
*/
@@ -41,7 +47,7 @@
/*
* Last pfn which the user wants to use.
*/
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
+static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;

extern struct resource code_resource, data_resource;

@@ -53,13 +59,13 @@
#ifndef CONFIG_XEN
/* various gunk below that needed for SMP startup */
if (addr < 0x8000) {
- *addrp = 0x8000;
+ *addrp = PAGE_ALIGN(0x8000);
return 1;
}

/* direct mapping tables of the kernel */
if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
- *addrp = table_end << PAGE_SHIFT;
+ *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
return 1;
}

@@ -67,23 +73,18 @@
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
addr < INITRD_START+INITRD_SIZE) {
- *addrp = INITRD_START + INITRD_SIZE;
+ *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
return 1;
}
#endif
- /* kernel code + 640k memory hole (later should not be needed, but
- be paranoid for now) */
- if (last >= 640*1024 && addr < 1024*1024) {
- *addrp = 1024*1024;
- return 1;
- }
- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
- *addrp = __pa_symbol(&_end);
+ /* kernel code */
+ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
+ *addrp = PAGE_ALIGN(__pa_symbol(&_end));
return 1;
}

if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
- *addrp = ebda_addr + ebda_size;
+ *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
return 1;
}

@@ -141,8 +142,6 @@
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
#else
- extern struct e820map machine_e820;
-
if (!is_initial_xendomain())
return 0;
for (i = 0; i < machine_e820.nr_map; i++) {
@@ -184,7 +183,7 @@
continue;
while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
;
- last = addr + size;
+ last = PAGE_ALIGN(addr) + size;
if (last > ei->addr + ei->size)
continue;
if (last > end)
@@ -194,59 +193,14 @@
return -1UL;
}

-/*
- * Free bootmem based on the e820 table for a node.
- */
-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
-{
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr && last-addr >= PAGE_SIZE)
- free_bootmem_node(pgdat, addr, last-addr);
- }
-}
-
/*
* Find the highest page frame number we have available
*/
unsigned long __init e820_end_of_ram(void)
{
- int i;
unsigned long end_pfn = 0;
+ end_pfn = find_max_pfn_with_active_regions();

- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long start, end;
-
- start = round_up(ei->addr, PAGE_SIZE);
- end = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (start >= end)
- continue;
- if (ei->type == E820_RAM) {
- if (end > end_pfn<<PAGE_SHIFT)
- end_pfn = end>>PAGE_SHIFT;
- } else {
- if (end > end_pfn_map<<PAGE_SHIFT)
- end_pfn_map = end>>PAGE_SHIFT;
- }
- }
-
if (end_pfn > end_pfn_map)
end_pfn_map = end_pfn;
if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
@@ -256,43 +210,10 @@
if (end_pfn > end_pfn_map)
end_pfn = end_pfn_map;

+ printk("end_pfn_map = %lu\n", end_pfn_map);
return end_pfn;
}

-/*
- * Compute how much memory is missing in a range.
- * Unlike the other functions in this file the arguments are in page numbers.
- */
-unsigned long __init
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
-{
- unsigned long ram = 0;
- unsigned long start = start_pfn << PAGE_SHIFT;
- unsigned long end = end_pfn << PAGE_SHIFT;
- int i;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr)
- ram += last - addr;
- }
- return ((end - start) - ram) >> PAGE_SHIFT;
-}
-
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
@@ -333,6 +254,98 @@
}
}

+#ifndef CONFIG_XEN
+/* Mark pages corresponding to given address range as nosave */
+static void __init
+e820_mark_nosave_range(unsigned long start, unsigned long end)
+{
+ unsigned long pfn, max_pfn;
+
+ if (start >= end)
+ return;
+
+ printk("Nosave address range: %016lx - %016lx\n", start, end);
+ max_pfn = end >> PAGE_SHIFT;
+ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
+ if (pfn_valid(pfn))
+ SetPageNosave(pfn_to_page(pfn));
+}
+
+/*
+ * Find the ranges of physical addresses that do not correspond to
+ * e820 RAM areas and mark the corresponding pages as nosave for software
+ * suspend and suspend to RAM.
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(void)
+{
+ int i;
+ unsigned long paddr;
+
+ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
+ for (i = 1; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (paddr < ei->addr)
+ e820_mark_nosave_range(paddr,
+ round_up(ei->addr, PAGE_SIZE));
+
+ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
+ if (ei->type != E820_RAM)
+ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
+ paddr);
+
+ if (paddr >= (end_pfn << PAGE_SHIFT))
+ break;
+ }
+}
+#endif
+
+/* Walk the e820 map and register active regions within a node */
+void __init
+e820_register_active_regions(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ int i;
+ unsigned long ei_startpfn, ei_endpfn;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
+ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
+ >> PAGE_SHIFT;
+
+ /* Skip map entries smaller than a page */
+ if (ei_startpfn >= ei_endpfn)
+ continue;
+
+ /* Check if end_pfn_map should be updated */
+ if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
+ end_pfn_map = ei_endpfn;
+
+ /* Skip if map is outside the node */
+ if (ei->type != E820_RAM ||
+ ei_endpfn <= start_pfn ||
+ ei_startpfn >= end_pfn)
+ continue;
+
+ /* Check for overlaps */
+ if (ei_startpfn < start_pfn)
+ ei_startpfn = start_pfn;
+ if (ei_endpfn > end_pfn)
+ ei_endpfn = end_pfn;
+
+ /* Obey end_user_pfn to save on memmap */
+ if (ei_startpfn >= end_user_pfn)
+ continue;
+ if (ei_endpfn > end_user_pfn)
+ ei_endpfn = end_user_pfn;
+
+ add_active_range(nid, ei_startpfn, ei_endpfn);
+ }
+}
+
/*
* Add a memory region to the kernel e820 map.
*/
@@ -553,13 +566,6 @@
* If we're lucky and live on a modern system, the setup code
* will have given us a memory map that we can use to properly
* set up memory. If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
*/
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
@@ -581,37 +587,20 @@
if (start > end)
return -1;

-#ifndef CONFIG_XEN
- /*
- * Some BIOSes claim RAM in the 640k - 1M region.
- * Not right. Fix it up.
- *
- * This should be removed on Hammer which is supposed to not
- * have non e820 covered ISA mappings there, but I had some strange
- * problems so it stays for now. -AK
- */
- if (type == E820_RAM) {
- if (start < 0x100000ULL && end > 0xA0000ULL) {
- if (start < 0xA0000ULL)
- add_memory_region(start, 0xA0000ULL-start, type);
- if (end <= 0x100000ULL)
- continue;
- start = 0x100000ULL;
- size = end - start;
- }
- }
-#endif
-
add_memory_region(start, size, type);
} while (biosmap++,--nr_map);
return 0;
}

+void early_panic(char *msg)
+{
+ early_printk(msg);
+ panic(msg);
+}
+
#ifndef CONFIG_XEN
void __init setup_memory_region(void)
{
- char *who = "BIOS-e820";
-
/*
* Try to copy the BIOS-supplied E820-map.
*
@@ -619,24 +608,10 @@
* the next section from 1mb->appropriate_mem_k
*/
sanitize_e820_map(E820_MAP, &E820_MAP_NR);
- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
- unsigned long mem_size;
-
- /* compare results from other methods and take the greater */
- if (ALT_MEM_K < EXT_MEM_K) {
- mem_size = EXT_MEM_K;
- who = "BIOS-88";
- } else {
- mem_size = ALT_MEM_K;
- who = "BIOS-e801";
- }
-
- e820.nr_map = 0;
- add_memory_region(0, LOWMEMSIZE(), E820_RAM);
- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
- }
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
+ early_panic("Cannot find a valid memory map");
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
- e820_print_map(who);
+ e820_print_map("BIOS-e820");
}

#else /* CONFIG_XEN */
@@ -668,20 +643,23 @@

sanitize_e820_map(map, (char *)&memmap.nr_entries);

- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
+ if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
+ early_panic("Cannot find a valid memory map");

printk(KERN_INFO "BIOS-provided physical RAM map:\n");
e820_print_map("Xen");
}
#endif

-void __init parse_memopt(char *p, char **from)
-{
+static int __init parse_memopt(char *p)
+{
int i;
unsigned long current_end;
unsigned long end;

- end_user_pfn = memparse(p, from);
+ if (!p)
+ return -EINVAL;
+ end_user_pfn = memparse(p, &p);
end_user_pfn >>= PAGE_SHIFT;

end = end_user_pfn<<PAGE_SHIFT;
@@ -698,27 +676,61 @@
else
add_memory_region(current_end, end - current_end, E820_RAM);
}
+
+ return 0;
}
+early_param("mem", parse_memopt);
+
+static int userdef __initdata;

-void __init parse_memmapopt(char *p, char **from)
+static int __init parse_memmap_opt(char *p)
{
+ char *oldp;
unsigned long long start_at, mem_size;

- mem_size = memparse(p, from);
- p = *from;
+ if (!strcmp(p, "exactmap")) {
+#ifdef CONFIG_CRASH_DUMP
+ /* If we are doing a crash dump, we
+ * still need to know the real mem
+ * size before original memory map is
+ * reset.
+ */
+ e820_register_active_regions(0, 0, -1UL);
+ saved_max_pfn = e820_end_of_ram();
+ remove_all_active_ranges();
+#endif
+ end_pfn_map = 0;
+ e820.nr_map = 0;
+ userdef = 1;
+ return 0;
+ }
+
+ oldp = p;
+ mem_size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
if (*p == '@') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RAM);
} else if (*p == '#') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_ACPI);
} else if (*p == '$') {
- start_at = memparse(p+1, from);
+ start_at = memparse(p+1, &p);
add_memory_region(start_at, mem_size, E820_RESERVED);
} else {
end_user_pfn = (mem_size >> PAGE_SHIFT);
}
- p = *from;
+ return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", parse_memmap_opt);
+
+void finish_e820_parsing(void)
+{
+ if (userdef) {
+ printk(KERN_INFO "user-defined physical RAM map:\n");
+ e820_print_map("user");
+ }
}

unsigned long pci_mem_start = 0xaeedbabe;
--- a/arch/x86/kernel/early_printk-xen.c
+++ b/arch/x86/kernel/early_printk-xen.c
@@ -244,20 +244,16 @@

static int __initdata keep_early;

-int __init setup_early_printk(char *opt)
+static int __init setup_early_printk(char *buf)
{
- char *space;
- char buf[256];
+ if (!buf)
+ return 0;

if (early_console_initialized)
- return 1;
-
- strlcpy(buf,opt,sizeof(buf));
- space = strchr(buf, ' ');
- if (space)
- *space = 0;
+ return 0;
+ early_console_initialized = 1;

- if (strstr(buf,"keep"))
+ if (strstr(buf, "keep"))
keep_early = 1;

if (!strncmp(buf, "serial", 6)) {
@@ -281,11 +277,12 @@
early_console = &simnow_console;
keep_early = 1;
}
- early_console_initialized = 1;
register_console(early_console);
return 0;
}

+early_param("earlyprintk", setup_early_printk);
+
void __init disable_early_printk(void)
{
if (!early_console_initialized || !early_console)
@@ -299,4 +296,3 @@
}
}

-__setup("earlyprintk=", setup_early_printk);
--- a/arch/x86/kernel/entry_32-xen.S
+++ b/arch/x86/kernel/entry_32-xen.S
@@ -80,8 +80,12 @@
NMI_MASK = 0x80000000

#ifndef CONFIG_XEN
-#define DISABLE_INTERRUPTS cli
-#define ENABLE_INTERRUPTS sti
+/* These are replaces for paravirtualization */
+#define DISABLE_INTERRUPTS cli
+#define ENABLE_INTERRUPTS sti
+#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
+#define INTERRUPT_RETURN iret
+#define GET_CR0_INTO_EAX movl %cr0, %eax
#else
/* Offsets into shared_info_t. */
#define evtchn_upcall_pending /* 0 */
@@ -99,15 +103,29 @@

#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
__DISABLE_INTERRUPTS
#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
__ENABLE_INTERRUPTS
-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
+ __TEST_PENDING ; \
+ jnz 14f # process more events if necessary... ; \
+ movl ESI(%esp), %esi ; \
+ sysexit ; \
+14: __DISABLE_INTERRUPTS ; \
+ TRACE_IRQS_OFF ; \
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
+ push %esp ; \
+ call evtchn_do_upcall ; \
+ add $4,%esp ; \
+ jmp ret_from_intr
+#define INTERRUPT_RETURN iret
#endif

#ifdef CONFIG_PREEMPT
-#define preempt_stop cli; TRACE_IRQS_OFF
+#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
#else
#define preempt_stop
#define resume_kernel restore_nocheck
@@ -206,18 +224,21 @@

#define RING0_INT_FRAME \
CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 3*4;\
/*CFI_OFFSET cs, -2*4;*/\
CFI_OFFSET eip, -3*4

#define RING0_EC_FRAME \
CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, 4*4;\
/*CFI_OFFSET cs, -2*4;*/\
CFI_OFFSET eip, -3*4

#define RING0_PTREGS_FRAME \
CFI_STARTPROC simple;\
+ CFI_SIGNAL_FRAME;\
CFI_DEF_CFA esp, OLDESP-EBX;\
/*CFI_OFFSET cs, CS-OLDESP;*/\
CFI_OFFSET eip, EIP-OLDESP;\
@@ -263,8 +284,9 @@
check_userspace:
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
movb CS(%esp), %al
- testl $(VM_MASK | 2), %eax
- jz resume_kernel
+ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -277,7 +299,7 @@

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- cli
+ DISABLE_INTERRUPTS
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
@@ -297,6 +319,7 @@
# sysenter call handler stub
ENTRY(sysenter_entry)
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA esp, 0
CFI_REGISTER esp, ebp
movl SYSENTER_stack_esp0(%esp),%esp
@@ -305,7 +328,7 @@
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
- sti
+ ENABLE_INTERRUPTS
pushl $(__USER_DS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ss, 0*/
@@ -359,26 +382,8 @@
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
xorl %ebp,%ebp
-#ifdef CONFIG_XEN
TRACE_IRQS_ON
- __ENABLE_INTERRUPTS
-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
- __TEST_PENDING
- jnz 14f # process more events if necessary...
- movl ESI(%esp), %esi
- sysexit
-14: __DISABLE_INTERRUPTS
- TRACE_IRQS_OFF
-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
- push %esp
- call evtchn_do_upcall
- add $4,%esp
- jmp ret_from_intr
-#else
- TRACE_IRQS_ON
- sti
- sysexit
-#endif /* !CONFIG_XEN */
+ ENABLE_INTERRUPTS_SYSEXIT
CFI_ENDPROC

# pv sysenter call handler stub
@@ -444,8 +449,8 @@
# See comments in process.c:copy_thread() for details.
movb OLDSS(%esp), %ah
movb CS(%esp), %al
- andl $(VM_MASK | (4 << 8) | 3), %eax
- cmpl $((4 << 8) | 3), %eax
+ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
@@ -467,12 +472,11 @@
RESTORE_REGS
addl $4, %esp
CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
#ifndef CONFIG_XEN
- TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS
#endif
pushl $0 # no error code
pushl $do_iret_error
@@ -498,7 +502,7 @@
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
CFI_ADJUST_CFA_OFFSET 8
- cli
+ DISABLE_INTERRUPTS
TRACE_IRQS_OFF
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
@@ -508,7 +512,7 @@
TRACE_IRQS_IRET
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
@@ -524,7 +528,7 @@
RESTORE_REGS
addl $4, %esp
CFI_ADJUST_CFA_OFFSET -4
-1: iret
+1: INTERRUPT_RETURN
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
@@ -713,11 +717,9 @@
#define UNWIND_ESPFIX_STACK
#endif

-ENTRY(divide_error)
- RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+ RING0_EC_FRAME
+ pushl $do_page_fault
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
@@ -767,6 +769,7 @@
call *%edi
jmp ret_from_exception
CFI_ENDPROC
+KPROBE_END(page_fault)

#ifdef CONFIG_XEN
# A note on the "critical region" in our callback handler.
@@ -926,7 +929,7 @@
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
#ifndef CONFIG_XEN
- movl %cr0, %eax
+ GET_CR0_INTO_EAX
testl $0x4, %eax # EM (math emulation bit)
je device_available_emulate
pushl $0 # temporary storage for ORIG_EIP
@@ -961,9 +964,15 @@
jne ok; \
label: \
movl SYSENTER_stack_esp0+offset(%esp),%esp; \
+ CFI_DEF_CFA esp, 0; \
+ CFI_UNDEFINED eip; \
pushfl; \
+ CFI_ADJUST_CFA_OFFSET 4; \
pushl $__KERNEL_CS; \
- pushl $sysenter_past_esp
+ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $sysenter_past_esp; \
+ CFI_ADJUST_CFA_OFFSET 4; \
+ CFI_REL_OFFSET eip, 0
#endif /* CONFIG_XEN */

KPROBE_ENTRY(debug)
@@ -982,7 +991,8 @@
call do_debug
jmp ret_from_exception
CFI_ENDPROC
- .previous .text
+KPROBE_END(debug)
+
#ifndef CONFIG_XEN
/*
* NMI is doubly nasty. It can happen _while_ we're handling
@@ -992,7 +1002,7 @@
* check whether we got an NMI on the debug path where the debug
* fault happened on the sysenter path.
*/
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
RING0_INT_FRAME
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
@@ -1017,6 +1027,7 @@
cmpl $sysenter_entry,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
+ /* We have a RING0_INT_FRAME here */
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
@@ -1027,9 +1038,12 @@
CFI_ENDPROC

nmi_stack_fixup:
+ RING0_INT_FRAME
FIX_STACK(12,nmi_stack_correct, 1)
jmp nmi_stack_correct
+
nmi_debug_stack_check:
+ /* We have a RING0_INT_FRAME here */
cmpw $__KERNEL_CS,16(%esp)
jne nmi_stack_correct
cmpl $debug,(%esp)
@@ -1040,8 +1054,10 @@
jmp nmi_stack_correct

nmi_16bit_stack:
- RING0_INT_FRAME
- /* create the pointer to lss back */
+ /* We have a RING0_INT_FRAME here.
+ *
+ * create the pointer to lss back
+ */
pushl %ss
CFI_ADJUST_CFA_OFFSET 4
pushl %esp
@@ -1062,14 +1078,14 @@
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to 16bit stack
-1: iret
+1: INTERRUPT_RETURN
CFI_ENDPROC
.section __ex_table,"a"
.align 4
.long 1b,iret_exc
.previous
#else
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
RING0_INT_FRAME
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
@@ -1081,6 +1097,7 @@
jmp restore_all
CFI_ENDPROC
#endif
+KPROBE_END(nmi)

KPROBE_ENTRY(int3)
RING0_INT_FRAME
@@ -1092,7 +1109,7 @@
call do_int3
jmp ret_from_exception
CFI_ENDPROC
- .previous .text
+KPROBE_END(int3)

ENTRY(overflow)
RING0_INT_FRAME
@@ -1157,7 +1174,7 @@
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
- .previous .text
+KPROBE_END(general_protection)

ENTRY(alignment_check)
RING0_EC_FRAME
@@ -1166,13 +1183,14 @@
jmp error_code
CFI_ENDPROC

-KPROBE_ENTRY(page_fault)
- RING0_EC_FRAME
- pushl $do_page_fault
+ENTRY(divide_error)
+ RING0_INT_FRAME
+ pushl $0 # no error code
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl $do_divide_error
CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
- .previous .text

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
@@ -1234,6 +1252,19 @@
jmp error_code
CFI_ENDPROC

+ENTRY(kernel_thread_helper)
+ pushl $0 # fake return address for unwinder
+ CFI_STARTPROC
+ movl %edx,%eax
+ push %edx
+ CFI_ADJUST_CFA_OFFSET 4
+ call *%ebx
+ push %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ call do_exit
+ CFI_ENDPROC
+ENDPROC(kernel_thread_helper)
+
.section .rodata,"a"
#include "syscall_table.S"

--- a/arch/x86/kernel/entry_64-xen.S
+++ b/arch/x86/kernel/entry_64-xen.S
@@ -4,9 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- *
- * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.1 2008-05-23 17:35:36 niro Exp $
- *
* Jun Nakajima <jun.nakajima@intel.com>
* Asit Mallick <asit.k.mallick@intel.com>
* Modified for Xen
@@ -26,15 +23,25 @@
* at the top of the kernel process stack.
* - partial stack frame: partially saved registers upto R11.
* - full stack frame: Like partial stack frame, but all register saved.
- *
- * TODO:
- * - schedule it carefully for the final hardware.
+ *
+ * Some macro usage:
+ * - CFI macros are used to generate dwarf2 unwind information for better
+ * backtraces. They don't change any code.
+ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
+ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
+ * There are unfortunately lots of special cases where some registers
+ * not touched. The macro is a big mess that should be cleaned up.
+ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
+ * Gives a full stack frame.
+ * - ENTRY/END Define functions in the symbol table.
+ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
+ * frame that is otherwise undefined after a SYSCALL
+ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
+ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
*/

-#define ASSEMBLY 1
#include <linux/linkage.h>
#include <asm/segment.h>
-#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
@@ -117,6 +124,7 @@
.macro CFI_DEFAULT_STACK start=1,adj=0
.if \start
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
.else
CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
@@ -207,6 +215,7 @@
*/
.macro _frame ref
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-\ref
/*CFI_REL_OFFSET ss,SS-\ref*/
CFI_REL_OFFSET rsp,RSP-\ref
@@ -334,6 +343,8 @@
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
+ movq $-ENOSYS,%rcx
+ cmova %rcx,%rax
ja 1f
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
@@ -349,6 +360,7 @@
*/
ENTRY(int_ret_from_sys_call)
CFI_STARTPROC simple
+ CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
@@ -583,8 +595,7 @@
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
- .p2align
-retint_kernel:
+ENTRY(retint_kernel)
cmpl $0,threadinfo_preempt_count(%rcx)
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
@@ -644,7 +655,6 @@
END(call_function_interrupt)
#endif

-#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)
@@ -656,7 +666,6 @@
ENTRY(spurious_interrupt)
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
-#endif
#endif /* !CONFIG_XEN */

/*
@@ -755,7 +764,9 @@
testl $3,CS(%rsp)
jnz paranoid_userspace\trace
paranoid_swapgs\trace:
+ .if \trace
TRACE_IRQS_IRETQ 0
+ .endif
swapgs
paranoid_restore\trace:
RESTORE_ALL 8
@@ -802,7 +813,7 @@
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
-ENTRY(error_entry)
+KPROBE_ENTRY(error_entry)
_frame RDI
CFI_REL_OFFSET rax,0
/* rdi slot contains rax, oldrax contains error code */
@@ -896,7 +907,7 @@
jmp error_sti
#endif
CFI_ENDPROC
-END(error_entry)
+KPROBE_END(error_entry)

ENTRY(hypervisor_callback)
zeroentry do_hypervisor_callback
@@ -936,26 +947,6 @@
CFI_ENDPROC
END(do_hypervisor_callback)

-#ifdef CONFIG_X86_LOCAL_APIC
-KPROBE_ENTRY(nmi)
- zeroentry do_nmi_callback
-ENTRY(do_nmi_callback)
- CFI_STARTPROC
- addq $8, %rsp
- CFI_ENDPROC
- CFI_DEFAULT_STACK
- call do_nmi
- orl $NMI_MASK,EFLAGS(%rsp)
- RESTORE_REST
- XEN_BLOCK_EVENTS(%rsi)
- TRACE_IRQS_OFF
- GET_THREAD_INFO(%rcx)
- jmp retint_restore_args
- CFI_ENDPROC
- .previous .text
-END(nmi)
-#endif
-
ALIGN
restore_all_enable_events:
CFI_DEFAULT_STACK adj=1
@@ -1121,7 +1112,7 @@
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
*/
-ENTRY(execve)
+ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
@@ -1135,12 +1126,11 @@
UNFAKE_STACK_FRAME
ret
CFI_ENDPROC
-ENDPROC(execve)
+ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
errorentry do_page_fault
-END(page_fault)
- .previous .text
+KPROBE_END(page_fault)

ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
@@ -1162,25 +1152,25 @@
zeroentry do_debug
/* paranoidexit
CFI_ENDPROC */
-END(debug)
- .previous .text
+KPROBE_END(debug)

-#if 0
- /* runs on exception stack */
KPROBE_ENTRY(nmi)
- INTR_FRAME
- pushq $-1
- CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_nmi, 0, 0
-#ifdef CONFIG_TRACE_IRQFLAGS
- paranoidexit 0
-#else
- jmp paranoid_exit1
- CFI_ENDPROC
-#endif
-END(nmi)
- .previous .text
-#endif
+ zeroentry do_nmi_callback
+KPROBE_END(nmi)
+do_nmi_callback:
+ CFI_STARTPROC
+ addq $8, %rsp
+ CFI_ENDPROC
+ CFI_DEFAULT_STACK
+ call do_nmi
+ orl $NMI_MASK,EFLAGS(%rsp)
+ RESTORE_REST
+ XEN_BLOCK_EVENTS(%rsi)
+ TRACE_IRQS_OFF
+ GET_THREAD_INFO(%rcx)
+ jmp retint_restore_args
+ CFI_ENDPROC
+END(do_nmi_callback)

KPROBE_ENTRY(int3)
/* INTR_FRAME
@@ -1189,8 +1179,7 @@
zeroentry do_int3
/* jmp paranoid_exit1
CFI_ENDPROC */
-END(int3)
- .previous .text
+KPROBE_END(int3)

ENTRY(overflow)
zeroentry do_overflow
@@ -1241,8 +1230,7 @@

KPROBE_ENTRY(general_protection)
errorentry do_general_protection
-END(general_protection)
- .previous .text
+KPROBE_END(general_protection)

ENTRY(alignment_check)
errorentry do_alignment_check
--- a/arch/x86/kernel/genapic_xen_64.c
+++ b/arch/x86/kernel/genapic_xen_64.c
@@ -71,6 +71,13 @@
return cpu_online_map;
}

+static cpumask_t xen_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
/*
* Set up the logical destination ID.
* Do nothing, not called now.
@@ -147,8 +154,8 @@
.int_delivery_mode = dest_LowestPrio,
#endif
.int_dest_mode = (APIC_DEST_LOGICAL != 0),
- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
.target_cpus = xen_target_cpus,
+ .vector_allocation_domain = xen_vector_allocation_domain,
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
.apic_id_registered = xen_apic_id_registered,
#endif
--- a/arch/x86/kernel/head64-xen.c
+++ b/arch/x86/kernel/head64-xen.c
@@ -54,11 +54,9 @@
new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
if (!new_data) {
if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
- printk("so old bootloader that it does not support commandline?!\n");
return;
}
new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
- printk("old bootloader convention, maybe loadlin?\n");
}
command_line = (char *) ((u64)(new_data));
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
@@ -70,25 +68,6 @@
memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
saved_command_line[max_cmdline-1] = '\0';
#endif
- printk("Bootdata ok (command line is %s)\n", saved_command_line);
-}
-
-static void __init setup_boot_cpu_data(void)
-{
- unsigned int dummy, eax;
-
- /* get vendor info */
- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
-
- /* get cpu type */
- cpuid(1, &eax, &dummy, &dummy,
- (unsigned int *) &boot_cpu_data.x86_capability);
- boot_cpu_data.x86 = (eax >> 8) & 0xf;
- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
- boot_cpu_data.x86_mask = eax & 0xf;
}

#include <xen/interface/memory.h>
@@ -101,7 +80,6 @@
{
struct xen_machphys_mapping mapping;
unsigned long machine_to_phys_nr_ents;
- char *s;
int i;

setup_xen_features();
@@ -128,10 +106,7 @@
asm volatile("lidt %0" :: "m" (idt_descr));
#endif

- /*
- * This must be called really, really early:
- */
- lockdep_init();
+ early_printk("Kernel alive\n");

for (i = 0; i < NR_CPUS; i++)
cpu_pda(i) = &boot_cpu_pda[i];
@@ -141,22 +116,5 @@
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
#endif
- s = strstr(saved_command_line, "earlyprintk=");
- if (s != NULL)
- setup_early_printk(strchr(s, '=') + 1);
-#ifdef CONFIG_NUMA
- s = strstr(saved_command_line, "numa=");
- if (s != NULL)
- numa_setup(s+5);
-#endif
-#ifdef CONFIG_X86_IO_APIC
- if (strstr(saved_command_line, "disableapic"))
- disable_apic = 1;
-#endif
- /* You need early console to see that */
- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
- panic("Kernel too big for kernel mapping\n");
-
- setup_boot_cpu_data();
start_kernel();
}
--- a/arch/x86/kernel/head_32-xen.S
+++ b/arch/x86/kernel/head_32-xen.S
@@ -62,7 +62,7 @@
movl %eax,%gs
cld # gcc2 wants the direction flag cleared at all times

- pushl %eax # fake return address
+ pushl $0 # fake return address for unwinder
jmp start_kernel

#define HYPERCALL_PAGE_OFFSET 0x1000
--- a/arch/x86/kernel/head_64-xen.S
+++ b/arch/x86/kernel/head_64-xen.S
@@ -5,9 +5,6 @@
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
- *
- * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.1 2008-05-23 17:35:36 niro Exp $
- *
* Jun Nakajima <jun.nakajima@intel.com>
* Modified for Xen
*/
@@ -149,7 +146,7 @@
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
- .quad 0 /* unused */
+ .quad 0x0000f40000000000 /* node/CPU stored in limit */
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
--- a/arch/x86/kernel/io_apic_32-xen.c
+++ b/arch/x86/kernel/io_apic_32-xen.c
@@ -31,6 +31,9 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/sysdev.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/htirq.h>

#include <asm/io.h>
#include <asm/smp.h>
@@ -38,13 +41,15 @@
#include <asm/timer.h>
#include <asm/i8259.h>
#include <asm/nmi.h>
+#include <asm/msidef.h>
+#include <asm/hypertransport.h>

#include <mach_apic.h>
+#include <mach_apicdef.h>

#include "io_ports.h"

#ifdef CONFIG_XEN
-
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>

@@ -55,32 +60,7 @@

unsigned long io_apic_irqs;

-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
-{
- struct physdev_apic apic_op;
- int ret;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
- if (ret)
- return ret;
- return apic_op.value;
-}
-
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
- struct physdev_apic apic_op;
-
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
- apic_op.reg = reg;
- apic_op.value = value;
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
-}
-
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
-
+#define clear_IO_APIC() ((void)0)
#endif /* CONFIG_XEN */

int (*ioapic_renumber_irq)(int ioapic, int irq);
@@ -105,7 +85,7 @@
*/
int nr_ioapic_registers[MAX_IO_APICS];

-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;

/*
* Rough estimation of how many shared IRQs there are, can
@@ -125,12 +105,122 @@
int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];

-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+#ifndef CONFIG_XEN
+struct io_apic {
+ unsigned int index;
+ unsigned int unused[3];
+ unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+#endif
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ return readl(&io_apic->data);
+#else
+ struct physdev_apic apic_op;
+ int ret;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+ if (ret)
+ return ret;
+ return apic_op.value;
+#endif
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+#ifndef CONFIG_XEN
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
+ writel(reg, &io_apic->index);
+ writel(value, &io_apic->data);
+#else
+ struct physdev_apic apic_op;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ apic_op.value = value;
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
+#endif
+}
+
+#ifndef CONFIG_XEN
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
1805     + volatile struct io_apic *io_apic = io_apic_base(apic);
1806     + if (sis_apic_bug)
1807     + writel(reg, &io_apic->index);
1808     + writel(value, &io_apic->data);
1809     +}
1810     #else
1811     -#define vector_to_irq(vector) (vector)
1812     +#define io_apic_modify io_apic_write
1813     +#endif
1814     +
1815     +union entry_union {
1816     + struct { u32 w1, w2; };
1817     + struct IO_APIC_route_entry entry;
1818     +};
1819     +
1820     +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
1821     +{
1822     + union entry_union eu;
1823     + unsigned long flags;
1824     + spin_lock_irqsave(&ioapic_lock, flags);
1825     + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
1826     + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
1827     + spin_unlock_irqrestore(&ioapic_lock, flags);
1828     + return eu.entry;
1829     +}
1830     +
1831     +/*
1832     + * When we write a new IO APIC routing entry, we need to write the high
1833     + * word first! If the mask bit in the low word is clear, we will enable
1834     + * the interrupt, and we need to make sure the entry is fully populated
1835     + * before that happens.
1836     + */
1837     +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
1838     +{
1839     + unsigned long flags;
1840     + union entry_union eu;
1841     + eu.entry = e;
1842     + spin_lock_irqsave(&ioapic_lock, flags);
1843     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1844     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1845     + spin_unlock_irqrestore(&ioapic_lock, flags);
1846     +}
1847     +
1848     +#ifndef CONFIG_XEN
1849     +/*
1850     + * When we mask an IO APIC routing entry, we need to write the low
1851     + * word first, in order to set the mask bit before we change the
1852     + * high bits!
1853     + */
1854     +static void ioapic_mask_entry(int apic, int pin)
1855     +{
1856     + unsigned long flags;
1857     + union entry_union eu = { .entry.mask = 1 };
1858     +
1859     + spin_lock_irqsave(&ioapic_lock, flags);
1860     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1861     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1862     + spin_unlock_irqrestore(&ioapic_lock, flags);
1863     +}
1864     #endif
1865    
1866     /*
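
A short sketch of the read-modify-write cycle the new io_apic_modify() is
for: after io_apic_read() has set up the index register, only the data
register needs another write, except on the buggy SiS parts, where the
helper re-writes the index first. The function name below is hypothetical;
the helpers and ioapic_lock are the ones introduced above.

    /* mask the RTE of (apic, pin): the mask bit is bit 16 of the low word */
    static void example_mask_rte(unsigned int apic, unsigned int pin)
    {
            unsigned long flags;
            unsigned int reg;

            spin_lock_irqsave(&ioapic_lock, flags);
            reg = io_apic_read(apic, 0x10 + 2 * pin);  /* sets the index */
            io_apic_modify(apic, 0x10 + 2 * pin, reg | 0x00010000);
            spin_unlock_irqrestore(&ioapic_lock, flags);
    }
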
1867     @@ -156,9 +246,7 @@
1868     entry->pin = pin;
1869     }
1870    
1871     -#ifdef CONFIG_XEN
1872     -#define clear_IO_APIC() ((void)0)
1873     -#else
1874     +#ifndef CONFIG_XEN
1875     /*
1876     * Reroute an IRQ to a different pin.
1877     */
1878     @@ -243,25 +331,16 @@
1879     static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
1880     {
1881     struct IO_APIC_route_entry entry;
1882     - unsigned long flags;
1883    
1884     /* Check delivery_mode to be sure we're not clearing an SMI pin */
1885     - spin_lock_irqsave(&ioapic_lock, flags);
1886     - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1887     - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1888     - spin_unlock_irqrestore(&ioapic_lock, flags);
1889     + entry = ioapic_read_entry(apic, pin);
1890     if (entry.delivery_mode == dest_SMI)
1891     return;
1892    
1893     /*
1894     * Disable it in the IO-APIC irq-routing table:
1895     */
1896     - memset(&entry, 0, sizeof(entry));
1897     - entry.mask = 1;
1898     - spin_lock_irqsave(&ioapic_lock, flags);
1899     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
1900     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
1901     - spin_unlock_irqrestore(&ioapic_lock, flags);
1902     + ioapic_mask_entry(apic, pin);
1903     }
1904    
1905     static void clear_IO_APIC (void)
1906     @@ -301,7 +380,7 @@
1907     break;
1908     entry = irq_2_pin + entry->next;
1909     }
1910     - set_irq_info(irq, cpumask);
1911     + set_native_irq_info(irq, cpumask);
1912     spin_unlock_irqrestore(&ioapic_lock, flags);
1913     }
1914    
1915     @@ -1207,40 +1286,40 @@
1916     /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1917     u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
1918    
1919     -int assign_irq_vector(int irq)
1920     +static int __assign_irq_vector(int irq)
1921     {
1922     - unsigned long flags;
1923     int vector;
1924     struct physdev_irq irq_op;
1925    
1926     - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1927     -
1928     - spin_lock_irqsave(&vector_lock, flags);
1929     + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1930    
1931     - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1932     - spin_unlock_irqrestore(&vector_lock, flags);
1933     - return IO_APIC_VECTOR(irq);
1934     - }
1935     + if (irq_vector[irq] > 0)
1936     + return irq_vector[irq];
1937    
1938     irq_op.irq = irq;
1939     - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1940     - spin_unlock_irqrestore(&vector_lock, flags);
1941     + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1942     return -ENOSPC;
1943     - }
1944    
1945     vector = irq_op.vector;
1946     - vector_irq[vector] = irq;
1947     - if (irq != AUTO_ASSIGN)
1948     - IO_APIC_VECTOR(irq) = vector;
1949     + irq_vector[irq] = vector;
1950     +
1951     + return vector;
1952     +}
1953    
1954     +static int assign_irq_vector(int irq)
1955     +{
1956     + unsigned long flags;
1957     + int vector;
1958     +
1959     + spin_lock_irqsave(&vector_lock, flags);
1960     + vector = __assign_irq_vector(irq);
1961     spin_unlock_irqrestore(&vector_lock, flags);
1962    
1963     return vector;
1964     }
1965    
1966     #ifndef CONFIG_XEN
1967     -static struct hw_interrupt_type ioapic_level_type;
1968     -static struct hw_interrupt_type ioapic_edge_type;
1969     +static struct irq_chip ioapic_chip;
1970    
1971     #define IOAPIC_AUTO -1
1972     #define IOAPIC_EDGE 0
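
The split above follows a common locking pattern: the double-underscore core
assumes vector_lock is already held, so callers that need several operations
inside one critical section (create_irq() further down in this patch does
exactly that) can use it directly. A generic sketch with hypothetical names:

    static DEFINE_SPINLOCK(example_lock);

    static int __example_alloc(int id)
    {
            /* caller must hold example_lock */
            return do_alloc(id);            /* hypothetical worker */
    }

    static int example_alloc(int id)
    {
            unsigned long flags;
            int ret;

            spin_lock_irqsave(&example_lock, flags);
            ret = __example_alloc(id);
            spin_unlock_irqrestore(&example_lock, flags);
            return ret;
    }
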
1973     @@ -1248,16 +1327,16 @@
1974    
1975     static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1976     {
1977     - unsigned idx;
1978     -
1979     - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1980     -
1981     if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1982     trigger == IOAPIC_LEVEL)
1983     - irq_desc[idx].chip = &ioapic_level_type;
1984     - else
1985     - irq_desc[idx].chip = &ioapic_edge_type;
1986     - set_intr_gate(vector, interrupt[idx]);
1987     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1988     + handle_fasteoi_irq, "fasteoi");
1989     + else {
1990     + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1991     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1992     + handle_edge_irq, "edge");
1993     + }
1994     + set_intr_gate(vector, interrupt[irq]);
1995     }
1996     #else
1997     #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
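
The handler names passed above select the generic flow handlers that 2.6.19
introduced: handle_fasteoi_irq() runs the action first and signals
completion through chip->eoi (the quirk handler below), while
handle_edge_irq() acks via chip->ack up front. Roughly, and much simplified
against kernel/irq/chip.c:

    /* rough shape of the fasteoi flow; not the real implementation */
    void example_fasteoi_flow(unsigned int irq, struct irq_desc *desc)
    {
            spin_lock(&desc->lock);
            if (unlikely(desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)))
                    goto out;       /* re-entered or disabled: just EOI */
            desc->status |= IRQ_INPROGRESS;
            spin_unlock(&desc->lock);

            handle_IRQ_event(irq, desc->action);    /* run driver handlers */

            spin_lock(&desc->lock);
            desc->status &= ~IRQ_INPROGRESS;
    out:
            desc->chip->eoi(irq);   /* -> ack_ioapic_quirk_irq() below */
            spin_unlock(&desc->lock);
    }
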
1998     @@ -1328,9 +1407,8 @@
1999     if (!apic && (irq < 16))
2000     disable_8259A_irq(irq);
2001     }
2002     + ioapic_write_entry(apic, pin, entry);
2003     spin_lock_irqsave(&ioapic_lock, flags);
2004     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
2005     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
2006     set_native_irq_info(irq, TARGET_CPUS);
2007     spin_unlock_irqrestore(&ioapic_lock, flags);
2008     }
2009     @@ -1347,7 +1425,6 @@
2010     static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
2011     {
2012     struct IO_APIC_route_entry entry;
2013     - unsigned long flags;
2014    
2015     memset(&entry,0,sizeof(entry));
2016    
2017     @@ -1372,15 +1449,13 @@
2018     * The timer IRQ doesn't have to know that behind the
2019     * scenes we have an 8259A master in AEOI mode ...
2020     */
2021     - irq_desc[0].chip = &ioapic_edge_type;
2022     + irq_desc[0].chip = &ioapic_chip;
2023     + set_irq_handler(0, handle_edge_irq);
2024    
2025     /*
2026     * Add it to the IO-APIC irq-routing table:
2027     */
2028     - spin_lock_irqsave(&ioapic_lock, flags);
2029     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
2030     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
2031     - spin_unlock_irqrestore(&ioapic_lock, flags);
2032     + ioapic_write_entry(apic, pin, entry);
2033    
2034     enable_8259A_irq(0);
2035     }
2036     @@ -1490,10 +1565,7 @@
2037     for (i = 0; i <= reg_01.bits.entries; i++) {
2038     struct IO_APIC_route_entry entry;
2039    
2040     - spin_lock_irqsave(&ioapic_lock, flags);
2041     - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
2042     - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
2043     - spin_unlock_irqrestore(&ioapic_lock, flags);
2044     + entry = ioapic_read_entry(apic, i);
2045    
2046     printk(KERN_DEBUG " %02x %03X %02X ",
2047     i,
2048     @@ -1513,17 +1585,12 @@
2049     );
2050     }
2051     }
2052     - if (use_pci_vector())
2053     - printk(KERN_INFO "Using vector-based indexing\n");
2054     printk(KERN_DEBUG "IRQ to pin mappings:\n");
2055     for (i = 0; i < NR_IRQS; i++) {
2056     struct irq_pin_list *entry = irq_2_pin + i;
2057     if (entry->pin < 0)
2058     continue;
2059     - if (use_pci_vector() && !platform_legacy_irq(i))
2060     - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
2061     - else
2062     - printk(KERN_DEBUG "IRQ%d ", i);
2063     + printk(KERN_DEBUG "IRQ%d ", i);
2064     for (;;) {
2065     printk("-> %d:%d", entry->apic, entry->pin);
2066     if (!entry->next)
2067     @@ -1709,10 +1776,7 @@
2068     /* See if any of the pins is in ExtINT mode */
2069     for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
2070     struct IO_APIC_route_entry entry;
2071     - spin_lock_irqsave(&ioapic_lock, flags);
2072     - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2073     - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2074     - spin_unlock_irqrestore(&ioapic_lock, flags);
2075     + entry = ioapic_read_entry(apic, pin);
2076    
2077    
2078     /* If the interrupt line is enabled and in ExtInt mode
2079     @@ -1770,7 +1834,6 @@
2080     */
2081     if (ioapic_i8259.pin != -1) {
2082     struct IO_APIC_route_entry entry;
2083     - unsigned long flags;
2084    
2085     memset(&entry, 0, sizeof(entry));
2086     entry.mask = 0; /* Enabled */
2087     @@ -1787,12 +1850,7 @@
2088     /*
2089     * Add it to the IO-APIC irq-routing table:
2090     */
2091     - spin_lock_irqsave(&ioapic_lock, flags);
2092     - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
2093     - *(((int *)&entry)+1));
2094     - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
2095     - *(((int *)&entry)+0));
2096     - spin_unlock_irqrestore(&ioapic_lock, flags);
2097     + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
2098     }
2099     disconnect_bsp_APIC(ioapic_i8259.pin != -1);
2100     #endif
2101     @@ -1959,6 +2017,8 @@
2102     */
2103    
2104     /*
2105     + * Startup quirk:
2106     + *
2107     * Starting up an edge-triggered IO-APIC interrupt is
2108     * nasty - we need to make sure that we get the edge.
2109     * If it is already asserted for some reason, we need
2110     @@ -1966,8 +2026,10 @@
2111     *
2112     * This is not complete - we should be able to fake
2113     * an edge even if it isn't on the 8259A...
2114     + *
2115     + * (We do this for level-triggered IRQs too - it cannot hurt.)
2116     */
2117     -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
2118     +static unsigned int startup_ioapic_irq(unsigned int irq)
2119     {
2120     int was_pending = 0;
2121     unsigned long flags;
2122     @@ -1984,47 +2046,18 @@
2123     return was_pending;
2124     }
2125    
2126     -/*
2127     - * Once we have recorded IRQ_PENDING already, we can mask the
2128     - * interrupt for real. This prevents IRQ storms from unhandled
2129     - * devices.
2130     - */
2131     -static void ack_edge_ioapic_irq(unsigned int irq)
2132     -{
2133     - move_irq(irq);
2134     - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
2135     - == (IRQ_PENDING | IRQ_DISABLED))
2136     - mask_IO_APIC_irq(irq);
2137     - ack_APIC_irq();
2138     -}
2139     -
2140     -/*
2141     - * Level triggered interrupts can just be masked,
2142     - * and shutting down and starting up the interrupt
2143     - * is the same as enabling and disabling them -- except
2144     - * with a startup need to return a "was pending" value.
2145     - *
2146     - * Level triggered interrupts are special because we
2147     - * do not touch any IO-APIC register while handling
2148     - * them. We ack the APIC in the end-IRQ handler, not
2149     - * in the start-IRQ-handler. Protection against reentrance
2150     - * from the same interrupt is still provided, both by the
2151     - * generic IRQ layer and by the fact that an unacked local
2152     - * APIC does not accept IRQs.
2153     - */
2154     -static unsigned int startup_level_ioapic_irq (unsigned int irq)
2155     +static void ack_ioapic_irq(unsigned int irq)
2156     {
2157     - unmask_IO_APIC_irq(irq);
2158     -
2159     - return 0; /* don't check for pending */
2160     + move_native_irq(irq);
2161     + ack_APIC_irq();
2162     }
2163    
2164     -static void end_level_ioapic_irq (unsigned int irq)
2165     +static void ack_ioapic_quirk_irq(unsigned int irq)
2166     {
2167     unsigned long v;
2168     int i;
2169    
2170     - move_irq(irq);
2171     + move_native_irq(irq);
2172     /*
2173     * It appears there is an erratum which affects at least version 0x11
2174     * of I/O APIC (that's the 82093AA and cores integrated into various
2175     @@ -2044,7 +2077,7 @@
2176     * operation to prevent an edge-triggered interrupt escaping meanwhile.
2177     * The idea is from Manfred Spraul. --macro
2178     */
2179     - i = IO_APIC_VECTOR(irq);
2180     + i = irq_vector[irq];
2181    
2182     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2183    
2184     @@ -2059,104 +2092,24 @@
2185     }
2186     }
2187    
2188     -#ifdef CONFIG_PCI_MSI
2189     -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
2190     -{
2191     - int irq = vector_to_irq(vector);
2192     -
2193     - return startup_edge_ioapic_irq(irq);
2194     -}
2195     -
2196     -static void ack_edge_ioapic_vector(unsigned int vector)
2197     -{
2198     - int irq = vector_to_irq(vector);
2199     -
2200     - move_native_irq(vector);
2201     - ack_edge_ioapic_irq(irq);
2202     -}
2203     -
2204     -static unsigned int startup_level_ioapic_vector (unsigned int vector)
2205     -{
2206     - int irq = vector_to_irq(vector);
2207     -
2208     - return startup_level_ioapic_irq (irq);
2209     -}
2210     -
2211     -static void end_level_ioapic_vector (unsigned int vector)
2212     -{
2213     - int irq = vector_to_irq(vector);
2214     -
2215     - move_native_irq(vector);
2216     - end_level_ioapic_irq(irq);
2217     -}
2218     -
2219     -static void mask_IO_APIC_vector (unsigned int vector)
2220     -{
2221     - int irq = vector_to_irq(vector);
2222     -
2223     - mask_IO_APIC_irq(irq);
2224     -}
2225     -
2226     -static void unmask_IO_APIC_vector (unsigned int vector)
2227     -{
2228     - int irq = vector_to_irq(vector);
2229     -
2230     - unmask_IO_APIC_irq(irq);
2231     -}
2232     -
2233     -#ifdef CONFIG_SMP
2234     -static void set_ioapic_affinity_vector (unsigned int vector,
2235     - cpumask_t cpu_mask)
2236     -{
2237     - int irq = vector_to_irq(vector);
2238     -
2239     - set_native_irq_info(vector, cpu_mask);
2240     - set_ioapic_affinity_irq(irq, cpu_mask);
2241     -}
2242     -#endif
2243     -#endif
2244     -
2245     -static int ioapic_retrigger(unsigned int irq)
2246     +static int ioapic_retrigger_irq(unsigned int irq)
2247     {
2248     - send_IPI_self(IO_APIC_VECTOR(irq));
2249     + send_IPI_self(irq_vector[irq]);
2250    
2251     return 1;
2252     }
2253    
2254     -/*
2255     - * Level and edge triggered IO-APIC interrupts need different handling,
2256     - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
2257     - * handled with the level-triggered descriptor, but that one has slightly
2258     - * more overhead. Level-triggered interrupts cannot be handled with the
2259     - * edge-triggered handler, without risking IRQ storms and other ugly
2260     - * races.
2261     - */
2262     -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
2263     - .typename = "IO-APIC-edge",
2264     - .startup = startup_edge_ioapic,
2265     - .shutdown = shutdown_edge_ioapic,
2266     - .enable = enable_edge_ioapic,
2267     - .disable = disable_edge_ioapic,
2268     - .ack = ack_edge_ioapic,
2269     - .end = end_edge_ioapic,
2270     -#ifdef CONFIG_SMP
2271     - .set_affinity = set_ioapic_affinity,
2272     -#endif
2273     - .retrigger = ioapic_retrigger,
2274     -};
2275     -
2276     -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
2277     - .typename = "IO-APIC-level",
2278     - .startup = startup_level_ioapic,
2279     - .shutdown = shutdown_level_ioapic,
2280     - .enable = enable_level_ioapic,
2281     - .disable = disable_level_ioapic,
2282     - .ack = mask_and_ack_level_ioapic,
2283     - .end = end_level_ioapic,
2284     +static struct irq_chip ioapic_chip __read_mostly = {
2285     + .name = "IO-APIC",
2286     + .startup = startup_ioapic_irq,
2287     + .mask = mask_IO_APIC_irq,
2288     + .unmask = unmask_IO_APIC_irq,
2289     + .ack = ack_ioapic_irq,
2290     + .eoi = ack_ioapic_quirk_irq,
2291     #ifdef CONFIG_SMP
2292     - .set_affinity = set_ioapic_affinity,
2293     + .set_affinity = set_ioapic_affinity_irq,
2294     #endif
2295     - .retrigger = ioapic_retrigger,
2296     + .retrigger = ioapic_retrigger_irq,
2297     };
2298     #endif /* !CONFIG_XEN */
2299    
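
For level-triggered lines the eoi hook (ack_ioapic_quirk_irq) works around
the 82093AA erratum by checking the local APIC's TMR bit for the vector: the
TMR is a 256-bit array spread over 32-bit registers at 16-byte strides, so
vector i sits at APIC_TMR + ((i & ~0x1f) >> 1), bit (i & 0x1f). If that bit
is clear even though the line is level, the RTE is flipped to edge and back
to force the I/O APIC to drop the stuck entry, roughly as follows (helper
names as in the surrounding file, outside this hunk):

    if (!(v & (1 << (i & 0x1f)))) {
            atomic_inc(&irq_mis_count);     /* erratum hit, account it */
            spin_lock(&ioapic_lock);
            __mask_and_edge_IO_APIC_irq(irq);
            __unmask_and_level_IO_APIC_irq(irq);
            spin_unlock(&ioapic_lock);
    }
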
2300     @@ -2177,12 +2130,7 @@
2301     */
2302     for (irq = 0; irq < NR_IRQS ; irq++) {
2303     int tmp = irq;
2304     - if (use_pci_vector()) {
2305     - if (!platform_legacy_irq(tmp))
2306     - if ((tmp = vector_to_irq(tmp)) == -1)
2307     - continue;
2308     - }
2309     - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
2310     + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2311     /*
2312     * Hmm.. We don't have an entry for this,
2313     * so default to an old-fashioned 8259
2314     @@ -2193,22 +2141,23 @@
2315     #ifndef CONFIG_XEN
2316     else
2317     /* Strange. Oh, well.. */
2318     - irq_desc[irq].chip = &no_irq_type;
2319     + irq_desc[irq].chip = &no_irq_chip;
2320     #endif
2321     }
2322     }
2323     }
2324    
2325     #ifndef CONFIG_XEN
2326     -static void enable_lapic_irq (unsigned int irq)
2327     -{
2328     - unsigned long v;
2329     +/*
2330     + * The local APIC irq-chip implementation:
2331     + */
2332    
2333     - v = apic_read(APIC_LVT0);
2334     - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2335     +static void ack_apic(unsigned int irq)
2336     +{
2337     + ack_APIC_irq();
2338     }
2339    
2340     -static void disable_lapic_irq (unsigned int irq)
2341     +static void mask_lapic_irq (unsigned int irq)
2342     {
2343     unsigned long v;
2344    
2345     @@ -2216,21 +2165,19 @@
2346     apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2347     }
2348    
2349     -static void ack_lapic_irq (unsigned int irq)
2350     +static void unmask_lapic_irq (unsigned int irq)
2351     {
2352     - ack_APIC_irq();
2353     -}
2354     + unsigned long v;
2355    
2356     -static void end_lapic_irq (unsigned int i) { /* nothing */ }
2357     + v = apic_read(APIC_LVT0);
2358     + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2359     +}
2360    
2361     -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
2362     - .typename = "local-APIC-edge",
2363     - .startup = NULL, /* startup_irq() not used for IRQ0 */
2364     - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
2365     - .enable = enable_lapic_irq,
2366     - .disable = disable_lapic_irq,
2367     - .ack = ack_lapic_irq,
2368     - .end = end_lapic_irq
2369     +static struct irq_chip lapic_chip __read_mostly = {
2370     + .name = "local-APIC-edge",
2371     + .mask = mask_lapic_irq,
2372     + .unmask = unmask_lapic_irq,
2373     + .eoi = ack_apic,
2374     };
2375    
2376     static void setup_nmi (void)
2377     @@ -2263,17 +2210,13 @@
2378     int apic, pin, i;
2379     struct IO_APIC_route_entry entry0, entry1;
2380     unsigned char save_control, save_freq_select;
2381     - unsigned long flags;
2382    
2383     pin = find_isa_irq_pin(8, mp_INT);
2384     apic = find_isa_irq_apic(8, mp_INT);
2385     if (pin == -1)
2386     return;
2387    
2388     - spin_lock_irqsave(&ioapic_lock, flags);
2389     - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2390     - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2391     - spin_unlock_irqrestore(&ioapic_lock, flags);
2392     + entry0 = ioapic_read_entry(apic, pin);
2393     clear_IO_APIC_pin(apic, pin);
2394    
2395     memset(&entry1, 0, sizeof(entry1));
2396     @@ -2286,10 +2229,7 @@
2397     entry1.trigger = 0;
2398     entry1.vector = 0;
2399    
2400     - spin_lock_irqsave(&ioapic_lock, flags);
2401     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2402     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2403     - spin_unlock_irqrestore(&ioapic_lock, flags);
2404     + ioapic_write_entry(apic, pin, entry1);
2405    
2406     save_control = CMOS_READ(RTC_CONTROL);
2407     save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2408     @@ -2308,10 +2248,7 @@
2409     CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2410     clear_IO_APIC_pin(apic, pin);
2411    
2412     - spin_lock_irqsave(&ioapic_lock, flags);
2413     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2414     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2415     - spin_unlock_irqrestore(&ioapic_lock, flags);
2416     + ioapic_write_entry(apic, pin, entry0);
2417     }
2418    
2419     int timer_uses_ioapic_pin_0;
2420     @@ -2411,7 +2348,8 @@
2421     printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2422    
2423     disable_8259A_irq(0);
2424     - irq_desc[0].chip = &lapic_irq_type;
2425     + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2426     + "fasteoi");
2427     apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2428     enable_8259A_irq(0);
2429    
2430     @@ -2523,17 +2461,12 @@
2431     {
2432     struct IO_APIC_route_entry *entry;
2433     struct sysfs_ioapic_data *data;
2434     - unsigned long flags;
2435     int i;
2436    
2437     data = container_of(dev, struct sysfs_ioapic_data, dev);
2438     entry = data->entry;
2439     - spin_lock_irqsave(&ioapic_lock, flags);
2440     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2441     - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2442     - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2443     - }
2444     - spin_unlock_irqrestore(&ioapic_lock, flags);
2445     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2446     + entry[i] = ioapic_read_entry(dev->id, i);
2447    
2448     return 0;
2449     }
2450     @@ -2555,11 +2488,9 @@
2451     reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2452     io_apic_write(dev->id, 0, reg_00.raw);
2453     }
2454     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2455     - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2456     - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2457     - }
2458     spin_unlock_irqrestore(&ioapic_lock, flags);
2459     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2460     + ioapic_write_entry(dev->id, i, entry[i]);
2461    
2462     return 0;
2463     }
2464     @@ -2605,6 +2536,240 @@
2465    
2466     device_initcall(ioapic_init_sysfs);
2467    
2468     +#ifndef CONFIG_XEN
2469     +/*
2470     + * Dynamic irq allocate and deallocation
2471     + */
2472     +int create_irq(void)
2473     +{
2474     + /* Allocate an unused irq */
2475     + int irq, new, vector;
2476     + unsigned long flags;
2477     +
2478     + irq = -ENOSPC;
2479     + spin_lock_irqsave(&vector_lock, flags);
2480     + for (new = (NR_IRQS - 1); new >= 0; new--) {
2481     + if (platform_legacy_irq(new))
2482     + continue;
2483     + if (irq_vector[new] != 0)
2484     + continue;
2485     + vector = __assign_irq_vector(new);
2486     + if (likely(vector > 0))
2487     + irq = new;
2488     + break;
2489     + }
2490     + spin_unlock_irqrestore(&vector_lock, flags);
2491     +
2492     + if (irq >= 0) {
2493     + set_intr_gate(vector, interrupt[irq]);
2494     + dynamic_irq_init(irq);
2495     + }
2496     + return irq;
2497     +}
2498     +
2499     +void destroy_irq(unsigned int irq)
2500     +{
2501     + unsigned long flags;
2502     +
2503     + dynamic_irq_cleanup(irq);
2504     +
2505     + spin_lock_irqsave(&vector_lock, flags);
2506     + irq_vector[irq] = 0;
2507     + spin_unlock_irqrestore(&vector_lock, flags);
2508     +}
2509     +#endif
2510     +
2511     +/*
2512     + * MSI message composition
2513     + */
2514     +#ifdef CONFIG_PCI_MSI
2515     +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2516     +{
2517     + int vector;
2518     + unsigned dest;
2519     +
2520     + vector = assign_irq_vector(irq);
2521     + if (vector >= 0) {
2522     + dest = cpu_mask_to_apicid(TARGET_CPUS);
2523     +
2524     + msg->address_hi = MSI_ADDR_BASE_HI;
2525     + msg->address_lo =
2526     + MSI_ADDR_BASE_LO |
2527     + ((INT_DEST_MODE == 0) ?
2528     + MSI_ADDR_DEST_MODE_PHYSICAL:
2529     + MSI_ADDR_DEST_MODE_LOGICAL) |
2530     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2531     + MSI_ADDR_REDIRECTION_CPU:
2532     + MSI_ADDR_REDIRECTION_LOWPRI) |
2533     + MSI_ADDR_DEST_ID(dest);
2534     +
2535     + msg->data =
2536     + MSI_DATA_TRIGGER_EDGE |
2537     + MSI_DATA_LEVEL_ASSERT |
2538     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2539     + MSI_DATA_DELIVERY_FIXED:
2540     + MSI_DATA_DELIVERY_LOWPRI) |
2541     + MSI_DATA_VECTOR(vector);
2542     + }
2543     + return vector;
2544     +}
2545     +
2546     +#ifdef CONFIG_SMP
2547     +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2548     +{
2549     + struct msi_msg msg;
2550     + unsigned int dest;
2551     + cpumask_t tmp;
2552     + int vector;
2553     +
2554     + cpus_and(tmp, mask, cpu_online_map);
2555     + if (cpus_empty(tmp))
2556     + tmp = TARGET_CPUS;
2557     +
2558     + vector = assign_irq_vector(irq);
2559     + if (vector < 0)
2560     + return;
2561     +
2562     + dest = cpu_mask_to_apicid(mask);
2563     +
2564     + read_msi_msg(irq, &msg);
2565     +
2566     + msg.data &= ~MSI_DATA_VECTOR_MASK;
2567     + msg.data |= MSI_DATA_VECTOR(vector);
2568     + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2569     + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2570     +
2571     + write_msi_msg(irq, &msg);
2572     + set_native_irq_info(irq, mask);
2573     +}
2574     +#endif /* CONFIG_SMP */
2575     +
2576     +/*
2577     + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2578     + * which implement the MSI or MSI-X Capability Structure.
2579     + */
2580     +static struct irq_chip msi_chip = {
2581     + .name = "PCI-MSI",
2582     + .unmask = unmask_msi_irq,
2583     + .mask = mask_msi_irq,
2584     + .ack = ack_ioapic_irq,
2585     +#ifdef CONFIG_SMP
2586     + .set_affinity = set_msi_irq_affinity,
2587     +#endif
2588     + .retrigger = ioapic_retrigger_irq,
2589     +};
2590     +
2591     +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
2592     +{
2593     + struct msi_msg msg;
2594     + int ret;
2595     + ret = msi_compose_msg(dev, irq, &msg);
2596     + if (ret < 0)
2597     + return ret;
2598     +
2599     + write_msi_msg(irq, &msg);
2600     +
2601     + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2602     + "edge");
2603     +
2604     + return 0;
2605     +}
2606     +
2607     +void arch_teardown_msi_irq(unsigned int irq)
2608     +{
2609     + return;
2610     +}
2611     +
2612     +#endif /* CONFIG_PCI_MSI */
2613     +
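    /*
     * Worked decode of msi_compose_msg() above, with hypothetical
     * numbers and assuming the usual asm/msidef.h encodings
     * (destination ID at bits 19:12 of the address, vector at bits
     * 7:0 of the data): for vector 0x31 to APIC ID 0x03 in physical
     * mode with fixed delivery,
     *
     *   address_hi = MSI_ADDR_BASE_HI              = 0x00000000
     *   address_lo = 0xfee00000 | MSI_ADDR_DEST_ID(0x03)
     *                                              = 0xfee03000
     *   data       = MSI_DATA_LEVEL_ASSERT | MSI_DATA_VECTOR(0x31)
     *                                              = 0x00004031
     */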
2614     +/*
2615     + * Hypertransport interrupt support
2616     + */
2617     +#ifdef CONFIG_HT_IRQ
2618     +
2619     +#ifdef CONFIG_SMP
2620     +
2621     +static void target_ht_irq(unsigned int irq, unsigned int dest)
2622     +{
2623     + struct ht_irq_msg msg;
2624     + fetch_ht_irq_msg(irq, &msg);
2625     +
2626     + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2627     + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2628     +
2629     + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2630     + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2631     +
2632     + write_ht_irq_msg(irq, &msg);
2633     +}
2634     +
2635     +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2636     +{
2637     + unsigned int dest;
2638     + cpumask_t tmp;
2639     +
2640     + cpus_and(tmp, mask, cpu_online_map);
2641     + if (cpus_empty(tmp))
2642     + tmp = TARGET_CPUS;
2643     +
2644     + cpus_and(mask, tmp, CPU_MASK_ALL);
2645     +
2646     + dest = cpu_mask_to_apicid(mask);
2647     +
2648     + target_ht_irq(irq, dest);
2649     + set_native_irq_info(irq, mask);
2650     +}
2651     +#endif
2652     +
2653     +static struct irq_chip ht_irq_chip = {
2654     + .name = "PCI-HT",
2655     + .mask = mask_ht_irq,
2656     + .unmask = unmask_ht_irq,
2657     + .ack = ack_ioapic_irq,
2658     +#ifdef CONFIG_SMP
2659     + .set_affinity = set_ht_irq_affinity,
2660     +#endif
2661     + .retrigger = ioapic_retrigger_irq,
2662     +};
2663     +
2664     +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2665     +{
2666     + int vector;
2667     +
2668     + vector = assign_irq_vector(irq);
2669     + if (vector >= 0) {
2670     + struct ht_irq_msg msg;
2671     + unsigned dest;
2672     + cpumask_t tmp;
2673     +
2674     + cpus_clear(tmp);
2675     + cpu_set(vector >> 8, tmp);
2676     + dest = cpu_mask_to_apicid(tmp);
2677     +
2678     + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2679     +
2680     + msg.address_lo =
2681     + HT_IRQ_LOW_BASE |
2682     + HT_IRQ_LOW_DEST_ID(dest) |
2683     + HT_IRQ_LOW_VECTOR(vector) |
2684     + ((INT_DEST_MODE == 0) ?
2685     + HT_IRQ_LOW_DM_PHYSICAL :
2686     + HT_IRQ_LOW_DM_LOGICAL) |
2687     + HT_IRQ_LOW_RQEOI_EDGE |
2688     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2689     + HT_IRQ_LOW_MT_FIXED :
2690     + HT_IRQ_LOW_MT_ARBITRATED) |
2691     + HT_IRQ_LOW_IRQ_MASKED;
2692     +
2693     + write_ht_irq_msg(irq, &msg);
2694     +
2695     + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2696     + handle_edge_irq, "edge");
2697     + }
2698     + return vector;
2699     +}
2700     +#endif /* CONFIG_HT_IRQ */
2701     +
2702     /* --------------------------------------------------------------------------
2703     ACPI-based IOAPIC Configuration
2704     -------------------------------------------------------------------------- */
2705     @@ -2758,13 +2923,34 @@
2706     if (!ioapic && (irq < 16))
2707     disable_8259A_irq(irq);
2708    
2709     + ioapic_write_entry(ioapic, pin, entry);
2710     spin_lock_irqsave(&ioapic_lock, flags);
2711     - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2712     - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2713     - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2714     + set_native_irq_info(irq, TARGET_CPUS);
2715     spin_unlock_irqrestore(&ioapic_lock, flags);
2716    
2717     return 0;
2718     }
2719    
2720     #endif /* CONFIG_ACPI */
2721     +
2722     +static int __init parse_disable_timer_pin_1(char *arg)
2723     +{
2724     + disable_timer_pin_1 = 1;
2725     + return 0;
2726     +}
2727     +early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2728     +
2729     +static int __init parse_enable_timer_pin_1(char *arg)
2730     +{
2731     + disable_timer_pin_1 = -1;
2732     + return 0;
2733     +}
2734     +early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2735     +
2736     +static int __init parse_noapic(char *arg)
2737     +{
2738     + /* disable IO-APIC */
2739     + disable_ioapic_setup();
2740     + return 0;
2741     +}
2742     +early_param("noapic", parse_noapic);
2743     --- a/arch/x86/kernel/io_apic_64-xen.c
2744     +++ b/arch/x86/kernel/io_apic_64-xen.c
2745     @@ -26,9 +26,12 @@
2746     #include <linux/delay.h>
2747     #include <linux/sched.h>
2748     #include <linux/smp_lock.h>
2749     +#include <linux/pci.h>
2750     #include <linux/mc146818rtc.h>
2751     #include <linux/acpi.h>
2752     #include <linux/sysdev.h>
2753     +#include <linux/msi.h>
2754     +#include <linux/htirq.h>
2755     #ifdef CONFIG_ACPI
2756     #include <acpi/acpi_bus.h>
2757     #endif
2758     @@ -41,6 +44,10 @@
2759     #include <asm/acpi.h>
2760     #include <asm/dma.h>
2761     #include <asm/nmi.h>
2762     +#include <asm/msidef.h>
2763     +#include <asm/hypertransport.h>
2764     +
2765     +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
2766    
2767     #define __apicdebuginit __init
2768    
2769     @@ -48,17 +55,29 @@
2770    
2771     static int no_timer_check;
2772    
2773     -int disable_timer_pin_1 __initdata;
2774     +static int disable_timer_pin_1 __initdata;
2775    
2776     -#ifndef CONFIG_XEN
2777     -int timer_over_8254 __initdata = 0;
2778     +#ifdef CONFIG_XEN
2779     +#include <xen/interface/xen.h>
2780     +#include <xen/interface/physdev.h>
2781     +
2782     +/* Fake i8259 */
2783     +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2784     +#define disable_8259A_irq(_irq) ((void)0)
2785     +#define i8259A_irq_pending(_irq) (0)
2786     +
2787     +unsigned long io_apic_irqs;
2788     +
2789     +#define clear_IO_APIC() ((void)0)
2790     +#else
2791     +int timer_over_8254 __initdata = 1;
2792    
2793     /* Where if anywhere is the i8259 connect in external int mode */
2794     static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
2795     #endif
2796    
2797     static DEFINE_SPINLOCK(ioapic_lock);
2798     -static DEFINE_SPINLOCK(vector_lock);
2799     +DEFINE_SPINLOCK(vector_lock);
2800    
2801     /*
2802     * # of IRQ routing registers
2803     @@ -83,28 +102,27 @@
2804     short apic, pin, next;
2805     } irq_2_pin[PIN_MAP_SIZE];
2806    
2807     -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
2808     -#ifdef CONFIG_PCI_MSI
2809     -#define vector_to_irq(vector) \
2810     - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
2811     -#else
2812     -#define vector_to_irq(vector) (vector)
2813     -#endif
2814     -
2815     -#ifdef CONFIG_XEN
2816     -
2817     -#include <xen/interface/xen.h>
2818     -#include <xen/interface/physdev.h>
2819     -
2820     -/* Fake i8259 */
2821     -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2822     -#define disable_8259A_irq(_irq) ((void)0)
2823     -#define i8259A_irq_pending(_irq) (0)
2824     +#ifndef CONFIG_XEN
2825     +struct io_apic {
2826     + unsigned int index;
2827     + unsigned int unused[3];
2828     + unsigned int data;
2829     +};
2830    
2831     -unsigned long io_apic_irqs;
2832     +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
2833     +{
2834     + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
2835     + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
2836     +}
2837     +#endif
2838    
2839     -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
2840     +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
2841     {
2842     +#ifndef CONFIG_XEN
2843     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2844     + writel(reg, &io_apic->index);
2845     + return readl(&io_apic->data);
2846     +#else
2847     struct physdev_apic apic_op;
2848     int ret;
2849    
2850     @@ -114,31 +132,131 @@
2851     if (ret)
2852     return ret;
2853     return apic_op.value;
2854     +#endif
2855     }
2856    
2857     -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2858     +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2859     {
2860     +#ifndef CONFIG_XEN
2861     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2862     + writel(reg, &io_apic->index);
2863     + writel(value, &io_apic->data);
2864     +#else
2865     struct physdev_apic apic_op;
2866    
2867     apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
2868     apic_op.reg = reg;
2869     apic_op.value = value;
2870     WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
2871     +#endif
2872     }
2873    
2874     -#define io_apic_read(a,r) xen_io_apic_read(a,r)
2875     -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
2876     +#ifndef CONFIG_XEN
2877     +/*
2878     + * Re-write a value: to be used for read-modify-write
2879     + * cycles where the read already set up the index register.
2880     + */
2881     +static inline void io_apic_modify(unsigned int apic, unsigned int value)
2882     +{
2883     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2884     + writel(value, &io_apic->data);
2885     +}
2886     +#else
2887     +#define io_apic_modify io_apic_write
2888     +#endif
2889    
2890     -#define clear_IO_APIC() ((void)0)
2891     +/*
2892     + * Synchronize the IO-APIC and the CPU by doing
2893     + * a dummy read from the IO-APIC
2894     + */
2895     +static inline void io_apic_sync(unsigned int apic)
2896     +{
2897     +#ifndef CONFIG_XEN
2898     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2899     + readl(&io_apic->data);
2900     +#endif
2901     +}
2902    
2903     -#else
2904     +union entry_union {
2905     + struct { u32 w1, w2; };
2906     + struct IO_APIC_route_entry entry;
2907     +};
2908     +
2909     +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
2910     +{
2911     + union entry_union eu;
2912     + unsigned long flags;
2913     + spin_lock_irqsave(&ioapic_lock, flags);
2914     + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
2915     + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
2916     + spin_unlock_irqrestore(&ioapic_lock, flags);
2917     + return eu.entry;
2918     +}
2919     +
2920     +/*
2921     + * When we write a new IO APIC routing entry, we need to write the high
2922     + * word first! If the mask bit in the low word is clear, we will enable
2923     + * the interrupt, and we need to make sure the entry is fully populated
2924     + * before that happens.
2925     + */
2926     +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2927     +{
2928     + unsigned long flags;
2929     + union entry_union eu;
2930     + eu.entry = e;
2931     + spin_lock_irqsave(&ioapic_lock, flags);
2932     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2933     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2934     + spin_unlock_irqrestore(&ioapic_lock, flags);
2935     +}
2936     +
2937     +#ifndef CONFIG_XEN
2938     +/*
2939     + * When we mask an IO APIC routing entry, we need to write the low
2940     + * word first, in order to set the mask bit before we change the
2941     + * high bits!
2942     + */
2943     +static void ioapic_mask_entry(int apic, int pin)
2944     +{
2945     + unsigned long flags;
2946     + union entry_union eu = { .entry.mask = 1 };
2947     +
2948     + spin_lock_irqsave(&ioapic_lock, flags);
2949     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2950     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2951     + spin_unlock_irqrestore(&ioapic_lock, flags);
2952     +}
2953    
2954     #ifdef CONFIG_SMP
2955     +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
2956     +{
2957     + int apic, pin;
2958     + struct irq_pin_list *entry = irq_2_pin + irq;
2959     +
2960     + BUG_ON(irq >= NR_IRQS);
2961     + for (;;) {
2962     + unsigned int reg;
2963     + apic = entry->apic;
2964     + pin = entry->pin;
2965     + if (pin == -1)
2966     + break;
2967     + io_apic_write(apic, 0x11 + pin*2, dest);
2968     + reg = io_apic_read(apic, 0x10 + pin*2);
2969     + reg &= ~0x000000ff;
2970     + reg |= vector;
2971     + io_apic_modify(apic, reg);
2972     + if (!entry->next)
2973     + break;
2974     + entry = irq_2_pin + entry->next;
2975     + }
2976     +}
2977     +
2978     static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2979     {
2980     unsigned long flags;
2981     unsigned int dest;
2982     cpumask_t tmp;
2983     + int vector;
2984    
2985     cpus_and(tmp, mask, cpu_online_map);
2986     if (cpus_empty(tmp))
2987     @@ -146,7 +264,11 @@
2988    
2989     cpus_and(mask, tmp, CPU_MASK_ALL);
2990    
2991     - dest = cpu_mask_to_apicid(mask);
2992     + vector = assign_irq_vector(irq, mask, &tmp);
2993     + if (vector < 0)
2994     + return;
2995     +
2996     + dest = cpu_mask_to_apicid(tmp);
2997    
2998     /*
2999     * Only the high 8 bits are valid.
3000     @@ -154,13 +276,12 @@
3001     dest = SET_APIC_LOGICAL_ID(dest);
3002    
3003     spin_lock_irqsave(&ioapic_lock, flags);
3004     - __DO_ACTION(1, = dest, )
3005     - set_irq_info(irq, mask);
3006     + __target_IO_APIC_irq(irq, dest, vector);
3007     + set_native_irq_info(irq, mask);
3008     spin_unlock_irqrestore(&ioapic_lock, flags);
3009     }
3010     #endif
3011     -
3012     -#endif /* !CONFIG_XEN */
3013     +#endif
3014    
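    /*
     * Layout assumed by __target_IO_APIC_irq() above, one 64-bit
     * route entry per pin:
     *
     *   index 0x10 + 2*pin: low dword  - vector in bits 7:0, plus
     *                       delivery mode/polarity/trigger/mask bits
     *   index 0x11 + 2*pin: high dword - destination in bits 31:24
     *                       (dest arrives pre-shifted via
     *                       SET_APIC_LOGICAL_ID())
     *
     * so retargeting writes the new destination word first, then
     * read-modify-writes only the vector byte of the low word.
     */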
3015     /*
3016     * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
3017     @@ -240,24 +361,15 @@
3018     static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
3019     {
3020     struct IO_APIC_route_entry entry;
3021     - unsigned long flags;
3022    
3023     /* Check delivery_mode to be sure we're not clearing an SMI pin */
3024     - spin_lock_irqsave(&ioapic_lock, flags);
3025     - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
3026     - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
3027     - spin_unlock_irqrestore(&ioapic_lock, flags);
3028     + entry = ioapic_read_entry(apic, pin);
3029     if (entry.delivery_mode == dest_SMI)
3030     return;
3031     /*
3032     * Disable it in the IO-APIC irq-routing table:
3033     */
3034     - memset(&entry, 0, sizeof(entry));
3035     - entry.mask = 1;
3036     - spin_lock_irqsave(&ioapic_lock, flags);
3037     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
3038     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
3039     - spin_unlock_irqrestore(&ioapic_lock, flags);
3040     + ioapic_mask_entry(apic, pin);
3041     }
3042    
3043     static void clear_IO_APIC (void)
3044     @@ -271,16 +383,6 @@
3045    
3046     #endif /* !CONFIG_XEN */
3047    
3048     -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
3049     -
3050     -/*
3051     - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
3052     - * specific CPU-side IRQs.
3053     - */
3054     -
3055     -#define MAX_PIRQS 8
3056     -static int pirq_entries [MAX_PIRQS];
3057     -static int pirqs_enabled;
3058     int skip_ioapic_setup;
3059     int ioapic_force;
3060    
3061     @@ -289,18 +391,17 @@
3062     static int __init disable_ioapic_setup(char *str)
3063     {
3064     skip_ioapic_setup = 1;
3065     - return 1;
3066     + return 0;
3067     }
3068     +early_param("noapic", disable_ioapic_setup);
3069    
3070     -static int __init enable_ioapic_setup(char *str)
3071     +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
3072     +static int __init disable_timer_pin_setup(char *arg)
3073     {
3074     - ioapic_force = 1;
3075     - skip_ioapic_setup = 0;
3076     + disable_timer_pin_1 = 1;
3077     return 1;
3078     }
3079     -
3080     -__setup("noapic", disable_ioapic_setup);
3081     -__setup("apic", enable_ioapic_setup);
3082     +__setup("disable_timer_pin_1", disable_timer_pin_setup);
3083    
3084     #ifndef CONFIG_XEN
3085     static int __init setup_disable_8254_timer(char *s)
3086     @@ -318,137 +419,6 @@
3087     __setup("enable_8254_timer", setup_enable_8254_timer);
3088     #endif /* !CONFIG_XEN */
3089    
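
Note that the two registration styles above obey different return
conventions, an easy thing to trip over in this era: an early_param()
handler returns 0 to report success, while a __setup() handler returns 1 to
mark the option as consumed (0 would pass it on to init as an environment
setting). Side by side, with hypothetical names:

    static int __init f_early(char *arg) { return 0; }   /* 0 = handled */
    early_param("opt", f_early);

    static int __init f_setup(char *str) { return 1; }   /* 1 = consumed */
    __setup("opt=", f_setup);
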
3090     -#include <asm/pci-direct.h>
3091     -#include <linux/pci_ids.h>
3092     -#include <linux/pci.h>
3093     -
3094     -
3095     -#ifdef CONFIG_ACPI
3096     -
3097     -static int nvidia_hpet_detected __initdata;
3098     -
3099     -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
3100     -{
3101     - nvidia_hpet_detected = 1;
3102     - return 0;
3103     -}
3104     -#endif
3105     -
3106     -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
3107     - off. Check for an Nvidia or VIA PCI bridge and turn it off.
3108     - Use pci direct infrastructure because this runs before the PCI subsystem.
3109     -
3110     - Can be overwritten with "apic"
3111     -
3112     - And another hack to disable the IOMMU on VIA chipsets.
3113     -
3114     - ... and others. Really should move this somewhere else.
3115     -
3116     - Kludge-O-Rama. */
3117     -void __init check_ioapic(void)
3118     -{
3119     - int num,slot,func;
3120     - /* Poor man's PCI discovery */
3121     - for (num = 0; num < 32; num++) {
3122     - for (slot = 0; slot < 32; slot++) {
3123     - for (func = 0; func < 8; func++) {
3124     - u32 class;
3125     - u32 vendor;
3126     - u8 type;
3127     - class = read_pci_config(num,slot,func,
3128     - PCI_CLASS_REVISION);
3129     - if (class == 0xffffffff)
3130     - break;
3131     -
3132     - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
3133     - continue;
3134     -
3135     - vendor = read_pci_config(num, slot, func,
3136     - PCI_VENDOR_ID);
3137     - vendor &= 0xffff;
3138     - switch (vendor) {
3139     - case PCI_VENDOR_ID_VIA:
3140     -#ifdef CONFIG_IOMMU
3141     - if ((end_pfn > MAX_DMA32_PFN ||
3142     - force_iommu) &&
3143     - !iommu_aperture_allowed) {
3144     - printk(KERN_INFO
3145     - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
3146     - iommu_aperture_disabled = 1;
3147     - }
3148     -#endif
3149     - return;
3150     - case PCI_VENDOR_ID_NVIDIA:
3151     -#ifdef CONFIG_ACPI
3152     - /*
3153     - * All timer overrides on Nvidia are
3154     - * wrong unless HPET is enabled.
3155     - */
3156     - nvidia_hpet_detected = 0;
3157     - acpi_table_parse(ACPI_HPET,
3158     - nvidia_hpet_check);
3159     - if (nvidia_hpet_detected == 0) {
3160     - acpi_skip_timer_override = 1;
3161     - printk(KERN_INFO "Nvidia board "
3162     - "detected. Ignoring ACPI "
3163     - "timer override.\n");
3164     - }
3165     -#endif
3166     - /* RED-PEN skip them on mptables too? */
3167     - return;
3168     - case PCI_VENDOR_ID_ATI:
3169     -
3170     - /* This should be actually default, but
3171     - for 2.6.16 let's do it for ATI only where
3172     - it's really needed. */
3173     -#ifndef CONFIG_XEN
3174     - if (timer_over_8254 == 1) {
3175     - timer_over_8254 = 0;
3176     - printk(KERN_INFO
3177     - "ATI board detected. Disabling timer routing over 8254.\n");
3178     - }
3179     -#endif
3180     - return;
3181     - }
3182     -
3183     -
3184     - /* No multi-function device? */
3185     - type = read_pci_config_byte(num,slot,func,
3186     - PCI_HEADER_TYPE);
3187     - if (!(type & 0x80))
3188     - break;
3189     - }
3190     - }
3191     - }
3192     -}
3193     -
3194     -static int __init ioapic_pirq_setup(char *str)
3195     -{
3196     - int i, max;
3197     - int ints[MAX_PIRQS+1];
3198     -
3199     - get_options(str, ARRAY_SIZE(ints), ints);
3200     -
3201     - for (i = 0; i < MAX_PIRQS; i++)
3202     - pirq_entries[i] = -1;
3203     -
3204     - pirqs_enabled = 1;
3205     - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
3206     - max = MAX_PIRQS;
3207     - if (ints[0] < MAX_PIRQS)
3208     - max = ints[0];
3209     -
3210     - for (i = 0; i < max; i++) {
3211     - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
3212     - /*
3213     - * PIRQs are mapped upside down, usually.
3214     - */
3215     - pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
3216     - }
3217     - return 1;
3218     -}
3219     -
3220     -__setup("pirq=", ioapic_pirq_setup);
3221    
3222     /*
3223     * Find the IRQ entry number of a certain pin.
3224     @@ -478,9 +448,7 @@
3225     for (i = 0; i < mp_irq_entries; i++) {
3226     int lbus = mp_irqs[i].mpc_srcbus;
3227    
3228     - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3229     - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3230     - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3231     + if (test_bit(lbus, mp_bus_not_pci) &&
3232     (mp_irqs[i].mpc_irqtype == type) &&
3233     (mp_irqs[i].mpc_srcbusirq == irq))
3234    
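
The mp_bus_not_pci bitmap that replaces the per-bus type checks above is a
2.6.19 simplification: for interrupt lookup purposes ISA, EISA and MCA all
behave alike, so one "not PCI" bit per bus id suffices. A sketch of how such
a bitmap would be populated while parsing MP-table bus entries (illustrative
only; the real code lives in mpparse, not in this hunk):

    DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

    static void __init example_register_bus(int busid, int is_pci)
    {
            if (!is_pci)
                    set_bit(busid, mp_bus_not_pci); /* ISA/EISA/MCA alike */
    }
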
3235     @@ -496,9 +464,7 @@
3236     for (i = 0; i < mp_irq_entries; i++) {
3237     int lbus = mp_irqs[i].mpc_srcbus;
3238    
3239     - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3240     - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3241     - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3242     + if (test_bit(lbus, mp_bus_not_pci) &&
3243     (mp_irqs[i].mpc_irqtype == type) &&
3244     (mp_irqs[i].mpc_srcbusirq == irq))
3245     break;
3246     @@ -539,7 +505,7 @@
3247     mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
3248     break;
3249    
3250     - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
3251     + if (!test_bit(lbus, mp_bus_not_pci) &&
3252     !mp_irqs[i].mpc_irqtype &&
3253     (bus == lbus) &&
3254     (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
3255     @@ -562,27 +528,6 @@
3256     return best_guess;
3257     }
3258    
3259     -/*
3260     - * EISA Edge/Level control register, ELCR
3261     - */
3262     -static int EISA_ELCR(unsigned int irq)
3263     -{
3264     - if (irq < 16) {
3265     - unsigned int port = 0x4d0 + (irq >> 3);
3266     - return (inb(port) >> (irq & 7)) & 1;
3267     - }
3268     - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
3269     - return 0;
3270     -}
3271     -
3272     -/* EISA interrupts are always polarity zero and can be edge or level
3273     - * trigger depending on the ELCR value. If an interrupt is listed as
3274     - * EISA conforming in the MP table, that means its trigger type must
3275     - * be read in from the ELCR */
3276     -
3277     -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
3278     -#define default_EISA_polarity(idx) (0)
3279     -
3280     /* ISA interrupts are always polarity zero edge triggered,
3281     * when listed as conforming in the MP table. */
3282    
3283     @@ -595,12 +540,6 @@
3284     #define default_PCI_trigger(idx) (1)
3285     #define default_PCI_polarity(idx) (1)
3286    
3287     -/* MCA interrupts are always polarity zero level triggered,
3288     - * when listed as conforming in the MP table. */
3289     -
3290     -#define default_MCA_trigger(idx) (1)
3291     -#define default_MCA_polarity(idx) (0)
3292     -
3293     static int __init MPBIOS_polarity(int idx)
3294     {
3295     int bus = mp_irqs[idx].mpc_srcbus;
3296     @@ -612,38 +551,11 @@
3297     switch (mp_irqs[idx].mpc_irqflag & 3)
3298     {
3299     case 0: /* conforms, ie. bus-type dependent polarity */
3300     - {
3301     - switch (mp_bus_id_to_type[bus])
3302     - {
3303     - case MP_BUS_ISA: /* ISA pin */
3304     - {
3305     - polarity = default_ISA_polarity(idx);
3306     - break;
3307     - }
3308     - case MP_BUS_EISA: /* EISA pin */
3309     - {
3310     - polarity = default_EISA_polarity(idx);
3311     - break;
3312     - }
3313     - case MP_BUS_PCI: /* PCI pin */
3314     - {
3315     - polarity = default_PCI_polarity(idx);
3316     - break;
3317     - }
3318     - case MP_BUS_MCA: /* MCA pin */
3319     - {
3320     - polarity = default_MCA_polarity(idx);
3321     - break;
3322     - }
3323     - default:
3324     - {
3325     - printk(KERN_WARNING "broken BIOS!!\n");
3326     - polarity = 1;
3327     - break;
3328     - }
3329     - }
3330     + if (test_bit(bus, mp_bus_not_pci))
3331     + polarity = default_ISA_polarity(idx);
3332     + else
3333     + polarity = default_PCI_polarity(idx);
3334     break;
3335     - }
3336     case 1: /* high active */
3337     {
3338     polarity = 0;
3339     @@ -681,38 +593,11 @@
3340     switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
3341     {
3342     case 0: /* conforms, ie. bus-type dependent */
3343     - {
3344     - switch (mp_bus_id_to_type[bus])
3345     - {
3346     - case MP_BUS_ISA: /* ISA pin */
3347     - {
3348     - trigger = default_ISA_trigger(idx);
3349     - break;
3350     - }
3351     - case MP_BUS_EISA: /* EISA pin */
3352     - {
3353     - trigger = default_EISA_trigger(idx);
3354     - break;
3355     - }
3356     - case MP_BUS_PCI: /* PCI pin */
3357     - {
3358     - trigger = default_PCI_trigger(idx);
3359     - break;
3360     - }
3361     - case MP_BUS_MCA: /* MCA pin */
3362     - {
3363     - trigger = default_MCA_trigger(idx);
3364     - break;
3365     - }
3366     - default:
3367     - {
3368     - printk(KERN_WARNING "broken BIOS!!\n");
3369     - trigger = 1;
3370     - break;
3371     - }
3372     - }
3373     + if (test_bit(bus, mp_bus_not_pci))
3374     + trigger = default_ISA_trigger(idx);
3375     + else
3376     + trigger = default_PCI_trigger(idx);
3377     break;
3378     - }
3379     case 1: /* edge */
3380     {
3381     trigger = 0;
3382     @@ -749,64 +634,6 @@
3383     return MPBIOS_trigger(idx);
3384     }
3385    
3386     -static int next_irq = 16;
3387     -
3388     -/*
3389     - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
3390     - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
3391     - * from ACPI, which can reach 800 in large boxen.
3392     - *
3393     - * Compact the sparse GSI space into a sequential IRQ series and reuse
3394     - * vectors if possible.
3395     - */
3396     -int gsi_irq_sharing(int gsi)
3397     -{
3398     - int i, tries, vector;
3399     -
3400     - BUG_ON(gsi >= NR_IRQ_VECTORS);
3401     -
3402     - if (platform_legacy_irq(gsi))
3403     - return gsi;
3404     -
3405     - if (gsi_2_irq[gsi] != 0xFF)
3406     - return (int)gsi_2_irq[gsi];
3407     -
3408     - tries = NR_IRQS;
3409     - try_again:
3410     - vector = assign_irq_vector(gsi);
3411     -
3412     - /*
3413     - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
3414     - * use of vector and if found, return that IRQ. However, we never want
3415     - * to share legacy IRQs, which usually have a different trigger mode
3416     - * than PCI.
3417     - */
3418     - for (i = 0; i < NR_IRQS; i++)
3419     - if (IO_APIC_VECTOR(i) == vector)
3420     - break;
3421     - if (platform_legacy_irq(i)) {
3422     - if (--tries >= 0) {
3423     - IO_APIC_VECTOR(i) = 0;
3424     - goto try_again;
3425     - }
3426     - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
3427     - }
3428     - if (i < NR_IRQS) {
3429     - gsi_2_irq[gsi] = i;
3430     - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
3431     - gsi, vector, i);
3432     - return i;
3433     - }
3434     -
3435     - i = next_irq++;
3436     - BUG_ON(i >= NR_IRQS);
3437     - gsi_2_irq[gsi] = i;
3438     - IO_APIC_VECTOR(i) = vector;
3439     - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
3440     - gsi, vector, i);
3441     - return i;
3442     -}
3443     -
3444     static int pin_2_irq(int idx, int apic, int pin)
3445     {
3446     int irq, i;
3447     @@ -818,49 +645,16 @@
3448     if (mp_irqs[idx].mpc_dstirq != pin)
3449     printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
3450    
3451     - switch (mp_bus_id_to_type[bus])
3452     - {
3453     - case MP_BUS_ISA: /* ISA pin */
3454     - case MP_BUS_EISA:
3455     - case MP_BUS_MCA:
3456     - {
3457     - irq = mp_irqs[idx].mpc_srcbusirq;
3458     - break;
3459     - }
3460     - case MP_BUS_PCI: /* PCI pin */
3461     - {
3462     - /*
3463     - * PCI IRQs are mapped in order
3464     - */
3465     - i = irq = 0;
3466     - while (i < apic)
3467     - irq += nr_ioapic_registers[i++];
3468     - irq += pin;
3469     - irq = gsi_irq_sharing(irq);
3470     - break;
3471     - }
3472     - default:
3473     - {
3474     - printk(KERN_ERR "unknown bus type %d.\n",bus);
3475     - irq = 0;
3476     - break;
3477     - }
3478     - }
3479     - BUG_ON(irq >= NR_IRQS);
3480     -
3481     - /*
3482     - * PCI IRQ command line redirection. Yes, limits are hardcoded.
3483     - */
3484     - if ((pin >= 16) && (pin <= 23)) {
3485     - if (pirq_entries[pin-16] != -1) {
3486     - if (!pirq_entries[pin-16]) {
3487     - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
3488     - } else {
3489     - irq = pirq_entries[pin-16];
3490     - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
3491     - pin-16, irq);
3492     - }
3493     - }
3494     + if (test_bit(bus, mp_bus_not_pci)) {
3495     + irq = mp_irqs[idx].mpc_srcbusirq;
3496     + } else {
3497     + /*
3498     + * PCI IRQs are mapped in order
3499     + */
3500     + i = irq = 0;
3501     + while (i < apic)
3502     + irq += nr_ioapic_registers[i++];
3503     + irq += pin;
3504     }
3505     BUG_ON(irq >= NR_IRQS);
3506     return irq;
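
The common thread in the hunks above: with EISA and MCA handling dropped from the 64-bit code, the per-bus mp_bus_id_to_type[] array and its four-way switches collapse into a single mp_bus_not_pci bitmap, since ISA-versus-PCI is the only distinction left. A minimal self-contained sketch of the pattern (the two helper names are illustrative, not from the patch):

    #include <linux/types.h>        /* DECLARE_BITMAP() */
    #include <linux/bitops.h>       /* set_bit(), clear_bit(), test_bit() */

    #define MAX_MP_BUSSES 256

    static DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

    /* Registration side, normally done while parsing the MP table. */
    static void record_bus(int busid, int is_pci)
    {
            if (is_pci)
                    clear_bit(busid, mp_bus_not_pci);
            else
                    set_bit(busid, mp_bus_not_pci);  /* ISA and the like */
    }

    /* Lookup side: the old four-way bus-type switch becomes one branch. */
    static int bus_default_polarity(int busid)
    {
            /* ISA defaults to active high (0), PCI to active low (1). */
            return test_bit(busid, mp_bus_not_pci) ? 0 : 1;
    }

The registration half of this change appears in the mpparse_64-xen.c hunks near the end of this section.
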
3507     @@ -884,43 +678,68 @@
3508     }
3509    
3510     /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
3511     -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3512     +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3513    
3514     -int assign_irq_vector(int irq)
3515     +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3516     {
3517     - unsigned long flags;
3518     int vector;
3519     struct physdev_irq irq_op;
3520    
3521     - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
3522     + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
3523    
3524     - spin_lock_irqsave(&vector_lock, flags);
3525     + cpus_and(*result, mask, cpu_online_map);
3526    
3527     - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
3528     - spin_unlock_irqrestore(&vector_lock, flags);
3529     - return IO_APIC_VECTOR(irq);
3530     - }
3531     + if (irq_vector[irq] > 0)
3532     + return irq_vector[irq];
3533    
3534     irq_op.irq = irq;
3535     - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
3536     - spin_unlock_irqrestore(&vector_lock, flags);
3537     + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
3538     return -ENOSPC;
3539     - }
3540    
3541     vector = irq_op.vector;
3542     - vector_irq[vector] = irq;
3543     - if (irq != AUTO_ASSIGN)
3544     - IO_APIC_VECTOR(irq) = vector;
3545     + irq_vector[irq] = vector;
3546    
3547     - spin_unlock_irqrestore(&vector_lock, flags);
3548     + return vector;
3549     +}
3550    
3551     +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3552     +{
3553     + int vector;
3554     + unsigned long flags;
3555     +
3556     + spin_lock_irqsave(&vector_lock, flags);
3557     + vector = __assign_irq_vector(irq, mask, result);
3558     + spin_unlock_irqrestore(&vector_lock, flags);
3559     return vector;
3560     }
3561    
3562     -extern void (*interrupt[NR_IRQS])(void);
3563     #ifndef CONFIG_XEN
3564     -static struct hw_interrupt_type ioapic_level_type;
3565     -static struct hw_interrupt_type ioapic_edge_type;
3566     +void __setup_vector_irq(int cpu)
3567     +{
3568     + /* Initialize vector_irq on a new cpu */
3569     + /* This function must be called with vector_lock held */
3570     + int irq, vector;
3571     +
3572     + /* Mark the inuse vectors */
3573     + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
3574     + if (!cpu_isset(cpu, irq_domain[irq]))
3575     + continue;
3576     + vector = irq_vector[irq];
3577     + per_cpu(vector_irq, cpu)[vector] = irq;
3578     + }
3579     + /* Mark the free vectors */
3580     + for (vector = 0; vector < NR_VECTORS; ++vector) {
3581     + irq = per_cpu(vector_irq, cpu)[vector];
3582     + if (irq < 0)
3583     + continue;
3584     + if (!cpu_isset(cpu, irq_domain[irq]))
3585     + per_cpu(vector_irq, cpu)[vector] = -1;
3586     + }
3587     +}
3588     +
3589     +extern void (*interrupt[NR_IRQS])(void);
3590     +
3591     +static struct irq_chip ioapic_chip;
3592    
3593     #define IOAPIC_AUTO -1
3594     #define IOAPIC_EDGE 0
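
Note the shape of this refactoring: the allocator is split into a lock-free core (__assign_irq_vector()) and a locking wrapper, so that create_irq() further down, which already holds vector_lock, can call the core directly. The double-underscore prefix is the kernel's usual marker for "caller holds the lock"; in sketch form, with hypothetical names:

    static DEFINE_SPINLOCK(example_lock);

    /* Double-underscore core: caller must already hold example_lock. */
    static int __example_assign(int id)
    {
            /* ... the actual allocation work ... */
            return id;
    }

    /* Plain-named wrapper: the entry point for unlocked callers. */
    static int example_assign(int id)
    {
            unsigned long flags;
            int ret;

            spin_lock_irqsave(&example_lock, flags);
            ret = __example_assign(id);
            spin_unlock_irqrestore(&example_lock, flags);
            return ret;
    }
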
3595     @@ -928,16 +747,15 @@
3596    
3597     static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
3598     {
3599     - unsigned idx;
3600     -
3601     - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
3602     -
3603     if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
3604     trigger == IOAPIC_LEVEL)
3605     - irq_desc[idx].chip = &ioapic_level_type;
3606     - else
3607     - irq_desc[idx].chip = &ioapic_edge_type;
3608     - set_intr_gate(vector, interrupt[idx]);
3609     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3610     + handle_fasteoi_irq, "fasteoi");
3611     + else {
3612     + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
3613     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3614     + handle_edge_irq, "edge");
3615     + }
3616     }
3617     #else
3618     #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
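
This hunk is the heart of the 2.6.19 genirq conversion in this file: instead of swapping whole hw_interrupt_type structures per trigger mode, a single irq_chip is paired with a per-IRQ flow handler. A sketch of the same wiring for a made-up controller (the demo_* names are hypothetical):

    static void demo_mask(unsigned int irq)   { /* mask the line */ }
    static void demo_unmask(unsigned int irq) { /* unmask the line */ }
    static void demo_ack(unsigned int irq)    { /* ack at the controller */ }

    static struct irq_chip demo_chip = {
            .name   = "DEMO",
            .mask   = demo_mask,
            .unmask = demo_unmask,
            .ack    = demo_ack,     /* used by the edge flow */
            .eoi    = demo_ack,     /* used by the fasteoi flow */
    };

    static void demo_register(unsigned int irq, int level_triggered)
    {
            if (level_triggered)
                    set_irq_chip_and_handler_name(irq, &demo_chip,
                                                  handle_fasteoi_irq, "fasteoi");
            else
                    set_irq_chip_and_handler_name(irq, &demo_chip,
                                                  handle_edge_irq, "edge");
    }

The "edge"/"fasteoi" strings reappear in /proc/interrupts through the irq_32/irq_64 hunks later in this patch, which print chip->name alongside irq_desc[i].name.
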
3619     @@ -990,16 +808,21 @@
3620     continue;
3621    
3622     if (IO_APIC_IRQ(irq)) {
3623     - vector = assign_irq_vector(irq);
3624     + cpumask_t mask;
3625     + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
3626     + if (vector < 0)
3627     + continue;
3628     +
3629     + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
3630     entry.vector = vector;
3631    
3632     ioapic_register_intr(irq, vector, IOAPIC_AUTO);
3633     if (!apic && (irq < 16))
3634     disable_8259A_irq(irq);
3635     }
3636     + ioapic_write_entry(apic, pin, entry);
3637     +
3638     spin_lock_irqsave(&ioapic_lock, flags);
3639     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
3640     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
3641     set_native_irq_info(irq, TARGET_CPUS);
3642     spin_unlock_irqrestore(&ioapic_lock, flags);
3643     }
3644     @@ -1042,7 +865,7 @@
3645     * The timer IRQ doesn't have to know that behind the
3646     * scene we have a 8259A-master in AEOI mode ...
3647     */
3648     - irq_desc[0].chip = &ioapic_edge_type;
3649     + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
3650    
3651     /*
3652     * Add it to the IO-APIC irq-routing table:
3653     @@ -1138,10 +961,7 @@
3654     for (i = 0; i <= reg_01.bits.entries; i++) {
3655     struct IO_APIC_route_entry entry;
3656    
3657     - spin_lock_irqsave(&ioapic_lock, flags);
3658     - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
3659     - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
3660     - spin_unlock_irqrestore(&ioapic_lock, flags);
3661     + entry = ioapic_read_entry(apic, i);
3662    
3663     printk(KERN_DEBUG " %02x %03X %02X ",
3664     i,
3665     @@ -1161,17 +981,12 @@
3666     );
3667     }
3668     }
3669     - if (use_pci_vector())
3670     - printk(KERN_INFO "Using vector-based indexing\n");
3671     printk(KERN_DEBUG "IRQ to pin mappings:\n");
3672     for (i = 0; i < NR_IRQS; i++) {
3673     struct irq_pin_list *entry = irq_2_pin + i;
3674     if (entry->pin < 0)
3675     continue;
3676     - if (use_pci_vector() && !platform_legacy_irq(i))
3677     - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
3678     - else
3679     - printk(KERN_DEBUG "IRQ%d ", i);
3680     + printk(KERN_DEBUG "IRQ%d ", i);
3681     for (;;) {
3682     printk("-> %d:%d", entry->apic, entry->pin);
3683     if (!entry->next)
3684     @@ -1335,9 +1150,6 @@
3685     irq_2_pin[i].pin = -1;
3686     irq_2_pin[i].next = 0;
3687     }
3688     - if (!pirqs_enabled)
3689     - for (i = 0; i < MAX_PIRQS; i++)
3690     - pirq_entries[i] = -1;
3691    
3692     /*
3693     * The number of IO-APIC IRQ registers (== #pins):
3694     @@ -1354,11 +1166,7 @@
3695     /* See if any of the pins is in ExtINT mode */
3696     for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
3697     struct IO_APIC_route_entry entry;
3698     - spin_lock_irqsave(&ioapic_lock, flags);
3699     - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
3700     - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
3701     - spin_unlock_irqrestore(&ioapic_lock, flags);
3702     -
3703     + entry = ioapic_read_entry(apic, pin);
3704    
3705     /* If the interrupt line is enabled and in ExtInt mode
3706     * I have found the pin where the i8259 is connected.
3707     @@ -1412,7 +1220,6 @@
3708     */
3709     if (ioapic_i8259.pin != -1) {
3710     struct IO_APIC_route_entry entry;
3711     - unsigned long flags;
3712    
3713     memset(&entry, 0, sizeof(entry));
3714     entry.mask = 0; /* Enabled */
3715     @@ -1429,12 +1236,7 @@
3716     /*
3717     * Add it to the IO-APIC irq-routing table:
3718     */
3719     - spin_lock_irqsave(&ioapic_lock, flags);
3720     - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
3721     - *(((int *)&entry)+1));
3722     - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
3723     - *(((int *)&entry)+0));
3724     - spin_unlock_irqrestore(&ioapic_lock, flags);
3725     + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
3726     }
3727    
3728     disconnect_bsp_APIC(ioapic_i8259.pin != -1);
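
Here and in several later hunks, the open-coded pairs of io_apic_read()/io_apic_write() calls give way to ioapic_read_entry()/ioapic_write_entry(). Those helpers are not shown in this extract; a sketch of the usual implementation follows, assuming the file's existing ioapic_lock, io_apic_read()/io_apic_write() and struct IO_APIC_route_entry. The union overlay of two u32 words onto the route entry is an assumption about their layout, not quoted from this patch:

    union entry_union {
            struct { u32 w1, w2; };
            struct IO_APIC_route_entry entry;
    };

    static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
    {
            union entry_union eu;
            unsigned long flags;

            spin_lock_irqsave(&ioapic_lock, flags);
            eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
            eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
            spin_unlock_irqrestore(&ioapic_lock, flags);
            return eu.entry;
    }

    static void ioapic_write_entry(int apic, int pin,
                                   struct IO_APIC_route_entry e)
    {
            union entry_union eu;
            unsigned long flags;

            eu.entry = e;
            spin_lock_irqsave(&ioapic_lock, flags);
            io_apic_write(apic, 0x10 + 2 * pin, eu.w1);
            io_apic_write(apic, 0x11 + 2 * pin, eu.w2);
            spin_unlock_irqrestore(&ioapic_lock, flags);
    }
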
3729     @@ -1442,76 +1244,6 @@
3730     }
3731    
3732     /*
3733     - * function to set the IO-APIC physical IDs based on the
3734     - * values stored in the MPC table.
3735     - *
3736     - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
3737     - */
3738     -
3739     -#ifndef CONFIG_XEN
3740     -static void __init setup_ioapic_ids_from_mpc (void)
3741     -{
3742     - union IO_APIC_reg_00 reg_00;
3743     - int apic;
3744     - int i;
3745     - unsigned char old_id;
3746     - unsigned long flags;
3747     -
3748     - /*
3749     - * Set the IOAPIC ID to the value stored in the MPC table.
3750     - */
3751     - for (apic = 0; apic < nr_ioapics; apic++) {
3752     -
3753     - /* Read the register 0 value */
3754     - spin_lock_irqsave(&ioapic_lock, flags);
3755     - reg_00.raw = io_apic_read(apic, 0);
3756     - spin_unlock_irqrestore(&ioapic_lock, flags);
3757     -
3758     - old_id = mp_ioapics[apic].mpc_apicid;
3759     -
3760     -
3761     - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
3762     -
3763     -
3764     - /*
3765     - * We need to adjust the IRQ routing table
3766     - * if the ID changed.
3767     - */
3768     - if (old_id != mp_ioapics[apic].mpc_apicid)
3769     - for (i = 0; i < mp_irq_entries; i++)
3770     - if (mp_irqs[i].mpc_dstapic == old_id)
3771     - mp_irqs[i].mpc_dstapic
3772     - = mp_ioapics[apic].mpc_apicid;
3773     -
3774     - /*
3775     - * Read the right value from the MPC table and
3776     - * write it into the ID register.
3777     - */
3778     - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
3779     - mp_ioapics[apic].mpc_apicid);
3780     -
3781     - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
3782     - spin_lock_irqsave(&ioapic_lock, flags);
3783     - io_apic_write(apic, 0, reg_00.raw);
3784     - spin_unlock_irqrestore(&ioapic_lock, flags);
3785     -
3786     - /*
3787     - * Sanity check
3788     - */
3789     - spin_lock_irqsave(&ioapic_lock, flags);
3790     - reg_00.raw = io_apic_read(apic, 0);
3791     - spin_unlock_irqrestore(&ioapic_lock, flags);
3792     - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
3793     - printk("could not set ID!\n");
3794     - else
3795     - apic_printk(APIC_VERBOSE," ok.\n");
3796     - }
3797     -}
3798     -#else
3799     -static void __init setup_ioapic_ids_from_mpc(void) { }
3800     -#endif
3801     -
3802     -/*
3803     * There is a nasty bug in some older SMP boards, their mptable lies
3804     * about the timer IRQ. We do the following to work around the situation:
3805     *
3806     @@ -1565,7 +1297,7 @@
3807     * an edge even if it isn't on the 8259A...
3808     */
3809    
3810     -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
3811     +static unsigned int startup_ioapic_irq(unsigned int irq)
3812     {
3813     int was_pending = 0;
3814     unsigned long flags;
3815     @@ -1582,107 +1314,19 @@
3816     return was_pending;
3817     }
3818    
3819     -/*
3820     - * Once we have recorded IRQ_PENDING already, we can mask the
3821     - * interrupt for real. This prevents IRQ storms from unhandled
3822     - * devices.
3823     - */
3824     -static void ack_edge_ioapic_irq(unsigned int irq)
3825     -{
3826     - move_irq(irq);
3827     - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
3828     - == (IRQ_PENDING | IRQ_DISABLED))
3829     - mask_IO_APIC_irq(irq);
3830     - ack_APIC_irq();
3831     -}
3832     -
3833     -/*
3834     - * Level triggered interrupts can just be masked,
3835     - * and shutting down and starting up the interrupt
3836     - * is the same as enabling and disabling them -- except
3837     - * with a startup need to return a "was pending" value.
3838     - *
3839     - * Level triggered interrupts are special because we
3840     - * do not touch any IO-APIC register while handling
3841     - * them. We ack the APIC in the end-IRQ handler, not
3842     - * in the start-IRQ-handler. Protection against reentrance
3843     - * from the same interrupt is still provided, both by the
3844     - * generic IRQ layer and by the fact that an unacked local
3845     - * APIC does not accept IRQs.
3846     - */
3847     -static unsigned int startup_level_ioapic_irq (unsigned int irq)
3848     -{
3849     - unmask_IO_APIC_irq(irq);
3850     -
3851     - return 0; /* don't check for pending */
3852     -}
3853     -
3854     -static void end_level_ioapic_irq (unsigned int irq)
3855     -{
3856     - move_irq(irq);
3857     - ack_APIC_irq();
3858     -}
3859     -
3860     -#ifdef CONFIG_PCI_MSI
3861     -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
3862     -{
3863     - int irq = vector_to_irq(vector);
3864     -
3865     - return startup_edge_ioapic_irq(irq);
3866     -}
3867     -
3868     -static void ack_edge_ioapic_vector(unsigned int vector)
3869     -{
3870     - int irq = vector_to_irq(vector);
3871     -
3872     - move_native_irq(vector);
3873     - ack_edge_ioapic_irq(irq);
3874     -}
3875     -
3876     -static unsigned int startup_level_ioapic_vector (unsigned int vector)
3877     -{
3878     - int irq = vector_to_irq(vector);
3879     -
3880     - return startup_level_ioapic_irq (irq);
3881     -}
3882     -
3883     -static void end_level_ioapic_vector (unsigned int vector)
3884     -{
3885     - int irq = vector_to_irq(vector);
3886     -
3887     - move_native_irq(vector);
3888     - end_level_ioapic_irq(irq);
3889     -}
3890     -
3891     -static void mask_IO_APIC_vector (unsigned int vector)
3892     -{
3893     - int irq = vector_to_irq(vector);
3894     -
3895     - mask_IO_APIC_irq(irq);
3896     -}
3897     -
3898     -static void unmask_IO_APIC_vector (unsigned int vector)
3899     -{
3900     - int irq = vector_to_irq(vector);
3901     -
3902     - unmask_IO_APIC_irq(irq);
3903     -}
3904     -
3905     -#ifdef CONFIG_SMP
3906     -static void set_ioapic_affinity_vector (unsigned int vector,
3907     - cpumask_t cpu_mask)
3908     +static int ioapic_retrigger_irq(unsigned int irq)
3909     {
3910     - int irq = vector_to_irq(vector);
3911     + cpumask_t mask;
3912     + unsigned vector;
3913     + unsigned long flags;
3914    
3915     - set_native_irq_info(vector, cpu_mask);
3916     - set_ioapic_affinity_irq(irq, cpu_mask);
3917     -}
3918     -#endif // CONFIG_SMP
3919     -#endif // CONFIG_PCI_MSI
3920     + spin_lock_irqsave(&vector_lock, flags);
3921     + vector = irq_vector[irq];
3922     + cpus_clear(mask);
3923     + cpu_set(first_cpu(irq_domain[irq]), mask);
3924    
3925     -static int ioapic_retrigger(unsigned int irq)
3926     -{
3927     - send_IPI_self(IO_APIC_VECTOR(irq));
3928     + send_IPI_mask(mask, vector);
3929     + spin_unlock_irqrestore(&vector_lock, flags);
3930    
3931     return 1;
3932     }
3933     @@ -1696,32 +1340,47 @@
3934     * races.
3935     */
3936    
3937     -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
3938     - .typename = "IO-APIC-edge",
3939     - .startup = startup_edge_ioapic,
3940     - .shutdown = shutdown_edge_ioapic,
3941     - .enable = enable_edge_ioapic,
3942     - .disable = disable_edge_ioapic,
3943     - .ack = ack_edge_ioapic,
3944     - .end = end_edge_ioapic,
3945     -#ifdef CONFIG_SMP
3946     - .set_affinity = set_ioapic_affinity,
3947     +static void ack_apic_edge(unsigned int irq)
3948     +{
3949     + move_native_irq(irq);
3950     + ack_APIC_irq();
3951     +}
3952     +
3953     +static void ack_apic_level(unsigned int irq)
3954     +{
3955     + int do_unmask_irq = 0;
3956     +
3957     +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
3958     + /* If we are moving the irq we need to mask it */
3959     + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
3960     + do_unmask_irq = 1;
3961     + mask_IO_APIC_irq(irq);
3962     + }
3963     #endif
3964     - .retrigger = ioapic_retrigger,
3965     -};
3966    
3967     -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
3968     - .typename = "IO-APIC-level",
3969     - .startup = startup_level_ioapic,
3970     - .shutdown = shutdown_level_ioapic,
3971     - .enable = enable_level_ioapic,
3972     - .disable = disable_level_ioapic,
3973     - .ack = mask_and_ack_level_ioapic,
3974     - .end = end_level_ioapic,
3975     + /*
3976     + * We must acknowledge the irq before we move it or the acknowledge will
3977     + * not propagate properly.
3978     + */
3979     + ack_APIC_irq();
3980     +
3981     + /* Now we can move and re-enable the irq */
3982     + move_masked_irq(irq);
3983     + if (unlikely(do_unmask_irq))
3984     + unmask_IO_APIC_irq(irq);
3985     +}
3986     +
3987     +static struct irq_chip ioapic_chip __read_mostly = {
3988     + .name = "IO-APIC",
3989     + .startup = startup_ioapic_irq,
3990     + .mask = mask_IO_APIC_irq,
3991     + .unmask = unmask_IO_APIC_irq,
3992     + .ack = ack_apic_edge,
3993     + .eoi = ack_apic_level,
3994     #ifdef CONFIG_SMP
3995     - .set_affinity = set_ioapic_affinity,
3996     + .set_affinity = set_ioapic_affinity_irq,
3997     #endif
3998     - .retrigger = ioapic_retrigger,
3999     + .retrigger = ioapic_retrigger_irq,
4000     };
4001     #endif /* !CONFIG_XEN */
4002    
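Why ack_apic_level() is wired to .eoi rather than .ack: the fasteoi flow acknowledges once, after the device handlers have run, which is exactly where the mask/move/unmask dance above needs to happen. A much-simplified sketch of that flow (locking and the pending/disabled bookkeeping are omitted; this is not the verbatim kernel/irq/chip.c code):

    static void fasteoi_flow_sketch(unsigned int irq, struct irq_desc *desc)
    {
            desc->status |= IRQ_INPROGRESS;
            handle_IRQ_event(irq, desc->action);    /* run device handlers */
            desc->status &= ~IRQ_INPROGRESS;
            desc->chip->eoi(irq);                   /* -> ack_apic_level() */
    }
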
4003     @@ -1742,12 +1401,7 @@
4004     */
4005     for (irq = 0; irq < NR_IRQS ; irq++) {
4006     int tmp = irq;
4007     - if (use_pci_vector()) {
4008     - if (!platform_legacy_irq(tmp))
4009     - if ((tmp = vector_to_irq(tmp)) == -1)
4010     - continue;
4011     - }
4012     - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
4013     + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
4014     /*
4015     * Hmm.. We don't have an entry for this,
4016     * so default to an old-fashioned 8259
4017     @@ -1758,7 +1412,7 @@
4018     #ifndef CONFIG_XEN
4019     else
4020     /* Strange. Oh, well.. */
4021     - irq_desc[irq].chip = &no_irq_type;
4022     + irq_desc[irq].chip = &no_irq_chip;
4023     #endif
4024     }
4025     }
4026     @@ -1879,8 +1533,6 @@
4027     spin_unlock_irqrestore(&ioapic_lock, flags);
4028     }
4029    
4030     -int timer_uses_ioapic_pin_0;
4031     -
4032     /*
4033     * This code may look a bit paranoid, but it's supposed to cooperate with
4034     * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
4035     @@ -1893,13 +1545,13 @@
4036     {
4037     int apic1, pin1, apic2, pin2;
4038     int vector;
4039     + cpumask_t mask;
4040    
4041     /*
4042     * get/set the timer IRQ vector:
4043     */
4044     disable_8259A_irq(0);
4045     - vector = assign_irq_vector(0);
4046     - set_intr_gate(vector, interrupt[0]);
4047     + vector = assign_irq_vector(0, TARGET_CPUS, &mask);
4048    
4049     /*
4050     * Subtle, code in do_timer_interrupt() expects an AEOI
4051     @@ -1918,9 +1570,6 @@
4052     pin2 = ioapic_i8259.pin;
4053     apic2 = ioapic_i8259.apic;
4054    
4055     - if (pin1 == 0)
4056     - timer_uses_ioapic_pin_0 = 1;
4057     -
4058     apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
4059     vector, apic1, pin1, apic2, pin2);
4060    
4061     @@ -2035,11 +1684,6 @@
4062    
4063     apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
4064    
4065     - /*
4066     - * Set up the IO-APIC IRQ routing table.
4067     - */
4068     - if (!acpi_ioapic)
4069     - setup_ioapic_ids_from_mpc();
4070     #ifndef CONFIG_XEN
4071     sync_Arb_IDs();
4072     #endif /* !CONFIG_XEN */
4073     @@ -2060,17 +1704,12 @@
4074     {
4075     struct IO_APIC_route_entry *entry;
4076     struct sysfs_ioapic_data *data;
4077     - unsigned long flags;
4078     int i;
4079    
4080     data = container_of(dev, struct sysfs_ioapic_data, dev);
4081     entry = data->entry;
4082     - spin_lock_irqsave(&ioapic_lock, flags);
4083     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
4084     - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
4085     - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
4086     - }
4087     - spin_unlock_irqrestore(&ioapic_lock, flags);
4088     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
4089     + *entry = ioapic_read_entry(dev->id, i);
4090    
4091     return 0;
4092     }
4093     @@ -2092,11 +1731,9 @@
4094     reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
4095     io_apic_write(dev->id, 0, reg_00.raw);
4096     }
4097     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
4098     - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
4099     - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
4100     - }
4101     spin_unlock_irqrestore(&ioapic_lock, flags);
4102     + for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
4103     + ioapic_write_entry(dev->id, i, entry[i]);
4104    
4105     return 0;
4106     }
4107     @@ -2142,26 +1779,254 @@
4108    
4109     device_initcall(ioapic_init_sysfs);
4110    
4111     -/* --------------------------------------------------------------------------
4112     - ACPI-based IOAPIC Configuration
4113     - -------------------------------------------------------------------------- */
4114     +#ifndef CONFIG_XEN
4115     +/*
4116     + * Dynamic irq allocate and deallocation
4117     + */
4118     +int create_irq(void)
4119     +{
4120     + /* Allocate an unused irq */
4121     + int irq;
4122     + int new;
4123     + int vector = 0;
4124     + unsigned long flags;
4125     + cpumask_t mask;
4126    
4127     -#ifdef CONFIG_ACPI
4128     + irq = -ENOSPC;
4129     + spin_lock_irqsave(&vector_lock, flags);
4130     + for (new = (NR_IRQS - 1); new >= 0; new--) {
4131     + if (platform_legacy_irq(new))
4132     + continue;
4133     + if (irq_vector[new] != 0)
4134     + continue;
4135     + vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
4136     + if (likely(vector > 0))
4137     + irq = new;
4138     + break;
4139     + }
4140     + spin_unlock_irqrestore(&vector_lock, flags);
4141    
4142     -#define IO_APIC_MAX_ID 0xFE
4143     + if (irq >= 0) {
4144     + dynamic_irq_init(irq);
4145     + }
4146     + return irq;
4147     +}
4148    
4149     -int __init io_apic_get_version (int ioapic)
4150     +void destroy_irq(unsigned int irq)
4151     {
4152     - union IO_APIC_reg_01 reg_01;
4153     unsigned long flags;
4154    
4155     - spin_lock_irqsave(&ioapic_lock, flags);
4156     - reg_01.raw = io_apic_read(ioapic, 1);
4157     - spin_unlock_irqrestore(&ioapic_lock, flags);
4158     + dynamic_irq_cleanup(irq);
4159     +
4160     + spin_lock_irqsave(&vector_lock, flags);
4161     + irq_vector[irq] = 0;
4162     + spin_unlock_irqrestore(&vector_lock, flags);
4163     +}
4164     +#endif
4165     +
4166     +/*
4167     + * MSI message composition
4168     + */
4169     +#ifdef CONFIG_PCI_MSI
4170     +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
4171     +{
4172     + int vector;
4173     + unsigned dest;
4174     + cpumask_t tmp;
4175     +
4176     + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4177     + if (vector >= 0) {
4178     + dest = cpu_mask_to_apicid(tmp);
4179     +
4180     + msg->address_hi = MSI_ADDR_BASE_HI;
4181     + msg->address_lo =
4182     + MSI_ADDR_BASE_LO |
4183     + ((INT_DEST_MODE == 0) ?
4184     + MSI_ADDR_DEST_MODE_PHYSICAL:
4185     + MSI_ADDR_DEST_MODE_LOGICAL) |
4186     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4187     + MSI_ADDR_REDIRECTION_CPU:
4188     + MSI_ADDR_REDIRECTION_LOWPRI) |
4189     + MSI_ADDR_DEST_ID(dest);
4190     +
4191     + msg->data =
4192     + MSI_DATA_TRIGGER_EDGE |
4193     + MSI_DATA_LEVEL_ASSERT |
4194     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4195     + MSI_DATA_DELIVERY_FIXED:
4196     + MSI_DATA_DELIVERY_LOWPRI) |
4197     + MSI_DATA_VECTOR(vector);
4198     + }
4199     + return vector;
4200     +}
4201     +
4202     +#ifdef CONFIG_SMP
4203     +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
4204     +{
4205     + struct msi_msg msg;
4206     + unsigned int dest;
4207     + cpumask_t tmp;
4208     + int vector;
4209     +
4210     + cpus_and(tmp, mask, cpu_online_map);
4211     + if (cpus_empty(tmp))
4212     + tmp = TARGET_CPUS;
4213     +
4214     + cpus_and(mask, tmp, CPU_MASK_ALL);
4215     +
4216     + vector = assign_irq_vector(irq, mask, &tmp);
4217     + if (vector < 0)
4218     + return;
4219     +
4220     + dest = cpu_mask_to_apicid(tmp);
4221     +
4222     + read_msi_msg(irq, &msg);
4223     +
4224     + msg.data &= ~MSI_DATA_VECTOR_MASK;
4225     + msg.data |= MSI_DATA_VECTOR(vector);
4226     + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
4227     + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
4228     +
4229     + write_msi_msg(irq, &msg);
4230     + set_native_irq_info(irq, mask);
4231     +}
4232     +#endif /* CONFIG_SMP */
4233     +
4234     +/*
4235     + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
4236     + * which implement the MSI or MSI-X Capability Structure.
4237     + */
4238     +static struct irq_chip msi_chip = {
4239     + .name = "PCI-MSI",
4240     + .unmask = unmask_msi_irq,
4241     + .mask = mask_msi_irq,
4242     + .ack = ack_apic_edge,
4243     +#ifdef CONFIG_SMP
4244     + .set_affinity = set_msi_irq_affinity,
4245     +#endif
4246     + .retrigger = ioapic_retrigger_irq,
4247     +};
4248     +
4249     +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
4250     +{
4251     + struct msi_msg msg;
4252     + int ret;
4253     + ret = msi_compose_msg(dev, irq, &msg);
4254     + if (ret < 0)
4255     + return ret;
4256     +
4257     + write_msi_msg(irq, &msg);
4258     +
4259     + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
4260     +
4261     + return 0;
4262     +}
4263     +
4264     +void arch_teardown_msi_irq(unsigned int irq)
4265     +{
4266     + return;
4267     +}
4268     +
4269     +#endif /* CONFIG_PCI_MSI */
4270     +
4271     +/*
4272     + * Hypertransport interrupt support
4273     + */
4274     +#ifdef CONFIG_HT_IRQ
4275     +
4276     +#ifdef CONFIG_SMP
4277     +
4278     +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
4279     +{
4280     + struct ht_irq_msg msg;
4281     + fetch_ht_irq_msg(irq, &msg);
4282     +
4283     + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
4284     + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
4285    
4286     - return reg_01.bits.version;
4287     + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
4288     + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
4289     +
4290     + write_ht_irq_msg(irq, &msg);
4291     }
4292    
4293     +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
4294     +{
4295     + unsigned int dest;
4296     + cpumask_t tmp;
4297     + int vector;
4298     +
4299     + cpus_and(tmp, mask, cpu_online_map);
4300     + if (cpus_empty(tmp))
4301     + tmp = TARGET_CPUS;
4302     +
4303     + cpus_and(mask, tmp, CPU_MASK_ALL);
4304     +
4305     + vector = assign_irq_vector(irq, mask, &tmp);
4306     + if (vector < 0)
4307     + return;
4308     +
4309     + dest = cpu_mask_to_apicid(tmp);
4310     +
4311     + target_ht_irq(irq, dest, vector);
4312     + set_native_irq_info(irq, mask);
4313     +}
4314     +#endif
4315     +
4316     +static struct irq_chip ht_irq_chip = {
4317     + .name = "PCI-HT",
4318     + .mask = mask_ht_irq,
4319     + .unmask = unmask_ht_irq,
4320     + .ack = ack_apic_edge,
4321     +#ifdef CONFIG_SMP
4322     + .set_affinity = set_ht_irq_affinity,
4323     +#endif
4324     + .retrigger = ioapic_retrigger_irq,
4325     +};
4326     +
4327     +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
4328     +{
4329     + int vector;
4330     + cpumask_t tmp;
4331     +
4332     + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4333     + if (vector >= 0) {
4334     + struct ht_irq_msg msg;
4335     + unsigned dest;
4336     +
4337     + dest = cpu_mask_to_apicid(tmp);
4338     +
4339     + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
4340     +
4341     + msg.address_lo =
4342     + HT_IRQ_LOW_BASE |
4343     + HT_IRQ_LOW_DEST_ID(dest) |
4344     + HT_IRQ_LOW_VECTOR(vector) |
4345     + ((INT_DEST_MODE == 0) ?
4346     + HT_IRQ_LOW_DM_PHYSICAL :
4347     + HT_IRQ_LOW_DM_LOGICAL) |
4348     + HT_IRQ_LOW_RQEOI_EDGE |
4349     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4350     + HT_IRQ_LOW_MT_FIXED :
4351     + HT_IRQ_LOW_MT_ARBITRATED) |
4352     + HT_IRQ_LOW_IRQ_MASKED;
4353     +
4354     + write_ht_irq_msg(irq, &msg);
4355     +
4356     + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
4357     + handle_edge_irq, "edge");
4358     + }
4359     + return vector;
4360     +}
4361     +#endif /* CONFIG_HT_IRQ */
4362     +
4363     +/* --------------------------------------------------------------------------
4364     + ACPI-based IOAPIC Configuration
4365     + -------------------------------------------------------------------------- */
4366     +
4367     +#ifdef CONFIG_ACPI
4368     +
4369     +#define IO_APIC_MAX_ID 0xFE
4370    
4371     int __init io_apic_get_redir_entries (int ioapic)
4372     {
4373     @@ -2180,6 +2045,8 @@
4374     {
4375     struct IO_APIC_route_entry entry;
4376     unsigned long flags;
4377     + int vector;
4378     + cpumask_t mask;
4379    
4380     if (!IO_APIC_IRQ(irq)) {
4381     apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
4382     @@ -2188,6 +2055,17 @@
4383     }
4384    
4385     /*
4386     + * IRQs < 16 are already in the irq_2_pin[] map
4387     + */
4388     + if (irq >= 16)
4389     + add_pin_to_irq(irq, ioapic, pin);
4390     +
4391     +
4392     + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4393     + if (vector < 0)
4394     + return vector;
4395     +
4396     + /*
4397     * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
4398     * Note that we mask (disable) IRQs now -- these get enabled when the
4399     * corresponding device driver registers for this IRQ.
4400     @@ -2197,19 +2075,11 @@
4401    
4402     entry.delivery_mode = INT_DELIVERY_MODE;
4403     entry.dest_mode = INT_DEST_MODE;
4404     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4405     + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4406     entry.trigger = edge_level;
4407     entry.polarity = active_high_low;
4408     entry.mask = 1; /* Disabled (masked) */
4409     -
4410     - irq = gsi_irq_sharing(irq);
4411     - /*
4412     - * IRQs < 16 are already in the irq_2_pin[] map
4413     - */
4414     - if (irq >= 16)
4415     - add_pin_to_irq(irq, ioapic, pin);
4416     -
4417     - entry.vector = assign_irq_vector(irq);
4418     + entry.vector = vector & 0xff;
4419    
4420     apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
4421     "IRQ %d Mode:%i Active:%i)\n", ioapic,
4422     @@ -2221,10 +2091,10 @@
4423     if (!ioapic && (irq < 16))
4424     disable_8259A_irq(irq);
4425    
4426     + ioapic_write_entry(ioapic, pin, entry);
4427     +
4428     spin_lock_irqsave(&ioapic_lock, flags);
4429     - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
4430     - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
4431     - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
4432     + set_native_irq_info(irq, TARGET_CPUS);
4433     spin_unlock_irqrestore(&ioapic_lock, flags);
4434    
4435     return 0;
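
For context on the MSI hunks above: drivers never call arch_setup_msi_irq() themselves; they reach it through the generic PCI layer. A sketch of the driver-side view (the mydev_* names are hypothetical):

    #include <linux/pci.h>
    #include <linux/interrupt.h>

    static irqreturn_t mydev_irq(int irq, void *dev_id)
    {
            /* ... service the device ... */
            return IRQ_HANDLED;
    }

    static int mydev_setup_irq(struct pci_dev *pdev)
    {
            int rc = pci_enable_msi(pdev);  /* ends up in arch_setup_msi_irq() */

            if (rc)
                    return rc;              /* caller can fall back to INTx */

            /* pdev->irq now holds the MSI irq number. */
            return request_irq(pdev->irq, mydev_irq, 0, "mydev", pdev);
    }
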
4436     --- a/arch/x86/kernel/ioport_64-xen.c
4437     +++ b/arch/x86/kernel/ioport_64-xen.c
4438     @@ -58,6 +58,7 @@
4439    
4440     memset(bitmap, 0xff, IO_BITMAP_BYTES);
4441     t->io_bitmap_ptr = bitmap;
4442     + set_thread_flag(TIF_IO_BITMAP);
4443    
4444     set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
4445     set_iobitmap.nr_ports = IO_BITMAP_BITS;
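
The single added line tags the task, letting the context-switch path skip I/O-bitmap work for the overwhelming majority of tasks that never call ioperm(). A sketch of the consumer side (switch_io_bitmap() is a hypothetical helper; the real test sits in __switch_to(), and under Xen the bitmap is installed via the hypercall shown above):

    static void demo_switch(struct task_struct *prev_p,
                            struct task_struct *next_p)
    {
            /* Only pay for bitmap handling when the flag says so. */
            if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP))
                    switch_io_bitmap(prev_p, next_p);       /* hypothetical */
    }
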
4446     --- a/arch/x86/kernel/irq_32-xen.c
4447     +++ b/arch/x86/kernel/irq_32-xen.c
4448     @@ -53,8 +53,10 @@
4449     */
4450     fastcall unsigned int do_IRQ(struct pt_regs *regs)
4451     {
4452     + struct pt_regs *old_regs;
4453     /* high bit used in ret_from_ code */
4454     int irq = ~regs->orig_eax;
4455     + struct irq_desc *desc = irq_desc + irq;
4456     #ifdef CONFIG_4KSTACKS
4457     union irq_ctx *curctx, *irqctx;
4458     u32 *isp;
4459     @@ -66,6 +68,7 @@
4460     BUG();
4461     }
4462    
4463     + old_regs = set_irq_regs(regs);
4464     irq_enter();
4465     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4466     /* Debugging check for stack overflow: is there less than 1KB free? */
4467     @@ -110,19 +113,20 @@
4468     (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
4469    
4470     asm volatile(
4471     - " xchgl %%ebx,%%esp \n"
4472     - " call __do_IRQ \n"
4473     + " xchgl %%ebx,%%esp \n"
4474     + " call *%%edi \n"
4475     " movl %%ebx,%%esp \n"
4476     : "=a" (arg1), "=d" (arg2), "=b" (ebx)
4477     - : "0" (irq), "1" (regs), "2" (isp)
4478     - : "memory", "cc", "ecx"
4479     + : "0" (irq), "1" (desc), "2" (isp),
4480     + "D" (desc->handle_irq)
4481     + : "memory", "cc"
4482     );
4483     } else
4484     #endif
4485     - __do_IRQ(irq, regs);
4486     + desc->handle_irq(irq, desc);
4487    
4488     irq_exit();
4489     -
4490     + set_irq_regs(old_regs);
4491     return 1;
4492     }
4493    
4494     @@ -253,7 +257,8 @@
4495     for_each_online_cpu(j)
4496     seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4497     #endif
4498     - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4499     + seq_printf(p, " %8s", irq_desc[i].chip->name);
4500     + seq_printf(p, "-%-8s", irq_desc[i].name);
4501     seq_printf(p, " %s", action->name);
4502    
4503     for (action=action->next; action; action = action->next)
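
The register frame no longer travels as a handler argument; do_IRQ() parks it with set_irq_regs() and interested handlers fetch it back. A sketch of a 2.6.19-style handler (demo names; the old signature carried an extra struct pt_regs * that is now gone):

    static irqreturn_t demo_tick(int irq, void *dev_id)
    {
            struct pt_regs *regs = get_irq_regs();  /* saved by do_IRQ() */

            if (regs && user_mode(regs))
                    ;       /* e.g. account this tick as user time */

            return IRQ_HANDLED;
    }
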
4504     --- a/arch/x86/kernel/irq_64-xen.c
4505     +++ b/arch/x86/kernel/irq_64-xen.c
4506     @@ -20,11 +20,6 @@
4507     #include <asm/idle.h>
4508    
4509     atomic_t irq_err_count;
4510     -#ifdef CONFIG_X86_IO_APIC
4511     -#ifdef APIC_MISMATCH_DEBUG
4512     -atomic_t irq_mis_count;
4513     -#endif
4514     -#endif
4515    
4516     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4517     /*
4518     @@ -79,7 +74,8 @@
4519     for_each_online_cpu(j)
4520     seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4521     #endif
4522     - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4523     + seq_printf(p, " %8s", irq_desc[i].chip->name);
4524     + seq_printf(p, "-%-8s", irq_desc[i].name);
4525    
4526     seq_printf(p, " %s", action->name);
4527     for (action=action->next; action; action = action->next)
4528     @@ -99,11 +95,6 @@
4529     seq_putc(p, '\n');
4530     #endif
4531     seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
4532     -#ifdef CONFIG_X86_IO_APIC
4533     -#ifdef APIC_MISMATCH_DEBUG
4534     - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
4535     -#endif
4536     -#endif
4537     }
4538     return 0;
4539     }
4540     @@ -114,24 +105,28 @@
4541     * handlers).
4542     */
4543     asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
4544     -{
4545     +{
4546     + struct pt_regs *old_regs = set_irq_regs(regs);
4547     +
4548     /* high bit used in ret_from_ code */
4549     unsigned irq = ~regs->orig_rax;
4550    
4551     - if (unlikely(irq >= NR_IRQS)) {
4552     - printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
4553     - __FUNCTION__, irq);
4554     - BUG();
4555     - }
4556     -
4557     exit_idle();
4558     irq_enter();
4559     +
4560     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4561     stack_overflow_check(regs);
4562     #endif
4563     - __do_IRQ(irq, regs);
4564     +
4565     + if (likely(irq < NR_IRQS))
4566     + generic_handle_irq(irq);
4567     + else
4568     + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
4569     + __func__, smp_processor_id(), irq);
4570     +
4571     irq_exit();
4572    
4573     + set_irq_regs(old_regs);
4574     return 1;
4575     }
4576    
4577     @@ -192,6 +187,6 @@
4578     */
4579     void ack_bad_irq(unsigned int irq)
4580     {
4581     - printk("unexpected IRQ trap at vector %02x\n", irq);
4582     + printk("unexpected IRQ trap at irq %02x\n", irq);
4583     }
4584     #endif
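
generic_handle_irq() replaces the old __do_IRQ() as the single dispatch point; it simply indirects through whatever flow handler was installed for the descriptor. Roughly (a sketch, not the verbatim include/linux/irq.h definition):

    static inline void generic_handle_irq_sketch(unsigned int irq)
    {
            struct irq_desc *desc = irq_desc + irq;

            /* handle_edge_irq, handle_fasteoi_irq, ... */
            desc->handle_irq(irq, desc);
    }
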
4585     --- a/arch/x86/kernel/ldt_32-xen.c
4586     +++ b/arch/x86/kernel/ldt_32-xen.c
4587     @@ -1,5 +1,5 @@
4588     /*
4589     - * linux/kernel/ldt.c
4590     + * linux/arch/i386/kernel/ldt.c
4591     *
4592     * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
4593     * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
4594     --- a/arch/x86/kernel/microcode-xen.c
4595     +++ b/arch/x86/kernel/microcode-xen.c
4596     @@ -2,6 +2,7 @@
4597     * Intel CPU Microcode Update Driver for Linux
4598     *
4599     * Copyright (C) 2000-2004 Tigran Aivazian
4600     + * 2006 Shaohua Li <shaohua.li@intel.com>
4601     *
4602     * This driver allows to upgrade microcode on Intel processors
4603     * belonging to IA-32 family - PentiumPro, Pentium II,
4604     @@ -33,7 +34,9 @@
4605     #include <linux/spinlock.h>
4606     #include <linux/mm.h>
4607     #include <linux/mutex.h>
4608     -#include <linux/syscalls.h>
4609     +#include <linux/cpu.h>
4610     +#include <linux/firmware.h>
4611     +#include <linux/platform_device.h>
4612    
4613     #include <asm/msr.h>
4614     #include <asm/uaccess.h>
4615     @@ -55,12 +58,7 @@
4616     /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
4617     static DEFINE_MUTEX(microcode_mutex);
4618    
4619     -static int microcode_open (struct inode *unused1, struct file *unused2)
4620     -{
4621     - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4622     -}
4623     -
4624     -
4625     +#ifdef CONFIG_MICROCODE_OLD_INTERFACE
4626     static int do_microcode_update (const void __user *ubuf, size_t len)
4627     {
4628     int err;
4629     @@ -85,6 +83,11 @@
4630     return err;
4631     }
4632    
4633     +static int microcode_open (struct inode *unused1, struct file *unused2)
4634     +{
4635     + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4636     +}
4637     +
4638     static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
4639     {
4640     ssize_t ret;
4641     @@ -117,7 +120,7 @@
4642     .fops = &microcode_fops,
4643     };
4644    
4645     -static int __init microcode_init (void)
4646     +static int __init microcode_dev_init (void)
4647     {
4648     int error;
4649    
4650     @@ -129,6 +132,68 @@
4651     return error;
4652     }
4653    
4654     + return 0;
4655     +}
4656     +
4657     +static void __exit microcode_dev_exit (void)
4658     +{
4659     + misc_deregister(&microcode_dev);
4660     +}
4661     +
4662     +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
4663     +#else
4664     +#define microcode_dev_init() 0
4665     +#define microcode_dev_exit() do { } while(0)
4666     +#endif
4667     +
4668     +/* fake device for request_firmware */
4669     +static struct platform_device *microcode_pdev;
4670     +
4671     +static int request_microcode(void)
4672     +{
4673     + char name[30];
4674     + const struct cpuinfo_x86 *c = &boot_cpu_data;
4675     + const struct firmware *firmware;
4676     + int error;
4677     + struct xen_platform_op op;
4678     +
4679     + sprintf(name,"intel-ucode/%02x-%02x-%02x",
4680     + c->x86, c->x86_model, c->x86_mask);
4681     + error = request_firmware(&firmware, name, &microcode_pdev->dev);
4682     + if (error) {
4683     + pr_debug("ucode data file %s load failed\n", name);
4684     + return error;
4685     + }
4686     +
4687     + op.cmd = XENPF_microcode_update;
4688     + set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
4689     + op.u.microcode.length = firmware->size;
4690     + error = HYPERVISOR_platform_op(&op);
4691     +
4692     + release_firmware(firmware);
4693     +
4694     + if (error)
4695     + pr_debug("ucode load failed\n");
4696     +
4697     + return error;
4698     +}
4699     +
4700     +static int __init microcode_init (void)
4701     +{
4702     + int error;
4703     +
4704     + error = microcode_dev_init();
4705     + if (error)
4706     + return error;
4707     + microcode_pdev = platform_device_register_simple("microcode", -1,
4708     + NULL, 0);
4709     + if (IS_ERR(microcode_pdev)) {
4710     + microcode_dev_exit();
4711     + return PTR_ERR(microcode_pdev);
4712     + }
4713     +
4714     + request_microcode();
4715     +
4716     printk(KERN_INFO
4717     "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
4718     return 0;
4719     @@ -136,9 +201,9 @@
4720    
4721     static void __exit microcode_exit (void)
4722     {
4723     - misc_deregister(&microcode_dev);
4724     + microcode_dev_exit();
4725     + platform_device_unregister(microcode_pdev);
4726     }
4727    
4728     module_init(microcode_init)
4729     module_exit(microcode_exit)
4730     -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
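
The firmware file requested above is named from the boot CPU's signature. A worked example (the values are chosen for illustration; the /lib/firmware location is udev convention, not something this patch mandates):

    static void demo_ucode_name(void)
    {
            char name[30];

            /* Family 6, model 15, stepping 2, e.g. an early Core 2: */
            sprintf(name, "intel-ucode/%02x-%02x-%02x", 6, 15, 2);
            /* name == "intel-ucode/06-0f-02" */
    }
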
4731     --- a/arch/x86/kernel/mpparse_32-xen.c
4732     +++ b/arch/x86/kernel/mpparse_32-xen.c
4733     @@ -30,6 +30,7 @@
4734     #include <asm/io_apic.h>
4735    
4736     #include <mach_apic.h>
4737     +#include <mach_apicdef.h>
4738     #include <mach_mpparse.h>
4739     #include <bios_ebda.h>
4740    
4741     @@ -68,7 +69,7 @@
4742     /* Processor that is doing the boot up */
4743     unsigned int boot_cpu_physical_apicid = -1U;
4744     /* Internal processor count */
4745     -static unsigned int __devinitdata num_processors;
4746     +unsigned int __cpuinitdata num_processors;
4747    
4748     /* Bitmask of physically existing CPUs */
4749     physid_mask_t phys_cpu_present_map;
4750     @@ -235,12 +236,14 @@
4751    
4752     mpc_oem_bus_info(m, str, translation_table[mpc_record]);
4753    
4754     +#if MAX_MP_BUSSES < 256
4755     if (m->mpc_busid >= MAX_MP_BUSSES) {
4756     printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
4757     " is too large, max. supported is %d\n",
4758     m->mpc_busid, str, MAX_MP_BUSSES - 1);
4759     return;
4760     }
4761     +#endif
4762    
4763     if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
4764     mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4765     @@ -300,19 +303,6 @@
4766     m->mpc_irqtype, m->mpc_irqflag & 3,
4767     (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4768     m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4769     - /*
4770     - * Well it seems all SMP boards in existence
4771     - * use ExtINT/LVT1 == LINT0 and
4772     - * NMI/LVT2 == LINT1 - the following check
4773     - * will show us if this assumptions is false.
4774     - * Until then we do not have to add baggage.
4775     - */
4776     - if ((m->mpc_irqtype == mp_ExtINT) &&
4777     - (m->mpc_destapiclint != 0))
4778     - BUG();
4779     - if ((m->mpc_irqtype == mp_NMI) &&
4780     - (m->mpc_destapiclint != 1))
4781     - BUG();
4782     }
4783    
4784     #ifdef CONFIG_X86_NUMAQ
4785     @@ -838,8 +828,7 @@
4786    
4787     #ifdef CONFIG_ACPI
4788    
4789     -void __init mp_register_lapic_address (
4790     - u64 address)
4791     +void __init mp_register_lapic_address(u64 address)
4792     {
4793     #ifndef CONFIG_XEN
4794     mp_lapic_addr = (unsigned long) address;
4795     @@ -853,13 +842,10 @@
4796     #endif
4797     }
4798    
4799     -
4800     -void __devinit mp_register_lapic (
4801     - u8 id,
4802     - u8 enabled)
4803     +void __devinit mp_register_lapic (u8 id, u8 enabled)
4804     {
4805     struct mpc_config_processor processor;
4806     - int boot_cpu = 0;
4807     + int boot_cpu = 0;
4808    
4809     if (MAX_APICS - id <= 0) {
4810     printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
4811     @@ -898,11 +884,9 @@
4812     u32 pin_programmed[4];
4813     } mp_ioapic_routing[MAX_IO_APICS];
4814    
4815     -
4816     -static int mp_find_ioapic (
4817     - int gsi)
4818     +static int mp_find_ioapic (int gsi)
4819     {
4820     - int i = 0;
4821     + int i = 0;
4822    
4823     /* Find the IOAPIC that manages this GSI. */
4824     for (i = 0; i < nr_ioapics; i++) {
4825     @@ -915,15 +899,11 @@
4826    
4827     return -1;
4828     }
4829     -
4830    
4831     -void __init mp_register_ioapic (
4832     - u8 id,
4833     - u32 address,
4834     - u32 gsi_base)
4835     +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
4836     {
4837     - int idx = 0;
4838     - int tmpid;
4839     + int idx = 0;
4840     + int tmpid;
4841    
4842     if (nr_ioapics >= MAX_IO_APICS) {
4843     printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4844     @@ -971,16 +951,10 @@
4845     mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
4846     mp_ioapic_routing[idx].gsi_base,
4847     mp_ioapic_routing[idx].gsi_end);
4848     -
4849     - return;
4850     }
4851    
4852     -
4853     -void __init mp_override_legacy_irq (
4854     - u8 bus_irq,
4855     - u8 polarity,
4856     - u8 trigger,
4857     - u32 gsi)
4858     +void __init
4859     +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
4860     {
4861     struct mpc_config_intsrc intsrc;
4862     int ioapic = -1;
4863     @@ -1018,15 +992,13 @@
4864     mp_irqs[mp_irq_entries] = intsrc;
4865     if (++mp_irq_entries == MAX_IRQ_SOURCES)
4866     panic("Max # of irq sources exceeded!\n");
4867     -
4868     - return;
4869     }
4870    
4871     void __init mp_config_acpi_legacy_irqs (void)
4872     {
4873     struct mpc_config_intsrc intsrc;
4874     - int i = 0;
4875     - int ioapic = -1;
4876     + int i = 0;
4877     + int ioapic = -1;
4878    
4879     /*
4880     * Fabricate the legacy ISA bus (bus #31).
4881     @@ -1095,12 +1067,12 @@
4882    
4883     #define MAX_GSI_NUM 4096
4884    
4885     -int mp_register_gsi (u32 gsi, int triggering, int polarity)
4886     +int mp_register_gsi(u32 gsi, int triggering, int polarity)
4887     {
4888     - int ioapic = -1;
4889     - int ioapic_pin = 0;
4890     - int idx, bit = 0;
4891     - static int pci_irq = 16;
4892     + int ioapic = -1;
4893     + int ioapic_pin = 0;
4894     + int idx, bit = 0;
4895     + static int pci_irq = 16;
4896     /*
4897     * Mapping between Global System Interrupts, which
4898     * represent all possible interrupts, and IRQs
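
One non-obvious hunk above is the new #if MAX_MP_BUSSES < 256 guard in MP_bus_info(): mpc_busid is a u8, so once MAX_MP_BUSSES reaches 256 the range check can never fire and gcc warns that the comparison is always false; the guard compiles the check in only where it can actually trigger. In sketch form (the u8 field mirrors the MP-table layout):

    struct demo_bus { unsigned char busid; };       /* mpc_busid is a u8 */

    static int demo_busid_ok(const struct demo_bus *m)
    {
    #if MAX_MP_BUSSES < 256
            if (m->busid >= MAX_MP_BUSSES)  /* dead code when >= 256 */
                    return 0;
    #endif
            return 1;
    }
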
4899     --- a/arch/x86/kernel/mpparse_64-xen.c
4900     +++ b/arch/x86/kernel/mpparse_64-xen.c
4901     @@ -41,8 +41,7 @@
4902     * Various Linux-internal data structures created from the
4903     * MP-table.
4904     */
4905     -unsigned char apic_version [MAX_APICS];
4906     -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4907     +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
4908     int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4909    
4910     static int mp_current_pci_id = 0;
4911     @@ -56,7 +55,6 @@
4912     int mp_irq_entries;
4913    
4914     int nr_ioapics;
4915     -int pic_mode;
4916     unsigned long mp_lapic_addr = 0;
4917    
4918    
4919     @@ -71,19 +69,6 @@
4920     /* Bitmask of physically existing CPUs */
4921     physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
4922    
4923     -/* ACPI MADT entry parsing functions */
4924     -#ifdef CONFIG_ACPI
4925     -extern struct acpi_boot_flags acpi_boot;
4926     -#ifdef CONFIG_X86_LOCAL_APIC
4927     -extern int acpi_parse_lapic (acpi_table_entry_header *header);
4928     -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
4929     -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
4930     -#endif /*CONFIG_X86_LOCAL_APIC*/
4931     -#ifdef CONFIG_X86_IO_APIC
4932     -extern int acpi_parse_ioapic (acpi_table_entry_header *header);
4933     -#endif /*CONFIG_X86_IO_APIC*/
4934     -#endif /*CONFIG_ACPI*/
4935     -
4936     u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
4937    
4938    
4939     @@ -109,24 +94,20 @@
4940     static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
4941     {
4942     int cpu;
4943     - unsigned char ver;
4944     cpumask_t tmp_map;
4945     + char *bootup_cpu = "";
4946    
4947     if (!(m->mpc_cpuflag & CPU_ENABLED)) {
4948     disabled_cpus++;
4949     return;
4950     }
4951     -
4952     - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
4953     - m->mpc_apicid,
4954     - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
4955     - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
4956     - m->mpc_apicver);
4957     -
4958     if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4959     - Dprintk(" Bootup CPU\n");
4960     + bootup_cpu = " (Bootup-CPU)";
4961     boot_cpu_id = m->mpc_apicid;
4962     }
4963     +
4964     + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
4965     +
4966     if (num_processors >= NR_CPUS) {
4967     printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
4968     " Processor ignored.\n", NR_CPUS);
4969     @@ -137,24 +118,7 @@
4970     cpus_complement(tmp_map, cpu_present_map);
4971     cpu = first_cpu(tmp_map);
4972    
4973     -#if MAX_APICS < 255
4974     - if ((int)m->mpc_apicid > MAX_APICS) {
4975     - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
4976     - m->mpc_apicid, MAX_APICS);
4977     - return;
4978     - }
4979     -#endif
4980     - ver = m->mpc_apicver;
4981     -
4982     physid_set(m->mpc_apicid, phys_cpu_present_map);
4983     - /*
4984     - * Validate version
4985     - */
4986     - if (ver == 0x0) {
4987     - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
4988     - ver = 0x10;
4989     - }
4990     - apic_version[m->mpc_apicid] = ver;
4991     if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4992     /*
4993     * bios_cpu_apicid is required to have processors listed
4994     @@ -185,37 +149,42 @@
4995     Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
4996    
4997     if (strncmp(str, "ISA", 3) == 0) {
4998     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4999     - } else if (strncmp(str, "EISA", 4) == 0) {
5000     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
5001     + set_bit(m->mpc_busid, mp_bus_not_pci);
5002     } else if (strncmp(str, "PCI", 3) == 0) {
5003     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
5004     + clear_bit(m->mpc_busid, mp_bus_not_pci);
5005     mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
5006     mp_current_pci_id++;
5007     - } else if (strncmp(str, "MCA", 3) == 0) {
5008     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
5009     } else {
5010     printk(KERN_ERR "Unknown bustype %s\n", str);
5011     }
5012     }
5013    
5014     +static int bad_ioapic(unsigned long address)
5015     +{
5016     + if (nr_ioapics >= MAX_IO_APICS) {
5017     + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5018     + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5019     + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5020     + }
5021     + if (!address) {
5022     + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5023     + " found in table, skipping!\n");
5024     + return 1;
5025     + }
5026     + return 0;
5027     +}
5028     +
5029     static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
5030     {
5031     if (!(m->mpc_flags & MPC_APIC_USABLE))
5032     return;
5033    
5034     - printk("I/O APIC #%d Version %d at 0x%X.\n",
5035     - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
5036     - if (nr_ioapics >= MAX_IO_APICS) {
5037     - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
5038     - MAX_IO_APICS, nr_ioapics);
5039     - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
5040     - }
5041     - if (!m->mpc_apicaddr) {
5042     - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
5043     - " found in MP table, skipping!\n");
5044     + printk("I/O APIC #%d at 0x%X.\n",
5045     + m->mpc_apicid, m->mpc_apicaddr);
5046     +
5047     + if (bad_ioapic(m->mpc_apicaddr))
5048     return;
5049     - }
5050     +
5051     mp_ioapics[nr_ioapics] = *m;
5052     nr_ioapics++;
5053     }
5054     @@ -239,19 +208,6 @@
5055     m->mpc_irqtype, m->mpc_irqflag & 3,
5056     (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
5057     m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
5058     - /*
5059     - * Well it seems all SMP boards in existence
5060     - * use ExtINT/LVT1 == LINT0 and
5061     - * NMI/LVT2 == LINT1 - the following check
5062     - * will show us if this assumptions is false.
5063     - * Until then we do not have to add baggage.
5064     - */
5065     - if ((m->mpc_irqtype == mp_ExtINT) &&
5066     - (m->mpc_destapiclint != 0))
5067     - BUG();
5068     - if ((m->mpc_irqtype == mp_NMI) &&
5069     - (m->mpc_destapiclint != 1))
5070     - BUG();
5071     }
5072    
5073     /*
5074     @@ -265,7 +221,7 @@
5075     unsigned char *mpt=((unsigned char *)mpc)+count;
5076    
5077     if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
5078     - printk("SMP mptable: bad signature [%c%c%c%c]!\n",
5079     + printk("MPTABLE: bad signature [%c%c%c%c]!\n",
5080     mpc->mpc_signature[0],
5081     mpc->mpc_signature[1],
5082     mpc->mpc_signature[2],
5083     @@ -273,31 +229,31 @@
5084     return 0;
5085     }
5086     if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
5087     - printk("SMP mptable: checksum error!\n");
5088     + printk("MPTABLE: checksum error!\n");
5089     return 0;
5090     }
5091     if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
5092     - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
5093     + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
5094     mpc->mpc_spec);
5095     return 0;
5096     }
5097     if (!mpc->mpc_lapic) {
5098     - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
5099     + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
5100     return 0;
5101     }
5102     memcpy(str,mpc->mpc_oem,8);
5103     - str[8]=0;
5104     - printk(KERN_INFO "OEM ID: %s ",str);
5105     + str[8] = 0;
5106     + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
5107    
5108     memcpy(str,mpc->mpc_productid,12);
5109     - str[12]=0;
5110     - printk("Product ID: %s ",str);
5111     + str[12] = 0;
5112     + printk("MPTABLE: Product ID: %s ",str);
5113    
5114     - printk("APIC at: 0x%X\n",mpc->mpc_lapic);
5115     + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
5116    
5117     /* save the local APIC address, it might be non-default */
5118     if (!acpi_lapic)
5119     - mp_lapic_addr = mpc->mpc_lapic;
5120     + mp_lapic_addr = mpc->mpc_lapic;
5121    
5122     /*
5123     * Now process the configuration blocks.
5124     @@ -309,7 +265,7 @@
5125     struct mpc_config_processor *m=
5126     (struct mpc_config_processor *)mpt;
5127     if (!acpi_lapic)
5128     - MP_processor_info(m);
5129     + MP_processor_info(m);
5130     mpt += sizeof(*m);
5131     count += sizeof(*m);
5132     break;
5133     @@ -328,8 +284,8 @@
5134     struct mpc_config_ioapic *m=
5135     (struct mpc_config_ioapic *)mpt;
5136     MP_ioapic_info(m);
5137     - mpt+=sizeof(*m);
5138     - count+=sizeof(*m);
5139     + mpt += sizeof(*m);
5140     + count += sizeof(*m);
5141     break;
5142     }
5143     case MP_INTSRC:
5144     @@ -338,8 +294,8 @@
5145     (struct mpc_config_intsrc *)mpt;
5146    
5147     MP_intsrc_info(m);
5148     - mpt+=sizeof(*m);
5149     - count+=sizeof(*m);
5150     + mpt += sizeof(*m);
5151     + count += sizeof(*m);
5152     break;
5153     }
5154     case MP_LINTSRC:
5155     @@ -347,15 +303,15 @@
5156     struct mpc_config_lintsrc *m=
5157     (struct mpc_config_lintsrc *)mpt;
5158     MP_lintsrc_info(m);
5159     - mpt+=sizeof(*m);
5160     - count+=sizeof(*m);
5161     + mpt += sizeof(*m);
5162     + count += sizeof(*m);
5163     break;
5164     }
5165     }
5166     }
5167     clustered_apic_check();
5168     if (!num_processors)
5169     - printk(KERN_ERR "SMP mptable: no processors registered!\n");
5170     + printk(KERN_ERR "MPTABLE: no processors registered!\n");
5171     return num_processors;
5172     }
5173    
5174     @@ -451,13 +407,10 @@
5175     * 2 CPUs, numbered 0 & 1.
5176     */
5177     processor.mpc_type = MP_PROCESSOR;
5178     - /* Either an integrated APIC or a discrete 82489DX. */
5179     - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5180     + processor.mpc_apicver = 0;
5181     processor.mpc_cpuflag = CPU_ENABLED;
5182     - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5183     - (boot_cpu_data.x86_model << 4) |
5184     - boot_cpu_data.x86_mask;
5185     - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5186     + processor.mpc_cpufeature = 0;
5187     + processor.mpc_featureflag = 0;
5188     processor.mpc_reserved[0] = 0;
5189     processor.mpc_reserved[1] = 0;
5190     for (i = 0; i < 2; i++) {
5191     @@ -476,14 +429,6 @@
5192     case 5:
5193     memcpy(bus.mpc_bustype, "ISA ", 6);
5194     break;
5195     - case 2:
5196     - case 6:
5197     - case 3:
5198     - memcpy(bus.mpc_bustype, "EISA ", 6);
5199     - break;
5200     - case 4:
5201     - case 7:
5202     - memcpy(bus.mpc_bustype, "MCA ", 6);
5203     }
5204     MP_bus_info(&bus);
5205     if (mpc_default_type > 4) {
5206     @@ -494,7 +439,7 @@
5207    
5208     ioapic.mpc_type = MP_IOAPIC;
5209     ioapic.mpc_apicid = 2;
5210     - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5211     + ioapic.mpc_apicver = 0;
5212     ioapic.mpc_flags = MPC_APIC_USABLE;
5213     ioapic.mpc_apicaddr = 0xFEC00000;
5214     MP_ioapic_info(&ioapic);
5215     @@ -537,13 +482,6 @@
5216     printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
5217    
5218     printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
5219     - if (mpf->mpf_feature2 & (1<<7)) {
5220     - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
5221     - pic_mode = 1;
5222     - } else {
5223     - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
5224     - pic_mode = 0;
5225     - }
5226    
5227     /*
5228     * Now see if we need to read further.
5229     @@ -620,7 +558,7 @@
5230     return 0;
5231     }
5232    
5233     -void __init find_intel_smp (void)
5234     +void __init find_smp_config(void)
5235     {
5236     unsigned int address;
5237    
5238     @@ -637,9 +575,7 @@
5239     smp_scan_config(0xF0000,0x10000))
5240     return;
5241     /*
5242     - * If it is an SMP machine we should know now, unless the
5243     - * configuration is in an EISA/MCA bus machine with an
5244     - * extended bios data area.
5245     + * If it is an SMP machine we should know now.
5246     *
5247     * there is a real-mode segmented pointer pointing to the
5248     * 4K EBDA area at 0x40E, calculate and scan it here.
5249     @@ -660,64 +596,38 @@
5250     printk(KERN_INFO "No mptable found.\n");
5251     }
5252    
5253     -/*
5254     - * - Intel MP Configuration Table
5255     - */
5256     -void __init find_smp_config (void)
5257     -{
5258     -#ifdef CONFIG_X86_LOCAL_APIC
5259     - find_intel_smp();
5260     -#endif
5261     -}
5262     -
5263     -
5264     /* --------------------------------------------------------------------------
5265     ACPI-based MP Configuration
5266     -------------------------------------------------------------------------- */
5267    
5268     #ifdef CONFIG_ACPI
5269    
5270     -void __init mp_register_lapic_address (
5271     - u64 address)
5272     +void __init mp_register_lapic_address(u64 address)
5273     {
5274     #ifndef CONFIG_XEN
5275     mp_lapic_addr = (unsigned long) address;
5276     -
5277     set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
5278     -
5279     if (boot_cpu_id == -1U)
5280     boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
5281     -
5282     - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
5283     #endif
5284     }
5285    
5286     -
5287     -void __cpuinit mp_register_lapic (
5288     - u8 id,
5289     - u8 enabled)
5290     +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
5291     {
5292     struct mpc_config_processor processor;
5293     int boot_cpu = 0;
5294    
5295     - if (id >= MAX_APICS) {
5296     - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
5297     - id, MAX_APICS);
5298     - return;
5299     - }
5300     -
5301     - if (id == boot_cpu_physical_apicid)
5302     + if (id == boot_cpu_id)
5303     boot_cpu = 1;
5304    
5305     #ifndef CONFIG_XEN
5306     processor.mpc_type = MP_PROCESSOR;
5307     processor.mpc_apicid = id;
5308     - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
5309     + processor.mpc_apicver = 0;
5310     processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
5311     processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
5312     - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5313     - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
5314     - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5315     + processor.mpc_cpufeature = 0;
5316     + processor.mpc_featureflag = 0;
5317     processor.mpc_reserved[0] = 0;
5318     processor.mpc_reserved[1] = 0;
5319     #endif
5320     @@ -725,8 +635,6 @@
5321     MP_processor_info(&processor);
5322     }
5323    
5324     -#ifdef CONFIG_X86_IO_APIC
5325     -
5326     #define MP_ISA_BUS 0
5327     #define MP_MAX_IOAPIC_PIN 127
5328    
5329     @@ -737,11 +645,9 @@
5330     u32 pin_programmed[4];
5331     } mp_ioapic_routing[MAX_IO_APICS];
5332    
5333     -
5334     -static int mp_find_ioapic (
5335     - int gsi)
5336     +static int mp_find_ioapic(int gsi)
5337     {
5338     - int i = 0;
5339     + int i = 0;
5340    
5341     /* Find the IOAPIC that manages this GSI. */
5342     for (i = 0; i < nr_ioapics; i++) {
5343     @@ -751,28 +657,15 @@
5344     }
5345    
5346     printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
5347     -
5348     return -1;
5349     }
5350     -
5351    
5352     -void __init mp_register_ioapic (
5353     - u8 id,
5354     - u32 address,
5355     - u32 gsi_base)
5356     +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
5357     {
5358     - int idx = 0;
5359     + int idx = 0;
5360    
5361     - if (nr_ioapics >= MAX_IO_APICS) {
5362     - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5363     - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5364     - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5365     - }
5366     - if (!address) {
5367     - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5368     - " found in MADT table, skipping!\n");
5369     + if (bad_ioapic(address))
5370     return;
5371     - }
5372    
5373     idx = nr_ioapics++;
5374    
5375     @@ -784,7 +677,7 @@
5376     set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
5377     #endif
5378     mp_ioapics[idx].mpc_apicid = id;
5379     - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
5380     + mp_ioapics[idx].mpc_apicver = 0;
5381    
5382     /*
5383     * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
5384     @@ -795,21 +688,15 @@
5385     mp_ioapic_routing[idx].gsi_end = gsi_base +
5386     io_apic_get_redir_entries(idx);
5387    
5388     - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
5389     + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
5390     "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
5391     - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
5392     + mp_ioapics[idx].mpc_apicaddr,
5393     mp_ioapic_routing[idx].gsi_start,
5394     mp_ioapic_routing[idx].gsi_end);
5395     -
5396     - return;
5397     }
5398    
5399     -
5400     -void __init mp_override_legacy_irq (
5401     - u8 bus_irq,
5402     - u8 polarity,
5403     - u8 trigger,
5404     - u32 gsi)
5405     +void __init
5406     +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
5407     {
5408     struct mpc_config_intsrc intsrc;
5409     int ioapic = -1;
5410     @@ -847,22 +734,18 @@
5411     mp_irqs[mp_irq_entries] = intsrc;
5412     if (++mp_irq_entries == MAX_IRQ_SOURCES)
5413     panic("Max # of irq sources exceeded!\n");
5414     -
5415     - return;
5416     }
5417    
5418     -
5419     -void __init mp_config_acpi_legacy_irqs (void)
5420     +void __init mp_config_acpi_legacy_irqs(void)
5421     {
5422     struct mpc_config_intsrc intsrc;
5423     - int i = 0;
5424     - int ioapic = -1;
5425     + int i = 0;
5426     + int ioapic = -1;
5427    
5428     /*
5429     * Fabricate the legacy ISA bus (bus #31).
5430     */
5431     - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
5432     - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
5433     + set_bit(MP_ISA_BUS, mp_bus_not_pci);
5434    
5435     /*
5436     * Locate the IOAPIC that manages the ISA IRQs (0-15).
5437     @@ -915,24 +798,13 @@
5438     if (++mp_irq_entries == MAX_IRQ_SOURCES)
5439     panic("Max # of irq sources exceeded!\n");
5440     }
5441     -
5442     - return;
5443     }
5444    
5445     -#define MAX_GSI_NUM 4096
5446     -
5447     int mp_register_gsi(u32 gsi, int triggering, int polarity)
5448     {
5449     - int ioapic = -1;
5450     - int ioapic_pin = 0;
5451     - int idx, bit = 0;
5452     - static int pci_irq = 16;
5453     - /*
5454     - * Mapping between Global System Interrupts, which
5455     - * represent all possible interrupts, to the IRQs
5456     - * assigned to actual devices.
5457     - */
5458     - static int gsi_to_irq[MAX_GSI_NUM];
5459     + int ioapic = -1;
5460     + int ioapic_pin = 0;
5461     + int idx, bit = 0;
5462    
5463     if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
5464     return gsi;
5465     @@ -965,47 +837,14 @@
5466     if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
5467     Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
5468     mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
5469     - return gsi_to_irq[gsi];
5470     + return gsi;
5471     }
5472    
5473     mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
5474    
5475     - if (triggering == ACPI_LEVEL_SENSITIVE) {
5476     - /*
5477     - * For PCI devices assign IRQs in order, avoiding gaps
5478     - * due to unused I/O APIC pins.
5479     - */
5480     - int irq = gsi;
5481     - if (gsi < MAX_GSI_NUM) {
5482     - /*
5483     - * Retain the VIA chipset work-around (gsi > 15), but
5484     - * avoid a problem where the 8254 timer (IRQ0) is setup
5485     - * via an override (so it's not on pin 0 of the ioapic),
5486     - * and at the same time, the pin 0 interrupt is a PCI
5487     - * type. The gsi > 15 test could cause these two pins
5488     - * to be shared as IRQ0, and they are not shareable.
5489     - * So test for this condition, and if necessary, avoid
5490     - * the pin collision.
5491     - */
5492     - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
5493     - gsi = pci_irq++;
5494     - /*
5495     - * Don't assign IRQ used by ACPI SCI
5496     - */
5497     - if (gsi == acpi_fadt.sci_int)
5498     - gsi = pci_irq++;
5499     - gsi_to_irq[irq] = gsi;
5500     - } else {
5501     - printk(KERN_ERR "GSI %u is too high\n", gsi);
5502     - return gsi;
5503     - }
5504     - }
5505     -
5506     io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
5507     triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
5508     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
5509     return gsi;
5510     }
5511     -
5512     -#endif /*CONFIG_X86_IO_APIC*/
5513     #endif /*CONFIG_ACPI*/
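The mpparse rework above replaces the per-bus mp_bus_id_to_type[] array (dropping EISA and MCA handling along the way) with a single mp_bus_not_pci bitmap, and hoists the duplicated I/O APIC sanity checks into bad_ioapic(). A minimal sketch of how the bitmap is meant to be queried by consumers such as the io_apic code — the helper name is illustrative, not part of the patch:

	#include <linux/bitops.h>
	#include <asm/mpspec.h>		/* MAX_MP_BUSSES, mp_bus_not_pci */

	/* Illustrative: a bus is PCI iff MP_bus_info() cleared its bit. */
	static inline int example_bus_is_pci(int busid)
	{
		return busid < MAX_MP_BUSSES &&
		       !test_bit(busid, mp_bus_not_pci);
	}

One bit per bus replaces a byte-per-bus type table; unknown bus types simply keep their "not PCI" default.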
5514     --- a/arch/x86/kernel/pci-dma_32-xen.c
5515     +++ b/arch/x86/kernel/pci-dma_32-xen.c
5516     @@ -116,8 +116,7 @@
5517     {
5518     int i, rc;
5519    
5520     - if (direction == DMA_NONE)
5521     - BUG();
5522     + BUG_ON(!valid_dma_direction(direction));
5523     WARN_ON(nents == 0 || sg[0].length == 0);
5524    
5525     if (swiotlb) {
5526     @@ -148,7 +147,7 @@
5527     {
5528     int i;
5529    
5530     - BUG_ON(direction == DMA_NONE);
5531     + BUG_ON(!valid_dma_direction(direction));
5532     if (swiotlb)
5533     swiotlb_unmap_sg(hwdev, sg, nents, direction);
5534     else {
5535     @@ -165,8 +164,7 @@
5536     {
5537     dma_addr_t dma_addr;
5538    
5539     - BUG_ON(direction == DMA_NONE);
5540     -
5541     + BUG_ON(!valid_dma_direction(direction));
5542     if (swiotlb) {
5543     dma_addr = swiotlb_map_page(
5544     dev, page, offset, size, direction);
5545     @@ -183,7 +181,7 @@
5546     dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
5547     enum dma_data_direction direction)
5548     {
5549     - BUG_ON(direction == DMA_NONE);
5550     + BUG_ON(!valid_dma_direction(direction));
5551     if (swiotlb)
5552     swiotlb_unmap_page(dev, dma_address, size, direction);
5553     else
5554     @@ -365,8 +363,7 @@
5555     {
5556     dma_addr_t dma;
5557    
5558     - if (direction == DMA_NONE)
5559     - BUG();
5560     + BUG_ON(!valid_dma_direction(direction));
5561     WARN_ON(size == 0);
5562    
5563     if (swiotlb) {
5564     @@ -387,8 +384,7 @@
5565     dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
5566     enum dma_data_direction direction)
5567     {
5568     - if (direction == DMA_NONE)
5569     - BUG();
5570     + BUG_ON(!valid_dma_direction(direction));
5571     if (swiotlb)
5572     swiotlb_unmap_single(dev, dma_addr, size, direction);
5573     else
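Each open-coded `direction == DMA_NONE` check above becomes BUG_ON(!valid_dma_direction(direction)), which additionally rejects out-of-range values rather than only DMA_NONE. For reference, the 2.6.19 helper in <linux/dma-mapping.h> reads roughly as follows (reproduced from memory, so treat the exact wording as approximate):

	static inline int valid_dma_direction(int dma_direction)
	{
		return ((dma_direction == DMA_BIDIRECTIONAL) ||
			(dma_direction == DMA_TO_DEVICE)    ||
			(dma_direction == DMA_FROM_DEVICE));
	}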
5574     --- a/arch/x86/kernel/pci-swiotlb_64-xen.c
5575     +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c
5576     @@ -3,7 +3,8 @@
5577     #include <linux/pci.h>
5578     #include <linux/cache.h>
5579     #include <linux/module.h>
5580     -#include <asm/dma-mapping.h>
5581     +#include <linux/dma-mapping.h>
5582     +
5583     #include <asm/proto.h>
5584     #include <asm/swiotlb.h>
5585     #include <asm/dma.h>
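The include swap above is not cosmetic: <linux/dma-mapping.h> is the portable entry point that supplies enum dma_data_direction and the valid_dma_direction() helper used by the pci-dma changes, and only then pulls in the arch header. A simplified sketch of the layering:

	/* linux/dma-mapping.h (simplified) */
	enum dma_data_direction {
		DMA_BIDIRECTIONAL = 0,
		DMA_TO_DEVICE     = 1,
		DMA_FROM_DEVICE   = 2,
		DMA_NONE          = 3,
	};
	/* ... valid_dma_direction() and other generic helpers ... */
	#include <asm/dma-mapping.h>	/* arch implementation comes last */

Code should therefore include the <linux/...> variant and never <asm/dma-mapping.h> directly.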
5586     --- a/arch/x86/kernel/process_32-xen.c
5587     +++ b/arch/x86/kernel/process_32-xen.c
5588     @@ -37,6 +37,7 @@
5589     #include <linux/kallsyms.h>
5590     #include <linux/ptrace.h>
5591     #include <linux/random.h>
5592     +#include <linux/personality.h>
5593    
5594     #include <asm/uaccess.h>
5595     #include <asm/pgtable.h>
5596     @@ -186,7 +187,7 @@
5597     void cpu_idle_wait(void)
5598     {
5599     unsigned int cpu, this_cpu = get_cpu();
5600     - cpumask_t map;
5601     + cpumask_t map, tmp = current->cpus_allowed;
5602    
5603     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5604     put_cpu();
5605     @@ -208,6 +209,8 @@
5606     }
5607     cpus_and(map, map, cpu_online_map);
5608     } while (!cpus_empty(map));
5609     +
5610     + set_cpus_allowed(current, tmp);
5611     }
5612     EXPORT_SYMBOL_GPL(cpu_idle_wait);
5613    
5614     @@ -240,9 +243,9 @@
5615     if (user_mode_vm(regs))
5616     printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
5617     printk(" EFLAGS: %08lx %s (%s %.*s)\n",
5618     - regs->eflags, print_tainted(), system_utsname.release,
5619     - (int)strcspn(system_utsname.version, " "),
5620     - system_utsname.version);
5621     + regs->eflags, print_tainted(), init_utsname()->release,
5622     + (int)strcspn(init_utsname()->version, " "),
5623     + init_utsname()->version);
5624     printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
5625     regs->eax,regs->ebx,regs->ecx,regs->edx);
5626     printk("ESI: %08lx EDI: %08lx EBP: %08lx",
5627     @@ -264,15 +267,6 @@
5628     * the "args".
5629     */
5630     extern void kernel_thread_helper(void);
5631     -__asm__(".section .text\n"
5632     - ".align 4\n"
5633     - "kernel_thread_helper:\n\t"
5634     - "movl %edx,%eax\n\t"
5635     - "pushl %edx\n\t"
5636     - "call *%ebx\n\t"
5637     - "pushl %eax\n\t"
5638     - "call do_exit\n"
5639     - ".previous");
5640    
5641     /*
5642     * Create a kernel thread
5643     @@ -290,7 +284,7 @@
5644     regs.xes = __USER_DS;
5645     regs.orig_eax = -1;
5646     regs.eip = (unsigned long) kernel_thread_helper;
5647     - regs.xcs = GET_KERNEL_CS();
5648     + regs.xcs = __KERNEL_CS | get_kernel_rpl();
5649     regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
5650    
5651     /* Ok, create the new process.. */
5652     @@ -369,13 +363,12 @@
5653    
5654     tsk = current;
5655     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
5656     - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5657     + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
5658     + IO_BITMAP_BYTES, GFP_KERNEL);
5659     if (!p->thread.io_bitmap_ptr) {
5660     p->thread.io_bitmap_max = 0;
5661     return -ENOMEM;
5662     }
5663     - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
5664     - IO_BITMAP_BYTES);
5665     set_tsk_thread_flag(p, TIF_IO_BITMAP);
5666     }
5667    
5668     @@ -850,7 +843,7 @@
5669    
5670     unsigned long arch_align_stack(unsigned long sp)
5671     {
5672     - if (randomize_va_space)
5673     + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5674     sp -= get_random_int() % 8192;
5675     return sp & ~0xf;
5676     }
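Two conversions above are worth calling out: the io_bitmap copy collapses kmalloc()+memcpy() into a single kmemdup() (new in this kernel release), and arch_align_stack() now honours the ADDR_NO_RANDOMIZE personality bit, hence the added <linux/personality.h> include. A minimal sketch of the kmemdup() idiom, using a hypothetical wrapper name:

	#include <linux/slab.h>
	#include <linux/string.h>	/* kmemdup() */

	/* Illustrative only: duplicate a parent's I/O bitmap, or fail cleanly. */
	static int example_dup_io_bitmap(struct thread_struct *dst,
					 const struct thread_struct *src)
	{
		dst->io_bitmap_ptr = kmemdup(src->io_bitmap_ptr,
					     IO_BITMAP_BYTES, GFP_KERNEL);
		return dst->io_bitmap_ptr ? 0 : -ENOMEM;
	}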
5677     --- a/arch/x86/kernel/process_64-xen.c
5678     +++ b/arch/x86/kernel/process_64-xen.c
5679     @@ -89,25 +89,24 @@
5680     }
5681     EXPORT_SYMBOL(idle_notifier_unregister);
5682    
5683     -enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
5684     -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
5685     -
5686     void enter_idle(void)
5687     {
5688     - __get_cpu_var(idle_state) = CPU_IDLE;
5689     + write_pda(isidle, 1);
5690     atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
5691     }
5692    
5693     static void __exit_idle(void)
5694     {
5695     - __get_cpu_var(idle_state) = CPU_NOT_IDLE;
5696     + if (test_and_clear_bit_pda(0, isidle) == 0)
5697     + return;
5698     atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
5699     }
5700    
5701     /* Called from interrupts to signify idle end */
5702     void exit_idle(void)
5703     {
5704     - if (current->pid | read_pda(irqcount))
5705     + /* idle loop has pid 0 */
5706     + if (current->pid)
5707     return;
5708     __exit_idle();
5709     }
5710     @@ -184,6 +183,9 @@
5711     play_dead();
5712     enter_idle();
5713     idle();
5714     + /* In many cases the interrupt that ended idle
5715     + has already called exit_idle. But some idle
5716     + loops can be woken up without interrupt. */
5717     __exit_idle();
5718     }
5719    
5720     @@ -196,7 +198,7 @@
5721     void cpu_idle_wait(void)
5722     {
5723     unsigned int cpu, this_cpu = get_cpu();
5724     - cpumask_t map;
5725     + cpumask_t map, tmp = current->cpus_allowed;
5726    
5727     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5728     put_cpu();
5729     @@ -219,6 +221,8 @@
5730     }
5731     cpus_and(map, map, cpu_online_map);
5732     } while (!cpus_empty(map));
5733     +
5734     + set_cpus_allowed(current, tmp);
5735     }
5736     EXPORT_SYMBOL_GPL(cpu_idle_wait);
5737    
5738     @@ -250,9 +254,9 @@
5739     print_modules();
5740     printk("Pid: %d, comm: %.20s %s %s %.*s\n",
5741     current->pid, current->comm, print_tainted(),
5742     - system_utsname.release,
5743     - (int)strcspn(system_utsname.version, " "),
5744     - system_utsname.version);
5745     + init_utsname()->release,
5746     + (int)strcspn(init_utsname()->version, " "),
5747     + init_utsname()->version);
5748     printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
5749     printk_address(regs->rip);
5750     printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
5751     @@ -310,6 +314,7 @@
5752    
5753     kfree(t->io_bitmap_ptr);
5754     t->io_bitmap_ptr = NULL;
5755     + clear_thread_flag(TIF_IO_BITMAP);
5756     /*
5757     * Careful, clear this in the TSS too:
5758     */
5759     @@ -340,6 +345,7 @@
5760     if (t->flags & _TIF_IA32)
5761     current_thread_info()->status |= TS_COMPAT;
5762     }
5763     + t->flags &= ~_TIF_DEBUG;
5764    
5765     tsk->thread.debugreg0 = 0;
5766     tsk->thread.debugreg1 = 0;
5767     @@ -432,7 +438,7 @@
5768     asm("mov %%es,%0" : "=m" (p->thread.es));
5769     asm("mov %%ds,%0" : "=m" (p->thread.ds));
5770    
5771     - if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
5772     + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
5773     p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5774     if (!p->thread.io_bitmap_ptr) {
5775     p->thread.io_bitmap_max = 0;
5776     @@ -440,6 +446,7 @@
5777     }
5778     memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
5779     IO_BITMAP_BYTES);
5780     + set_tsk_thread_flag(p, TIF_IO_BITMAP);
5781     }
5782    
5783     /*
5784     @@ -474,6 +481,30 @@
5785     }
5786    
5787     /*
5788     + * This special macro can be used to load a debugging register
5789     + */
5790     +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
5791     +
5792     +static inline void __switch_to_xtra(struct task_struct *prev_p,
5793     + struct task_struct *next_p)
5794     +{
5795     + struct thread_struct *prev, *next;
5796     +
5797     + prev = &prev_p->thread;
5798     + next = &next_p->thread;
5799     +
5800     + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
5801     + loaddebug(next, 0);
5802     + loaddebug(next, 1);
5803     + loaddebug(next, 2);
5804     + loaddebug(next, 3);
5805     + /* no 4 and 5 */
5806     + loaddebug(next, 6);
5807     + loaddebug(next, 7);
5808     + }
5809     +}
5810     +
5811     +/*
5812     * switch_to(x,y) should switch tasks from x to y.
5813     *
5814     * This could still be optimized:
5815     @@ -495,6 +526,10 @@
5816     struct physdev_set_iobitmap iobmp_op;
5817     multicall_entry_t _mcl[8], *mcl = _mcl;
5818    
5819     + /* we're going to use this soon, after a few expensive things */
5820     + if (next_p->fpu_counter>5)
5821     + prefetch(&next->i387.fxsave);
5822     +
5823     /*
5824     * This is basically '__unlazy_fpu', except that we queue a
5825     * multicall to indicate FPU task switch, rather than
5826     @@ -507,7 +542,8 @@
5827     mcl->op = __HYPERVISOR_fpu_taskswitch;
5828     mcl->args[0] = 1;
5829     mcl++;
5830     - }
5831     + } else
5832     + prev_p->fpu_counter = 0;
5833    
5834     /*
5835     * Reload esp0, LDT and the page table pointer:
5836     @@ -587,21 +623,29 @@
5837     write_pda(oldrsp, next->userrsp);
5838     write_pda(pcurrent, next_p);
5839     write_pda(kernelstack,
5840     - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5841     + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5842     +#ifdef CONFIG_CC_STACKPROTECTOR
5843     + write_pda(stack_canary, next_p->stack_canary);
5844     +
5845     + /*
5846     + * Build time only check to make sure the stack_canary is at
5847     + * offset 40 in the pda; this is a gcc ABI requirement
5848     + */
5849     + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
5850     +#endif
5851    
5852     /*
5853     * Now maybe reload the debug registers
5854     */
5855     - if (unlikely(next->debugreg7)) {
5856     - set_debugreg(next->debugreg0, 0);
5857     - set_debugreg(next->debugreg1, 1);
5858     - set_debugreg(next->debugreg2, 2);
5859     - set_debugreg(next->debugreg3, 3);
5860     - /* no 4 and 5 */
5861     - set_debugreg(next->debugreg6, 6);
5862     - set_debugreg(next->debugreg7, 7);
5863     - }
5864     + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
5865     + __switch_to_xtra(prev_p, next_p);
5866    
5867     + /* If the task has used fpu the last 5 timeslices, just do a full
5868     + * restore of the math state immediately to avoid the trap; the
5869     + * chances of needing FPU soon are obviously high now
5870     + */
5871     + if (next_p->fpu_counter>5)
5872     + math_state_restore();
5873     return prev_p;
5874     }
5875    
5876     @@ -821,7 +865,7 @@
5877    
5878     unsigned long arch_align_stack(unsigned long sp)
5879     {
5880     - if (randomize_va_space)
5881     + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5882     sp -= get_random_int() % 8192;
5883     return sp & ~0xf;
5884     }
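The debug-register reload above moves into __switch_to_xtra(), gated on _TIF_WORK_CTXSW instead of peeking at debugreg7, and it relies on the token-pasting loaddebug() macro, which is easy to misread; `##` glues the register number onto the field name:

	#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

	/* loaddebug(next, 0) therefore expands to: */
	set_debugreg(next->debugreg0, 0);
	/* ... and loaddebug(next, 7) to: */
	set_debugreg(next->debugreg7, 7);

The fpu_counter test alongside it restores FPU state eagerly for tasks that used the FPU in each of their last five timeslices, trading one unconditional restore for the #NM trap round trip.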
5885     --- a/arch/x86/kernel/setup64-xen.c
5886     +++ b/arch/x86/kernel/setup64-xen.c
5887     @@ -31,7 +31,7 @@
5888     #include <asm/hypervisor.h>
5889     #endif
5890    
5891     -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
5892     +char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
5893    
5894     cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
5895    
5896     @@ -55,8 +55,10 @@
5897     on Enable(default)
5898     off Disable
5899     */
5900     -int __init nonx_setup(char *str)
5901     +static int __init nonx_setup(char *str)
5902     {
5903     + if (!str)
5904     + return -EINVAL;
5905     if (!strncmp(str, "on", 2)) {
5906     __supported_pte_mask |= _PAGE_NX;
5907     do_not_nx = 0;
5908     @@ -64,9 +66,9 @@
5909     do_not_nx = 1;
5910     __supported_pte_mask &= ~_PAGE_NX;
5911     }
5912     - return 1;
5913     + return 0;
5914     }
5915     -__setup("noexec=", nonx_setup); /* parsed early actually */
5916     +early_param("noexec", nonx_setup);
5917    
5918     int force_personality32 = 0;
5919    
5920     @@ -102,12 +104,9 @@
5921     #endif
5922    
5923     /* Copy section for each CPU (we discard the original) */
5924     - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
5925     -#ifdef CONFIG_MODULES
5926     - if (size < PERCPU_ENOUGH_ROOM)
5927     - size = PERCPU_ENOUGH_ROOM;
5928     -#endif
5929     + size = PERCPU_ENOUGH_ROOM;
5930    
5931     + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
5932     for_each_cpu_mask (i, cpu_possible_map) {
5933     char *ptr;
5934    
5935     @@ -169,7 +168,10 @@
5936     /* Setup up data that may be needed in __get_free_pages early */
5937     asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
5938     #ifndef CONFIG_XEN
5939     + /* Memory clobbers used to order PDA accessed */
5940     + mb();
5941     wrmsrl(MSR_GS_BASE, pda);
5942     + mb();
5943     #else
5944     if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
5945     (unsigned long)pda))
5946     @@ -302,28 +304,17 @@
5947     * set up and load the per-CPU TSS
5948     */
5949     for (v = 0; v < N_EXCEPTION_STACKS; v++) {
5950     + static const unsigned int order[N_EXCEPTION_STACKS] = {
5951     + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
5952     + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
5953     + };
5954     if (cpu) {
5955     - static const unsigned int order[N_EXCEPTION_STACKS] = {
5956     - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
5957     - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
5958     - };
5959     -
5960     estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
5961     if (!estacks)
5962     panic("Cannot allocate exception stack %ld %d\n",
5963     v, cpu);
5964     }
5965     - switch (v + 1) {
5966     -#if DEBUG_STKSZ > EXCEPTION_STKSZ
5967     - case DEBUG_STACK:
5968     - cpu_pda(cpu)->debugstack = (unsigned long)estacks;
5969     - estacks += DEBUG_STKSZ;
5970     - break;
5971     -#endif
5972     - default:
5973     - estacks += EXCEPTION_STKSZ;
5974     - break;
5975     - }
5976     + estacks += PAGE_SIZE << order[v];
5977     orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
5978     }
5979    
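nonx_setup() above shows the handler shape every option in this patch converts to: early_param() callbacks run from parse_early_param() long before the regular __setup() machinery, must tolerate a NULL argument, and signal success with 0 rather than 1. A minimal sketch with an illustrative option name:

	static int __init parse_example(char *arg)
	{
		if (!arg)		/* "example" given without '=value' */
			return -EINVAL;
		/* ... parse arg ... */
		return 0;		/* 0 = handled */
	}
	early_param("example", parse_example);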
5980     --- a/arch/x86/kernel/setup_32-xen.c
5981     +++ b/arch/x86/kernel/setup_32-xen.c
5982     @@ -56,6 +56,7 @@
5983     #include <asm/apic.h>
5984     #include <asm/e820.h>
5985     #include <asm/mpspec.h>
5986     +#include <asm/mmzone.h>
5987     #include <asm/setup.h>
5988     #include <asm/arch_hooks.h>
5989     #include <asm/sections.h>
5990     @@ -105,18 +106,6 @@
5991    
5992     unsigned long mmu_cr4_features;
5993    
5994     -#ifdef CONFIG_ACPI
5995     - int acpi_disabled = 0;
5996     -#else
5997     - int acpi_disabled = 1;
5998     -#endif
5999     -EXPORT_SYMBOL(acpi_disabled);
6000     -
6001     -#ifdef CONFIG_ACPI
6002     -int __initdata acpi_force = 0;
6003     -extern acpi_interrupt_flags acpi_sci_flags;
6004     -#endif
6005     -
6006     /* for MCA, but anyone else can use it if they want */
6007     unsigned int machine_id;
6008     #ifdef CONFIG_MCA
6009     @@ -170,7 +159,6 @@
6010     #endif
6011    
6012     extern void early_cpu_init(void);
6013     -extern void generic_apic_probe(char *);
6014     extern int root_mountflags;
6015    
6016     unsigned long saved_videomode;
6017     @@ -243,9 +231,6 @@
6018     .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
6019     } };
6020    
6021     -#define ADAPTER_ROM_RESOURCES \
6022     - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
6023     -
6024     static struct resource video_rom_resource = {
6025     .name = "Video ROM",
6026     .start = 0xc0000,
6027     @@ -307,9 +292,6 @@
6028     .flags = IORESOURCE_BUSY | IORESOURCE_IO
6029     } };
6030    
6031     -#define STANDARD_IO_RESOURCES \
6032     - (sizeof standard_io_resources / sizeof standard_io_resources[0])
6033     -
6034     #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
6035    
6036     static int __init romchecksum(unsigned char *rom, unsigned long length)
6037     @@ -372,7 +354,7 @@
6038     }
6039    
6040     /* check for adapter roms on 2k boundaries */
6041     - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
6042     + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
6043     rom = isa_bus_to_virt(start);
6044     if (!romsignature(rom))
6045     continue;
6046     @@ -764,246 +746,152 @@
6047     }
6048     #endif
6049    
6050     -static void __init parse_cmdline_early (char ** cmdline_p)
6051     +static int __initdata user_defined_memmap = 0;
6052     +
6053     +/*
6054     + * "mem=nopentium" disables the 4MB page tables.
6055     + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
6056     + * to <mem>, overriding the bios size.
6057     + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
6058     + * <start> to <start>+<mem>, overriding the bios size.
6059     + *
6060     + * HPA tells me bootloaders need to parse mem=, so no new
6061     + * option should be mem= [also see Documentation/i386/boot.txt]
6062     + */
6063     +static int __init parse_mem(char *arg)
6064     {
6065     - char c = ' ', *to = command_line, *from = saved_command_line;
6066     - int len = 0, max_cmdline;
6067     - int userdef = 0;
6068     -
6069     - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
6070     - max_cmdline = COMMAND_LINE_SIZE;
6071     - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
6072     - /* Save unparsed command line copy for /proc/cmdline */
6073     - saved_command_line[max_cmdline-1] = '\0';
6074     -
6075     - for (;;) {
6076     - if (c != ' ')
6077     - goto next_char;
6078     - /*
6079     - * "mem=nopentium" disables the 4MB page tables.
6080     - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
6081     - * to <mem>, overriding the bios size.
6082     - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
6083     - * <start> to <start>+<mem>, overriding the bios size.
6084     - *
6085     - * HPA tells me bootloaders need to parse mem=, so no new
6086     - * option should be mem= [also see Documentation/i386/boot.txt]
6087     - */
6088     - if (!memcmp(from, "mem=", 4)) {
6089     - if (to != command_line)
6090     - to--;
6091     - if (!memcmp(from+4, "nopentium", 9)) {
6092     - from += 9+4;
6093     - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
6094     - disable_pse = 1;
6095     - } else {
6096     - /* If the user specifies memory size, we
6097     - * limit the BIOS-provided memory map to
6098     - * that size. exactmap can be used to specify
6099     - * the exact map. mem=number can be used to
6100     - * trim the existing memory map.
6101     - */
6102     - unsigned long long mem_size;
6103     -
6104     - mem_size = memparse(from+4, &from);
6105     - limit_regions(mem_size);
6106     - userdef=1;
6107     - }
6108     - }
6109     + if (!arg)
6110     + return -EINVAL;
6111    
6112     - else if (!memcmp(from, "memmap=", 7)) {
6113     - if (to != command_line)
6114     - to--;
6115     - if (!memcmp(from+7, "exactmap", 8)) {
6116     -#ifdef CONFIG_CRASH_DUMP
6117     - /* If we are doing a crash dump, we
6118     - * still need to know the real mem
6119     - * size before original memory map is
6120     - * reset.
6121     - */
6122     - find_max_pfn();
6123     - saved_max_pfn = max_pfn;
6124     -#endif
6125     - from += 8+7;
6126     - e820.nr_map = 0;
6127     - userdef = 1;
6128     - } else {
6129     - /* If the user specifies memory size, we
6130     - * limit the BIOS-provided memory map to
6131     - * that size. exactmap can be used to specify
6132     - * the exact map. mem=number can be used to
6133     - * trim the existing memory map.
6134     - */
6135     - unsigned long long start_at, mem_size;
6136     + if (strcmp(arg, "nopentium") == 0) {
6137     + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
6138     + disable_pse = 1;
6139     + } else {
6140     + /* If the user specifies memory size, we
6141     + * limit the BIOS-provided memory map to
6142     + * that size. exactmap can be used to specify
6143     + * the exact map. mem=number can be used to
6144     + * trim the existing memory map.
6145     + */
6146     + unsigned long long mem_size;
6147    
6148     - mem_size = memparse(from+7, &from);
6149     - if (*from == '@') {
6150     - start_at = memparse(from+1, &from);
6151     - add_memory_region(start_at, mem_size, E820_RAM);
6152     - } else if (*from == '#') {
6153     - start_at = memparse(from+1, &from);
6154     - add_memory_region(start_at, mem_size, E820_ACPI);
6155     - } else if (*from == '$') {
6156     - start_at = memparse(from+1, &from);
6157     - add_memory_region(start_at, mem_size, E820_RESERVED);
6158     - } else {
6159     - limit_regions(mem_size);
6160     - userdef=1;
6161     - }
6162     - }
6163     - }
6164     -
6165     - else if (!memcmp(from, "noexec=", 7))
6166     - noexec_setup(from + 7);
6167     + mem_size = memparse(arg, &arg);
6168     + limit_regions(mem_size);
6169     + user_defined_memmap = 1;
6170     + }
6171     + return 0;
6172     +}
6173     +early_param("mem", parse_mem);
6174    
6175     +static int __init parse_memmap(char *arg)
6176     +{
6177     + if (!arg)
6178     + return -EINVAL;
6179    
6180     -#ifdef CONFIG_X86_MPPARSE
6181     - /*
6182     - * If the BIOS enumerates physical processors before logical,
6183     - * maxcpus=N at enumeration-time can be used to disable HT.
6184     + if (strcmp(arg, "exactmap") == 0) {
6185     +#ifdef CONFIG_CRASH_DUMP
6186     + /* If we are doing a crash dump, we
6187     + * still need to know the real mem
6188     + * size before original memory map is
6189     + * reset.
6190     */
6191     - else if (!memcmp(from, "maxcpus=", 8)) {
6192     - extern unsigned int maxcpus;
6193     -
6194     - maxcpus = simple_strtoul(from + 8, NULL, 0);
6195     - }
6196     + find_max_pfn();
6197     + saved_max_pfn = max_pfn;
6198     #endif
6199     + e820.nr_map = 0;
6200     + user_defined_memmap = 1;
6201     + } else {
6202     + /* If the user specifies memory size, we
6203     + * limit the BIOS-provided memory map to
6204     + * that size. exactmap can be used to specify
6205     + * the exact map. mem=number can be used to
6206     + * trim the existing memory map.
6207     + */
6208     + unsigned long long start_at, mem_size;
6209    
6210     -#ifdef CONFIG_ACPI
6211     - /* "acpi=off" disables both ACPI table parsing and interpreter */
6212     - else if (!memcmp(from, "acpi=off", 8)) {
6213     - disable_acpi();
6214     - }
6215     -
6216     - /* acpi=force to over-ride black-list */
6217     - else if (!memcmp(from, "acpi=force", 10)) {
6218     - acpi_force = 1;
6219     - acpi_ht = 1;
6220     - acpi_disabled = 0;
6221     - }
6222     -
6223     - /* acpi=strict disables out-of-spec workarounds */
6224     - else if (!memcmp(from, "acpi=strict", 11)) {
6225     - acpi_strict = 1;
6226     - }
6227     -
6228     - /* Limit ACPI just to boot-time to enable HT */
6229     - else if (!memcmp(from, "acpi=ht", 7)) {
6230     - if (!acpi_force)
6231     - disable_acpi();
6232     - acpi_ht = 1;
6233     - }
6234     -
6235     - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
6236     - else if (!memcmp(from, "pci=noacpi", 10)) {
6237     - acpi_disable_pci();
6238     - }
6239     - /* "acpi=noirq" disables ACPI interrupt routing */
6240     - else if (!memcmp(from, "acpi=noirq", 10)) {
6241     - acpi_noirq_set();
6242     + mem_size = memparse(arg, &arg);
6243     + if (*arg == '@') {
6244     + start_at = memparse(arg+1, &arg);
6245     + add_memory_region(start_at, mem_size, E820_RAM);
6246     + } else if (*arg == '#') {
6247     + start_at = memparse(arg+1, &arg);
6248     + add_memory_region(start_at, mem_size, E820_ACPI);
6249     + } else if (*arg == '$') {
6250     + start_at = memparse(arg+1, &arg);
6251     + add_memory_region(start_at, mem_size, E820_RESERVED);
6252     + } else {
6253     + limit_regions(mem_size);
6254     + user_defined_memmap = 1;
6255     }
6256     + }
6257     + return 0;
6258     +}
6259     +early_param("memmap", parse_memmap);
6260    
6261     - else if (!memcmp(from, "acpi_sci=edge", 13))
6262     - acpi_sci_flags.trigger = 1;
6263     +#ifdef CONFIG_PROC_VMCORE
6264     +/* elfcorehdr= specifies the location of elf core header
6265     + * stored by the crashed kernel.
6266     + */
6267     +static int __init parse_elfcorehdr(char *arg)
6268     +{
6269     + if (!arg)
6270     + return -EINVAL;
6271    
6272     - else if (!memcmp(from, "acpi_sci=level", 14))
6273     - acpi_sci_flags.trigger = 3;
6274     + elfcorehdr_addr = memparse(arg, &arg);
6275     + return 0;
6276     +}
6277     +early_param("elfcorehdr", parse_elfcorehdr);
6278     +#endif /* CONFIG_PROC_VMCORE */
6279    
6280     - else if (!memcmp(from, "acpi_sci=high", 13))
6281     - acpi_sci_flags.polarity = 1;
6282     +/*
6283     + * highmem=size forces highmem to be exactly 'size' bytes.
6284     + * This works even on boxes that have no highmem otherwise.
6285     + * This also works to reduce highmem size on bigger boxes.
6286     + */
6287     +static int __init parse_highmem(char *arg)
6288     +{
6289     + if (!arg)
6290     + return -EINVAL;
6291    
6292     - else if (!memcmp(from, "acpi_sci=low", 12))
6293     - acpi_sci_flags.polarity = 3;
6294     + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
6295     + return 0;
6296     +}
6297     +early_param("highmem", parse_highmem);
6298    
6299     -#ifdef CONFIG_X86_IO_APIC
6300     - else if (!memcmp(from, "acpi_skip_timer_override", 24))
6301     - acpi_skip_timer_override = 1;
6302     +/*
6303     + * vmalloc=size forces the vmalloc area to be exactly 'size'
6304     + * bytes. This can be used to increase (or decrease) the
6305     + * vmalloc area - the default is 128m.
6306     + */
6307     +static int __init parse_vmalloc(char *arg)
6308     +{
6309     + if (!arg)
6310     + return -EINVAL;
6311    
6312     - if (!memcmp(from, "disable_timer_pin_1", 19))
6313     - disable_timer_pin_1 = 1;
6314     - if (!memcmp(from, "enable_timer_pin_1", 18))
6315     - disable_timer_pin_1 = -1;
6316     -
6317     - /* disable IO-APIC */
6318     - else if (!memcmp(from, "noapic", 6))
6319     - disable_ioapic_setup();
6320     -#endif /* CONFIG_X86_IO_APIC */
6321     -#endif /* CONFIG_ACPI */
6322     -
6323     -#ifdef CONFIG_X86_LOCAL_APIC
6324     - /* enable local APIC */
6325     - else if (!memcmp(from, "lapic", 5))
6326     - lapic_enable();
6327     -
6328     - /* disable local APIC */
6329     - else if (!memcmp(from, "nolapic", 6))
6330     - lapic_disable();
6331     -#endif /* CONFIG_X86_LOCAL_APIC */
6332     + __VMALLOC_RESERVE = memparse(arg, &arg);
6333     + return 0;
6334     +}
6335     +early_param("vmalloc", parse_vmalloc);
6336    
6337     -#ifdef CONFIG_KEXEC
6338     - /* crashkernel=size@addr specifies the location to reserve for
6339     - * a crash kernel. By reserving this memory we guarantee
6340     - * that linux never set's it up as a DMA target.
6341     - * Useful for holding code to do something appropriate
6342     - * after a kernel panic.
6343     - */
6344     - else if (!memcmp(from, "crashkernel=", 12)) {
6345     #ifndef CONFIG_XEN
6346     - unsigned long size, base;
6347     - size = memparse(from+12, &from);
6348     - if (*from == '@') {
6349     - base = memparse(from+1, &from);
6350     - /* FIXME: Do I want a sanity check
6351     - * to validate the memory range?
6352     - */
6353     - crashk_res.start = base;
6354     - crashk_res.end = base + size - 1;
6355     - }
6356     -#else
6357     - printk("Ignoring crashkernel command line, "
6358     - "parameter will be supplied by xen\n");
6359     -#endif
6360     - }
6361     -#endif
6362     -#ifdef CONFIG_PROC_VMCORE
6363     - /* elfcorehdr= specifies the location of elf core header
6364     - * stored by the crashed kernel.
6365     - */
6366     - else if (!memcmp(from, "elfcorehdr=", 11))
6367     - elfcorehdr_addr = memparse(from+11, &from);
6368     -#endif
6369     +/*
6370     + * reservetop=size reserves a hole at the top of the kernel address space which
6371     + * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
6372     + * so relocating the fixmap can be done before paging initialization.
6373     + */
6374     +static int __init parse_reservetop(char *arg)
6375     +{
6376     + unsigned long address;
6377    
6378     - /*
6379     - * highmem=size forces highmem to be exactly 'size' bytes.
6380     - * This works even on boxes that have no highmem otherwise.
6381     - * This also works to reduce highmem size on bigger boxes.
6382     - */
6383     - else if (!memcmp(from, "highmem=", 8))
6384     - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
6385     -
6386     - /*
6387     - * vmalloc=size forces the vmalloc area to be exactly 'size'
6388     - * bytes. This can be used to increase (or decrease) the
6389     - * vmalloc area - the default is 128m.
6390     - */
6391     - else if (!memcmp(from, "vmalloc=", 8))
6392     - __VMALLOC_RESERVE = memparse(from+8, &from);
6393     + if (!arg)
6394     + return -EINVAL;
6395    
6396     - next_char:
6397     - c = *(from++);
6398     - if (!c)
6399     - break;
6400     - if (COMMAND_LINE_SIZE <= ++len)
6401     - break;
6402     - *(to++) = c;
6403     - }
6404     - *to = '\0';
6405     - *cmdline_p = command_line;
6406     - if (userdef) {
6407     - printk(KERN_INFO "user-defined physical RAM map:\n");
6408     - print_memory_map("user");
6409     - }
6410     + address = memparse(arg, &arg);
6411     + reserve_top_address(address);
6412     + return 0;
6413     }
6414     +early_param("reservetop", parse_reservetop);
6415     +#endif
6416    
6417     /*
6418     * Callback for efi_memory_walk.
6419     @@ -1024,7 +912,7 @@
6420     static int __init
6421     efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
6422     {
6423     - memory_present(0, start, end);
6424     + memory_present(0, PFN_UP(start), PFN_DOWN(end));
6425     return 0;
6426     }
6427    
6428     @@ -1291,6 +1179,14 @@
6429     }
6430     printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
6431     pages_to_mb(highend_pfn - highstart_pfn));
6432     + num_physpages = highend_pfn;
6433     + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
6434     +#else
6435     + num_physpages = max_low_pfn;
6436     + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
6437     +#endif
6438     +#ifdef CONFIG_FLATMEM
6439     + max_mapnr = num_physpages;
6440     #endif
6441     printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
6442     pages_to_mb(max_low_pfn));
6443     @@ -1302,22 +1198,19 @@
6444    
6445     void __init zone_sizes_init(void)
6446     {
6447     - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
6448     - unsigned int max_dma, low;
6449     -
6450     - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6451     - low = max_low_pfn;
6452     -
6453     - if (low < max_dma)
6454     - zones_size[ZONE_DMA] = low;
6455     - else {
6456     - zones_size[ZONE_DMA] = max_dma;
6457     - zones_size[ZONE_NORMAL] = low - max_dma;
6458     + unsigned long max_zone_pfns[MAX_NR_ZONES];
6459     + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
6460     + max_zone_pfns[ZONE_DMA] =
6461     + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6462     + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
6463     #ifdef CONFIG_HIGHMEM
6464     - zones_size[ZONE_HIGHMEM] = highend_pfn - low;
6465     + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
6466     + add_active_range(0, 0, highend_pfn);
6467     +#else
6468     + add_active_range(0, 0, max_low_pfn);
6469     #endif
6470     - }
6471     - free_area_init(zones_size);
6472     +
6473     + free_area_init_nodes(max_zone_pfns);
6474     }
6475     #else
6476     extern unsigned long __init setup_memory(void);
6477     @@ -1374,6 +1267,7 @@
6478     */
6479     acpi_reserve_bootmem();
6480     #endif
6481     + numa_kva_reserve();
6482     #endif /* !CONFIG_XEN */
6483    
6484     #ifdef CONFIG_BLK_DEV_INITRD
6485     @@ -1559,7 +1453,7 @@
6486     request_resource(&iomem_resource, &video_ram_resource);
6487    
6488     /* request I/O space for devices used on all i[345]86 PCs */
6489     - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6490     + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6491     request_resource(&ioport_resource, &standard_io_resources[i]);
6492     return 0;
6493     }
6494     @@ -1700,17 +1594,19 @@
6495     data_resource.start = virt_to_phys(_etext);
6496     data_resource.end = virt_to_phys(_edata)-1;
6497    
6498     - parse_cmdline_early(cmdline_p);
6499     + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
6500     + i = COMMAND_LINE_SIZE;
6501     + memcpy(saved_command_line, xen_start_info->cmd_line, i);
6502     + saved_command_line[i - 1] = '\0';
6503     + parse_early_param();
6504    
6505     -#ifdef CONFIG_EARLY_PRINTK
6506     - {
6507     - char *s = strstr(*cmdline_p, "earlyprintk=");
6508     - if (s) {
6509     - setup_early_printk(strchr(s, '=') + 1);
6510     - printk("early console enabled\n");
6511     - }
6512     + if (user_defined_memmap) {
6513     + printk(KERN_INFO "user-defined physical RAM map:\n");
6514     + print_memory_map("user");
6515     }
6516     -#endif
6517     +
6518     + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6519     + *cmdline_p = command_line;
6520    
6521     max_low_pfn = setup_memory();
6522    
6523     @@ -1817,7 +1713,7 @@
6524     dmi_scan_machine();
6525    
6526     #ifdef CONFIG_X86_GENERICARCH
6527     - generic_apic_probe(*cmdline_p);
6528     + generic_apic_probe();
6529     #endif
6530     if (efi_enabled)
6531     efi_map_memmap();
6532     @@ -1838,9 +1734,11 @@
6533     acpi_boot_table_init();
6534     #endif
6535    
6536     +#ifdef CONFIG_PCI
6537     #ifdef CONFIG_X86_IO_APIC
6538     check_acpi_pci(); /* Checks more than just ACPI actually */
6539     #endif
6540     +#endif
6541    
6542     #ifdef CONFIG_ACPI
6543     acpi_boot_init();
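Nearly every handler introduced above leans on memparse(), which converts a size string with an optional k/K/m/M/g/G suffix and advances the caller's cursor past what it consumed; that is what lets parse_memmap() inspect the '@', '#' or '$' separator afterwards. A sketch of the memmap=SIZE@START case, mirroring the handler:

	unsigned long long mem_size, start_at;

	mem_size = memparse(arg, &arg);	/* "64M@16M" -> 0x4000000, arg -> "@16M" */
	if (*arg == '@') {
		start_at = memparse(arg + 1, &arg);
		add_memory_region(start_at, mem_size, E820_RAM);
	}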
6544     --- a/arch/x86/kernel/setup_64-xen.c
6545     +++ b/arch/x86/kernel/setup_64-xen.c
6546     @@ -118,16 +118,6 @@
6547    
6548     unsigned long mmu_cr4_features;
6549    
6550     -int acpi_disabled;
6551     -EXPORT_SYMBOL(acpi_disabled);
6552     -#ifdef CONFIG_ACPI
6553     -extern int __initdata acpi_ht;
6554     -extern acpi_interrupt_flags acpi_sci_flags;
6555     -int __initdata acpi_force = 0;
6556     -#endif
6557     -
6558     -int acpi_numa __initdata;
6559     -
6560     /* Boot loader ID as an integer, for the benefit of proc_dointvec */
6561     int bootloader_type;
6562    
6563     @@ -151,10 +141,6 @@
6564    
6565     struct edid_info edid_info;
6566     EXPORT_SYMBOL_GPL(edid_info);
6567     -struct e820map e820;
6568     -#ifdef CONFIG_XEN
6569     -struct e820map machine_e820;
6570     -#endif
6571    
6572     extern int root_mountflags;
6573    
6574     @@ -181,9 +167,6 @@
6575     .flags = IORESOURCE_BUSY | IORESOURCE_IO }
6576     };
6577    
6578     -#define STANDARD_IO_RESOURCES \
6579     - (sizeof standard_io_resources / sizeof standard_io_resources[0])
6580     -
6581     #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
6582    
6583     struct resource data_resource = {
6584     @@ -230,9 +213,6 @@
6585     .flags = IORESOURCE_ROM }
6586     };
6587    
6588     -#define ADAPTER_ROM_RESOURCES \
6589     - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
6590     -
6591     static struct resource video_rom_resource = {
6592     .name = "Video ROM",
6593     .start = 0xc0000,
6594     @@ -309,7 +289,8 @@
6595     }
6596    
6597     /* check for adapter roms on 2k boundaries */
6598     - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
6599     + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
6600     + start += 2048) {
6601     rom = isa_bus_to_virt(start);
6602     if (!romsignature(rom))
6603     continue;
6604     @@ -329,187 +310,22 @@
6605     }
6606     }
6607    
6608     -/* Check for full argument with no trailing characters */
6609     -static int fullarg(char *p, char *arg)
6610     +#ifdef CONFIG_PROC_VMCORE
6611     +/* elfcorehdr= specifies the location of elf core header
6612     + * stored by the crashed kernel. This option will be passed
6613     + * by kexec loader to the capture kernel.
6614     + */
6615     +static int __init setup_elfcorehdr(char *arg)
6616     {
6617     - int l = strlen(arg);
6618     - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
6619     + char *end;
6620     + if (!arg)
6621     + return -EINVAL;
6622     + elfcorehdr_addr = memparse(arg, &end);
6623     + return end > arg ? 0 : -EINVAL;
6624     }
6625     -
6626     -static __init void parse_cmdline_early (char ** cmdline_p)
6627     -{
6628     - char c = ' ', *to = command_line, *from = COMMAND_LINE;
6629     - int len = 0;
6630     - int userdef = 0;
6631     -
6632     - for (;;) {
6633     - if (c != ' ')
6634     - goto next_char;
6635     -
6636     -#ifdef CONFIG_SMP
6637     - /*
6638     - * If the BIOS enumerates physical processors before logical,
6639     - * maxcpus=N at enumeration-time can be used to disable HT.
6640     - */
6641     - else if (!memcmp(from, "maxcpus=", 8)) {
6642     - extern unsigned int maxcpus;
6643     -
6644     - maxcpus = simple_strtoul(from + 8, NULL, 0);
6645     - }
6646     -#endif
6647     -#ifdef CONFIG_ACPI
6648     - /* "acpi=off" disables both ACPI table parsing and interpreter init */
6649     - if (fullarg(from,"acpi=off"))
6650     - disable_acpi();
6651     -
6652     - if (fullarg(from, "acpi=force")) {
6653     - /* add later when we do DMI horrors: */
6654     - acpi_force = 1;
6655     - acpi_disabled = 0;
6656     - }
6657     -
6658     - /* acpi=ht just means: do ACPI MADT parsing
6659     - at bootup, but don't enable the full ACPI interpreter */
6660     - if (fullarg(from, "acpi=ht")) {
6661     - if (!acpi_force)
6662     - disable_acpi();
6663     - acpi_ht = 1;
6664     - }
6665     - else if (fullarg(from, "pci=noacpi"))
6666     - acpi_disable_pci();
6667     - else if (fullarg(from, "acpi=noirq"))
6668     - acpi_noirq_set();
6669     -
6670     - else if (fullarg(from, "acpi_sci=edge"))
6671     - acpi_sci_flags.trigger = 1;
6672     - else if (fullarg(from, "acpi_sci=level"))
6673     - acpi_sci_flags.trigger = 3;
6674     - else if (fullarg(from, "acpi_sci=high"))
6675     - acpi_sci_flags.polarity = 1;
6676     - else if (fullarg(from, "acpi_sci=low"))
6677     - acpi_sci_flags.polarity = 3;
6678     -
6679     - /* acpi=strict disables out-of-spec workarounds */
6680     - else if (fullarg(from, "acpi=strict")) {
6681     - acpi_strict = 1;
6682     - }
6683     -#ifdef CONFIG_X86_IO_APIC
6684     - else if (fullarg(from, "acpi_skip_timer_override"))
6685     - acpi_skip_timer_override = 1;
6686     -#endif
6687     -#endif
6688     -
6689     -#ifndef CONFIG_XEN
6690     - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
6691     - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
6692     - disable_apic = 1;
6693     - }
6694     -
6695     - if (fullarg(from, "noapic"))
6696     - skip_ioapic_setup = 1;
6697     -
6698     - if (fullarg(from,"apic")) {
6699     - skip_ioapic_setup = 0;
6700     - ioapic_force = 1;
6701     - }
6702     -#endif
6703     -
6704     - if (!memcmp(from, "mem=", 4))
6705     - parse_memopt(from+4, &from);
6706     -
6707     - if (!memcmp(from, "memmap=", 7)) {
6708     - /* exactmap option is for used defined memory */
6709     - if (!memcmp(from+7, "exactmap", 8)) {
6710     -#ifdef CONFIG_CRASH_DUMP
6711     - /* If we are doing a crash dump, we
6712     - * still need to know the real mem
6713     - * size before original memory map is
6714     - * reset.
6715     - */
6716     - saved_max_pfn = e820_end_of_ram();
6717     -#endif
6718     - from += 8+7;
6719     - end_pfn_map = 0;
6720     - e820.nr_map = 0;
6721     - userdef = 1;
6722     - }
6723     - else {
6724     - parse_memmapopt(from+7, &from);
6725     - userdef = 1;
6726     - }
6727     - }
6728     -
6729     -#ifdef CONFIG_NUMA
6730     - if (!memcmp(from, "numa=", 5))
6731     - numa_setup(from+5);
6732     +early_param("elfcorehdr", setup_elfcorehdr);
6733     #endif
6734    
6735     - if (!memcmp(from,"iommu=",6)) {
6736     - iommu_setup(from+6);
6737     - }
6738     -
6739     - if (fullarg(from,"oops=panic"))
6740     - panic_on_oops = 1;
6741     -
6742     - if (!memcmp(from, "noexec=", 7))
6743     - nonx_setup(from + 7);
6744     -
6745     -#ifdef CONFIG_KEXEC
6746     - /* crashkernel=size@addr specifies the location to reserve for
6747     - * a crash kernel. By reserving this memory we guarantee
6748     - * that linux never set's it up as a DMA target.
6749     - * Useful for holding code to do something appropriate
6750     - * after a kernel panic.
6751     - */
6752     - else if (!memcmp(from, "crashkernel=", 12)) {
6753     -#ifndef CONFIG_XEN
6754     - unsigned long size, base;
6755     - size = memparse(from+12, &from);
6756     - if (*from == '@') {
6757     - base = memparse(from+1, &from);
6758     - /* FIXME: Do I want a sanity check
6759     - * to validate the memory range?
6760     - */
6761     - crashk_res.start = base;
6762     - crashk_res.end = base + size - 1;
6763     - }
6764     -#else
6765     - printk("Ignoring crashkernel command line, "
6766     - "parameter will be supplied by xen\n");
6767     -#endif
6768     - }
6769     -#endif
6770     -
6771     -#ifdef CONFIG_PROC_VMCORE
6772     - /* elfcorehdr= specifies the location of elf core header
6773     - * stored by the crashed kernel. This option will be passed
6774     - * by kexec loader to the capture kernel.
6775     - */
6776     - else if(!memcmp(from, "elfcorehdr=", 11))
6777     - elfcorehdr_addr = memparse(from+11, &from);
6778     -#endif
6779     -
6780     -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
6781     - else if (!memcmp(from, "additional_cpus=", 16))
6782     - setup_additional_cpus(from+16);
6783     -#endif
6784     -
6785     - next_char:
6786     - c = *(from++);
6787     - if (!c)
6788     - break;
6789     - if (COMMAND_LINE_SIZE <= ++len)
6790     - break;
6791     - *(to++) = c;
6792     - }
6793     - if (userdef) {
6794     - printk(KERN_INFO "user-defined physical RAM map:\n");
6795     - e820_print_map("user");
6796     - }
6797     - *to = '\0';
6798     - *cmdline_p = command_line;
6799     -}
6800     -
6801     #ifndef CONFIG_NUMA
6802     static void __init
6803     contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
6804     @@ -521,10 +337,11 @@
6805     if (bootmap == -1L)
6806     panic("Cannot find bootmem map of size %ld\n",bootmap_size);
6807     bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
6808     + e820_register_active_regions(0, start_pfn, end_pfn);
6809     #ifdef CONFIG_XEN
6810     - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
6811     + free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
6812     #else
6813     - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
6814     + free_bootmem_with_active_regions(0, end_pfn);
6815     #endif
6816     reserve_bootmem(bootmap, bootmap_size);
6817     }
6818     @@ -587,6 +404,10 @@
6819     void __init setup_arch(char **cmdline_p)
6820     {
6821     #ifdef CONFIG_XEN
6822     + extern struct e820map machine_e820;
6823     +
6824     + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6825     +
6826     /* Register a call for panic conditions. */
6827     atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
6828    
6829     @@ -612,6 +433,8 @@
6830    
6831     ARCH_SETUP
6832     #else
6833     + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6834     +
6835     ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
6836     screen_info = SCREEN_INFO;
6837     edid_info = EDID_INFO;
6838     @@ -639,16 +462,22 @@
6839     data_resource.start = virt_to_phys(&_etext);
6840     data_resource.end = virt_to_phys(&_edata)-1;
6841    
6842     - parse_cmdline_early(cmdline_p);
6843     -
6844     early_identify_cpu(&boot_cpu_data);
6845    
6846     + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6847     + *cmdline_p = command_line;
6848     +
6849     + parse_early_param();
6850     +
6851     + finish_e820_parsing();
6852     +
6853     + e820_register_active_regions(0, 0, -1UL);
6854     /*
6855     * partially used pages are not usable - thus
6856     * we are rounding upwards:
6857     */
6858     end_pfn = e820_end_of_ram();
6859     - num_physpages = end_pfn; /* for pfn_valid */
6860     + num_physpages = end_pfn;
6861    
6862     check_efer();
6863    
6864     @@ -659,6 +488,14 @@
6865     if (is_initial_xendomain())
6866     dmi_scan_machine();
6867    
6868     + /* How many end-of-memory variables you have, grandma! */
6869     + max_low_pfn = end_pfn;
6870     + max_pfn = end_pfn;
6871     + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
6872     +
6873     + /* Remove active ranges so rediscovery with NUMA-awareness happens */
6874     + remove_all_active_ranges();
6875     +
6876     #ifdef CONFIG_ACPI_NUMA
6877     /*
6878     * Parse SRAT to discover nodes.
6879     @@ -848,16 +685,16 @@
6880     BUG();
6881     }
6882    
6883     +#ifdef CONFIG_ACPI
6884     if (!is_initial_xendomain()) {
6885     acpi_disabled = 1;
6886     -#ifdef CONFIG_ACPI
6887     acpi_ht = 0;
6888     -#endif
6889     }
6890     #endif
6891     +#endif
6892    
6893     -#ifndef CONFIG_XEN
6894     - check_ioapic();
6895     +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
6896     + early_quirks();
6897     #endif
6898    
6899     zap_low_mappings(0);
6900     @@ -917,6 +754,7 @@
6901     }
6902     #else
6903     e820_reserve_resources(e820.map, e820.nr_map);
6904     + e820_mark_nosave_regions();
6905     #endif
6906    
6907     request_resource(&iomem_resource, &video_ram_resource);
6908     @@ -924,7 +762,7 @@
6909     {
6910     unsigned i;
6911     /* request I/O space for devices used on all i[345]86 PCs */
6912     - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6913     + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6914     request_resource(&ioport_resource, &standard_io_resources[i]);
6915     }
6916    
6917     @@ -1108,7 +946,7 @@
6918     #endif
6919     }
6920    
6921     -static void __init init_amd(struct cpuinfo_x86 *c)
6922     +static void __cpuinit init_amd(struct cpuinfo_x86 *c)
6923     {
6924     unsigned level;
6925    
6926     @@ -1164,6 +1002,12 @@
6927    
6928     /* Fix cpuid4 emulation for more */
6929     num_cache_leaves = 3;
6930     +
6931     + /* When there is only one core, no need to synchronize RDTSC */
6932     + if (num_possible_cpus() == 1)
6933     + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6934     + else
6935     + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6936     }
6937    
6938     static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
6939     @@ -1245,8 +1089,7 @@
6940     node = first_node(node_online_map);
6941     numa_set_node(cpu, node);
6942    
6943     - if (acpi_numa > 0)
6944     - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6945     + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6946     #endif
6947     }
6948    
6949     @@ -1280,6 +1123,8 @@
6950     if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
6951     (c->x86 == 0x6 && c->x86_model >= 0x0e))
6952     set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
6953     + if (c->x86 == 6)
6954     + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
6955     set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6956     c->x86_max_cores = intel_num_cpu_cores(c);
6957    
6958     @@ -1498,8 +1343,8 @@
6959    
6960     /* Intel-defined (#2) */
6961     "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
6962     - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
6963     - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6964     + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
6965     + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
6966     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6967    
6968     /* VIA/Cyrix/Centaur-defined */
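
The setup_64 hunks above retire the monolithic parse_cmdline_early() scanner in favour of per-option early_param() handlers, with setup_elfcorehdr() as the template. A rough sketch of the pattern follows; the option name "fooaddr" and its handler are invented for illustration and are not part of this patch:

#include <linux/init.h>
#include <linux/kernel.h>

static unsigned long long foo_addr;

/* Runs from parse_early_param(), well before the ordinary __setup()
 * handlers; returns 0 on success, -EINVAL for a malformed argument. */
static int __init setup_fooaddr(char *arg)
{
        char *end;

        if (!arg)
                return -EINVAL;
        foo_addr = memparse(arg, &end);
        return end > arg ? 0 : -EINVAL;
}
early_param("fooaddr", setup_fooaddr);

This is also why setup_arch() above now copies saved_command_line into command_line and calls parse_early_param() itself instead of hand-rolling the scan.
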
6969     --- a/arch/x86/kernel/smp_32-xen.c
6970     +++ b/arch/x86/kernel/smp_32-xen.c
6971     @@ -279,8 +279,7 @@
6972     * 2) Leave the mm if we are in the lazy tlb mode.
6973     */
6974    
6975     -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
6976     - struct pt_regs *regs)
6977     +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
6978     {
6979     unsigned long cpu;
6980    
6981     @@ -567,16 +566,14 @@
6982     * all the work is done automatically when
6983     * we return from the interrupt.
6984     */
6985     -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
6986     - struct pt_regs *regs)
6987     +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
6988     {
6989    
6990     return IRQ_HANDLED;
6991     }
6992    
6993     #include <linux/kallsyms.h>
6994     -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
6995     - struct pt_regs *regs)
6996     +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
6997     {
6998     void (*func) (void *info) = call_data->func;
6999     void *info = call_data->info;
7000     @@ -603,3 +600,69 @@
7001     return IRQ_HANDLED;
7002     }
7003    
7004     +/*
7005     + * This function sends a 'generic call function' IPI to one other CPU
7006     + * in the system.
7007     + *
7008     + * cpu is a standard Linux logical CPU number.
7009     + */
7010     +static void
7011     +__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
7012     + int nonatomic, int wait)
7013     +{
7014     + struct call_data_struct data;
7015     + int cpus = 1;
7016     +
7017     + data.func = func;
7018     + data.info = info;
7019     + atomic_set(&data.started, 0);
7020     + data.wait = wait;
7021     + if (wait)
7022     + atomic_set(&data.finished, 0);
7023     +
7024     + call_data = &data;
7025     + wmb();
7026     + /* Send a message to the target CPU and wait for it to respond */
7027     + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
7028     +
7029     + /* Wait for response */
7030     + while (atomic_read(&data.started) != cpus)
7031     + cpu_relax();
7032     +
7033     + if (!wait)
7034     + return;
7035     +
7036     + while (atomic_read(&data.finished) != cpus)
7037     + cpu_relax();
7038     +}
7039     +
7040     +/*
7041     + * smp_call_function_single - Run a function on another CPU
7042     + * @func: The function to run. This must be fast and non-blocking.
7043     + * @info: An arbitrary pointer to pass to the function.
7044     + * @nonatomic: Currently unused.
7045     + * @wait: If true, wait until the function has completed on the target CPU.
7046     + *
7047     + * Returns 0 on success, else a negative status code.
7048     + *
7049     + * Does not return until the remote CPU is nearly ready to execute <func>,
7050     + * is executing it, or has already executed it.
7051     + */
7052     +
7053     +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
7054     + int nonatomic, int wait)
7055     +{
7056     + /* prevent preemption and reschedule on another processor */
7057     + int me = get_cpu();
7058     + if (cpu == me) {
7059     + WARN_ON(1);
7060     + put_cpu();
7061     + return -EBUSY;
7062     + }
7063     + spin_lock_bh(&call_lock);
7064     + __smp_call_function_single(cpu, func, info, nonatomic, wait);
7065     + spin_unlock_bh(&call_lock);
7066     + put_cpu();
7067     + return 0;
7068     +}
7069     +EXPORT_SYMBOL(smp_call_function_single);
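
The smp_32 hunk above adds __smp_call_function_single() and exports smp_call_function_single(). A minimal, hypothetical caller (function and variable names invented for illustration, not part of the patch) would look like:

#include <linux/smp.h>

/* Must be fast and non-blocking: runs in IPI context on the target. */
static void remote_ping(void *info)
{
        *(int *)info = smp_processor_id();
}

static int ping_cpu_one(void)
{
        int who = -1;

        /* nonatomic=0 (unused), wait=1: block until CPU 1 has run it */
        return smp_call_function_single(1, remote_ping, &who, 0, 1);
}

Note the asymmetry with the next hunk: this 32-bit version WARNs and returns -EBUSY when asked to target the calling CPU, while the 64-bit variant below is relaxed to return 0 silently.
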
7070     --- a/arch/x86/kernel/smp_64-xen.c
7071     +++ b/arch/x86/kernel/smp_64-xen.c
7072     @@ -381,9 +381,8 @@
7073     /* prevent preemption and reschedule on another processor */
7074     int me = get_cpu();
7075     if (cpu == me) {
7076     - WARN_ON(1);
7077     put_cpu();
7078     - return -EBUSY;
7079     + return 0;
7080     }
7081     spin_lock_bh(&call_lock);
7082     __smp_call_function_single(cpu, func, info, nonatomic, wait);
7083     @@ -501,7 +500,7 @@
7084     #ifndef CONFIG_XEN
7085     asmlinkage void smp_reschedule_interrupt(void)
7086     #else
7087     -asmlinkage irqreturn_t smp_reschedule_interrupt(void)
7088     +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
7089     #endif
7090     {
7091     #ifndef CONFIG_XEN
7092     @@ -514,7 +513,7 @@
7093     #ifndef CONFIG_XEN
7094     asmlinkage void smp_call_function_interrupt(void)
7095     #else
7096     -asmlinkage irqreturn_t smp_call_function_interrupt(void)
7097     +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
7098     #endif
7099     {
7100     void (*func) (void *info) = call_data->func;
7101     @@ -545,31 +544,3 @@
7102     return IRQ_HANDLED;
7103     #endif
7104     }
7105     -
7106     -int safe_smp_processor_id(void)
7107     -{
7108     -#ifdef CONFIG_XEN
7109     - return smp_processor_id();
7110     -#else
7111     - unsigned apicid, i;
7112     -
7113     - if (disable_apic)
7114     - return 0;
7115     -
7116     - apicid = hard_smp_processor_id();
7117     - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
7118     - return apicid;
7119     -
7120     - for (i = 0; i < NR_CPUS; ++i) {
7121     - if (x86_cpu_to_apicid[i] == apicid)
7122     - return i;
7123     - }
7124     -
7125     - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
7126     - * or called too early. Either way, we must be CPU 0. */
7127     - if (x86_cpu_to_apicid[0] == BAD_APICID)
7128     - return 0;
7129     -
7130     - return 0; /* Should not happen */
7131     -#endif
7132     -}
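
The dropped struct pt_regs * parameter in the IPI handlers above (and in timer_interrupt() below) follows the 2.6.19 genirq conversion: a handler that still needs the interrupted register state fetches it on demand with get_irq_regs(). An i386-flavoured sketch, with a made-up handler name:

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

static irqreturn_t my_handler(int irq, void *dev_id)
{
        /* The register frame is no longer passed in as a third argument. */
        struct pt_regs *regs = get_irq_regs();

        if (regs && user_mode_vm(regs))
                printk(KERN_DEBUG "tick hit user mode\n");

        return IRQ_HANDLED;
}
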
7133     --- a/arch/x86/kernel/time_32-xen.c
7134     +++ b/arch/x86/kernel/time_32-xen.c
7135     @@ -89,7 +89,6 @@
7136     unsigned long vxtime_hz = PIT_TICK_RATE;
7137     struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
7138     volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
7139     -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
7140     struct timespec __xtime __section_xtime;
7141     struct timezone __sys_tz __section_sys_tz;
7142     #endif
7143     @@ -97,8 +96,6 @@
7144     unsigned int cpu_khz; /* Detected as we calibrate the TSC */
7145     EXPORT_SYMBOL(cpu_khz);
7146    
7147     -extern unsigned long wall_jiffies;
7148     -
7149     DEFINE_SPINLOCK(rtc_lock);
7150     EXPORT_SYMBOL(rtc_lock);
7151    
7152     @@ -265,11 +262,10 @@
7153     time_t wtm_sec, xtime_sec;
7154     u64 tmp, wc_nsec;
7155    
7156     - /* Adjust wall-clock time base based on wall_jiffies ticks. */
7157     + /* Adjust wall-clock time base. */
7158     wc_nsec = processed_system_time;
7159     wc_nsec += sec * (u64)NSEC_PER_SEC;
7160     wc_nsec += nsec;
7161     - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
7162    
7163     /* Split wallclock base into seconds and nanoseconds. */
7164     tmp = wc_nsec;
7165     @@ -387,16 +383,10 @@
7166     shadow = &per_cpu(shadow_time, cpu);
7167    
7168     do {
7169     - unsigned long lost;
7170     -
7171     local_time_version = shadow->version;
7172     seq = read_seqbegin(&xtime_lock);
7173    
7174     usec = get_usec_offset(shadow);
7175     - lost = jiffies - wall_jiffies;
7176     -
7177     - if (unlikely(lost))
7178     - usec += lost * (USEC_PER_SEC / HZ);
7179    
7180     sec = xtime.tv_sec;
7181     usec += (xtime.tv_nsec / NSEC_PER_USEC);
7182     @@ -519,7 +509,7 @@
7183     write_seqlock_irq(&xtime_lock);
7184    
7185     sec = xtime.tv_sec;
7186     - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
7187     + nsec = xtime.tv_nsec;
7188     __normalize_time(&sec, &nsec);
7189    
7190     op.cmd = XENPF_settime;
7191     @@ -593,42 +583,49 @@
7192     }
7193     #endif
7194    
7195     -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
7196     unsigned long profile_pc(struct pt_regs *regs)
7197     {
7198     unsigned long pc = instruction_pointer(regs);
7199    
7200     -#ifdef __x86_64__
7201     - /* Assume the lock function has either no stack frame or only a single word.
7202     - This checks if the address on the stack looks like a kernel text address.
7203     - There is a small window for false hits, but in that case the tick
7204     - is just accounted to the spinlock function.
7205     - Better would be to write these functions in assembler again
7206     - and check exactly. */
7207     +#if defined(CONFIG_SMP) || defined(__x86_64__)
7208     if (!user_mode_vm(regs) && in_lock_functions(pc)) {
7209     - char *v = *(char **)regs->rsp;
7210     - if ((v >= _stext && v <= _etext) ||
7211     - (v >= _sinittext && v <= _einittext) ||
7212     - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
7213     - return (unsigned long)v;
7214     - return ((unsigned long *)regs->rsp)[1];
7215     +# ifdef CONFIG_FRAME_POINTER
7216     +# ifdef __i386__
7217     + return ((unsigned long *)regs->ebp)[1];
7218     +# else
7219     + return ((unsigned long *)regs->rbp)[1];
7220     +# endif
7221     +# else
7222     +# ifdef __i386__
7223     + unsigned long *sp;
7224     + if ((regs->xcs & 2) == 0)
7225     + sp = (unsigned long *)&regs->esp;
7226     + else
7227     + sp = (unsigned long *)regs->esp;
7228     +# else
7229     + unsigned long *sp = (unsigned long *)regs->rsp;
7230     +# endif
7231     + /* Return address is either directly at stack pointer
7232     + or above a saved eflags. Eflags has bits 22-31 zero,
7233     + kernel addresses don't. */
7234     + if (sp[0] >> 22)
7235     + return sp[0];
7236     + if (sp[1] >> 22)
7237     + return sp[1];
7238     +# endif
7239     }
7240     -#else
7241     - if (!user_mode_vm(regs) && in_lock_functions(pc))
7242     - return *(unsigned long *)(regs->ebp + 4);
7243     #endif
7244    
7245     return pc;
7246     }
7247     EXPORT_SYMBOL(profile_pc);
7248     -#endif
7249    
7250     /*
7251     * This is the same as the above, except we _also_ save the current
7252     * Time Stamp Counter value at the time of the timer interrupt, so that
7253     * we later on can estimate the time of day more exactly.
7254     */
7255     -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
7256     +irqreturn_t timer_interrupt(int irq, void *dev_id)
7257     {
7258     s64 delta, delta_cpu, stolen, blocked;
7259     u64 sched_time;
7260     @@ -686,10 +683,14 @@
7261     }
7262    
7263     /* System-wide jiffy work. */
7264     - while (delta >= NS_PER_TICK) {
7265     - delta -= NS_PER_TICK;
7266     - processed_system_time += NS_PER_TICK;
7267     - do_timer(regs);
7268     + if (delta >= NS_PER_TICK) {
7269     + do_div(delta, NS_PER_TICK);
7270     + processed_system_time += delta * NS_PER_TICK;
7271     + while (delta > HZ) {
7272     + do_timer(HZ);
7273     + delta -= HZ;
7274     + }
7275     + do_timer(delta);
7276     }
7277    
7278     if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
7279     @@ -734,7 +735,7 @@
7280     if (delta_cpu > 0) {
7281     do_div(delta_cpu, NS_PER_TICK);
7282     per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
7283     - if (user_mode_vm(regs))
7284     + if (user_mode_vm(get_irq_regs()))
7285     account_user_time(current, (cputime_t)delta_cpu);
7286     else
7287     account_system_time(current, HARDIRQ_OFFSET,
7288     @@ -748,10 +749,10 @@
7289     /* Local timer processing (see update_process_times()). */
7290     run_local_timers();
7291     if (rcu_pending(cpu))
7292     - rcu_check_callbacks(cpu, user_mode_vm(regs));
7293     + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
7294     scheduler_tick();
7295     run_posix_cpu_timers(current);
7296     - profile_tick(CPU_PROFILING, regs);
7297     + profile_tick(CPU_PROFILING);
7298    
7299     return IRQ_HANDLED;
7300     }
7301     @@ -959,10 +960,11 @@
7302     /* Duplicate of time_init() below, with hpet_enable part added */
7303     static void __init hpet_time_init(void)
7304     {
7305     - xtime.tv_sec = get_cmos_time();
7306     - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7307     - set_normalized_timespec(&wall_to_monotonic,
7308     - -xtime.tv_sec, -xtime.tv_nsec);
7309     + struct timespec ts;
7310     + ts.tv_sec = get_cmos_time();
7311     + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7312     +
7313     + do_settimeofday(&ts);
7314    
7315     if ((hpet_enable() >= 0) && hpet_use_timer) {
7316     printk("Using HPET for base-timer\n");
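
The profile_pc() rewrite above leans on one observation: the lock function's return address sits either directly at the stack pointer or just above a saved EFLAGS, and EFLAGS always has bits 22-31 clear while i386 kernel text addresses (at or above PAGE_OFFSET) never do. A small user-space demonstration of the sp[0] >> 22 discriminator; the sample values are made up but representative:

#include <stdio.h>

static int looks_like_kernel_text(unsigned long v)
{
        return (v >> 22) != 0;  /* EFLAGS: bits 22-31 are always zero */
}

int main(void)
{
        unsigned long saved_eflags = 0x00000246;  /* IF|ZF|PF, high bits clear */
        unsigned long ret_addr = 0xc01234ab;      /* above PAGE_OFFSET */

        printf("%d %d\n",
               looks_like_kernel_text(saved_eflags),   /* prints 0 */
               looks_like_kernel_text(ret_addr));      /* prints 1 */
        return 0;
}
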
7317     --- a/arch/x86/kernel/traps_32-xen.c
7318     +++ b/arch/x86/kernel/traps_32-xen.c
7319     @@ -28,6 +28,7 @@
7320     #include <linux/kprobes.h>
7321     #include <linux/kexec.h>
7322     #include <linux/unwind.h>
7323     +#include <linux/uaccess.h>
7324    
7325     #ifdef CONFIG_EISA
7326     #include <linux/ioport.h>
7327     @@ -40,7 +41,6 @@
7328    
7329     #include <asm/processor.h>
7330     #include <asm/system.h>
7331     -#include <asm/uaccess.h>
7332     #include <asm/io.h>
7333     #include <asm/atomic.h>
7334     #include <asm/debugreg.h>
7335     @@ -51,11 +51,14 @@
7336     #include <asm/smp.h>
7337     #include <asm/arch_hooks.h>
7338     #include <asm/kdebug.h>
7339     +#include <asm/stacktrace.h>
7340    
7341     #include <linux/module.h>
7342    
7343     #include "mach_traps.h"
7344    
7345     +int panic_on_unrecovered_nmi;
7346     +
7347     asmlinkage int system_call(void);
7348    
7349     struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
7350     @@ -124,62 +127,63 @@
7351     p < (void *)tinfo + THREAD_SIZE - 3;
7352     }
7353    
7354     -/*
7355     - * Print one address/symbol entries per line.
7356     - */
7357     -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
7358     -{
7359     - printk(" [<%08lx>] ", addr);
7360     -
7361     - print_symbol("%s\n", addr);
7362     -}
7363     -
7364     static inline unsigned long print_context_stack(struct thread_info *tinfo,
7365     unsigned long *stack, unsigned long ebp,
7366     - char *log_lvl)
7367     + struct stacktrace_ops *ops, void *data)
7368     {
7369     unsigned long addr;
7370    
7371     #ifdef CONFIG_FRAME_POINTER
7372     while (valid_stack_ptr(tinfo, (void *)ebp)) {
7373     + unsigned long new_ebp;
7374     addr = *(unsigned long *)(ebp + 4);
7375     - print_addr_and_symbol(addr, log_lvl);
7376     + ops->address(data, addr);
7377     /*
7378     * break out of recursive entries (such as
7379     - * end_of_stack_stop_unwind_function):
7380     + * end_of_stack_stop_unwind_function). Also,
7381     + * we can never allow a frame pointer to
7382     + * move downwards!
7383     */
7384     - if (ebp == *(unsigned long *)ebp)
7385     + new_ebp = *(unsigned long *)ebp;
7386     + if (new_ebp <= ebp)
7387     break;
7388     - ebp = *(unsigned long *)ebp;
7389     + ebp = new_ebp;
7390     }
7391     #else
7392     while (valid_stack_ptr(tinfo, stack)) {
7393     addr = *stack++;
7394     if (__kernel_text_address(addr))
7395     - print_addr_and_symbol(addr, log_lvl);
7396     + ops->address(data, addr);
7397     }
7398     #endif
7399     return ebp;
7400     }
7401    
7402     +struct ops_and_data {
7403     + struct stacktrace_ops *ops;
7404     + void *data;
7405     +};
7406     +
7407     static asmlinkage int
7408     -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
7409     +dump_trace_unwind(struct unwind_frame_info *info, void *data)
7410     {
7411     + struct ops_and_data *oad = (struct ops_and_data *)data;
7412     int n = 0;
7413    
7414     while (unwind(info) == 0 && UNW_PC(info)) {
7415     n++;
7416     - print_addr_and_symbol(UNW_PC(info), log_lvl);
7417     + oad->ops->address(oad->data, UNW_PC(info));
7418     if (arch_unw_user_mode(info))
7419     break;
7420     }
7421     return n;
7422     }
7423    
7424     -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7425     - unsigned long *stack, char *log_lvl)
7426     +void dump_trace(struct task_struct *task, struct pt_regs *regs,
7427     + unsigned long *stack,
7428     + struct stacktrace_ops *ops, void *data)
7429     {
7430     - unsigned long ebp;
7431     + unsigned long ebp = 0;
7432    
7433     if (!task)
7434     task = current;
7435     @@ -187,54 +191,116 @@
7436     if (call_trace >= 0) {
7437     int unw_ret = 0;
7438     struct unwind_frame_info info;
7439     + struct ops_and_data oad = { .ops = ops, .data = data };
7440    
7441     if (regs) {
7442     if (unwind_init_frame_info(&info, task, regs) == 0)
7443     - unw_ret = show_trace_unwind(&info, log_lvl);
7444     + unw_ret = dump_trace_unwind(&info, &oad);
7445     } else if (task == current)
7446     - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
7447     + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7448     else {
7449     if (unwind_init_blocked(&info, task) == 0)
7450     - unw_ret = show_trace_unwind(&info, log_lvl);
7451     + unw_ret = dump_trace_unwind(&info, &oad);
7452     }
7453     if (unw_ret > 0) {
7454     if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7455     - print_symbol("DWARF2 unwinder stuck at %s\n",
7456     + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7457     UNW_PC(&info));
7458     if (UNW_SP(&info) >= PAGE_OFFSET) {
7459     - printk("Leftover inexact backtrace:\n");
7460     + ops->warning(data, "Leftover inexact backtrace:\n");
7461     stack = (void *)UNW_SP(&info);
7462     + if (!stack)
7463     + return;
7464     + ebp = UNW_FP(&info);
7465     } else
7466     - printk("Full inexact backtrace again:\n");
7467     + ops->warning(data, "Full inexact backtrace again:\n");
7468     } else if (call_trace >= 1)
7469     return;
7470     else
7471     - printk("Full inexact backtrace again:\n");
7472     + ops->warning(data, "Full inexact backtrace again:\n");
7473     } else
7474     - printk("Inexact backtrace:\n");
7475     + ops->warning(data, "Inexact backtrace:\n");
7476     }
7477     -
7478     - if (task == current) {
7479     - /* Grab ebp right from our regs */
7480     - asm ("movl %%ebp, %0" : "=r" (ebp) : );
7481     - } else {
7482     - /* ebp is the last reg pushed by switch_to */
7483     - ebp = *(unsigned long *) task->thread.esp;
7484     + if (!stack) {
7485     + unsigned long dummy;
7486     + stack = &dummy;
7487     + if (task && task != current)
7488     + stack = (unsigned long *)task->thread.esp;
7489     + }
7490     +
7491     +#ifdef CONFIG_FRAME_POINTER
7492     + if (!ebp) {
7493     + if (task == current) {
7494     + /* Grab ebp right from our regs */
7495     + asm ("movl %%ebp, %0" : "=r" (ebp) : );
7496     + } else {
7497     + /* ebp is the last reg pushed by switch_to */
7498     + ebp = *(unsigned long *) task->thread.esp;
7499     + }
7500     }
7501     +#endif
7502    
7503     while (1) {
7504     struct thread_info *context;
7505     context = (struct thread_info *)
7506     ((unsigned long)stack & (~(THREAD_SIZE - 1)));
7507     - ebp = print_context_stack(context, stack, ebp, log_lvl);
7508     + ebp = print_context_stack(context, stack, ebp, ops, data);
7509     + /* Should be after the line below, but somewhere
7510     + in early boot context comes out corrupted and we
7511     + can't reference it -AK */
7512     + if (ops->stack(data, "IRQ") < 0)
7513     + break;
7514     stack = (unsigned long*)context->previous_esp;
7515     if (!stack)
7516     break;
7517     - printk("%s =======================\n", log_lvl);
7518     }
7519     }
7520     +EXPORT_SYMBOL(dump_trace);
7521    
7522     -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
7523     +static void
7524     +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
7525     +{
7526     + printk("%s", (char *)data);
7527     + print_symbol(msg, symbol);
7528     + printk("\n");
7529     +}
7530     +
7531     +static void print_trace_warning(void *data, char *msg)
7532     +{
7533     + printk("%s%s\n", (char *)data, msg);
7534     +}
7535     +
7536     +static int print_trace_stack(void *data, char *name)
7537     +{
7538     + return 0;
7539     +}
7540     +
7541     +/*
7542     + * Print one address/symbol entry per line.
7543     + */
7544     +static void print_trace_address(void *data, unsigned long addr)
7545     +{
7546     + printk("%s [<%08lx>] ", (char *)data, addr);
7547     + print_symbol("%s\n", addr);
7548     +}
7549     +
7550     +static struct stacktrace_ops print_trace_ops = {
7551     + .warning = print_trace_warning,
7552     + .warning_symbol = print_trace_warning_symbol,
7553     + .stack = print_trace_stack,
7554     + .address = print_trace_address,
7555     +};
7556     +
7557     +static void
7558     +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7559     + unsigned long * stack, char *log_lvl)
7560     +{
7561     + dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
7562     + printk("%s =======================\n", log_lvl);
7563     +}
7564     +
7565     +void show_trace(struct task_struct *task, struct pt_regs *regs,
7566     + unsigned long * stack)
7567     {
7568     show_trace_log_lvl(task, regs, stack, "");
7569     }
7570     @@ -297,12 +363,13 @@
7571     ss = regs->xss & 0xffff;
7572     }
7573     print_modules();
7574     - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
7575     - "EFLAGS: %08lx (%s %.*s) \n",
7576     + printk(KERN_EMERG "CPU: %d\n"
7577     + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
7578     + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
7579     smp_processor_id(), 0xffff & regs->xcs, regs->eip,
7580     - print_tainted(), regs->eflags, system_utsname.release,
7581     - (int)strcspn(system_utsname.version, " "),
7582     - system_utsname.version);
7583     + print_tainted(), regs->eflags, init_utsname()->release,
7584     + (int)strcspn(init_utsname()->version, " "),
7585     + init_utsname()->version);
7586     print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
7587     printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
7588     regs->eax, regs->ebx, regs->ecx, regs->edx);
7589     @@ -319,6 +386,8 @@
7590     */
7591     if (in_kernel) {
7592     u8 __user *eip;
7593     + int code_bytes = 64;
7594     + unsigned char c;
7595    
7596     printk("\n" KERN_EMERG "Stack: ");
7597     show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
7598     @@ -326,9 +395,12 @@
7599     printk(KERN_EMERG "Code: ");
7600    
7601     eip = (u8 __user *)regs->eip - 43;
7602     - for (i = 0; i < 64; i++, eip++) {
7603     - unsigned char c;
7604     -
7605     + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7606     + /* try starting at EIP */
7607     + eip = (u8 __user *)regs->eip;
7608     + code_bytes = 32;
7609     + }
7610     + for (i = 0; i < code_bytes; i++, eip++) {
7611     if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7612     printk(" Bad EIP value.");
7613     break;
7614     @@ -349,7 +421,7 @@
7615    
7616     if (eip < PAGE_OFFSET)
7617     return;
7618     - if (__get_user(ud2, (unsigned short __user *)eip))
7619     + if (probe_kernel_address((unsigned short __user *)eip, ud2))
7620     return;
7621     if (ud2 != 0x0b0f)
7622     return;
7623     @@ -362,7 +434,8 @@
7624     char *file;
7625     char c;
7626    
7627     - if (__get_user(line, (unsigned short __user *)(eip + 2)))
7628     + if (probe_kernel_address((unsigned short __user *)(eip + 2),
7629     + line))
7630     break;
7631     if (__get_user(file, (char * __user *)(eip + 4)) ||
7632     (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
7633     @@ -604,18 +677,24 @@
7634     }
7635     }
7636    
7637     -static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
7638     +static __kprobes void
7639     +mem_parity_error(unsigned char reason, struct pt_regs * regs)
7640     {
7641     - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
7642     - "to continue\n");
7643     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7644     + "CPU %d.\n", reason, smp_processor_id());
7645     printk(KERN_EMERG "You probably have a hardware problem with your RAM "
7646     "chips\n");
7647     + if (panic_on_unrecovered_nmi)
7648     + panic("NMI: Not continuing");
7649     +
7650     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7651    
7652     /* Clear and disable the memory parity error line. */
7653     clear_mem_error(reason);
7654     }
7655    
7656     -static void io_check_error(unsigned char reason, struct pt_regs * regs)
7657     +static __kprobes void
7658     +io_check_error(unsigned char reason, struct pt_regs * regs)
7659     {
7660     printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
7661     show_registers(regs);
7662     @@ -624,7 +703,8 @@
7663     clear_io_check_error(reason);
7664     }
7665    
7666     -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7667     +static __kprobes void
7668     +unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7669     {
7670     #ifdef CONFIG_MCA
7671     /* Might actually be able to figure out what the guilty party
7672     @@ -634,15 +714,18 @@
7673     return;
7674     }
7675     #endif
7676     - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
7677     - reason, smp_processor_id());
7678     - printk("Dazed and confused, but trying to continue\n");
7679     - printk("Do you have a strange power saving mode enabled?\n");
7680     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7681     + "CPU %d.\n", reason, smp_processor_id());
7682     + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
7683     + if (panic_on_unrecovered_nmi)
7684     + panic("NMI: Not continuing");
7685     +
7686     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7687     }
7688    
7689     static DEFINE_SPINLOCK(nmi_print_lock);
7690    
7691     -void die_nmi (struct pt_regs *regs, const char *msg)
7692     +void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
7693     {
7694     if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
7695     NOTIFY_STOP)
7696     @@ -674,7 +757,7 @@
7697     do_exit(SIGSEGV);
7698     }
7699    
7700     -static void default_do_nmi(struct pt_regs * regs)
7701     +static __kprobes void default_do_nmi(struct pt_regs * regs)
7702     {
7703     unsigned char reason = 0;
7704    
7705     @@ -691,12 +774,12 @@
7706     * Ok, so this is none of the documented NMI sources,
7707     * so it must be the NMI watchdog.
7708     */
7709     - if (nmi_watchdog) {
7710     - nmi_watchdog_tick(regs);
7711     + if (nmi_watchdog_tick(regs, reason))
7712     return;
7713     - }
7714     + if (!do_nmi_callback(regs, smp_processor_id()))
7715     #endif
7716     - unknown_nmi_error(reason, regs);
7717     + unknown_nmi_error(reason, regs);
7718     +
7719     return;
7720     }
7721     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
7722     @@ -712,14 +795,7 @@
7723     reassert_nmi();
7724     }
7725    
7726     -static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
7727     -{
7728     - return 0;
7729     -}
7730     -
7731     -static nmi_callback_t nmi_callback = dummy_nmi_callback;
7732     -
7733     -fastcall void do_nmi(struct pt_regs * regs, long error_code)
7734     +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
7735     {
7736     int cpu;
7737    
7738     @@ -729,25 +805,11 @@
7739    
7740     ++nmi_count(cpu);
7741    
7742     - if (!rcu_dereference(nmi_callback)(regs, cpu))
7743     - default_do_nmi(regs);
7744     + default_do_nmi(regs);
7745    
7746     nmi_exit();
7747     }
7748    
7749     -void set_nmi_callback(nmi_callback_t callback)
7750     -{
7751     - vmalloc_sync_all();
7752     - rcu_assign_pointer(nmi_callback, callback);
7753     -}
7754     -EXPORT_SYMBOL_GPL(set_nmi_callback);
7755     -
7756     -void unset_nmi_callback(void)
7757     -{
7758     - nmi_callback = dummy_nmi_callback;
7759     -}
7760     -EXPORT_SYMBOL_GPL(unset_nmi_callback);
7761     -
7762     #ifdef CONFIG_KPROBES
7763     fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
7764     {
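
Beyond the NMI and __kprobes changes, the structural change above is that the stack walker is now dump_trace() driven by a struct stacktrace_ops callback table, with print_trace_ops as the in-tree printing client. A hypothetical second client that merely counts kernel text addresses (all names invented for illustration) could be wired up like this:

#include <linux/sched.h>
#include <asm/stacktrace.h>

static void count_warning(void *data, char *msg) { }
static void count_warning_symbol(void *data, char *msg, unsigned long sym) { }

/* Returning >= 0 lets the walker continue across the IRQ stack. */
static int count_stack(void *data, char *name)
{
        return 0;
}

static void count_address(void *data, unsigned long addr)
{
        ++*(unsigned int *)data;
}

static struct stacktrace_ops count_ops = {
        .warning        = count_warning,
        .warning_symbol = count_warning_symbol,
        .stack          = count_stack,
        .address        = count_address,
};

static unsigned int count_current_frames(void)
{
        unsigned int n = 0;

        dump_trace(current, NULL, NULL, &count_ops, &n);
        return n;
}
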
7765     --- a/arch/x86/kernel/traps_64-xen.c
7766     +++ b/arch/x86/kernel/traps_64-xen.c
7767     @@ -23,6 +23,7 @@
7768     #include <linux/delay.h>
7769     #include <linux/spinlock.h>
7770     #include <linux/interrupt.h>
7771     +#include <linux/kallsyms.h>
7772     #include <linux/module.h>
7773     #include <linux/moduleparam.h>
7774     #include <linux/nmi.h>
7775     @@ -45,6 +46,7 @@
7776     #include <asm/pda.h>
7777     #include <asm/proto.h>
7778     #include <asm/nmi.h>
7779     +#include <asm/stacktrace.h>
7780    
7781     asmlinkage void divide_error(void);
7782     asmlinkage void debug(void);
7783     @@ -114,7 +116,6 @@
7784     #endif
7785    
7786     #ifdef CONFIG_KALLSYMS
7787     -# include <linux/kallsyms.h>
7788     void printk_address(unsigned long address)
7789     {
7790     unsigned long offset = 0, symsize;
7791     @@ -142,7 +143,7 @@
7792     #endif
7793    
7794     static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
7795     - unsigned *usedp, const char **idp)
7796     + unsigned *usedp, char **idp)
7797     {
7798     #ifndef CONFIG_X86_NO_TSS
7799     static char ids[][8] = {
7800     @@ -162,26 +163,7 @@
7801     * 'stack' is in one of them:
7802     */
7803     for (k = 0; k < N_EXCEPTION_STACKS; k++) {
7804     - unsigned long end;
7805     -
7806     - /*
7807     - * set 'end' to the end of the exception stack.
7808     - */
7809     - switch (k + 1) {
7810     - /*
7811     - * TODO: this block is not needed i think, because
7812     - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
7813     - * properly too.
7814     - */
7815     -#if DEBUG_STKSZ > EXCEPTION_STKSZ
7816     - case DEBUG_STACK:
7817     - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
7818     - break;
7819     -#endif
7820     - default:
7821     - end = per_cpu(orig_ist, cpu).ist[k];
7822     - break;
7823     - }
7824     + unsigned long end = per_cpu(orig_ist, cpu).ist[k];
7825     /*
7826     * Is 'stack' above this exception frame's end?
7827     * If yes then skip to the next frame.
7828     @@ -236,13 +218,19 @@
7829     return NULL;
7830     }
7831    
7832     -static int show_trace_unwind(struct unwind_frame_info *info, void *context)
7833     +struct ops_and_data {
7834     + struct stacktrace_ops *ops;
7835     + void *data;
7836     +};
7837     +
7838     +static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
7839     {
7840     + struct ops_and_data *oad = (struct ops_and_data *)context;
7841     int n = 0;
7842    
7843     while (unwind(info) == 0 && UNW_PC(info)) {
7844     n++;
7845     - printk_address(UNW_PC(info));
7846     + oad->ops->address(oad->data, UNW_PC(info));
7847     if (arch_unw_user_mode(info))
7848     break;
7849     }
7850     @@ -256,13 +244,19 @@
7851     * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
7852     */
7853    
7854     -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
7855     +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
7856     {
7857     - const unsigned cpu = safe_smp_processor_id();
7858     + void *t = (void *)tinfo;
7859     + return p > t && p < t + THREAD_SIZE - 3;
7860     +}
7861     +
7862     +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
7863     + struct stacktrace_ops *ops, void *data)
7864     +{
7865     + const unsigned cpu = smp_processor_id();
7866     unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
7867     unsigned used = 0;
7868     -
7869     - printk("\nCall Trace:\n");
7870     + struct thread_info *tinfo;
7871    
7872     if (!tsk)
7873     tsk = current;
7874     @@ -270,32 +264,47 @@
7875     if (call_trace >= 0) {
7876     int unw_ret = 0;
7877     struct unwind_frame_info info;
7878     + struct ops_and_data oad = { .ops = ops, .data = data };
7879    
7880     if (regs) {
7881     if (unwind_init_frame_info(&info, tsk, regs) == 0)
7882     - unw_ret = show_trace_unwind(&info, NULL);
7883     + unw_ret = dump_trace_unwind(&info, &oad);
7884     } else if (tsk == current)
7885     - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
7886     + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7887     else {
7888     if (unwind_init_blocked(&info, tsk) == 0)
7889     - unw_ret = show_trace_unwind(&info, NULL);
7890     + unw_ret = dump_trace_unwind(&info, &oad);
7891     }
7892     if (unw_ret > 0) {
7893     if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7894     - print_symbol("DWARF2 unwinder stuck at %s\n",
7895     + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7896     UNW_PC(&info));
7897     if ((long)UNW_SP(&info) < 0) {
7898     - printk("Leftover inexact backtrace:\n");
7899     + ops->warning(data, "Leftover inexact backtrace:\n");
7900     stack = (unsigned long *)UNW_SP(&info);
7901     + if (!stack)
7902     + return;
7903     } else
7904     - printk("Full inexact backtrace again:\n");
7905     + ops->warning(data, "Full inexact backtrace again:\n");
7906     } else if (call_trace >= 1)
7907     return;
7908     else
7909     - printk("Full inexact backtrace again:\n");
7910     + ops->warning(data, "Full inexact backtrace again:\n");
7911     } else
7912     - printk("Inexact backtrace:\n");
7913     + ops->warning(data, "Inexact backtrace:\n");
7914     + }
7915     + if (!stack) {
7916     + unsigned long dummy;
7917     + stack = &dummy;
7918     + if (tsk && tsk != current)
7919     + stack = (unsigned long *)tsk->thread.rsp;
7920     }
7921     + /*
7922     + * Align the stack pointer on word boundary, later loops
7923     + * rely on that (and corruption / debug info bugs can cause
7924     + * unaligned values here):
7925     + */
7926     + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
7927    
7928     /*
7929     * Print function call entries within a stack. 'cond' is the
7930     @@ -305,7 +314,9 @@
7931     #define HANDLE_STACK(cond) \
7932     do while (cond) { \
7933     unsigned long addr = *stack++; \
7934     - if (kernel_text_address(addr)) { \
7935     + if (oops_in_progress ? \
7936     + __kernel_text_address(addr) : \
7937     + kernel_text_address(addr)) { \
7938     /* \
7939     * If the address is either in the text segment of the \
7940     * kernel, or in the region which contains vmalloc'ed \
7941     @@ -314,7 +325,7 @@
7942     * down the cause of the crash will be able to figure \
7943     * out the call path that was taken. \
7944     */ \
7945     - printk_address(addr); \
7946     + ops->address(data, addr); \
7947     } \
7948     } while (0)
7949    
7950     @@ -323,16 +334,17 @@
7951     * current stack address. If the stacks consist of nested
7952     * exceptions
7953     */
7954     - for ( ; ; ) {
7955     - const char *id;
7956     + for (;;) {
7957     + char *id;
7958     unsigned long *estack_end;
7959     estack_end = in_exception_stack(cpu, (unsigned long)stack,
7960     &used, &id);
7961    
7962     if (estack_end) {
7963     - printk(" <%s>", id);
7964     + if (ops->stack(data, id) < 0)
7965     + break;
7966     HANDLE_STACK (stack < estack_end);
7967     - printk(" <EOE>");
7968     + ops->stack(data, "<EOE>");
7969     /*
7970     * We link to the next stack via the
7971     * second-to-last pointer (index -2 to end) in the
7972     @@ -347,7 +359,8 @@
7973     (IRQSTACKSIZE - 64) / sizeof(*irqstack);
7974    
7975     if (stack >= irqstack && stack < irqstack_end) {
7976     - printk(" <IRQ>");
7977     + if (ops->stack(data, "IRQ") < 0)
7978     + break;
7979     HANDLE_STACK (stack < irqstack_end);
7980     /*
7981     * We link to the next stack (which would be
7982     @@ -356,7 +369,7 @@
7983     */
7984     stack = (unsigned long *) (irqstack_end[-1]);
7985     irqstack_end = NULL;
7986     - printk(" <EOI>");
7987     + ops->stack(data, "EOI");
7988     continue;
7989     }
7990     }
7991     @@ -364,19 +377,58 @@
7992     }
7993    
7994     /*
7995     - * This prints the process stack:
7996     + * This handles the process stack:
7997     */
7998     - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
7999     + tinfo = current_thread_info();
8000     + HANDLE_STACK (valid_stack_ptr(tinfo, stack));
8001     #undef HANDLE_STACK
8002     +}
8003     +EXPORT_SYMBOL(dump_trace);
8004     +
8005     +static void
8006     +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
8007     +{
8008     + print_symbol(msg, symbol);
8009     + printk("\n");
8010     +}
8011     +
8012     +static void print_trace_warning(void *data, char *msg)
8013     +{
8014     + printk("%s\n", msg);
8015     +}
8016     +
8017     +static int print_trace_stack(void *data, char *name)
8018     +{
8019     + printk(" <%s> ", name);
8020     + return 0;
8021     +}
8022     +
8023     +static void print_trace_address(void *data, unsigned long addr)
8024     +{
8025     + printk_address(addr);
8026     +}
8027     +
8028     +static struct stacktrace_ops print_trace_ops = {
8029     + .warning = print_trace_warning,
8030     + .warning_symbol = print_trace_warning_symbol,
8031     + .stack = print_trace_stack,
8032     + .address = print_trace_address,
8033     +};
8034    
8035     +void
8036     +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
8037     +{
8038     + printk("\nCall Trace:\n");
8039     + dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
8040     printk("\n");
8041     }
8042    
8043     -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
8044     +static void
8045     +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
8046     {
8047     unsigned long *stack;
8048     int i;
8049     - const int cpu = safe_smp_processor_id();
8050     + const int cpu = smp_processor_id();
8051     unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
8052     unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
8053    
8054     @@ -430,7 +482,7 @@
8055     int i;
8056     int in_kernel = !user_mode(regs);
8057     unsigned long rsp;
8058     - const int cpu = safe_smp_processor_id();
8059     + const int cpu = smp_processor_id();
8060     struct task_struct *cur = cpu_pda(cpu)->pcurrent;
8061    
8062     rsp = regs->rsp;
8063     @@ -505,9 +557,11 @@
8064    
8065     unsigned __kprobes long oops_begin(void)
8066     {
8067     - int cpu = safe_smp_processor_id();
8068     + int cpu = smp_processor_id();
8069     unsigned long flags;
8070    
8071     + oops_enter();
8072     +
8073     /* racy, but better than risking deadlock. */
8074     local_irq_save(flags);
8075     if (!spin_trylock(&die_lock)) {
8076     @@ -536,6 +590,7 @@
8077     spin_unlock_irqrestore(&die_lock, flags);
8078     if (panic_on_oops)
8079     panic("Fatal exception");
8080     + oops_exit();
8081     }
8082    
8083     void __kprobes __die(const char * str, struct pt_regs * regs, long err)
8084     @@ -573,7 +628,7 @@
8085     }
8086    
8087     #ifdef CONFIG_X86_LOCAL_APIC
8088     -void __kprobes die_nmi(char *str, struct pt_regs *regs)
8089     +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8090     {
8091     unsigned long flags = oops_begin();
8092    
8093     @@ -581,13 +636,12 @@
8094     * We are in trouble anyway, lets at least try
8095     * to get a message out.
8096     */
8097     - printk(str, safe_smp_processor_id());
8098     + printk(str, smp_processor_id());
8099     show_registers(regs);
8100     if (kexec_should_crash(current))
8101     crash_kexec(regs);
8102     - if (panic_on_timeout || panic_on_oops)
8103     - panic("nmi watchdog");
8104     - printk("console shuts up ...\n");
8105     + if (do_panic || panic_on_oops)
8106     + panic("Non maskable interrupt");
8107     oops_end(flags);
8108     nmi_exit();
8109     local_irq_enable();
8110     @@ -734,8 +788,15 @@
8111     static __kprobes void
8112     mem_parity_error(unsigned char reason, struct pt_regs * regs)
8113     {
8114     - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8115     - printk("You probably have a hardware problem with your RAM chips\n");
8116     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8117     + reason);
8118     + printk(KERN_EMERG "You probably have a hardware problem with your "
8119     + "RAM chips\n");
8120     +
8121     + if (panic_on_unrecovered_nmi)
8122     + panic("NMI: Not continuing");
8123     +
8124     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8125    
8126     #if 0 /* XEN */
8127     /* Clear and disable the memory parity error line. */
8128     @@ -762,9 +823,15 @@
8129    
8130     static __kprobes void
8131     unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8132     -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8133     - printk("Dazed and confused, but trying to continue\n");
8134     - printk("Do you have a strange power saving mode enabled?\n");
8135     +{
8136     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8137     + reason);
8138     + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8139     +
8140     + if (panic_on_unrecovered_nmi)
8141     + panic("NMI: Not continuing");
8142     +
8143     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8144     }
8145    
8146     /* Runs on IST stack. This code must keep interrupts off all the time.
8147     @@ -789,12 +856,12 @@
8148     * Ok, so this is none of the documented NMI sources,
8149     * so it must be the NMI watchdog.
8150     */
8151     - if (nmi_watchdog > 0) {
8152     - nmi_watchdog_tick(regs,reason);
8153     + if (nmi_watchdog_tick(regs,reason))
8154     return;
8155     - }
8156     #endif
8157     - unknown_nmi_error(reason, regs);
8158     + if (!do_nmi_callback(regs,cpu))
8159     + unknown_nmi_error(reason, regs);
8160     +
8161     return;
8162     }
8163     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
8164     @@ -1081,6 +1148,7 @@
8165     init_fpu(me);
8166     restore_fpu_checking(&me->thread.i387.fxsave);
8167     task_thread_info(me)->status |= TS_USEDFPU;
8168     + me->fpu_counter++;
8169     }
8170    
8171    
8172     @@ -1141,24 +1209,30 @@
8173     }
8174    
8175    
8176     -/* Actual parsing is done early in setup.c. */
8177     -static int __init oops_dummy(char *s)
8178     +static int __init oops_setup(char *s)
8179     {
8180     - panic_on_oops = 1;
8181     - return 1;
8182     + if (!s)
8183     + return -EINVAL;
8184     + if (!strcmp(s, "panic"))
8185     + panic_on_oops = 1;
8186     + return 0;
8187     }
8188     -__setup("oops=", oops_dummy);
8189     +early_param("oops", oops_setup);
8190    
8191     static int __init kstack_setup(char *s)
8192     {
8193     + if (!s)
8194     + return -EINVAL;
8195     kstack_depth_to_print = simple_strtoul(s,NULL,0);
8196     - return 1;
8197     + return 0;
8198     }
8199     -__setup("kstack=", kstack_setup);
8200     +early_param("kstack", kstack_setup);
8201    
8202     #ifdef CONFIG_STACK_UNWIND
8203     static int __init call_trace_setup(char *s)
8204     {
8205     + if (!s)
8206     + return -EINVAL;
8207     if (strcmp(s, "old") == 0)
8208     call_trace = -1;
8209     else if (strcmp(s, "both") == 0)
8210     @@ -1167,7 +1241,7 @@
8211     call_trace = 1;
8212     else if (strcmp(s, "new") == 0)
8213     call_trace = 2;
8214     - return 1;
8215     + return 0;
8216     }
8217     -__setup("call_trace=", call_trace_setup);
8218     +early_param("call_trace", call_trace_setup);
8219     #endif
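
The tail of this file converts oops=, kstack= and call_trace= from __setup() to early_param(), adopting the same 0/-EINVAL return convention as the other handlers in this patch. An illustrative boot entry exercising them; the kernel path and values are examples only:

# Hypothetical GRUB stanza; only the trailing options relate to this patch.
kernel /boot/vmlinuz-2.6.19-xen root=/dev/sda1 ro oops=panic kstack=64 call_trace=both
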
8220     --- a/arch/x86/kernel/vsyscall_64-xen.c
8221     +++ b/arch/x86/kernel/vsyscall_64-xen.c
8222     @@ -26,6 +26,10 @@
8223     #include <linux/seqlock.h>
8224     #include <linux/jiffies.h>
8225     #include <linux/sysctl.h>
8226     +#include <linux/getcpu.h>
8227     +#include <linux/cpu.h>
8228     +#include <linux/smp.h>
8229     +#include <linux/notifier.h>
8230    
8231     #include <asm/vsyscall.h>
8232     #include <asm/pgtable.h>
8233     @@ -33,11 +37,15 @@
8234     #include <asm/fixmap.h>
8235     #include <asm/errno.h>
8236     #include <asm/io.h>
8237     +#include <asm/segment.h>
8238     +#include <asm/desc.h>
8239     +#include <asm/topology.h>
8240    
8241     #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
8242    
8243     int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
8244     seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
8245     +int __vgetcpu_mode __section_vgetcpu_mode;
8246    
8247     #include <asm/unistd.h>
8248    
8249     @@ -61,8 +69,7 @@
8250     sequence = read_seqbegin(&__xtime_lock);
8251    
8252     sec = __xtime.tv_sec;
8253     - usec = (__xtime.tv_nsec / 1000) +
8254     - (__jiffies - __wall_jiffies) * (1000000 / HZ);
8255     + usec = __xtime.tv_nsec / 1000;
8256    
8257     if (__vxtime.mode != VXTIME_HPET) {
8258     t = get_cycles_sync();
8259     @@ -72,7 +79,8 @@
8260     __vxtime.tsc_quot) >> 32;
8261     /* See comment in x86_64 do_gettimeofday. */
8262     } else {
8263     - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8264     + usec += ((readl((void __iomem *)
8265     + fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8266     __vxtime.last) * __vxtime.quot) >> 32;
8267     }
8268     } while (read_seqretry(&__xtime_lock, sequence));
8269     @@ -127,9 +135,46 @@
8270     return __xtime.tv_sec;
8271     }
8272    
8273     -long __vsyscall(2) venosys_0(void)
8274     -{
8275     - return -ENOSYS;
8276     +/* Fast way to get current CPU and node.
8277     + This helps to implement per-node and per-CPU caches in user space.
8278     + The result is not guaranteed without CPU affinity, but usually
8279     + works out because the scheduler tries to keep a thread on the same
8280     + CPU.
8281     +
8282     + tcache must point to a two-element long array.
8283     + All arguments can be NULL. */
8284     +long __vsyscall(2)
8285     +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
8286     +{
8287     + unsigned int dummy, p;
8288     + unsigned long j = 0;
8289     +
8290     + /* Fast cache - only recompute the value once per jiffy and avoid
8291     + relatively costly rdtscp/cpuid otherwise.
8292     + This works because the scheduler usually keeps the process
8293     + on the same CPU and this syscall doesn't guarantee its
8294     + results anyway.
8295     + We do this here because otherwise user space would do it on
8296     + its own in a likely inferior way (no access to jiffies).
8297     + If you don't like it, pass NULL. */
8298     + if (tcache && tcache->blob[0] == (j = __jiffies)) {
8299     + p = tcache->blob[1];
8300     + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
8301     + /* Load per CPU data from RDTSCP */
8302     + rdtscp(dummy, dummy, p);
8303     + } else {
8304     + /* Load per CPU data from GDT */
8305     + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
8306     + }
8307     + if (tcache) {
8308     + tcache->blob[0] = j;
8309     + tcache->blob[1] = p;
8310     + }
8311     + if (cpu)
8312     + *cpu = p & 0xfff;
8313     + if (node)
8314     + *node = p >> 12;
8315     + return 0;
8316     }
8317    
8318     long __vsyscall(3) venosys_1(void)
8319     @@ -149,7 +194,8 @@
8320     void __user *buffer, size_t *lenp, loff_t *ppos)
8321     {
8322     extern u16 vsysc1, vsysc2;
8323     - u16 *map1, *map2;
8324     + u16 __iomem *map1;
8325     + u16 __iomem *map2;
8326     int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
8327     if (!write)
8328     return ret;
8329     @@ -164,11 +210,11 @@
8330     goto out;
8331     }
8332     if (!sysctl_vsyscall) {
8333     - *map1 = SYSCALL;
8334     - *map2 = SYSCALL;
8335     + writew(SYSCALL, map1);
8336     + writew(SYSCALL, map2);
8337     } else {
8338     - *map1 = NOP2;
8339     - *map2 = NOP2;
8340     + writew(NOP2, map1);
8341     + writew(NOP2, map2);
8342     }
8343     iounmap(map2);
8344     out:
8345     @@ -200,6 +246,48 @@
8346    
8347     #endif
8348    
8349     +/* Assume __initcall executes before all user space. Hopefully kmod
8350     + doesn't violate that. We'll find out if it does. */
8351     +static void __cpuinit vsyscall_set_cpu(int cpu)
8352     +{
8353     + unsigned long d;
8354     + unsigned long node = 0;
8355     +#ifdef CONFIG_NUMA
8356     + node = cpu_to_node[cpu];
8357     +#endif
8358     + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
8359     + write_rdtscp_aux((node << 12) | cpu);
8360     +
8361     + /* Store cpu number in limit so that it can be loaded quickly
8362     + in user space in vgetcpu.
8363     + 12 bits for the CPU and 8 bits for the node. */
8364     + d = 0x0f40000000000ULL;
8365     + d |= cpu;
8366     + d |= (node & 0xf) << 12;
8367     + d |= (node >> 4) << 48;
8368     + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
8369     + + GDT_ENTRY_PER_CPU),
8370     + d))
8371     + BUG();
8372     +}
8373     +
8374     +static void __cpuinit cpu_vsyscall_init(void *arg)
8375     +{
8376     + /* preemption should be already off */
8377     + vsyscall_set_cpu(raw_smp_processor_id());
8378     +}
8379     +
8380     +#ifdef CONFIG_HOTPLUG_CPU
8381     +static int __cpuinit
8382     +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
8383     +{
8384     + long cpu = (long)arg;
8385     + if (action == CPU_ONLINE)
8386     + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
8387     + return NOTIFY_DONE;
8388     +}
8389     +#endif
8390     +
8391     static void __init map_vsyscall(void)
8392     {
8393     extern char __vsyscall_0;
8394     @@ -214,13 +302,20 @@
8395     VSYSCALL_ADDR(__NR_vgettimeofday)));
8396     BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
8397     BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
8398     + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
8399     map_vsyscall();
8400     #ifdef CONFIG_XEN
8401     sysctl_vsyscall = 0; /* disable vgettimeofday() */
8402     + if (boot_cpu_has(X86_FEATURE_RDTSCP))
8403     + vgetcpu_mode = VGETCPU_RDTSCP;
8404     + else
8405     + vgetcpu_mode = VGETCPU_LSL;
8406     #endif
8407     #ifdef CONFIG_SYSCTL
8408     register_sysctl_table(kernel_root_table2, 0);
8409     #endif
8410     + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
8411     + hotcpu_notifier(cpu_vsyscall_notifier, 0);
8412     return 0;
8413     }
8414    
8415     --- a/arch/x86/mach-xen/setup.c
8416     +++ b/arch/x86/mach-xen/setup.c
8417     @@ -103,8 +103,10 @@
8418    
8419     setup_xen_features();
8420    
8421     - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
8422     - set_fixaddr_top(pp.virt_start);
8423     + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
8424     + hypervisor_virt_start = pp.virt_start;
8425     + reserve_top_address(0UL - pp.virt_start);
8426     + }
8427    
8428     if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
8429     machine_to_phys_mapping = (unsigned long *)mapping.v_start;
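
The expression 0UL - pp.virt_start above exploits unsigned wraparound: it is
the number of bytes from the hypervisor's virt_start to the top of the address
space, i.e. exactly the hole that reserve_top_address() must carve out. A
small sketch of the arithmetic (the 32-bit virt_start value is hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t virt_start = 0xf5800000u; /* assumed hypervisor base */
            uint32_t reserve = 0u - virt_start; /* bytes up to 2^32 */
            printf("reserve = %#x (%u MiB)\n", reserve, reserve >> 20);
            return 0;
    }
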
8430     --- a/arch/x86/mm/fault_32-xen.c
8431     +++ b/arch/x86/mm/fault_32-xen.c
8432     @@ -27,21 +27,24 @@
8433     #include <asm/uaccess.h>
8434     #include <asm/desc.h>
8435     #include <asm/kdebug.h>
8436     +#include <asm/segment.h>
8437    
8438     extern void die(const char *,struct pt_regs *,long);
8439    
8440     -#ifdef CONFIG_KPROBES
8441     -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8442     +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8443     +
8444     int register_page_fault_notifier(struct notifier_block *nb)
8445     {
8446     vmalloc_sync_all();
8447     return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8448     }
8449     +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8450    
8451     int unregister_page_fault_notifier(struct notifier_block *nb)
8452     {
8453     return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8454     }
8455     +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8456    
8457     static inline int notify_page_fault(enum die_val val, const char *str,
8458     struct pt_regs *regs, long err, int trap, int sig)
8459     @@ -55,14 +58,6 @@
8460     };
8461     return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8462     }
8463     -#else
8464     -static inline int notify_page_fault(enum die_val val, const char *str,
8465     - struct pt_regs *regs, long err, int trap, int sig)
8466     -{
8467     - return NOTIFY_DONE;
8468     -}
8469     -#endif
8470     -
8471    
8472     /*
8473     * Unlock any spinlocks which will prevent us from getting the
8474     @@ -119,10 +114,10 @@
8475     }
8476    
8477     /* The standard kernel/user address space limit. */
8478     - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
8479     + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
8480    
8481     /* By far the most common cases. */
8482     - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
8483     + if (likely(SEGMENT_IS_FLAT_CODE(seg)))
8484     return eip;
8485    
8486     /* Check the segment exists, is within the current LDT/GDT size,
8487     @@ -559,11 +554,7 @@
8488     write = 0;
8489     switch (error_code & 3) {
8490     default: /* 3: write, present */
8491     -#ifdef TEST_VERIFY_AREA
8492     - if (regs->cs == GET_KERNEL_CS())
8493     - printk("WP fault at %08lx\n", regs->eip);
8494     -#endif
8495     - /* fall through */
8496     + /* fall through */
8497     case 2: /* write, not present */
8498     if (!(vma->vm_flags & VM_WRITE))
8499     goto bad_area;
8500     @@ -572,7 +563,7 @@
8501     case 1: /* read, present */
8502     goto bad_area;
8503     case 0: /* read, not present */
8504     - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8505     + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8506     goto bad_area;
8507     }
8508    
8509     @@ -704,7 +695,7 @@
8510     */
8511     out_of_memory:
8512     up_read(&mm->mmap_sem);
8513     - if (tsk->pid == 1) {
8514     + if (is_init(tsk)) {
8515     yield();
8516     down_read(&mm->mmap_sem);
8517     goto survive;
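
With the CONFIG_KPROBES guard removed and both registration functions exported
above, any module can subscribe to fault notifications. A hedged sketch of a
module using that 2.6.19-era interface (the module and its callback are
hypothetical; the prototype is the one shown in the hunk above):

    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int my_fault_event(struct notifier_block *nb, unsigned long val,
                              void *data)
    {
            /* observe only; NOTIFY_DONE lets normal fault handling proceed */
            return NOTIFY_DONE;
    }

    static struct notifier_block my_nb = {
            .notifier_call = my_fault_event,
    };

    static int __init my_init(void)
    {
            return register_page_fault_notifier(&my_nb);
    }

    static void __exit my_exit(void)
    {
            unregister_page_fault_notifier(&my_nb);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");
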
8518     --- a/arch/x86/mm/fault_64-xen.c
8519     +++ b/arch/x86/mm/fault_64-xen.c
8520     @@ -40,8 +40,7 @@
8521     #define PF_RSVD (1<<3)
8522     #define PF_INSTR (1<<4)
8523    
8524     -#ifdef CONFIG_KPROBES
8525     -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8526     +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8527    
8528     /* Hook to register for page fault notifications */
8529     int register_page_fault_notifier(struct notifier_block *nb)
8530     @@ -49,11 +48,13 @@
8531     vmalloc_sync_all();
8532     return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8533     }
8534     +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8535    
8536     int unregister_page_fault_notifier(struct notifier_block *nb)
8537     {
8538     return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8539     }
8540     +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8541    
8542     static inline int notify_page_fault(enum die_val val, const char *str,
8543     struct pt_regs *regs, long err, int trap, int sig)
8544     @@ -67,13 +68,6 @@
8545     };
8546     return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8547     }
8548     -#else
8549     -static inline int notify_page_fault(enum die_val val, const char *str,
8550     - struct pt_regs *regs, long err, int trap, int sig)
8551     -{
8552     - return NOTIFY_DONE;
8553     -}
8554     -#endif
8555    
8556     void bust_spinlocks(int yes)
8557     {
8558     @@ -102,7 +96,7 @@
8559     static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
8560     unsigned long error_code)
8561     {
8562     - unsigned char *instr;
8563     + unsigned char __user *instr;
8564     int scan_more = 1;
8565     int prefetch = 0;
8566     unsigned char *max_instr;
8567     @@ -111,7 +105,7 @@
8568     if (error_code & PF_INSTR)
8569     return 0;
8570    
8571     - instr = (unsigned char *)convert_rip_to_linear(current, regs);
8572     + instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
8573     max_instr = instr + 15;
8574    
8575     if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
8576     @@ -122,7 +116,7 @@
8577     unsigned char instr_hi;
8578     unsigned char instr_lo;
8579    
8580     - if (__get_user(opcode, instr))
8581     + if (__get_user(opcode, (char __user *)instr))
8582     break;
8583    
8584     instr_hi = opcode & 0xf0;
8585     @@ -160,7 +154,7 @@
8586     case 0x00:
8587     /* Prefetch instruction is 0x0F0D or 0x0F18 */
8588     scan_more = 0;
8589     - if (__get_user(opcode, instr))
8590     + if (__get_user(opcode, (char __user *)instr))
8591     break;
8592     prefetch = (instr_lo == 0xF) &&
8593     (opcode == 0x0D || opcode == 0x18);
8594     @@ -176,7 +170,7 @@
8595     static int bad_address(void *p)
8596     {
8597     unsigned long dummy;
8598     - return __get_user(dummy, (unsigned long *)p);
8599     + return __get_user(dummy, (unsigned long __user *)p);
8600     }
8601    
8602     void dump_pagetable(unsigned long address)
8603     @@ -248,7 +242,7 @@
8604    
8605     int unhandled_signal(struct task_struct *tsk, int sig)
8606     {
8607     - if (tsk->pid == 1)
8608     + if (is_init(tsk))
8609     return 1;
8610     if (tsk->ptrace & PT_PTRACED)
8611     return 0;
8612     @@ -300,7 +294,7 @@
8613     if (pgd_none(*pgd))
8614     set_pgd(pgd, *pgd_ref);
8615     else
8616     - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8617     + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8618    
8619     /* Below here mismatches are bugs because these lower tables
8620     are shared */
8621     @@ -309,7 +303,7 @@
8622     pud_ref = pud_offset(pgd_ref, address);
8623     if (pud_none(*pud_ref))
8624     return -1;
8625     - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
8626     + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
8627     BUG();
8628     pmd = pmd_offset(pud, address);
8629     pmd_ref = pmd_offset(pud_ref, address);
8630     @@ -531,7 +525,7 @@
8631     case PF_PROT: /* read, present */
8632     goto bad_area;
8633     case 0: /* read, not present */
8634     - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8635     + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8636     goto bad_area;
8637     }
8638    
8639     @@ -647,7 +641,7 @@
8640     */
8641     out_of_memory:
8642     up_read(&mm->mmap_sem);
8643     - if (current->pid == 1) {
8644     + if (is_init(current)) {
8645     yield();
8646     goto again;
8647     }
8648     @@ -702,7 +696,7 @@
8649     if (pgd_none(*pgd))
8650     set_pgd(pgd, *pgd_ref);
8651     else
8652     - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8653     + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8654     }
8655     spin_unlock(&pgd_lock);
8656     set_bit(pgd_index(address), insync);
8657     --- a/arch/x86/mm/highmem_32-xen.c
8658     +++ b/arch/x86/mm/highmem_32-xen.c
8659     @@ -38,11 +38,9 @@
8660    
8661     idx = type + KM_TYPE_NR*smp_processor_id();
8662     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8663     -#ifdef CONFIG_DEBUG_HIGHMEM
8664     if (!pte_none(*(kmap_pte-idx)))
8665     BUG();
8666     -#endif
8667     - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8668     + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8669    
8670     return (void*) vaddr;
8671     }
8672     @@ -62,36 +60,26 @@
8673    
8674     void kunmap_atomic(void *kvaddr, enum km_type type)
8675     {
8676     -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
8677     unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
8678     enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
8679    
8680     - if (vaddr < FIXADDR_START) { // FIXME
8681     +#ifdef CONFIG_DEBUG_HIGHMEM
8682     + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
8683     dec_preempt_count();
8684     preempt_check_resched();
8685     return;
8686     }
8687     -#endif
8688    
8689     -#if defined(CONFIG_DEBUG_HIGHMEM)
8690     if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
8691     BUG();
8692     -
8693     - /*
8694     - * force other mappings to Oops if they'll try to access
8695     - * this pte without first remap it
8696     - */
8697     - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8698     - __flush_tlb_one(vaddr);
8699     -#elif defined(CONFIG_XEN)
8700     +#endif
8701     /*
8702     - * We must ensure there are no dangling pagetable references when
8703     - * returning memory to Xen (decrease_reservation).
8704     - * XXX TODO: We could make this faster by only zapping when
8705     - * kmap_flush_unused is called but that is trickier and more invasive.
8706     + * Force other mappings to Oops if they'll try to access this pte
8707     + * without first remapping it. Keeping stale mappings around is a bad idea
8708     + * also, in case the page changes cacheability attributes or becomes
8709     + * a protected page in a hypervisor.
8710     */
8711     - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8712     -#endif
8713     + kpte_clear_flush(kmap_pte-idx, vaddr);
8714    
8715     dec_preempt_count();
8716     preempt_check_resched();
8717     @@ -110,7 +98,6 @@
8718     idx = type + KM_TYPE_NR*smp_processor_id();
8719     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8720     set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
8721     - __flush_tlb_one(vaddr);
8722    
8723     return (void*) vaddr;
8724     }
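
Both the Xen and DEBUG_HIGHMEM paths now share one pte-clearing primitive,
kpte_clear_flush(), instead of the two divergent branches deleted above. For
context, a hedged sketch of the (unchanged) 2.6.19 atomic-kmap calling
convention this code implements; the helper itself is illustrative:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void copy_from_page(struct page *page, void *dst, size_t len)
    {
            void *src = kmap_atomic(page, KM_USER0); /* disables preemption */
            memcpy(dst, src, len);
            kunmap_atomic(src, KM_USER0); /* clears and flushes the slot pte */
    }
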
8725     --- a/arch/x86/mm/hypervisor.c
8726     +++ b/arch/x86/mm/hypervisor.c
8727     @@ -569,7 +569,8 @@
8728     #define MAX_BATCHED_FULL_PTES 32
8729    
8730     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
8731     - unsigned long addr, unsigned long end, pgprot_t newprot)
8732     + unsigned long addr, unsigned long end, pgprot_t newprot,
8733     + int dirty_accountable)
8734     {
8735     int rc = 0, i = 0;
8736     mmu_update_t u[MAX_BATCHED_FULL_PTES];
8737     @@ -582,10 +583,14 @@
8738     pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
8739     do {
8740     if (pte_present(*pte)) {
8741     + pte_t ptent = pte_modify(*pte, newprot);
8742     +
8743     + if (dirty_accountable && pte_dirty(ptent))
8744     + ptent = pte_mkwrite(ptent);
8745     u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
8746     | ((unsigned long)pte & ~PAGE_MASK)
8747     | MMU_PT_UPDATE_PRESERVE_AD;
8748     - u[i].val = __pte_val(pte_modify(*pte, newprot));
8749     + u[i].val = __pte_val(ptent);
8750     if (++i == MAX_BATCHED_FULL_PTES) {
8751     if ((rc = HYPERVISOR_mmu_update(
8752     &u[0], i, NULL, DOMID_SELF)) != 0)
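
xen_change_pte_range() above uses the usual flush-when-full batching shape:
collect up to MAX_BATCHED_FULL_PTES mmu_update_t entries, issue a single
HYPERVISOR_mmu_update per full batch, and flush any tail at the end. A generic
user-space model of that shape (everything here is illustrative):

    #include <stdio.h>

    #define BATCH 32

    static int flush(const int *buf, int n)
    {
            printf("flushing %d updates\n", n);
            return 0; /* 0 = success, mirroring the hypercall convention */
    }

    int main(void)
    {
            int buf[BATCH], i = 0, rc = 0;

            for (int v = 0; v < 100; v++) {
                    buf[i] = v;
                    if (++i == BATCH) { /* batch full: flush now */
                            if ((rc = flush(buf, i)) != 0)
                                    return rc;
                            i = 0;
                    }
            }
            if (i != 0) /* flush the remainder */
                    rc = flush(buf, i);
            return rc;
    }
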
8753     --- a/arch/x86/mm/init_32-xen.c
8754     +++ b/arch/x86/mm/init_32-xen.c
8755     @@ -464,16 +464,22 @@
8756     * on Enable
8757     * off Disable
8758     */
8759     -void __init noexec_setup(const char *str)
8760     +static int __init noexec_setup(char *str)
8761     {
8762     - if (!strncmp(str, "on",2) && cpu_has_nx) {
8763     - __supported_pte_mask |= _PAGE_NX;
8764     - disable_nx = 0;
8765     - } else if (!strncmp(str,"off",3)) {
8766     + if (!str || !strcmp(str, "on")) {
8767     + if (cpu_has_nx) {
8768     + __supported_pte_mask |= _PAGE_NX;
8769     + disable_nx = 0;
8770     + }
8771     + } else if (!strcmp(str,"off")) {
8772     disable_nx = 1;
8773     __supported_pte_mask &= ~_PAGE_NX;
8774     - }
8775     + } else
8776     + return -EINVAL;
8777     +
8778     + return 0;
8779     }
8780     +early_param("noexec", noexec_setup);
8781    
8782     int nx_enabled = 0;
8783     #ifdef CONFIG_X86_PAE
8784     @@ -516,6 +522,7 @@
8785     pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
8786     else
8787     pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
8788     + pte_update_defer(&init_mm, vaddr, pte);
8789     __flush_tlb_all();
8790     out:
8791     return ret;
8792     @@ -598,18 +605,6 @@
8793     }
8794     }
8795    
8796     -static void __init set_max_mapnr_init(void)
8797     -{
8798     -#ifdef CONFIG_HIGHMEM
8799     - num_physpages = highend_pfn;
8800     -#else
8801     - num_physpages = max_low_pfn;
8802     -#endif
8803     -#ifdef CONFIG_FLATMEM
8804     - max_mapnr = num_physpages;
8805     -#endif
8806     -}
8807     -
8808     static struct kcore_list kcore_mem, kcore_vmalloc;
8809    
8810     void __init mem_init(void)
8811     @@ -630,8 +625,7 @@
8812     #endif
8813    
8814     #ifdef CONFIG_FLATMEM
8815     - if (!mem_map)
8816     - BUG();
8817     + BUG_ON(!mem_map);
8818     #endif
8819    
8820     bad_ppro = ppro_with_ram_bug();
8821     @@ -646,17 +640,6 @@
8822     }
8823     #endif
8824    
8825     - set_max_mapnr_init();
8826     -
8827     -#ifdef CONFIG_HIGHMEM
8828     - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
8829     -#else
8830     - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
8831     -#endif
8832     - printk("vmalloc area: %lx-%lx, maxmem %lx\n",
8833     - VMALLOC_START,VMALLOC_END,MAXMEM);
8834     - BUG_ON(VMALLOC_START > VMALLOC_END);
8835     -
8836     /* this will put all low memory onto the freelists */
8837     totalram_pages += free_all_bootmem();
8838     /* XEN: init and count low-mem pages outside initial allocation. */
8839     @@ -694,6 +677,48 @@
8840     (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
8841     );
8842    
8843     +#if 1 /* double-sanity-check paranoia */
8844     + printk("virtual kernel memory layout:\n"
8845     + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8846     +#ifdef CONFIG_HIGHMEM
8847     + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8848     +#endif
8849     + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
8850     + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
8851     + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
8852     + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
8853     + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
8854     + FIXADDR_START, FIXADDR_TOP,
8855     + (FIXADDR_TOP - FIXADDR_START) >> 10,
8856     +
8857     +#ifdef CONFIG_HIGHMEM
8858     + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
8859     + (LAST_PKMAP*PAGE_SIZE) >> 10,
8860     +#endif
8861     +
8862     + VMALLOC_START, VMALLOC_END,
8863     + (VMALLOC_END - VMALLOC_START) >> 20,
8864     +
8865     + (unsigned long)__va(0), (unsigned long)high_memory,
8866     + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
8867     +
8868     + (unsigned long)&__init_begin, (unsigned long)&__init_end,
8869     + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
8870     +
8871     + (unsigned long)&_etext, (unsigned long)&_edata,
8872     + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
8873     +
8874     + (unsigned long)&_text, (unsigned long)&_etext,
8875     + ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
8876     +
8877     +#ifdef CONFIG_HIGHMEM
8878     + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
8879     + BUG_ON(VMALLOC_END > PKMAP_BASE);
8880     +#endif
8881     + BUG_ON(VMALLOC_START > VMALLOC_END);
8882     + BUG_ON((unsigned long)high_memory > VMALLOC_START);
8883     +#endif /* double-sanity-check paranoia */
8884     +
8885     #ifdef CONFIG_X86_PAE
8886     if (!cpu_has_pae)
8887     panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
8888     @@ -724,7 +749,7 @@
8889     int arch_add_memory(int nid, u64 start, u64 size)
8890     {
8891     struct pglist_data *pgdata = &contig_page_data;
8892     - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
8893     + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
8894     unsigned long start_pfn = start >> PAGE_SHIFT;
8895     unsigned long nr_pages = size >> PAGE_SHIFT;
8896    
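
The noexec conversion above is the standard early_param() pattern: the handler
receives the text after "opt=" (or NULL for a bare "opt") and returns -EINVAL
for values it rejects. A minimal sketch with a hypothetical option name:

    #include <linux/init.h>
    #include <linux/errno.h>
    #include <linux/string.h>

    static int my_flag __initdata;

    static int __init myopt_setup(char *str)
    {
            if (!str || !strcmp(str, "on"))
                    my_flag = 1; /* "myopt" or "myopt=on" */
            else if (!strcmp(str, "off"))
                    my_flag = 0;
            else
                    return -EINVAL; /* unrecognized value */
            return 0;
    }
    early_param("myopt", myopt_setup);
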
8897     --- a/arch/x86/mm/init_64-xen.c
8898     +++ b/arch/x86/mm/init_64-xen.c
8899     @@ -61,8 +61,6 @@
8900    
8901     extern unsigned long *contiguous_bitmap;
8902    
8903     -static unsigned long dma_reserve __initdata;
8904     -
8905     DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
8906     extern unsigned long start_pfn;
8907    
8908     @@ -416,7 +414,6 @@
8909    
8910     /* actually usually some more */
8911     if (size >= LARGE_PAGE_SIZE) {
8912     - printk("SMBIOS area too long %lu\n", size);
8913     return NULL;
8914     }
8915     set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
8916     @@ -438,13 +435,15 @@
8917     #endif
8918    
8919     static void __meminit
8920     -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
8921     +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
8922     {
8923     - int i, k;
8924     + int i = pmd_index(address);
8925    
8926     - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
8927     + for (; i < PTRS_PER_PMD; i++) {
8928     unsigned long pte_phys;
8929     + pmd_t *pmd = pmd_page + i;
8930     pte_t *pte, *pte_save;
8931     + int k;
8932    
8933     if (address >= end) {
8934     if (!after_bootmem)
8935     @@ -452,6 +451,12 @@
8936     set_pmd(pmd, __pmd(0));
8937     break;
8938     }
8939     +
8940     + if (__pmd_val(*pmd)) {
8941     + address += PMD_SIZE;
8942     + continue;
8943     + }
8944     +
8945     pte = alloc_static_page(&pte_phys);
8946     pte_save = pte;
8947     for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
8948     @@ -474,40 +479,35 @@
8949     static void __meminit
8950     phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
8951     {
8952     - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
8953     -
8954     - if (pmd_none(*pmd)) {
8955     - spin_lock(&init_mm.page_table_lock);
8956     - phys_pmd_init(pmd, address, end);
8957     - spin_unlock(&init_mm.page_table_lock);
8958     - __flush_tlb_all();
8959     - }
8960     + pmd_t *pmd = pmd_offset(pud,0);
8961     + spin_lock(&init_mm.page_table_lock);
8962     + phys_pmd_init(pmd, address, end);
8963     + spin_unlock(&init_mm.page_table_lock);
8964     + __flush_tlb_all();
8965     }
8966    
8967     -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
8968     +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
8969     {
8970     - long i = pud_index(address);
8971     -
8972     - pud = pud + i;
8973     -
8974     - if (after_bootmem && pud_val(*pud)) {
8975     - phys_pmd_update(pud, address, end);
8976     - return;
8977     - }
8978     + int i = pud_index(addr);
8979    
8980     - for (; i < PTRS_PER_PUD; pud++, i++) {
8981     - unsigned long paddr, pmd_phys;
8982     + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
8983     + unsigned long pmd_phys;
8984     + pud_t *pud = pud_page + pud_index(addr);
8985     pmd_t *pmd;
8986    
8987     - paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
8988     - if (paddr >= end)
8989     + if (addr >= end)
8990     break;
8991    
8992     + if (__pud_val(*pud)) {
8993     + phys_pmd_update(pud, addr, end);
8994     + continue;
8995     + }
8996     +
8997     pmd = alloc_static_page(&pmd_phys);
8998     early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
8999     spin_lock(&init_mm.page_table_lock);
9000     set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
9001     - phys_pmd_init(pmd, paddr, end);
9002     + phys_pmd_init(pmd, addr, end);
9003     spin_unlock(&init_mm.page_table_lock);
9004     }
9005     __flush_tlb();
9006     @@ -771,69 +771,18 @@
9007     #endif
9008     }
9009    
9010     -/* Compute zone sizes for the DMA and DMA32 zones in a node. */
9011     -__init void
9012     -size_zones(unsigned long *z, unsigned long *h,
9013     - unsigned long start_pfn, unsigned long end_pfn)
9014     -{
9015     - int i;
9016     - unsigned long w;
9017     -
9018     - for (i = 0; i < MAX_NR_ZONES; i++)
9019     - z[i] = 0;
9020     -
9021     - if (start_pfn < MAX_DMA_PFN)
9022     - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
9023     - if (start_pfn < MAX_DMA32_PFN) {
9024     - unsigned long dma32_pfn = MAX_DMA32_PFN;
9025     - if (dma32_pfn > end_pfn)
9026     - dma32_pfn = end_pfn;
9027     - z[ZONE_DMA32] = dma32_pfn - start_pfn;
9028     - }
9029     - z[ZONE_NORMAL] = end_pfn - start_pfn;
9030     -
9031     - /* Remove lower zones from higher ones. */
9032     - w = 0;
9033     - for (i = 0; i < MAX_NR_ZONES; i++) {
9034     - if (z[i])
9035     - z[i] -= w;
9036     - w += z[i];
9037     - }
9038     -
9039     - /* Compute holes */
9040     - w = start_pfn;
9041     - for (i = 0; i < MAX_NR_ZONES; i++) {
9042     - unsigned long s = w;
9043     - w += z[i];
9044     - h[i] = e820_hole_size(s, w);
9045     - }
9046     -
9047     - /* Add the space pace needed for mem_map to the holes too. */
9048     - for (i = 0; i < MAX_NR_ZONES; i++)
9049     - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
9050     -
9051     - /* The 16MB DMA zone has the kernel and other misc mappings.
9052     - Account them too */
9053     - if (h[ZONE_DMA]) {
9054     - h[ZONE_DMA] += dma_reserve;
9055     - if (h[ZONE_DMA] >= z[ZONE_DMA]) {
9056     - printk(KERN_WARNING
9057     - "Kernel too large and filling up ZONE_DMA?\n");
9058     - h[ZONE_DMA] = z[ZONE_DMA];
9059     - }
9060     - }
9061     -}
9062     -
9063     #ifndef CONFIG_NUMA
9064     void __init paging_init(void)
9065     {
9066     - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
9067     + unsigned long max_zone_pfns[MAX_NR_ZONES];
9068     + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
9069     + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
9070     + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
9071     + max_zone_pfns[ZONE_NORMAL] = end_pfn;
9072    
9073     memory_present(0, 0, end_pfn);
9074     sparse_init();
9075     - size_zones(zones, holes, 0, end_pfn);
9076     - free_area_init_node(0, NODE_DATA(0), zones,
9077     - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
9078     + free_area_init_nodes(max_zone_pfns);
9079    
9080     init_mm.context.pinned = 1;
9081     }
9082     @@ -887,36 +836,23 @@
9083    
9084     #ifdef CONFIG_MEMORY_HOTPLUG
9085     /*
9086     - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9087     - * via probe interface of sysfs. If acpi notifies hot-add event, then it
9088     - * can tell node id by searching dsdt. But, probe interface doesn't have
9089     - * node id. So, return 0 as node id at this time.
9090     - */
9091     -#ifdef CONFIG_NUMA
9092     -int memory_add_physaddr_to_nid(u64 start)
9093     -{
9094     - return 0;
9095     -}
9096     -#endif
9097     -
9098     -/*
9099     * Memory is added always to NORMAL zone. This means you will never get
9100     * additional DMA/DMA32 memory.
9101     */
9102     int arch_add_memory(int nid, u64 start, u64 size)
9103     {
9104     struct pglist_data *pgdat = NODE_DATA(nid);
9105     - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9106     + struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9107     unsigned long start_pfn = start >> PAGE_SHIFT;
9108     unsigned long nr_pages = size >> PAGE_SHIFT;
9109     int ret;
9110    
9111     + init_memory_mapping(start, (start + size -1));
9112     +
9113     ret = __add_pages(zone, start_pfn, nr_pages);
9114     if (ret)
9115     goto error;
9116    
9117     - init_memory_mapping(start, (start + size -1));
9118     -
9119     return ret;
9120     error:
9121     printk("%s: Problem encountered in __add_pages!\n", __func__);
9122     @@ -930,7 +866,17 @@
9123     }
9124     EXPORT_SYMBOL_GPL(remove_memory);
9125    
9126     -#else /* CONFIG_MEMORY_HOTPLUG */
9127     +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9128     +int memory_add_physaddr_to_nid(u64 start)
9129     +{
9130     + return 0;
9131     +}
9132     +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9133     +#endif
9134     +
9135     +#endif /* CONFIG_MEMORY_HOTPLUG */
9136     +
9137     +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9138     /*
9139     * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9140     * just online the pages.
9141     @@ -956,7 +902,7 @@
9142     }
9143     return err;
9144     }
9145     -#endif /* CONFIG_MEMORY_HOTPLUG */
9146     +#endif
9147    
9148     static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9149     kcore_vsyscall;
9150     @@ -973,12 +919,6 @@
9151    
9152     pci_iommu_alloc();
9153    
9154     - /* How many end-of-memory variables you have, grandma! */
9155     - max_low_pfn = end_pfn;
9156     - max_pfn = end_pfn;
9157     - num_physpages = end_pfn;
9158     - high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9159     -
9160     /* clear the zero-page */
9161     memset(empty_zero_page, 0, PAGE_SIZE);
9162    
9163     @@ -996,7 +936,8 @@
9164     init_page_count(pfn_to_page(pfn));
9165     totalram_pages++;
9166     }
9167     - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9168     + reservedpages = end_pfn - totalram_pages -
9169     + absent_pages_in_range(0, end_pfn);
9170    
9171     after_bootmem = 1;
9172    
9173     @@ -1103,15 +1044,34 @@
9174    
9175     void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9176     {
9177     - /* Should check here against the e820 map to avoid double free */
9178     #ifdef CONFIG_NUMA
9179     int nid = phys_to_nid(phys);
9180     +#endif
9181     + unsigned long pfn = phys >> PAGE_SHIFT;
9182     + if (pfn >= end_pfn) {
9183     + /* This can happen with kdump kernels when accessing firmware
9184     + tables. */
9185     + if (pfn < end_pfn_map)
9186     + return;
9187     + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9188     + phys, len);
9189     + return;
9190     + }
9191     +
9192     + /* Should check here against the e820 map to avoid double free */
9193     +#ifdef CONFIG_NUMA
9194     reserve_bootmem_node(NODE_DATA(nid), phys, len);
9195     #else
9196     reserve_bootmem(phys, len);
9197     #endif
9198     - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9199     +#ifndef CONFIG_XEN
9200     + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9201     + static unsigned long dma_reserve __initdata;
9202     +
9203     dma_reserve += len / PAGE_SIZE;
9204     + set_dma_reserve(dma_reserve);
9205     + }
9206     +#endif
9207     }
9208    
9209     int kern_addr_valid(unsigned long addr)
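
paging_init() now just reports the highest PFN of each zone and lets
free_area_init_nodes() derive spans and holes (absent_pages_in_range() above
replaces e820_hole_size() for the same reason), retiring the hand-rolled
size_zones(). A user-space model of how cumulative limits become zone spans
(all numbers hypothetical):

    #include <stdio.h>

    enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

    int main(void)
    {
            unsigned long end_pfn = 0x140000; /* 5 GiB of RAM */
            unsigned long max_zone_pfns[MAX_NR_ZONES] = {
                    [ZONE_DMA]    = 0x1000,   /* 16 MiB */
                    [ZONE_DMA32]  = 0x100000, /* 4 GiB */
                    [ZONE_NORMAL] = 0x140000,
            };
            unsigned long prev = 0;

            for (int i = 0; i < MAX_NR_ZONES; i++) {
                    unsigned long top = max_zone_pfns[i] > end_pfn
                                      ? end_pfn : max_zone_pfns[i];
                    printf("zone %d: pfn %#lx..%#lx\n", i, prev, top);
                    prev = top;
            }
            return 0;
    }
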
9210     --- a/arch/x86/mm/ioremap_32-xen.c
9211     +++ b/arch/x86/mm/ioremap_32-xen.c
9212     @@ -12,7 +12,7 @@
9213     #include <linux/init.h>
9214     #include <linux/slab.h>
9215     #include <linux/module.h>
9216     -#include <asm/io.h>
9217     +#include <linux/io.h>
9218     #include <asm/fixmap.h>
9219     #include <asm/cacheflush.h>
9220     #include <asm/tlbflush.h>
9221     @@ -118,7 +118,7 @@
9222     if (domid == DOMID_SELF)
9223     return -EINVAL;
9224    
9225     - vma->vm_flags |= VM_IO | VM_RESERVED;
9226     + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
9227    
9228     vma->vm_mm->context.has_foreign_mappings = 1;
9229    
9230     @@ -203,6 +203,7 @@
9231     void __iomem * addr;
9232     struct vm_struct * area;
9233     unsigned long offset, last_addr;
9234     + pgprot_t prot;
9235     domid_t domid = DOMID_IO;
9236    
9237     /* Don't allow wraparound or zero size */
9238     @@ -234,6 +235,8 @@
9239     domid = DOMID_SELF;
9240     }
9241    
9242     + prot = __pgprot(_KERNPG_TABLE | flags);
9243     +
9244     /*
9245     * Mappings have to be page-aligned
9246     */
9247     @@ -249,10 +252,9 @@
9248     return NULL;
9249     area->phys_addr = phys_addr;
9250     addr = (void __iomem *) area->addr;
9251     - flags |= _KERNPG_TABLE;
9252     if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
9253     phys_addr>>PAGE_SHIFT,
9254     - size, __pgprot(flags), domid)) {
9255     + size, prot, domid)) {
9256     vunmap((void __force *) addr);
9257     return NULL;
9258     }
9259     --- a/arch/x86/mm/pageattr_64-xen.c
9260     +++ b/arch/x86/mm/pageattr_64-xen.c
9261     @@ -371,8 +371,8 @@
9262     BUG_ON(pud_none(*pud));
9263     pmd = pmd_offset(pud, address);
9264     BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9265     - pgprot_val(ref_prot) |= _PAGE_PSE;
9266     large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9267     + large_pte = pte_mkhuge(large_pte);
9268     set_pte((pte_t *)pmd, large_pte);
9269     }
9270    
9271     @@ -382,32 +382,28 @@
9272     {
9273     pte_t *kpte;
9274     struct page *kpte_page;
9275     - unsigned kpte_flags;
9276     pgprot_t ref_prot2;
9277     kpte = lookup_address(address);
9278     if (!kpte) return 0;
9279     kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9280     - kpte_flags = pte_val(*kpte);
9281     if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9282     - if ((kpte_flags & _PAGE_PSE) == 0) {
9283     + if (!pte_huge(*kpte)) {
9284     set_pte(kpte, pfn_pte(pfn, prot));
9285     } else {
9286     /*
9287     * split_large_page will take the reference for this
9288     * change_page_attr on the split page.
9289     */
9290     -
9291     struct page *split;
9292     - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9293     -
9294     + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9295     split = split_large_page(address, prot, ref_prot2);
9296     if (!split)
9297     return -ENOMEM;
9298     - set_pte(kpte,mk_pte(split, ref_prot2));
9299     + set_pte(kpte, mk_pte(split, ref_prot2));
9300     kpte_page = split;
9301     - }
9302     + }
9303     page_private(kpte_page)++;
9304     - } else if ((kpte_flags & _PAGE_PSE) == 0) {
9305     + } else if (!pte_huge(*kpte)) {
9306     set_pte(kpte, pfn_pte(pfn, ref_prot));
9307     BUG_ON(page_private(kpte_page) == 0);
9308     page_private(kpte_page)--;
9309     @@ -464,10 +460,12 @@
9310     * lowmem */
9311     if (__pa(address) < KERNEL_TEXT_SIZE) {
9312     unsigned long addr2;
9313     - pgprot_t prot2 = prot;
9314     + pgprot_t prot2;
9315     addr2 = __START_KERNEL_map + __pa(address);
9316     - pgprot_val(prot2) &= ~_PAGE_NX;
9317     - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9318     + /* Make sure the kernel mappings stay executable */
9319     + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9320     + err = __change_page_attr(addr2, pfn, prot2,
9321     + PAGE_KERNEL_EXEC);
9322     }
9323     }
9324     up_write(&init_mm.mmap_sem);
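
The pageattr rewrite above swaps open-coded _PAGE_PSE masking for the
pte_huge()/pte_mkhuge()/pte_clrhuge() accessors; their whole effect is setting
or clearing the PSE bit (bit 7) of the entry. A user-space model (the pte
value is made up):

    #include <stdio.h>
    #include <stdint.h>

    #define _PAGE_PSE (1ull << 7) /* x86 huge-page (PSE) bit */

    static uint64_t pte_mkhuge(uint64_t pte)  { return pte | _PAGE_PSE; }
    static uint64_t pte_clrhuge(uint64_t pte) { return pte & ~_PAGE_PSE; }
    static int pte_huge(uint64_t pte)         { return (pte & _PAGE_PSE) != 0; }

    int main(void)
    {
            uint64_t pte = 0x1063; /* hypothetical pfn + flag bits */
            printf("after mkhuge:  huge=%d\n", pte_huge(pte_mkhuge(pte)));
            printf("after clrhuge: huge=%d\n", pte_huge(pte_clrhuge(pte)));
            return 0;
    }
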
9325     --- a/arch/x86/mm/pgtable_32-xen.c
9326     +++ b/arch/x86/mm/pgtable_32-xen.c
9327     @@ -68,7 +68,9 @@
9328     printk(KERN_INFO "%lu pages writeback\n",
9329     global_page_state(NR_WRITEBACK));
9330     printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
9331     - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
9332     + printk(KERN_INFO "%lu pages slab\n",
9333     + global_page_state(NR_SLAB_RECLAIMABLE) +
9334     + global_page_state(NR_SLAB_UNRECLAIMABLE));
9335     printk(KERN_INFO "%lu pages pagetables\n",
9336     global_page_state(NR_PAGETABLE));
9337     }
9338     @@ -108,18 +110,11 @@
9339     __flush_tlb_one(vaddr);
9340     }
9341    
9342     -static int nr_fixmaps = 0;
9343     +static int fixmaps;
9344     unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
9345     -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
9346     +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
9347     EXPORT_SYMBOL(__FIXADDR_TOP);
9348    
9349     -void __init set_fixaddr_top(unsigned long top)
9350     -{
9351     - BUG_ON(nr_fixmaps > 0);
9352     - hypervisor_virt_start = top;
9353     - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
9354     -}
9355     -
9356     void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
9357     {
9358     unsigned long address = __fix_to_virt(idx);
9359     @@ -141,7 +136,21 @@
9360     if (HYPERVISOR_update_va_mapping(address, pte,
9361     UVMF_INVLPG|UVMF_ALL))
9362     BUG();
9363     - nr_fixmaps++;
9364     + fixmaps++;
9365     +}
9366     +
9367     +/**
9368     + * reserve_top_address - reserves a hole in the top of kernel address space
9369     + * @reserve - size of hole to reserve
9370     + *
9371     + * Can be used to relocate the fixmap area and poke a hole in the top
9372     + * of kernel address space to make room for a hypervisor.
9373     + */
9374     +void __init reserve_top_address(unsigned long reserve)
9375     +{
9376     + BUG_ON(fixmaps > 0);
9377     + __FIXADDR_TOP = -reserve - PAGE_SIZE;
9378     + __VMALLOC_RESERVE += reserve;
9379     }
9380    
9381     pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
9382     --- a/arch/x86/pci/irq-xen.c
9383     +++ b/arch/x86/pci/irq-xen.c
9384     @@ -991,10 +991,6 @@
9385     pci_name(bridge), 'A' + pin, irq);
9386     }
9387     if (irq >= 0) {
9388     - if (use_pci_vector() &&
9389     - !platform_legacy_irq(irq))
9390     - irq = IO_APIC_VECTOR(irq);
9391     -
9392     printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9393     pci_name(dev), 'A' + pin, irq);
9394     dev->irq = irq;
9395     @@ -1155,10 +1151,6 @@
9396     }
9397     dev = temp_dev;
9398     if (irq >= 0) {
9399     -#ifdef CONFIG_PCI_MSI
9400     - if (!platform_legacy_irq(irq))
9401     - irq = IO_APIC_VECTOR(irq);
9402     -#endif
9403     printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9404     pci_name(dev), 'A' + pin, irq);
9405     dev->irq = irq;
9406     @@ -1179,33 +1171,3 @@
9407     }
9408     return 0;
9409     }
9410     -
9411     -int pci_vector_resources(int last, int nr_released)
9412     -{
9413     - int count = nr_released;
9414     -
9415     - int next = last;
9416     - int offset = (last % 8);
9417     -
9418     - while (next < FIRST_SYSTEM_VECTOR) {
9419     - next += 8;
9420     -#ifdef CONFIG_X86_64
9421     - if (next == IA32_SYSCALL_VECTOR)
9422     - continue;
9423     -#else
9424     - if (next == SYSCALL_VECTOR)
9425     - continue;
9426     -#endif
9427     - count++;
9428     - if (next >= FIRST_SYSTEM_VECTOR) {
9429     - if (offset%8) {
9430     - next = FIRST_DEVICE_VECTOR + offset;
9431     - offset++;
9432     - continue;
9433     - }
9434     - count--;
9435     - }
9436     - }
9437     -
9438     - return count;
9439     -}
9440     --- a/drivers/char/tpm/tpm_xen.c
9441     +++ b/drivers/char/tpm/tpm_xen.c
9442     @@ -85,8 +85,7 @@
9443    
9444     /* local function prototypes */
9445     static irqreturn_t tpmif_int(int irq,
9446     - void *tpm_priv,
9447     - struct pt_regs *ptregs);
9448     + void *tpm_priv);
9449     static void tpmif_rx_action(unsigned long unused);
9450     static int tpmif_connect(struct xenbus_device *dev,
9451     struct tpm_private *tp,
9452     @@ -559,7 +558,7 @@
9453     }
9454    
9455    
9456     -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9457     +static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9458     {
9459     struct tpm_private *tp = tpm_priv;
9460     unsigned long flags;
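
This is the first of many hunks below dropping the struct pt_regs argument:
2.6.19 changed interrupt handlers to irqreturn_t (*)(int, void *), with
get_irq_regs() available for the rare handler that still needs the register
frame. A hedged sketch of the new-style prototype (names hypothetical):

    #include <linux/interrupt.h>

    static irqreturn_t my_handler(int irq, void *dev_id)
    {
            /* if the registers are really needed:
               struct pt_regs *regs = get_irq_regs(); */
            return IRQ_HANDLED;
    }

    /* registered as: request_irq(irq, my_handler, IRQF_SHARED, "mydev", dev); */
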
9461     --- a/drivers/pci/Kconfig
9462     +++ b/drivers/pci/Kconfig
9463     @@ -45,7 +45,7 @@
9464     config HT_IRQ
9465     bool "Interrupts on hypertransport devices"
9466     default y
9467     - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9468     + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9469     help
9470     This allows native hypertransport devices to use interrupts.
9471    
9472     --- a/drivers/xen/Kconfig
9473     +++ b/drivers/xen/Kconfig
9474     @@ -278,6 +278,9 @@
9475     config HAVE_IRQ_IGNORE_UNHANDLED
9476     def_bool y
9477    
9478     +config GENERIC_HARDIRQS_NO__DO_IRQ
9479     + def_bool y
9480     +
9481     config NO_IDLE_HZ
9482     def_bool y
9483    
9484     --- a/drivers/xen/balloon/balloon.c
9485     +++ b/drivers/xen/balloon/balloon.c
9486     @@ -84,7 +84,7 @@
9487     /* VM /proc information for memory */
9488     extern unsigned long totalram_pages;
9489    
9490     -#ifndef MODULE
9491     +#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
9492     extern unsigned long totalhigh_pages;
9493     #define inc_totalhigh_pages() (totalhigh_pages++)
9494     #define dec_totalhigh_pages() (totalhigh_pages--)
9495     --- a/drivers/xen/blkback/blkback.c
9496     +++ b/drivers/xen/blkback/blkback.c
9497     @@ -288,7 +288,7 @@
9498     wake_up(&blkif->wq);
9499     }
9500    
9501     -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9502     +irqreturn_t blkif_be_int(int irq, void *dev_id)
9503     {
9504     blkif_notify_work(dev_id);
9505     return IRQ_HANDLED;
9506     --- a/drivers/xen/blkback/common.h
9507     +++ b/drivers/xen/blkback/common.h
9508     @@ -130,7 +130,7 @@
9509    
9510     void blkif_xenbus_init(void);
9511    
9512     -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9513     +irqreturn_t blkif_be_int(int irq, void *dev_id);
9514     int blkif_schedule(void *arg);
9515    
9516     int blkback_barrier(struct xenbus_transaction xbt,
9517     --- a/drivers/xen/blkfront/blkfront.c
9518     +++ b/drivers/xen/blkfront/blkfront.c
9519     @@ -69,7 +69,7 @@
9520    
9521     static void kick_pending_request_queues(struct blkfront_info *);
9522    
9523     -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9524     +static irqreturn_t blkif_int(int irq, void *dev_id);
9525     static void blkif_restart_queue(void *arg);
9526     static void blkif_recover(struct blkfront_info *);
9527     static void blkif_completion(struct blk_shadow *);
9528     @@ -698,7 +698,7 @@
9529     }
9530    
9531    
9532     -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9533     +static irqreturn_t blkif_int(int irq, void *dev_id)
9534     {
9535     struct request *req;
9536     blkif_response_t *bret;
9537     --- a/drivers/xen/blktap/blktap.c
9538     +++ b/drivers/xen/blktap/blktap.c
9539     @@ -1175,7 +1175,7 @@
9540     wake_up(&blkif->wq);
9541     }
9542    
9543     -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9544     +irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
9545     {
9546     blkif_notify_work(dev_id);
9547     return IRQ_HANDLED;
9548     --- a/drivers/xen/blktap/common.h
9549     +++ b/drivers/xen/blktap/common.h
9550     @@ -112,7 +112,7 @@
9551    
9552     void tap_blkif_xenbus_init(void);
9553    
9554     -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9555     +irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
9556     int tap_blkif_schedule(void *arg);
9557    
9558     int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
9559     --- a/drivers/xen/console/console.c
9560     +++ b/drivers/xen/console/console.c
9561     @@ -345,7 +345,7 @@
9562     static int xencons_priv_irq;
9563     static char x_char;
9564    
9565     -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
9566     +void xencons_rx(char *buf, unsigned len)
9567     {
9568     int i;
9569     unsigned long flags;
9570     @@ -370,8 +370,7 @@
9571     if (time_before(jiffies, sysrq_timeout)) {
9572     spin_unlock_irqrestore(
9573     &xencons_lock, flags);
9574     - handle_sysrq(
9575     - buf[i], regs, xencons_tty);
9576     + handle_sysrq(buf[i], xencons_tty);
9577     spin_lock_irqsave(
9578     &xencons_lock, flags);
9579     continue;
9580     @@ -436,14 +435,13 @@
9581     }
9582    
9583     /* Privileged receive callback and transmit kicker. */
9584     -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
9585     - struct pt_regs *regs)
9586     +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
9587     {
9588     static char rbuf[16];
9589     int l;
9590    
9591     while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
9592     - xencons_rx(rbuf, l, regs);
9593     + xencons_rx(rbuf, l);
9594    
9595     xencons_tx();
9596    
9597     --- a/drivers/xen/console/xencons_ring.c
9598     +++ b/drivers/xen/console/xencons_ring.c
9599     @@ -83,7 +83,7 @@
9600     return sent;
9601     }
9602    
9603     -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
9604     +static irqreturn_t handle_input(int irq, void *unused)
9605     {
9606     struct xencons_interface *intf = xencons_interface();
9607     XENCONS_RING_IDX cons, prod;
9608     @@ -94,7 +94,7 @@
9609     BUG_ON((prod - cons) > sizeof(intf->in));
9610    
9611     while (cons != prod) {
9612     - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
9613     + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
9614     cons++;
9615     }
9616    
9617     --- a/drivers/xen/core/evtchn.c
9618     +++ b/drivers/xen/core/evtchn.c
9619     @@ -507,7 +507,7 @@
9620    
9621     int bind_caller_port_to_irqhandler(
9622     unsigned int caller_port,
9623     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9624     + irq_handler_t handler,
9625     unsigned long irqflags,
9626     const char *devname,
9627     void *dev_id)
9628     @@ -530,7 +530,7 @@
9629    
9630     int bind_listening_port_to_irqhandler(
9631     unsigned int remote_domain,
9632     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9633     + irq_handler_t handler,
9634     unsigned long irqflags,
9635     const char *devname,
9636     void *dev_id)
9637     @@ -554,7 +554,7 @@
9638     int bind_interdomain_evtchn_to_irqhandler(
9639     unsigned int remote_domain,
9640     unsigned int remote_port,
9641     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9642     + irq_handler_t handler,
9643     unsigned long irqflags,
9644     const char *devname,
9645     void *dev_id)
9646     @@ -578,7 +578,7 @@
9647     int bind_virq_to_irqhandler(
9648     unsigned int virq,
9649     unsigned int cpu,
9650     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9651     + irq_handler_t handler,
9652     unsigned long irqflags,
9653     const char *devname,
9654     void *dev_id)
9655     @@ -602,7 +602,7 @@
9656     int bind_ipi_to_irqhandler(
9657     unsigned int ipi,
9658     unsigned int cpu,
9659     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9660     + irq_handler_t handler,
9661     unsigned long irqflags,
9662     const char *devname,
9663     void *dev_id)
9664     @@ -687,15 +687,7 @@
9665     return 0;
9666     }
9667    
9668     -static void shutdown_dynirq(unsigned int irq)
9669     -{
9670     - int evtchn = evtchn_from_irq(irq);
9671     -
9672     - if (VALID_EVTCHN(evtchn))
9673     - mask_evtchn(evtchn);
9674     -}
9675     -
9676     -static void enable_dynirq(unsigned int irq)
9677     +static void unmask_dynirq(unsigned int irq)
9678     {
9679     int evtchn = evtchn_from_irq(irq);
9680    
9681     @@ -703,7 +695,7 @@
9682     unmask_evtchn(evtchn);
9683     }
9684    
9685     -static void disable_dynirq(unsigned int irq)
9686     +static void mask_dynirq(unsigned int irq)
9687     {
9688     int evtchn = evtchn_from_irq(irq);
9689    
9690     @@ -731,12 +723,12 @@
9691     unmask_evtchn(evtchn);
9692     }
9693    
9694     -static struct hw_interrupt_type dynirq_type = {
9695     - .typename = "Dynamic-irq",
9696     +static struct irq_chip dynirq_chip = {
9697     + .name = "Dynamic-irq",
9698     .startup = startup_dynirq,
9699     - .shutdown = shutdown_dynirq,
9700     - .enable = enable_dynirq,
9701     - .disable = disable_dynirq,
9702     + .mask = mask_dynirq,
9703     + .unmask = unmask_dynirq,
9704     + .mask_ack = ack_dynirq,
9705     .ack = ack_dynirq,
9706     .end = end_dynirq,
9707     #ifdef CONFIG_SMP
9708     @@ -820,12 +812,12 @@
9709     irq_info[irq] = IRQ_UNBOUND;
9710     }
9711    
9712     -static void enable_pirq(unsigned int irq)
9713     +static void unmask_pirq(unsigned int irq)
9714     {
9715     startup_pirq(irq);
9716     }
9717    
9718     -static void disable_pirq(unsigned int irq)
9719     +static void mask_pirq(unsigned int irq)
9720     {
9721     }
9722    
9723     @@ -854,12 +846,14 @@
9724     }
9725     }
9726    
9727     -static struct hw_interrupt_type pirq_type = {
9728     +static struct irq_chip pirq_chip = {
9729     + .name = "Phys-irq",
9730     .typename = "Phys-irq",
9731     .startup = startup_pirq,
9732     .shutdown = shutdown_pirq,
9733     - .enable = enable_pirq,
9734     - .disable = disable_pirq,
9735     + .mask = mask_pirq,
9736     + .unmask = unmask_pirq,
9737     + .mask_ack = ack_pirq,
9738     .ack = ack_pirq,
9739     .end = end_pirq,
9740     #ifdef CONFIG_SMP
9741     @@ -1043,7 +1037,8 @@
9742     irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED;
9743     irq_desc[dynirq_to_irq(i)].action = NULL;
9744     irq_desc[dynirq_to_irq(i)].depth = 1;
9745     - irq_desc[dynirq_to_irq(i)].chip = &dynirq_type;
9746     + set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip,
9747     + handle_level_irq, "level");
9748     }
9749    
9750     /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
9751     @@ -1059,6 +1054,7 @@
9752     irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
9753     irq_desc[pirq_to_irq(i)].action = NULL;
9754     irq_desc[pirq_to_irq(i)].depth = 1;
9755     - irq_desc[pirq_to_irq(i)].chip = &pirq_type;
9756     + set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip,
9757     + handle_level_irq, "level");
9758     }
9759     }
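
The evtchn conversion above moves from the legacy hw_interrupt_type
(enable/disable/shutdown) to the genirq struct irq_chip with mask/unmask and a
combined mask_ack, registered via set_irq_chip_and_handler_name(). A hedged
skeleton of that pattern (chip name and ops are placeholders):

    #include <linux/irq.h>

    static void my_mask(unsigned int irq)   { /* mask the source */ }
    static void my_unmask(unsigned int irq) { /* unmask the source */ }
    static void my_ack(unsigned int irq)    { /* clear the pending bit */ }

    static struct irq_chip my_chip = {
            .name     = "my-irq",
            .mask     = my_mask,
            .unmask   = my_unmask,
            .mask_ack = my_ack, /* used by handle_level_irq on entry */
            .ack      = my_ack,
    };

    /* at setup time:
       set_irq_chip_and_handler_name(irq, &my_chip, handle_level_irq, "level"); */
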
9760     --- a/drivers/xen/core/reboot.c
9761     +++ b/drivers/xen/core/reboot.c
9762     @@ -13,6 +13,7 @@
9763    
9764     #ifdef HAVE_XEN_PLATFORM_COMPAT_H
9765     #include <xen/platform-compat.h>
9766     +#undef handle_sysrq
9767     #endif
9768    
9769     MODULE_LICENSE("Dual BSD/GPL");
9770     @@ -203,7 +204,7 @@
9771    
9772     #ifdef CONFIG_MAGIC_SYSRQ
9773     if (sysrq_key != '\0')
9774     - handle_sysrq(sysrq_key, NULL, NULL);
9775     + handle_sysrq(sysrq_key, NULL);
9776     #endif
9777     }
9778    
9779     --- a/drivers/xen/core/smpboot.c
9780     +++ b/drivers/xen/core/smpboot.c
9781     @@ -25,8 +25,8 @@
9782     #include <xen/cpu_hotplug.h>
9783     #include <xen/xenbus.h>
9784    
9785     -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
9786     -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
9787     +extern irqreturn_t smp_reschedule_interrupt(int, void *);
9788     +extern irqreturn_t smp_call_function_interrupt(int, void *);
9789    
9790     extern int local_setup_timer(unsigned int cpu);
9791     extern void local_teardown_timer(unsigned int cpu);
9792     @@ -66,8 +66,6 @@
9793     #if defined(__i386__)
9794     u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
9795     EXPORT_SYMBOL(x86_cpu_to_apicid);
9796     -#elif !defined(CONFIG_X86_IO_APIC)
9797     -unsigned int maxcpus = NR_CPUS;
9798     #endif
9799    
9800     void __init prefill_possible_map(void)
9801     --- a/drivers/xen/fbfront/xenfb.c
9802     +++ b/drivers/xen/fbfront/xenfb.c
9803     @@ -523,8 +523,7 @@
9804     .fb_set_par = xenfb_set_par,
9805     };
9806    
9807     -static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
9808     - struct pt_regs *regs)
9809     +static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
9810     {
9811     /*
9812     * No in events recognized, simply ignore them all.
9813     --- a/drivers/xen/fbfront/xenkbd.c
9814     +++ b/drivers/xen/fbfront/xenkbd.c
9815     @@ -46,7 +46,7 @@
9816     * to do that.
9817     */
9818    
9819     -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
9820     +static irqreturn_t input_handler(int rq, void *dev_id)
9821     {
9822     struct xenkbd_info *info = dev_id;
9823     struct xenkbd_page *page = info->page;
9824     --- a/drivers/xen/gntdev/gntdev.c
9825     +++ b/drivers/xen/gntdev/gntdev.c
9826     @@ -755,9 +755,6 @@
9827     BUG();
9828     }
9829    
9830     - /* Copy the existing value of the PTE for returning. */
9831     - copy = *ptep;
9832     -
9833     /* Calculate the grant relating to this PTE. */
9834     slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
9835    
9836     @@ -772,6 +769,10 @@
9837     GNTDEV_INVALID_HANDLE &&
9838     !xen_feature(XENFEAT_auto_translated_physmap)) {
9839     /* NOT USING SHADOW PAGE TABLES. */
9840     +
9841     + /* Copy the existing value of the PTE for returning. */
9842     + copy = *ptep;
9843     +
9844     gnttab_set_unmap_op(&op, virt_to_machine(ptep),
9845     GNTMAP_contains_pte,
9846     private_data->grants[slot_index]
9847     @@ -784,7 +785,7 @@
9848     op.status);
9849     } else {
9850     /* USING SHADOW PAGE TABLES. */
9851     - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9852     + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9853     }
9854    
9855     /* Finally, we unmap the grant from kernel space. */
9856     @@ -812,7 +813,7 @@
9857     >> PAGE_SHIFT, INVALID_P2M_ENTRY);
9858    
9859     } else {
9860     - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9861     + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9862     }
9863    
9864     return copy;
9865     --- a/drivers/xen/netback/accel.c
9866     +++ b/drivers/xen/netback/accel.c
9867     @@ -65,7 +65,7 @@
9868    
9869     if (IS_ERR(eth_name)) {
9870     /* Probably means not present */
9871     - DPRINTK("%s: no match due to xenbus_read accel error %d\n",
9872     + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
9873     __FUNCTION__, PTR_ERR(eth_name));
9874     return 0;
9875     } else {
9876     --- a/drivers/xen/netback/common.h
9877     +++ b/drivers/xen/netback/common.h
9878     @@ -200,7 +200,7 @@
9879    
9880     int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
9881     struct net_device_stats *netif_be_get_stats(struct net_device *dev);
9882     -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9883     +irqreturn_t netif_be_int(int irq, void *dev_id);
9884    
9885     static inline int netbk_can_queue(struct net_device *dev)
9886     {
9887     --- a/drivers/xen/netback/loopback.c
9888     +++ b/drivers/xen/netback/loopback.c
9889     @@ -151,7 +151,7 @@
9890     np->stats.rx_bytes += skb->len;
9891     np->stats.rx_packets++;
9892    
9893     - if (skb->ip_summed == CHECKSUM_HW) {
9894     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
9895     /* Defer checksum calculation. */
9896     skb->proto_csum_blank = 1;
9897     /* Must be a local packet: assert its integrity. */
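
2.6.19 split CHECKSUM_HW by direction: CHECKSUM_PARTIAL on transmit (checksum
still to be filled in) and CHECKSUM_COMPLETE on receive; the network hunks in
this patch are all the transmit-side rename. A hedged sketch of the test the
drivers now make (the helper is illustrative):

    #include <linux/skbuff.h>

    static int needs_csum_offload(const struct sk_buff *skb)
    {
            /* stack deferred the checksum for the hw/backend to compute */
            return skb->ip_summed == CHECKSUM_PARTIAL;
    }
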
9898     --- a/drivers/xen/netback/netback.c
9899     +++ b/drivers/xen/netback/netback.c
9900     @@ -677,7 +677,7 @@
9901     id = meta[npo.meta_cons].id;
9902     flags = nr_frags ? NETRXF_more_data : 0;
9903    
9904     - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9905     + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9906     flags |= NETRXF_csum_blank | NETRXF_data_validated;
9907     else if (skb->proto_data_valid) /* remote but checksummed? */
9908     flags |= NETRXF_data_validated;
9909     @@ -1441,7 +1441,7 @@
9910     netif_idx_release(netif_page_index(page));
9911     }
9912    
9913     -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9914     +irqreturn_t netif_be_int(int irq, void *dev_id)
9915     {
9916     netif_t *netif = dev_id;
9917    
9918     @@ -1508,7 +1508,7 @@
9919     }
9920    
9921     #ifdef NETBE_DEBUG_INTERRUPT
9922     -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
9923     +static irqreturn_t netif_be_dbg(int irq, void *dev_id)
9924     {
9925     struct list_head *ent;
9926     netif_t *netif;
9927     --- a/drivers/xen/netfront/netfront.c
9928     +++ b/drivers/xen/netfront/netfront.c
9929     @@ -136,7 +136,7 @@
9930     {
9931     return skb_is_gso(skb) &&
9932     (!skb_gso_ok(skb, dev->features) ||
9933     - unlikely(skb->ip_summed != CHECKSUM_HW));
9934     + unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
9935     }
9936     #else
9937     #define HAVE_GSO 0
9938     @@ -222,7 +222,7 @@
9939     static void network_alloc_rx_buffers(struct net_device *);
9940     static void send_fake_arp(struct net_device *);
9941    
9942     -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9943     +static irqreturn_t netif_int(int irq, void *dev_id);
9944    
9945     #ifdef CONFIG_SYSFS
9946     static int xennet_sysfs_addif(struct net_device *netdev);
9947     @@ -992,7 +992,7 @@
9948     tx->flags = 0;
9949     extra = NULL;
9950    
9951     - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9952     + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9953     tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
9954     #ifdef CONFIG_XEN
9955     if (skb->proto_data_valid) /* remote but checksummed? */
9956     @@ -1049,7 +1049,7 @@
9957     return 0;
9958     }
9959    
9960     -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9961     +static irqreturn_t netif_int(int irq, void *dev_id)
9962     {
9963     struct net_device *dev = dev_id;
9964     struct netfront_info *np = netdev_priv(dev);
9965     --- a/drivers/xen/pciback/pciback.h
9966     +++ b/drivers/xen/pciback/pciback.h
9967     @@ -87,7 +87,7 @@
9968     void pciback_release_devices(struct pciback_device *pdev);
9969    
9970     /* Handles events from front-end */
9971     -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
9972     +irqreturn_t pciback_handle_event(int irq, void *dev_id);
9973     void pciback_do_op(void *data);
9974    
9975     int pciback_xenbus_register(void);
9976     --- a/drivers/xen/pciback/pciback_ops.c
9977     +++ b/drivers/xen/pciback/pciback_ops.c
9978     @@ -85,7 +85,7 @@
9979     test_and_schedule_op(pdev);
9980     }
9981    
9982     -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
9983     +irqreturn_t pciback_handle_event(int irq, void *dev_id)
9984     {
9985     struct pciback_device *pdev = dev_id;
9986    
9987     --- a/drivers/xen/pcifront/pci_op.c
9988     +++ b/drivers/xen/pcifront/pci_op.c
9989     @@ -392,10 +392,16 @@
9990    
9991     d = pci_scan_single_device(b, devfn);
9992     if (d) {
9993     + int err;
9994     +
9995     dev_info(&pdev->xdev->dev, "New device on "
9996     "%04x:%02x:%02x.%02x found.\n", domain, bus,
9997     PCI_SLOT(devfn), PCI_FUNC(devfn));
9998     - pci_bus_add_device(d);
9999     + err = pci_bus_add_device(d);
10000     + if (err)
10001     + dev_err(&pdev->xdev->dev,
10002     + "error %d adding device, continuing.\n",
10003     + err);
10004     }
10005     }
10006    
10007     --- a/drivers/xen/privcmd/compat_privcmd.c
10008     +++ b/drivers/xen/privcmd/compat_privcmd.c
10009     @@ -18,7 +18,6 @@
10010     * Authors: Jimi Xenidis <jimix@watson.ibm.com>
10011     */
10012    
10013     -#include <linux/config.h>
10014     #include <linux/compat.h>
10015     #include <linux/ioctl.h>
10016     #include <linux/syscalls.h>
10017     --- a/drivers/xen/privcmd/privcmd.c
10018     +++ b/drivers/xen/privcmd/privcmd.c
10019     @@ -236,7 +236,7 @@
10020     #endif
10021    
10022     /* DONTCOPY is essential for Xen as copy_page_range is broken. */
10023     - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
10024     + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
10025     vma->vm_ops = &privcmd_vm_ops;
10026     vma->vm_private_data = NULL;
10027    
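
The privcmd hunk above adds VM_PFNMAP so the core VM treats the foreign
mapping as raw page frames with no struct page behind them, keeping
get_user_pages() and the rmap code away from hypervisor-owned memory. A
minimal sketch of an mmap handler setting the same flags (names are
illustrative):

#include <linux/fs.h>
#include <linux/mm.h>

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Raw PFN mapping: no COW on fork, no struct page assumptions. */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
        vma->vm_private_data = NULL;
        return 0;
}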
10028     --- a/drivers/xen/sfc_netback/accel_xenbus.c
10029     +++ b/drivers/xen/sfc_netback/accel_xenbus.c
10030     @@ -68,8 +68,7 @@
10031    
10032    
10033     /* Demultiplex a message IRQ from the frontend driver. */
10034     -static irqreturn_t msgirq_from_frontend(int irq, void *context,
10035     - struct pt_regs *unused)
10036     +static irqreturn_t msgirq_from_frontend(int irq, void *context)
10037     {
10038     struct xenbus_device *dev = context;
10039     struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
10040     @@ -84,8 +83,7 @@
10041     * functionally, but we need it to pass to the bind function, and may
10042     * get called spuriously
10043     */
10044     -static irqreturn_t netirq_from_frontend(int irq, void *context,
10045     - struct pt_regs *unused)
10046     +static irqreturn_t netirq_from_frontend(int irq, void *context)
10047     {
10048     VPRINTK("netirq %d from device %s\n", irq,
10049     ((struct xenbus_device *)context)->nodename);
10050     --- a/drivers/xen/sfc_netfront/accel.h
10051     +++ b/drivers/xen/sfc_netfront/accel.h
10052     @@ -449,10 +449,8 @@
10053     u32 ip, u16 port, u8 protocol);
10054    
10055     /* Process an IRQ received from back end driver */
10056     -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10057     - struct pt_regs *unused);
10058     -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10059     - struct pt_regs *unused);
10060     +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10061     +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10062    
10063     #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10064     extern void netfront_accel_msg_from_bend(struct work_struct *context);
10065     --- a/drivers/xen/sfc_netfront/accel_msg.c
10066     +++ b/drivers/xen/sfc_netfront/accel_msg.c
10067     @@ -490,8 +490,7 @@
10068     }
10069    
10070    
10071     -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10072     - struct pt_regs *unused)
10073     +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10074     {
10075     netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10076     VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10077     @@ -502,8 +501,7 @@
10078     }
10079    
10080     /* Process an interrupt received from the NIC via backend */
10081     -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10082     - struct pt_regs *unused)
10083     +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10084     {
10085     netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10086     struct net_device *net_dev = vnic->net_dev;
10087     --- a/drivers/xen/sfc_netfront/accel_tso.c
10088     +++ b/drivers/xen/sfc_netfront/accel_tso.c
10089     @@ -363,7 +363,7 @@
10090    
10091     tso_check_safe(skb);
10092    
10093     - if (skb->ip_summed != CHECKSUM_HW)
10094     + if (skb->ip_summed != CHECKSUM_PARTIAL)
10095     EPRINTK("Trying to TSO send a packet without HW checksum\n");
10096    
10097     tso_start(&state, skb);
10098     --- a/drivers/xen/sfc_netfront/accel_vi.c
10099     +++ b/drivers/xen/sfc_netfront/accel_vi.c
10100     @@ -461,7 +461,7 @@
10101    
10102     frag_i = -1;
10103    
10104     - if (skb->ip_summed == CHECKSUM_HW) {
10105     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10106     /* Set to zero to encourage falcon to work it out for us */
10107     *(u16*)(skb->h.raw + skb->csum) = 0;
10108     }
10109     @@ -580,7 +580,7 @@
10110    
10111     kva = buf->pkt_kva;
10112    
10113     - if (skb->ip_summed == CHECKSUM_HW) {
10114     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10115     /* Set to zero to encourage falcon to work it out for us */
10116     *(u16*)(skb->h.raw + skb->csum) = 0;
10117     }
10118     --- a/drivers/xen/tpmback/common.h
10119     +++ b/drivers/xen/tpmback/common.h
10120     @@ -61,7 +61,7 @@
10121     void tpmif_xenbus_init(void);
10122     void tpmif_xenbus_exit(void);
10123     int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10124     -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10125     +irqreturn_t tpmif_be_int(int irq, void *dev_id);
10126    
10127     long int tpmback_get_instance(struct backend_info *bi);
10128    
10129     --- a/drivers/xen/tpmback/tpmback.c
10130     +++ b/drivers/xen/tpmback/tpmback.c
10131     @@ -502,7 +502,7 @@
10132     list_del(&pak->next);
10133     write_unlock_irqrestore(&dataex.pak_lock, flags);
10134    
10135     - DPRINTK("size given by app: %d, available: %d\n", size, left);
10136     + DPRINTK("size given by app: %zu, available: %u\n", size, left);
10137    
10138     ret_size = min_t(size_t, size, left);
10139    
10140     @@ -899,7 +899,7 @@
10141     }
10142     }
10143    
10144     -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10145     +irqreturn_t tpmif_be_int(int irq, void *dev_id)
10146     {
10147     tpmif_t *tpmif = (tpmif_t *) dev_id;
10148    
10149     --- a/drivers/xen/xenbus/xenbus_comms.c
10150     +++ b/drivers/xen/xenbus/xenbus_comms.c
10151     @@ -55,7 +55,7 @@
10152    
10153     static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10154    
10155     -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10156     +static irqreturn_t wake_waiting(int irq, void *unused)
10157     {
10158     if (unlikely(xenstored_ready == 0)) {
10159     xenstored_ready = 1;
10160     --- a/drivers/xen/xenoprof/xenoprofile.c
10161     +++ b/drivers/xen/xenoprof/xenoprofile.c
10162     @@ -195,7 +195,7 @@
10163     }
10164    
10165     static irqreturn_t
10166     -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10167     +xenoprof_ovf_interrupt(int irq, void * dev_id)
10168     {
10169     struct xenoprof_buf * buf;
10170     static unsigned long flag;
10171     --- a/include/asm-generic/pgtable.h
10172     +++ b/include/asm-generic/pgtable.h
10173     @@ -100,7 +100,7 @@
10174     #endif
10175    
10176     #ifndef arch_change_pte_range
10177     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10178     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10179     #endif
10180    
10181     #ifndef __HAVE_ARCH_PTE_SAME
10182     --- a/include/asm-x86/mach-xen/asm/desc_32.h
10183     +++ b/include/asm-x86/mach-xen/asm/desc_32.h
10184     @@ -32,52 +32,110 @@
10185     return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10186     }
10187    
10188     +/*
10189     + * This is the ldt that every process will get unless we need
10190     + * something other than this.
10191     + */
10192     +extern struct desc_struct default_ldt[];
10193     +extern struct desc_struct idt_table[];
10194     +extern void set_intr_gate(unsigned int irq, void * addr);
10195     +
10196     +static inline void pack_descriptor(__u32 *a, __u32 *b,
10197     + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
10198     +{
10199     + *a = ((base & 0xffff) << 16) | (limit & 0xffff);
10200     + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
10201     + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
10202     +}
10203     +
10204     +static inline void pack_gate(__u32 *a, __u32 *b,
10205     + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
10206     +{
10207     + *a = (seg << 16) | (base & 0xffff);
10208     + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
10209     +}
10210     +
10211     +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
10212     +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
10213     +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
10214     +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
10215     +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
10216     +#define DESCTYPE_DPL3 0x60 /* DPL-3 */
10217     +#define DESCTYPE_S 0x10 /* !system */
10218     +
10219     #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
10220     #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
10221    
10222     #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
10223     #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
10224     -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
10225     -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
10226     +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
10227     +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
10228    
10229     #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
10230     #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
10231     -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
10232     -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
10233     +#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
10234     +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
10235    
10236     -/*
10237     - * This is the ldt that every process will get unless we need
10238     - * something other than this.
10239     - */
10240     -extern struct desc_struct default_ldt[];
10241     -extern void set_intr_gate(unsigned int irq, void * addr);
10242     +#if TLS_SIZE != 24
10243     +# error update this code.
10244     +#endif
10245     +
10246     +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10247     +{
10248     +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10249     + *(u64 *)&t->tls_array[i])) \
10250     + BUG();
10251     + C(0); C(1); C(2);
10252     +#undef C
10253     +}
10254    
10255     -#define _set_tssldt_desc(n,addr,limit,type) \
10256     -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
10257     - "movw %w1,2(%2)\n\t" \
10258     - "rorl $16,%1\n\t" \
10259     - "movb %b1,4(%2)\n\t" \
10260     - "movb %4,5(%2)\n\t" \
10261     - "movb $0,6(%2)\n\t" \
10262     - "movb %h1,7(%2)\n\t" \
10263     - "rorl $16,%1" \
10264     - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
10265     +#ifndef CONFIG_XEN
10266     +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
10267     +{
10268     + __u32 *lp = (__u32 *)((char *)dt + entry*8);
10269     + *lp = entry_a;
10270     + *(lp+1) = entry_b;
10271     +}
10272    
10273     -#ifndef CONFIG_X86_NO_TSS
10274     -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
10275     +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10276     +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10277     +#else
10278     +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10279     +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
10280     +#endif
10281     +#ifndef CONFIG_X86_NO_IDT
10282     +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10283     +
10284     +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
10285     {
10286     - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
10287     - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
10288     + __u32 a, b;
10289     + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
10290     + write_idt_entry(idt_table, gate, a, b);
10291     }
10292     +#endif
10293    
10294     -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10295     +#ifndef CONFIG_X86_NO_TSS
10296     +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
10297     +{
10298     + __u32 a, b;
10299     + pack_descriptor(&a, &b, (unsigned long)addr,
10300     + offsetof(struct tss_struct, __cacheline_filler) - 1,
10301     + DESCTYPE_TSS, 0);
10302     + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
10303     +}
10304     #endif
10305    
10306     -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
10307     +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
10308     {
10309     - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
10310     + __u32 a, b;
10311     + pack_descriptor(&a, &b, (unsigned long)addr,
10312     + entries * sizeof(struct desc_struct) - 1,
10313     + DESCTYPE_LDT, 0);
10314     + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
10315     }
10316    
10317     +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10318     +
10319     #define LDT_entry_a(info) \
10320     ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
10321    
10322     @@ -103,21 +161,6 @@
10323     (info)->seg_not_present == 1 && \
10324     (info)->useable == 0 )
10325    
10326     -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10327     -
10328     -#if TLS_SIZE != 24
10329     -# error update this code.
10330     -#endif
10331     -
10332     -static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10333     -{
10334     -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10335     - *(u64 *)&t->tls_array[i])) \
10336     - BUG();
10337     - C(0); C(1); C(2);
10338     -#undef C
10339     -}
10340     -
10341     static inline void clear_LDT(void)
10342     {
10343     int cpu = get_cpu();
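
pack_descriptor() above builds the two 32-bit words of an x86 segment
descriptor: the low word holds base[15:0] and limit[15:0], the high word
holds the remaining base and limit bits plus the type byte and flags
nibble. A self-contained userspace sketch of the same encoding (the values
are chosen only for illustration):

#include <stdio.h>

static void pack_descriptor(unsigned int *a, unsigned int *b,
                            unsigned long base, unsigned long limit,
                            unsigned char type, unsigned char flags)
{
        *a = ((base & 0xffff) << 16) | (limit & 0xffff);
        *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
             (limit & 0x000f0000) | ((type & 0xff) << 8) |
             ((flags & 0xf) << 20);
}

int main(void)
{
        unsigned int a, b;

        /* An 8-entry LDT (DESCTYPE_LDT == 0x82) at base 0x100000. */
        pack_descriptor(&a, &b, 0x100000, 8 * 8 - 1, 0x82, 0);
        printf("low %08x high %08x\n", a, b);  /* low 0000003f high 00008210 */
        return 0;
}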
10344     --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
10345     +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h
10346     @@ -55,13 +55,6 @@
10347     extern struct dma_mapping_ops* dma_ops;
10348     extern int iommu_merge;
10349    
10350     -static inline int valid_dma_direction(int dma_direction)
10351     -{
10352     - return ((dma_direction == DMA_BIDIRECTIONAL) ||
10353     - (dma_direction == DMA_TO_DEVICE) ||
10354     - (dma_direction == DMA_FROM_DEVICE));
10355     -}
10356     -
10357     #if 0
10358     static inline int dma_mapping_error(dma_addr_t dma_addr)
10359     {
10360     --- a/include/asm-x86/mach-xen/asm/e820_64.h
10361     +++ b/include/asm-x86/mach-xen/asm/e820_64.h
10362     @@ -19,13 +19,9 @@
10363    
10364     #define E820_RAM 1
10365     #define E820_RESERVED 2
10366     -#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
10367     +#define E820_ACPI 3
10368     #define E820_NVS 4
10369    
10370     -#define HIGH_MEMORY (1024*1024)
10371     -
10372     -#define LOWMEMSIZE() (0x9f000)
10373     -
10374     #ifndef __ASSEMBLY__
10375     struct e820entry {
10376     u64 addr; /* start of memory segment */
10377     @@ -46,17 +42,16 @@
10378     extern void contig_e820_setup(void);
10379     extern unsigned long e820_end_of_ram(void);
10380     extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
10381     +extern void e820_mark_nosave_regions(void);
10382     extern void e820_print_map(char *who);
10383     extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
10384     extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
10385    
10386     -extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
10387     extern void e820_setup_gap(struct e820entry *e820, int nr_map);
10388     -extern unsigned long e820_hole_size(unsigned long start_pfn,
10389     - unsigned long end_pfn);
10390     +extern void e820_register_active_regions(int nid,
10391     + unsigned long start_pfn, unsigned long end_pfn);
10392    
10393     -extern void __init parse_memopt(char *p, char **end);
10394     -extern void __init parse_memmapopt(char *p, char **end);
10395     +extern void finish_e820_parsing(void);
10396    
10397     extern struct e820map e820;
10398    
10399     --- a/include/asm-x86/mach-xen/asm/fixmap_32.h
10400     +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h
10401     @@ -55,7 +55,7 @@
10402     #ifdef CONFIG_X86_LOCAL_APIC
10403     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10404     #endif
10405     -#ifdef CONFIG_X86_IO_APIC
10406     +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
10407     FIX_IO_APIC_BASE_0,
10408     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10409     #endif
10410     @@ -95,10 +95,9 @@
10411     __end_of_fixed_addresses
10412     };
10413    
10414     -extern void set_fixaddr_top(unsigned long top);
10415     -
10416     extern void __set_fixmap(enum fixed_addresses idx,
10417     maddr_t phys, pgprot_t flags);
10418     +extern void reserve_top_address(unsigned long reserve);
10419    
10420     #define set_fixmap(idx, phys) \
10421     __set_fixmap(idx, phys, PAGE_KERNEL)
10422     --- a/include/asm-x86/mach-xen/asm/fixmap_64.h
10423     +++ b/include/asm-x86/mach-xen/asm/fixmap_64.h
10424     @@ -41,7 +41,7 @@
10425     #ifdef CONFIG_X86_LOCAL_APIC
10426     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10427     #endif
10428     -#ifdef CONFIG_X86_IO_APIC
10429     +#ifndef CONFIG_XEN
10430     FIX_IO_APIC_BASE_0,
10431     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10432     #endif
10433     --- a/include/asm-x86/mach-xen/asm/hw_irq_32.h
10434     +++ b/include/asm-x86/mach-xen/asm/hw_irq_32.h
10435     @@ -17,8 +17,6 @@
10436     #include <asm/irq.h>
10437     #include <asm/sections.h>
10438    
10439     -struct hw_interrupt_type;
10440     -
10441     #define NMI_VECTOR 0x02
10442    
10443     /*
10444     @@ -28,10 +26,6 @@
10445     * Interrupt entry/exit code at both C and assembly level
10446     */
10447    
10448     -extern u8 irq_vector[NR_IRQ_VECTORS];
10449     -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10450     -#define AUTO_ASSIGN -1
10451     -
10452     extern void (*interrupt[NR_IRQS])(void);
10453    
10454     #ifdef CONFIG_SMP
10455     @@ -44,7 +38,7 @@
10456     fastcall void apic_timer_interrupt(void);
10457     fastcall void error_interrupt(void);
10458     fastcall void spurious_interrupt(void);
10459     -fastcall void thermal_interrupt(struct pt_regs *);
10460     +fastcall void thermal_interrupt(void);
10461     #define platform_legacy_irq(irq) ((irq) < 16)
10462     #endif
10463    
10464     --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h
10465     +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h
10466     @@ -19,8 +19,7 @@
10467     #include <asm/irq.h>
10468     #include <linux/profile.h>
10469     #include <linux/smp.h>
10470     -
10471     -struct hw_interrupt_type;
10472     +#include <linux/percpu.h>
10473     #endif
10474    
10475     #define NMI_VECTOR 0x02
10476     @@ -77,9 +76,10 @@
10477    
10478    
10479     #ifndef __ASSEMBLY__
10480     -extern u8 irq_vector[NR_IRQ_VECTORS];
10481     -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10482     -#define AUTO_ASSIGN -1
10483     +typedef int vector_irq_t[NR_VECTORS];
10484     +DECLARE_PER_CPU(vector_irq_t, vector_irq);
10485     +extern void __setup_vector_irq(int cpu);
10486     +extern spinlock_t vector_lock;
10487    
10488     /*
10489     * Various low-level irq details needed by irq.c, process.c,
10490     --- a/include/asm-x86/mach-xen/asm/io_32.h
10491     +++ b/include/asm-x86/mach-xen/asm/io_32.h
10492     @@ -237,33 +237,6 @@
10493    
10494     #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
10495    
10496     -/**
10497     - * check_signature - find BIOS signatures
10498     - * @io_addr: mmio address to check
10499     - * @signature: signature block
10500     - * @length: length of signature
10501     - *
10502     - * Perform a signature comparison with the mmio address io_addr. This
10503     - * address should have been obtained by ioremap.
10504     - * Returns 1 on a match.
10505     - */
10506     -
10507     -static inline int check_signature(volatile void __iomem * io_addr,
10508     - const unsigned char *signature, int length)
10509     -{
10510     - int retval = 0;
10511     - do {
10512     - if (readb(io_addr) != *signature)
10513     - goto out;
10514     - io_addr++;
10515     - signature++;
10516     - length--;
10517     - } while (length);
10518     - retval = 1;
10519     -out:
10520     - return retval;
10521     -}
10522     -
10523     /*
10524     * Cache management
10525     *
10526     --- a/include/asm-x86/mach-xen/asm/io_64.h
10527     +++ b/include/asm-x86/mach-xen/asm/io_64.h
10528     @@ -273,33 +273,6 @@
10529    
10530     #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
10531    
10532     -/**
10533     - * check_signature - find BIOS signatures
10534     - * @io_addr: mmio address to check
10535     - * @signature: signature block
10536     - * @length: length of signature
10537     - *
10538     - * Perform a signature comparison with the mmio address io_addr. This
10539     - * address should have been obtained by ioremap.
10540     - * Returns 1 on a match.
10541     - */
10542     -
10543     -static inline int check_signature(void __iomem *io_addr,
10544     - const unsigned char *signature, int length)
10545     -{
10546     - int retval = 0;
10547     - do {
10548     - if (readb(io_addr) != *signature)
10549     - goto out;
10550     - io_addr++;
10551     - signature++;
10552     - length--;
10553     - } while (length);
10554     - retval = 1;
10555     -out:
10556     - return retval;
10557     -}
10558     -
10559     /* Nothing to do */
10560    
10561     #define dma_cache_inv(_start,_size) do { } while (0)
10562     --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h
10563     +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h
10564     @@ -23,14 +23,6 @@
10565     set_pte((ptep), (pteval)); \
10566     } while (0)
10567    
10568     -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10569     - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10570     - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10571     - set_pte((ptep), (pteval)); \
10572     - xen_invlpg((addr)); \
10573     - } \
10574     -} while (0)
10575     -
10576     #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
10577    
10578     #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
10579     @@ -40,6 +32,7 @@
10580    
10581     #define pte_none(x) (!(x).pte_low)
10582    
10583     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10584     static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10585     {
10586     pte_t pte = *ptep;
10587     @@ -51,6 +44,7 @@
10588     return pte;
10589     }
10590    
10591     +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10592     #define ptep_clear_flush(vma, addr, ptep) \
10593     ({ \
10594     pte_t *__ptep = (ptep); \
10595     @@ -66,8 +60,6 @@
10596     __res; \
10597     })
10598    
10599     -#define pte_same(a, b) ((a).pte_low == (b).pte_low)
10600     -
10601     #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
10602     #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
10603     __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
10604     --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h
10605     +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h
10606     @@ -53,7 +53,6 @@
10607     * not possible, use pte_get_and_clear to obtain the old pte
10608     * value and then use set_pte to update it. -ben
10609     */
10610     -#define __HAVE_ARCH_SET_PTE_ATOMIC
10611    
10612     static inline void set_pte(pte_t *ptep, pte_t pte)
10613     {
10614     @@ -70,14 +69,6 @@
10615     set_pte((ptep), (pteval)); \
10616     } while (0)
10617    
10618     -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10619     - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10620     - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10621     - set_pte((ptep), (pteval)); \
10622     - xen_invlpg((addr)); \
10623     - } \
10624     -} while (0)
10625     -
10626     #define set_pmd(pmdptr,pmdval) \
10627     xen_l2_entry_update((pmdptr), (pmdval))
10628     #define set_pud(pudptr,pudval) \
10629     @@ -94,7 +85,7 @@
10630     #define pud_page(pud) \
10631     ((struct page *) __va(pud_val(pud) & PAGE_MASK))
10632    
10633     -#define pud_page_kernel(pud) \
10634     +#define pud_page_vaddr(pud) \
10635     ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
10636    
10637    
10638     @@ -124,6 +115,7 @@
10639    
10640     #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
10641    
10642     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10643     static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10644     {
10645     pte_t pte = *ptep;
10646     @@ -142,6 +134,7 @@
10647     return pte;
10648     }
10649    
10650     +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10651     #define ptep_clear_flush(vma, addr, ptep) \
10652     ({ \
10653     pte_t *__ptep = (ptep); \
10654     @@ -159,6 +152,7 @@
10655     __res; \
10656     })
10657    
10658     +#define __HAVE_ARCH_PTE_SAME
10659     static inline int pte_same(pte_t a, pte_t b)
10660     {
10661     return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
10662     --- a/include/asm-x86/mach-xen/asm/pgtable_32.h
10663     +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
10664     @@ -260,31 +260,89 @@
10665     # include <asm/pgtable-2level.h>
10666     #endif
10667    
10668     -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10669     +/*
10670     + * Rules for using pte_update - it must be called after any PTE update which
10671     + * has not been done using the set_pte / clear_pte interfaces. It is used by
10672     + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
10673     + * updates should either be sets, clears, or set_pte_atomic for P->P
10674     + * transitions, which means this hook should only be called for user PTEs.
10675     + * This hook implies a P->P protection or access change has taken place, which
10676     + * requires a subsequent TLB flush. The notification can optionally be delayed
10677     + * until the TLB flush event by using the pte_update_defer form of the
10678     + * interface, but care must be taken to assure that the flush happens while
10679     + * still holding the same page table lock so that the shadow and primary pages
10680     + * do not become out of sync on SMP.
10681     + */
10682     +#define pte_update(mm, addr, ptep) do { } while (0)
10683     +#define pte_update_defer(mm, addr, ptep) do { } while (0)
10684     +
10685     +
10686     +/*
10687     + * We only update the dirty/accessed state if we set
10688     + * the dirty bit by hand in the kernel, since the hardware
10689     + * will do the accessed bit for us, and we don't want to
10690     + * race with other CPU's that might be updating the dirty
10691     + * bit at the same time.
10692     + */
10693     +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10694     +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10695     +do { \
10696     + if (dirty) \
10697     + ptep_establish(vma, address, ptep, entry); \
10698     +} while (0)
10699     +
10700     +/*
10701     + * We don't actually have these, but we want to advertise them so that
10702     + * we can encompass the flush here.
10703     + */
10704     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10705     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10706     +
10707     +/*
10708     + * Rules for using ptep_establish: the pte MUST be a user pte, and
10709     + * must be a present->present transition.
10710     + */
10711     +#define __HAVE_ARCH_PTEP_ESTABLISH
10712     +#define ptep_establish(vma, address, ptep, pteval) \
10713     +do { \
10714     + if ( likely((vma)->vm_mm == current->mm) ) { \
10715     + BUG_ON(HYPERVISOR_update_va_mapping(address, \
10716     + pteval, \
10717     + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10718     + UVMF_INVLPG|UVMF_MULTI)); \
10719     + } else { \
10720     + xen_l1_entry_update(ptep, pteval); \
10721     + flush_tlb_page(vma, address); \
10722     + } \
10723     +} while (0)
10724     +
10725     +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
10726     +#define ptep_clear_flush_dirty(vma, address, ptep) \
10727     ({ \
10728     pte_t __pte = *(ptep); \
10729     - int __ret = pte_dirty(__pte); \
10730     - if (__ret) { \
10731     - __pte = pte_mkclean(__pte); \
10732     - if ((vma)->vm_mm != current->mm || \
10733     - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10734     - (ptep)->pte_low = __pte.pte_low; \
10735     - } \
10736     - __ret; \
10737     + int __dirty = pte_dirty(__pte); \
10738     + __pte = pte_mkclean(__pte); \
10739     + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10740     + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
10741     + else if (__dirty) \
10742     + (ptep)->pte_low = __pte.pte_low; \
10743     + __dirty; \
10744     })
10745    
10746     -#define ptep_test_and_clear_young(vma, addr, ptep) \
10747     +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
10748     +#define ptep_clear_flush_young(vma, address, ptep) \
10749     ({ \
10750     pte_t __pte = *(ptep); \
10751     - int __ret = pte_young(__pte); \
10752     - if (__ret) \
10753     - __pte = pte_mkold(__pte); \
10754     - if ((vma)->vm_mm != current->mm || \
10755     - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10756     - (ptep)->pte_low = __pte.pte_low; \
10757     - __ret; \
10758     + int __young = pte_young(__pte); \
10759     + __pte = pte_mkold(__pte); \
10760     + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10761     + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
10762     + else if (__young) \
10763     + (ptep)->pte_low = __pte.pte_low; \
10764     + __young; \
10765     })
10766    
10767     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10768     #define ptep_get_and_clear_full(mm, addr, ptep, full) \
10769     ((full) ? ({ \
10770     pte_t __res = *(ptep); \
10771     @@ -296,6 +354,7 @@
10772     }) : \
10773     ptep_get_and_clear(mm, addr, ptep))
10774    
10775     +#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10776     static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10777     {
10778     pte_t pte = *ptep;
10779     @@ -391,11 +450,11 @@
10780     #define pte_index(address) \
10781     (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10782     #define pte_offset_kernel(dir, address) \
10783     - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
10784     + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
10785    
10786     #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10787    
10788     -#define pmd_page_kernel(pmd) \
10789     +#define pmd_page_vaddr(pmd) \
10790     ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
10791    
10792     /*
10793     @@ -418,8 +477,6 @@
10794     static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
10795     #endif
10796    
10797     -extern void noexec_setup(const char *str);
10798     -
10799     #if defined(CONFIG_HIGHPTE)
10800     #define pte_offset_map(dir, address) \
10801     ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
10802     @@ -437,37 +494,17 @@
10803     #define pte_unmap_nested(pte) do { } while (0)
10804     #endif
10805    
10806     -#define __HAVE_ARCH_PTEP_ESTABLISH
10807     -#define ptep_establish(vma, address, ptep, pteval) \
10808     - do { \
10809     - if ( likely((vma)->vm_mm == current->mm) ) { \
10810     - BUG_ON(HYPERVISOR_update_va_mapping(address, \
10811     - pteval, \
10812     - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10813     - UVMF_INVLPG|UVMF_MULTI)); \
10814     - } else { \
10815     - xen_l1_entry_update(ptep, pteval); \
10816     - flush_tlb_page(vma, address); \
10817     - } \
10818     - } while (0)
10819     +/* Clear a kernel PTE and flush it from the TLB */
10820     +#define kpte_clear_flush(ptep, vaddr) do { \
10821     + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
10822     + BUG(); \
10823     +} while (0)
10824    
10825     /*
10826     * The i386 doesn't have any external MMU info: the kernel page
10827     * tables contain all the necessary information.
10828     - *
10829     - * Also, we only update the dirty/accessed state if we set
10830     - * the dirty bit by hand in the kernel, since the hardware
10831     - * will do the accessed bit for us, and we don't want to
10832     - * race with other CPU's that might be updating the dirty
10833     - * bit at the same time.
10834     */
10835     #define update_mmu_cache(vma,address,pte) do { } while (0)
10836     -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10837     -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10838     - do { \
10839     - if (dirty) \
10840     - ptep_establish(vma, address, ptep, entry); \
10841     - } while (0)
10842    
10843     #include <xen/features.h>
10844     void make_lowmem_page_readonly(void *va, unsigned int feature);
10845     @@ -516,10 +553,11 @@
10846     unsigned long size);
10847    
10848     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
10849     - unsigned long addr, unsigned long end, pgprot_t newprot);
10850     + unsigned long addr, unsigned long end, pgprot_t newprot,
10851     + int dirty_accountable);
10852    
10853     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
10854     - xen_change_pte_range(mm, pmd, addr, end, newprot)
10855     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
10856     + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
10857    
10858     #define io_remap_pfn_range(vma,from,pfn,size,prot) \
10859     direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
10860     @@ -528,13 +566,6 @@
10861     #define GET_IOSPACE(pfn) 0
10862     #define GET_PFN(pfn) (pfn)
10863    
10864     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10865     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10866     -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10867     -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10868     -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10869     -#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10870     -#define __HAVE_ARCH_PTE_SAME
10871     #include <asm-generic/pgtable.h>
10872    
10873     #endif /* _I386_PGTABLE_H */
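
The pgtable_32.h rework above hinges on whether the mm's page directory is
pinned: a pinned table is validated and write-protected by Xen, so dirty
and accessed bits must be cleared through ptep_set_access_flags(), which
ends in a HYPERVISOR_update_va_mapping hypercall, while an unpinned table
can be written directly. A minimal sketch of that dispatch (the pinned
test is a hypothetical helper; the real macros check PG_pinned on the
pgd's page):

static int example_clear_flush_dirty(struct vm_area_struct *vma,
                                     unsigned long address, pte_t *ptep)
{
        pte_t pte = *ptep;
        int dirty = pte_dirty(pte);

        pte = pte_mkclean(pte);
        if (example_mm_is_pinned(vma->vm_mm))   /* hypothetical helper */
                ptep_set_access_flags(vma, address, ptep, pte, dirty);
        else if (dirty)
                ptep->pte_low = pte.pte_low;    /* direct write is safe */
        return dirty;
}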
10874     --- a/include/asm-x86/mach-xen/asm/pgtable_64.h
10875     +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
10876     @@ -43,12 +43,9 @@
10877    
10878     #define swapper_pg_dir init_level4_pgt
10879    
10880     -extern int nonx_setup(char *str);
10881     extern void paging_init(void);
10882     extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
10883    
10884     -extern unsigned long pgkern_mask;
10885     -
10886     /*
10887     * ZERO_PAGE is a global shared page that is always zero: used
10888     * for zero-mapped memory areas etc..
10889     @@ -118,9 +115,6 @@
10890     set_pgd(__user_pgd(pgd), __pgd(0));
10891     }
10892    
10893     -#define pud_page(pud) \
10894     - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10895     -
10896     #define pte_same(a, b) ((a).pte == (b).pte)
10897    
10898     #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
10899     @@ -332,7 +326,7 @@
10900     #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
10901     static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10902     static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10903     -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10904     +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
10905     static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
10906     static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
10907     static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
10908     @@ -345,29 +339,12 @@
10909     static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
10910     static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
10911     static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10912     -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10913     +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
10914     static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
10915     static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
10916     static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
10917     static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
10918     -
10919     -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10920     -({ \
10921     - pte_t __pte = *(ptep); \
10922     - int __ret = pte_dirty(__pte); \
10923     - if (__ret) \
10924     - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
10925     - __ret; \
10926     -})
10927     -
10928     -#define ptep_test_and_clear_young(vma, addr, ptep) \
10929     -({ \
10930     - pte_t __pte = *(ptep); \
10931     - int __ret = pte_young(__pte); \
10932     - if (__ret) \
10933     - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
10934     - __ret; \
10935     -})
10936     +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
10937    
10938     static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10939     {
10940     @@ -395,7 +372,8 @@
10941     * Level 4 access.
10942     * Never use these in the common code.
10943     */
10944     -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10945     +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10946     +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
10947     #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
10948     #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
10949     #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
10950     @@ -404,16 +382,18 @@
10951    
10952     /* PUD - Level3 access */
10953     /* to find an entry in a page-table-directory. */
10954     +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10955     +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
10956     #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
10957     -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
10958     +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
10959     #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
10960    
10961     /* PMD - Level 2 access */
10962     -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10963     +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10964     #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10965    
10966     #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
10967     -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
10968     +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
10969     pmd_index(address))
10970     #define pmd_none(x) (!__pmd_val(x))
10971     #if CONFIG_XEN_COMPAT <= 0x030002
10972     @@ -444,6 +424,7 @@
10973     {
10974     unsigned long pteval;
10975     pteval = physpage | pgprot_val(pgprot);
10976     + pteval &= __supported_pte_mask;
10977     return __pte(pteval);
10978     }
10979    
10980     @@ -465,7 +446,7 @@
10981    
10982     #define pte_index(address) \
10983     (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10984     -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
10985     +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
10986     pte_index(address))
10987    
10988     /* x86-64 always has all page tables mapped. */
10989     @@ -506,6 +487,40 @@
10990     ptep_establish(vma, address, ptep, entry); \
10991     } while (0)
10992    
10993     +
10994     +/*
10995     + * i386 says: We don't actually have these, but we want to advertise
10996     + * them so that we can encompass the flush here.
10997     + */
10998     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10999     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11000     +
11001     +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
11002     +#define ptep_clear_flush_dirty(vma, address, ptep) \
11003     +({ \
11004     + pte_t __pte = *(ptep); \
11005     + int __dirty = pte_dirty(__pte); \
11006     + __pte = pte_mkclean(__pte); \
11007     + if ((vma)->vm_mm->context.pinned) \
11008     + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
11009     + else if (__dirty) \
11010     + set_pte(ptep, __pte); \
11011     + __dirty; \
11012     +})
11013     +
11014     +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
11015     +#define ptep_clear_flush_young(vma, address, ptep) \
11016     +({ \
11017     + pte_t __pte = *(ptep); \
11018     + int __young = pte_young(__pte); \
11019     + __pte = pte_mkold(__pte); \
11020     + if ((vma)->vm_mm->context.pinned) \
11021     + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
11022     + else if (__young) \
11023     + set_pte(ptep, __pte); \
11024     + __young; \
11025     +})
11026     +
11027     /* Encode and de-code a swap entry */
11028     #define __swp_type(x) (((x).val >> 1) & 0x3f)
11029     #define __swp_offset(x) ((x).val >> 8)
11030     @@ -547,10 +562,11 @@
11031     unsigned long size);
11032    
11033     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11034     - unsigned long addr, unsigned long end, pgprot_t newprot);
11035     + unsigned long addr, unsigned long end, pgprot_t newprot,
11036     + int dirty_accountable);
11037    
11038     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11039     - xen_change_pte_range(mm, pmd, addr, end, newprot)
11040     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11041     + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11042    
11043     #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
11044     direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
11045     @@ -572,8 +588,6 @@
11046     #define kc_offset_to_vaddr(o) \
11047     (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
11048    
11049     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11050     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11051     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11052     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11053     #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
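
The pte_exec()/pte_mkexec() hunks above switch the executability test from
_PAGE_USER to the NX bit (bit 63 on x86-64), matching what the hardware
actually enforces: a PTE is executable iff NX is clear. A self-contained
userspace sketch of the corrected test:

#include <stdio.h>
#include <stdint.h>

#define _PAGE_NX (1ULL << 63)

static int pte_exec(uint64_t pte_val)
{
        return !(pte_val & _PAGE_NX);   /* executable iff NX is clear */
}

int main(void)
{
        printf("%d %d\n", pte_exec(0x1ULL), pte_exec(0x1ULL | _PAGE_NX));
        return 0;                       /* prints "1 0" */
}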
11054     --- a/include/asm-x86/mach-xen/asm/processor_32.h
11055     +++ b/include/asm-x86/mach-xen/asm/processor_32.h
11056     @@ -146,6 +146,18 @@
11057     #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11058     #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11059    
11060     +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11061     + unsigned int *ecx, unsigned int *edx)
11062     +{
11063     + /* ecx is often an input as well as an output. */
11064     + __asm__(XEN_CPUID
11065     + : "=a" (*eax),
11066     + "=b" (*ebx),
11067     + "=c" (*ecx),
11068     + "=d" (*edx)
11069     + : "0" (*eax), "2" (*ecx));
11070     +}
11071     +
11072     /*
11073     * Generic CPUID function
11074     * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11075     @@ -153,24 +165,18 @@
11076     */
11077     static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11078     {
11079     - __asm__(XEN_CPUID
11080     - : "=a" (*eax),
11081     - "=b" (*ebx),
11082     - "=c" (*ecx),
11083     - "=d" (*edx)
11084     - : "0" (op), "c"(0));
11085     + *eax = op;
11086     + *ecx = 0;
11087     + __cpuid(eax, ebx, ecx, edx);
11088     }
11089    
11090     /* Some CPUID calls want 'count' to be placed in ecx */
11091     static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11092     - int *edx)
11093     + int *edx)
11094     {
11095     - __asm__(XEN_CPUID
11096     - : "=a" (*eax),
11097     - "=b" (*ebx),
11098     - "=c" (*ecx),
11099     - "=d" (*edx)
11100     - : "0" (op), "c" (count));
11101     + *eax = op;
11102     + *ecx = count;
11103     + __cpuid(eax, ebx, ecx, edx);
11104     }
11105    
11106     /*
11107     @@ -178,42 +184,30 @@
11108     */
11109     static inline unsigned int cpuid_eax(unsigned int op)
11110     {
11111     - unsigned int eax;
11112     + unsigned int eax, ebx, ecx, edx;
11113    
11114     - __asm__(XEN_CPUID
11115     - : "=a" (eax)
11116     - : "0" (op)
11117     - : "bx", "cx", "dx");
11118     + cpuid(op, &eax, &ebx, &ecx, &edx);
11119     return eax;
11120     }
11121     static inline unsigned int cpuid_ebx(unsigned int op)
11122     {
11123     - unsigned int eax, ebx;
11124     + unsigned int eax, ebx, ecx, edx;
11125    
11126     - __asm__(XEN_CPUID
11127     - : "=a" (eax), "=b" (ebx)
11128     - : "0" (op)
11129     - : "cx", "dx" );
11130     + cpuid(op, &eax, &ebx, &ecx, &edx);
11131     return ebx;
11132     }
11133     static inline unsigned int cpuid_ecx(unsigned int op)
11134     {
11135     - unsigned int eax, ecx;
11136     + unsigned int eax, ebx, ecx, edx;
11137    
11138     - __asm__(XEN_CPUID
11139     - : "=a" (eax), "=c" (ecx)
11140     - : "0" (op)
11141     - : "bx", "dx" );
11142     + cpuid(op, &eax, &ebx, &ecx, &edx);
11143     return ecx;
11144     }
11145     static inline unsigned int cpuid_edx(unsigned int op)
11146     {
11147     - unsigned int eax, edx;
11148     + unsigned int eax, ebx, ecx, edx;
11149    
11150     - __asm__(XEN_CPUID
11151     - : "=a" (eax), "=d" (edx)
11152     - : "0" (op)
11153     - : "bx", "cx");
11154     + cpuid(op, &eax, &ebx, &ecx, &edx);
11155     return edx;
11156     }
11157    
11158     @@ -315,6 +309,8 @@
11159     : :"a" (eax), "c" (ecx));
11160     }
11161    
11162     +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11163     +
11164     /* from system description table in BIOS. Mostly for MCA use, but
11165     others may find it useful. */
11166     extern unsigned int machine_id;
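
The processor_32.h hunks above refactor the five cpuid wrappers around one
__cpuid() primitive that takes eax/ecx as inputs and fills all four
registers, instead of five separate asm blocks. A self-contained userspace
sketch of the same structure, using the native instruction where the
kernel header uses the XEN_CPUID paravirt variant:

#include <stdio.h>

static void __cpuid(unsigned int *eax, unsigned int *ebx,
                    unsigned int *ecx, unsigned int *edx)
{
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (*eax), "2" (*ecx));
}

static void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
                  unsigned int *ecx, unsigned int *edx)
{
        *eax = op;
        *ecx = 0;       /* some CPUs (Cyrix MII) don't clear ecx */
        __cpuid(eax, ebx, ecx, edx);
}

int main(void)
{
        unsigned int eax, vendor[4] = { 0 };

        cpuid(0, &eax, &vendor[0], &vendor[2], &vendor[1]);
        printf("max leaf %u, vendor %.12s\n", eax, (char *)vendor);
        return 0;
}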
11167     --- a/include/asm-x86/mach-xen/asm/processor_64.h
11168     +++ b/include/asm-x86/mach-xen/asm/processor_64.h
11169     @@ -484,6 +484,8 @@
11170     : :"a" (eax), "c" (ecx));
11171     }
11172    
11173     +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11174     +
11175     #define stack_current() \
11176     ({ \
11177     struct thread_info *ti; \
11178     --- a/include/asm-x86/mach-xen/asm/segment_32.h
11179     +++ b/include/asm-x86/mach-xen/asm/segment_32.h
11180     @@ -61,11 +61,9 @@
11181    
11182     #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11183     #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11184     -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11185    
11186     #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11187     #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11188     -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11189    
11190     #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11191     #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11192     @@ -85,6 +83,11 @@
11193    
11194     #define GDT_SIZE (GDT_ENTRIES * 8)
11195    
11196     +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11197     +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11198     +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11199     +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11200     +
11201     /* Simple and small GDT entries for booting only */
11202    
11203     #define GDT_ENTRY_BOOT_CS 2
11204     @@ -114,4 +117,16 @@
11205     */
11206     #define IDT_ENTRIES 256
11207    
11208     +/* Bottom two bits of selector give the ring privilege level */
11209     +#define SEGMENT_RPL_MASK 0x3
11210     +/* Bit 2 is table indicator (LDT/GDT) */
11211     +#define SEGMENT_TI_MASK 0x4
11212     +
11213     +/* User mode is privilege level 3 */
11214     +#define USER_RPL 0x3
11215     +/* LDT segment has TI set, GDT has it cleared */
11216     +#define SEGMENT_LDT 0x4
11217     +#define SEGMENT_GDT 0x0
11218     +
11219     +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11220     #endif
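
The selector macros added above decode the three fields of an x86 segment
selector: bits [1:0] are the requested privilege level, bit 2 picks LDT
versus GDT, and the remaining bits are the table index; get_kernel_rpl()
reflects that a paravirtualized Xen guest kernel normally runs in ring 1
rather than ring 0. A self-contained userspace sketch of the decoding:

#include <stdio.h>

#define SEGMENT_RPL_MASK 0x3    /* bottom two bits: privilege level */
#define SEGMENT_TI_MASK  0x4    /* bit 2: table indicator (LDT/GDT) */

int main(void)
{
        unsigned short sel = 0x0073;    /* a typical user data selector */

        printf("index %u, %s, rpl %u\n", sel >> 3,
               (sel & SEGMENT_TI_MASK) ? "LDT" : "GDT",
               sel & SEGMENT_RPL_MASK);
        return 0;                       /* index 14, GDT, rpl 3 */
}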
11221     --- a/include/asm-x86/mach-xen/asm/smp_32.h
11222     +++ b/include/asm-x86/mach-xen/asm/smp_32.h
11223     @@ -79,25 +79,36 @@
11224     return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11225     }
11226     #endif
11227     -
11228     -static __inline int logical_smp_processor_id(void)
11229     -{
11230     - /* we don't want to mark this access volatile - bad code generation */
11231     - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11232     -}
11233     -
11234     #endif
11235    
11236     +#define safe_smp_processor_id() smp_processor_id()
11237     extern int __cpu_disable(void);
11238     extern void __cpu_die(unsigned int cpu);
11239     extern void prefill_possible_map(void);
11240     +extern unsigned int num_processors;
11241     +
11242     #endif /* !__ASSEMBLY__ */
11243    
11244     #else /* CONFIG_SMP */
11245    
11246     +#define safe_smp_processor_id() 0
11247     #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11248    
11249     #define NO_PROC_ID 0xFF /* No processor magic marker */
11250    
11251     #endif
11252     +
11253     +#ifndef __ASSEMBLY__
11254     +
11255     +extern u8 apicid_2_node[];
11256     +
11257     +#ifdef CONFIG_X86_LOCAL_APIC
11258     +static __inline int logical_smp_processor_id(void)
11259     +{
11260     + /* we don't want to mark this access volatile - bad code generation */
11261     + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11262     +}
11263     +#endif
11264     +#endif
11265     +
11266     #endif
--- a/include/asm-x86/mach-xen/asm/smp_64.h
+++ b/include/asm-x86/mach-xen/asm/smp_64.h
@@ -4,15 +4,12 @@
 /*
  * We need the APIC definitions automatically as part of 'smp.h'
  */
-#ifndef __ASSEMBLY__
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/bitops.h>
 extern int disable_apic;
-#endif

 #ifdef CONFIG_X86_LOCAL_APIC
-#ifndef __ASSEMBLY__
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #ifdef CONFIG_X86_IO_APIC
@@ -21,10 +18,8 @@
 #include <asm/apic.h>
 #include <asm/thread_info.h>
 #endif
-#endif

 #ifdef CONFIG_SMP
-#ifndef ASSEMBLY

 #include <asm/pda.h>

@@ -41,14 +36,11 @@

 extern void smp_alloc_memory(void);
 extern volatile unsigned long smp_invalidate_needed;
-extern int pic_mode;
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
 void smp_stop_cpu(void);
-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
- void *info, int retry, int wait);

 extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
@@ -77,20 +69,16 @@
 }
 #endif

-extern int safe_smp_processor_id(void);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void prefill_possible_map(void);
 extern unsigned num_processors;
 extern unsigned disabled_cpus;

-#endif /* !ASSEMBLY */
-
 #define NO_PROC_ID 0xFF /* No processor magic marker */

 #endif

-#ifndef ASSEMBLY
 /*
  * Some lowlevel functions might want to know about
  * the real APIC ID <-> CPU # mapping.
@@ -114,11 +102,8 @@
 }
 #endif

-#endif /* !ASSEMBLY */
-
 #ifndef CONFIG_SMP
 #define stack_smp_processor_id() 0
-#define safe_smp_processor_id() 0
 #define cpu_logical_map(x) (x)
 #else
 #include <asm/thread_info.h>
@@ -130,7 +115,6 @@
 })
 #endif

-#ifndef __ASSEMBLY__
 #ifdef CONFIG_X86_LOCAL_APIC
 static __inline int logical_smp_processor_id(void)
 {
@@ -138,13 +122,18 @@
 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
 }
 #endif
-#endif

 #ifdef CONFIG_SMP
 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
 #else
 #define cpu_physical_id(cpu) boot_cpu_id
-#endif
-
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
+ void *info, int retry, int wait)
+{
+ /* Disable interrupts here? */
+ func(info);
+ return 0;
+}
+#endif /* !CONFIG_SMP */
 #endif

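With the inline UP stub added above, callers can invoke smp_call_function_single() unconditionally; on !CONFIG_SMP it simply runs the callback synchronously on the only CPU. A usage sketch (callback and caller names hypothetical):

    static void flush_one(void *info)
    {
            /* per-CPU work goes here */
    }

    static void example_caller(void)
    {
            /* cpuid 0, no retry, wait for completion */
            smp_call_function_single(0, flush_one, NULL, 0, 1);
    }
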
--- a/include/asm-x86/mach-xen/asm/system_32.h
+++ b/include/asm-x86/mach-xen/asm/system_32.h
@@ -267,6 +267,9 @@
 #define cmpxchg(ptr,o,n)\
 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
 (unsigned long)(n),sizeof(*(ptr))))
+#define sync_cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
 #endif

 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
@@ -291,6 +294,39 @@
 : "=a"(prev)
 : "r"(new), "m"(*__xg(ptr)), "0"(old)
 : "memory");
+ return prev;
+ }
+ return old;
+}
+
+/*
+ * Always use locked operations when touching memory shared with a
+ * hypervisor, since the system may be SMP even if the guest kernel
+ * isn't.
+ */
+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long old,
+ unsigned long new, int size)
+{
+ unsigned long prev;
+ switch (size) {
+ case 1:
+ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
+ : "=a"(prev)
+ : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+ __asm__ __volatile__("lock; cmpxchgl %1,%2"
+ : "=a"(prev)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
+ : "memory");
 return prev;
 }
 return old;
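
Unlike cmpxchg(), sync_cmpxchg() always emits the LOCK prefix, even on UP builds; that matters for memory shared with the hypervisor, where other physical CPUs can race against a UP guest. A usage sketch, modeled loosely on the grant-table flag updates (caller name hypothetical):

    /* Atomically set a claim bit in a hypervisor-shared flag word. */
    static int example_claim(volatile u16 *shared_flags)
    {
            u16 old = *shared_flags;

            /* Fails (returns 0) if the word changed in the meantime. */
            return sync_cmpxchg(shared_flags, old, old | 1) == old;
    }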
--- a/include/asm-x86/mach-xen/asm/system_64.h
+++ b/include/asm-x86/mach-xen/asm/system_64.h
@@ -24,6 +24,7 @@
 #define __EXTRA_CLOBBER \
 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"

+/* Save and restore flags so a set NT flag is not leaked into the next task */
 #define switch_to(prev,next,last) \
 asm volatile(SAVE_CONTEXT \
 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
--- a/include/asm-x86/mach-xen/asm/tlbflush_32.h
+++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h
@@ -8,8 +8,6 @@
 #define __flush_tlb_global() xen_tlb_flush()
 #define __flush_tlb_all() xen_tlb_flush()

-extern unsigned long pgkern_mask;
-
 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)

 #define __flush_tlb_single(addr) xen_invlpg(addr)
--- a/include/asm-x86/mach-xen/asm/tlbflush_64.h
+++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h
@@ -12,9 +12,6 @@
 */
 #define __flush_tlb_global() xen_tlb_flush()

-
-extern unsigned long pgkern_mask;
-
 #define __flush_tlb_all() __flush_tlb_global()

 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
--- a/include/asm-x86/thread_info_64.h
+++ b/include/asm-x86/thread_info_64.h
@@ -157,10 +157,14 @@
 (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)

 /* flags to check in __switch_to() */
+#ifndef CONFIG_XEN
 #define _TIF_WORK_CTXSW \
 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#else
+#define _TIF_WORK_CTXSW _TIF_DEBUG
+#endif

 #define PREEMPT_ACTIVE 0x10000000

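Under CONFIG_XEN only _TIF_DEBUG forces the slow path at context switch, since the I/O bitmap and debug-store MSRs are managed through the hypervisor instead. A simplified sketch of the consumer (the real check lives in __switch_to(); names and the __switch_to_xtra() signature are abbreviated here):

    static void example_ctxsw_check(struct task_struct *prev_p,
                                    struct task_struct *next_p)
    {
            if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
                    __switch_to_xtra(prev_p, next_p);   /* debug registers etc. */
    }
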
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1821,5 +1821,12 @@
 }

 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
+
+#ifdef CONFIG_XEN
+int skb_checksum_setup(struct sk_buff *skb);
+#else
+static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
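
The !CONFIG_XEN inline stub above lets common callers compile to nothing on native kernels. A caller sketch (a hypothetical backend receive path, not taken from the patch):

    static int example_receive(struct sk_buff *skb)
    {
            if (skb_checksum_setup(skb)) {
                    kfree_skb(skb);     /* bad partial-checksum metadata */
                    return -EPROTO;
            }
            netif_rx(skb);
            return 0;
    }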
--- a/include/xen/evtchn.h
+++ b/include/xen/evtchn.h
@@ -54,34 +54,34 @@
 */
 int bind_caller_port_to_irqhandler(
 unsigned int caller_port,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_listening_port_to_irqhandler(
 unsigned int remote_domain,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_interdomain_evtchn_to_irqhandler(
 unsigned int remote_domain,
 unsigned int remote_port,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_virq_to_irqhandler(
 unsigned int virq,
 unsigned int cpu,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
 int bind_ipi_to_irqhandler(
 unsigned int ipi,
 unsigned int cpu,
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ irq_handler_t handler,
 unsigned long irqflags,
 const char *devname,
 void *dev_id);
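
As of 2.6.19, irq_handler_t no longer takes a struct pt_regs * argument, so every event-channel handler loses that parameter as well. A sketch of a handler and bind call in the new style (handler and device names hypothetical):

    static irqreturn_t example_virq_handler(int irq, void *dev_id)
    {
            /* service the virtual IRQ */
            return IRQ_HANDLED;
    }

    static int example_bind(unsigned int cpu)
    {
            return bind_virq_to_irqhandler(VIRQ_TIMER, cpu,
                                           example_virq_handler,
                                           IRQF_DISABLED, "example", NULL);
    }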
--- a/include/xen/xencons.h
+++ b/include/xen/xencons.h
@@ -8,7 +8,7 @@
 void xencons_resume(void);

 /* Interrupt work hooks. Receive data, or kick data out. */
-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
+void xencons_rx(char *buf, unsigned len);
 void xencons_tx(void);

 int xencons_ring_init(void);
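
The console receive hook follows the same interface change. A sketch of the updated interrupt-side caller (simplified; ring copying elided and the handler name hypothetical):

    static irqreturn_t example_xencons_interrupt(int irq, void *dev_id)
    {
            char buf[64];
            unsigned len = 0;

            /* ... copy available bytes from the shared ring into buf ... */
            xencons_rx(buf, len);
            xencons_tx();
            return IRQ_HANDLED;
    }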
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -86,7 +86,7 @@
 next = pmd_addr_end(addr, end);
 if (pmd_none_or_clear_bad(pmd))
 continue;
- if (arch_change_pte_range(mm, pmd, addr, next, newprot))
+ if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
 continue;
 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
 } while (pmd++, addr = next, addr != end);
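
arch_change_pte_range() now receives dirty_accountable so the Xen implementation can apply the same write-protect policy as the generic code. On non-Xen configurations the fallback is assumed to remain a no-op macro, updated for the extra argument (sketch, not taken from the patch):

    /* Assumed non-Xen fallback: always defer to the generic change_pte_range(). */
    #ifndef arch_change_pte_range
    #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
    #endif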
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1611,15 +1611,14 @@
 }
 if ((skb->h.raw + skb->csum + 2) > skb->tail)
 goto out;
- skb->ip_summed = CHECKSUM_HW;
+ skb->ip_summed = CHECKSUM_PARTIAL;
 skb->proto_csum_blank = 0;
 }
 return 0;
 out:
 return -EPROTO;
 }
-#else
-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+EXPORT_SYMBOL(skb_checksum_setup);
 #endif

 /**
@@ -2115,7 +2114,7 @@
 case CHECKSUM_UNNECESSARY:
 skb->proto_data_valid = 1;
 break;
- case CHECKSUM_HW:
+ case CHECKSUM_PARTIAL:
 /* XXX Implement me. */
 default:
 skb->proto_data_valid = 0;
@@ -4648,7 +4647,6 @@
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
-EXPORT_SYMBOL(skb_checksum_setup);

 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
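
CHECKSUM_PARTIAL is the 2.6.19 rename of the transmit-side meaning of CHECKSUM_HW: the checksum still has to be completed before the packet reaches hardware that cannot compute it. A semantics sketch (hypothetical helper):

    static void example_complete_csum(struct sk_buff *skb)
    {
            if (skb->ip_summed == CHECKSUM_PARTIAL)
                    skb_checksum_help(skb);     /* software fallback */
    }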