Magellan Linux

Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1022-2.6.25-xen-patch-2.6.21.patch



Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 123925 bytes
-using openSUSE Xen patchset, updated kernel configs

1 niro 609 From: www.kernel.org
2     Subject: Linux 2.6.21
3     Patch-mainline: 2.6.21
4    
5     Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py
6    
7     Acked-by: jbeulich@novell.com
8    
9     ---
10     arch/x86/Kconfig | 4
11     arch/x86/ia32/ia32entry-xen.S | 5
12     arch/x86/kernel/Makefile | 4
13     arch/x86/kernel/acpi/sleep_64-xen.c | 6
14     arch/x86/kernel/apic_32-xen.c | 65 ----
15     arch/x86/kernel/cpu/common-xen.c | 14
16     arch/x86/kernel/e820_32-xen.c | 18 -
17     arch/x86/kernel/e820_64-xen.c | 40 ++
18     arch/x86/kernel/entry_32-xen.S | 80 +++--
19     arch/x86/kernel/entry_64-xen.S | 3
20     arch/x86/kernel/genapic_64-xen.c | 4
21     arch/x86/kernel/head64-xen.c | 8
22     arch/x86/kernel/head_32-xen.S | 9
23     arch/x86/kernel/io_apic_32-xen.c | 43 +-
24     arch/x86/kernel/io_apic_64-xen.c | 413 +++++++++++++-------------
25     arch/x86/kernel/irq_32-xen.c | 22 +
26     arch/x86/kernel/irq_64-xen.c | 13
27     arch/x86/kernel/microcode-xen.c | 2
28     arch/x86/kernel/mpparse_32-xen.c | 4
29     arch/x86/kernel/mpparse_64-xen.c | 6
30     arch/x86/kernel/pci-dma_32-xen.c | 2
31     arch/x86/kernel/pci-swiotlb_64-xen.c | 2
32     arch/x86/kernel/pcspeaker.c | 5
33     arch/x86/kernel/process_32-xen.c | 42 +-
34     arch/x86/kernel/process_64-xen.c | 13
35     arch/x86/kernel/setup_32-xen.c | 46 --
36     arch/x86/kernel/setup_64-xen.c | 184 +----------
37     arch/x86/kernel/smp_32-xen.c | 5
38     arch/x86/kernel/time_32-xen.c | 275 +----------------
39     arch/x86/kernel/traps_32-xen.c | 27 +
40     arch/x86/kernel/vsyscall_64-xen.c | 127 ++++---
41     arch/x86/mm/fault_32-xen.c | 44 --
42     arch/x86/mm/fault_64-xen.c | 39 --
43     arch/x86/mm/highmem_32-xen.c | 9
44     arch/x86/mm/init_32-xen.c | 2
45     arch/x86/mm/init_64-xen.c | 24 +
46     arch/x86/mm/pageattr_64-xen.c | 6
47     arch/x86/mm/pgtable_32-xen.c | 28 +
48     drivers/char/tpm/tpm_xen.c | 5
49     drivers/xen/balloon/sysfs.c | 1
50     drivers/xen/blkback/xenbus.c | 4
51     drivers/xen/blkfront/blkfront.c | 1
52     drivers/xen/blktap/xenbus.c | 4
53     drivers/xen/core/evtchn.c | 4
54     drivers/xen/core/smpboot.c | 18 -
55     drivers/xen/fbfront/xenfb.c | 1
56     drivers/xen/fbfront/xenkbd.c | 1
57     drivers/xen/netback/xenbus.c | 4
58     drivers/xen/netfront/netfront.c | 49 +--
59     drivers/xen/pciback/xenbus.c | 1
60     drivers/xen/pcifront/xenbus.c | 1
61     drivers/xen/tpmback/common.h | 4
62     drivers/xen/tpmback/interface.c | 5
63     drivers/xen/tpmback/tpmback.c | 16 -
64     drivers/xen/tpmback/xenbus.c | 5
65     drivers/xen/xenbus/xenbus_probe.c | 17 -
66     drivers/xen/xenbus/xenbus_probe.h | 4
67     drivers/xen/xenbus/xenbus_probe_backend.c | 8
68     include/asm-x86/i8253.h | 4
69     include/asm-x86/mach-xen/asm/desc_32.h | 2
70     include/asm-x86/mach-xen/asm/dma-mapping_64.h | 4
71     include/asm-x86/mach-xen/asm/e820_64.h | 2
72     include/asm-x86/mach-xen/asm/hw_irq_64.h | 33 +-
73     include/asm-x86/mach-xen/asm/hypervisor.h | 2
74     include/asm-x86/mach-xen/asm/io_32.h | 6
75     include/asm-x86/mach-xen/asm/io_64.h | 8
76     include/asm-x86/mach-xen/asm/mmu_context_32.h | 10
77     include/asm-x86/mach-xen/asm/pgalloc_32.h | 21 +
78     include/asm-x86/mach-xen/asm/pgtable_32.h | 25 +
79     include/asm-x86/mach-xen/asm/pgtable_64.h | 9
80     include/asm-x86/mach-xen/asm/processor_32.h | 6
81     include/asm-x86/mach-xen/asm/segment_32.h | 23 +
82     include/asm-x86/mach-xen/asm/smp_32.h | 5
83     include/asm-x86/mach-xen/asm/smp_64.h | 3
84     include/xen/xenbus.h | 24 +
85     lib/swiotlb-xen.c | 19 -
86     76 files changed, 889 insertions(+), 1113 deletions(-)
87    
88     --- a/arch/x86/Kconfig
89     +++ b/arch/x86/Kconfig
90     @@ -48,13 +48,15 @@
91    
92     config CLOCKSOURCE_WATCHDOG
93     def_bool y
94     + depends on !X86_XEN
95    
96     config GENERIC_CLOCKEVENTS
97     def_bool y
98     + depends on !X86_XEN
99    
100     config GENERIC_CLOCKEVENTS_BROADCAST
101     def_bool y
102     - depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
103     + depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
104    
105     config LOCKDEP_SUPPORT
106     def_bool y
107     --- a/arch/x86/ia32/ia32entry-xen.S
108     +++ b/arch/x86/ia32/ia32entry-xen.S
109     @@ -465,7 +465,7 @@
110     .quad sys32_vm86_warning /* vm86old */
111     .quad compat_sys_wait4
112     .quad sys_swapoff /* 115 */
113     - .quad sys32_sysinfo
114     + .quad compat_sys_sysinfo
115     .quad sys32_ipc
116     .quad sys_fsync
117     .quad stub32_sigreturn
118     @@ -510,7 +510,7 @@
119     .quad sys_sched_yield
120     .quad sys_sched_get_priority_max
121     .quad sys_sched_get_priority_min /* 160 */
122     - .quad sys_sched_rr_get_interval
123     + .quad sys32_sched_rr_get_interval
124     .quad compat_sys_nanosleep
125     .quad sys_mremap
126     .quad sys_setresuid16
127     @@ -668,4 +668,5 @@
128     .quad compat_sys_vmsplice
129     .quad compat_sys_move_pages
130     .quad sys_getcpu
131     + .quad sys_epoll_pwait
132     ia32_syscall_end:
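
[The three syscall-table fixes above mirror mainline 2.6.21: sysinfo and sched_rr_get_interval get proper 32-bit translation entries, and sys_epoll_pwait is appended to the table. What a compat/32-bit entry buys is layout translation: the 32-bit user struct differs from the native 64-bit one, so the entry converts before copying back to userspace. A minimal sketch of that pattern with hypothetical names (the real compat_sys_sysinfo lives in kernel/compat.c):

    /* sketch only: the shape of a compat entry, not the kernel's code */
    struct demo_info64 { long uptime; unsigned long loads[3]; };
    struct demo_info32 { int uptime; unsigned int loads[3]; };

    static void native_fill(struct demo_info64 *i)
    {
            i->uptime = 4242;
            i->loads[0] = i->loads[1] = i->loads[2] = 1;
    }

    /* call the native code, then narrow each field to the 32-bit ABI */
    static void compat_fill(struct demo_info32 *u)
    {
            struct demo_info64 i;

            native_fill(&i);
            u->uptime = (int)i.uptime;
            u->loads[0] = (unsigned int)i.loads[0];
            u->loads[1] = (unsigned int)i.loads[1];
            u->loads[2] = (unsigned int)i.loads[2];
    }
]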
133     --- a/arch/x86/kernel/Makefile
134     +++ b/arch/x86/kernel/Makefile
135     @@ -104,6 +104,6 @@
136     pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
137     endif
138    
139     -disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
140     - smpboot_$(BITS).o tsc_$(BITS).o
141     +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
142     + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
143     %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
144     --- a/arch/x86/kernel/acpi/sleep_64-xen.c
145     +++ b/arch/x86/kernel/acpi/sleep_64-xen.c
146     @@ -59,7 +59,7 @@
147     unsigned long acpi_video_flags;
148     extern char wakeup_start, wakeup_end;
149    
150     -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
151     +extern unsigned long acpi_copy_wakeup_routine(unsigned long);
152    
153     static pgd_t low_ptr;
154    
155     @@ -67,8 +67,10 @@
156     {
157     pgd_t *slot0 = pgd_offset(current->mm, 0UL);
158     low_ptr = *slot0;
159     + /* FIXME: We're playing with the current task's page tables here, which
160     + * is potentially dangerous on SMP systems.
161     + */
162     set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
163     - WARN_ON(num_online_cpus() != 1);
164     local_flush_tlb();
165     }
166     #endif
167     --- a/arch/x86/kernel/apic_32-xen.c
168     +++ b/arch/x86/kernel/apic_32-xen.c
169     @@ -25,6 +25,8 @@
170     #include <linux/kernel_stat.h>
171     #include <linux/sysdev.h>
172     #include <linux/cpu.h>
173     +#include <linux/clockchips.h>
174     +#include <linux/acpi_pmtmr.h>
175     #include <linux/module.h>
176    
177     #include <asm/atomic.h>
178     @@ -56,83 +58,26 @@
179     */
180    
181     /*
182     - * Debug level
183     + * Debug level, exported for io_apic.c
184     */
185     int apic_verbosity;
186    
187     #ifndef CONFIG_XEN
188     static int modern_apic(void)
189     {
190     - unsigned int lvr, version;
191     /* AMD systems use old APIC versions, so check the CPU */
192     if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
193     - boot_cpu_data.x86 >= 0xf)
194     + boot_cpu_data.x86 >= 0xf)
195     return 1;
196     - lvr = apic_read(APIC_LVR);
197     - version = GET_APIC_VERSION(lvr);
198     - return version >= 0x14;
199     + return lapic_get_version() >= 0x14;
200     }
201     #endif /* !CONFIG_XEN */
202    
203     -/*
204     - * 'what should we do if we get a hw irq event on an illegal vector'.
205     - * each architecture has to answer this themselves.
206     - */
207     -void ack_bad_irq(unsigned int irq)
208     -{
209     - printk("unexpected IRQ trap at vector %02x\n", irq);
210     - /*
211     - * Currently unexpected vectors happen only on SMP and APIC.
212     - * We _must_ ack these because every local APIC has only N
213     - * irq slots per priority level, and a 'hanging, unacked' IRQ
214     - * holds up an irq slot - in excessive cases (when multiple
215     - * unexpected vectors occur) that might lock up the APIC
216     - * completely.
217     - * But only ack when the APIC is enabled -AK
218     - */
219     - if (cpu_has_apic)
220     - ack_APIC_irq();
221     -}
222     -
223     int get_physical_broadcast(void)
224     {
225     return 0xff;
226     }
227    
228     -#ifndef CONFIG_XEN
229     -#ifndef CONFIG_SMP
230     -static void up_apic_timer_interrupt_call(void)
231     -{
232     - int cpu = smp_processor_id();
233     -
234     - /*
235     - * the NMI deadlock-detector uses this.
236     - */
237     - per_cpu(irq_stat, cpu).apic_timer_irqs++;
238     -
239     - smp_local_timer_interrupt();
240     -}
241     -#endif
242     -
243     -void smp_send_timer_broadcast_ipi(void)
244     -{
245     - cpumask_t mask;
246     -
247     - cpus_and(mask, cpu_online_map, timer_bcast_ipi);
248     - if (!cpus_empty(mask)) {
249     -#ifdef CONFIG_SMP
250     - send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
251     -#else
252     - /*
253     - * We can directly call the apic timer interrupt handler
254     - * in UP case. Minus all irq related functions
255     - */
256     - up_apic_timer_interrupt_call();
257     -#endif
258     - }
259     -}
260     -#endif
261     -
262     int setup_profiling_timer(unsigned int multiplier)
263     {
264     return -EINVAL;
265     --- a/arch/x86/kernel/cpu/common-xen.c
266     +++ b/arch/x86/kernel/cpu/common-xen.c
267     @@ -610,7 +610,7 @@
268     struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
269     {
270     memset(regs, 0, sizeof(struct pt_regs));
271     - regs->xgs = __KERNEL_PDA;
272     + regs->xfs = __KERNEL_PDA;
273     return regs;
274     }
275    
276     @@ -667,12 +667,12 @@
277     .pcurrent = &init_task,
278     };
279    
280     -static inline void set_kernel_gs(void)
281     +static inline void set_kernel_fs(void)
282     {
283     - /* Set %gs for this CPU's PDA. Memory clobber is to create a
284     + /* Set %fs for this CPU's PDA. Memory clobber is to create a
285     barrier with respect to any PDA operations, so the compiler
286     doesn't move any before here. */
287     - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
288     + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
289     }
290    
291     /* Initialize the CPU's GDT and PDA. The boot CPU does this for
292     @@ -730,7 +730,7 @@
293     }
294     BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
295    
296     - set_kernel_gs();
297     + set_kernel_fs();
298     }
299    
300     /* Common CPU init for both boot and secondary CPUs */
301     @@ -775,8 +775,8 @@
302     __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
303     #endif
304    
305     - /* Clear %fs. */
306     - asm volatile ("mov %0, %%fs" : : "r" (0));
307     + /* Clear %gs. */
308     + asm volatile ("mov %0, %%gs" : : "r" (0));
309    
310     /* Clear all 6 debug registers: */
311     set_debugreg(0, 0);
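
[These hunks track the mainline 2.6.21 switch of the i386 per-CPU PDA from %gs to %fs; %gs is freed up and is now the register cleared at CPU init instead of %fs. The access idiom stays the same, just through the other segment register. A standalone sketch with a hypothetical PDA layout:

    #include <stddef.h>

    /* sketch only: a PDA-like struct reached through the %fs base,
     * mirroring the "movl %fs:PDA_cpu, ..." idiom in entry_32-xen.S;
     * the field layout here is made up */
    struct demo_pda {
            void *pcurrent;         /* current task */
            int cpu_number;         /* what PDA_cpu names via asm-offsets */
    };

    static inline int demo_read_pda_cpu(void)
    {
            int cpu;

            /* "%c1" prints the constant offset without the '$' prefix */
            asm volatile("movl %%fs:%c1, %0"
                         : "=r" (cpu)
                         : "i" (offsetof(struct demo_pda, cpu_number)));
            return cpu;
    }
]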
312     --- a/arch/x86/kernel/e820_32-xen.c
313     +++ b/arch/x86/kernel/e820_32-xen.c
314     @@ -14,6 +14,7 @@
315     #include <asm/pgtable.h>
316     #include <asm/page.h>
317     #include <asm/e820.h>
318     +#include <asm/setup.h>
319     #include <xen/interface/memory.h>
320    
321     #ifdef CONFIG_EFI
322     @@ -157,21 +158,22 @@
323     .flags = IORESOURCE_BUSY | IORESOURCE_IO
324     } };
325    
326     -static int romsignature(const unsigned char *x)
327     +#define ROMSIGNATURE 0xaa55
328     +
329     +static int __init romsignature(const unsigned char *rom)
330     {
331     unsigned short sig;
332     - int ret = 0;
333     - if (probe_kernel_address((const unsigned short *)x, sig) == 0)
334     - ret = (sig == 0xaa55);
335     - return ret;
336     +
337     + return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
338     + sig == ROMSIGNATURE;
339     }
340    
341     static int __init romchecksum(unsigned char *rom, unsigned long length)
342     {
343     - unsigned char *p, sum = 0;
344     + unsigned char sum;
345    
346     - for (p = rom; p < rom + length; p++)
347     - sum += *p;
348     + for (sum = 0; length; length--)
349     + sum += *rom++;
350     return sum == 0;
351     }
352    
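
[The rewritten helpers encode the classic option-ROM rule: a valid ROM starts with the little-endian signature 0x55 0xAA and all of its bytes sum to zero modulo 256. A standalone demonstration of both checks (plain C; the kernel additionally needs probe_kernel_address() because reading the ROM mapping may fault):

    #include <stdio.h>
    #include <string.h>

    #define ROMSIGNATURE 0xaa55

    static int rom_valid(const unsigned char *rom, unsigned long length)
    {
            unsigned char sum = 0;
            unsigned long i;

            if (length < 2 || (rom[0] | rom[1] << 8) != ROMSIGNATURE)
                    return 0;
            for (i = 0; i < length; i++)
                    sum += rom[i];
            return sum == 0;
    }

    int main(void)
    {
            unsigned char rom[512];
            unsigned char sum = 0;
            int i;

            memset(rom, 0, sizeof(rom));
            rom[0] = 0x55;
            rom[1] = 0xaa;
            for (i = 0; i < 511; i++)
                    sum += rom[i];
            rom[511] = (unsigned char)-sum; /* force the byte sum to 0 */
            printf("valid: %d\n", rom_valid(rom, sizeof(rom)));
            return 0;
    }
]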
353     --- a/arch/x86/kernel/e820_64-xen.c
354     +++ b/arch/x86/kernel/e820_64-xen.c
355     @@ -88,6 +88,13 @@
356     return 1;
357     }
358    
359     +#ifdef CONFIG_NUMA
360     + /* NUMA memory to node map */
361     + if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
362     + *addrp = nodemap_addr + nodemap_size;
363     + return 1;
364     + }
365     +#endif
366     /* XXX ramdisk image here? */
367     #else
368     if (last < (table_end<<PAGE_SHIFT)) {
369     @@ -215,6 +222,37 @@
370     }
371    
372     /*
373     + * Find the hole size in the range.
374     + */
375     +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
376     +{
377     + unsigned long ram = 0;
378     + int i;
379     +
380     + for (i = 0; i < e820.nr_map; i++) {
381     + struct e820entry *ei = &e820.map[i];
382     + unsigned long last, addr;
383     +
384     + if (ei->type != E820_RAM ||
385     + ei->addr+ei->size <= start ||
386     + ei->addr >= end)
387     + continue;
388     +
389     + addr = round_up(ei->addr, PAGE_SIZE);
390     + if (addr < start)
391     + addr = start;
392     +
393     + last = round_down(ei->addr + ei->size, PAGE_SIZE);
394     + if (last >= end)
395     + last = end;
396     +
397     + if (last > addr)
398     + ram += last - addr;
399     + }
400     + return ((end - start) - ram);
401     +}
402     +
403     +/*
404     * Mark e820 reserved areas as busy for the resource manager.
405     */
406     void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
407     @@ -725,7 +763,7 @@
408     }
409     early_param("memmap", parse_memmap_opt);
410    
411     -void finish_e820_parsing(void)
412     +void __init finish_e820_parsing(void)
413     {
414     if (userdef) {
415     printk(KERN_INFO "user-defined physical RAM map:\n");
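
[A quick worked example of the new e820_hole_size(): it subtracts the page-aligned RAM coverage from the queried range and reports the remainder as hole. A compressed userspace model with a toy two-entry map:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define E820_RAM 1

    struct entry { unsigned long addr, size; int type; };

    int main(void)
    {
            /* toy map: RAM below 0x9f000, then RAM from 1 MiB up */
            struct entry map[] = {
                    { 0x0,      0x9f000,    E820_RAM },
                    { 0x100000, 0x3ff00000, E820_RAM },
            };
            unsigned long start = 0, end = 0x100000, ram = 0;
            int i;

            for (i = 0; i < 2; i++) {
                    unsigned long a, l;

                    if (map[i].type != E820_RAM ||
                        map[i].addr + map[i].size <= start ||
                        map[i].addr >= end)
                            continue;
                    a = (map[i].addr + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
                    if (a < start)
                            a = start;
                    l = (map[i].addr + map[i].size) & ~(PAGE_SIZE - 1);
                    if (l > end)
                            l = end;
                    if (l > a)
                            ram += l - a;
            }
            /* prints 0x61000: the legacy hole between 0x9f000 and 1 MiB */
            printf("hole: %#lx\n", (end - start) - ram);
            return 0;
    }
]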
416     --- a/arch/x86/kernel/entry_32-xen.S
417     +++ b/arch/x86/kernel/entry_32-xen.S
418     @@ -30,7 +30,7 @@
419     * 18(%esp) - %eax
420     * 1C(%esp) - %ds
421     * 20(%esp) - %es
422     - * 24(%esp) - %gs
423     + * 24(%esp) - %fs
424     * 28(%esp) - orig_eax
425     * 2C(%esp) - %eip
426     * 30(%esp) - %cs
427     @@ -102,9 +102,9 @@
428    
429     #define SAVE_ALL \
430     cld; \
431     - pushl %gs; \
432     + pushl %fs; \
433     CFI_ADJUST_CFA_OFFSET 4;\
434     - /*CFI_REL_OFFSET gs, 0;*/\
435     + /*CFI_REL_OFFSET fs, 0;*/\
436     pushl %es; \
437     CFI_ADJUST_CFA_OFFSET 4;\
438     /*CFI_REL_OFFSET es, 0;*/\
439     @@ -136,7 +136,7 @@
440     movl %edx, %ds; \
441     movl %edx, %es; \
442     movl $(__KERNEL_PDA), %edx; \
443     - movl %edx, %gs
444     + movl %edx, %fs
445    
446     #define RESTORE_INT_REGS \
447     popl %ebx; \
448     @@ -169,9 +169,9 @@
449     2: popl %es; \
450     CFI_ADJUST_CFA_OFFSET -4;\
451     /*CFI_RESTORE es;*/\
452     -3: popl %gs; \
453     +3: popl %fs; \
454     CFI_ADJUST_CFA_OFFSET -4;\
455     - /*CFI_RESTORE gs;*/\
456     + /*CFI_RESTORE fs;*/\
457     .pushsection .fixup,"ax"; \
458     4: movl $0,(%esp); \
459     jmp 1b; \
460     @@ -230,6 +230,7 @@
461     CFI_ADJUST_CFA_OFFSET -4
462     jmp syscall_exit
463     CFI_ENDPROC
464     +END(ret_from_fork)
465    
466     /*
467     * Return to user mode is not as complex as all this looks,
468     @@ -261,6 +262,7 @@
469     # int/exception return?
470     jne work_pending
471     jmp restore_all
472     +END(ret_from_exception)
473    
474     #ifdef CONFIG_PREEMPT
475     ENTRY(resume_kernel)
476     @@ -275,6 +277,7 @@
477     jz restore_all
478     call preempt_schedule_irq
479     jmp need_resched
480     +END(resume_kernel)
481     #endif
482     CFI_ENDPROC
483    
484     @@ -352,16 +355,17 @@
485     movl PT_OLDESP(%esp), %ecx
486     xorl %ebp,%ebp
487     TRACE_IRQS_ON
488     -1: mov PT_GS(%esp), %gs
489     +1: mov PT_FS(%esp), %fs
490     ENABLE_INTERRUPTS_SYSEXIT
491     CFI_ENDPROC
492     .pushsection .fixup,"ax"
493     -2: movl $0,PT_GS(%esp)
494     +2: movl $0,PT_FS(%esp)
495     jmp 1b
496     .section __ex_table,"a"
497     .align 4
498     .long 1b,2b
499     .popsection
500     +ENDPROC(sysenter_entry)
501    
502     # pv sysenter call handler stub
503     ENTRY(sysenter_entry_pv)
504     @@ -533,6 +537,7 @@
505     jmp hypercall_page + (__HYPERVISOR_iret * 32)
506     #endif
507     CFI_ENDPROC
508     +ENDPROC(system_call)
509    
510     # perform work that needs to be done immediately before resumption
511     ALIGN
512     @@ -578,6 +583,7 @@
513     xorl %edx, %edx
514     call do_notify_resume
515     jmp resume_userspace_sig
516     +END(work_pending)
517    
518     # perform syscall exit tracing
519     ALIGN
520     @@ -593,6 +599,7 @@
521     cmpl $(nr_syscalls), %eax
522     jnae syscall_call
523     jmp syscall_exit
524     +END(syscall_trace_entry)
525    
526     # perform syscall exit tracing
527     ALIGN
528     @@ -606,6 +613,7 @@
529     movl $1, %edx
530     call do_syscall_trace
531     jmp resume_userspace
532     +END(syscall_exit_work)
533     CFI_ENDPROC
534    
535     RING0_INT_FRAME # can't unwind into user space anyway
536     @@ -616,16 +624,18 @@
537     GET_THREAD_INFO(%ebp)
538     movl $-EFAULT,PT_EAX(%esp)
539     jmp resume_userspace
540     +END(syscall_fault)
541    
542     syscall_badsys:
543     movl $-ENOSYS,PT_EAX(%esp)
544     jmp resume_userspace
545     +END(syscall_badsys)
546     CFI_ENDPROC
547    
548     #ifndef CONFIG_XEN
549     #define FIXUP_ESPFIX_STACK \
550     /* since we are on a wrong stack, we cant make it a C code :( */ \
551     - movl %gs:PDA_cpu, %ebx; \
552     + movl %fs:PDA_cpu, %ebx; \
553     PER_CPU(cpu_gdt_descr, %ebx); \
554     movl GDS_address(%ebx), %ebx; \
555     GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
556     @@ -656,9 +666,9 @@
557     ENTRY(interrupt)
558     .text
559    
560     -vector=0
561     ENTRY(irq_entries_start)
562     RING0_INT_FRAME
563     +vector=0
564     .rept NR_IRQS
565     ALIGN
566     .if vector
567     @@ -667,11 +677,16 @@
568     1: pushl $~(vector)
569     CFI_ADJUST_CFA_OFFSET 4
570     jmp common_interrupt
571     -.data
572     + .previous
573     .long 1b
574     -.text
575     + .text
576     vector=vector+1
577     .endr
578     +END(irq_entries_start)
579     +
580     +.previous
581     +END(interrupt)
582     +.previous
583    
584     /*
585     * the CPU automatically disables interrupts when executing an IRQ vector,
586     @@ -684,6 +699,7 @@
587     movl %esp,%eax
588     call do_IRQ
589     jmp ret_from_intr
590     +ENDPROC(common_interrupt)
591     CFI_ENDPROC
592    
593     #define BUILD_INTERRUPT(name, nr) \
594     @@ -696,10 +712,16 @@
595     movl %esp,%eax; \
596     call smp_/**/name; \
597     jmp ret_from_intr; \
598     - CFI_ENDPROC
599     + CFI_ENDPROC; \
600     +ENDPROC(name)
601    
602     /* The include is where all of the SMP etc. interrupts come from */
603     #include "entry_arch.h"
604     +
605     +/* This alternate entry is needed because we hijack the apic LVTT */
606     +#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
607     +BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
608     +#endif
609     #else
610     #define UNWIND_ESPFIX_STACK
611     #endif
612     @@ -710,7 +732,7 @@
613     CFI_ADJUST_CFA_OFFSET 4
614     ALIGN
615     error_code:
616     - /* the function address is in %gs's slot on the stack */
617     + /* the function address is in %fs's slot on the stack */
618     pushl %es
619     CFI_ADJUST_CFA_OFFSET 4
620     /*CFI_REL_OFFSET es, 0*/
621     @@ -739,20 +761,20 @@
622     CFI_ADJUST_CFA_OFFSET 4
623     CFI_REL_OFFSET ebx, 0
624     cld
625     - pushl %gs
626     + pushl %fs
627     CFI_ADJUST_CFA_OFFSET 4
628     - /*CFI_REL_OFFSET gs, 0*/
629     + /*CFI_REL_OFFSET fs, 0*/
630     movl $(__KERNEL_PDA), %ecx
631     - movl %ecx, %gs
632     + movl %ecx, %fs
633     UNWIND_ESPFIX_STACK
634     popl %ecx
635     CFI_ADJUST_CFA_OFFSET -4
636     /*CFI_REGISTER es, ecx*/
637     - movl PT_GS(%esp), %edi # get the function address
638     + movl PT_FS(%esp), %edi # get the function address
639     movl PT_ORIG_EAX(%esp), %edx # get the error code
640     movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
641     - mov %ecx, PT_GS(%esp)
642     - /*CFI_REL_OFFSET gs, ES*/
643     + mov %ecx, PT_FS(%esp)
644     + /*CFI_REL_OFFSET fs, ES*/
645     movl $(__USER_DS), %ecx
646     movl %ecx, %ds
647     movl %ecx, %es
648     @@ -839,7 +861,7 @@
649     .byte 0x18 # pop %eax
650     .byte 0x1c # pop %ds
651     .byte 0x20 # pop %es
652     - .byte 0x24,0x24 # pop %gs
653     + .byte 0x24,0x24 # pop %fs
654     .byte 0x28,0x28,0x28 # add $4,%esp
655     .byte 0x2c # iret
656     .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
657     @@ -905,6 +927,7 @@
658     CFI_ADJUST_CFA_OFFSET 4
659     jmp error_code
660     CFI_ENDPROC
661     +END(coprocessor_error)
662    
663     ENTRY(simd_coprocessor_error)
664     RING0_INT_FRAME
665     @@ -914,6 +937,7 @@
666     CFI_ADJUST_CFA_OFFSET 4
667     jmp error_code
668     CFI_ENDPROC
669     +END(simd_coprocessor_error)
670    
671     ENTRY(device_not_available)
672     RING0_INT_FRAME
673     @@ -936,6 +960,7 @@
674     call math_state_restore
675     jmp ret_from_exception
676     CFI_ENDPROC
677     +END(device_not_available)
678    
679     #ifndef CONFIG_XEN
680     /*
681     @@ -1097,10 +1122,12 @@
682     .align 4
683     .long 1b,iret_exc
684     .previous
685     +END(native_iret)
686    
687     ENTRY(native_irq_enable_sysexit)
688     sti
689     sysexit
690     +END(native_irq_enable_sysexit)
691     #endif
692    
693     KPROBE_ENTRY(int3)
694     @@ -1123,6 +1150,7 @@
695     CFI_ADJUST_CFA_OFFSET 4
696     jmp error_code
697     CFI_ENDPROC
698     +END(overflow)
699    
700     ENTRY(bounds)
701     RING0_INT_FRAME
702     @@ -1132,6 +1160,7 @@
703     CFI_ADJUST_CFA_OFFSET 4
704     jmp error_code
705     CFI_ENDPROC
706     +END(bounds)
707    
708     ENTRY(invalid_op)
709     RING0_INT_FRAME
710     @@ -1141,6 +1170,7 @@
711     CFI_ADJUST_CFA_OFFSET 4
712     jmp error_code
713     CFI_ENDPROC
714     +END(invalid_op)
715    
716     ENTRY(coprocessor_segment_overrun)
717     RING0_INT_FRAME
718     @@ -1150,6 +1180,7 @@
719     CFI_ADJUST_CFA_OFFSET 4
720     jmp error_code
721     CFI_ENDPROC
722     +END(coprocessor_segment_overrun)
723    
724     ENTRY(invalid_TSS)
725     RING0_EC_FRAME
726     @@ -1157,6 +1188,7 @@
727     CFI_ADJUST_CFA_OFFSET 4
728     jmp error_code
729     CFI_ENDPROC
730     +END(invalid_TSS)
731    
732     ENTRY(segment_not_present)
733     RING0_EC_FRAME
734     @@ -1164,6 +1196,7 @@
735     CFI_ADJUST_CFA_OFFSET 4
736     jmp error_code
737     CFI_ENDPROC
738     +END(segment_not_present)
739    
740     ENTRY(stack_segment)
741     RING0_EC_FRAME
742     @@ -1171,6 +1204,7 @@
743     CFI_ADJUST_CFA_OFFSET 4
744     jmp error_code
745     CFI_ENDPROC
746     +END(stack_segment)
747    
748     KPROBE_ENTRY(general_protection)
749     RING0_EC_FRAME
750     @@ -1186,6 +1220,7 @@
751     CFI_ADJUST_CFA_OFFSET 4
752     jmp error_code
753     CFI_ENDPROC
754     +END(alignment_check)
755    
756     ENTRY(divide_error)
757     RING0_INT_FRAME
758     @@ -1195,6 +1230,7 @@
759     CFI_ADJUST_CFA_OFFSET 4
760     jmp error_code
761     CFI_ENDPROC
762     +END(divide_error)
763    
764     #ifdef CONFIG_X86_MCE
765     ENTRY(machine_check)
766     @@ -1205,6 +1241,7 @@
767     CFI_ADJUST_CFA_OFFSET 4
768     jmp error_code
769     CFI_ENDPROC
770     +END(machine_check)
771     #endif
772    
773     #ifndef CONFIG_XEN
774     @@ -1224,6 +1261,7 @@
775     CFI_ADJUST_CFA_OFFSET 4
776     jmp error_code
777     CFI_ENDPROC
778     +END(spurious_interrupt_bug)
779    
780     ENTRY(kernel_thread_helper)
781     pushl $0 # fake return address for unwinder
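
[The many END()/ENDPROC() additions in this file are the 2.6.21 symbol-annotation sweep: they emit size/type directives so the unwinder, kallsyms, and profiling tools see correct function bounds. As a reminder of what the markers expand to (per include/linux/linkage.h of this era; quoted from memory, so verify against the tree):

    #ifndef END
    #define END(name) \
            .size name, .-name
    #endif

    /* ENDPROC additionally marks the symbol as a function */
    #ifndef ENDPROC
    #define ENDPROC(name) \
            .type name, @function; \
            END(name)
    #endif
]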
782     --- a/arch/x86/kernel/entry_64-xen.S
783     +++ b/arch/x86/kernel/entry_64-xen.S
784     @@ -629,6 +629,9 @@
785     ENTRY(call_function_interrupt)
786     apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
787     END(call_function_interrupt)
788     +ENTRY(irq_move_cleanup_interrupt)
789     + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
790     +END(irq_move_cleanup_interrupt)
791     #endif
792    
793     ENTRY(apic_timer_interrupt)
794     --- a/arch/x86/kernel/genapic_64-xen.c
795     +++ b/arch/x86/kernel/genapic_64-xen.c
796     @@ -65,8 +65,8 @@
797     * Some x86_64 machines use physical APIC mode regardless of how many
798     * procs/clusters are present (x86_64 ES7000 is an example).
799     */
800     - if (acpi_fadt.revision > FADT2_REVISION_ID)
801     - if (acpi_fadt.force_apic_physical_destination_mode) {
802     + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
803     + if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
804     genapic = &apic_cluster;
805     goto print;
806     }
807     --- a/arch/x86/kernel/head64-xen.c
808     +++ b/arch/x86/kernel/head64-xen.c
809     @@ -42,8 +42,6 @@
810     #define OLD_CL_BASE_ADDR 0x90000
811     #define OLD_CL_OFFSET 0x90022
812    
813     -extern char saved_command_line[];
814     -
815     static void __init copy_bootdata(char *real_mode_data)
816     {
817     #ifndef CONFIG_XEN
818     @@ -59,14 +57,14 @@
819     new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
820     }
821     command_line = (char *) ((u64)(new_data));
822     - memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
823     + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
824     #else
825     int max_cmdline;
826    
827     if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
828     max_cmdline = COMMAND_LINE_SIZE;
829     - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
830     - saved_command_line[max_cmdline-1] = '\0';
831     + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
832     + boot_command_line[max_cmdline-1] = '\0';
833     #endif
834     }
835    
836     --- a/arch/x86/kernel/head_32-xen.S
837     +++ b/arch/x86/kernel/head_32-xen.S
838     @@ -27,6 +27,7 @@
839     #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
840     #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
841    
842     +.section .text.head,"ax",@progbits
843     #define VIRT_ENTRY_OFFSET 0x0
844     .org VIRT_ENTRY_OFFSET
845     ENTRY(startup_32)
846     @@ -60,11 +61,11 @@
847    
848     movb $1,X86_HARD_MATH
849    
850     - xorl %eax,%eax # Clear FS
851     - movl %eax,%fs
852     + xorl %eax,%eax # Clear GS
853     + movl %eax,%gs
854    
855     movl $(__KERNEL_PDA),%eax
856     - mov %eax,%gs
857     + mov %eax,%fs
858    
859     cld # gcc2 wants the direction flag cleared at all times
860    
861     @@ -75,7 +76,7 @@
862     * Point the GDT at this CPU's PDA. This will be
863     * cpu_gdt_table and boot_pda.
864     */
865     -setup_pda:
866     +ENTRY(setup_pda)
867     /* get the PDA pointer */
868     movl $boot_pda, %eax
869    
870     --- a/arch/x86/kernel/io_apic_32-xen.c
871     +++ b/arch/x86/kernel/io_apic_32-xen.c
872     @@ -164,7 +164,7 @@
873     */
874     static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
875     {
876     - volatile struct io_apic *io_apic = io_apic_base(apic);
877     + volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
878     if (sis_apic_bug)
879     writel(reg, &io_apic->index);
880     writel(value, &io_apic->data);
881     @@ -387,7 +387,7 @@
882     break;
883     entry = irq_2_pin + entry->next;
884     }
885     - set_native_irq_info(irq, cpumask);
886     + irq_desc[irq].affinity = cpumask;
887     spin_unlock_irqrestore(&ioapic_lock, flags);
888     }
889    
890     @@ -526,8 +526,8 @@
891     package_index = CPU_TO_PACKAGEINDEX(i);
892     for (j = 0; j < NR_IRQS; j++) {
893     unsigned long value_now, delta;
894     - /* Is this an active IRQ? */
895     - if (!irq_desc[j].action)
896     + /* Is this an active IRQ or balancing disabled ? */
897     + if (!irq_desc[j].action || irq_balancing_disabled(j))
898     continue;
899     if ( package_index == i )
900     IRQ_DELTA(package_index,j) = 0;
901     @@ -780,7 +780,7 @@
902     return 0;
903     }
904    
905     -int __init irqbalance_disable(char *str)
906     +int __devinit irqbalance_disable(char *str)
907     {
908     irqbalance_disabled = 1;
909     return 1;
910     @@ -1319,11 +1319,9 @@
911     trigger == IOAPIC_LEVEL)
912     set_irq_chip_and_handler_name(irq, &ioapic_chip,
913     handle_fasteoi_irq, "fasteoi");
914     - else {
915     - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
916     + else
917     set_irq_chip_and_handler_name(irq, &ioapic_chip,
918     handle_edge_irq, "edge");
919     - }
920     set_intr_gate(vector, interrupt[irq]);
921     }
922     #else
923     @@ -1397,7 +1395,6 @@
924     }
925     spin_lock_irqsave(&ioapic_lock, flags);
926     __ioapic_write_entry(apic, pin, entry);
927     - set_native_irq_info(irq, TARGET_CPUS);
928     spin_unlock_irqrestore(&ioapic_lock, flags);
929     }
930     }
931     @@ -1628,7 +1625,7 @@
932     v = apic_read(APIC_LVR);
933     printk(KERN_INFO "... APIC VERSION: %08x\n", v);
934     ver = GET_APIC_VERSION(v);
935     - maxlvt = get_maxlvt();
936     + maxlvt = lapic_get_maxlvt();
937    
938     v = apic_read(APIC_TASKPRI);
939     printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
940     @@ -1962,7 +1959,7 @@
941     #endif
942    
943     #ifndef CONFIG_XEN
944     -static int no_timer_check __initdata;
945     +int no_timer_check __initdata;
946    
947     static int __init notimercheck(char *s)
948     {
949     @@ -2355,7 +2352,7 @@
950    
951     disable_8259A_irq(0);
952     set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
953     - "fasteio");
954     + "fasteoi");
955     apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
956     enable_8259A_irq(0);
957    
958     @@ -2646,7 +2643,7 @@
959     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
960    
961     write_msi_msg(irq, &msg);
962     - set_native_irq_info(irq, mask);
963     + irq_desc[irq].affinity = mask;
964     }
965     #endif /* CONFIG_SMP */
966    
967     @@ -2665,25 +2662,32 @@
968     .retrigger = ioapic_retrigger_irq,
969     };
970    
971     -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
972     +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
973     {
974     struct msi_msg msg;
975     - int ret;
976     + int irq, ret;
977     + irq = create_irq();
978     + if (irq < 0)
979     + return irq;
980     +
981     + set_irq_msi(irq, desc);
982     ret = msi_compose_msg(dev, irq, &msg);
983     - if (ret < 0)
984     + if (ret < 0) {
985     + destroy_irq(irq);
986     return ret;
987     + }
988    
989     write_msi_msg(irq, &msg);
990    
991     set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
992     "edge");
993    
994     - return 0;
995     + return irq;
996     }
997    
998     void arch_teardown_msi_irq(unsigned int irq)
999     {
1000     - return;
1001     + destroy_irq(irq);
1002     }
1003    
1004     #endif /* CONFIG_PCI_MSI */
1005     @@ -2723,7 +2727,7 @@
1006     dest = cpu_mask_to_apicid(mask);
1007    
1008     target_ht_irq(irq, dest);
1009     - set_native_irq_info(irq, mask);
1010     + irq_desc[irq].affinity = mask;
1011     }
1012     #endif
1013    
1014     @@ -2931,7 +2935,6 @@
1015    
1016     spin_lock_irqsave(&ioapic_lock, flags);
1017     __ioapic_write_entry(ioapic, pin, entry);
1018     - set_native_irq_info(irq, TARGET_CPUS);
1019     spin_unlock_irqrestore(&ioapic_lock, flags);
1020    
1021     return 0;
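
[The arch_setup_msi_irq() rework above follows the 2.6.21 MSI API change: the arch now allocates the IRQ itself via create_irq(), binds the msi_desc with set_irq_msi(), and returns the new IRQ, tearing it down again with destroy_irq() on failure or teardown. Drivers are unaffected; a hypothetical consumer still just does:

    #include <linux/interrupt.h>
    #include <linux/pci.h>

    /* illustrative driver snippet; the "demo" names are made up */
    static irqreturn_t demo_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    static int demo_setup_msi(struct pci_dev *pdev)
    {
            int err;

            err = pci_enable_msi(pdev); /* ends up in arch_setup_msi_irq() */
            if (err)
                    return err;
            /* pdev->irq now carries the IRQ the arch code created */
            return request_irq(pdev->irq, demo_handler, 0, "demo", pdev);
    }
]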
1022     --- a/arch/x86/kernel/io_apic_64-xen.c
1023     +++ b/arch/x86/kernel/io_apic_64-xen.c
1024     @@ -36,6 +36,7 @@
1025     #include <acpi/acpi_bus.h>
1026     #endif
1027    
1028     +#include <asm/idle.h>
1029     #include <asm/io.h>
1030     #include <asm/smp.h>
1031     #include <asm/desc.h>
1032     @@ -47,7 +48,20 @@
1033     #include <asm/msidef.h>
1034     #include <asm/hypertransport.h>
1035    
1036     -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
1037     +struct irq_cfg {
1038     +#ifndef CONFIG_XEN
1039     + cpumask_t domain;
1040     + cpumask_t old_domain;
1041     +#endif
1042     + unsigned move_cleanup_count;
1043     + u8 vector;
1044     + u8 move_in_progress : 1;
1045     +};
1046     +
1047     +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
1048     +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
1049     +
1050     +static int assign_irq_vector(int irq, cpumask_t mask);
1051    
1052     #define __apicdebuginit __init
1053    
1054     @@ -88,7 +102,7 @@
1055     * Rough estimation of how many shared IRQs there are, can
1056     * be changed anytime.
1057     */
1058     -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
1059     +#define MAX_PLUS_SHARED_IRQS NR_IRQS
1060     #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
1061    
1062     /*
1063     @@ -259,21 +273,19 @@
1064    
1065     static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1066     {
1067     + struct irq_cfg *cfg = irq_cfg + irq;
1068     unsigned long flags;
1069     unsigned int dest;
1070     cpumask_t tmp;
1071     - int vector;
1072    
1073     cpus_and(tmp, mask, cpu_online_map);
1074     if (cpus_empty(tmp))
1075     - tmp = TARGET_CPUS;
1076     -
1077     - cpus_and(mask, tmp, CPU_MASK_ALL);
1078     + return;
1079    
1080     - vector = assign_irq_vector(irq, mask, &tmp);
1081     - if (vector < 0)
1082     + if (assign_irq_vector(irq, mask))
1083     return;
1084    
1085     + cpus_and(tmp, cfg->domain, mask);
1086     dest = cpu_mask_to_apicid(tmp);
1087    
1088     /*
1089     @@ -282,8 +294,8 @@
1090     dest = SET_APIC_LOGICAL_ID(dest);
1091    
1092     spin_lock_irqsave(&ioapic_lock, flags);
1093     - __target_IO_APIC_irq(irq, dest, vector);
1094     - set_native_irq_info(irq, mask);
1095     + __target_IO_APIC_irq(irq, dest, cfg->vector);
1096     + irq_desc[irq].affinity = mask;
1097     spin_unlock_irqrestore(&ioapic_lock, flags);
1098     }
1099     #endif
1100     @@ -329,11 +341,11 @@
1101     reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
1102     reg ACTION; \
1103     io_apic_modify(entry->apic, reg); \
1104     + FINAL; \
1105     if (!entry->next) \
1106     break; \
1107     entry = irq_2_pin + entry->next; \
1108     } \
1109     - FINAL; \
1110     }
1111    
1112     #define DO_ACTION(name,R,ACTION, FINAL) \
1113     @@ -666,74 +678,58 @@
1114     return irq;
1115     }
1116    
1117     -static inline int IO_APIC_irq_trigger(int irq)
1118     -{
1119     - int apic, idx, pin;
1120     -
1121     - for (apic = 0; apic < nr_ioapics; apic++) {
1122     - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1123     - idx = find_irq_entry(apic,pin,mp_INT);
1124     - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
1125     - return irq_trigger(idx);
1126     - }
1127     - }
1128     - /*
1129     - * nonexistent IRQs are edge default
1130     - */
1131     - return 0;
1132     -}
1133     -
1134     -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1135     -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
1136     -
1137     -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1138     +static int __assign_irq_vector(int irq, cpumask_t mask)
1139     {
1140     - int vector;
1141     struct physdev_irq irq_op;
1142     + struct irq_cfg *cfg;
1143    
1144     - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1145     + BUG_ON((unsigned)irq >= NR_IRQS);
1146     + cfg = &irq_cfg[irq];
1147    
1148     - cpus_and(*result, mask, cpu_online_map);
1149     + if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1150     + return -EBUSY;
1151    
1152     - if (irq_vector[irq] > 0)
1153     - return irq_vector[irq];
1154     + if (cfg->vector)
1155     + return 0;
1156    
1157     irq_op.irq = irq;
1158     if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1159     return -ENOSPC;
1160    
1161     - vector = irq_op.vector;
1162     - irq_vector[irq] = vector;
1163     + cfg->vector = irq_op.vector;
1164    
1165     - return vector;
1166     + return 0;
1167     }
1168    
1169     -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1170     +static int assign_irq_vector(int irq, cpumask_t mask)
1171     {
1172     - int vector;
1173     + int err;
1174     unsigned long flags;
1175    
1176     spin_lock_irqsave(&vector_lock, flags);
1177     - vector = __assign_irq_vector(irq, mask, result);
1178     + err = __assign_irq_vector(irq, mask);
1179     spin_unlock_irqrestore(&vector_lock, flags);
1180     - return vector;
1181     + return err;
1182     }
1183    
1184     #ifndef CONFIG_XEN
1185     static void __clear_irq_vector(int irq)
1186     {
1187     + struct irq_cfg *cfg;
1188     cpumask_t mask;
1189     int cpu, vector;
1190    
1191     - BUG_ON(!irq_vector[irq]);
1192     + BUG_ON((unsigned)irq >= NR_IRQS);
1193     + cfg = &irq_cfg[irq];
1194     + BUG_ON(!cfg->vector);
1195    
1196     - vector = irq_vector[irq];
1197     - cpus_and(mask, irq_domain[irq], cpu_online_map);
1198     + vector = cfg->vector;
1199     + cpus_and(mask, cfg->domain, cpu_online_map);
1200     for_each_cpu_mask(cpu, mask)
1201     per_cpu(vector_irq, cpu)[vector] = -1;
1202    
1203     - irq_vector[irq] = 0;
1204     - irq_domain[irq] = CPU_MASK_NONE;
1205     + cfg->vector = 0;
1206     + cfg->domain = CPU_MASK_NONE;
1207     }
1208    
1209     void __setup_vector_irq(int cpu)
1210     @@ -743,10 +739,10 @@
1211     int irq, vector;
1212    
1213     /* Mark the inuse vectors */
1214     - for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
1215     - if (!cpu_isset(cpu, irq_domain[irq]))
1216     + for (irq = 0; irq < NR_IRQS; ++irq) {
1217     + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1218     continue;
1219     - vector = irq_vector[irq];
1220     + vector = irq_cfg[irq].vector;
1221     per_cpu(vector_irq, cpu)[vector] = irq;
1222     }
1223     /* Mark the free vectors */
1224     @@ -754,41 +750,49 @@
1225     irq = per_cpu(vector_irq, cpu)[vector];
1226     if (irq < 0)
1227     continue;
1228     - if (!cpu_isset(cpu, irq_domain[irq]))
1229     + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1230     per_cpu(vector_irq, cpu)[vector] = -1;
1231     }
1232     }
1233    
1234     -extern void (*interrupt[NR_IRQS])(void);
1235     -
1236     static struct irq_chip ioapic_chip;
1237    
1238     -#define IOAPIC_AUTO -1
1239     -#define IOAPIC_EDGE 0
1240     -#define IOAPIC_LEVEL 1
1241     -
1242     -static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1243     +static void ioapic_register_intr(int irq, unsigned long trigger)
1244     {
1245     - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1246     - trigger == IOAPIC_LEVEL)
1247     + if (trigger)
1248     set_irq_chip_and_handler_name(irq, &ioapic_chip,
1249     handle_fasteoi_irq, "fasteoi");
1250     - else {
1251     - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1252     + else
1253     set_irq_chip_and_handler_name(irq, &ioapic_chip,
1254     handle_edge_irq, "edge");
1255     - }
1256     }
1257     #else
1258     -#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
1259     +#define ioapic_register_intr(irq,trigger) ((void)0)
1260     #endif /* !CONFIG_XEN */
1261    
1262     -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
1263     +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1264     + int trigger, int polarity)
1265     {
1266     + struct irq_cfg *cfg = irq_cfg + irq;
1267     struct IO_APIC_route_entry entry;
1268     - int vector;
1269     - unsigned long flags;
1270     + cpumask_t mask;
1271    
1272     + if (!IO_APIC_IRQ(irq))
1273     + return;
1274     +
1275     + mask = TARGET_CPUS;
1276     + if (assign_irq_vector(irq, mask))
1277     + return;
1278     +
1279     +#ifndef CONFIG_XEN
1280     + cpus_and(mask, cfg->domain, mask);
1281     +#endif
1282     +
1283     + apic_printk(APIC_VERBOSE,KERN_DEBUG
1284     + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1285     + "IRQ %d Mode:%i Active:%i)\n",
1286     + apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
1287     + irq, trigger, polarity);
1288    
1289     /*
1290     * add it to the IO-APIC irq-routing table:
1291     @@ -797,41 +801,23 @@
1292    
1293     entry.delivery_mode = INT_DELIVERY_MODE;
1294     entry.dest_mode = INT_DEST_MODE;
1295     + entry.dest = cpu_mask_to_apicid(mask);
1296     entry.mask = 0; /* enable IRQ */
1297     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1298     + entry.trigger = trigger;
1299     + entry.polarity = polarity;
1300     + entry.vector = cfg->vector;
1301    
1302     - entry.trigger = irq_trigger(idx);
1303     - entry.polarity = irq_polarity(idx);
1304     -
1305     - if (irq_trigger(idx)) {
1306     - entry.trigger = 1;
1307     + /* Mask level triggered irqs.
1308     + * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1309     + */
1310     + if (trigger)
1311     entry.mask = 1;
1312     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1313     - }
1314    
1315     - if (/* !apic && */ !IO_APIC_IRQ(irq))
1316     - return;
1317     -
1318     - if (IO_APIC_IRQ(irq)) {
1319     - cpumask_t mask;
1320     - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1321     - if (vector < 0)
1322     - return;
1323     -
1324     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1325     - entry.vector = vector;
1326     -
1327     - ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1328     - if (!apic && (irq < 16))
1329     - disable_8259A_irq(irq);
1330     - }
1331     + ioapic_register_intr(irq, trigger);
1332     + if (irq < 16)
1333     + disable_8259A_irq(irq);
1334    
1335     ioapic_write_entry(apic, pin, entry);
1336     -
1337     - spin_lock_irqsave(&ioapic_lock, flags);
1338     - set_native_irq_info(irq, TARGET_CPUS);
1339     - spin_unlock_irqrestore(&ioapic_lock, flags);
1340     -
1341     }
1342    
1343     static void __init setup_IO_APIC_irqs(void)
1344     @@ -856,8 +842,8 @@
1345     irq = pin_2_irq(idx, apic, pin);
1346     add_pin_to_irq(irq, apic, pin);
1347    
1348     - setup_IO_APIC_irq(apic, pin, idx, irq);
1349     -
1350     + setup_IO_APIC_irq(apic, pin, irq,
1351     + irq_trigger(idx), irq_polarity(idx));
1352     }
1353     }
1354    
1355     @@ -888,7 +874,7 @@
1356     */
1357     entry.dest_mode = INT_DEST_MODE;
1358     entry.mask = 0; /* unmask IRQ now */
1359     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1360     + entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
1361     entry.delivery_mode = INT_DELIVERY_MODE;
1362     entry.polarity = 0;
1363     entry.trigger = 0;
1364     @@ -988,18 +974,17 @@
1365    
1366     printk(KERN_DEBUG ".... IRQ redirection table:\n");
1367    
1368     - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1369     - " Stat Dest Deli Vect: \n");
1370     + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1371     + " Stat Dmod Deli Vect: \n");
1372    
1373     for (i = 0; i <= reg_01.bits.entries; i++) {
1374     struct IO_APIC_route_entry entry;
1375    
1376     entry = ioapic_read_entry(apic, i);
1377    
1378     - printk(KERN_DEBUG " %02x %03X %02X ",
1379     + printk(KERN_DEBUG " %02x %03X ",
1380     i,
1381     - entry.dest.logical.logical_dest,
1382     - entry.dest.physical.physical_dest
1383     + entry.dest
1384     );
1385    
1386     printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1387     @@ -1263,8 +1248,7 @@
1388     entry.dest_mode = 0; /* Physical */
1389     entry.delivery_mode = dest_ExtINT; /* ExtInt */
1390     entry.vector = 0;
1391     - entry.dest.physical.physical_dest =
1392     - GET_APIC_ID(apic_read(APIC_ID));
1393     + entry.dest = GET_APIC_ID(apic_read(APIC_ID));
1394    
1395     /*
1396     * Add it to the IO-APIC irq-routing table:
1397     @@ -1349,16 +1333,15 @@
1398    
1399     static int ioapic_retrigger_irq(unsigned int irq)
1400     {
1401     + struct irq_cfg *cfg = &irq_cfg[irq];
1402     cpumask_t mask;
1403     - unsigned vector;
1404     unsigned long flags;
1405    
1406     spin_lock_irqsave(&vector_lock, flags);
1407     - vector = irq_vector[irq];
1408     cpus_clear(mask);
1409     - cpu_set(first_cpu(irq_domain[irq]), mask);
1410     + cpu_set(first_cpu(cfg->domain), mask);
1411    
1412     - send_IPI_mask(mask, vector);
1413     + send_IPI_mask(mask, cfg->vector);
1414     spin_unlock_irqrestore(&vector_lock, flags);
1415    
1416     return 1;
1417     @@ -1373,8 +1356,68 @@
1418     * races.
1419     */
1420    
1421     +#ifdef CONFIG_SMP
1422     +asmlinkage void smp_irq_move_cleanup_interrupt(void)
1423     +{
1424     + unsigned vector, me;
1425     + ack_APIC_irq();
1426     + exit_idle();
1427     + irq_enter();
1428     +
1429     + me = smp_processor_id();
1430     + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
1431     + unsigned int irq;
1432     + struct irq_desc *desc;
1433     + struct irq_cfg *cfg;
1434     + irq = __get_cpu_var(vector_irq)[vector];
1435     + if (irq >= NR_IRQS)
1436     + continue;
1437     +
1438     + desc = irq_desc + irq;
1439     + cfg = irq_cfg + irq;
1440     + spin_lock(&desc->lock);
1441     + if (!cfg->move_cleanup_count)
1442     + goto unlock;
1443     +
1444     + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
1445     + goto unlock;
1446     +
1447     + __get_cpu_var(vector_irq)[vector] = -1;
1448     + cfg->move_cleanup_count--;
1449     +unlock:
1450     + spin_unlock(&desc->lock);
1451     + }
1452     +
1453     + irq_exit();
1454     +}
1455     +
1456     +static void irq_complete_move(unsigned int irq)
1457     +{
1458     + struct irq_cfg *cfg = irq_cfg + irq;
1459     + unsigned vector, me;
1460     +
1461     + if (likely(!cfg->move_in_progress))
1462     + return;
1463     +
1464     + vector = ~get_irq_regs()->orig_rax;
1465     + me = smp_processor_id();
1466     + if ((vector == cfg->vector) &&
1467     + cpu_isset(smp_processor_id(), cfg->domain)) {
1468     + cpumask_t cleanup_mask;
1469     +
1470     + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1471     + cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1472     + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1473     + cfg->move_in_progress = 0;
1474     + }
1475     +}
1476     +#else
1477     +static inline void irq_complete_move(unsigned int irq) {}
1478     +#endif
1479     +
1480     static void ack_apic_edge(unsigned int irq)
1481     {
1482     + irq_complete_move(irq);
1483     move_native_irq(irq);
1484     ack_APIC_irq();
1485     }
1486     @@ -1383,6 +1426,7 @@
1487     {
1488     int do_unmask_irq = 0;
1489    
1490     + irq_complete_move(irq);
1491     #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
1492     /* If we are moving the irq we need to mask it */
1493     if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
1494     @@ -1434,7 +1478,7 @@
1495     */
1496     for (irq = 0; irq < NR_IRQS ; irq++) {
1497     int tmp = irq;
1498     - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
1499     + if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
1500     /*
1501     * Hmm.. We don't have an entry for this,
1502     * so default to an old-fashioned 8259
1503     @@ -1532,7 +1576,7 @@
1504    
1505     entry1.dest_mode = 0; /* physical delivery */
1506     entry1.mask = 0; /* unmask IRQ now */
1507     - entry1.dest.physical.physical_dest = hard_smp_processor_id();
1508     + entry1.dest = hard_smp_processor_id();
1509     entry1.delivery_mode = dest_ExtINT;
1510     entry1.polarity = entry0.polarity;
1511     entry1.trigger = 0;
1512     @@ -1576,15 +1620,14 @@
1513     */
1514     static inline void check_timer(void)
1515     {
1516     + struct irq_cfg *cfg = irq_cfg + 0;
1517     int apic1, pin1, apic2, pin2;
1518     - int vector;
1519     - cpumask_t mask;
1520    
1521     /*
1522     * get/set the timer IRQ vector:
1523     */
1524     disable_8259A_irq(0);
1525     - vector = assign_irq_vector(0, TARGET_CPUS, &mask);
1526     + assign_irq_vector(0, TARGET_CPUS);
1527    
1528     /*
1529     * Subtle, code in do_timer_interrupt() expects an AEOI
1530     @@ -1604,7 +1647,7 @@
1531     apic2 = ioapic_i8259.apic;
1532    
1533     apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1534     - vector, apic1, pin1, apic2, pin2);
1535     + cfg->vector, apic1, pin1, apic2, pin2);
1536    
1537     if (pin1 != -1) {
1538     /*
1539     @@ -1635,7 +1678,7 @@
1540     /*
1541     * legacy devices should be connected to IO APIC #0
1542     */
1543     - setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1544     + setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
1545     if (timer_irq_works()) {
1546     apic_printk(APIC_VERBOSE," works.\n");
1547     nmi_watchdog_default();
1548     @@ -1660,14 +1703,14 @@
1549    
1550     disable_8259A_irq(0);
1551     irq_desc[0].chip = &lapic_irq_type;
1552     - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
1553     + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1554     enable_8259A_irq(0);
1555    
1556     if (timer_irq_works()) {
1557     apic_printk(APIC_VERBOSE," works.\n");
1558     return;
1559     }
1560     - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
1561     + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1562     apic_printk(APIC_VERBOSE," failed.\n");
1563    
1564     apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
1565     @@ -1821,19 +1864,16 @@
1566     /* Allocate an unused irq */
1567     int irq;
1568     int new;
1569     - int vector = 0;
1570     unsigned long flags;
1571     - cpumask_t mask;
1572    
1573     irq = -ENOSPC;
1574     spin_lock_irqsave(&vector_lock, flags);
1575     for (new = (NR_IRQS - 1); new >= 0; new--) {
1576     if (platform_legacy_irq(new))
1577     continue;
1578     - if (irq_vector[new] != 0)
1579     + if (irq_cfg[new].vector != 0)
1580     continue;
1581     - vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
1582     - if (likely(vector > 0))
1583     + if (__assign_irq_vector(new, TARGET_CPUS) == 0)
1584     irq = new;
1585     break;
1586     }
1587     @@ -1863,12 +1903,15 @@
1588     #ifdef CONFIG_PCI_MSI
1589     static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1590     {
1591     - int vector;
1592     + struct irq_cfg *cfg = irq_cfg + irq;
1593     + int err;
1594     unsigned dest;
1595     cpumask_t tmp;
1596    
1597     - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1598     - if (vector >= 0) {
1599     + tmp = TARGET_CPUS;
1600     + err = assign_irq_vector(irq, tmp);
1601     + if (!err) {
1602     + cpus_and(tmp, cfg->domain, tmp);
1603     dest = cpu_mask_to_apicid(tmp);
1604    
1605     msg->address_hi = MSI_ADDR_BASE_HI;
1606     @@ -1888,40 +1931,38 @@
1607     ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1608     MSI_DATA_DELIVERY_FIXED:
1609     MSI_DATA_DELIVERY_LOWPRI) |
1610     - MSI_DATA_VECTOR(vector);
1611     + MSI_DATA_VECTOR(cfg->vector);
1612     }
1613     - return vector;
1614     + return err;
1615     }
1616    
1617     #ifdef CONFIG_SMP
1618     static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1619     {
1620     + struct irq_cfg *cfg = irq_cfg + irq;
1621     struct msi_msg msg;
1622     unsigned int dest;
1623     cpumask_t tmp;
1624     - int vector;
1625    
1626     cpus_and(tmp, mask, cpu_online_map);
1627     if (cpus_empty(tmp))
1628     - tmp = TARGET_CPUS;
1629     -
1630     - cpus_and(mask, tmp, CPU_MASK_ALL);
1631     + return;
1632    
1633     - vector = assign_irq_vector(irq, mask, &tmp);
1634     - if (vector < 0)
1635     + if (assign_irq_vector(irq, mask))
1636     return;
1637    
1638     + cpus_and(tmp, cfg->domain, mask);
1639     dest = cpu_mask_to_apicid(tmp);
1640    
1641     read_msi_msg(irq, &msg);
1642    
1643     msg.data &= ~MSI_DATA_VECTOR_MASK;
1644     - msg.data |= MSI_DATA_VECTOR(vector);
1645     + msg.data |= MSI_DATA_VECTOR(cfg->vector);
1646     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1647     msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1648    
1649     write_msi_msg(irq, &msg);
1650     - set_native_irq_info(irq, mask);
1651     + irq_desc[irq].affinity = mask;
1652     }
1653     #endif /* CONFIG_SMP */
1654    
1655     @@ -1940,24 +1981,31 @@
1656     .retrigger = ioapic_retrigger_irq,
1657     };
1658    
1659     -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1660     +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
1661     {
1662     struct msi_msg msg;
1663     - int ret;
1664     + int irq, ret;
1665     + irq = create_irq();
1666     + if (irq < 0)
1667     + return irq;
1668     +
1669     + set_irq_msi(irq, desc);
1670     ret = msi_compose_msg(dev, irq, &msg);
1671     - if (ret < 0)
1672     + if (ret < 0) {
1673     + destroy_irq(irq);
1674     return ret;
1675     + }
1676    
1677     write_msi_msg(irq, &msg);
1678    
1679     set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
1680    
1681     - return 0;
1682     + return irq;
1683     }
1684    
1685     void arch_teardown_msi_irq(unsigned int irq)
1686     {
1687     - return;
1688     + destroy_irq(irq);
1689     }
1690    
1691     #endif /* CONFIG_PCI_MSI */
1692     @@ -1985,24 +2033,22 @@
1693    
1694     static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1695     {
1696     + struct irq_cfg *cfg = irq_cfg + irq;
1697     unsigned int dest;
1698     cpumask_t tmp;
1699     - int vector;
1700    
1701     cpus_and(tmp, mask, cpu_online_map);
1702     if (cpus_empty(tmp))
1703     - tmp = TARGET_CPUS;
1704     -
1705     - cpus_and(mask, tmp, CPU_MASK_ALL);
1706     + return;
1707    
1708     - vector = assign_irq_vector(irq, mask, &tmp);
1709     - if (vector < 0)
1710     + if (assign_irq_vector(irq, mask))
1711     return;
1712    
1713     + cpus_and(tmp, cfg->domain, mask);
1714     dest = cpu_mask_to_apicid(tmp);
1715    
1716     - target_ht_irq(irq, dest, vector);
1717     - set_native_irq_info(irq, mask);
1718     + target_ht_irq(irq, dest, cfg->vector);
1719     + irq_desc[irq].affinity = mask;
1720     }
1721     #endif
1722    
1723     @@ -2019,14 +2065,17 @@
1724    
1725     int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1726     {
1727     - int vector;
1728     + struct irq_cfg *cfg = irq_cfg + irq;
1729     + int err;
1730     cpumask_t tmp;
1731    
1732     - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1733     - if (vector >= 0) {
1734     + tmp = TARGET_CPUS;
1735     + err = assign_irq_vector(irq, tmp);
1736     + if (!err) {
1737     struct ht_irq_msg msg;
1738     unsigned dest;
1739    
1740     + cpus_and(tmp, cfg->domain, tmp);
1741     dest = cpu_mask_to_apicid(tmp);
1742    
1743     msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1744     @@ -2034,7 +2083,7 @@
1745     msg.address_lo =
1746     HT_IRQ_LOW_BASE |
1747     HT_IRQ_LOW_DEST_ID(dest) |
1748     - HT_IRQ_LOW_VECTOR(vector) |
1749     + HT_IRQ_LOW_VECTOR(cfg->vector) |
1750     ((INT_DEST_MODE == 0) ?
1751     HT_IRQ_LOW_DM_PHYSICAL :
1752     HT_IRQ_LOW_DM_LOGICAL) |
1753     @@ -2049,7 +2098,7 @@
1754     set_irq_chip_and_handler_name(irq, &ht_irq_chip,
1755     handle_edge_irq, "edge");
1756     }
1757     - return vector;
1758     + return err;
1759     }
1760     #endif /* CONFIG_HT_IRQ */
1761    
1762     @@ -2074,13 +2123,8 @@
1763     }
1764    
1765    
1766     -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
1767     +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
1768     {
1769     - struct IO_APIC_route_entry entry;
1770     - unsigned long flags;
1771     - int vector;
1772     - cpumask_t mask;
1773     -
1774     if (!IO_APIC_IRQ(irq)) {
1775     apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
1776     ioapic);
1777     @@ -2093,42 +2137,7 @@
1778     if (irq >= 16)
1779     add_pin_to_irq(irq, ioapic, pin);
1780    
1781     -
1782     - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1783     - if (vector < 0)
1784     - return vector;
1785     -
1786     - /*
1787     - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1788     - * Note that we mask (disable) IRQs now -- these get enabled when the
1789     - * corresponding device driver registers for this IRQ.
1790     - */
1791     -
1792     - memset(&entry,0,sizeof(entry));
1793     -
1794     - entry.delivery_mode = INT_DELIVERY_MODE;
1795     - entry.dest_mode = INT_DEST_MODE;
1796     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1797     - entry.trigger = edge_level;
1798     - entry.polarity = active_high_low;
1799     - entry.mask = 1; /* Disabled (masked) */
1800     - entry.vector = vector & 0xff;
1801     -
1802     - apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
1803     - "IRQ %d Mode:%i Active:%i)\n", ioapic,
1804     - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
1805     - edge_level, active_high_low);
1806     -
1807     - ioapic_register_intr(irq, entry.vector, edge_level);
1808     -
1809     - if (!ioapic && (irq < 16))
1810     - disable_8259A_irq(irq);
1811     -
1812     - ioapic_write_entry(ioapic, pin, entry);
1813     -
1814     - spin_lock_irqsave(&ioapic_lock, flags);
1815     - set_native_irq_info(irq, TARGET_CPUS);
1816     - spin_unlock_irqrestore(&ioapic_lock, flags);
1817     + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
1818    
1819     return 0;
1820     }
1821     @@ -2161,8 +2170,10 @@
1822     * when you have too many devices, because at that time only boot
1823     * cpu is online.
1824     */
1825     - if(!irq_vector[irq])
1826     - setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
1827     + if (!irq_cfg[irq].vector)
1828     + setup_IO_APIC_irq(ioapic, pin, irq,
1829     + irq_trigger(irq_entry),
1830     + irq_polarity(irq_entry));
1831     else
1832     set_ioapic_affinity_irq(irq, TARGET_CPUS);
1833     }
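For reference, the reworked MSI setup path above inverts the old calling convention: the IRQ is now allocated inside the arch hook and returned to the caller, with destroy_irq() as the error/teardown path. Consolidating the hunk, the post-patch function reads roughly as follows (a sketch assembled from the +/- lines above, not a verbatim copy of the file):

    int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
    {
        struct msi_msg msg;
        int irq, ret;

        irq = create_irq();             /* allocate vector and irq_desc slot */
        if (irq < 0)
            return irq;                 /* out of vectors */

        set_irq_msi(irq, desc);         /* bind descriptor before composing */
        ret = msi_compose_msg(dev, irq, &msg);
        if (ret < 0) {
            destroy_irq(irq);           /* undo the allocation on failure */
            return ret;
        }

        write_msi_msg(irq, &msg);
        set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
        return irq;                     /* caller now receives the irq */
    }

arch_teardown_msi_irq() pairs with this by calling destroy_irq() instead of being a no-op, as the following hunk shows.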
1834     --- a/arch/x86/kernel/irq_32-xen.c
1835     +++ b/arch/x86/kernel/irq_32-xen.c
1836     @@ -10,7 +10,6 @@
1837     * io_apic.c.)
1838     */
1839    
1840     -#include <asm/uaccess.h>
1841     #include <linux/module.h>
1842     #include <linux/seq_file.h>
1843     #include <linux/interrupt.h>
1844     @@ -19,19 +18,34 @@
1845     #include <linux/cpu.h>
1846     #include <linux/delay.h>
1847    
1848     +#include <asm/apic.h>
1849     +#include <asm/uaccess.h>
1850     +
1851     DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
1852     EXPORT_PER_CPU_SYMBOL(irq_stat);
1853    
1854     -#ifndef CONFIG_X86_LOCAL_APIC
1855     /*
1856     * 'what should we do if we get a hw irq event on an illegal vector'.
1857     * each architecture has to answer this themselves.
1858     */
1859     void ack_bad_irq(unsigned int irq)
1860     {
1861     - printk("unexpected IRQ trap at vector %02x\n", irq);
1862     -}
1863     + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
1864     +
1865     +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
1866     + /*
1867     + * Currently unexpected vectors happen only on SMP and APIC.
1868     + * We _must_ ack these because every local APIC has only N
1869     + * irq slots per priority level, and a 'hanging, unacked' IRQ
1870     + * holds up an irq slot - in excessive cases (when multiple
1871     + * unexpected vectors occur) that might lock up the APIC
1872     + * completely.
1873     + * But only ack when the APIC is enabled -AK
1874     + */
1875     + if (cpu_has_apic)
1876     + ack_APIC_irq();
1877     #endif
1878     +}
1879    
1880     #ifdef CONFIG_4KSTACKS
1881     /*
1882     --- a/arch/x86/kernel/irq_64-xen.c
1883     +++ b/arch/x86/kernel/irq_64-xen.c
1884     @@ -18,6 +18,7 @@
1885     #include <asm/uaccess.h>
1886     #include <asm/io_apic.h>
1887     #include <asm/idle.h>
1888     +#include <asm/smp.h>
1889    
1890     atomic_t irq_err_count;
1891    
1892     @@ -120,9 +121,15 @@
1893    
1894     if (likely(irq < NR_IRQS))
1895     generic_handle_irq(irq);
1896     - else if (printk_ratelimit())
1897     - printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1898     - __func__, smp_processor_id(), irq);
1899     + else {
1900     +#ifndef CONFIG_XEN
1901     + if (!disable_apic)
1902     + ack_APIC_irq();
1903     +#endif
1904     + if (printk_ratelimit())
1905     + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1906     + __func__, smp_processor_id(), irq);
1907     + }
1908    
1909     irq_exit();
1910    
1911     --- a/arch/x86/kernel/microcode-xen.c
1912     +++ b/arch/x86/kernel/microcode-xen.c
1913     @@ -108,7 +108,7 @@
1914     return ret;
1915     }
1916    
1917     -static struct file_operations microcode_fops = {
1918     +static const struct file_operations microcode_fops = {
1919     .owner = THIS_MODULE,
1920     .write = microcode_write,
1921     .open = microcode_open,
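The microcode change above only constifies the file_operations table, which lets the compiler place it in read-only data so the function pointers cannot be overwritten at runtime. A minimal generic sketch of the pattern (hypothetical demo driver, not part of this patch):

    #include <linux/fs.h>
    #include <linux/module.h>

    static ssize_t demo_write(struct file *file, const char __user *buf,
                              size_t len, loff_t *ppos)
    {
        return len;                     /* accept and discard */
    }

    /* const: the ops table ends up in .rodata */
    static const struct file_operations demo_fops = {
        .owner = THIS_MODULE,
        .write = demo_write,
    };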
1922     --- a/arch/x86/kernel/mpparse_32-xen.c
1923     +++ b/arch/x86/kernel/mpparse_32-xen.c
1924     @@ -1079,7 +1079,7 @@
1925     static int gsi_to_irq[MAX_GSI_NUM];
1926    
1927     /* Don't set up the ACPI SCI because it's already set up */
1928     - if (acpi_fadt.sci_int == gsi)
1929     + if (acpi_gbl_FADT.sci_interrupt == gsi)
1930     return gsi;
1931    
1932     ioapic = mp_find_ioapic(gsi);
1933     @@ -1136,7 +1136,7 @@
1934     /*
1935     * Don't assign IRQ used by ACPI SCI
1936     */
1937     - if (gsi == acpi_fadt.sci_int)
1938     + if (gsi == acpi_gbl_FADT.sci_interrupt)
1939     gsi = pci_irq++;
1940     gsi_to_irq[irq] = gsi;
1941     } else {
1942     --- a/arch/x86/kernel/mpparse_64-xen.c
1943     +++ b/arch/x86/kernel/mpparse_64-xen.c
1944     @@ -60,9 +60,9 @@
1945     /* Processor that is doing the boot up */
1946     unsigned int boot_cpu_id = -1U;
1947     /* Internal processor count */
1948     -unsigned int num_processors __initdata = 0;
1949     +unsigned int num_processors __cpuinitdata = 0;
1950    
1951     -unsigned disabled_cpus __initdata;
1952     +unsigned disabled_cpus __cpuinitdata;
1953    
1954     /* Bitmask of physically existing CPUs */
1955     physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
1956     @@ -808,7 +808,7 @@
1957     return gsi;
1958    
1959     /* Don't set up the ACPI SCI because it's already set up */
1960     - if (acpi_fadt.sci_int == gsi)
1961     + if (acpi_gbl_FADT.sci_interrupt == gsi)
1962     return gsi;
1963    
1964     ioapic = mp_find_ioapic(gsi);
1965     --- a/arch/x86/kernel/pci-dma_32-xen.c
1966     +++ b/arch/x86/kernel/pci-dma_32-xen.c
1967     @@ -317,7 +317,7 @@
1968     return DMA_MEMORY_IO;
1969    
1970     free1_out:
1971     - kfree(dev->dma_mem->bitmap);
1972     + kfree(dev->dma_mem);
1973     out:
1974     if (mem_base)
1975     iounmap(mem_base);
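The pci-dma hunk above is an error-path fix in dma_declare_coherent_memory(): control reaches the free1_out label precisely because the bitmap allocation failed, so kfree()ing the (NULL) bitmap freed nothing and leaked the dma_mem struct. The corrected unwind, per the hunk and its visible context (sketch; surrounding labels as shown above):

    free1_out:
        kfree(dev->dma_mem);    /* bitmap alloc failed; free the struct itself */
    out:
        if (mem_base)
            iounmap(mem_base);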
1976     --- a/arch/x86/kernel/pci-swiotlb_64-xen.c
1977     +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c
1978     @@ -35,7 +35,7 @@
1979     #endif
1980     };
1981    
1982     -void pci_swiotlb_init(void)
1983     +void __init pci_swiotlb_init(void)
1984     {
1985     #if 0
1986     /* don't initialize swiotlb if iommu=off (no_iommu=1) */
1987     --- a/arch/x86/kernel/pcspeaker.c
1988     +++ b/arch/x86/kernel/pcspeaker.c
1989     @@ -7,6 +7,11 @@
1990     struct platform_device *pd;
1991     int ret;
1992    
1993     +#ifdef CONFIG_XEN
1994     + if (!is_initial_xendomain())
1995     + return 0;
1996     +#endif
1997     +
1998     pd = platform_device_alloc("pcspkr", -1);
1999     if (!pd)
2000     return -ENOMEM;
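The CONFIG_XEN guard added to pcspeaker.c above replaces the duplicated add_pcspkr() initcalls deleted from setup_32-xen.c and setup_64-xen.c further down. The consolidated registration then looks like this (sketch; the function name here is hypothetical, and is_initial_xendomain() is this tree's dom0 test):

    static int __init pcspkr_platform_init(void)    /* hypothetical name */
    {
        struct platform_device *pd;
        int ret;

    #ifdef CONFIG_XEN
        if (!is_initial_xendomain())
            return 0;           /* domU has no PC speaker hardware */
    #endif
        pd = platform_device_alloc("pcspkr", -1);
        if (!pd)
            return -ENOMEM;
        ret = platform_device_add(pd);
        if (ret)
            platform_device_put(pd);    /* drop the reference on failure */
        return ret;
    }
    device_initcall(pcspkr_platform_init);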
2001     --- a/arch/x86/kernel/process_32-xen.c
2002     +++ b/arch/x86/kernel/process_32-xen.c
2003     @@ -38,6 +38,7 @@
2004     #include <linux/ptrace.h>
2005     #include <linux/random.h>
2006     #include <linux/personality.h>
2007     +#include <linux/tick.h>
2008    
2009     #include <asm/uaccess.h>
2010     #include <asm/pgtable.h>
2011     @@ -160,6 +161,7 @@
2012    
2013     /* endless idle loop with no priority at all */
2014     while (1) {
2015     + tick_nohz_stop_sched_tick();
2016     while (!need_resched()) {
2017     void (*idle)(void);
2018    
2019     @@ -175,6 +177,7 @@
2020     __get_cpu_var(irq_stat).idle_timestamp = jiffies;
2021     idle();
2022     }
2023     + tick_nohz_restart_sched_tick();
2024     preempt_enable_no_resched();
2025     schedule();
2026     preempt_disable();
2027     @@ -247,8 +250,8 @@
2028     regs->eax,regs->ebx,regs->ecx,regs->edx);
2029     printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2030     regs->esi, regs->edi, regs->ebp);
2031     - printk(" DS: %04x ES: %04x GS: %04x\n",
2032     - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2033     + printk(" DS: %04x ES: %04x FS: %04x\n",
2034     + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
2035    
2036     cr0 = read_cr0();
2037     cr2 = read_cr2();
2038     @@ -279,7 +282,7 @@
2039    
2040     regs.xds = __USER_DS;
2041     regs.xes = __USER_DS;
2042     - regs.xgs = __KERNEL_PDA;
2043     + regs.xfs = __KERNEL_PDA;
2044     regs.orig_eax = -1;
2045     regs.eip = (unsigned long) kernel_thread_helper;
2046     regs.xcs = __KERNEL_CS | get_kernel_rpl();
2047     @@ -356,7 +359,7 @@
2048    
2049     p->thread.eip = (unsigned long) ret_from_fork;
2050    
2051     - savesegment(fs,p->thread.fs);
2052     + savesegment(gs,p->thread.gs);
2053    
2054     tsk = current;
2055     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2056     @@ -434,8 +437,8 @@
2057     dump->regs.eax = regs->eax;
2058     dump->regs.ds = regs->xds;
2059     dump->regs.es = regs->xes;
2060     - savesegment(fs,dump->regs.fs);
2061     - dump->regs.gs = regs->xgs;
2062     + dump->regs.fs = regs->xfs;
2063     + savesegment(gs,dump->regs.gs);
2064     dump->regs.orig_eax = regs->orig_eax;
2065     dump->regs.eip = regs->eip;
2066     dump->regs.cs = regs->xcs;
2067     @@ -616,16 +619,6 @@
2068     prefetch(&next->i387.fxsave);
2069    
2070     /*
2071     - * Restore %fs if needed.
2072     - *
2073     - * Glibc normally makes %fs be zero.
2074     - */
2075     - if (unlikely(next->fs))
2076     - loadsegment(fs, next->fs);
2077     -
2078     - write_pda(pcurrent, next_p);
2079     -
2080     - /*
2081     * Now maybe handle debug registers
2082     */
2083     if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
2084     @@ -633,6 +626,15 @@
2085    
2086     disable_tsc(prev_p, next_p);
2087    
2088     + /*
2089     + * Leave lazy mode, flushing any hypercalls made here.
2090     + * This must be done before restoring TLS segments so
2091     + * the GDT and LDT are properly updated, and must be
2092     + * done before math_state_restore, so the TS bit is up
2093     + * to date.
2094     + */
2095     + arch_leave_lazy_cpu_mode();
2096     +
2097     /* If the task has used fpu the last 5 timeslices, just do a full
2098     * restore of the math state immediately to avoid the trap; the
2099     * chances of needing FPU soon are obviously high now
2100     @@ -640,6 +642,14 @@
2101     if (next_p->fpu_counter > 5)
2102     math_state_restore();
2103    
2104     + /*
2105     + * Restore %gs if needed (which is common)
2106     + */
2107     + if (prev->gs | next->gs)
2108     + loadsegment(gs, next->gs);
2109     +
2110     + write_pda(pcurrent, next_p);
2111     +
2112     return prev_p;
2113     }
2114    
2115     --- a/arch/x86/kernel/process_64-xen.c
2116     +++ b/arch/x86/kernel/process_64-xen.c
2117     @@ -338,14 +338,17 @@
2118     void flush_thread(void)
2119     {
2120     struct task_struct *tsk = current;
2121     - struct thread_info *t = current_thread_info();
2122    
2123     - if (t->flags & _TIF_ABI_PENDING) {
2124     - t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
2125     - if (t->flags & _TIF_IA32)
2126     + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
2127     + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
2128     + if (test_tsk_thread_flag(tsk, TIF_IA32)) {
2129     + clear_tsk_thread_flag(tsk, TIF_IA32);
2130     + } else {
2131     + set_tsk_thread_flag(tsk, TIF_IA32);
2132     current_thread_info()->status |= TS_COMPAT;
2133     + }
2134     }
2135     - t->flags &= ~_TIF_DEBUG;
2136     + clear_tsk_thread_flag(tsk, TIF_DEBUG);
2137    
2138     tsk->thread.debugreg0 = 0;
2139     tsk->thread.debugreg1 = 0;
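The flush_thread() rewrite above swaps open-coded bit arithmetic on t->flags for the test/clear/set_tsk_thread_flag() accessors, which are atomic per flag and take the task pointer rather than a cached thread_info. Side by side (a sketch; the accessors are the standard <linux/sched.h> helpers, and the guard on TIF_ABI_PENDING makes the two forms equivalent):

    /* before: non-atomic read-modify-write toggling two bits at once */
    t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

    /* after: one atomic bit operation per flag, same net effect */
    clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
    if (test_tsk_thread_flag(tsk, TIF_IA32))
        clear_tsk_thread_flag(tsk, TIF_IA32);
    else
        set_tsk_thread_flag(tsk, TIF_IA32);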
2140     --- a/arch/x86/kernel/setup_32-xen.c
2141     +++ b/arch/x86/kernel/setup_32-xen.c
2142     @@ -33,7 +33,6 @@
2143     #include <linux/initrd.h>
2144     #include <linux/bootmem.h>
2145     #include <linux/seq_file.h>
2146     -#include <linux/platform_device.h>
2147     #include <linux/console.h>
2148     #include <linux/mca.h>
2149     #include <linux/root_dev.h>
2150     @@ -151,7 +150,7 @@
2151     #define RAMDISK_PROMPT_FLAG 0x8000
2152     #define RAMDISK_LOAD_FLAG 0x4000
2153    
2154     -static char command_line[COMMAND_LINE_SIZE];
2155     +static char __initdata command_line[COMMAND_LINE_SIZE];
2156    
2157     unsigned char __initdata boot_params[PARAM_SIZE];
2158    
2159     @@ -650,8 +649,8 @@
2160    
2161     if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2162     i = COMMAND_LINE_SIZE;
2163     - memcpy(saved_command_line, xen_start_info->cmd_line, i);
2164     - saved_command_line[i - 1] = '\0';
2165     + memcpy(boot_command_line, xen_start_info->cmd_line, i);
2166     + boot_command_line[i - 1] = '\0';
2167     parse_early_param();
2168    
2169     if (user_defined_memmap) {
2170     @@ -659,11 +658,19 @@
2171     print_memory_map("user");
2172     }
2173    
2174     - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2175     + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2176     *cmdline_p = command_line;
2177    
2178     max_low_pfn = setup_memory();
2179    
2180     +#ifdef CONFIG_VMI
2181     + /*
2182     + * Must be after max_low_pfn is determined, and before kernel
2183     + * pagetables are setup.
2184     + */
2185     + vmi_init();
2186     +#endif
2187     +
2188     /*
2189     * NOTE: before this point _nobody_ is allowed to allocate
2190     * any memory using the bootmem allocator. Although the
2191     @@ -826,7 +833,6 @@
2192     conswitchp = &dummy_con;
2193     #endif
2194     }
2195     - tsc_init();
2196     }
2197    
2198     static int
2199     @@ -836,31 +842,3 @@
2200     /* we're never actually going to get here... */
2201     return NOTIFY_DONE;
2202     }
2203     -
2204     -static __init int add_pcspkr(void)
2205     -{
2206     - struct platform_device *pd;
2207     - int ret;
2208     -
2209     - if (!is_initial_xendomain())
2210     - return 0;
2211     -
2212     - pd = platform_device_alloc("pcspkr", -1);
2213     - if (!pd)
2214     - return -ENOMEM;
2215     -
2216     - ret = platform_device_add(pd);
2217     - if (ret)
2218     - platform_device_put(pd);
2219     -
2220     - return ret;
2221     -}
2222     -device_initcall(add_pcspkr);
2223     -
2224     -/*
2225     - * Local Variables:
2226     - * mode:c
2227     - * c-file-style:"k&r"
2228     - * c-basic-offset:8
2229     - * End:
2230     - */
2231     --- a/arch/x86/kernel/setup_64-xen.c
2232     +++ b/arch/x86/kernel/setup_64-xen.c
2233     @@ -144,7 +144,7 @@
2234    
2235     extern int root_mountflags;
2236    
2237     -char command_line[COMMAND_LINE_SIZE];
2238     +char __initdata command_line[COMMAND_LINE_SIZE];
2239    
2240     struct resource standard_io_resources[] = {
2241     { .name = "dma1", .start = 0x00, .end = 0x1f,
2242     @@ -182,134 +182,6 @@
2243     .flags = IORESOURCE_RAM,
2244     };
2245    
2246     -#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
2247     -
2248     -static struct resource system_rom_resource = {
2249     - .name = "System ROM",
2250     - .start = 0xf0000,
2251     - .end = 0xfffff,
2252     - .flags = IORESOURCE_ROM,
2253     -};
2254     -
2255     -static struct resource extension_rom_resource = {
2256     - .name = "Extension ROM",
2257     - .start = 0xe0000,
2258     - .end = 0xeffff,
2259     - .flags = IORESOURCE_ROM,
2260     -};
2261     -
2262     -static struct resource adapter_rom_resources[] = {
2263     - { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
2264     - .flags = IORESOURCE_ROM },
2265     - { .name = "Adapter ROM", .start = 0, .end = 0,
2266     - .flags = IORESOURCE_ROM },
2267     - { .name = "Adapter ROM", .start = 0, .end = 0,
2268     - .flags = IORESOURCE_ROM },
2269     - { .name = "Adapter ROM", .start = 0, .end = 0,
2270     - .flags = IORESOURCE_ROM },
2271     - { .name = "Adapter ROM", .start = 0, .end = 0,
2272     - .flags = IORESOURCE_ROM },
2273     - { .name = "Adapter ROM", .start = 0, .end = 0,
2274     - .flags = IORESOURCE_ROM }
2275     -};
2276     -
2277     -static struct resource video_rom_resource = {
2278     - .name = "Video ROM",
2279     - .start = 0xc0000,
2280     - .end = 0xc7fff,
2281     - .flags = IORESOURCE_ROM,
2282     -};
2283     -
2284     -static struct resource video_ram_resource = {
2285     - .name = "Video RAM area",
2286     - .start = 0xa0000,
2287     - .end = 0xbffff,
2288     - .flags = IORESOURCE_RAM,
2289     -};
2290     -
2291     -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2292     -
2293     -static int __init romchecksum(unsigned char *rom, unsigned long length)
2294     -{
2295     - unsigned char *p, sum = 0;
2296     -
2297     - for (p = rom; p < rom + length; p++)
2298     - sum += *p;
2299     - return sum == 0;
2300     -}
2301     -
2302     -static void __init probe_roms(void)
2303     -{
2304     - unsigned long start, length, upper;
2305     - unsigned char *rom;
2306     - int i;
2307     -
2308     -#ifdef CONFIG_XEN
2309     - /* Nothing to do if not running in dom0. */
2310     - if (!is_initial_xendomain())
2311     - return;
2312     -#endif
2313     -
2314     - /* video rom */
2315     - upper = adapter_rom_resources[0].start;
2316     - for (start = video_rom_resource.start; start < upper; start += 2048) {
2317     - rom = isa_bus_to_virt(start);
2318     - if (!romsignature(rom))
2319     - continue;
2320     -
2321     - video_rom_resource.start = start;
2322     -
2323     - /* 0 < length <= 0x7f * 512, historically */
2324     - length = rom[2] * 512;
2325     -
2326     - /* if checksum okay, trust length byte */
2327     - if (length && romchecksum(rom, length))
2328     - video_rom_resource.end = start + length - 1;
2329     -
2330     - request_resource(&iomem_resource, &video_rom_resource);
2331     - break;
2332     - }
2333     -
2334     - start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
2335     - if (start < upper)
2336     - start = upper;
2337     -
2338     - /* system rom */
2339     - request_resource(&iomem_resource, &system_rom_resource);
2340     - upper = system_rom_resource.start;
2341     -
2342     - /* check for extension rom (ignore length byte!) */
2343     - rom = isa_bus_to_virt(extension_rom_resource.start);
2344     - if (romsignature(rom)) {
2345     - length = extension_rom_resource.end - extension_rom_resource.start + 1;
2346     - if (romchecksum(rom, length)) {
2347     - request_resource(&iomem_resource, &extension_rom_resource);
2348     - upper = extension_rom_resource.start;
2349     - }
2350     - }
2351     -
2352     - /* check for adapter roms on 2k boundaries */
2353     - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
2354     - start += 2048) {
2355     - rom = isa_bus_to_virt(start);
2356     - if (!romsignature(rom))
2357     - continue;
2358     -
2359     - /* 0 < length <= 0x7f * 512, historically */
2360     - length = rom[2] * 512;
2361     -
2362     - /* but accept any length that fits if checksum okay */
2363     - if (!length || start + length > upper || !romchecksum(rom, length))
2364     - continue;
2365     -
2366     - adapter_rom_resources[i].start = start;
2367     - adapter_rom_resources[i].end = start + length - 1;
2368     - request_resource(&iomem_resource, &adapter_rom_resources[i]);
2369     -
2370     - start = adapter_rom_resources[i++].end & ~2047UL;
2371     - }
2372     -}
2373     -
2374     #ifdef CONFIG_PROC_VMCORE
2375     /* elfcorehdr= specifies the location of elf core header
2376     * stored by the crashed kernel. This option will be passed
2377     @@ -406,7 +278,7 @@
2378     #ifdef CONFIG_XEN
2379     extern struct e820map machine_e820;
2380    
2381     - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2382     + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2383    
2384     /* Register a call for panic conditions. */
2385     atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
2386     @@ -433,7 +305,7 @@
2387    
2388     ARCH_SETUP
2389     #else
2390     - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2391     + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2392    
2393     ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
2394     screen_info = SCREEN_INFO;
2395     @@ -464,7 +336,7 @@
2396    
2397     early_identify_cpu(&boot_cpu_data);
2398    
2399     - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2400     + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2401     *cmdline_p = command_line;
2402    
2403     parse_early_param();
2404     @@ -534,6 +406,11 @@
2405     /* reserve ebda region */
2406     if (ebda_addr)
2407     reserve_bootmem_generic(ebda_addr, ebda_size);
2408     +#ifdef CONFIG_NUMA
2409     + /* reserve nodemap region */
2410     + if (nodemap_addr)
2411     + reserve_bootmem_generic(nodemap_addr, nodemap_size);
2412     +#endif
2413    
2414     #ifdef CONFIG_SMP
2415     /*
2416     @@ -734,10 +611,8 @@
2417     #endif
2418    
2419     /*
2420     - * Request address space for all standard RAM and ROM resources
2421     - * and also for regions reported as reserved by the e820.
2422     + * We trust e820 completely. No explicit ROM probing in memory.
2423     */
2424     - probe_roms();
2425     #ifdef CONFIG_XEN
2426     if (is_initial_xendomain()) {
2427     struct xen_memory_map memmap;
2428     @@ -756,8 +631,6 @@
2429     e820_mark_nosave_regions();
2430     #endif
2431    
2432     - request_resource(&iomem_resource, &video_ram_resource);
2433     -
2434     {
2435     unsigned i;
2436     /* request I/O space for devices used on all i[345]86 PCs */
2437     @@ -1334,7 +1207,8 @@
2438     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2439     NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
2440     NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
2441     - NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
2442     + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
2443     + "3dnowext", "3dnow",
2444    
2445     /* Transmeta-defined */
2446     "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
2447     @@ -1352,7 +1226,7 @@
2448     /* Intel-defined (#2) */
2449     "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
2450     "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
2451     - NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
2452     + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
2453     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2454    
2455     /* VIA/Cyrix/Centaur-defined */
2456     @@ -1362,8 +1236,10 @@
2457     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2458    
2459     /* AMD-defined (#2) */
2460     - "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
2461     - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2462     + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
2463     + "altmovcr8", "abm", "sse4a",
2464     + "misalignsse", "3dnowprefetch",
2465     + "osvw", "ibs", NULL, NULL, NULL, NULL,
2466     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2467     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2468     };
2469     @@ -1374,6 +1250,9 @@
2470     "ttp", /* thermal trip */
2471     "tm",
2472     "stc",
2473     + "100mhzsteps",
2474     + "hwpstate",
2475     + NULL, /* tsc invariant mapped to constant_tsc */
2476     NULL,
2477     /* nothing */ /* constant_tsc - moved to flags */
2478     };
2479     @@ -1490,26 +1369,3 @@
2480     .stop = c_stop,
2481     .show = show_cpuinfo,
2482     };
2483     -
2484     -#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
2485     -#include <linux/platform_device.h>
2486     -static __init int add_pcspkr(void)
2487     -{
2488     - struct platform_device *pd;
2489     - int ret;
2490     -
2491     - if (!is_initial_xendomain())
2492     - return 0;
2493     -
2494     - pd = platform_device_alloc("pcspkr", -1);
2495     - if (!pd)
2496     - return -ENOMEM;
2497     -
2498     - ret = platform_device_add(pd);
2499     - if (ret)
2500     - platform_device_put(pd);
2501     -
2502     - return ret;
2503     -}
2504     -device_initcall(add_pcspkr);
2505     -#endif
2506     --- a/arch/x86/kernel/smp_32-xen.c
2507     +++ b/arch/x86/kernel/smp_32-xen.c
2508     @@ -335,8 +335,7 @@
2509     /*
2510     * i'm not happy about this global shared spinlock in the
2511     * MM hot path, but we'll see how contended it is.
2512     - * Temporarily this turns IRQs off, so that lockups are
2513     - * detected by the NMI watchdog.
2514     + * AK: x86-64 has a faster method that could be ported.
2515     */
2516     spin_lock(&tlbstate_lock);
2517    
2518     @@ -361,7 +360,7 @@
2519    
2520     while (!cpus_empty(flush_cpumask))
2521     /* nothing. lockup detection does not belong here */
2522     - mb();
2523     + cpu_relax();
2524    
2525     flush_mm = NULL;
2526     flush_va = 0;
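Replacing mb() with cpu_relax() in the wait loop above is the canonical busy-wait idiom: the loop only needs the compiler to re-read flush_cpumask on each iteration (which cpu_relax() guarantees, since it includes a compiler barrier) plus a pause hint, not a full memory barrier. On x86, cpu_relax() emits rep; nop, easing power draw and yielding pipeline resources to an SMT sibling:

    while (!cpus_empty(flush_cpumask))
        cpu_relax();    /* spin politely until remote CPUs acknowledge */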
2527     --- a/arch/x86/kernel/time_32-xen.c
2528     +++ b/arch/x86/kernel/time_32-xen.c
2529     @@ -51,6 +51,7 @@
2530     #include <linux/kernel_stat.h>
2531     #include <linux/posix-timers.h>
2532     #include <linux/cpufreq.h>
2533     +#include <linux/clocksource.h>
2534    
2535     #include <asm/io.h>
2536     #include <asm/smp.h>
2537     @@ -75,25 +76,17 @@
2538     #include <xen/evtchn.h>
2539     #include <xen/interface/vcpu.h>
2540    
2541     -#if defined (__i386__)
2542     -#include <asm/i8259.h>
2543     +#ifdef CONFIG_X86_32
2544     #include <asm/i8253.h>
2545     DEFINE_SPINLOCK(i8253_lock);
2546     EXPORT_SYMBOL(i8253_lock);
2547     -#endif
2548     -
2549     -#define XEN_SHIFT 22
2550     -
2551     int pit_latch_buggy; /* extern */
2552     -
2553     -#if defined(__x86_64__)
2554     -unsigned long vxtime_hz = PIT_TICK_RATE;
2555     -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2556     +#else
2557     volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2558     -struct timespec __xtime __section_xtime;
2559     -struct timezone __sys_tz __section_sys_tz;
2560     #endif
2561    
2562     +#define XEN_SHIFT 22
2563     +
2564     unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2565     EXPORT_SYMBOL(cpu_khz);
2566    
2567     @@ -113,9 +106,6 @@
2568     static struct timespec shadow_tv;
2569     static u32 shadow_tv_version;
2570    
2571     -static struct timeval monotonic_tv;
2572     -static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED;
2573     -
2574     /* Keep track of last time we did processing/updating of jiffies and xtime. */
2575     static u64 processed_system_time; /* System time (ns) at last processing. */
2576     static DEFINE_PER_CPU(u64, processed_system_time);
2577     @@ -228,7 +218,7 @@
2578     }
2579     #endif
2580    
2581     -void init_cpu_khz(void)
2582     +static void init_cpu_khz(void)
2583     {
2584     u64 __cpu_khz = 1000000ULL << 32;
2585     struct vcpu_time_info *info = &vcpu_info(0)->time;
2586     @@ -247,16 +237,6 @@
2587     return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
2588     }
2589    
2590     -#ifdef CONFIG_X86_64
2591     -static unsigned long get_usec_offset(struct shadow_time_info *shadow)
2592     -{
2593     - u64 now, delta;
2594     - rdtscll(now);
2595     - delta = now - shadow->tsc_timestamp;
2596     - return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
2597     -}
2598     -#endif
2599     -
2600     static void __update_wallclock(time_t sec, long nsec)
2601     {
2602     long wtm_nsec, xtime_nsec;
2603     @@ -364,138 +344,6 @@
2604     }
2605     EXPORT_SYMBOL(rtc_cmos_write);
2606    
2607     -#ifdef CONFIG_X86_64
2608     -
2609     -/*
2610     - * This version of gettimeofday has microsecond resolution
2611     - * and better than microsecond precision on fast x86 machines with TSC.
2612     - */
2613     -void do_gettimeofday(struct timeval *tv)
2614     -{
2615     - unsigned long seq;
2616     - unsigned long usec, sec;
2617     - unsigned long flags;
2618     - s64 nsec;
2619     - unsigned int cpu;
2620     - struct shadow_time_info *shadow;
2621     - u32 local_time_version;
2622     -
2623     - cpu = get_cpu();
2624     - shadow = &per_cpu(shadow_time, cpu);
2625     -
2626     - do {
2627     - local_time_version = shadow->version;
2628     - seq = read_seqbegin(&xtime_lock);
2629     -
2630     - usec = get_usec_offset(shadow);
2631     -
2632     - sec = xtime.tv_sec;
2633     - usec += (xtime.tv_nsec / NSEC_PER_USEC);
2634     -
2635     - nsec = shadow->system_timestamp - processed_system_time;
2636     - __normalize_time(&sec, &nsec);
2637     - usec += (long)nsec / NSEC_PER_USEC;
2638     -
2639     - if (unlikely(!time_values_up_to_date(cpu))) {
2640     - /*
2641     - * We may have blocked for a long time,
2642     - * rendering our calculations invalid
2643     - * (e.g. the time delta may have
2644     - * overflowed). Detect that and recalculate
2645     - * with fresh values.
2646     - */
2647     - get_time_values_from_xen(cpu);
2648     - continue;
2649     - }
2650     - } while (read_seqretry(&xtime_lock, seq) ||
2651     - (local_time_version != shadow->version));
2652     -
2653     - put_cpu();
2654     -
2655     - while (usec >= USEC_PER_SEC) {
2656     - usec -= USEC_PER_SEC;
2657     - sec++;
2658     - }
2659     -
2660     - spin_lock_irqsave(&monotonic_lock, flags);
2661     - if ((sec > monotonic_tv.tv_sec) ||
2662     - ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec)))
2663     - {
2664     - monotonic_tv.tv_sec = sec;
2665     - monotonic_tv.tv_usec = usec;
2666     - } else {
2667     - sec = monotonic_tv.tv_sec;
2668     - usec = monotonic_tv.tv_usec;
2669     - }
2670     - spin_unlock_irqrestore(&monotonic_lock, flags);
2671     -
2672     - tv->tv_sec = sec;
2673     - tv->tv_usec = usec;
2674     -}
2675     -
2676     -EXPORT_SYMBOL(do_gettimeofday);
2677     -
2678     -int do_settimeofday(struct timespec *tv)
2679     -{
2680     - time_t sec;
2681     - s64 nsec;
2682     - unsigned int cpu;
2683     - struct shadow_time_info *shadow;
2684     - struct xen_platform_op op;
2685     -
2686     - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
2687     - return -EINVAL;
2688     -
2689     - cpu = get_cpu();
2690     - shadow = &per_cpu(shadow_time, cpu);
2691     -
2692     - write_seqlock_irq(&xtime_lock);
2693     -
2694     - /*
2695     - * Ensure we don't get blocked for a long time so that our time delta
2696     - * overflows. If that were to happen then our shadow time values would
2697     - * be stale, so we can retry with fresh ones.
2698     - */
2699     - for (;;) {
2700     - nsec = tv->tv_nsec - get_nsec_offset(shadow);
2701     - if (time_values_up_to_date(cpu))
2702     - break;
2703     - get_time_values_from_xen(cpu);
2704     - }
2705     - sec = tv->tv_sec;
2706     - __normalize_time(&sec, &nsec);
2707     -
2708     - if (is_initial_xendomain() && !independent_wallclock) {
2709     - op.cmd = XENPF_settime;
2710     - op.u.settime.secs = sec;
2711     - op.u.settime.nsecs = nsec;
2712     - op.u.settime.system_time = shadow->system_timestamp;
2713     - WARN_ON(HYPERVISOR_platform_op(&op));
2714     - update_wallclock();
2715     - } else if (independent_wallclock) {
2716     - nsec -= shadow->system_timestamp;
2717     - __normalize_time(&sec, &nsec);
2718     - __update_wallclock(sec, nsec);
2719     - }
2720     -
2721     - /* Reset monotonic gettimeofday() timeval. */
2722     - spin_lock(&monotonic_lock);
2723     - monotonic_tv.tv_sec = 0;
2724     - monotonic_tv.tv_usec = 0;
2725     - spin_unlock(&monotonic_lock);
2726     -
2727     - write_sequnlock_irq(&xtime_lock);
2728     -
2729     - put_cpu();
2730     -
2731     - clock_was_set();
2732     - return 0;
2733     -}
2734     -
2735     -EXPORT_SYMBOL(do_settimeofday);
2736     -
2737     -#endif
2738     -
2739     static void sync_xen_wallclock(unsigned long dummy);
2740     static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
2741     static void sync_xen_wallclock(unsigned long dummy)
2742     @@ -544,15 +392,7 @@
2743     return retval;
2744     }
2745    
2746     -#ifdef CONFIG_X86_64
2747     -/* monotonic_clock(): returns # of nanoseconds passed since time_init()
2748     - * Note: This function is required to return accurate
2749     - * time even in the absence of multiple timer ticks.
2750     - */
2751     -unsigned long long monotonic_clock(void)
2752     -#else
2753     unsigned long long sched_clock(void)
2754     -#endif
2755     {
2756     unsigned int cpu = get_cpu();
2757     struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
2758     @@ -572,21 +412,18 @@
2759    
2760     return time;
2761     }
2762     -#ifdef CONFIG_X86_64
2763     -EXPORT_SYMBOL(monotonic_clock);
2764     -
2765     -unsigned long long sched_clock(void)
2766     -{
2767     - return monotonic_clock();
2768     -}
2769     -#endif
2770    
2771     unsigned long profile_pc(struct pt_regs *regs)
2772     {
2773     unsigned long pc = instruction_pointer(regs);
2774    
2775     #if defined(CONFIG_SMP) || defined(__x86_64__)
2776     - if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2777     +# ifdef __i386__
2778     + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs)
2779     +# else
2780     + if (!user_mode(regs)
2781     +# endif
2782     + && in_lock_functions(pc)) {
2783     # ifdef CONFIG_FRAME_POINTER
2784     # ifdef __i386__
2785     return ((unsigned long *)regs->ebp)[1];
2786     @@ -595,14 +432,11 @@
2787     # endif
2788     # else
2789     # ifdef __i386__
2790     - unsigned long *sp;
2791     - if ((regs->xcs & 2) == 0)
2792     - sp = (unsigned long *)&regs->esp;
2793     - else
2794     - sp = (unsigned long *)regs->esp;
2795     + unsigned long *sp = (unsigned long *)&regs->esp;
2796     # else
2797     unsigned long *sp = (unsigned long *)regs->rsp;
2798     # endif
2799     +
2800     /* Return address is either directly at stack pointer
2801     or above a saved eflags. Eflags has bits 22-31 zero,
2802     kernel addresses don't. */
2803     @@ -755,19 +589,6 @@
2804     return IRQ_HANDLED;
2805     }
2806    
2807     -#ifndef CONFIG_X86_64
2808     -
2809     -void tsc_init(void)
2810     -{
2811     - init_cpu_khz();
2812     - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2813     - cpu_khz / 1000, cpu_khz % 1000);
2814     -
2815     - use_tsc_delay();
2816     -}
2817     -
2818     -#include <linux/clocksource.h>
2819     -
2820     void mark_tsc_unstable(void)
2821     {
2822     #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
2823     @@ -821,21 +642,9 @@
2824     .mask = CLOCKSOURCE_MASK(64),
2825     .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
2826     .shift = XEN_SHIFT,
2827     - .is_continuous = 1,
2828     + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
2829     };
2830    
2831     -static int __init init_xen_clocksource(void)
2832     -{
2833     - clocksource_xen.mult = clocksource_khz2mult(cpu_khz,
2834     - clocksource_xen.shift);
2835     -
2836     - return clocksource_register(&clocksource_xen);
2837     -}
2838     -
2839     -module_init(init_xen_clocksource);
2840     -
2841     -#endif
2842     -
2843     static void init_missing_ticks_accounting(unsigned int cpu)
2844     {
2845     struct vcpu_register_runstate_memory_area area;
2846     @@ -856,7 +665,7 @@
2847     }
2848    
2849     /* not static: needed by APM */
2850     -unsigned long get_cmos_time(void)
2851     +unsigned long read_persistent_clock(void)
2852     {
2853     unsigned long retval;
2854     unsigned long flags;
2855     @@ -869,11 +678,11 @@
2856    
2857     return retval;
2858     }
2859     -EXPORT_SYMBOL(get_cmos_time);
2860    
2861     static void sync_cmos_clock(unsigned long dummy);
2862    
2863     static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
2864     +int no_sync_cmos_clock;
2865    
2866     static void sync_cmos_clock(unsigned long dummy)
2867     {
2868     @@ -917,7 +726,8 @@
2869    
2870     void notify_arch_cmos_timer(void)
2871     {
2872     - mod_timer(&sync_cmos_timer, jiffies + 1);
2873     + if (!no_sync_cmos_clock)
2874     + mod_timer(&sync_cmos_timer, jiffies + 1);
2875     mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
2876     }
2877    
2878     @@ -950,29 +760,11 @@
2879    
2880     device_initcall(time_init_device);
2881    
2882     -#ifdef CONFIG_HPET_TIMER
2883     extern void (*late_time_init)(void);
2884     -/* Duplicate of time_init() below, with hpet_enable part added */
2885     -static void __init hpet_time_init(void)
2886     -{
2887     - struct timespec ts;
2888     - ts.tv_sec = get_cmos_time();
2889     - ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2890     -
2891     - do_settimeofday(&ts);
2892     -
2893     - if ((hpet_enable() >= 0) && hpet_use_timer) {
2894     - printk("Using HPET for base-timer\n");
2895     - }
2896     -
2897     - do_time_init();
2898     -}
2899     -#endif
2900    
2901     /* Dynamically-mapped IRQ. */
2902     DEFINE_PER_CPU(int, timer_irq);
2903    
2904     -extern void (*late_time_init)(void);
2905     static void setup_cpu0_timer_irq(void)
2906     {
2907     per_cpu(timer_irq, 0) =
2908     @@ -992,16 +784,9 @@
2909    
2910     void __init time_init(void)
2911     {
2912     -#ifdef CONFIG_HPET_TIMER
2913     - if (is_hpet_capable()) {
2914     - /*
2915     - * HPET initialization needs to do memory-mapped io. So, let
2916     - * us do a late initialization after mem_init().
2917     - */
2918     - late_time_init = hpet_time_init;
2919     - return;
2920     - }
2921     -#endif
2922     + init_cpu_khz();
2923     + printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2924     + cpu_khz / 1000, cpu_khz % 1000);
2925    
2926     switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
2927     &xen_set_periodic_tick)) {
2928     @@ -1020,18 +805,12 @@
2929     per_cpu(processed_system_time, 0) = processed_system_time;
2930     init_missing_ticks_accounting(0);
2931    
2932     - update_wallclock();
2933     + clocksource_register(&clocksource_xen);
2934    
2935     -#ifdef CONFIG_X86_64
2936     - init_cpu_khz();
2937     - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2938     - cpu_khz / 1000, cpu_khz % 1000);
2939     + update_wallclock();
2940    
2941     - vxtime.mode = VXTIME_TSC;
2942     - vxtime.quot = (1000000L << 32) / vxtime_hz;
2943     - vxtime.tsc_quot = (1000L << 32) / cpu_khz;
2944     - sync_core();
2945     - rdtscll(vxtime.last_tsc);
2946     +#ifndef CONFIG_X86_64
2947     + use_tsc_delay();
2948     #endif
2949    
2950     /* Cannot request_irq() until kmem is initialised. */
2951     @@ -1277,7 +1056,7 @@
2952     };
2953     static int __init xen_sysctl_init(void)
2954     {
2955     - (void)register_sysctl_table(xen_table, 0);
2956     + (void)register_sysctl_table(xen_table);
2957     return 0;
2958     }
2959     __initcall(xen_sysctl_init);
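The time_32-xen.c rework above retires the hand-rolled monotonic do_gettimeofday()/do_settimeofday() pair in favor of a registered clocksource, now registered directly from time_init() instead of via a module_init() hook. The registration shape, per the hunks (mult/shift/flags exactly as shown above; the name string and read callback are not visible in this excerpt and are assumptions):

    static struct clocksource clocksource_xen = {
        .name  = "xen",                         /* assumed */
        .read  = xen_clocksource_read,          /* assumed callback name */
        .mask  = CLOCKSOURCE_MASK(64),
        .mult  = 1 << XEN_SHIFT,        /* time is already in nanoseconds */
        .shift = XEN_SHIFT,
        .flags = CLOCK_SOURCE_IS_CONTINUOUS,    /* no gaps: nohz-safe */
    };

    /* in time_init(), before update_wallclock() */
    clocksource_register(&clocksource_xen);

Note that the old .is_continuous = 1 field became the CLOCK_SOURCE_IS_CONTINUOUS flag in 2.6.21's clocksource API, which is what the one-line hunk above tracks.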
2960     --- a/arch/x86/kernel/traps_32-xen.c
2961     +++ b/arch/x86/kernel/traps_32-xen.c
2962     @@ -100,6 +100,7 @@
2963     asmlinkage void machine_check(void);
2964    
2965     int kstack_depth_to_print = 24;
2966     +static unsigned int code_bytes = 64;
2967     ATOMIC_NOTIFIER_HEAD(i386die_chain);
2968    
2969     int register_die_notifier(struct notifier_block *nb)
2970     @@ -297,10 +298,11 @@
2971     int i;
2972     int in_kernel = 1;
2973     unsigned long esp;
2974     - unsigned short ss;
2975     + unsigned short ss, gs;
2976    
2977     esp = (unsigned long) (&regs->esp);
2978     savesegment(ss, ss);
2979     + savesegment(gs, gs);
2980     if (user_mode_vm(regs)) {
2981     in_kernel = 0;
2982     esp = regs->esp;
2983     @@ -319,8 +321,8 @@
2984     regs->eax, regs->ebx, regs->ecx, regs->edx);
2985     printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
2986     regs->esi, regs->edi, regs->ebp, esp);
2987     - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
2988     - regs->xds & 0xffff, regs->xes & 0xffff, ss);
2989     + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
2990     + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
2991     printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
2992     TASK_COMM_LEN, current->comm, current->pid,
2993     current_thread_info(), current, current->thread_info);
2994     @@ -330,7 +332,8 @@
2995     */
2996     if (in_kernel) {
2997     u8 *eip;
2998     - int code_bytes = 64;
2999     + unsigned int code_prologue = code_bytes * 43 / 64;
3000     + unsigned int code_len = code_bytes;
3001     unsigned char c;
3002    
3003     printk("\n" KERN_EMERG "Stack: ");
3004     @@ -338,14 +341,14 @@
3005    
3006     printk(KERN_EMERG "Code: ");
3007    
3008     - eip = (u8 *)regs->eip - 43;
3009     + eip = (u8 *)regs->eip - code_prologue;
3010     if (eip < (u8 *)PAGE_OFFSET ||
3011     probe_kernel_address(eip, c)) {
3012     /* try starting at EIP */
3013     eip = (u8 *)regs->eip;
3014     - code_bytes = 32;
3015     + code_len = code_len - code_prologue + 1;
3016     }
3017     - for (i = 0; i < code_bytes; i++, eip++) {
3018     + for (i = 0; i < code_len; i++, eip++) {
3019     if (eip < (u8 *)PAGE_OFFSET ||
3020     probe_kernel_address(eip, c)) {
3021     printk(" Bad EIP value.");
3022     @@ -1134,3 +1137,13 @@
3023     return 1;
3024     }
3025     __setup("kstack=", kstack_setup);
3026     +
3027     +static int __init code_bytes_setup(char *s)
3028     +{
3029     + code_bytes = simple_strtoul(s, NULL, 0);
3030     + if (code_bytes > 8192)
3031     + code_bytes = 8192;
3032     +
3033     + return 1;
3034     +}
3035     +__setup("code_bytes=", code_bytes_setup);
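The code_bytes= knob added above uses the classic __setup() boot-parameter hook: the handler runs during early parameter parsing and returns 1 to mark the option as consumed. The same code as the hunk, with comments added:

    static unsigned int code_bytes = 64;    /* bytes shown after "Code:" in an oops */

    static int __init code_bytes_setup(char *s)
    {
        code_bytes = simple_strtoul(s, NULL, 0);
        if (code_bytes > 8192)          /* clamp to a sane maximum */
            code_bytes = 8192;
        return 1;                       /* option handled */
    }
    __setup("code_bytes=", code_bytes_setup);

Booting with code_bytes=128 then widens the Code: dump accordingly.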
3036     --- a/arch/x86/kernel/vsyscall_64-xen.c
3037     +++ b/arch/x86/kernel/vsyscall_64-xen.c
3038     @@ -26,6 +26,7 @@
3039     #include <linux/seqlock.h>
3040     #include <linux/jiffies.h>
3041     #include <linux/sysctl.h>
3042     +#include <linux/clocksource.h>
3043     #include <linux/getcpu.h>
3044     #include <linux/cpu.h>
3045     #include <linux/smp.h>
3046     @@ -34,6 +35,7 @@
3047     #include <asm/vsyscall.h>
3048     #include <asm/pgtable.h>
3049     #include <asm/page.h>
3050     +#include <asm/unistd.h>
3051     #include <asm/fixmap.h>
3052     #include <asm/errno.h>
3053     #include <asm/io.h>
3054     @@ -44,56 +46,41 @@
3055     #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
3056     #define __syscall_clobber "r11","rcx","memory"
3057    
3058     -int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
3059     -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
3060     +struct vsyscall_gtod_data_t {
3061     + seqlock_t lock;
3062     + int sysctl_enabled;
3063     + struct timeval wall_time_tv;
3064     + struct timezone sys_tz;
3065     + cycle_t offset_base;
3066     + struct clocksource clock;
3067     +};
3068     int __vgetcpu_mode __section_vgetcpu_mode;
3069    
3070     -#include <asm/unistd.h>
3071     -
3072     -static __always_inline void timeval_normalize(struct timeval * tv)
3073     +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
3074     {
3075     - time_t __sec;
3076     -
3077     - __sec = tv->tv_usec / 1000000;
3078     - if (__sec) {
3079     - tv->tv_usec %= 1000000;
3080     - tv->tv_sec += __sec;
3081     - }
3082     -}
3083     + .lock = SEQLOCK_UNLOCKED,
3084     + .sysctl_enabled = 1,
3085     +};
3086    
3087     -static __always_inline void do_vgettimeofday(struct timeval * tv)
3088     +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
3089     {
3090     - long sequence, t;
3091     - unsigned long sec, usec;
3092     + unsigned long flags;
3093    
3094     - do {
3095     - sequence = read_seqbegin(&__xtime_lock);
3096     -
3097     - sec = __xtime.tv_sec;
3098     - usec = __xtime.tv_nsec / 1000;
3099     -
3100     - if (__vxtime.mode != VXTIME_HPET) {
3101     - t = get_cycles_sync();
3102     - if (t < __vxtime.last_tsc)
3103     - t = __vxtime.last_tsc;
3104     - usec += ((t - __vxtime.last_tsc) *
3105     - __vxtime.tsc_quot) >> 32;
3106     - /* See comment in x86_64 do_gettimeofday. */
3107     - } else {
3108     - usec += ((readl((void __iomem *)
3109     - fix_to_virt(VSYSCALL_HPET) + 0xf0) -
3110     - __vxtime.last) * __vxtime.quot) >> 32;
3111     - }
3112     - } while (read_seqretry(&__xtime_lock, sequence));
3113     -
3114     - tv->tv_sec = sec + usec / 1000000;
3115     - tv->tv_usec = usec % 1000000;
3116     + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
3117     + /* copy vsyscall data */
3118     + vsyscall_gtod_data.clock = *clock;
3119     + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
3120     + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
3121     + vsyscall_gtod_data.sys_tz = sys_tz;
3122     + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3123     }
3124    
3125     -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
3126     +/* RED-PEN may want to readd seq locking, but then the variable should be
3127     + * write-once.
3128     + */
3129     static __always_inline void do_get_tz(struct timezone * tz)
3130     {
3131     - *tz = __sys_tz;
3132     + *tz = __vsyscall_gtod_data.sys_tz;
3133     }
3134    
3135     static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
3136     @@ -101,7 +88,8 @@
3137     int ret;
3138     asm volatile("vsysc2: syscall"
3139     : "=a" (ret)
3140     - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
3141     + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
3142     + : __syscall_clobber );
3143     return ret;
3144     }
3145    
3146     @@ -114,10 +102,44 @@
3147     return secs;
3148     }
3149    
3150     +static __always_inline void do_vgettimeofday(struct timeval * tv)
3151     +{
3152     + cycle_t now, base, mask, cycle_delta;
3153     + unsigned long seq, mult, shift, nsec_delta;
3154     + cycle_t (*vread)(void);
3155     + do {
3156     + seq = read_seqbegin(&__vsyscall_gtod_data.lock);
3157     +
3158     + vread = __vsyscall_gtod_data.clock.vread;
3159     + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
3160     + gettimeofday(tv,NULL);
3161     + return;
3162     + }
3163     + now = vread();
3164     + base = __vsyscall_gtod_data.clock.cycle_last;
3165     + mask = __vsyscall_gtod_data.clock.mask;
3166     + mult = __vsyscall_gtod_data.clock.mult;
3167     + shift = __vsyscall_gtod_data.clock.shift;
3168     +
3169     + *tv = __vsyscall_gtod_data.wall_time_tv;
3170     +
3171     + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
3172     +
3173     + /* calculate interval: */
3174     + cycle_delta = (now - base) & mask;
3175     + /* convert to nsecs: */
3176     + nsec_delta = (cycle_delta * mult) >> shift;
3177     +
3178     + /* convert to usecs and add to timespec: */
3179     + tv->tv_usec += nsec_delta / NSEC_PER_USEC;
3180     + while (tv->tv_usec > USEC_PER_SEC) {
3181     + tv->tv_sec += 1;
3182     + tv->tv_usec -= USEC_PER_SEC;
3183     + }
3184     +}
3185     +
3186     int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
3187     {
3188     - if (!__sysctl_vsyscall)
3189     - return gettimeofday(tv,tz);
3190     if (tv)
3191     do_vgettimeofday(tv);
3192     if (tz)
3193     @@ -129,11 +151,11 @@
3194     * unlikely */
3195     time_t __vsyscall(1) vtime(time_t *t)
3196     {
3197     - if (!__sysctl_vsyscall)
3198     + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
3199     return time_syscall(t);
3200     else if (t)
3201     - *t = __xtime.tv_sec;
3202     - return __xtime.tv_sec;
3203     + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
3204     + return __vsyscall_gtod_data.wall_time_tv.tv_sec;
3205     }
3206    
3207     /* Fast way to get current CPU and node.
3208     @@ -210,7 +232,7 @@
3209     ret = -ENOMEM;
3210     goto out;
3211     }
3212     - if (!sysctl_vsyscall) {
3213     + if (!vsyscall_gtod_data.sysctl_enabled) {
3214     writew(SYSCALL, map1);
3215     writew(SYSCALL, map2);
3216     } else {
3217     @@ -232,16 +254,17 @@
3218    
3219     static ctl_table kernel_table2[] = {
3220     { .ctl_name = 99, .procname = "vsyscall64",
3221     - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
3222     + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
3223     + .mode = 0644,
3224     .strategy = vsyscall_sysctl_nostrat,
3225     .proc_handler = vsyscall_sysctl_change },
3226     - { 0, }
3227     + {}
3228     };
3229    
3230     static ctl_table kernel_root_table2[] = {
3231     { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
3232     .child = kernel_table2 },
3233     - { 0 },
3234     + {}
3235     };
3236    
3237     #endif
3238     @@ -304,14 +327,14 @@
3239     BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
3240     map_vsyscall();
3241     #ifdef CONFIG_XEN
3242     - sysctl_vsyscall = 0; /* disable vgettimeofay() */
3243     + vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofday() */
3244     if (boot_cpu_has(X86_FEATURE_RDTSCP))
3245     vgetcpu_mode = VGETCPU_RDTSCP;
3246     else
3247     vgetcpu_mode = VGETCPU_LSL;
3248     #endif
3249     #ifdef CONFIG_SYSCTL
3250     - register_sysctl_table(kernel_root_table2, 0);
3251     + register_sysctl_table(kernel_root_table2);
3252     #endif
3253     on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
3254     hotcpu_notifier(cpu_vsyscall_notifier, 0);
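The new do_vgettimeofday() above is a lockless seqlock reader: sample the sequence counter, copy the shared data, and retry if a writer (update_vsyscall()) ran in between. The generic reader pattern looks like this (illustrative names; assumes the standard <linux/seqlock.h> API already used in the hunks):

    static seqlock_t gtod_lock = SEQLOCK_UNLOCKED;
    static struct timeval wall_time;    /* updated under write_seqlock() */

    static void read_wall_time(struct timeval *out)
    {
        unsigned long seq;

        do {
            seq = read_seqbegin(&gtod_lock);    /* snapshot the counter */
            *out = wall_time;                   /* may race a writer */
        } while (read_seqretry(&gtod_lock, seq)); /* retry if one ran */
    }

Readers never block the writer; they simply loop until they observe a consistent copy.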
3255     --- a/arch/x86/mm/fault_32-xen.c
3256     +++ b/arch/x86/mm/fault_32-xen.c
3257     @@ -46,43 +46,17 @@
3258     }
3259     EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3260    
3261     -static inline int notify_page_fault(enum die_val val, const char *str,
3262     - struct pt_regs *regs, long err, int trap, int sig)
3263     +static inline int notify_page_fault(struct pt_regs *regs, long err)
3264     {
3265     struct die_args args = {
3266     .regs = regs,
3267     - .str = str,
3268     + .str = "page fault",
3269     .err = err,
3270     - .trapnr = trap,
3271     - .signr = sig
3272     + .trapnr = 14,
3273     + .signr = SIGSEGV
3274     };
3275     - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3276     -}
3277     -
3278     -/*
3279     - * Unlock any spinlocks which will prevent us from getting the
3280     - * message out
3281     - */
3282     -void bust_spinlocks(int yes)
3283     -{
3284     - int loglevel_save = console_loglevel;
3285     -
3286     - if (yes) {
3287     - oops_in_progress = 1;
3288     - return;
3289     - }
3290     -#ifdef CONFIG_VT
3291     - unblank_screen();
3292     -#endif
3293     - oops_in_progress = 0;
3294     - /*
3295     - * OK, the message is on the console. Now we call printk()
3296     - * without oops_in_progress set so that printk will give klogd
3297     - * a poke. Hold onto your hats...
3298     - */
3299     - console_loglevel = 15; /* NMI oopser may have shut the console up */
3300     - printk(" ");
3301     - console_loglevel = loglevel_save;
3302     + return atomic_notifier_call_chain(&notify_page_fault_chain,
3303     + DIE_PAGE_FAULT, &args);
3304     }
3305    
3306     /*
3307     @@ -476,8 +450,7 @@
3308     /* Can take a spurious fault if mapping changes R/O -> R/W. */
3309     if (spurious_fault(regs, address, error_code))
3310     return;
3311     - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3312     - SIGSEGV) == NOTIFY_STOP)
3313     + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3314     return;
3315     /*
3316     * Don't take the mm semaphore here. If we fixup a prefetch
3317     @@ -486,8 +459,7 @@
3318     goto bad_area_nosemaphore;
3319     }
3320    
3321     - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3322     - SIGSEGV) == NOTIFY_STOP)
3323     + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3324     return;
3325    
3326     /* It's safe to allow irq's after cr2 has been saved and the vmalloc
3327     --- a/arch/x86/mm/fault_64-xen.c
3328     +++ b/arch/x86/mm/fault_64-xen.c
3329     @@ -56,38 +56,17 @@
3330     }
3331     EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3332    
3333     -static inline int notify_page_fault(enum die_val val, const char *str,
3334     - struct pt_regs *regs, long err, int trap, int sig)
3335     +static inline int notify_page_fault(struct pt_regs *regs, long err)
3336     {
3337     struct die_args args = {
3338     .regs = regs,
3339     - .str = str,
3340     + .str = "page fault",
3341     .err = err,
3342     - .trapnr = trap,
3343     - .signr = sig
3344     + .trapnr = 14,
3345     + .signr = SIGSEGV
3346     };
3347     - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3348     -}
3349     -
3350     -void bust_spinlocks(int yes)
3351     -{
3352     - int loglevel_save = console_loglevel;
3353     - if (yes) {
3354     - oops_in_progress = 1;
3355     - } else {
3356     -#ifdef CONFIG_VT
3357     - unblank_screen();
3358     -#endif
3359     - oops_in_progress = 0;
3360     - /*
3361     - * OK, the message is on the console. Now we call printk()
3362     - * without oops_in_progress set so that printk will give klogd
3363     - * a poke. Hold onto your hats...
3364     - */
3365     - console_loglevel = 15; /* NMI oopser may have shut the console up */
3366     - printk(" ");
3367     - console_loglevel = loglevel_save;
3368     - }
3369     + return atomic_notifier_call_chain(&notify_page_fault_chain,
3370     + DIE_PAGE_FAULT, &args);
3371     }
3372    
3373     /* Sometimes the CPU reports invalid exceptions on prefetch.
3374     @@ -437,8 +416,7 @@
3375     /* Can take a spurious fault if mapping changes R/O -> R/W. */
3376     if (spurious_fault(regs, address, error_code))
3377     return;
3378     - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3379     - SIGSEGV) == NOTIFY_STOP)
3380     + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3381     return;
3382     /*
3383     * Don't take the mm semaphore here. If we fixup a prefetch
3384     @@ -447,8 +425,7 @@
3385     goto bad_area_nosemaphore;
3386     }
3387    
3388     - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3389     - SIGSEGV) == NOTIFY_STOP)
3390     + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3391     return;
3392    
3393     if (likely(regs->eflags & X86_EFLAGS_IF))
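The identical fault_32/fault_64 cleanups above fold the five constant arguments of notify_page_fault() into the function itself, leaving a single-argument call at each invocation site. The resulting helper, common to both files (as in the + lines):

    static inline int notify_page_fault(struct pt_regs *regs, long err)
    {
        struct die_args args = {
            .regs   = regs,
            .str    = "page fault",
            .err    = err,
            .trapnr = 14,               /* #PF */
            .signr  = SIGSEGV,
        };
        return atomic_notifier_call_chain(&notify_page_fault_chain,
                                          DIE_PAGE_FAULT, &args);
    }

The bust_spinlocks() deletions mirror upstream 2.6.21, which consolidated that helper outside the per-arch fault handlers; the functionality itself does not go away.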
3394     --- a/arch/x86/mm/highmem_32-xen.c
3395     +++ b/arch/x86/mm/highmem_32-xen.c
3396     @@ -33,14 +33,16 @@
3397    
3398     /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
3399     pagefault_disable();
3400     +
3401     + idx = type + KM_TYPE_NR*smp_processor_id();
3402     + BUG_ON(!pte_none(*(kmap_pte-idx)));
3403     +
3404     if (!PageHighMem(page))
3405     return page_address(page);
3406    
3407     - idx = type + KM_TYPE_NR*smp_processor_id();
3408     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3409     - if (!pte_none(*(kmap_pte-idx)))
3410     - BUG();
3411     set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3412     + arch_flush_lazy_mmu_mode();
3413    
3414     return (void*) vaddr;
3415     }
3416     @@ -94,6 +96,7 @@
3417     idx = type + KM_TYPE_NR*smp_processor_id();
3418     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3419     set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3420     + arch_flush_lazy_mmu_mode();
3421    
3422     return (void*) vaddr;
3423     }
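
    Note: two things change in kmap_atomic(): the sanity check now runs before
    the PageHighMem() early return, so a leaked fixmap slot is caught even when
    the current caller would not use it, and arch_flush_lazy_mmu_mode() pushes
    any batched pte update out before the mapping is handed to the caller.
    A minimal usage sketch of the invariant being checked (helper name
    illustrative):

	#include <linux/highmem.h>
	#include <linux/string.h>

	static void zero_page_sketch(struct page *page)
	{
		/* claims this CPU's KM_USER0 fixmap slot; the BUG_ON
		 * above verifies the slot's pte is still clear */
		void *kaddr = kmap_atomic(page, KM_USER0);

		memset(kaddr, 0, PAGE_SIZE);

		/* must clear the pte again, or the next kmap_atomic()
		 * on this CPU hits the BUG_ON */
		kunmap_atomic(kaddr, KM_USER0);
	}
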
3424     --- a/arch/x86/mm/init_32-xen.c
3425     +++ b/arch/x86/mm/init_32-xen.c
3426     @@ -68,6 +68,7 @@
3427    
3428     #ifdef CONFIG_X86_PAE
3429     pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3430     + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
3431     make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
3432     set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
3433     pud = pud_offset(pgd, 0);
3434     @@ -89,6 +90,7 @@
3435     {
3436     if (pmd_none(*pmd)) {
3437     pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3438     + paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
3439     make_lowmem_page_readonly(page_table,
3440     XENFEAT_writable_page_tables);
3441     set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
3442     --- a/arch/x86/mm/init_64-xen.c
3443     +++ b/arch/x86/mm/init_64-xen.c
3444     @@ -1111,20 +1111,30 @@
3445     extern int exception_trace, page_fault_trace;
3446    
3447     static ctl_table debug_table2[] = {
3448     - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
3449     - proc_dointvec },
3450     - { 0, }
3451     + {
3452     + .ctl_name = 99,
3453     + .procname = "exception-trace",
3454     + .data = &exception_trace,
3455     + .maxlen = sizeof(int),
3456     + .mode = 0644,
3457     + .proc_handler = proc_dointvec
3458     + },
3459     + {}
3460     };
3461    
3462     static ctl_table debug_root_table2[] = {
3463     - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
3464     - .child = debug_table2 },
3465     - { 0 },
3466     + {
3467     + .ctl_name = CTL_DEBUG,
3468     + .procname = "debug",
3469     + .mode = 0555,
3470     + .child = debug_table2
3471     + },
3472     + {}
3473     };
3474    
3475     static __init int x8664_sysctl_init(void)
3476     {
3477     - register_sysctl_table(debug_root_table2, 1);
3478     + register_sysctl_table(debug_root_table2);
3479     return 0;
3480     }
3481     __initcall(x8664_sysctl_init);
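
    Note: the sysctl hunk converts the positional ctl_table initializers to
    designated ones and drops the insert_at_head argument that
    register_sysctl_table() lost in 2.6.21. Since the function returns a table
    header, a caller that may unload looks roughly like this (a sketch with
    made-up names, following the style of the hunk above):

	#include <linux/sysctl.h>
	#include <linux/init.h>
	#include <linux/errno.h>

	static int example_value;

	static ctl_table example_table[] = {
		{
			.ctl_name	= 99,
			.procname	= "example-value",
			.data		= &example_value,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec
		},
		{}
	};

	static ctl_table example_root[] = {
		{
			.ctl_name	= CTL_DEBUG,
			.procname	= "debug",
			.mode		= 0555,
			.child		= example_table
		},
		{}
	};

	static struct ctl_table_header *example_header;

	static int __init example_sysctl_init(void)
	{
		/* single argument as of 2.6.21; keep the header so
		 * unregister_sysctl_table(example_header) can undo it */
		example_header = register_sysctl_table(example_root);
		return example_header ? 0 : -ENOMEM;
	}
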
3482     --- a/arch/x86/mm/pageattr_64-xen.c
3483     +++ b/arch/x86/mm/pageattr_64-xen.c
3484     @@ -344,8 +344,8 @@
3485     void *adr = page_address(pg);
3486     if (cpu_has_clflush)
3487     cache_flush_page(adr);
3488     - __flush_tlb_one(adr);
3489     }
3490     + __flush_tlb_all();
3491     }
3492    
3493     static inline void flush_map(struct list_head *l)
3494     @@ -370,6 +370,7 @@
3495     pud_t *pud;
3496     pmd_t *pmd;
3497     pte_t large_pte;
3498     + unsigned long pfn;
3499    
3500     pgd = pgd_offset_k(address);
3501     BUG_ON(pgd_none(*pgd));
3502     @@ -377,7 +378,8 @@
3503     BUG_ON(pud_none(*pud));
3504     pmd = pmd_offset(pud, address);
3505     BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
3506     - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
3507     + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
3508     + large_pte = pfn_pte(pfn, ref_prot);
3509     large_pte = pte_mkhuge(large_pte);
3510     set_pte((pte_t *)pmd, large_pte);
3511     }
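
    Note: two independent fixes in pageattr_64-xen.c: the per-page
    __flush_tlb_one() inside the loop becomes a single __flush_tlb_all() after
    it, and the huge pte is built with pfn_pte() instead of mk_pte_phys(),
    which this patch deletes from pgtable_64.h further down. The flush shape
    the hunk arrives at, sketched with the file-local names used above
    (cpu_has_clflush, cache_flush_page):

	#include <linux/mm.h>
	#include <asm/tlbflush.h>

	static void flush_pages_sketch(struct list_head *l)
	{
		struct page *pg;

		list_for_each_entry(pg, l, lru) {
			if (cpu_has_clflush)
				cache_flush_page(page_address(pg));
		}
		__flush_tlb_all();	/* one global flush instead of one per page */
	}
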
3512     --- a/arch/x86/mm/pgtable_32-xen.c
3513     +++ b/arch/x86/mm/pgtable_32-xen.c
3514     @@ -149,6 +149,8 @@
3515     void __init reserve_top_address(unsigned long reserve)
3516     {
3517     BUG_ON(fixmaps > 0);
3518     + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
3519     + (int)-reserve);
3520     __FIXADDR_TOP = -reserve - PAGE_SIZE;
3521     __VMALLOC_RESERVE += reserve;
3522     }
3523     @@ -252,6 +254,12 @@
3524     swapper_pg_dir + USER_PTRS_PER_PGD,
3525     KERNEL_PGD_PTRS);
3526     memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
3527     +
3528     + /* must happen under lock */
3529     + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
3530     + __pa(swapper_pg_dir) >> PAGE_SHIFT,
3531     + USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
3532     +
3533     pgd_list_add(pgd);
3534     spin_unlock_irqrestore(&pgd_lock, flags);
3535     }
3536     @@ -262,6 +270,7 @@
3537     {
3538     unsigned long flags; /* can be called from interrupt context */
3539    
3540     + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
3541     spin_lock_irqsave(&pgd_lock, flags);
3542     pgd_list_del(pgd);
3543     spin_unlock_irqrestore(&pgd_lock, flags);
3544     @@ -286,6 +295,7 @@
3545     pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3546     if (!pmd)
3547     goto out_oom;
3548     + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3549     set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
3550     }
3551     return pgd;
3552     @@ -308,6 +318,7 @@
3553     pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3554     if (!pmd[i])
3555     goto out_oom;
3556     + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3557     }
3558    
3559     spin_lock_irqsave(&pgd_lock, flags);
3560     @@ -348,12 +359,17 @@
3561    
3562     out_oom:
3563     if (HAVE_SHARED_KERNEL_PMD) {
3564     - for (i--; i >= 0; i--)
3565     - kmem_cache_free(pmd_cache,
3566     - (void *)__va(pgd_val(pgd[i])-1));
3567     + for (i--; i >= 0; i--) {
3568     + pgd_t pgdent = pgd[i];
3569     + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3570     + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3571     + kmem_cache_free(pmd_cache, pmd);
3572     + }
3573     } else {
3574     - for (i--; i >= 0; i--)
3575     + for (i--; i >= 0; i--) {
3576     + paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
3577     kmem_cache_free(pmd_cache, pmd[i]);
3578     + }
3579     kfree(pmd);
3580     }
3581     kmem_cache_free(pgd_cache, pgd);
3582     @@ -377,7 +393,9 @@
3583     /* in the PAE case user pgd entries are overwritten before usage */
3584     if (PTRS_PER_PMD > 1) {
3585     for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
3586     - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
3587     + pgd_t pgdent = pgd[i];
3588     + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3589     + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3590     kmem_cache_free(pmd_cache, pmd);
3591     }
3592    
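    Note: the pgtable_32-xen.c hunks make every pmd-page allocation and free
    visible to the paravirt layer; on this tree the hooks compile away (see
    the pgalloc_32.h hunk below), but the calls must bracket the page's
    lifetime symmetrically. The pattern, sketched with pmd_cache from the
    surrounding file:

	#include <linux/slab.h>

	static pmd_t *alloc_one_pmd_sketch(void)
	{
		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

		if (pmd)	/* announce the new page-table page */
			paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
		return pmd;
	}

	static void free_one_pmd_sketch(pmd_t *pmd)
	{
		/* release hook must come before the page is reusable */
		paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
		kmem_cache_free(pmd_cache, pmd);
	}
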
3593     --- a/drivers/char/tpm/tpm_xen.c
3594     +++ b/drivers/char/tpm/tpm_xen.c
3595     @@ -481,7 +481,6 @@
3596    
3597     static struct xenbus_driver tpmfront = {
3598     .name = "vtpm",
3599     - .owner = THIS_MODULE,
3600     .ids = tpmfront_ids,
3601     .probe = tpmfront_probe,
3602     .remove = tpmfront_remove,
3603     @@ -491,9 +490,9 @@
3604     .suspend_cancel = tpmfront_suspend_cancel,
3605     };
3606    
3607     -static void __init init_tpm_xenbus(void)
3608     +static int __init init_tpm_xenbus(void)
3609     {
3610     - xenbus_register_frontend(&tpmfront);
3611     + return xenbus_register_frontend(&tpmfront);
3612     }
3613    
3614     static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
3615     --- a/drivers/xen/balloon/sysfs.c
3616     +++ b/drivers/xen/balloon/sysfs.c
3617     @@ -33,6 +33,7 @@
3618     #include <linux/stat.h>
3619     #include <linux/string.h>
3620     #include <linux/sysdev.h>
3621     +#include <linux/module.h>
3622     #include "common.h"
3623    
3624     #ifdef HAVE_XEN_PLATFORM_COMPAT_H
3625     --- a/drivers/xen/blkback/xenbus.c
3626     +++ b/drivers/xen/blkback/xenbus.c
3627     @@ -519,7 +519,6 @@
3628    
3629     static struct xenbus_driver blkback = {
3630     .name = "vbd",
3631     - .owner = THIS_MODULE,
3632     .ids = blkback_ids,
3633     .probe = blkback_probe,
3634     .remove = blkback_remove,
3635     @@ -529,5 +528,6 @@
3636    
3637     void blkif_xenbus_init(void)
3638     {
3639     - xenbus_register_backend(&blkback);
3640     + if (xenbus_register_backend(&blkback))
3641     + BUG();
3642     }
3643     --- a/drivers/xen/blkfront/blkfront.c
3644     +++ b/drivers/xen/blkfront/blkfront.c
3645     @@ -893,7 +893,6 @@
3646    
3647     static struct xenbus_driver blkfront = {
3648     .name = "vbd",
3649     - .owner = THIS_MODULE,
3650     .ids = blkfront_ids,
3651     .probe = blkfront_probe,
3652     .remove = blkfront_remove,
3653     --- a/drivers/xen/blktap/xenbus.c
3654     +++ b/drivers/xen/blktap/xenbus.c
3655     @@ -463,7 +463,6 @@
3656    
3657     static struct xenbus_driver blktap = {
3658     .name = "tap",
3659     - .owner = THIS_MODULE,
3660     .ids = blktap_ids,
3661     .probe = blktap_probe,
3662     .remove = blktap_remove,
3663     @@ -473,5 +472,6 @@
3664    
3665     void tap_blkif_xenbus_init(void)
3666     {
3667     - xenbus_register_backend(&blktap);
3668     + if (xenbus_register_backend(&blktap))
3669     + BUG();
3670     }
3671     --- a/drivers/xen/core/evtchn.c
3672     +++ b/drivers/xen/core/evtchn.c
3673     @@ -133,7 +133,7 @@
3674     BUG_ON(!test_bit(chn, s->evtchn_mask));
3675    
3676     if (irq != -1)
3677     - set_native_irq_info(irq, cpumask_of_cpu(cpu));
3678     + irq_desc[irq].affinity = cpumask_of_cpu(cpu);
3679    
3680     clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
3681     set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
3682     @@ -146,7 +146,7 @@
3683    
3684     /* By default all event channels notify CPU#0. */
3685     for (i = 0; i < NR_IRQS; i++)
3686     - set_native_irq_info(i, cpumask_of_cpu(0));
3687     + irq_desc[i].affinity = cpumask_of_cpu(0);
3688    
3689     memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
3690     memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
3691     --- a/drivers/xen/core/smpboot.c
3692     +++ b/drivers/xen/core/smpboot.c
3693     @@ -261,7 +261,7 @@
3694     {
3695     unsigned int cpu;
3696     struct task_struct *idle;
3697     - int apicid, acpiid;
3698     + int apicid;
3699     struct vcpu_get_physid cpu_id;
3700     #ifdef __x86_64__
3701     struct desc_ptr *gdt_descr;
3702     @@ -270,14 +270,8 @@
3703     #endif
3704    
3705     apicid = 0;
3706     - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
3707     + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
3708     apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
3709     - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
3710     -#ifdef CONFIG_ACPI
3711     - if (acpiid != 0xff)
3712     - x86_acpiid_to_apicid[acpiid] = apicid;
3713     -#endif
3714     - }
3715     boot_cpu_data.apicid = apicid;
3716     cpu_data[0] = boot_cpu_data;
3717    
3718     @@ -333,14 +327,8 @@
3719     XENFEAT_writable_descriptor_tables);
3720    
3721     apicid = cpu;
3722     - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
3723     + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
3724     apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
3725     - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
3726     -#ifdef CONFIG_ACPI
3727     - if (acpiid != 0xff)
3728     - x86_acpiid_to_apicid[acpiid] = apicid;
3729     -#endif
3730     - }
3731     cpu_data[cpu] = boot_cpu_data;
3732     cpu_data[cpu].apicid = apicid;
3733    
3734     --- a/drivers/xen/fbfront/xenfb.c
3735     +++ b/drivers/xen/fbfront/xenfb.c
3736     @@ -856,7 +856,6 @@
3737    
3738     static struct xenbus_driver xenfb_driver = {
3739     .name = "vfb",
3740     - .owner = THIS_MODULE,
3741     .ids = xenfb_ids,
3742     .probe = xenfb_probe,
3743     .remove = xenfb_remove,
3744     --- a/drivers/xen/fbfront/xenkbd.c
3745     +++ b/drivers/xen/fbfront/xenkbd.c
3746     @@ -323,7 +323,6 @@
3747    
3748     static struct xenbus_driver xenkbd_driver = {
3749     .name = "vkbd",
3750     - .owner = THIS_MODULE,
3751     .ids = xenkbd_ids,
3752     .probe = xenkbd_probe,
3753     .remove = xenkbd_remove,
3754     --- a/drivers/xen/netback/xenbus.c
3755     +++ b/drivers/xen/netback/xenbus.c
3756     @@ -437,7 +437,6 @@
3757    
3758     static struct xenbus_driver netback = {
3759     .name = "vif",
3760     - .owner = THIS_MODULE,
3761     .ids = netback_ids,
3762     .probe = netback_probe,
3763     .remove = netback_remove,
3764     @@ -448,5 +447,6 @@
3765    
3766     void netif_xenbus_init(void)
3767     {
3768     - xenbus_register_backend(&netback);
3769     + if (xenbus_register_backend(&netback))
3770     + BUG();
3771     }
3772     --- a/drivers/xen/netfront/netfront.c
3773     +++ b/drivers/xen/netfront/netfront.c
3774     @@ -1893,20 +1893,19 @@
3775     };
3776    
3777     #ifdef CONFIG_SYSFS
3778     -static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
3779     +static ssize_t show_rxbuf_min(struct device *dev,
3780     + struct device_attribute *attr, char *buf)
3781     {
3782     - struct net_device *netdev = container_of(cd, struct net_device,
3783     - class_dev);
3784     - struct netfront_info *info = netdev_priv(netdev);
3785     + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3786    
3787     return sprintf(buf, "%u\n", info->rx_min_target);
3788     }
3789    
3790     -static ssize_t store_rxbuf_min(struct class_device *cd,
3791     +static ssize_t store_rxbuf_min(struct device *dev,
3792     + struct device_attribute *attr,
3793     const char *buf, size_t len)
3794     {
3795     - struct net_device *netdev = container_of(cd, struct net_device,
3796     - class_dev);
3797     + struct net_device *netdev = to_net_dev(dev);
3798     struct netfront_info *np = netdev_priv(netdev);
3799     char *endp;
3800     unsigned long target;
3801     @@ -1936,20 +1935,19 @@
3802     return len;
3803     }
3804    
3805     -static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
3806     +static ssize_t show_rxbuf_max(struct device *dev,
3807     + struct device_attribute *attr, char *buf)
3808     {
3809     - struct net_device *netdev = container_of(cd, struct net_device,
3810     - class_dev);
3811     - struct netfront_info *info = netdev_priv(netdev);
3812     + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3813    
3814     return sprintf(buf, "%u\n", info->rx_max_target);
3815     }
3816    
3817     -static ssize_t store_rxbuf_max(struct class_device *cd,
3818     +static ssize_t store_rxbuf_max(struct device *dev,
3819     + struct device_attribute *attr,
3820     const char *buf, size_t len)
3821     {
3822     - struct net_device *netdev = container_of(cd, struct net_device,
3823     - class_dev);
3824     + struct net_device *netdev = to_net_dev(dev);
3825     struct netfront_info *np = netdev_priv(netdev);
3826     char *endp;
3827     unsigned long target;
3828     @@ -1979,16 +1977,15 @@
3829     return len;
3830     }
3831    
3832     -static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
3833     +static ssize_t show_rxbuf_cur(struct device *dev,
3834     + struct device_attribute *attr, char *buf)
3835     {
3836     - struct net_device *netdev = container_of(cd, struct net_device,
3837     - class_dev);
3838     - struct netfront_info *info = netdev_priv(netdev);
3839     + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3840    
3841     return sprintf(buf, "%u\n", info->rx_target);
3842     }
3843    
3844     -static const struct class_device_attribute xennet_attrs[] = {
3845     +static struct device_attribute xennet_attrs[] = {
3846     __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
3847     __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
3848     __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
3849     @@ -2000,8 +1997,8 @@
3850     int error = 0;
3851    
3852     for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
3853     - error = class_device_create_file(&netdev->class_dev,
3854     - &xennet_attrs[i]);
3855     + error = device_create_file(&netdev->dev,
3856     + &xennet_attrs[i]);
3857     if (error)
3858     goto fail;
3859     }
3860     @@ -2009,8 +2006,7 @@
3861    
3862     fail:
3863     while (--i >= 0)
3864     - class_device_remove_file(&netdev->class_dev,
3865     - &xennet_attrs[i]);
3866     + device_remove_file(&netdev->dev, &xennet_attrs[i]);
3867     return error;
3868     }
3869    
3870     @@ -2018,10 +2014,8 @@
3871     {
3872     int i;
3873    
3874     - for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
3875     - class_device_remove_file(&netdev->class_dev,
3876     - &xennet_attrs[i]);
3877     - }
3878     + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
3879     + device_remove_file(&netdev->dev, &xennet_attrs[i]);
3880     }
3881    
3882     #endif /* CONFIG_SYSFS */
3883     @@ -2187,7 +2181,6 @@
3884    
3885     static struct xenbus_driver netfront_driver = {
3886     .name = "vif",
3887     - .owner = THIS_MODULE,
3888     .ids = netfront_ids,
3889     .probe = netfront_probe,
3890     .remove = __devexit_p(netfront_remove),
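
    Note: the netfront hunks port the sysfs attributes from the removed
    class_device interface to struct device: the callbacks gain the
    device_attribute argument, and the files are created with
    device_create_file() on netdev->dev. The equivalent pattern for a single
    attribute, using the DEVICE_ATTR() convenience macro (attribute name and
    value illustrative):

	#include <linux/device.h>
	#include <linux/kernel.h>
	#include <linux/stat.h>

	static ssize_t show_example(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "%u\n", 42);
	}

	static DEVICE_ATTR(example, S_IRUGO, show_example, NULL);

	/* probe:  error = device_create_file(dev, &dev_attr_example); */
	/* remove: device_remove_file(dev, &dev_attr_example);        */
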
3891     --- a/drivers/xen/pciback/xenbus.c
3892     +++ b/drivers/xen/pciback/xenbus.c
3893     @@ -663,7 +663,6 @@
3894    
3895     static struct xenbus_driver xenbus_pciback_driver = {
3896     .name = "pciback",
3897     - .owner = THIS_MODULE,
3898     .ids = xenpci_ids,
3899     .probe = pciback_xenbus_probe,
3900     .remove = pciback_xenbus_remove,
3901     --- a/drivers/xen/pcifront/xenbus.c
3902     +++ b/drivers/xen/pcifront/xenbus.c
3903     @@ -435,7 +435,6 @@
3904    
3905     static struct xenbus_driver xenbus_pcifront_driver = {
3906     .name = "pcifront",
3907     - .owner = THIS_MODULE,
3908     .ids = xenpci_ids,
3909     .probe = pcifront_xenbus_probe,
3910     .remove = pcifront_xenbus_remove,
3911     --- a/drivers/xen/tpmback/common.h
3912     +++ b/drivers/xen/tpmback/common.h
3913     @@ -54,11 +54,11 @@
3914    
3915     void tpmif_disconnect_complete(tpmif_t * tpmif);
3916     tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
3917     -void tpmif_interface_init(void);
3918     +int tpmif_interface_init(void);
3919     void tpmif_interface_exit(void);
3920     void tpmif_schedule_work(tpmif_t * tpmif);
3921     void tpmif_deschedule_work(tpmif_t * tpmif);
3922     -void tpmif_xenbus_init(void);
3923     +int tpmif_xenbus_init(void);
3924     void tpmif_xenbus_exit(void);
3925     int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
3926     irqreturn_t tpmif_be_int(int irq, void *dev_id);
3927     --- a/drivers/xen/tpmback/interface.c
3928     +++ b/drivers/xen/tpmback/interface.c
3929     @@ -156,13 +156,14 @@
3930     free_tpmif(tpmif);
3931     }
3932    
3933     -void __init tpmif_interface_init(void)
3934     +int __init tpmif_interface_init(void)
3935     {
3936     tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
3937     0, 0, NULL, NULL);
3938     + return tpmif_cachep ? 0 : -ENOMEM;
3939     }
3940    
3941     -void __exit tpmif_interface_exit(void)
3942     +void tpmif_interface_exit(void)
3943     {
3944     kmem_cache_destroy(tpmif_cachep);
3945     }
3946     --- a/drivers/xen/tpmback/tpmback.c
3947     +++ b/drivers/xen/tpmback/tpmback.c
3948     @@ -923,22 +923,30 @@
3949     spin_lock_init(&tpm_schedule_list_lock);
3950     INIT_LIST_HEAD(&tpm_schedule_list);
3951    
3952     - tpmif_interface_init();
3953     - tpmif_xenbus_init();
3954     + rc = tpmif_interface_init();
3955     + if (!rc) {
3956     + rc = tpmif_xenbus_init();
3957     + if (rc)
3958     + tpmif_interface_exit();
3959     + }
3960     + if (rc) {
3961     + misc_deregister(&vtpms_miscdevice);
3962     + return rc;
3963     + }
3964    
3965     printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
3966    
3967     return 0;
3968     }
3969     -
3970     module_init(tpmback_init);
3971    
3972     -void __exit tpmback_exit(void)
3973     +static void __exit tpmback_exit(void)
3974     {
3975     vtpm_release_packets(NULL, 0);
3976     tpmif_xenbus_exit();
3977     tpmif_interface_exit();
3978     misc_deregister(&vtpms_miscdevice);
3979     }
3980     +module_exit(tpmback_exit)
3981    
3982     MODULE_LICENSE("Dual BSD/GPL");
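
    Note: tpmback_init() now checks the return values it previously ignored
    and unwinds in reverse order on failure, matching tpmback_exit(). The same
    ladder written with goto labels, the more common kernel idiom (a sketch;
    vtpms_miscdevice and the init/exit pairs are from the surrounding file):

	#include <linux/init.h>
	#include <linux/miscdevice.h>

	static int __init example_init(void)
	{
		int rc;

		rc = misc_register(&vtpms_miscdevice);
		if (rc)
			return rc;
		rc = tpmif_interface_init();
		if (rc)
			goto err_misc;
		rc = tpmif_xenbus_init();
		if (rc)
			goto err_interface;
		return 0;

	err_interface:
		tpmif_interface_exit();
	err_misc:
		misc_deregister(&vtpms_miscdevice);
		return rc;
	}
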
3983     --- a/drivers/xen/tpmback/xenbus.c
3984     +++ b/drivers/xen/tpmback/xenbus.c
3985     @@ -270,7 +270,6 @@
3986    
3987     static struct xenbus_driver tpmback = {
3988     .name = "vtpm",
3989     - .owner = THIS_MODULE,
3990     .ids = tpmback_ids,
3991     .probe = tpmback_probe,
3992     .remove = tpmback_remove,
3993     @@ -278,9 +277,9 @@
3994     };
3995    
3996    
3997     -void tpmif_xenbus_init(void)
3998     +int tpmif_xenbus_init(void)
3999     {
4000     - xenbus_register_backend(&tpmback);
4001     + return xenbus_register_backend(&tpmback);
4002     }
4003    
4004     void tpmif_xenbus_exit(void)
4005     --- a/drivers/xen/xenbus/xenbus_probe.c
4006     +++ b/drivers/xen/xenbus/xenbus_probe.c
4007     @@ -362,7 +362,9 @@
4008     }
4009    
4010     int xenbus_register_driver_common(struct xenbus_driver *drv,
4011     - struct xen_bus_type *bus)
4012     + struct xen_bus_type *bus,
4013     + struct module *owner,
4014     + const char *mod_name)
4015     {
4016     int ret;
4017    
4018     @@ -372,7 +374,10 @@
4019     drv->driver.name = drv->name;
4020     drv->driver.bus = &bus->bus;
4021     #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
4022     - drv->driver.owner = drv->owner;
4023     + drv->driver.owner = owner;
4024     +#endif
4025     +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
4026     + drv->driver.mod_name = mod_name;
4027     #endif
4028     #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
4029     drv->driver.probe = xenbus_dev_probe;
4030     @@ -386,13 +391,15 @@
4031     return ret;
4032     }
4033    
4034     -int xenbus_register_frontend(struct xenbus_driver *drv)
4035     +int __xenbus_register_frontend(struct xenbus_driver *drv,
4036     + struct module *owner, const char *mod_name)
4037     {
4038     int ret;
4039    
4040     drv->read_otherend_details = read_backend_details;
4041    
4042     - ret = xenbus_register_driver_common(drv, &xenbus_frontend);
4043     + ret = xenbus_register_driver_common(drv, &xenbus_frontend,
4044     + owner, mod_name);
4045     if (ret)
4046     return ret;
4047    
4048     @@ -401,7 +408,7 @@
4049    
4050     return 0;
4051     }
4052     -EXPORT_SYMBOL_GPL(xenbus_register_frontend);
4053     +EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
4054    
4055     void xenbus_unregister_driver(struct xenbus_driver *drv)
4056     {
4057     --- a/drivers/xen/xenbus/xenbus_probe.h
4058     +++ b/drivers/xen/xenbus/xenbus_probe.h
4059     @@ -63,7 +63,9 @@
4060     extern int xenbus_dev_probe(struct device *_dev);
4061     extern int xenbus_dev_remove(struct device *_dev);
4062     extern int xenbus_register_driver_common(struct xenbus_driver *drv,
4063     - struct xen_bus_type *bus);
4064     + struct xen_bus_type *bus,
4065     + struct module *owner,
4066     + const char *mod_name);
4067     extern int xenbus_probe_node(struct xen_bus_type *bus,
4068     const char *type,
4069     const char *nodename);
4070     --- a/drivers/xen/xenbus/xenbus_probe_backend.c
4071     +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
4072     @@ -172,13 +172,15 @@
4073     return 0;
4074     }
4075    
4076     -int xenbus_register_backend(struct xenbus_driver *drv)
4077     +int __xenbus_register_backend(struct xenbus_driver *drv,
4078     + struct module *owner, const char *mod_name)
4079     {
4080     drv->read_otherend_details = read_frontend_details;
4081    
4082     - return xenbus_register_driver_common(drv, &xenbus_backend);
4083     + return xenbus_register_driver_common(drv, &xenbus_backend,
4084     + owner, mod_name);
4085     }
4086     -EXPORT_SYMBOL_GPL(xenbus_register_backend);
4087     +EXPORT_SYMBOL_GPL(__xenbus_register_backend);
4088    
4089     /* backend/<typename>/<frontend-uuid>/<name> */
4090     static int xenbus_probe_backend_unit(const char *dir,
4091     --- a/include/asm-x86/i8253.h
4092     +++ b/include/asm-x86/i8253.h
4093     @@ -8,10 +8,14 @@
4094    
4095     extern spinlock_t i8253_lock;
4096    
4097     +#ifdef CONFIG_GENERIC_CLOCKEVENTS
4098     +
4099     extern struct clock_event_device *global_clock_event;
4100    
4101     extern void setup_pit_timer(void);
4102    
4103     +#endif
4104     +
4105     #define inb_pit inb_p
4106     #define outb_pit outb_p
4107    
4108     --- a/include/asm-x86/mach-xen/asm/desc_32.h
4109     +++ b/include/asm-x86/mach-xen/asm/desc_32.h
4110     @@ -21,7 +21,7 @@
4111    
4112     extern struct Xgt_desc_struct idt_descr;
4113     DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
4114     -
4115     +extern struct Xgt_desc_struct early_gdt_descr;
4116    
4117     static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
4118     {
4119     --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4120     +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4121     @@ -9,7 +9,6 @@
4122    
4123     #include <asm/scatterlist.h>
4124     #include <asm/io.h>
4125     -#include <asm/swiotlb.h>
4126    
4127     struct dma_mapping_ops {
4128     int (*mapping_error)(dma_addr_t dma_addr);
4139     --- a/include/asm-x86/mach-xen/asm/e820_64.h
4140     +++ b/include/asm-x86/mach-xen/asm/e820_64.h
4141     @@ -46,6 +46,7 @@
4142     extern void e820_print_map(char *who);
4143     extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
4144     extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
4145     +extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
4146    
4147     extern void e820_setup_gap(struct e820entry *e820, int nr_map);
4148     extern void e820_register_active_regions(int nid,
4149     @@ -56,6 +57,7 @@
4150     extern struct e820map e820;
4151    
4152     extern unsigned ebda_addr, ebda_size;
4153     +extern unsigned long nodemap_addr, nodemap_size;
4154     #endif/*!__ASSEMBLY__*/
4155    
4156     #endif/*__E820_HEADER*/
4157     --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h
4158     +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h
4159     @@ -31,10 +31,32 @@
4160    
4161     #define IA32_SYSCALL_VECTOR 0x80
4162    
4163     +#ifndef CONFIG_XEN
4164     +
4165     +/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
4166     + * cleanup after irq migration.
4167     + */
4168     +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
4169    
4170     /*
4171     - * Vectors 0x20-0x2f are used for ISA interrupts.
4172     + * Vectors 0x30-0x3f are used for ISA interrupts.
4173     */
4174     +#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10
4175     +#define IRQ1_VECTOR IRQ0_VECTOR + 1
4176     +#define IRQ2_VECTOR IRQ0_VECTOR + 2
4177     +#define IRQ3_VECTOR IRQ0_VECTOR + 3
4178     +#define IRQ4_VECTOR IRQ0_VECTOR + 4
4179     +#define IRQ5_VECTOR IRQ0_VECTOR + 5
4180     +#define IRQ6_VECTOR IRQ0_VECTOR + 6
4181     +#define IRQ7_VECTOR IRQ0_VECTOR + 7
4182     +#define IRQ8_VECTOR IRQ0_VECTOR + 8
4183     +#define IRQ9_VECTOR IRQ0_VECTOR + 9
4184     +#define IRQ10_VECTOR IRQ0_VECTOR + 10
4185     +#define IRQ11_VECTOR IRQ0_VECTOR + 11
4186     +#define IRQ12_VECTOR IRQ0_VECTOR + 12
4187     +#define IRQ13_VECTOR IRQ0_VECTOR + 13
4188     +#define IRQ14_VECTOR IRQ0_VECTOR + 14
4189     +#define IRQ15_VECTOR IRQ0_VECTOR + 15
4190    
4191     /*
4192     * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
4193     @@ -43,7 +65,6 @@
4194     * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
4195     * TLB, reschedule and local APIC vectors are performance-critical.
4196     */
4197     -#ifndef CONFIG_XEN
4198     #define SPURIOUS_APIC_VECTOR 0xff
4199     #define ERROR_APIC_VECTOR 0xfe
4200     #define RESCHEDULE_VECTOR 0xfd
4201     @@ -57,7 +78,6 @@
4202     #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
4203    
4204     #define NUM_INVALIDATE_TLB_VECTORS 8
4205     -#endif
4206    
4207     /*
4208     * Local APIC timer IRQ vector is on a different priority level,
4209     @@ -68,12 +88,13 @@
4210    
4211     /*
4212     * First APIC vector available to drivers: (vectors 0x30-0xee)
4213     - * we start at 0x31 to spread out vectors evenly between priority
4214     + * we start at 0x41 to spread out vectors evenly between priority
4215     * levels. (0x80 is the syscall vector)
4216     */
4217     -#define FIRST_DEVICE_VECTOR 0x31
4218     +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
4219     #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */
4220    
4221     +#endif
4222    
4223     #ifndef __ASSEMBLY__
4224     typedef int vector_irq_t[NR_VECTORS];
4225     @@ -93,7 +114,7 @@
4226     extern int i8259A_irq_pending(unsigned int irq);
4227     extern void make_8259A_irq(unsigned int irq);
4228     extern void init_8259A(int aeoi);
4229     -extern void FASTCALL(send_IPI_self(int vector));
4230     +extern void send_IPI_self(int vector);
4231     extern void init_VISWS_APIC_irqs(void);
4232     extern void setup_IO_APIC(void);
4233     extern void disable_IO_APIC(void);
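
    Note: assuming FIRST_EXTERNAL_VECTOR is 0x20 as elsewhere in the 2.6.21
    tree, the new layout works out to IRQ0_VECTOR = 0x20 + 0x10 = 0x30,
    IRQ15_VECTOR = 0x3f, and FIRST_DEVICE_VECTOR = IRQ15_VECTOR + 2 = 0x41,
    which is where the updated comment's "we start at 0x41" comes from;
    0x20-0x2f stays reserved for IRQ_MOVE_CLEANUP_VECTOR and 0x80 remains
    the syscall vector.
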
4234     --- a/include/asm-x86/mach-xen/asm/hypervisor.h
4235     +++ b/include/asm-x86/mach-xen/asm/hypervisor.h
4236     @@ -171,7 +171,7 @@
4237     return rc;
4238     }
4239    
4240     -static inline void /*__noreturn*/
4241     +static inline void __noreturn
4242     HYPERVISOR_shutdown(
4243     unsigned int reason)
4244     {
4245     --- a/include/asm-x86/mach-xen/asm/io_32.h
4246     +++ b/include/asm-x86/mach-xen/asm/io_32.h
4247     @@ -232,12 +232,6 @@
4248     #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4249    
4250     /*
4251     - * Again, i386 does not require mem IO specific function.
4252     - */
4253     -
4254     -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
4255     -
4256     -/*
4257     * Cache management
4258     *
4259     * This needed for two cases
4260     --- a/include/asm-x86/mach-xen/asm/io_64.h
4261     +++ b/include/asm-x86/mach-xen/asm/io_64.h
4262     @@ -101,7 +101,7 @@
4263    
4264     #define IO_SPACE_LIMIT 0xffff
4265    
4266     -#if defined(__KERNEL__) && __x86_64__
4267     +#if defined(__KERNEL__) && defined(__x86_64__)
4268    
4269     #include <linux/vmalloc.h>
4270    
4271     @@ -267,12 +267,6 @@
4272     */
4273     #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4274    
4275     -/*
4276     - * Again, x86-64 does not require mem IO specific function.
4277     - */
4278     -
4279     -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
4280     -
4281     /* Nothing to do */
4282    
4283     #define dma_cache_inv(_start,_size) do { } while (0)
4284     --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h
4285     +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h
4286     @@ -27,13 +27,13 @@
4287     static inline void __prepare_arch_switch(void)
4288     {
4289     /*
4290     - * Save away %fs. No need to save %gs, as it was saved on the
4291     + * Save away %gs. No need to save %fs, as it was saved on the
4292     * stack on entry. No need to save %es and %ds, as those are
4293     * always kernel segments while inside the kernel.
4294     */
4295     - asm volatile ( "mov %%fs,%0"
4296     - : "=m" (current->thread.fs));
4297     - asm volatile ( "movl %0,%%fs"
4298     + asm volatile ( "mov %%gs,%0"
4299     + : "=m" (current->thread.gs));
4300     + asm volatile ( "movl %0,%%gs"
4301     : : "r" (0) );
4302     }
4303    
4304     @@ -95,7 +95,7 @@
4305     }
4306    
4307     #define deactivate_mm(tsk, mm) \
4308     - asm("movl %0,%%fs": :"r" (0));
4309     + asm("movl %0,%%gs": :"r" (0));
4310    
4311     static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
4312     {
4313     --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h
4314     +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h
4315     @@ -6,12 +6,22 @@
4316     #include <linux/mm.h> /* for struct page */
4317     #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
4318    
4319     -#define pmd_populate_kernel(mm, pmd, pte) \
4320     - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
4321     +#define paravirt_alloc_pt(pfn) do { } while (0)
4322     +#define paravirt_alloc_pd(pfn) do { } while (0)
4324     +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
4325     +#define paravirt_release_pt(pfn) do { } while (0)
4326     +#define paravirt_release_pd(pfn) do { } while (0)
4327     +
4328     +#define pmd_populate_kernel(mm, pmd, pte) \
4329     +do { \
4330     + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
4331     + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
4332     +} while (0)
4333    
4334     #define pmd_populate(mm, pmd, pte) \
4335     do { \
4336     unsigned long pfn = page_to_pfn(pte); \
4337     + paravirt_alloc_pt(pfn); \
4338     if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \
4339     if (!PageHighMem(pte)) \
4340     BUG_ON(HYPERVISOR_update_va_mapping( \
4341     @@ -42,7 +53,11 @@
4342    
4343     extern void pte_free(struct page *pte);
4344    
4345     -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
4346     +#define __pte_free_tlb(tlb,pte) \
4347     +do { \
4348     + paravirt_release_pt(page_to_pfn(pte)); \
4349     + tlb_remove_page((tlb),(pte)); \
4350     +} while (0)
4351    
4352     #ifdef CONFIG_X86_PAE
4353     /*
4354     --- a/include/asm-x86/mach-xen/asm/pgtable_32.h
4355     +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
4356     @@ -275,6 +275,7 @@
4357     */
4358     #define pte_update(mm, addr, ptep) do { } while (0)
4359     #define pte_update_defer(mm, addr, ptep) do { } while (0)
4360     +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
4361    
4362     /*
4363     * We only update the dirty/accessed state if we set
4364     @@ -490,12 +491,24 @@
4365     #endif
4366    
4367     #if defined(CONFIG_HIGHPTE)
4368     -#define pte_offset_map(dir, address) \
4369     - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
4370     - pte_index(address))
4371     -#define pte_offset_map_nested(dir, address) \
4372     - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
4373     - pte_index(address))
4374     +#define pte_offset_map(dir, address) \
4375     +({ \
4376     + pte_t *__ptep; \
4377     + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4378     + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
4379     + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
4380     + __ptep = __ptep + pte_index(address); \
4381     + __ptep; \
4382     +})
4383     +#define pte_offset_map_nested(dir, address) \
4384     +({ \
4385     + pte_t *__ptep; \
4386     + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4387     + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
4388     + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
4389     + __ptep = __ptep + pte_index(address); \
4390     + __ptep; \
4391     +})
4392     #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
4393     #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
4394     #else
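
    Note: under CONFIG_HIGHPTE the rewritten macros map the pte page through
    kmap_atomic_pte() and give the paravirt layer a chance to observe the
    mapping (paravirt_map_pt_hook() is defined as a no-op earlier in this
    file). Callers are unchanged; the usual shape, with an illustrative
    helper name:

	static pte_t read_pte_sketch(pmd_t *pmd, unsigned long address)
	{
		pte_t *ptep = pte_offset_map(pmd, address);
		pte_t entry = *ptep;

		pte_unmap(ptep);	/* kunmap_atomic with the matching KM_PTE0 slot */
		return entry;
	}
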
4395     --- a/include/asm-x86/mach-xen/asm/pgtable_64.h
4396     +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
4397     @@ -416,15 +416,6 @@
4398     #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
4399     #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
4400    
4401     -/* physical address -> PTE */
4402     -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
4403     -{
4404     - unsigned long pteval;
4405     - pteval = physpage | pgprot_val(pgprot);
4406     - pteval &= __supported_pte_mask;
4407     - return __pte(pteval);
4408     -}
4409     -
4410     /* Change flags of a PTE */
4411     static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
4412     {
4413     --- a/include/asm-x86/mach-xen/asm/processor_32.h
4414     +++ b/include/asm-x86/mach-xen/asm/processor_32.h
4415     @@ -431,7 +431,7 @@
4416     .vm86_info = NULL, \
4417     .sysenter_cs = __KERNEL_CS, \
4418     .io_bitmap_ptr = NULL, \
4419     - .gs = __KERNEL_PDA, \
4420     + .fs = __KERNEL_PDA, \
4421     }
4422    
4423     /*
4424     @@ -449,8 +449,8 @@
4425     }
4426    
4427     #define start_thread(regs, new_eip, new_esp) do { \
4428     - __asm__("movl %0,%%fs": :"r" (0)); \
4429     - regs->xgs = 0; \
4430     + __asm__("movl %0,%%gs": :"r" (0)); \
4431     + regs->xfs = 0; \
4432     set_fs(USER_DS); \
4433     regs->xds = __USER_DS; \
4434     regs->xes = __USER_DS; \
4435     --- a/include/asm-x86/mach-xen/asm/segment_32.h
4436     +++ b/include/asm-x86/mach-xen/asm/segment_32.h
4437     @@ -83,14 +83,8 @@
4438     * The GDT has 32 entries
4439     */
4440     #define GDT_ENTRIES 32
4441     -
4442     #define GDT_SIZE (GDT_ENTRIES * 8)
4443    
4444     -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4445     -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
4446     -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4447     -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
4448     -
4449     /* Simple and small GDT entries for booting only */
4450    
4451     #define GDT_ENTRY_BOOT_CS 2
4452     @@ -132,4 +126,21 @@
4453     #define SEGMENT_GDT 0x0
4454    
4455     #define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
4456     +
4457     +/*
4458     + * Matching rules for certain types of segments.
4459     + */
4460     +
4461     +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
4462     +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \
4463     + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3))
4464     +
4465     +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4466     +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \
4467     + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \
4468     + || ((x) & ~3) == (FLAT_USER_CS & ~3))
4469     +
4470     +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4471     +#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8)
4472     +
4473     #endif
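
    Note: the SEGMENT_IS_* tests move below the Xen definitions and mask with
    ~3 (and ~0x13) rather than comparing exact values because the low two
    selector bits are the RPL, which is 1 for the kernel under Xen (see
    get_kernel_rpl() above). A worked check, compilable as plain userspace C
    with the i386 numbers assumed (GDT_ENTRY_KERNEL_CS == 12, so the kernel
    CS selector is 0x60, the Xen RPL-1 kernel CS 0x61, and the user CS 0x73):

	#include <assert.h>

	#define RPL_MASK 3

	static int is_kernel_code(unsigned int sel, unsigned int kernel_cs)
	{
		return (sel & ~RPL_MASK) == (kernel_cs & ~RPL_MASK);
	}

	int main(void)
	{
		assert(is_kernel_code(0x61, 0x60));	/* Xen RPL-1 kernel CS matches */
		assert(!is_kernel_code(0x73, 0x60));	/* user CS: 0x70 != 0x60 */
		return 0;
	}
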
4474     --- a/include/asm-x86/mach-xen/asm/smp_32.h
4475     +++ b/include/asm-x86/mach-xen/asm/smp_32.h
4476     @@ -52,6 +52,11 @@
4477     extern void cpu_uninit(void);
4478     #endif
4479    
4480     +#ifndef CONFIG_PARAVIRT
4481     +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
4482     +do { } while (0)
4483     +#endif
4484     +
4485     /*
4486     * This function is needed by all SMP systems. It must _always_ be valid
4487     * from the initial startup. We map APIC_BASE very early in page_setup(),
4488     --- a/include/asm-x86/mach-xen/asm/smp_64.h
4489     +++ b/include/asm-x86/mach-xen/asm/smp_64.h
4490     @@ -7,6 +7,7 @@
4491     #include <linux/threads.h>
4492     #include <linux/cpumask.h>
4493     #include <linux/bitops.h>
4494     +#include <linux/init.h>
4495     extern int disable_apic;
4496    
4497     #ifdef CONFIG_X86_LOCAL_APIC
4498     @@ -73,7 +74,7 @@
4499     extern void __cpu_die(unsigned int cpu);
4500     extern void prefill_possible_map(void);
4501     extern unsigned num_processors;
4502     -extern unsigned disabled_cpus;
4503     +extern unsigned __cpuinitdata disabled_cpus;
4504    
4505     #define NO_PROC_ID 0xFF /* No processor magic marker */
4506    
4507     --- a/include/xen/xenbus.h
4508     +++ b/include/xen/xenbus.h
4509     @@ -93,8 +93,7 @@
4510    
4511     /* A xenbus driver. */
4512     struct xenbus_driver {
4513     - char *name;
4514     - struct module *owner;
4515     + const char *name;
4516     const struct xenbus_device_id *ids;
4517     int (*probe)(struct xenbus_device *dev,
4518     const struct xenbus_device_id *id);
4519     @@ -115,8 +114,25 @@
4520     return container_of(drv, struct xenbus_driver, driver);
4521     }
4522    
4523     -int xenbus_register_frontend(struct xenbus_driver *drv);
4524     -int xenbus_register_backend(struct xenbus_driver *drv);
4525     +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
4526     + struct module *owner,
4527     + const char *mod_name);
4528     +
4529     +static inline int __must_check
4530     +xenbus_register_frontend(struct xenbus_driver *drv)
4531     +{
4532     + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
4533     +}
4534     +
4535     +int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
4536     + struct module *owner,
4537     + const char *mod_name);
4538     +static inline int __must_check
4539     +xenbus_register_backend(struct xenbus_driver *drv)
4540     +{
4541     + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
4542     +}
4543     +
4544     void xenbus_unregister_driver(struct xenbus_driver *drv);
4545    
4546     struct xenbus_transaction
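
    Note: this xenbus.h hunk is the keystone of all the .owner removals above:
    xenbus_register_frontend()/_backend() become static inlines, so THIS_MODULE
    and KBUILD_MODNAME are evaluated in the registering driver's own
    translation unit and passed down, and __must_check forces callers to look
    at the result. A frontend driver after this patch looks like (names
    illustrative):

	static const struct xenbus_device_id example_ids[] = {
		{ "vexample" },
		{ "" }
	};

	static struct xenbus_driver example_front = {
		.name  = "vexample",
		.ids   = example_ids,
		.probe = example_probe,
	};

	static int __init example_init(void)
	{
		/* inline wrapper supplies THIS_MODULE and KBUILD_MODNAME */
		return xenbus_register_frontend(&example_front);
	}
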
4547     --- a/lib/swiotlb-xen.c
4548     +++ b/lib/swiotlb-xen.c
4549     @@ -138,8 +138,8 @@
4550     * Statically reserve bounce buffer space and initialize bounce buffer data
4551     * structures for the software IO TLB used to implement the PCI DMA API.
4552     */
4553     -void
4554     -swiotlb_init_with_default_size (size_t default_size)
4555     +void __init
4556     +swiotlb_init_with_default_size(size_t default_size)
4557     {
4558     unsigned long i, bytes;
4559     int rc;
4560     @@ -227,7 +227,7 @@
4561     dma_bits);
4562     }
4563    
4564     -void
4565     +void __init
4566     swiotlb_init(void)
4567     {
4568     long ram_end;
4569     @@ -463,7 +463,7 @@
4570     * When the mapping is small enough return a static buffer to limit
4571     * the damage, or panic when the transfer is too big.
4572     */
4573     - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
4574     + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
4575     "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
4576    
4577     if (size > io_tlb_overflow && do_panic) {
4578     @@ -608,7 +608,7 @@
4579     sg[0].dma_length = 0;
4580     return 0;
4581     }
4582     - sg->dma_address = (dma_addr_t)virt_to_bus(map);
4583     + sg->dma_address = virt_to_bus(map);
4584     } else
4585     sg->dma_address = dev_addr;
4586     sg->dma_length = sg->length;
4587     @@ -630,8 +630,7 @@
4588    
4589     for (i = 0; i < nelems; i++, sg++)
4590     if (in_swiotlb_aperture(sg->dma_address))
4591     - unmap_single(hwdev,
4592     - (void *)bus_to_virt(sg->dma_address),
4593     + unmap_single(hwdev, bus_to_virt(sg->dma_address),
4594     sg->dma_length, dir);
4595     else
4596     gnttab_dma_unmap_page(sg->dma_address);
4597     @@ -654,8 +653,7 @@
4598    
4599     for (i = 0; i < nelems; i++, sg++)
4600     if (in_swiotlb_aperture(sg->dma_address))
4601     - sync_single(hwdev,
4602     - (void *)bus_to_virt(sg->dma_address),
4603     + sync_single(hwdev, bus_to_virt(sg->dma_address),
4604     sg->dma_length, dir);
4605     }
4606    
4607     @@ -669,8 +667,7 @@
4608    
4609     for (i = 0; i < nelems; i++, sg++)
4610     if (in_swiotlb_aperture(sg->dma_address))
4611     - sync_single(hwdev,
4612     - (void *)bus_to_virt(sg->dma_address),
4613     + sync_single(hwdev, bus_to_virt(sg->dma_address),
4614     sg->dma_length, dir);
4615     }
4616
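    Note on the printk hunk above: %zu is the C99 conversion for size_t, so
    the (unsigned long) cast on the size argument goes away with it; on i386,
    where size_t is unsigned int, keeping the cast would trip gcc's printf
    format checking:

	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes\n", size);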