Magellan Linux

Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1022-2.6.25-xen-patch-2.6.21.patch



Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 123925 bytes
-using opensuse xen patchset, updated kernel configs

1 From: www.kernel.org
2 Subject: Linux 2.6.21
3 Patch-mainline: 2.6.21
4
5 Automatically created from "patches.kernel.org/patch-2.6.21" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 ---
10 arch/x86/Kconfig | 4
11 arch/x86/ia32/ia32entry-xen.S | 5
12 arch/x86/kernel/Makefile | 4
13 arch/x86/kernel/acpi/sleep_64-xen.c | 6
14 arch/x86/kernel/apic_32-xen.c | 65 ----
15 arch/x86/kernel/cpu/common-xen.c | 14
16 arch/x86/kernel/e820_32-xen.c | 18 -
17 arch/x86/kernel/e820_64-xen.c | 40 ++
18 arch/x86/kernel/entry_32-xen.S | 80 +++--
19 arch/x86/kernel/entry_64-xen.S | 3
20 arch/x86/kernel/genapic_64-xen.c | 4
21 arch/x86/kernel/head64-xen.c | 8
22 arch/x86/kernel/head_32-xen.S | 9
23 arch/x86/kernel/io_apic_32-xen.c | 43 +-
24 arch/x86/kernel/io_apic_64-xen.c | 413 +++++++++++++-------------
25 arch/x86/kernel/irq_32-xen.c | 22 +
26 arch/x86/kernel/irq_64-xen.c | 13
27 arch/x86/kernel/microcode-xen.c | 2
28 arch/x86/kernel/mpparse_32-xen.c | 4
29 arch/x86/kernel/mpparse_64-xen.c | 6
30 arch/x86/kernel/pci-dma_32-xen.c | 2
31 arch/x86/kernel/pci-swiotlb_64-xen.c | 2
32 arch/x86/kernel/pcspeaker.c | 5
33 arch/x86/kernel/process_32-xen.c | 42 +-
34 arch/x86/kernel/process_64-xen.c | 13
35 arch/x86/kernel/setup_32-xen.c | 46 --
36 arch/x86/kernel/setup_64-xen.c | 184 +----------
37 arch/x86/kernel/smp_32-xen.c | 5
38 arch/x86/kernel/time_32-xen.c | 275 +----------------
39 arch/x86/kernel/traps_32-xen.c | 27 +
40 arch/x86/kernel/vsyscall_64-xen.c | 127 ++++---
41 arch/x86/mm/fault_32-xen.c | 44 --
42 arch/x86/mm/fault_64-xen.c | 39 --
43 arch/x86/mm/highmem_32-xen.c | 9
44 arch/x86/mm/init_32-xen.c | 2
45 arch/x86/mm/init_64-xen.c | 24 +
46 arch/x86/mm/pageattr_64-xen.c | 6
47 arch/x86/mm/pgtable_32-xen.c | 28 +
48 drivers/char/tpm/tpm_xen.c | 5
49 drivers/xen/balloon/sysfs.c | 1
50 drivers/xen/blkback/xenbus.c | 4
51 drivers/xen/blkfront/blkfront.c | 1
52 drivers/xen/blktap/xenbus.c | 4
53 drivers/xen/core/evtchn.c | 4
54 drivers/xen/core/smpboot.c | 18 -
55 drivers/xen/fbfront/xenfb.c | 1
56 drivers/xen/fbfront/xenkbd.c | 1
57 drivers/xen/netback/xenbus.c | 4
58 drivers/xen/netfront/netfront.c | 49 +--
59 drivers/xen/pciback/xenbus.c | 1
60 drivers/xen/pcifront/xenbus.c | 1
61 drivers/xen/tpmback/common.h | 4
62 drivers/xen/tpmback/interface.c | 5
63 drivers/xen/tpmback/tpmback.c | 16 -
64 drivers/xen/tpmback/xenbus.c | 5
65 drivers/xen/xenbus/xenbus_probe.c | 17 -
66 drivers/xen/xenbus/xenbus_probe.h | 4
67 drivers/xen/xenbus/xenbus_probe_backend.c | 8
68 include/asm-x86/i8253.h | 4
69 include/asm-x86/mach-xen/asm/desc_32.h | 2
70 include/asm-x86/mach-xen/asm/dma-mapping_64.h | 4
71 include/asm-x86/mach-xen/asm/e820_64.h | 2
72 include/asm-x86/mach-xen/asm/hw_irq_64.h | 33 +-
73 include/asm-x86/mach-xen/asm/hypervisor.h | 2
74 include/asm-x86/mach-xen/asm/io_32.h | 6
75 include/asm-x86/mach-xen/asm/io_64.h | 8
76 include/asm-x86/mach-xen/asm/mmu_context_32.h | 10
77 include/asm-x86/mach-xen/asm/pgalloc_32.h | 21 +
78 include/asm-x86/mach-xen/asm/pgtable_32.h | 25 +
79 include/asm-x86/mach-xen/asm/pgtable_64.h | 9
80 include/asm-x86/mach-xen/asm/processor_32.h | 6
81 include/asm-x86/mach-xen/asm/segment_32.h | 23 +
82 include/asm-x86/mach-xen/asm/smp_32.h | 5
83 include/asm-x86/mach-xen/asm/smp_64.h | 3
84 include/xen/xenbus.h | 24 +
85 lib/swiotlb-xen.c | 19 -
86 76 files changed, 889 insertions(+), 1113 deletions(-)
87
88 --- a/arch/x86/Kconfig
89 +++ b/arch/x86/Kconfig
90 @@ -48,13 +48,15 @@
91
92 config CLOCKSOURCE_WATCHDOG
93 def_bool y
94 + depends on !X86_XEN
95
96 config GENERIC_CLOCKEVENTS
97 def_bool y
98 + depends on !X86_XEN
99
100 config GENERIC_CLOCKEVENTS_BROADCAST
101 def_bool y
102 - depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
103 + depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
104
105 config LOCKDEP_SUPPORT
106 def_bool y
107 --- a/arch/x86/ia32/ia32entry-xen.S
108 +++ b/arch/x86/ia32/ia32entry-xen.S
109 @@ -465,7 +465,7 @@
110 .quad sys32_vm86_warning /* vm86old */
111 .quad compat_sys_wait4
112 .quad sys_swapoff /* 115 */
113 - .quad sys32_sysinfo
114 + .quad compat_sys_sysinfo
115 .quad sys32_ipc
116 .quad sys_fsync
117 .quad stub32_sigreturn
118 @@ -510,7 +510,7 @@
119 .quad sys_sched_yield
120 .quad sys_sched_get_priority_max
121 .quad sys_sched_get_priority_min /* 160 */
122 - .quad sys_sched_rr_get_interval
123 + .quad sys32_sched_rr_get_interval
124 .quad compat_sys_nanosleep
125 .quad sys_mremap
126 .quad sys_setresuid16
127 @@ -668,4 +668,5 @@
128 .quad compat_sys_vmsplice
129 .quad compat_sys_move_pages
130 .quad sys_getcpu
131 + .quad sys_epoll_pwait
132 ia32_syscall_end:
133 --- a/arch/x86/kernel/Makefile
134 +++ b/arch/x86/kernel/Makefile
135 @@ -104,6 +104,6 @@
136 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
137 endif
138
139 -disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
140 - smpboot_$(BITS).o tsc_$(BITS).o
141 +disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
142 + smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
143 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
144 --- a/arch/x86/kernel/acpi/sleep_64-xen.c
145 +++ b/arch/x86/kernel/acpi/sleep_64-xen.c
146 @@ -59,7 +59,7 @@
147 unsigned long acpi_video_flags;
148 extern char wakeup_start, wakeup_end;
149
150 -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
151 +extern unsigned long acpi_copy_wakeup_routine(unsigned long);
152
153 static pgd_t low_ptr;
154
155 @@ -67,8 +67,10 @@
156 {
157 pgd_t *slot0 = pgd_offset(current->mm, 0UL);
158 low_ptr = *slot0;
159 + /* FIXME: We're playing with the current task's page tables here, which
160 + * is potentially dangerous on SMP systems.
161 + */
162 set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
163 - WARN_ON(num_online_cpus() != 1);
164 local_flush_tlb();
165 }
166 #endif
167 --- a/arch/x86/kernel/apic_32-xen.c
168 +++ b/arch/x86/kernel/apic_32-xen.c
169 @@ -25,6 +25,8 @@
170 #include <linux/kernel_stat.h>
171 #include <linux/sysdev.h>
172 #include <linux/cpu.h>
173 +#include <linux/clockchips.h>
174 +#include <linux/acpi_pmtmr.h>
175 #include <linux/module.h>
176
177 #include <asm/atomic.h>
178 @@ -56,83 +58,26 @@
179 */
180
181 /*
182 - * Debug level
183 + * Debug level, exported for io_apic.c
184 */
185 int apic_verbosity;
186
187 #ifndef CONFIG_XEN
188 static int modern_apic(void)
189 {
190 - unsigned int lvr, version;
191 /* AMD systems use old APIC versions, so check the CPU */
192 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
193 - boot_cpu_data.x86 >= 0xf)
194 + boot_cpu_data.x86 >= 0xf)
195 return 1;
196 - lvr = apic_read(APIC_LVR);
197 - version = GET_APIC_VERSION(lvr);
198 - return version >= 0x14;
199 + return lapic_get_version() >= 0x14;
200 }
201 #endif /* !CONFIG_XEN */
202
203 -/*
204 - * 'what should we do if we get a hw irq event on an illegal vector'.
205 - * each architecture has to answer this themselves.
206 - */
207 -void ack_bad_irq(unsigned int irq)
208 -{
209 - printk("unexpected IRQ trap at vector %02x\n", irq);
210 - /*
211 - * Currently unexpected vectors happen only on SMP and APIC.
212 - * We _must_ ack these because every local APIC has only N
213 - * irq slots per priority level, and a 'hanging, unacked' IRQ
214 - * holds up an irq slot - in excessive cases (when multiple
215 - * unexpected vectors occur) that might lock up the APIC
216 - * completely.
217 - * But only ack when the APIC is enabled -AK
218 - */
219 - if (cpu_has_apic)
220 - ack_APIC_irq();
221 -}
222 -
223 int get_physical_broadcast(void)
224 {
225 return 0xff;
226 }
227
228 -#ifndef CONFIG_XEN
229 -#ifndef CONFIG_SMP
230 -static void up_apic_timer_interrupt_call(void)
231 -{
232 - int cpu = smp_processor_id();
233 -
234 - /*
235 - * the NMI deadlock-detector uses this.
236 - */
237 - per_cpu(irq_stat, cpu).apic_timer_irqs++;
238 -
239 - smp_local_timer_interrupt();
240 -}
241 -#endif
242 -
243 -void smp_send_timer_broadcast_ipi(void)
244 -{
245 - cpumask_t mask;
246 -
247 - cpus_and(mask, cpu_online_map, timer_bcast_ipi);
248 - if (!cpus_empty(mask)) {
249 -#ifdef CONFIG_SMP
250 - send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
251 -#else
252 - /*
253 - * We can directly call the apic timer interrupt handler
254 - * in UP case. Minus all irq related functions
255 - */
256 - up_apic_timer_interrupt_call();
257 -#endif
258 - }
259 -}
260 -#endif
261 -
262 int setup_profiling_timer(unsigned int multiplier)
263 {
264 return -EINVAL;
265 --- a/arch/x86/kernel/cpu/common-xen.c
266 +++ b/arch/x86/kernel/cpu/common-xen.c
267 @@ -610,7 +610,7 @@
268 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
269 {
270 memset(regs, 0, sizeof(struct pt_regs));
271 - regs->xgs = __KERNEL_PDA;
272 + regs->xfs = __KERNEL_PDA;
273 return regs;
274 }
275
276 @@ -667,12 +667,12 @@
277 .pcurrent = &init_task,
278 };
279
280 -static inline void set_kernel_gs(void)
281 +static inline void set_kernel_fs(void)
282 {
283 - /* Set %gs for this CPU's PDA. Memory clobber is to create a
284 + /* Set %fs for this CPU's PDA. Memory clobber is to create a
285 barrier with respect to any PDA operations, so the compiler
286 doesn't move any before here. */
287 - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
288 + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
289 }
290
291 /* Initialize the CPU's GDT and PDA. The boot CPU does this for
292 @@ -730,7 +730,7 @@
293 }
294 BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
295
296 - set_kernel_gs();
297 + set_kernel_fs();
298 }
299
300 /* Common CPU init for both boot and secondary CPUs */
301 @@ -775,8 +775,8 @@
302 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
303 #endif
304
305 - /* Clear %fs. */
306 - asm volatile ("mov %0, %%fs" : : "r" (0));
307 + /* Clear %gs. */
308 + asm volatile ("mov %0, %%gs" : : "r" (0));
309
310 /* Clear all 6 debug registers: */
311 set_debugreg(0, 0);
312 --- a/arch/x86/kernel/e820_32-xen.c
313 +++ b/arch/x86/kernel/e820_32-xen.c
314 @@ -14,6 +14,7 @@
315 #include <asm/pgtable.h>
316 #include <asm/page.h>
317 #include <asm/e820.h>
318 +#include <asm/setup.h>
319 #include <xen/interface/memory.h>
320
321 #ifdef CONFIG_EFI
322 @@ -157,21 +158,22 @@
323 .flags = IORESOURCE_BUSY | IORESOURCE_IO
324 } };
325
326 -static int romsignature(const unsigned char *x)
327 +#define ROMSIGNATURE 0xaa55
328 +
329 +static int __init romsignature(const unsigned char *rom)
330 {
331 unsigned short sig;
332 - int ret = 0;
333 - if (probe_kernel_address((const unsigned short *)x, sig) == 0)
334 - ret = (sig == 0xaa55);
335 - return ret;
336 +
337 + return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
338 + sig == ROMSIGNATURE;
339 }
340
341 static int __init romchecksum(unsigned char *rom, unsigned long length)
342 {
343 - unsigned char *p, sum = 0;
344 + unsigned char sum;
345
346 - for (p = rom; p < rom + length; p++)
347 - sum += *p;
348 + for (sum = 0; length; length--)
349 + sum += *rom++;
350 return sum == 0;
351 }
352
353 --- a/arch/x86/kernel/e820_64-xen.c
354 +++ b/arch/x86/kernel/e820_64-xen.c
355 @@ -88,6 +88,13 @@
356 return 1;
357 }
358
359 +#ifdef CONFIG_NUMA
360 + /* NUMA memory to node map */
361 + if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) {
362 + *addrp = nodemap_addr + nodemap_size;
363 + return 1;
364 + }
365 +#endif
366 /* XXX ramdisk image here? */
367 #else
368 if (last < (table_end<<PAGE_SHIFT)) {
369 @@ -215,6 +222,37 @@
370 }
371
372 /*
373 + * Find the hole size in the range.
374 + */
375 +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
376 +{
377 + unsigned long ram = 0;
378 + int i;
379 +
380 + for (i = 0; i < e820.nr_map; i++) {
381 + struct e820entry *ei = &e820.map[i];
382 + unsigned long last, addr;
383 +
384 + if (ei->type != E820_RAM ||
385 + ei->addr+ei->size <= start ||
386 + ei->addr >= end)
387 + continue;
388 +
389 + addr = round_up(ei->addr, PAGE_SIZE);
390 + if (addr < start)
391 + addr = start;
392 +
393 + last = round_down(ei->addr + ei->size, PAGE_SIZE);
394 + if (last >= end)
395 + last = end;
396 +
397 + if (last > addr)
398 + ram += last - addr;
399 + }
400 + return ((end - start) - ram);
401 +}
402 +
403 +/*
404 * Mark e820 reserved areas as busy for the resource manager.
405 */
406 void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
407 @@ -725,7 +763,7 @@
408 }
409 early_param("memmap", parse_memmap_opt);
410
411 -void finish_e820_parsing(void)
412 +void __init finish_e820_parsing(void)
413 {
414 if (userdef) {
415 printk(KERN_INFO "user-defined physical RAM map:\n");
416 --- a/arch/x86/kernel/entry_32-xen.S
417 +++ b/arch/x86/kernel/entry_32-xen.S
418 @@ -30,7 +30,7 @@
419 * 18(%esp) - %eax
420 * 1C(%esp) - %ds
421 * 20(%esp) - %es
422 - * 24(%esp) - %gs
423 + * 24(%esp) - %fs
424 * 28(%esp) - orig_eax
425 * 2C(%esp) - %eip
426 * 30(%esp) - %cs
427 @@ -102,9 +102,9 @@
428
429 #define SAVE_ALL \
430 cld; \
431 - pushl %gs; \
432 + pushl %fs; \
433 CFI_ADJUST_CFA_OFFSET 4;\
434 - /*CFI_REL_OFFSET gs, 0;*/\
435 + /*CFI_REL_OFFSET fs, 0;*/\
436 pushl %es; \
437 CFI_ADJUST_CFA_OFFSET 4;\
438 /*CFI_REL_OFFSET es, 0;*/\
439 @@ -136,7 +136,7 @@
440 movl %edx, %ds; \
441 movl %edx, %es; \
442 movl $(__KERNEL_PDA), %edx; \
443 - movl %edx, %gs
444 + movl %edx, %fs
445
446 #define RESTORE_INT_REGS \
447 popl %ebx; \
448 @@ -169,9 +169,9 @@
449 2: popl %es; \
450 CFI_ADJUST_CFA_OFFSET -4;\
451 /*CFI_RESTORE es;*/\
452 -3: popl %gs; \
453 +3: popl %fs; \
454 CFI_ADJUST_CFA_OFFSET -4;\
455 - /*CFI_RESTORE gs;*/\
456 + /*CFI_RESTORE fs;*/\
457 .pushsection .fixup,"ax"; \
458 4: movl $0,(%esp); \
459 jmp 1b; \
460 @@ -230,6 +230,7 @@
461 CFI_ADJUST_CFA_OFFSET -4
462 jmp syscall_exit
463 CFI_ENDPROC
464 +END(ret_from_fork)
465
466 /*
467 * Return to user mode is not as complex as all this looks,
468 @@ -261,6 +262,7 @@
469 # int/exception return?
470 jne work_pending
471 jmp restore_all
472 +END(ret_from_exception)
473
474 #ifdef CONFIG_PREEMPT
475 ENTRY(resume_kernel)
476 @@ -275,6 +277,7 @@
477 jz restore_all
478 call preempt_schedule_irq
479 jmp need_resched
480 +END(resume_kernel)
481 #endif
482 CFI_ENDPROC
483
484 @@ -352,16 +355,17 @@
485 movl PT_OLDESP(%esp), %ecx
486 xorl %ebp,%ebp
487 TRACE_IRQS_ON
488 -1: mov PT_GS(%esp), %gs
489 +1: mov PT_FS(%esp), %fs
490 ENABLE_INTERRUPTS_SYSEXIT
491 CFI_ENDPROC
492 .pushsection .fixup,"ax"
493 -2: movl $0,PT_GS(%esp)
494 +2: movl $0,PT_FS(%esp)
495 jmp 1b
496 .section __ex_table,"a"
497 .align 4
498 .long 1b,2b
499 .popsection
500 +ENDPROC(sysenter_entry)
501
502 # pv sysenter call handler stub
503 ENTRY(sysenter_entry_pv)
504 @@ -533,6 +537,7 @@
505 jmp hypercall_page + (__HYPERVISOR_iret * 32)
506 #endif
507 CFI_ENDPROC
508 +ENDPROC(system_call)
509
510 # perform work that needs to be done immediately before resumption
511 ALIGN
512 @@ -578,6 +583,7 @@
513 xorl %edx, %edx
514 call do_notify_resume
515 jmp resume_userspace_sig
516 +END(work_pending)
517
518 # perform syscall exit tracing
519 ALIGN
520 @@ -593,6 +599,7 @@
521 cmpl $(nr_syscalls), %eax
522 jnae syscall_call
523 jmp syscall_exit
524 +END(syscall_trace_entry)
525
526 # perform syscall exit tracing
527 ALIGN
528 @@ -606,6 +613,7 @@
529 movl $1, %edx
530 call do_syscall_trace
531 jmp resume_userspace
532 +END(syscall_exit_work)
533 CFI_ENDPROC
534
535 RING0_INT_FRAME # can't unwind into user space anyway
536 @@ -616,16 +624,18 @@
537 GET_THREAD_INFO(%ebp)
538 movl $-EFAULT,PT_EAX(%esp)
539 jmp resume_userspace
540 +END(syscall_fault)
541
542 syscall_badsys:
543 movl $-ENOSYS,PT_EAX(%esp)
544 jmp resume_userspace
545 +END(syscall_badsys)
546 CFI_ENDPROC
547
548 #ifndef CONFIG_XEN
549 #define FIXUP_ESPFIX_STACK \
550 /* since we are on a wrong stack, we cant make it a C code :( */ \
551 - movl %gs:PDA_cpu, %ebx; \
552 + movl %fs:PDA_cpu, %ebx; \
553 PER_CPU(cpu_gdt_descr, %ebx); \
554 movl GDS_address(%ebx), %ebx; \
555 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
556 @@ -656,9 +666,9 @@
557 ENTRY(interrupt)
558 .text
559
560 -vector=0
561 ENTRY(irq_entries_start)
562 RING0_INT_FRAME
563 +vector=0
564 .rept NR_IRQS
565 ALIGN
566 .if vector
567 @@ -667,11 +677,16 @@
568 1: pushl $~(vector)
569 CFI_ADJUST_CFA_OFFSET 4
570 jmp common_interrupt
571 -.data
572 + .previous
573 .long 1b
574 -.text
575 + .text
576 vector=vector+1
577 .endr
578 +END(irq_entries_start)
579 +
580 +.previous
581 +END(interrupt)
582 +.previous
583
584 /*
585 * the CPU automatically disables interrupts when executing an IRQ vector,
586 @@ -684,6 +699,7 @@
587 movl %esp,%eax
588 call do_IRQ
589 jmp ret_from_intr
590 +ENDPROC(common_interrupt)
591 CFI_ENDPROC
592
593 #define BUILD_INTERRUPT(name, nr) \
594 @@ -696,10 +712,16 @@
595 movl %esp,%eax; \
596 call smp_/**/name; \
597 jmp ret_from_intr; \
598 - CFI_ENDPROC
599 + CFI_ENDPROC; \
600 +ENDPROC(name)
601
602 /* The include is where all of the SMP etc. interrupts come from */
603 #include "entry_arch.h"
604 +
605 +/* This alternate entry is needed because we hijack the apic LVTT */
606 +#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
607 +BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
608 +#endif
609 #else
610 #define UNWIND_ESPFIX_STACK
611 #endif
612 @@ -710,7 +732,7 @@
613 CFI_ADJUST_CFA_OFFSET 4
614 ALIGN
615 error_code:
616 - /* the function address is in %gs's slot on the stack */
617 + /* the function address is in %fs's slot on the stack */
618 pushl %es
619 CFI_ADJUST_CFA_OFFSET 4
620 /*CFI_REL_OFFSET es, 0*/
621 @@ -739,20 +761,20 @@
622 CFI_ADJUST_CFA_OFFSET 4
623 CFI_REL_OFFSET ebx, 0
624 cld
625 - pushl %gs
626 + pushl %fs
627 CFI_ADJUST_CFA_OFFSET 4
628 - /*CFI_REL_OFFSET gs, 0*/
629 + /*CFI_REL_OFFSET fs, 0*/
630 movl $(__KERNEL_PDA), %ecx
631 - movl %ecx, %gs
632 + movl %ecx, %fs
633 UNWIND_ESPFIX_STACK
634 popl %ecx
635 CFI_ADJUST_CFA_OFFSET -4
636 /*CFI_REGISTER es, ecx*/
637 - movl PT_GS(%esp), %edi # get the function address
638 + movl PT_FS(%esp), %edi # get the function address
639 movl PT_ORIG_EAX(%esp), %edx # get the error code
640 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
641 - mov %ecx, PT_GS(%esp)
642 - /*CFI_REL_OFFSET gs, ES*/
643 + mov %ecx, PT_FS(%esp)
644 + /*CFI_REL_OFFSET fs, ES*/
645 movl $(__USER_DS), %ecx
646 movl %ecx, %ds
647 movl %ecx, %es
648 @@ -839,7 +861,7 @@
649 .byte 0x18 # pop %eax
650 .byte 0x1c # pop %ds
651 .byte 0x20 # pop %es
652 - .byte 0x24,0x24 # pop %gs
653 + .byte 0x24,0x24 # pop %fs
654 .byte 0x28,0x28,0x28 # add $4,%esp
655 .byte 0x2c # iret
656 .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi)
657 @@ -905,6 +927,7 @@
658 CFI_ADJUST_CFA_OFFSET 4
659 jmp error_code
660 CFI_ENDPROC
661 +END(coprocessor_error)
662
663 ENTRY(simd_coprocessor_error)
664 RING0_INT_FRAME
665 @@ -914,6 +937,7 @@
666 CFI_ADJUST_CFA_OFFSET 4
667 jmp error_code
668 CFI_ENDPROC
669 +END(simd_coprocessor_error)
670
671 ENTRY(device_not_available)
672 RING0_INT_FRAME
673 @@ -936,6 +960,7 @@
674 call math_state_restore
675 jmp ret_from_exception
676 CFI_ENDPROC
677 +END(device_not_available)
678
679 #ifndef CONFIG_XEN
680 /*
681 @@ -1097,10 +1122,12 @@
682 .align 4
683 .long 1b,iret_exc
684 .previous
685 +END(native_iret)
686
687 ENTRY(native_irq_enable_sysexit)
688 sti
689 sysexit
690 +END(native_irq_enable_sysexit)
691 #endif
692
693 KPROBE_ENTRY(int3)
694 @@ -1123,6 +1150,7 @@
695 CFI_ADJUST_CFA_OFFSET 4
696 jmp error_code
697 CFI_ENDPROC
698 +END(overflow)
699
700 ENTRY(bounds)
701 RING0_INT_FRAME
702 @@ -1132,6 +1160,7 @@
703 CFI_ADJUST_CFA_OFFSET 4
704 jmp error_code
705 CFI_ENDPROC
706 +END(bounds)
707
708 ENTRY(invalid_op)
709 RING0_INT_FRAME
710 @@ -1141,6 +1170,7 @@
711 CFI_ADJUST_CFA_OFFSET 4
712 jmp error_code
713 CFI_ENDPROC
714 +END(invalid_op)
715
716 ENTRY(coprocessor_segment_overrun)
717 RING0_INT_FRAME
718 @@ -1150,6 +1180,7 @@
719 CFI_ADJUST_CFA_OFFSET 4
720 jmp error_code
721 CFI_ENDPROC
722 +END(coprocessor_segment_overrun)
723
724 ENTRY(invalid_TSS)
725 RING0_EC_FRAME
726 @@ -1157,6 +1188,7 @@
727 CFI_ADJUST_CFA_OFFSET 4
728 jmp error_code
729 CFI_ENDPROC
730 +END(invalid_TSS)
731
732 ENTRY(segment_not_present)
733 RING0_EC_FRAME
734 @@ -1164,6 +1196,7 @@
735 CFI_ADJUST_CFA_OFFSET 4
736 jmp error_code
737 CFI_ENDPROC
738 +END(segment_not_present)
739
740 ENTRY(stack_segment)
741 RING0_EC_FRAME
742 @@ -1171,6 +1204,7 @@
743 CFI_ADJUST_CFA_OFFSET 4
744 jmp error_code
745 CFI_ENDPROC
746 +END(stack_segment)
747
748 KPROBE_ENTRY(general_protection)
749 RING0_EC_FRAME
750 @@ -1186,6 +1220,7 @@
751 CFI_ADJUST_CFA_OFFSET 4
752 jmp error_code
753 CFI_ENDPROC
754 +END(alignment_check)
755
756 ENTRY(divide_error)
757 RING0_INT_FRAME
758 @@ -1195,6 +1230,7 @@
759 CFI_ADJUST_CFA_OFFSET 4
760 jmp error_code
761 CFI_ENDPROC
762 +END(divide_error)
763
764 #ifdef CONFIG_X86_MCE
765 ENTRY(machine_check)
766 @@ -1205,6 +1241,7 @@
767 CFI_ADJUST_CFA_OFFSET 4
768 jmp error_code
769 CFI_ENDPROC
770 +END(machine_check)
771 #endif
772
773 #ifndef CONFIG_XEN
774 @@ -1224,6 +1261,7 @@
775 CFI_ADJUST_CFA_OFFSET 4
776 jmp error_code
777 CFI_ENDPROC
778 +END(spurious_interrupt_bug)
779
780 ENTRY(kernel_thread_helper)
781 pushl $0 # fake return address for unwinder
782 --- a/arch/x86/kernel/entry_64-xen.S
783 +++ b/arch/x86/kernel/entry_64-xen.S
784 @@ -629,6 +629,9 @@
785 ENTRY(call_function_interrupt)
786 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
787 END(call_function_interrupt)
788 +ENTRY(irq_move_cleanup_interrupt)
789 + apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
790 +END(irq_move_cleanup_interrupt)
791 #endif
792
793 ENTRY(apic_timer_interrupt)
794 --- a/arch/x86/kernel/genapic_64-xen.c
795 +++ b/arch/x86/kernel/genapic_64-xen.c
796 @@ -65,8 +65,8 @@
797 * Some x86_64 machines use physical APIC mode regardless of how many
798 * procs/clusters are present (x86_64 ES7000 is an example).
799 */
800 - if (acpi_fadt.revision > FADT2_REVISION_ID)
801 - if (acpi_fadt.force_apic_physical_destination_mode) {
802 + if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID)
803 + if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) {
804 genapic = &apic_cluster;
805 goto print;
806 }
807 --- a/arch/x86/kernel/head64-xen.c
808 +++ b/arch/x86/kernel/head64-xen.c
809 @@ -42,8 +42,6 @@
810 #define OLD_CL_BASE_ADDR 0x90000
811 #define OLD_CL_OFFSET 0x90022
812
813 -extern char saved_command_line[];
814 -
815 static void __init copy_bootdata(char *real_mode_data)
816 {
817 #ifndef CONFIG_XEN
818 @@ -59,14 +57,14 @@
819 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
820 }
821 command_line = (char *) ((u64)(new_data));
822 - memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
823 + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
824 #else
825 int max_cmdline;
826
827 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
828 max_cmdline = COMMAND_LINE_SIZE;
829 - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
830 - saved_command_line[max_cmdline-1] = '\0';
831 + memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
832 + boot_command_line[max_cmdline-1] = '\0';
833 #endif
834 }
835
836 --- a/arch/x86/kernel/head_32-xen.S
837 +++ b/arch/x86/kernel/head_32-xen.S
838 @@ -27,6 +27,7 @@
839 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
840 #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
841
842 +.section .text.head,"ax",@progbits
843 #define VIRT_ENTRY_OFFSET 0x0
844 .org VIRT_ENTRY_OFFSET
845 ENTRY(startup_32)
846 @@ -60,11 +61,11 @@
847
848 movb $1,X86_HARD_MATH
849
850 - xorl %eax,%eax # Clear FS
851 - movl %eax,%fs
852 + xorl %eax,%eax # Clear GS
853 + movl %eax,%gs
854
855 movl $(__KERNEL_PDA),%eax
856 - mov %eax,%gs
857 + mov %eax,%fs
858
859 cld # gcc2 wants the direction flag cleared at all times
860
861 @@ -75,7 +76,7 @@
862 * Point the GDT at this CPU's PDA. This will be
863 * cpu_gdt_table and boot_pda.
864 */
865 -setup_pda:
866 +ENTRY(setup_pda)
867 /* get the PDA pointer */
868 movl $boot_pda, %eax
869
870 --- a/arch/x86/kernel/io_apic_32-xen.c
871 +++ b/arch/x86/kernel/io_apic_32-xen.c
872 @@ -164,7 +164,7 @@
873 */
874 static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
875 {
876 - volatile struct io_apic *io_apic = io_apic_base(apic);
877 + volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
878 if (sis_apic_bug)
879 writel(reg, &io_apic->index);
880 writel(value, &io_apic->data);
881 @@ -387,7 +387,7 @@
882 break;
883 entry = irq_2_pin + entry->next;
884 }
885 - set_native_irq_info(irq, cpumask);
886 + irq_desc[irq].affinity = cpumask;
887 spin_unlock_irqrestore(&ioapic_lock, flags);
888 }
889
890 @@ -526,8 +526,8 @@
891 package_index = CPU_TO_PACKAGEINDEX(i);
892 for (j = 0; j < NR_IRQS; j++) {
893 unsigned long value_now, delta;
894 - /* Is this an active IRQ? */
895 - if (!irq_desc[j].action)
896 + /* Is this an active IRQ or balancing disabled ? */
897 + if (!irq_desc[j].action || irq_balancing_disabled(j))
898 continue;
899 if ( package_index == i )
900 IRQ_DELTA(package_index,j) = 0;
901 @@ -780,7 +780,7 @@
902 return 0;
903 }
904
905 -int __init irqbalance_disable(char *str)
906 +int __devinit irqbalance_disable(char *str)
907 {
908 irqbalance_disabled = 1;
909 return 1;
910 @@ -1319,11 +1319,9 @@
911 trigger == IOAPIC_LEVEL)
912 set_irq_chip_and_handler_name(irq, &ioapic_chip,
913 handle_fasteoi_irq, "fasteoi");
914 - else {
915 - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
916 + else
917 set_irq_chip_and_handler_name(irq, &ioapic_chip,
918 handle_edge_irq, "edge");
919 - }
920 set_intr_gate(vector, interrupt[irq]);
921 }
922 #else
923 @@ -1397,7 +1395,6 @@
924 }
925 spin_lock_irqsave(&ioapic_lock, flags);
926 __ioapic_write_entry(apic, pin, entry);
927 - set_native_irq_info(irq, TARGET_CPUS);
928 spin_unlock_irqrestore(&ioapic_lock, flags);
929 }
930 }
931 @@ -1628,7 +1625,7 @@
932 v = apic_read(APIC_LVR);
933 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
934 ver = GET_APIC_VERSION(v);
935 - maxlvt = get_maxlvt();
936 + maxlvt = lapic_get_maxlvt();
937
938 v = apic_read(APIC_TASKPRI);
939 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
940 @@ -1962,7 +1959,7 @@
941 #endif
942
943 #ifndef CONFIG_XEN
944 -static int no_timer_check __initdata;
945 +int no_timer_check __initdata;
946
947 static int __init notimercheck(char *s)
948 {
949 @@ -2355,7 +2352,7 @@
950
951 disable_8259A_irq(0);
952 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
953 - "fasteio");
954 + "fasteoi");
955 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
956 enable_8259A_irq(0);
957
958 @@ -2646,7 +2643,7 @@
959 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
960
961 write_msi_msg(irq, &msg);
962 - set_native_irq_info(irq, mask);
963 + irq_desc[irq].affinity = mask;
964 }
965 #endif /* CONFIG_SMP */
966
967 @@ -2665,25 +2662,32 @@
968 .retrigger = ioapic_retrigger_irq,
969 };
970
971 -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
972 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
973 {
974 struct msi_msg msg;
975 - int ret;
976 + int irq, ret;
977 + irq = create_irq();
978 + if (irq < 0)
979 + return irq;
980 +
981 + set_irq_msi(irq, desc);
982 ret = msi_compose_msg(dev, irq, &msg);
983 - if (ret < 0)
984 + if (ret < 0) {
985 + destroy_irq(irq);
986 return ret;
987 + }
988
989 write_msi_msg(irq, &msg);
990
991 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
992 "edge");
993
994 - return 0;
995 + return irq;
996 }
997
998 void arch_teardown_msi_irq(unsigned int irq)
999 {
1000 - return;
1001 + destroy_irq(irq);
1002 }
1003
1004 #endif /* CONFIG_PCI_MSI */
1005 @@ -2723,7 +2727,7 @@
1006 dest = cpu_mask_to_apicid(mask);
1007
1008 target_ht_irq(irq, dest);
1009 - set_native_irq_info(irq, mask);
1010 + irq_desc[irq].affinity = mask;
1011 }
1012 #endif
1013
1014 @@ -2931,7 +2935,6 @@
1015
1016 spin_lock_irqsave(&ioapic_lock, flags);
1017 __ioapic_write_entry(ioapic, pin, entry);
1018 - set_native_irq_info(irq, TARGET_CPUS);
1019 spin_unlock_irqrestore(&ioapic_lock, flags);
1020
1021 return 0;
1022 --- a/arch/x86/kernel/io_apic_64-xen.c
1023 +++ b/arch/x86/kernel/io_apic_64-xen.c
1024 @@ -36,6 +36,7 @@
1025 #include <acpi/acpi_bus.h>
1026 #endif
1027
1028 +#include <asm/idle.h>
1029 #include <asm/io.h>
1030 #include <asm/smp.h>
1031 #include <asm/desc.h>
1032 @@ -47,7 +48,20 @@
1033 #include <asm/msidef.h>
1034 #include <asm/hypertransport.h>
1035
1036 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
1037 +struct irq_cfg {
1038 +#ifndef CONFIG_XEN
1039 + cpumask_t domain;
1040 + cpumask_t old_domain;
1041 +#endif
1042 + unsigned move_cleanup_count;
1043 + u8 vector;
1044 + u8 move_in_progress : 1;
1045 +};
1046 +
1047 +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
1048 +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
1049 +
1050 +static int assign_irq_vector(int irq, cpumask_t mask);
1051
1052 #define __apicdebuginit __init
1053
1054 @@ -88,7 +102,7 @@
1055 * Rough estimation of how many shared IRQs there are, can
1056 * be changed anytime.
1057 */
1058 -#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
1059 +#define MAX_PLUS_SHARED_IRQS NR_IRQS
1060 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
1061
1062 /*
1063 @@ -259,21 +273,19 @@
1064
1065 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1066 {
1067 + struct irq_cfg *cfg = irq_cfg + irq;
1068 unsigned long flags;
1069 unsigned int dest;
1070 cpumask_t tmp;
1071 - int vector;
1072
1073 cpus_and(tmp, mask, cpu_online_map);
1074 if (cpus_empty(tmp))
1075 - tmp = TARGET_CPUS;
1076 -
1077 - cpus_and(mask, tmp, CPU_MASK_ALL);
1078 + return;
1079
1080 - vector = assign_irq_vector(irq, mask, &tmp);
1081 - if (vector < 0)
1082 + if (assign_irq_vector(irq, mask))
1083 return;
1084
1085 + cpus_and(tmp, cfg->domain, mask);
1086 dest = cpu_mask_to_apicid(tmp);
1087
1088 /*
1089 @@ -282,8 +294,8 @@
1090 dest = SET_APIC_LOGICAL_ID(dest);
1091
1092 spin_lock_irqsave(&ioapic_lock, flags);
1093 - __target_IO_APIC_irq(irq, dest, vector);
1094 - set_native_irq_info(irq, mask);
1095 + __target_IO_APIC_irq(irq, dest, cfg->vector);
1096 + irq_desc[irq].affinity = mask;
1097 spin_unlock_irqrestore(&ioapic_lock, flags);
1098 }
1099 #endif
1100 @@ -329,11 +341,11 @@
1101 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
1102 reg ACTION; \
1103 io_apic_modify(entry->apic, reg); \
1104 + FINAL; \
1105 if (!entry->next) \
1106 break; \
1107 entry = irq_2_pin + entry->next; \
1108 } \
1109 - FINAL; \
1110 }
1111
1112 #define DO_ACTION(name,R,ACTION, FINAL) \
1113 @@ -666,74 +678,58 @@
1114 return irq;
1115 }
1116
1117 -static inline int IO_APIC_irq_trigger(int irq)
1118 -{
1119 - int apic, idx, pin;
1120 -
1121 - for (apic = 0; apic < nr_ioapics; apic++) {
1122 - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1123 - idx = find_irq_entry(apic,pin,mp_INT);
1124 - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
1125 - return irq_trigger(idx);
1126 - }
1127 - }
1128 - /*
1129 - * nonexistent IRQs are edge default
1130 - */
1131 - return 0;
1132 -}
1133 -
1134 -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1135 -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
1136 -
1137 -static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1138 +static int __assign_irq_vector(int irq, cpumask_t mask)
1139 {
1140 - int vector;
1141 struct physdev_irq irq_op;
1142 + struct irq_cfg *cfg;
1143
1144 - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1145 + BUG_ON((unsigned)irq >= NR_IRQS);
1146 + cfg = &irq_cfg[irq];
1147
1148 - cpus_and(*result, mask, cpu_online_map);
1149 + if ((cfg->move_in_progress) || cfg->move_cleanup_count)
1150 + return -EBUSY;
1151
1152 - if (irq_vector[irq] > 0)
1153 - return irq_vector[irq];
1154 + if (cfg->vector)
1155 + return 0;
1156
1157 irq_op.irq = irq;
1158 if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1159 return -ENOSPC;
1160
1161 - vector = irq_op.vector;
1162 - irq_vector[irq] = vector;
1163 + cfg->vector = irq_op.vector;
1164
1165 - return vector;
1166 + return 0;
1167 }
1168
1169 -static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
1170 +static int assign_irq_vector(int irq, cpumask_t mask)
1171 {
1172 - int vector;
1173 + int err;
1174 unsigned long flags;
1175
1176 spin_lock_irqsave(&vector_lock, flags);
1177 - vector = __assign_irq_vector(irq, mask, result);
1178 + err = __assign_irq_vector(irq, mask);
1179 spin_unlock_irqrestore(&vector_lock, flags);
1180 - return vector;
1181 + return err;
1182 }
1183
1184 #ifndef CONFIG_XEN
1185 static void __clear_irq_vector(int irq)
1186 {
1187 + struct irq_cfg *cfg;
1188 cpumask_t mask;
1189 int cpu, vector;
1190
1191 - BUG_ON(!irq_vector[irq]);
1192 + BUG_ON((unsigned)irq >= NR_IRQS);
1193 + cfg = &irq_cfg[irq];
1194 + BUG_ON(!cfg->vector);
1195
1196 - vector = irq_vector[irq];
1197 - cpus_and(mask, irq_domain[irq], cpu_online_map);
1198 + vector = cfg->vector;
1199 + cpus_and(mask, cfg->domain, cpu_online_map);
1200 for_each_cpu_mask(cpu, mask)
1201 per_cpu(vector_irq, cpu)[vector] = -1;
1202
1203 - irq_vector[irq] = 0;
1204 - irq_domain[irq] = CPU_MASK_NONE;
1205 + cfg->vector = 0;
1206 + cfg->domain = CPU_MASK_NONE;
1207 }
1208
1209 void __setup_vector_irq(int cpu)
1210 @@ -743,10 +739,10 @@
1211 int irq, vector;
1212
1213 /* Mark the inuse vectors */
1214 - for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
1215 - if (!cpu_isset(cpu, irq_domain[irq]))
1216 + for (irq = 0; irq < NR_IRQS; ++irq) {
1217 + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1218 continue;
1219 - vector = irq_vector[irq];
1220 + vector = irq_cfg[irq].vector;
1221 per_cpu(vector_irq, cpu)[vector] = irq;
1222 }
1223 /* Mark the free vectors */
1224 @@ -754,41 +750,49 @@
1225 irq = per_cpu(vector_irq, cpu)[vector];
1226 if (irq < 0)
1227 continue;
1228 - if (!cpu_isset(cpu, irq_domain[irq]))
1229 + if (!cpu_isset(cpu, irq_cfg[irq].domain))
1230 per_cpu(vector_irq, cpu)[vector] = -1;
1231 }
1232 }
1233
1234 -extern void (*interrupt[NR_IRQS])(void);
1235 -
1236 static struct irq_chip ioapic_chip;
1237
1238 -#define IOAPIC_AUTO -1
1239 -#define IOAPIC_EDGE 0
1240 -#define IOAPIC_LEVEL 1
1241 -
1242 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1243 +static void ioapic_register_intr(int irq, unsigned long trigger)
1244 {
1245 - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1246 - trigger == IOAPIC_LEVEL)
1247 + if (trigger)
1248 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1249 handle_fasteoi_irq, "fasteoi");
1250 - else {
1251 - irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1252 + else
1253 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1254 handle_edge_irq, "edge");
1255 - }
1256 }
1257 #else
1258 -#define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
1259 +#define ioapic_register_intr(irq,trigger) ((void)0)
1260 #endif /* !CONFIG_XEN */
1261
1262 -static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
1263 +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1264 + int trigger, int polarity)
1265 {
1266 + struct irq_cfg *cfg = irq_cfg + irq;
1267 struct IO_APIC_route_entry entry;
1268 - int vector;
1269 - unsigned long flags;
1270 + cpumask_t mask;
1271
1272 + if (!IO_APIC_IRQ(irq))
1273 + return;
1274 +
1275 + mask = TARGET_CPUS;
1276 + if (assign_irq_vector(irq, mask))
1277 + return;
1278 +
1279 +#ifndef CONFIG_XEN
1280 + cpus_and(mask, cfg->domain, mask);
1281 +#endif
1282 +
1283 + apic_printk(APIC_VERBOSE,KERN_DEBUG
1284 + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1285 + "IRQ %d Mode:%i Active:%i)\n",
1286 + apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
1287 + irq, trigger, polarity);
1288
1289 /*
1290 * add it to the IO-APIC irq-routing table:
1291 @@ -797,41 +801,23 @@
1292
1293 entry.delivery_mode = INT_DELIVERY_MODE;
1294 entry.dest_mode = INT_DEST_MODE;
1295 + entry.dest = cpu_mask_to_apicid(mask);
1296 entry.mask = 0; /* enable IRQ */
1297 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1298 + entry.trigger = trigger;
1299 + entry.polarity = polarity;
1300 + entry.vector = cfg->vector;
1301
1302 - entry.trigger = irq_trigger(idx);
1303 - entry.polarity = irq_polarity(idx);
1304 -
1305 - if (irq_trigger(idx)) {
1306 - entry.trigger = 1;
1307 + /* Mask level triggered irqs.
1308 + * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1309 + */
1310 + if (trigger)
1311 entry.mask = 1;
1312 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1313 - }
1314
1315 - if (/* !apic && */ !IO_APIC_IRQ(irq))
1316 - return;
1317 -
1318 - if (IO_APIC_IRQ(irq)) {
1319 - cpumask_t mask;
1320 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1321 - if (vector < 0)
1322 - return;
1323 -
1324 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1325 - entry.vector = vector;
1326 -
1327 - ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1328 - if (!apic && (irq < 16))
1329 - disable_8259A_irq(irq);
1330 - }
1331 + ioapic_register_intr(irq, trigger);
1332 + if (irq < 16)
1333 + disable_8259A_irq(irq);
1334
1335 ioapic_write_entry(apic, pin, entry);
1336 -
1337 - spin_lock_irqsave(&ioapic_lock, flags);
1338 - set_native_irq_info(irq, TARGET_CPUS);
1339 - spin_unlock_irqrestore(&ioapic_lock, flags);
1340 -
1341 }
1342
1343 static void __init setup_IO_APIC_irqs(void)
1344 @@ -856,8 +842,8 @@
1345 irq = pin_2_irq(idx, apic, pin);
1346 add_pin_to_irq(irq, apic, pin);
1347
1348 - setup_IO_APIC_irq(apic, pin, idx, irq);
1349 -
1350 + setup_IO_APIC_irq(apic, pin, irq,
1351 + irq_trigger(idx), irq_polarity(idx));
1352 }
1353 }
1354
1355 @@ -888,7 +874,7 @@
1356 */
1357 entry.dest_mode = INT_DEST_MODE;
1358 entry.mask = 0; /* unmask IRQ now */
1359 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1360 + entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
1361 entry.delivery_mode = INT_DELIVERY_MODE;
1362 entry.polarity = 0;
1363 entry.trigger = 0;
1364 @@ -988,18 +974,17 @@
1365
1366 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1367
1368 - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1369 - " Stat Dest Deli Vect: \n");
1370 + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1371 + " Stat Dmod Deli Vect: \n");
1372
1373 for (i = 0; i <= reg_01.bits.entries; i++) {
1374 struct IO_APIC_route_entry entry;
1375
1376 entry = ioapic_read_entry(apic, i);
1377
1378 - printk(KERN_DEBUG " %02x %03X %02X ",
1379 + printk(KERN_DEBUG " %02x %03X ",
1380 i,
1381 - entry.dest.logical.logical_dest,
1382 - entry.dest.physical.physical_dest
1383 + entry.dest
1384 );
1385
1386 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1387 @@ -1263,8 +1248,7 @@
1388 entry.dest_mode = 0; /* Physical */
1389 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1390 entry.vector = 0;
1391 - entry.dest.physical.physical_dest =
1392 - GET_APIC_ID(apic_read(APIC_ID));
1393 + entry.dest = GET_APIC_ID(apic_read(APIC_ID));
1394
1395 /*
1396 * Add it to the IO-APIC irq-routing table:
1397 @@ -1349,16 +1333,15 @@
1398
1399 static int ioapic_retrigger_irq(unsigned int irq)
1400 {
1401 + struct irq_cfg *cfg = &irq_cfg[irq];
1402 cpumask_t mask;
1403 - unsigned vector;
1404 unsigned long flags;
1405
1406 spin_lock_irqsave(&vector_lock, flags);
1407 - vector = irq_vector[irq];
1408 cpus_clear(mask);
1409 - cpu_set(first_cpu(irq_domain[irq]), mask);
1410 + cpu_set(first_cpu(cfg->domain), mask);
1411
1412 - send_IPI_mask(mask, vector);
1413 + send_IPI_mask(mask, cfg->vector);
1414 spin_unlock_irqrestore(&vector_lock, flags);
1415
1416 return 1;
1417 @@ -1373,8 +1356,68 @@
1418 * races.
1419 */
1420
1421 +#ifdef CONFIG_SMP
1422 +asmlinkage void smp_irq_move_cleanup_interrupt(void)
1423 +{
1424 + unsigned vector, me;
1425 + ack_APIC_irq();
1426 + exit_idle();
1427 + irq_enter();
1428 +
1429 + me = smp_processor_id();
1430 + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
1431 + unsigned int irq;
1432 + struct irq_desc *desc;
1433 + struct irq_cfg *cfg;
1434 + irq = __get_cpu_var(vector_irq)[vector];
1435 + if (irq >= NR_IRQS)
1436 + continue;
1437 +
1438 + desc = irq_desc + irq;
1439 + cfg = irq_cfg + irq;
1440 + spin_lock(&desc->lock);
1441 + if (!cfg->move_cleanup_count)
1442 + goto unlock;
1443 +
1444 + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
1445 + goto unlock;
1446 +
1447 + __get_cpu_var(vector_irq)[vector] = -1;
1448 + cfg->move_cleanup_count--;
1449 +unlock:
1450 + spin_unlock(&desc->lock);
1451 + }
1452 +
1453 + irq_exit();
1454 +}
1455 +
1456 +static void irq_complete_move(unsigned int irq)
1457 +{
1458 + struct irq_cfg *cfg = irq_cfg + irq;
1459 + unsigned vector, me;
1460 +
1461 + if (likely(!cfg->move_in_progress))
1462 + return;
1463 +
1464 + vector = ~get_irq_regs()->orig_rax;
1465 + me = smp_processor_id();
1466 + if ((vector == cfg->vector) &&
1467 + cpu_isset(smp_processor_id(), cfg->domain)) {
1468 + cpumask_t cleanup_mask;
1469 +
1470 + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1471 + cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1472 + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1473 + cfg->move_in_progress = 0;
1474 + }
1475 +}
1476 +#else
1477 +static inline void irq_complete_move(unsigned int irq) {}
1478 +#endif
1479 +
1480 static void ack_apic_edge(unsigned int irq)
1481 {
1482 + irq_complete_move(irq);
1483 move_native_irq(irq);
1484 ack_APIC_irq();
1485 }
1486 @@ -1383,6 +1426,7 @@
1487 {
1488 int do_unmask_irq = 0;
1489
1490 + irq_complete_move(irq);
1491 #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
1492 /* If we are moving the irq we need to mask it */
1493 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
1494 @@ -1434,7 +1478,7 @@
1495 */
1496 for (irq = 0; irq < NR_IRQS ; irq++) {
1497 int tmp = irq;
1498 - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
1499 + if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
1500 /*
1501 * Hmm.. We don't have an entry for this,
1502 * so default to an old-fashioned 8259
1503 @@ -1532,7 +1576,7 @@
1504
1505 entry1.dest_mode = 0; /* physical delivery */
1506 entry1.mask = 0; /* unmask IRQ now */
1507 - entry1.dest.physical.physical_dest = hard_smp_processor_id();
1508 + entry1.dest = hard_smp_processor_id();
1509 entry1.delivery_mode = dest_ExtINT;
1510 entry1.polarity = entry0.polarity;
1511 entry1.trigger = 0;
1512 @@ -1576,15 +1620,14 @@
1513 */
1514 static inline void check_timer(void)
1515 {
1516 + struct irq_cfg *cfg = irq_cfg + 0;
1517 int apic1, pin1, apic2, pin2;
1518 - int vector;
1519 - cpumask_t mask;
1520
1521 /*
1522 * get/set the timer IRQ vector:
1523 */
1524 disable_8259A_irq(0);
1525 - vector = assign_irq_vector(0, TARGET_CPUS, &mask);
1526 + assign_irq_vector(0, TARGET_CPUS);
1527
1528 /*
1529 * Subtle, code in do_timer_interrupt() expects an AEOI
1530 @@ -1604,7 +1647,7 @@
1531 apic2 = ioapic_i8259.apic;
1532
1533 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1534 - vector, apic1, pin1, apic2, pin2);
1535 + cfg->vector, apic1, pin1, apic2, pin2);
1536
1537 if (pin1 != -1) {
1538 /*
1539 @@ -1635,7 +1678,7 @@
1540 /*
1541 * legacy devices should be connected to IO APIC #0
1542 */
1543 - setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1544 + setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
1545 if (timer_irq_works()) {
1546 apic_printk(APIC_VERBOSE," works.\n");
1547 nmi_watchdog_default();
1548 @@ -1660,14 +1703,14 @@
1549
1550 disable_8259A_irq(0);
1551 irq_desc[0].chip = &lapic_irq_type;
1552 - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
1553 + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1554 enable_8259A_irq(0);
1555
1556 if (timer_irq_works()) {
1557 apic_printk(APIC_VERBOSE," works.\n");
1558 return;
1559 }
1560 - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
1561 + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1562 apic_printk(APIC_VERBOSE," failed.\n");
1563
1564 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
1565 @@ -1821,19 +1864,16 @@
1566 /* Allocate an unused irq */
1567 int irq;
1568 int new;
1569 - int vector = 0;
1570 unsigned long flags;
1571 - cpumask_t mask;
1572
1573 irq = -ENOSPC;
1574 spin_lock_irqsave(&vector_lock, flags);
1575 for (new = (NR_IRQS - 1); new >= 0; new--) {
1576 if (platform_legacy_irq(new))
1577 continue;
1578 - if (irq_vector[new] != 0)
1579 + if (irq_cfg[new].vector != 0)
1580 continue;
1581 - vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
1582 - if (likely(vector > 0))
1583 + if (__assign_irq_vector(new, TARGET_CPUS) == 0)
1584 irq = new;
1585 break;
1586 }
1587 @@ -1863,12 +1903,15 @@
1588 #ifdef CONFIG_PCI_MSI
1589 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1590 {
1591 - int vector;
1592 + struct irq_cfg *cfg = irq_cfg + irq;
1593 + int err;
1594 unsigned dest;
1595 cpumask_t tmp;
1596
1597 - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1598 - if (vector >= 0) {
1599 + tmp = TARGET_CPUS;
1600 + err = assign_irq_vector(irq, tmp);
1601 + if (!err) {
1602 + cpus_and(tmp, cfg->domain, tmp);
1603 dest = cpu_mask_to_apicid(tmp);
1604
1605 msg->address_hi = MSI_ADDR_BASE_HI;
1606 @@ -1888,40 +1931,38 @@
1607 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
1608 MSI_DATA_DELIVERY_FIXED:
1609 MSI_DATA_DELIVERY_LOWPRI) |
1610 - MSI_DATA_VECTOR(vector);
1611 + MSI_DATA_VECTOR(cfg->vector);
1612 }
1613 - return vector;
1614 + return err;
1615 }
1616
1617 #ifdef CONFIG_SMP
1618 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1619 {
1620 + struct irq_cfg *cfg = irq_cfg + irq;
1621 struct msi_msg msg;
1622 unsigned int dest;
1623 cpumask_t tmp;
1624 - int vector;
1625
1626 cpus_and(tmp, mask, cpu_online_map);
1627 if (cpus_empty(tmp))
1628 - tmp = TARGET_CPUS;
1629 -
1630 - cpus_and(mask, tmp, CPU_MASK_ALL);
1631 + return;
1632
1633 - vector = assign_irq_vector(irq, mask, &tmp);
1634 - if (vector < 0)
1635 + if (assign_irq_vector(irq, mask))
1636 return;
1637
1638 + cpus_and(tmp, cfg->domain, mask);
1639 dest = cpu_mask_to_apicid(tmp);
1640
1641 read_msi_msg(irq, &msg);
1642
1643 msg.data &= ~MSI_DATA_VECTOR_MASK;
1644 - msg.data |= MSI_DATA_VECTOR(vector);
1645 + msg.data |= MSI_DATA_VECTOR(cfg->vector);
1646 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1647 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1648
1649 write_msi_msg(irq, &msg);
1650 - set_native_irq_info(irq, mask);
1651 + irq_desc[irq].affinity = mask;
1652 }
1653 #endif /* CONFIG_SMP */
1654
1655 @@ -1940,24 +1981,31 @@
1656 .retrigger = ioapic_retrigger_irq,
1657 };
1658
1659 -int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
1660 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
1661 {
1662 struct msi_msg msg;
1663 - int ret;
1664 + int irq, ret;
1665 + irq = create_irq();
1666 + if (irq < 0)
1667 + return irq;
1668 +
1669 + set_irq_msi(irq, desc);
1670 ret = msi_compose_msg(dev, irq, &msg);
1671 - if (ret < 0)
1672 + if (ret < 0) {
1673 + destroy_irq(irq);
1674 return ret;
1675 + }
1676
1677 write_msi_msg(irq, &msg);
1678
1679 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
1680
1681 - return 0;
1682 + return irq;
1683 }
1684
1685 void arch_teardown_msi_irq(unsigned int irq)
1686 {
1687 - return;
1688 + destroy_irq(irq);
1689 }
1690
1691 #endif /* CONFIG_PCI_MSI */
1692 @@ -1985,24 +2033,22 @@
1693
1694 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1695 {
1696 + struct irq_cfg *cfg = irq_cfg + irq;
1697 unsigned int dest;
1698 cpumask_t tmp;
1699 - int vector;
1700
1701 cpus_and(tmp, mask, cpu_online_map);
1702 if (cpus_empty(tmp))
1703 - tmp = TARGET_CPUS;
1704 -
1705 - cpus_and(mask, tmp, CPU_MASK_ALL);
1706 + return;
1707
1708 - vector = assign_irq_vector(irq, mask, &tmp);
1709 - if (vector < 0)
1710 + if (assign_irq_vector(irq, mask))
1711 return;
1712
1713 + cpus_and(tmp, cfg->domain, mask);
1714 dest = cpu_mask_to_apicid(tmp);
1715
1716 - target_ht_irq(irq, dest, vector);
1717 - set_native_irq_info(irq, mask);
1718 + target_ht_irq(irq, dest, cfg->vector);
1719 + irq_desc[irq].affinity = mask;
1720 }
1721 #endif
1722
1723 @@ -2019,14 +2065,17 @@
1724
1725 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1726 {
1727 - int vector;
1728 + struct irq_cfg *cfg = irq_cfg + irq;
1729 + int err;
1730 cpumask_t tmp;
1731
1732 - vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1733 - if (vector >= 0) {
1734 + tmp = TARGET_CPUS;
1735 + err = assign_irq_vector(irq, tmp);
1736 + if (!err) {
1737 struct ht_irq_msg msg;
1738 unsigned dest;
1739
1740 + cpus_and(tmp, cfg->domain, tmp);
1741 dest = cpu_mask_to_apicid(tmp);
1742
1743 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
1744 @@ -2034,7 +2083,7 @@
1745 msg.address_lo =
1746 HT_IRQ_LOW_BASE |
1747 HT_IRQ_LOW_DEST_ID(dest) |
1748 - HT_IRQ_LOW_VECTOR(vector) |
1749 + HT_IRQ_LOW_VECTOR(cfg->vector) |
1750 ((INT_DEST_MODE == 0) ?
1751 HT_IRQ_LOW_DM_PHYSICAL :
1752 HT_IRQ_LOW_DM_LOGICAL) |
1753 @@ -2049,7 +2098,7 @@
1754 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
1755 handle_edge_irq, "edge");
1756 }
1757 - return vector;
1758 + return err;
1759 }
1760 #endif /* CONFIG_HT_IRQ */
1761
1762 @@ -2074,13 +2123,8 @@
1763 }
1764
1765
1766 -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
1767 +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
1768 {
1769 - struct IO_APIC_route_entry entry;
1770 - unsigned long flags;
1771 - int vector;
1772 - cpumask_t mask;
1773 -
1774 if (!IO_APIC_IRQ(irq)) {
1775 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
1776 ioapic);
1777 @@ -2093,42 +2137,7 @@
1778 if (irq >= 16)
1779 add_pin_to_irq(irq, ioapic, pin);
1780
1781 -
1782 - vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1783 - if (vector < 0)
1784 - return vector;
1785 -
1786 - /*
1787 - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1788 - * Note that we mask (disable) IRQs now -- these get enabled when the
1789 - * corresponding device driver registers for this IRQ.
1790 - */
1791 -
1792 - memset(&entry,0,sizeof(entry));
1793 -
1794 - entry.delivery_mode = INT_DELIVERY_MODE;
1795 - entry.dest_mode = INT_DEST_MODE;
1796 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1797 - entry.trigger = edge_level;
1798 - entry.polarity = active_high_low;
1799 - entry.mask = 1; /* Disabled (masked) */
1800 - entry.vector = vector & 0xff;
1801 -
1802 - apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
1803 - "IRQ %d Mode:%i Active:%i)\n", ioapic,
1804 - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
1805 - edge_level, active_high_low);
1806 -
1807 - ioapic_register_intr(irq, entry.vector, edge_level);
1808 -
1809 - if (!ioapic && (irq < 16))
1810 - disable_8259A_irq(irq);
1811 -
1812 - ioapic_write_entry(ioapic, pin, entry);
1813 -
1814 - spin_lock_irqsave(&ioapic_lock, flags);
1815 - set_native_irq_info(irq, TARGET_CPUS);
1816 - spin_unlock_irqrestore(&ioapic_lock, flags);
1817 + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
1818
1819 return 0;
1820 }
1821 @@ -2161,8 +2170,10 @@
1822 * when you have too many devices, because at that time only boot
1823 * cpu is online.
1824 */
1825 - if(!irq_vector[irq])
1826 - setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
1827 + if (!irq_cfg[irq].vector)
1828 + setup_IO_APIC_irq(ioapic, pin, irq,
1829 + irq_trigger(irq_entry),
1830 + irq_polarity(irq_entry));
1831 else
1832 set_ioapic_affinity_irq(irq, TARGET_CPUS);
1833 }
1834 --- a/arch/x86/kernel/irq_32-xen.c
1835 +++ b/arch/x86/kernel/irq_32-xen.c
1836 @@ -10,7 +10,6 @@
1837 * io_apic.c.)
1838 */
1839
1840 -#include <asm/uaccess.h>
1841 #include <linux/module.h>
1842 #include <linux/seq_file.h>
1843 #include <linux/interrupt.h>
1844 @@ -19,19 +18,34 @@
1845 #include <linux/cpu.h>
1846 #include <linux/delay.h>
1847
1848 +#include <asm/apic.h>
1849 +#include <asm/uaccess.h>
1850 +
1851 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
1852 EXPORT_PER_CPU_SYMBOL(irq_stat);
1853
1854 -#ifndef CONFIG_X86_LOCAL_APIC
1855 /*
1856 * 'what should we do if we get a hw irq event on an illegal vector'.
1857 * each architecture has to answer this themselves.
1858 */
1859 void ack_bad_irq(unsigned int irq)
1860 {
1861 - printk("unexpected IRQ trap at vector %02x\n", irq);
1862 -}
1863 + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
1864 +
1865 +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
1866 + /*
1867 + * Currently unexpected vectors happen only on SMP and APIC.
1868 + * We _must_ ack these because every local APIC has only N
1869 + * irq slots per priority level, and a 'hanging, unacked' IRQ
1870 + * holds up an irq slot - in excessive cases (when multiple
1871 + * unexpected vectors occur) that might lock up the APIC
1872 + * completely.
1873 + * But only ack when the APIC is enabled -AK
1874 + */
1875 + if (cpu_has_apic)
1876 + ack_APIC_irq();
1877 #endif
1878 +}
1879
1880 #ifdef CONFIG_4KSTACKS
1881 /*
1882 --- a/arch/x86/kernel/irq_64-xen.c
1883 +++ b/arch/x86/kernel/irq_64-xen.c
1884 @@ -18,6 +18,7 @@
1885 #include <asm/uaccess.h>
1886 #include <asm/io_apic.h>
1887 #include <asm/idle.h>
1888 +#include <asm/smp.h>
1889
1890 atomic_t irq_err_count;
1891
1892 @@ -120,9 +121,15 @@
1893
1894 if (likely(irq < NR_IRQS))
1895 generic_handle_irq(irq);
1896 - else if (printk_ratelimit())
1897 - printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1898 - __func__, smp_processor_id(), irq);
1899 + else {
1900 +#ifndef CONFIG_XEN
1901 + if (!disable_apic)
1902 + ack_APIC_irq();
1903 +#endif
1904 + if (printk_ratelimit())
1905 + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
1906 + __func__, smp_processor_id(), irq);
1907 + }
1908
1909 irq_exit();
1910
1911 --- a/arch/x86/kernel/microcode-xen.c
1912 +++ b/arch/x86/kernel/microcode-xen.c
1913 @@ -108,7 +108,7 @@
1914 return ret;
1915 }
1916
1917 -static struct file_operations microcode_fops = {
1918 +static const struct file_operations microcode_fops = {
1919 .owner = THIS_MODULE,
1920 .write = microcode_write,
1921 .open = microcode_open,
1922 --- a/arch/x86/kernel/mpparse_32-xen.c
1923 +++ b/arch/x86/kernel/mpparse_32-xen.c
1924 @@ -1079,7 +1079,7 @@
1925 static int gsi_to_irq[MAX_GSI_NUM];
1926
1927 /* Don't set up the ACPI SCI because it's already set up */
1928 - if (acpi_fadt.sci_int == gsi)
1929 + if (acpi_gbl_FADT.sci_interrupt == gsi)
1930 return gsi;
1931
1932 ioapic = mp_find_ioapic(gsi);
1933 @@ -1136,7 +1136,7 @@
1934 /*
1935 * Don't assign IRQ used by ACPI SCI
1936 */
1937 - if (gsi == acpi_fadt.sci_int)
1938 + if (gsi == acpi_gbl_FADT.sci_interrupt)
1939 gsi = pci_irq++;
1940 gsi_to_irq[irq] = gsi;
1941 } else {
1942 --- a/arch/x86/kernel/mpparse_64-xen.c
1943 +++ b/arch/x86/kernel/mpparse_64-xen.c
1944 @@ -60,9 +60,9 @@
1945 /* Processor that is doing the boot up */
1946 unsigned int boot_cpu_id = -1U;
1947 /* Internal processor count */
1948 -unsigned int num_processors __initdata = 0;
1949 +unsigned int num_processors __cpuinitdata = 0;
1950
1951 -unsigned disabled_cpus __initdata;
1952 +unsigned disabled_cpus __cpuinitdata;
1953
1954 /* Bitmask of physically existing CPUs */
1955 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
1956 @@ -808,7 +808,7 @@
1957 return gsi;
1958
1959 /* Don't set up the ACPI SCI because it's already set up */
1960 - if (acpi_fadt.sci_int == gsi)
1961 + if (acpi_gbl_FADT.sci_interrupt == gsi)
1962 return gsi;
1963
1964 ioapic = mp_find_ioapic(gsi);
1965 --- a/arch/x86/kernel/pci-dma_32-xen.c
1966 +++ b/arch/x86/kernel/pci-dma_32-xen.c
1967 @@ -317,7 +317,7 @@
1968 return DMA_MEMORY_IO;
1969
1970 free1_out:
1971 - kfree(dev->dma_mem->bitmap);
1972 + kfree(dev->dma_mem);
1973 out:
1974 if (mem_base)
1975 iounmap(mem_base);
1976 --- a/arch/x86/kernel/pci-swiotlb_64-xen.c
1977 +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c
1978 @@ -35,7 +35,7 @@
1979 #endif
1980 };
1981
1982 -void pci_swiotlb_init(void)
1983 +void __init pci_swiotlb_init(void)
1984 {
1985 #if 0
1986 /* don't initialize swiotlb if iommu=off (no_iommu=1) */
1987 --- a/arch/x86/kernel/pcspeaker.c
1988 +++ b/arch/x86/kernel/pcspeaker.c
1989 @@ -7,6 +7,11 @@
1990 struct platform_device *pd;
1991 int ret;
1992
1993 +#ifdef CONFIG_XEN
1994 + if (!is_initial_xendomain())
1995 + return 0;
1996 +#endif
1997 +
1998 pd = platform_device_alloc("pcspkr", -1);
1999 if (!pd)
2000 return -ENOMEM;
2001 --- a/arch/x86/kernel/process_32-xen.c
2002 +++ b/arch/x86/kernel/process_32-xen.c
2003 @@ -38,6 +38,7 @@
2004 #include <linux/ptrace.h>
2005 #include <linux/random.h>
2006 #include <linux/personality.h>
2007 +#include <linux/tick.h>
2008
2009 #include <asm/uaccess.h>
2010 #include <asm/pgtable.h>
2011 @@ -160,6 +161,7 @@
2012
2013 /* endless idle loop with no priority at all */
2014 while (1) {
2015 + tick_nohz_stop_sched_tick();
2016 while (!need_resched()) {
2017 void (*idle)(void);
2018
2019 @@ -175,6 +177,7 @@
2020 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
2021 idle();
2022 }
2023 + tick_nohz_restart_sched_tick();
2024 preempt_enable_no_resched();
2025 schedule();
2026 preempt_disable();
2027 @@ -247,8 +250,8 @@
2028 regs->eax,regs->ebx,regs->ecx,regs->edx);
2029 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
2030 regs->esi, regs->edi, regs->ebp);
2031 - printk(" DS: %04x ES: %04x GS: %04x\n",
2032 - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
2033 + printk(" DS: %04x ES: %04x FS: %04x\n",
2034 + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
2035
2036 cr0 = read_cr0();
2037 cr2 = read_cr2();
2038 @@ -279,7 +282,7 @@
2039
2040 regs.xds = __USER_DS;
2041 regs.xes = __USER_DS;
2042 - regs.xgs = __KERNEL_PDA;
2043 + regs.xfs = __KERNEL_PDA;
2044 regs.orig_eax = -1;
2045 regs.eip = (unsigned long) kernel_thread_helper;
2046 regs.xcs = __KERNEL_CS | get_kernel_rpl();
2047 @@ -356,7 +359,7 @@
2048
2049 p->thread.eip = (unsigned long) ret_from_fork;
2050
2051 - savesegment(fs,p->thread.fs);
2052 + savesegment(gs,p->thread.gs);
2053
2054 tsk = current;
2055 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
2056 @@ -434,8 +437,8 @@
2057 dump->regs.eax = regs->eax;
2058 dump->regs.ds = regs->xds;
2059 dump->regs.es = regs->xes;
2060 - savesegment(fs,dump->regs.fs);
2061 - dump->regs.gs = regs->xgs;
2062 + dump->regs.fs = regs->xfs;
2063 + savesegment(gs,dump->regs.gs);
2064 dump->regs.orig_eax = regs->orig_eax;
2065 dump->regs.eip = regs->eip;
2066 dump->regs.cs = regs->xcs;
2067 @@ -616,16 +619,6 @@
2068 prefetch(&next->i387.fxsave);
2069
2070 /*
2071 - * Restore %fs if needed.
2072 - *
2073 - * Glibc normally makes %fs be zero.
2074 - */
2075 - if (unlikely(next->fs))
2076 - loadsegment(fs, next->fs);
2077 -
2078 - write_pda(pcurrent, next_p);
2079 -
2080 - /*
2081 * Now maybe handle debug registers
2082 */
2083 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
2084 @@ -633,6 +626,15 @@
2085
2086 disable_tsc(prev_p, next_p);
2087
2088 + /*
2089 + * Leave lazy mode, flushing any hypercalls made here.
2090 + * This must be done before restoring TLS segments so
2091 + * the GDT and LDT are properly updated, and must be
2092 + * done before math_state_restore, so the TS bit is up
2093 + * to date.
2094 + */
2095 + arch_leave_lazy_cpu_mode();
2096 +
2097 /* If the task has used fpu the last 5 timeslices, just do a full
2098 * restore of the math state immediately to avoid the trap; the
2099 * chances of needing FPU soon are obviously high now
2100 @@ -640,6 +642,14 @@
2101 if (next_p->fpu_counter > 5)
2102 math_state_restore();
2103
2104 + /*
2105 + * Restore %gs if needed (which is common)
2106 + */
2107 + if (prev->gs | next->gs)
2108 + loadsegment(gs, next->gs);
2109 +
2110 + write_pda(pcurrent, next_p);
2111 +
2112 return prev_p;
2113 }
2114
2115 --- a/arch/x86/kernel/process_64-xen.c
2116 +++ b/arch/x86/kernel/process_64-xen.c
2117 @@ -338,14 +338,17 @@
2118 void flush_thread(void)
2119 {
2120 struct task_struct *tsk = current;
2121 - struct thread_info *t = current_thread_info();
2122
2123 - if (t->flags & _TIF_ABI_PENDING) {
2124 - t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
2125 - if (t->flags & _TIF_IA32)
2126 + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
2127 + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
2128 + if (test_tsk_thread_flag(tsk, TIF_IA32)) {
2129 + clear_tsk_thread_flag(tsk, TIF_IA32);
2130 + } else {
2131 + set_tsk_thread_flag(tsk, TIF_IA32);
2132 current_thread_info()->status |= TS_COMPAT;
2133 + }
2134 }
2135 - t->flags &= ~_TIF_DEBUG;
2136 + clear_tsk_thread_flag(tsk, TIF_DEBUG);
2137
2138 tsk->thread.debugreg0 = 0;
2139 tsk->thread.debugreg1 = 0;
2140 --- a/arch/x86/kernel/setup_32-xen.c
2141 +++ b/arch/x86/kernel/setup_32-xen.c
2142 @@ -33,7 +33,6 @@
2143 #include <linux/initrd.h>
2144 #include <linux/bootmem.h>
2145 #include <linux/seq_file.h>
2146 -#include <linux/platform_device.h>
2147 #include <linux/console.h>
2148 #include <linux/mca.h>
2149 #include <linux/root_dev.h>
2150 @@ -151,7 +150,7 @@
2151 #define RAMDISK_PROMPT_FLAG 0x8000
2152 #define RAMDISK_LOAD_FLAG 0x4000
2153
2154 -static char command_line[COMMAND_LINE_SIZE];
2155 +static char __initdata command_line[COMMAND_LINE_SIZE];
2156
2157 unsigned char __initdata boot_params[PARAM_SIZE];
2158
2159 @@ -650,8 +649,8 @@
2160
2161 if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2162 i = COMMAND_LINE_SIZE;
2163 - memcpy(saved_command_line, xen_start_info->cmd_line, i);
2164 - saved_command_line[i - 1] = '\0';
2165 + memcpy(boot_command_line, xen_start_info->cmd_line, i);
2166 + boot_command_line[i - 1] = '\0';
2167 parse_early_param();
2168
2169 if (user_defined_memmap) {
2170 @@ -659,11 +658,19 @@
2171 print_memory_map("user");
2172 }
2173
2174 - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2175 + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2176 *cmdline_p = command_line;
2177
2178 max_low_pfn = setup_memory();
2179
2180 +#ifdef CONFIG_VMI
2181 + /*
2182 + * Must be after max_low_pfn is determined, and before kernel
2183 + * pagetables are setup.
2184 + */
2185 + vmi_init();
2186 +#endif
2187 +
2188 /*
2189 * NOTE: before this point _nobody_ is allowed to allocate
2190 * any memory using the bootmem allocator. Although the
2191 @@ -826,7 +833,6 @@
2192 conswitchp = &dummy_con;
2193 #endif
2194 }
2195 - tsc_init();
2196 }
2197
2198 static int
2199 @@ -836,31 +842,3 @@
2200 /* we're never actually going to get here... */
2201 return NOTIFY_DONE;
2202 }
2203 -
2204 -static __init int add_pcspkr(void)
2205 -{
2206 - struct platform_device *pd;
2207 - int ret;
2208 -
2209 - if (!is_initial_xendomain())
2210 - return 0;
2211 -
2212 - pd = platform_device_alloc("pcspkr", -1);
2213 - if (!pd)
2214 - return -ENOMEM;
2215 -
2216 - ret = platform_device_add(pd);
2217 - if (ret)
2218 - platform_device_put(pd);
2219 -
2220 - return ret;
2221 -}
2222 -device_initcall(add_pcspkr);
2223 -
2224 -/*
2225 - * Local Variables:
2226 - * mode:c
2227 - * c-file-style:"k&r"
2228 - * c-basic-offset:8
2229 - * End:
2230 - */
2231 --- a/arch/x86/kernel/setup_64-xen.c
2232 +++ b/arch/x86/kernel/setup_64-xen.c
2233 @@ -144,7 +144,7 @@
2234
2235 extern int root_mountflags;
2236
2237 -char command_line[COMMAND_LINE_SIZE];
2238 +char __initdata command_line[COMMAND_LINE_SIZE];
2239
2240 struct resource standard_io_resources[] = {
2241 { .name = "dma1", .start = 0x00, .end = 0x1f,
2242 @@ -182,134 +182,6 @@
2243 .flags = IORESOURCE_RAM,
2244 };
2245
2246 -#define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
2247 -
2248 -static struct resource system_rom_resource = {
2249 - .name = "System ROM",
2250 - .start = 0xf0000,
2251 - .end = 0xfffff,
2252 - .flags = IORESOURCE_ROM,
2253 -};
2254 -
2255 -static struct resource extension_rom_resource = {
2256 - .name = "Extension ROM",
2257 - .start = 0xe0000,
2258 - .end = 0xeffff,
2259 - .flags = IORESOURCE_ROM,
2260 -};
2261 -
2262 -static struct resource adapter_rom_resources[] = {
2263 - { .name = "Adapter ROM", .start = 0xc8000, .end = 0,
2264 - .flags = IORESOURCE_ROM },
2265 - { .name = "Adapter ROM", .start = 0, .end = 0,
2266 - .flags = IORESOURCE_ROM },
2267 - { .name = "Adapter ROM", .start = 0, .end = 0,
2268 - .flags = IORESOURCE_ROM },
2269 - { .name = "Adapter ROM", .start = 0, .end = 0,
2270 - .flags = IORESOURCE_ROM },
2271 - { .name = "Adapter ROM", .start = 0, .end = 0,
2272 - .flags = IORESOURCE_ROM },
2273 - { .name = "Adapter ROM", .start = 0, .end = 0,
2274 - .flags = IORESOURCE_ROM }
2275 -};
2276 -
2277 -static struct resource video_rom_resource = {
2278 - .name = "Video ROM",
2279 - .start = 0xc0000,
2280 - .end = 0xc7fff,
2281 - .flags = IORESOURCE_ROM,
2282 -};
2283 -
2284 -static struct resource video_ram_resource = {
2285 - .name = "Video RAM area",
2286 - .start = 0xa0000,
2287 - .end = 0xbffff,
2288 - .flags = IORESOURCE_RAM,
2289 -};
2290 -
2291 -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
2292 -
2293 -static int __init romchecksum(unsigned char *rom, unsigned long length)
2294 -{
2295 - unsigned char *p, sum = 0;
2296 -
2297 - for (p = rom; p < rom + length; p++)
2298 - sum += *p;
2299 - return sum == 0;
2300 -}
2301 -
2302 -static void __init probe_roms(void)
2303 -{
2304 - unsigned long start, length, upper;
2305 - unsigned char *rom;
2306 - int i;
2307 -
2308 -#ifdef CONFIG_XEN
2309 - /* Nothing to do if not running in dom0. */
2310 - if (!is_initial_xendomain())
2311 - return;
2312 -#endif
2313 -
2314 - /* video rom */
2315 - upper = adapter_rom_resources[0].start;
2316 - for (start = video_rom_resource.start; start < upper; start += 2048) {
2317 - rom = isa_bus_to_virt(start);
2318 - if (!romsignature(rom))
2319 - continue;
2320 -
2321 - video_rom_resource.start = start;
2322 -
2323 - /* 0 < length <= 0x7f * 512, historically */
2324 - length = rom[2] * 512;
2325 -
2326 - /* if checksum okay, trust length byte */
2327 - if (length && romchecksum(rom, length))
2328 - video_rom_resource.end = start + length - 1;
2329 -
2330 - request_resource(&iomem_resource, &video_rom_resource);
2331 - break;
2332 - }
2333 -
2334 - start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
2335 - if (start < upper)
2336 - start = upper;
2337 -
2338 - /* system rom */
2339 - request_resource(&iomem_resource, &system_rom_resource);
2340 - upper = system_rom_resource.start;
2341 -
2342 - /* check for extension rom (ignore length byte!) */
2343 - rom = isa_bus_to_virt(extension_rom_resource.start);
2344 - if (romsignature(rom)) {
2345 - length = extension_rom_resource.end - extension_rom_resource.start + 1;
2346 - if (romchecksum(rom, length)) {
2347 - request_resource(&iomem_resource, &extension_rom_resource);
2348 - upper = extension_rom_resource.start;
2349 - }
2350 - }
2351 -
2352 - /* check for adapter roms on 2k boundaries */
2353 - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
2354 - start += 2048) {
2355 - rom = isa_bus_to_virt(start);
2356 - if (!romsignature(rom))
2357 - continue;
2358 -
2359 - /* 0 < length <= 0x7f * 512, historically */
2360 - length = rom[2] * 512;
2361 -
2362 - /* but accept any length that fits if checksum okay */
2363 - if (!length || start + length > upper || !romchecksum(rom, length))
2364 - continue;
2365 -
2366 - adapter_rom_resources[i].start = start;
2367 - adapter_rom_resources[i].end = start + length - 1;
2368 - request_resource(&iomem_resource, &adapter_rom_resources[i]);
2369 -
2370 - start = adapter_rom_resources[i++].end & ~2047UL;
2371 - }
2372 -}
2373 -
2374 #ifdef CONFIG_PROC_VMCORE
2375 /* elfcorehdr= specifies the location of elf core header
2376 * stored by the crashed kernel. This option will be passed
2377 @@ -406,7 +278,7 @@
2378 #ifdef CONFIG_XEN
2379 extern struct e820map machine_e820;
2380
2381 - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2382 + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2383
2384 /* Register a call for panic conditions. */
2385 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
2386 @@ -433,7 +305,7 @@
2387
2388 ARCH_SETUP
2389 #else
2390 - printk(KERN_INFO "Command line: %s\n", saved_command_line);
2391 + printk(KERN_INFO "Command line: %s\n", boot_command_line);
2392
2393 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
2394 screen_info = SCREEN_INFO;
2395 @@ -464,7 +336,7 @@
2396
2397 early_identify_cpu(&boot_cpu_data);
2398
2399 - strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
2400 + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
2401 *cmdline_p = command_line;
2402
2403 parse_early_param();
2404 @@ -534,6 +406,11 @@
2405 /* reserve ebda region */
2406 if (ebda_addr)
2407 reserve_bootmem_generic(ebda_addr, ebda_size);
2408 +#ifdef CONFIG_NUMA
2409 + /* reserve nodemap region */
2410 + if (nodemap_addr)
2411 + reserve_bootmem_generic(nodemap_addr, nodemap_size);
2412 +#endif
2413
2414 #ifdef CONFIG_SMP
2415 /*
2416 @@ -734,10 +611,8 @@
2417 #endif
2418
2419 /*
2420 - * Request address space for all standard RAM and ROM resources
2421 - * and also for regions reported as reserved by the e820.
2422 + * We trust e820 completely. No explicit ROM probing in memory.
2423 */
2424 - probe_roms();
2425 #ifdef CONFIG_XEN
2426 if (is_initial_xendomain()) {
2427 struct xen_memory_map memmap;
2428 @@ -756,8 +631,6 @@
2429 e820_mark_nosave_regions();
2430 #endif
2431
2432 - request_resource(&iomem_resource, &video_ram_resource);
2433 -
2434 {
2435 unsigned i;
2436 /* request I/O space for devices used on all i[345]86 PCs */
2437 @@ -1334,7 +1207,8 @@
2438 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2439 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
2440 NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
2441 - NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
2442 + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
2443 + "3dnowext", "3dnow",
2444
2445 /* Transmeta-defined */
2446 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
2447 @@ -1352,7 +1226,7 @@
2448 /* Intel-defined (#2) */
2449 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
2450 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
2451 - NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
2452 + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
2453 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2454
2455 /* VIA/Cyrix/Centaur-defined */
2456 @@ -1362,8 +1236,10 @@
2457 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2458
2459 /* AMD-defined (#2) */
2460 - "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL,
2461 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2462 + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
2463 + "altmovcr8", "abm", "sse4a",
2464 + "misalignsse", "3dnowprefetch",
2465 + "osvw", "ibs", NULL, NULL, NULL, NULL,
2466 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2467 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2468 };
2469 @@ -1374,6 +1250,9 @@
2470 "ttp", /* thermal trip */
2471 "tm",
2472 "stc",
2473 + "100mhzsteps",
2474 + "hwpstate",
2475 + NULL, /* tsc invariant mapped to constant_tsc */
2476 NULL,
2477 /* nothing */ /* constant_tsc - moved to flags */
2478 };
2479 @@ -1490,26 +1369,3 @@
2480 .stop = c_stop,
2481 .show = show_cpuinfo,
2482 };
2483 -
2484 -#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
2485 -#include <linux/platform_device.h>
2486 -static __init int add_pcspkr(void)
2487 -{
2488 - struct platform_device *pd;
2489 - int ret;
2490 -
2491 - if (!is_initial_xendomain())
2492 - return 0;
2493 -
2494 - pd = platform_device_alloc("pcspkr", -1);
2495 - if (!pd)
2496 - return -ENOMEM;
2497 -
2498 - ret = platform_device_add(pd);
2499 - if (ret)
2500 - platform_device_put(pd);
2501 -
2502 - return ret;
2503 -}
2504 -device_initcall(add_pcspkr);
2505 -#endif
2506 --- a/arch/x86/kernel/smp_32-xen.c
2507 +++ b/arch/x86/kernel/smp_32-xen.c
2508 @@ -335,8 +335,7 @@
2509 /*
2510 * i'm not happy about this global shared spinlock in the
2511 * MM hot path, but we'll see how contended it is.
2512 - * Temporarily this turns IRQs off, so that lockups are
2513 - * detected by the NMI watchdog.
2514 + * AK: x86-64 has a faster method that could be ported.
2515 */
2516 spin_lock(&tlbstate_lock);
2517
2518 @@ -361,7 +360,7 @@
2519
2520 while (!cpus_empty(flush_cpumask))
2521 /* nothing. lockup detection does not belong here */
2522 - mb();
2523 + cpu_relax();
2524
2525 flush_mm = NULL;
2526 flush_va = 0;
2527 --- a/arch/x86/kernel/time_32-xen.c
2528 +++ b/arch/x86/kernel/time_32-xen.c
2529 @@ -51,6 +51,7 @@
2530 #include <linux/kernel_stat.h>
2531 #include <linux/posix-timers.h>
2532 #include <linux/cpufreq.h>
2533 +#include <linux/clocksource.h>
2534
2535 #include <asm/io.h>
2536 #include <asm/smp.h>
2537 @@ -75,25 +76,17 @@
2538 #include <xen/evtchn.h>
2539 #include <xen/interface/vcpu.h>
2540
2541 -#if defined (__i386__)
2542 -#include <asm/i8259.h>
2543 +#ifdef CONFIG_X86_32
2544 #include <asm/i8253.h>
2545 DEFINE_SPINLOCK(i8253_lock);
2546 EXPORT_SYMBOL(i8253_lock);
2547 -#endif
2548 -
2549 -#define XEN_SHIFT 22
2550 -
2551 int pit_latch_buggy; /* extern */
2552 -
2553 -#if defined(__x86_64__)
2554 -unsigned long vxtime_hz = PIT_TICK_RATE;
2555 -struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
2556 +#else
2557 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
2558 -struct timespec __xtime __section_xtime;
2559 -struct timezone __sys_tz __section_sys_tz;
2560 #endif
2561
2562 +#define XEN_SHIFT 22
2563 +
2564 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
2565 EXPORT_SYMBOL(cpu_khz);
2566
2567 @@ -113,9 +106,6 @@
2568 static struct timespec shadow_tv;
2569 static u32 shadow_tv_version;
2570
2571 -static struct timeval monotonic_tv;
2572 -static spinlock_t monotonic_lock = SPIN_LOCK_UNLOCKED;
2573 -
2574 /* Keep track of last time we did processing/updating of jiffies and xtime. */
2575 static u64 processed_system_time; /* System time (ns) at last processing. */
2576 static DEFINE_PER_CPU(u64, processed_system_time);
2577 @@ -228,7 +218,7 @@
2578 }
2579 #endif
2580
2581 -void init_cpu_khz(void)
2582 +static void init_cpu_khz(void)
2583 {
2584 u64 __cpu_khz = 1000000ULL << 32;
2585 struct vcpu_time_info *info = &vcpu_info(0)->time;
2586 @@ -247,16 +237,6 @@
2587 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
2588 }
2589
2590 -#ifdef CONFIG_X86_64
2591 -static unsigned long get_usec_offset(struct shadow_time_info *shadow)
2592 -{
2593 - u64 now, delta;
2594 - rdtscll(now);
2595 - delta = now - shadow->tsc_timestamp;
2596 - return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
2597 -}
2598 -#endif
2599 -
2600 static void __update_wallclock(time_t sec, long nsec)
2601 {
2602 long wtm_nsec, xtime_nsec;
2603 @@ -364,138 +344,6 @@
2604 }
2605 EXPORT_SYMBOL(rtc_cmos_write);
2606
2607 -#ifdef CONFIG_X86_64
2608 -
2609 -/*
2610 - * This version of gettimeofday has microsecond resolution
2611 - * and better than microsecond precision on fast x86 machines with TSC.
2612 - */
2613 -void do_gettimeofday(struct timeval *tv)
2614 -{
2615 - unsigned long seq;
2616 - unsigned long usec, sec;
2617 - unsigned long flags;
2618 - s64 nsec;
2619 - unsigned int cpu;
2620 - struct shadow_time_info *shadow;
2621 - u32 local_time_version;
2622 -
2623 - cpu = get_cpu();
2624 - shadow = &per_cpu(shadow_time, cpu);
2625 -
2626 - do {
2627 - local_time_version = shadow->version;
2628 - seq = read_seqbegin(&xtime_lock);
2629 -
2630 - usec = get_usec_offset(shadow);
2631 -
2632 - sec = xtime.tv_sec;
2633 - usec += (xtime.tv_nsec / NSEC_PER_USEC);
2634 -
2635 - nsec = shadow->system_timestamp - processed_system_time;
2636 - __normalize_time(&sec, &nsec);
2637 - usec += (long)nsec / NSEC_PER_USEC;
2638 -
2639 - if (unlikely(!time_values_up_to_date(cpu))) {
2640 - /*
2641 - * We may have blocked for a long time,
2642 - * rendering our calculations invalid
2643 - * (e.g. the time delta may have
2644 - * overflowed). Detect that and recalculate
2645 - * with fresh values.
2646 - */
2647 - get_time_values_from_xen(cpu);
2648 - continue;
2649 - }
2650 - } while (read_seqretry(&xtime_lock, seq) ||
2651 - (local_time_version != shadow->version));
2652 -
2653 - put_cpu();
2654 -
2655 - while (usec >= USEC_PER_SEC) {
2656 - usec -= USEC_PER_SEC;
2657 - sec++;
2658 - }
2659 -
2660 - spin_lock_irqsave(&monotonic_lock, flags);
2661 - if ((sec > monotonic_tv.tv_sec) ||
2662 - ((sec == monotonic_tv.tv_sec) && (usec > monotonic_tv.tv_usec)))
2663 - {
2664 - monotonic_tv.tv_sec = sec;
2665 - monotonic_tv.tv_usec = usec;
2666 - } else {
2667 - sec = monotonic_tv.tv_sec;
2668 - usec = monotonic_tv.tv_usec;
2669 - }
2670 - spin_unlock_irqrestore(&monotonic_lock, flags);
2671 -
2672 - tv->tv_sec = sec;
2673 - tv->tv_usec = usec;
2674 -}
2675 -
2676 -EXPORT_SYMBOL(do_gettimeofday);
2677 -
2678 -int do_settimeofday(struct timespec *tv)
2679 -{
2680 - time_t sec;
2681 - s64 nsec;
2682 - unsigned int cpu;
2683 - struct shadow_time_info *shadow;
2684 - struct xen_platform_op op;
2685 -
2686 - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
2687 - return -EINVAL;
2688 -
2689 - cpu = get_cpu();
2690 - shadow = &per_cpu(shadow_time, cpu);
2691 -
2692 - write_seqlock_irq(&xtime_lock);
2693 -
2694 - /*
2695 - * Ensure we don't get blocked for a long time so that our time delta
2696 - * overflows. If that were to happen then our shadow time values would
2697 - * be stale, so we can retry with fresh ones.
2698 - */
2699 - for (;;) {
2700 - nsec = tv->tv_nsec - get_nsec_offset(shadow);
2701 - if (time_values_up_to_date(cpu))
2702 - break;
2703 - get_time_values_from_xen(cpu);
2704 - }
2705 - sec = tv->tv_sec;
2706 - __normalize_time(&sec, &nsec);
2707 -
2708 - if (is_initial_xendomain() && !independent_wallclock) {
2709 - op.cmd = XENPF_settime;
2710 - op.u.settime.secs = sec;
2711 - op.u.settime.nsecs = nsec;
2712 - op.u.settime.system_time = shadow->system_timestamp;
2713 - WARN_ON(HYPERVISOR_platform_op(&op));
2714 - update_wallclock();
2715 - } else if (independent_wallclock) {
2716 - nsec -= shadow->system_timestamp;
2717 - __normalize_time(&sec, &nsec);
2718 - __update_wallclock(sec, nsec);
2719 - }
2720 -
2721 - /* Reset monotonic gettimeofday() timeval. */
2722 - spin_lock(&monotonic_lock);
2723 - monotonic_tv.tv_sec = 0;
2724 - monotonic_tv.tv_usec = 0;
2725 - spin_unlock(&monotonic_lock);
2726 -
2727 - write_sequnlock_irq(&xtime_lock);
2728 -
2729 - put_cpu();
2730 -
2731 - clock_was_set();
2732 - return 0;
2733 -}
2734 -
2735 -EXPORT_SYMBOL(do_settimeofday);
2736 -
2737 -#endif
2738 -
2739 static void sync_xen_wallclock(unsigned long dummy);
2740 static DEFINE_TIMER(sync_xen_wallclock_timer, sync_xen_wallclock, 0, 0);
2741 static void sync_xen_wallclock(unsigned long dummy)
2742 @@ -544,15 +392,7 @@
2743 return retval;
2744 }
2745
2746 -#ifdef CONFIG_X86_64
2747 -/* monotonic_clock(): returns # of nanoseconds passed since time_init()
2748 - * Note: This function is required to return accurate
2749 - * time even in the absence of multiple timer ticks.
2750 - */
2751 -unsigned long long monotonic_clock(void)
2752 -#else
2753 unsigned long long sched_clock(void)
2754 -#endif
2755 {
2756 unsigned int cpu = get_cpu();
2757 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
2758 @@ -572,21 +412,18 @@
2759
2760 return time;
2761 }
2762 -#ifdef CONFIG_X86_64
2763 -EXPORT_SYMBOL(monotonic_clock);
2764 -
2765 -unsigned long long sched_clock(void)
2766 -{
2767 - return monotonic_clock();
2768 -}
2769 -#endif
2770
2771 unsigned long profile_pc(struct pt_regs *regs)
2772 {
2773 unsigned long pc = instruction_pointer(regs);
2774
2775 #if defined(CONFIG_SMP) || defined(__x86_64__)
2776 - if (!user_mode_vm(regs) && in_lock_functions(pc)) {
2777 +# ifdef __i386__
2778 + if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs)
2779 +# else
2780 + if (!user_mode(regs)
2781 +# endif
2782 + && in_lock_functions(pc)) {
2783 # ifdef CONFIG_FRAME_POINTER
2784 # ifdef __i386__
2785 return ((unsigned long *)regs->ebp)[1];
2786 @@ -595,14 +432,11 @@
2787 # endif
2788 # else
2789 # ifdef __i386__
2790 - unsigned long *sp;
2791 - if ((regs->xcs & 2) == 0)
2792 - sp = (unsigned long *)&regs->esp;
2793 - else
2794 - sp = (unsigned long *)regs->esp;
2795 + unsigned long *sp = (unsigned long *)&regs->esp;
2796 # else
2797 unsigned long *sp = (unsigned long *)regs->rsp;
2798 # endif
2799 +
2800 /* Return address is either directly at stack pointer
2801 or above a saved eflags. Eflags has bits 22-31 zero,
2802 kernel addresses don't. */
2803 @@ -755,19 +589,6 @@
2804 return IRQ_HANDLED;
2805 }
2806
2807 -#ifndef CONFIG_X86_64
2808 -
2809 -void tsc_init(void)
2810 -{
2811 - init_cpu_khz();
2812 - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2813 - cpu_khz / 1000, cpu_khz % 1000);
2814 -
2815 - use_tsc_delay();
2816 -}
2817 -
2818 -#include <linux/clocksource.h>
2819 -
2820 void mark_tsc_unstable(void)
2821 {
2822 #ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
2823 @@ -821,21 +642,9 @@
2824 .mask = CLOCKSOURCE_MASK(64),
2825 .mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
2826 .shift = XEN_SHIFT,
2827 - .is_continuous = 1,
2828 + .flags = CLOCK_SOURCE_IS_CONTINUOUS,
2829 };
2830
2831 -static int __init init_xen_clocksource(void)
2832 -{
2833 - clocksource_xen.mult = clocksource_khz2mult(cpu_khz,
2834 - clocksource_xen.shift);
2835 -
2836 - return clocksource_register(&clocksource_xen);
2837 -}
2838 -
2839 -module_init(init_xen_clocksource);
2840 -
2841 -#endif
2842 -
2843 static void init_missing_ticks_accounting(unsigned int cpu)
2844 {
2845 struct vcpu_register_runstate_memory_area area;
2846 @@ -856,7 +665,7 @@
2847 }
2848
2849 /* not static: needed by APM */
2850 -unsigned long get_cmos_time(void)
2851 +unsigned long read_persistent_clock(void)
2852 {
2853 unsigned long retval;
2854 unsigned long flags;
2855 @@ -869,11 +678,11 @@
2856
2857 return retval;
2858 }
2859 -EXPORT_SYMBOL(get_cmos_time);
2860
2861 static void sync_cmos_clock(unsigned long dummy);
2862
2863 static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
2864 +int no_sync_cmos_clock;
2865
2866 static void sync_cmos_clock(unsigned long dummy)
2867 {
2868 @@ -917,7 +726,8 @@
2869
2870 void notify_arch_cmos_timer(void)
2871 {
2872 - mod_timer(&sync_cmos_timer, jiffies + 1);
2873 + if (!no_sync_cmos_clock)
2874 + mod_timer(&sync_cmos_timer, jiffies + 1);
2875 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
2876 }
2877
2878 @@ -950,29 +760,11 @@
2879
2880 device_initcall(time_init_device);
2881
2882 -#ifdef CONFIG_HPET_TIMER
2883 extern void (*late_time_init)(void);
2884 -/* Duplicate of time_init() below, with hpet_enable part added */
2885 -static void __init hpet_time_init(void)
2886 -{
2887 - struct timespec ts;
2888 - ts.tv_sec = get_cmos_time();
2889 - ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
2890 -
2891 - do_settimeofday(&ts);
2892 -
2893 - if ((hpet_enable() >= 0) && hpet_use_timer) {
2894 - printk("Using HPET for base-timer\n");
2895 - }
2896 -
2897 - do_time_init();
2898 -}
2899 -#endif
2900
2901 /* Dynamically-mapped IRQ. */
2902 DEFINE_PER_CPU(int, timer_irq);
2903
2904 -extern void (*late_time_init)(void);
2905 static void setup_cpu0_timer_irq(void)
2906 {
2907 per_cpu(timer_irq, 0) =
2908 @@ -992,16 +784,9 @@
2909
2910 void __init time_init(void)
2911 {
2912 -#ifdef CONFIG_HPET_TIMER
2913 - if (is_hpet_capable()) {
2914 - /*
2915 - * HPET initialization needs to do memory-mapped io. So, let
2916 - * us do a late initialization after mem_init().
2917 - */
2918 - late_time_init = hpet_time_init;
2919 - return;
2920 - }
2921 -#endif
2922 + init_cpu_khz();
2923 + printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2924 + cpu_khz / 1000, cpu_khz % 1000);
2925
2926 switch (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
2927 &xen_set_periodic_tick)) {
2928 @@ -1020,18 +805,12 @@
2929 per_cpu(processed_system_time, 0) = processed_system_time;
2930 init_missing_ticks_accounting(0);
2931
2932 - update_wallclock();
2933 + clocksource_register(&clocksource_xen);
2934
2935 -#ifdef CONFIG_X86_64
2936 - init_cpu_khz();
2937 - printk(KERN_INFO "Xen reported: %u.%03u MHz processor.\n",
2938 - cpu_khz / 1000, cpu_khz % 1000);
2939 + update_wallclock();
2940
2941 - vxtime.mode = VXTIME_TSC;
2942 - vxtime.quot = (1000000L << 32) / vxtime_hz;
2943 - vxtime.tsc_quot = (1000L << 32) / cpu_khz;
2944 - sync_core();
2945 - rdtscll(vxtime.last_tsc);
2946 +#ifndef CONFIG_X86_64
2947 + use_tsc_delay();
2948 #endif
2949
2950 /* Cannot request_irq() until kmem is initialised. */
2951 @@ -1277,7 +1056,7 @@
2952 };
2953 static int __init xen_sysctl_init(void)
2954 {
2955 - (void)register_sysctl_table(xen_table, 0);
2956 + (void)register_sysctl_table(xen_table);
2957 return 0;
2958 }
2959 __initcall(xen_sysctl_init);
2960 --- a/arch/x86/kernel/traps_32-xen.c
2961 +++ b/arch/x86/kernel/traps_32-xen.c
2962 @@ -100,6 +100,7 @@
2963 asmlinkage void machine_check(void);
2964
2965 int kstack_depth_to_print = 24;
2966 +static unsigned int code_bytes = 64;
2967 ATOMIC_NOTIFIER_HEAD(i386die_chain);
2968
2969 int register_die_notifier(struct notifier_block *nb)
2970 @@ -297,10 +298,11 @@
2971 int i;
2972 int in_kernel = 1;
2973 unsigned long esp;
2974 - unsigned short ss;
2975 + unsigned short ss, gs;
2976
2977 esp = (unsigned long) (&regs->esp);
2978 savesegment(ss, ss);
2979 + savesegment(gs, gs);
2980 if (user_mode_vm(regs)) {
2981 in_kernel = 0;
2982 esp = regs->esp;
2983 @@ -319,8 +321,8 @@
2984 regs->eax, regs->ebx, regs->ecx, regs->edx);
2985 printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
2986 regs->esi, regs->edi, regs->ebp, esp);
2987 - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
2988 - regs->xds & 0xffff, regs->xes & 0xffff, ss);
2989 + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
2990 + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
2991 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
2992 TASK_COMM_LEN, current->comm, current->pid,
2993 current_thread_info(), current, current->thread_info);
2994 @@ -330,7 +332,8 @@
2995 */
2996 if (in_kernel) {
2997 u8 *eip;
2998 - int code_bytes = 64;
2999 + unsigned int code_prologue = code_bytes * 43 / 64;
3000 + unsigned int code_len = code_bytes;
3001 unsigned char c;
3002
3003 printk("\n" KERN_EMERG "Stack: ");
3004 @@ -338,14 +341,14 @@
3005
3006 printk(KERN_EMERG "Code: ");
3007
3008 - eip = (u8 *)regs->eip - 43;
3009 + eip = (u8 *)regs->eip - code_prologue;
3010 if (eip < (u8 *)PAGE_OFFSET ||
3011 probe_kernel_address(eip, c)) {
3012 /* try starting at EIP */
3013 eip = (u8 *)regs->eip;
3014 - code_bytes = 32;
3015 + code_len = code_len - code_prologue + 1;
3016 }
3017 - for (i = 0; i < code_bytes; i++, eip++) {
3018 + for (i = 0; i < code_len; i++, eip++) {
3019 if (eip < (u8 *)PAGE_OFFSET ||
3020 probe_kernel_address(eip, c)) {
3021 printk(" Bad EIP value.");
3022 @@ -1134,3 +1137,13 @@
3023 return 1;
3024 }
3025 __setup("kstack=", kstack_setup);
3026 +
3027 +static int __init code_bytes_setup(char *s)
3028 +{
3029 + code_bytes = simple_strtoul(s, NULL, 0);
3030 + if (code_bytes > 8192)
3031 + code_bytes = 8192;
3032 +
3033 + return 1;
3034 +}
3035 +__setup("code_bytes=", code_bytes_setup);
3036 --- a/arch/x86/kernel/vsyscall_64-xen.c
3037 +++ b/arch/x86/kernel/vsyscall_64-xen.c
3038 @@ -26,6 +26,7 @@
3039 #include <linux/seqlock.h>
3040 #include <linux/jiffies.h>
3041 #include <linux/sysctl.h>
3042 +#include <linux/clocksource.h>
3043 #include <linux/getcpu.h>
3044 #include <linux/cpu.h>
3045 #include <linux/smp.h>
3046 @@ -34,6 +35,7 @@
3047 #include <asm/vsyscall.h>
3048 #include <asm/pgtable.h>
3049 #include <asm/page.h>
3050 +#include <asm/unistd.h>
3051 #include <asm/fixmap.h>
3052 #include <asm/errno.h>
3053 #include <asm/io.h>
3054 @@ -44,56 +46,41 @@
3055 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
3056 #define __syscall_clobber "r11","rcx","memory"
3057
3058 -int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
3059 -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
3060 +struct vsyscall_gtod_data_t {
3061 + seqlock_t lock;
3062 + int sysctl_enabled;
3063 + struct timeval wall_time_tv;
3064 + struct timezone sys_tz;
3065 + cycle_t offset_base;
3066 + struct clocksource clock;
3067 +};
3068 int __vgetcpu_mode __section_vgetcpu_mode;
3069
3070 -#include <asm/unistd.h>
3071 -
3072 -static __always_inline void timeval_normalize(struct timeval * tv)
3073 +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
3074 {
3075 - time_t __sec;
3076 -
3077 - __sec = tv->tv_usec / 1000000;
3078 - if (__sec) {
3079 - tv->tv_usec %= 1000000;
3080 - tv->tv_sec += __sec;
3081 - }
3082 -}
3083 + .lock = SEQLOCK_UNLOCKED,
3084 + .sysctl_enabled = 1,
3085 +};
3086
3087 -static __always_inline void do_vgettimeofday(struct timeval * tv)
3088 +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
3089 {
3090 - long sequence, t;
3091 - unsigned long sec, usec;
3092 + unsigned long flags;
3093
3094 - do {
3095 - sequence = read_seqbegin(&__xtime_lock);
3096 -
3097 - sec = __xtime.tv_sec;
3098 - usec = __xtime.tv_nsec / 1000;
3099 -
3100 - if (__vxtime.mode != VXTIME_HPET) {
3101 - t = get_cycles_sync();
3102 - if (t < __vxtime.last_tsc)
3103 - t = __vxtime.last_tsc;
3104 - usec += ((t - __vxtime.last_tsc) *
3105 - __vxtime.tsc_quot) >> 32;
3106 - /* See comment in x86_64 do_gettimeofday. */
3107 - } else {
3108 - usec += ((readl((void __iomem *)
3109 - fix_to_virt(VSYSCALL_HPET) + 0xf0) -
3110 - __vxtime.last) * __vxtime.quot) >> 32;
3111 - }
3112 - } while (read_seqretry(&__xtime_lock, sequence));
3113 -
3114 - tv->tv_sec = sec + usec / 1000000;
3115 - tv->tv_usec = usec % 1000000;
3116 + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
3117 + /* copy vsyscall data */
3118 + vsyscall_gtod_data.clock = *clock;
3119 + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
3120 + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
3121 + vsyscall_gtod_data.sys_tz = sys_tz;
3122 + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3123 }
3124
3125 -/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
3126 +/* RED-PEN may want to readd seq locking, but then the variable should be
3127 + * write-once.
3128 + */
3129 static __always_inline void do_get_tz(struct timezone * tz)
3130 {
3131 - *tz = __sys_tz;
3132 + *tz = __vsyscall_gtod_data.sys_tz;
3133 }
3134
3135 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
3136 @@ -101,7 +88,8 @@
3137 int ret;
3138 asm volatile("vsysc2: syscall"
3139 : "=a" (ret)
3140 - : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
3141 + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
3142 + : __syscall_clobber );
3143 return ret;
3144 }
3145
3146 @@ -114,10 +102,44 @@
3147 return secs;
3148 }
3149
3150 +static __always_inline void do_vgettimeofday(struct timeval * tv)
3151 +{
3152 + cycle_t now, base, mask, cycle_delta;
3153 + unsigned long seq, mult, shift, nsec_delta;
3154 + cycle_t (*vread)(void);
3155 + do {
3156 + seq = read_seqbegin(&__vsyscall_gtod_data.lock);
3157 +
3158 + vread = __vsyscall_gtod_data.clock.vread;
3159 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
3160 + gettimeofday(tv,NULL);
3161 + return;
3162 + }
3163 + now = vread();
3164 + base = __vsyscall_gtod_data.clock.cycle_last;
3165 + mask = __vsyscall_gtod_data.clock.mask;
3166 + mult = __vsyscall_gtod_data.clock.mult;
3167 + shift = __vsyscall_gtod_data.clock.shift;
3168 +
3169 + *tv = __vsyscall_gtod_data.wall_time_tv;
3170 +
3171 + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
3172 +
3173 + /* calculate interval: */
3174 + cycle_delta = (now - base) & mask;
3175 + /* convert to nsecs: */
3176 + nsec_delta = (cycle_delta * mult) >> shift;
3177 +
3178 + /* convert to usecs and add to timespec: */
3179 + tv->tv_usec += nsec_delta / NSEC_PER_USEC;
3180 + while (tv->tv_usec > USEC_PER_SEC) {
3181 + tv->tv_sec += 1;
3182 + tv->tv_usec -= USEC_PER_SEC;
3183 + }
3184 +}
3185 +
3186 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
3187 {
3188 - if (!__sysctl_vsyscall)
3189 - return gettimeofday(tv,tz);
3190 if (tv)
3191 do_vgettimeofday(tv);
3192 if (tz)
3193 @@ -129,11 +151,11 @@
3194 * unlikely */
3195 time_t __vsyscall(1) vtime(time_t *t)
3196 {
3197 - if (!__sysctl_vsyscall)
3198 + if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
3199 return time_syscall(t);
3200 else if (t)
3201 - *t = __xtime.tv_sec;
3202 - return __xtime.tv_sec;
3203 + *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
3204 + return __vsyscall_gtod_data.wall_time_tv.tv_sec;
3205 }
3206
3207 /* Fast way to get current CPU and node.
3208 @@ -210,7 +232,7 @@
3209 ret = -ENOMEM;
3210 goto out;
3211 }
3212 - if (!sysctl_vsyscall) {
3213 + if (!vsyscall_gtod_data.sysctl_enabled) {
3214 writew(SYSCALL, map1);
3215 writew(SYSCALL, map2);
3216 } else {
3217 @@ -232,16 +254,17 @@
3218
3219 static ctl_table kernel_table2[] = {
3220 { .ctl_name = 99, .procname = "vsyscall64",
3221 - .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
3222 + .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
3223 + .mode = 0644,
3224 .strategy = vsyscall_sysctl_nostrat,
3225 .proc_handler = vsyscall_sysctl_change },
3226 - { 0, }
3227 + {}
3228 };
3229
3230 static ctl_table kernel_root_table2[] = {
3231 { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
3232 .child = kernel_table2 },
3233 - { 0 },
3234 + {}
3235 };
3236
3237 #endif
3238 @@ -304,14 +327,14 @@
3239 BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
3240 map_vsyscall();
3241 #ifdef CONFIG_XEN
3242 - sysctl_vsyscall = 0; /* disable vgettimeofay() */
3243 + vsyscall_gtod_data.sysctl_enabled = 0; /* disable vgettimeofay() */
3244 if (boot_cpu_has(X86_FEATURE_RDTSCP))
3245 vgetcpu_mode = VGETCPU_RDTSCP;
3246 else
3247 vgetcpu_mode = VGETCPU_LSL;
3248 #endif
3249 #ifdef CONFIG_SYSCTL
3250 - register_sysctl_table(kernel_root_table2, 0);
3251 + register_sysctl_table(kernel_root_table2);
3252 #endif
3253 on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
3254 hotcpu_notifier(cpu_vsyscall_notifier, 0);
3255 --- a/arch/x86/mm/fault_32-xen.c
3256 +++ b/arch/x86/mm/fault_32-xen.c
3257 @@ -46,43 +46,17 @@
3258 }
3259 EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3260
3261 -static inline int notify_page_fault(enum die_val val, const char *str,
3262 - struct pt_regs *regs, long err, int trap, int sig)
3263 +static inline int notify_page_fault(struct pt_regs *regs, long err)
3264 {
3265 struct die_args args = {
3266 .regs = regs,
3267 - .str = str,
3268 + .str = "page fault",
3269 .err = err,
3270 - .trapnr = trap,
3271 - .signr = sig
3272 + .trapnr = 14,
3273 + .signr = SIGSEGV
3274 };
3275 - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3276 -}
3277 -
3278 -/*
3279 - * Unlock any spinlocks which will prevent us from getting the
3280 - * message out
3281 - */
3282 -void bust_spinlocks(int yes)
3283 -{
3284 - int loglevel_save = console_loglevel;
3285 -
3286 - if (yes) {
3287 - oops_in_progress = 1;
3288 - return;
3289 - }
3290 -#ifdef CONFIG_VT
3291 - unblank_screen();
3292 -#endif
3293 - oops_in_progress = 0;
3294 - /*
3295 - * OK, the message is on the console. Now we call printk()
3296 - * without oops_in_progress set so that printk will give klogd
3297 - * a poke. Hold onto your hats...
3298 - */
3299 - console_loglevel = 15; /* NMI oopser may have shut the console up */
3300 - printk(" ");
3301 - console_loglevel = loglevel_save;
3302 + return atomic_notifier_call_chain(&notify_page_fault_chain,
3303 + DIE_PAGE_FAULT, &args);
3304 }
3305
3306 /*
3307 @@ -476,8 +450,7 @@
3308 /* Can take a spurious fault if mapping changes R/O -> R/W. */
3309 if (spurious_fault(regs, address, error_code))
3310 return;
3311 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3312 - SIGSEGV) == NOTIFY_STOP)
3313 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3314 return;
3315 /*
3316 * Don't take the mm semaphore here. If we fixup a prefetch
3317 @@ -486,8 +459,7 @@
3318 goto bad_area_nosemaphore;
3319 }
3320
3321 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3322 - SIGSEGV) == NOTIFY_STOP)
3323 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3324 return;
3325
3326 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
3327 --- a/arch/x86/mm/fault_64-xen.c
3328 +++ b/arch/x86/mm/fault_64-xen.c
3329 @@ -56,38 +56,17 @@
3330 }
3331 EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3332
3333 -static inline int notify_page_fault(enum die_val val, const char *str,
3334 - struct pt_regs *regs, long err, int trap, int sig)
3335 +static inline int notify_page_fault(struct pt_regs *regs, long err)
3336 {
3337 struct die_args args = {
3338 .regs = regs,
3339 - .str = str,
3340 + .str = "page fault",
3341 .err = err,
3342 - .trapnr = trap,
3343 - .signr = sig
3344 + .trapnr = 14,
3345 + .signr = SIGSEGV
3346 };
3347 - return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
3348 -}
3349 -
3350 -void bust_spinlocks(int yes)
3351 -{
3352 - int loglevel_save = console_loglevel;
3353 - if (yes) {
3354 - oops_in_progress = 1;
3355 - } else {
3356 -#ifdef CONFIG_VT
3357 - unblank_screen();
3358 -#endif
3359 - oops_in_progress = 0;
3360 - /*
3361 - * OK, the message is on the console. Now we call printk()
3362 - * without oops_in_progress set so that printk will give klogd
3363 - * a poke. Hold onto your hats...
3364 - */
3365 - console_loglevel = 15; /* NMI oopser may have shut the console up */
3366 - printk(" ");
3367 - console_loglevel = loglevel_save;
3368 - }
3369 + return atomic_notifier_call_chain(&notify_page_fault_chain,
3370 + DIE_PAGE_FAULT, &args);
3371 }
3372
3373 /* Sometimes the CPU reports invalid exceptions on prefetch.
3374 @@ -437,8 +416,7 @@
3375 /* Can take a spurious fault if mapping changes R/O -> R/W. */
3376 if (spurious_fault(regs, address, error_code))
3377 return;
3378 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3379 - SIGSEGV) == NOTIFY_STOP)
3380 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3381 return;
3382 /*
3383 * Don't take the mm semaphore here. If we fixup a prefetch
3384 @@ -447,8 +425,7 @@
3385 goto bad_area_nosemaphore;
3386 }
3387
3388 - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
3389 - SIGSEGV) == NOTIFY_STOP)
3390 + if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3391 return;
3392
3393 if (likely(regs->eflags & X86_EFLAGS_IF))
3394 --- a/arch/x86/mm/highmem_32-xen.c
3395 +++ b/arch/x86/mm/highmem_32-xen.c
3396 @@ -33,14 +33,16 @@
3397
3398 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
3399 pagefault_disable();
3400 +
3401 + idx = type + KM_TYPE_NR*smp_processor_id();
3402 + BUG_ON(!pte_none(*(kmap_pte-idx)));
3403 +
3404 if (!PageHighMem(page))
3405 return page_address(page);
3406
3407 - idx = type + KM_TYPE_NR*smp_processor_id();
3408 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3409 - if (!pte_none(*(kmap_pte-idx)))
3410 - BUG();
3411 set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
3412 + arch_flush_lazy_mmu_mode();
3413
3414 return (void*) vaddr;
3415 }
3416 @@ -94,6 +96,7 @@
3417 idx = type + KM_TYPE_NR*smp_processor_id();
3418 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
3419 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
3420 + arch_flush_lazy_mmu_mode();
3421
3422 return (void*) vaddr;
3423 }
3424 --- a/arch/x86/mm/init_32-xen.c
3425 +++ b/arch/x86/mm/init_32-xen.c
3426 @@ -68,6 +68,7 @@
3427
3428 #ifdef CONFIG_X86_PAE
3429 pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3430 + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
3431 make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
3432 set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
3433 pud = pud_offset(pgd, 0);
3434 @@ -89,6 +90,7 @@
3435 {
3436 if (pmd_none(*pmd)) {
3437 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
3438 + paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
3439 make_lowmem_page_readonly(page_table,
3440 XENFEAT_writable_page_tables);
3441 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
3442 --- a/arch/x86/mm/init_64-xen.c
3443 +++ b/arch/x86/mm/init_64-xen.c
3444 @@ -1111,20 +1111,30 @@
3445 extern int exception_trace, page_fault_trace;
3446
3447 static ctl_table debug_table2[] = {
3448 - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
3449 - proc_dointvec },
3450 - { 0, }
3451 + {
3452 + .ctl_name = 99,
3453 + .procname = "exception-trace",
3454 + .data = &exception_trace,
3455 + .maxlen = sizeof(int),
3456 + .mode = 0644,
3457 + .proc_handler = proc_dointvec
3458 + },
3459 + {}
3460 };
3461
3462 static ctl_table debug_root_table2[] = {
3463 - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
3464 - .child = debug_table2 },
3465 - { 0 },
3466 + {
3467 + .ctl_name = CTL_DEBUG,
3468 + .procname = "debug",
3469 + .mode = 0555,
3470 + .child = debug_table2
3471 + },
3472 + {}
3473 };
3474
3475 static __init int x8664_sysctl_init(void)
3476 {
3477 - register_sysctl_table(debug_root_table2, 1);
3478 + register_sysctl_table(debug_root_table2);
3479 return 0;
3480 }
3481 __initcall(x8664_sysctl_init);
3482 --- a/arch/x86/mm/pageattr_64-xen.c
3483 +++ b/arch/x86/mm/pageattr_64-xen.c
3484 @@ -344,8 +344,8 @@
3485 void *adr = page_address(pg);
3486 if (cpu_has_clflush)
3487 cache_flush_page(adr);
3488 - __flush_tlb_one(adr);
3489 }
3490 + __flush_tlb_all();
3491 }
3492
3493 static inline void flush_map(struct list_head *l)
3494 @@ -370,6 +370,7 @@
3495 pud_t *pud;
3496 pmd_t *pmd;
3497 pte_t large_pte;
3498 + unsigned long pfn;
3499
3500 pgd = pgd_offset_k(address);
3501 BUG_ON(pgd_none(*pgd));
3502 @@ -377,7 +378,8 @@
3503 BUG_ON(pud_none(*pud));
3504 pmd = pmd_offset(pud, address);
3505 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
3506 - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
3507 + pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
3508 + large_pte = pfn_pte(pfn, ref_prot);
3509 large_pte = pte_mkhuge(large_pte);
3510 set_pte((pte_t *)pmd, large_pte);
3511 }
3512 --- a/arch/x86/mm/pgtable_32-xen.c
3513 +++ b/arch/x86/mm/pgtable_32-xen.c
3514 @@ -149,6 +149,8 @@
3515 void __init reserve_top_address(unsigned long reserve)
3516 {
3517 BUG_ON(fixmaps > 0);
3518 + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
3519 + (int)-reserve);
3520 __FIXADDR_TOP = -reserve - PAGE_SIZE;
3521 __VMALLOC_RESERVE += reserve;
3522 }
3523 @@ -252,6 +254,12 @@
3524 swapper_pg_dir + USER_PTRS_PER_PGD,
3525 KERNEL_PGD_PTRS);
3526 memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
3527 +
3528 + /* must happen under lock */
3529 + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
3530 + __pa(swapper_pg_dir) >> PAGE_SHIFT,
3531 + USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
3532 +
3533 pgd_list_add(pgd);
3534 spin_unlock_irqrestore(&pgd_lock, flags);
3535 }
3536 @@ -262,6 +270,7 @@
3537 {
3538 unsigned long flags; /* can be called from interrupt context */
3539
3540 + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
3541 spin_lock_irqsave(&pgd_lock, flags);
3542 pgd_list_del(pgd);
3543 spin_unlock_irqrestore(&pgd_lock, flags);
3544 @@ -286,6 +295,7 @@
3545 pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3546 if (!pmd)
3547 goto out_oom;
3548 + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3549 set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
3550 }
3551 return pgd;
3552 @@ -308,6 +318,7 @@
3553 pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
3554 if (!pmd[i])
3555 goto out_oom;
3556 + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
3557 }
3558
3559 spin_lock_irqsave(&pgd_lock, flags);
3560 @@ -348,12 +359,17 @@
3561
3562 out_oom:
3563 if (HAVE_SHARED_KERNEL_PMD) {
3564 - for (i--; i >= 0; i--)
3565 - kmem_cache_free(pmd_cache,
3566 - (void *)__va(pgd_val(pgd[i])-1));
3567 + for (i--; i >= 0; i--) {
3568 + pgd_t pgdent = pgd[i];
3569 + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3570 + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3571 + kmem_cache_free(pmd_cache, pmd);
3572 + }
3573 } else {
3574 - for (i--; i >= 0; i--)
3575 + for (i--; i >= 0; i--) {
3576 + paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
3577 kmem_cache_free(pmd_cache, pmd[i]);
3578 + }
3579 kfree(pmd);
3580 }
3581 kmem_cache_free(pgd_cache, pgd);
3582 @@ -377,7 +393,9 @@
3583 /* in the PAE case user pgd entries are overwritten before usage */
3584 if (PTRS_PER_PMD > 1) {
3585 for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
3586 - pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
3587 + pgd_t pgdent = pgd[i];
3588 + void* pmd = (void *)__va(pgd_val(pgdent)-1);
3589 + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
3590 kmem_cache_free(pmd_cache, pmd);
3591 }
3592
3593 --- a/drivers/char/tpm/tpm_xen.c
3594 +++ b/drivers/char/tpm/tpm_xen.c
3595 @@ -481,7 +481,6 @@
3596
3597 static struct xenbus_driver tpmfront = {
3598 .name = "vtpm",
3599 - .owner = THIS_MODULE,
3600 .ids = tpmfront_ids,
3601 .probe = tpmfront_probe,
3602 .remove = tpmfront_remove,
3603 @@ -491,9 +490,9 @@
3604 .suspend_cancel = tpmfront_suspend_cancel,
3605 };
3606
3607 -static void __init init_tpm_xenbus(void)
3608 +static int __init init_tpm_xenbus(void)
3609 {
3610 - xenbus_register_frontend(&tpmfront);
3611 + return xenbus_register_frontend(&tpmfront);
3612 }
3613
3614 static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
3615 --- a/drivers/xen/balloon/sysfs.c
3616 +++ b/drivers/xen/balloon/sysfs.c
3617 @@ -33,6 +33,7 @@
3618 #include <linux/stat.h>
3619 #include <linux/string.h>
3620 #include <linux/sysdev.h>
3621 +#include <linux/module.h>
3622 #include "common.h"
3623
3624 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
3625 --- a/drivers/xen/blkback/xenbus.c
3626 +++ b/drivers/xen/blkback/xenbus.c
3627 @@ -519,7 +519,6 @@
3628
3629 static struct xenbus_driver blkback = {
3630 .name = "vbd",
3631 - .owner = THIS_MODULE,
3632 .ids = blkback_ids,
3633 .probe = blkback_probe,
3634 .remove = blkback_remove,
3635 @@ -529,5 +528,6 @@
3636
3637 void blkif_xenbus_init(void)
3638 {
3639 - xenbus_register_backend(&blkback);
3640 + if (xenbus_register_backend(&blkback))
3641 + BUG();
3642 }
3643 --- a/drivers/xen/blkfront/blkfront.c
3644 +++ b/drivers/xen/blkfront/blkfront.c
3645 @@ -893,7 +893,6 @@
3646
3647 static struct xenbus_driver blkfront = {
3648 .name = "vbd",
3649 - .owner = THIS_MODULE,
3650 .ids = blkfront_ids,
3651 .probe = blkfront_probe,
3652 .remove = blkfront_remove,
3653 --- a/drivers/xen/blktap/xenbus.c
3654 +++ b/drivers/xen/blktap/xenbus.c
3655 @@ -463,7 +463,6 @@
3656
3657 static struct xenbus_driver blktap = {
3658 .name = "tap",
3659 - .owner = THIS_MODULE,
3660 .ids = blktap_ids,
3661 .probe = blktap_probe,
3662 .remove = blktap_remove,
3663 @@ -473,5 +472,6 @@
3664
3665 void tap_blkif_xenbus_init(void)
3666 {
3667 - xenbus_register_backend(&blktap);
3668 + if (xenbus_register_backend(&blktap))
3669 + BUG();
3670 }
3671 --- a/drivers/xen/core/evtchn.c
3672 +++ b/drivers/xen/core/evtchn.c
3673 @@ -133,7 +133,7 @@
3674 BUG_ON(!test_bit(chn, s->evtchn_mask));
3675
3676 if (irq != -1)
3677 - set_native_irq_info(irq, cpumask_of_cpu(cpu));
3678 + irq_desc[irq].affinity = cpumask_of_cpu(cpu);
3679
3680 clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
3681 set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
3682 @@ -146,7 +146,7 @@
3683
3684 /* By default all event channels notify CPU#0. */
3685 for (i = 0; i < NR_IRQS; i++)
3686 - set_native_irq_info(i, cpumask_of_cpu(0));
3687 + irq_desc[i].affinity = cpumask_of_cpu(0);
3688
3689 memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
3690 memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
3691 --- a/drivers/xen/core/smpboot.c
3692 +++ b/drivers/xen/core/smpboot.c
3693 @@ -261,7 +261,7 @@
3694 {
3695 unsigned int cpu;
3696 struct task_struct *idle;
3697 - int apicid, acpiid;
3698 + int apicid;
3699 struct vcpu_get_physid cpu_id;
3700 #ifdef __x86_64__
3701 struct desc_ptr *gdt_descr;
3702 @@ -270,14 +270,8 @@
3703 #endif
3704
3705 apicid = 0;
3706 - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
3707 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
3708 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
3709 - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
3710 -#ifdef CONFIG_ACPI
3711 - if (acpiid != 0xff)
3712 - x86_acpiid_to_apicid[acpiid] = apicid;
3713 -#endif
3714 - }
3715 boot_cpu_data.apicid = apicid;
3716 cpu_data[0] = boot_cpu_data;
3717
3718 @@ -333,14 +327,8 @@
3719 XENFEAT_writable_descriptor_tables);
3720
3721 apicid = cpu;
3722 - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
3723 + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
3724 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
3725 - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
3726 -#ifdef CONFIG_ACPI
3727 - if (acpiid != 0xff)
3728 - x86_acpiid_to_apicid[acpiid] = apicid;
3729 -#endif
3730 - }
3731 cpu_data[cpu] = boot_cpu_data;
3732 cpu_data[cpu].apicid = apicid;
3733
3734 --- a/drivers/xen/fbfront/xenfb.c
3735 +++ b/drivers/xen/fbfront/xenfb.c
3736 @@ -856,7 +856,6 @@
3737
3738 static struct xenbus_driver xenfb_driver = {
3739 .name = "vfb",
3740 - .owner = THIS_MODULE,
3741 .ids = xenfb_ids,
3742 .probe = xenfb_probe,
3743 .remove = xenfb_remove,
3744 --- a/drivers/xen/fbfront/xenkbd.c
3745 +++ b/drivers/xen/fbfront/xenkbd.c
3746 @@ -323,7 +323,6 @@
3747
3748 static struct xenbus_driver xenkbd_driver = {
3749 .name = "vkbd",
3750 - .owner = THIS_MODULE,
3751 .ids = xenkbd_ids,
3752 .probe = xenkbd_probe,
3753 .remove = xenkbd_remove,
3754 --- a/drivers/xen/netback/xenbus.c
3755 +++ b/drivers/xen/netback/xenbus.c
3756 @@ -437,7 +437,6 @@
3757
3758 static struct xenbus_driver netback = {
3759 .name = "vif",
3760 - .owner = THIS_MODULE,
3761 .ids = netback_ids,
3762 .probe = netback_probe,
3763 .remove = netback_remove,
3764 @@ -448,5 +447,6 @@
3765
3766 void netif_xenbus_init(void)
3767 {
3768 - xenbus_register_backend(&netback);
3769 + if (xenbus_register_backend(&netback))
3770 + BUG();
3771 }
3772 --- a/drivers/xen/netfront/netfront.c
3773 +++ b/drivers/xen/netfront/netfront.c
3774 @@ -1893,20 +1893,19 @@
3775 };
3776
3777 #ifdef CONFIG_SYSFS
3778 -static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
3779 +static ssize_t show_rxbuf_min(struct device *dev,
3780 + struct device_attribute *attr, char *buf)
3781 {
3782 - struct net_device *netdev = container_of(cd, struct net_device,
3783 - class_dev);
3784 - struct netfront_info *info = netdev_priv(netdev);
3785 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3786
3787 return sprintf(buf, "%u\n", info->rx_min_target);
3788 }
3789
3790 -static ssize_t store_rxbuf_min(struct class_device *cd,
3791 +static ssize_t store_rxbuf_min(struct device *dev,
3792 + struct device_attribute *attr,
3793 const char *buf, size_t len)
3794 {
3795 - struct net_device *netdev = container_of(cd, struct net_device,
3796 - class_dev);
3797 + struct net_device *netdev = to_net_dev(dev);
3798 struct netfront_info *np = netdev_priv(netdev);
3799 char *endp;
3800 unsigned long target;
3801 @@ -1936,20 +1935,19 @@
3802 return len;
3803 }
3804
3805 -static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
3806 +static ssize_t show_rxbuf_max(struct device *dev,
3807 + struct device_attribute *attr, char *buf)
3808 {
3809 - struct net_device *netdev = container_of(cd, struct net_device,
3810 - class_dev);
3811 - struct netfront_info *info = netdev_priv(netdev);
3812 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3813
3814 return sprintf(buf, "%u\n", info->rx_max_target);
3815 }
3816
3817 -static ssize_t store_rxbuf_max(struct class_device *cd,
3818 +static ssize_t store_rxbuf_max(struct device *dev,
3819 + struct device_attribute *attr,
3820 const char *buf, size_t len)
3821 {
3822 - struct net_device *netdev = container_of(cd, struct net_device,
3823 - class_dev);
3824 + struct net_device *netdev = to_net_dev(dev);
3825 struct netfront_info *np = netdev_priv(netdev);
3826 char *endp;
3827 unsigned long target;
3828 @@ -1979,16 +1977,15 @@
3829 return len;
3830 }
3831
3832 -static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
3833 +static ssize_t show_rxbuf_cur(struct device *dev,
3834 + struct device_attribute *attr, char *buf)
3835 {
3836 - struct net_device *netdev = container_of(cd, struct net_device,
3837 - class_dev);
3838 - struct netfront_info *info = netdev_priv(netdev);
3839 + struct netfront_info *info = netdev_priv(to_net_dev(dev));
3840
3841 return sprintf(buf, "%u\n", info->rx_target);
3842 }
3843
3844 -static const struct class_device_attribute xennet_attrs[] = {
3845 +static struct device_attribute xennet_attrs[] = {
3846 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
3847 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
3848 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
3849 @@ -2000,8 +1997,8 @@
3850 int error = 0;
3851
3852 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
3853 - error = class_device_create_file(&netdev->class_dev,
3854 - &xennet_attrs[i]);
3855 + error = device_create_file(&netdev->dev,
3856 + &xennet_attrs[i]);
3857 if (error)
3858 goto fail;
3859 }
3860 @@ -2009,8 +2006,7 @@
3861
3862 fail:
3863 while (--i >= 0)
3864 - class_device_remove_file(&netdev->class_dev,
3865 - &xennet_attrs[i]);
3866 + device_remove_file(&netdev->dev, &xennet_attrs[i]);
3867 return error;
3868 }
3869
3870 @@ -2018,10 +2014,8 @@
3871 {
3872 int i;
3873
3874 - for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
3875 - class_device_remove_file(&netdev->class_dev,
3876 - &xennet_attrs[i]);
3877 - }
3878 + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
3879 + device_remove_file(&netdev->dev, &xennet_attrs[i]);
3880 }
3881
3882 #endif /* CONFIG_SYSFS */
3883 @@ -2187,7 +2181,6 @@
3884
3885 static struct xenbus_driver netfront_driver = {
3886 .name = "vif",
3887 - .owner = THIS_MODULE,
3888 .ids = netfront_ids,
3889 .probe = netfront_probe,
3890 .remove = __devexit_p(netfront_remove),
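(For reference only, not part of the patch hunks: the netfront changes above follow the 2.6.21 move from class_device attributes to struct device attributes. A minimal sketch of the same pattern, written as it would sit alongside the handlers in netfront.c — the attribute name "rxbuf_demo" is hypothetical; the helpers are the ones the hunks above already use.)

#include <linux/device.h>
#include <linux/netdevice.h>

static ssize_t show_rxbuf_demo(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	/* the struct device is embedded in net_device, so recover it first */
	struct net_device *netdev = to_net_dev(dev);
	struct netfront_info *info = netdev_priv(netdev);

	return sprintf(buf, "%u\n", info->rx_target);
}
static DEVICE_ATTR(rxbuf_demo, S_IRUGO, show_rxbuf_demo, NULL);

/* register/unregister exactly as the loops above do: */
/*	device_create_file(&netdev->dev, &dev_attr_rxbuf_demo);	*/
/*	device_remove_file(&netdev->dev, &dev_attr_rxbuf_demo);	*/
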
3891 --- a/drivers/xen/pciback/xenbus.c
3892 +++ b/drivers/xen/pciback/xenbus.c
3893 @@ -663,7 +663,6 @@
3894
3895 static struct xenbus_driver xenbus_pciback_driver = {
3896 .name = "pciback",
3897 - .owner = THIS_MODULE,
3898 .ids = xenpci_ids,
3899 .probe = pciback_xenbus_probe,
3900 .remove = pciback_xenbus_remove,
3901 --- a/drivers/xen/pcifront/xenbus.c
3902 +++ b/drivers/xen/pcifront/xenbus.c
3903 @@ -435,7 +435,6 @@
3904
3905 static struct xenbus_driver xenbus_pcifront_driver = {
3906 .name = "pcifront",
3907 - .owner = THIS_MODULE,
3908 .ids = xenpci_ids,
3909 .probe = pcifront_xenbus_probe,
3910 .remove = pcifront_xenbus_remove,
3911 --- a/drivers/xen/tpmback/common.h
3912 +++ b/drivers/xen/tpmback/common.h
3913 @@ -54,11 +54,11 @@
3914
3915 void tpmif_disconnect_complete(tpmif_t * tpmif);
3916 tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
3917 -void tpmif_interface_init(void);
3918 +int tpmif_interface_init(void);
3919 void tpmif_interface_exit(void);
3920 void tpmif_schedule_work(tpmif_t * tpmif);
3921 void tpmif_deschedule_work(tpmif_t * tpmif);
3922 -void tpmif_xenbus_init(void);
3923 +int tpmif_xenbus_init(void);
3924 void tpmif_xenbus_exit(void);
3925 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
3926 irqreturn_t tpmif_be_int(int irq, void *dev_id);
3927 --- a/drivers/xen/tpmback/interface.c
3928 +++ b/drivers/xen/tpmback/interface.c
3929 @@ -156,13 +156,14 @@
3930 free_tpmif(tpmif);
3931 }
3932
3933 -void __init tpmif_interface_init(void)
3934 +int __init tpmif_interface_init(void)
3935 {
3936 tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
3937 0, 0, NULL, NULL);
3938 + return tpmif_cachep ? 0 : -ENOMEM;
3939 }
3940
3941 -void __exit tpmif_interface_exit(void)
3942 +void tpmif_interface_exit(void)
3943 {
3944 kmem_cache_destroy(tpmif_cachep);
3945 }
3946 --- a/drivers/xen/tpmback/tpmback.c
3947 +++ b/drivers/xen/tpmback/tpmback.c
3948 @@ -923,22 +923,30 @@
3949 spin_lock_init(&tpm_schedule_list_lock);
3950 INIT_LIST_HEAD(&tpm_schedule_list);
3951
3952 - tpmif_interface_init();
3953 - tpmif_xenbus_init();
3954 + rc = tpmif_interface_init();
3955 + if (!rc) {
3956 + rc = tpmif_xenbus_init();
3957 + if (rc)
3958 + tpmif_interface_exit();
3959 + }
3960 + if (rc) {
3961 + misc_deregister(&vtpms_miscdevice);
3962 + return rc;
3963 + }
3964
3965 printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
3966
3967 return 0;
3968 }
3969 -
3970 module_init(tpmback_init);
3971
3972 -void __exit tpmback_exit(void)
3973 +static void __exit tpmback_exit(void)
3974 {
3975 vtpm_release_packets(NULL, 0);
3976 tpmif_xenbus_exit();
3977 tpmif_interface_exit();
3978 misc_deregister(&vtpms_miscdevice);
3979 }
3980 +module_exit(tpmback_exit)
3981
3982 MODULE_LICENSE("Dual BSD/GPL");
3983 --- a/drivers/xen/tpmback/xenbus.c
3984 +++ b/drivers/xen/tpmback/xenbus.c
3985 @@ -270,7 +270,6 @@
3986
3987 static struct xenbus_driver tpmback = {
3988 .name = "vtpm",
3989 - .owner = THIS_MODULE,
3990 .ids = tpmback_ids,
3991 .probe = tpmback_probe,
3992 .remove = tpmback_remove,
3993 @@ -278,9 +277,9 @@
3994 };
3995
3996
3997 -void tpmif_xenbus_init(void)
3998 +int tpmif_xenbus_init(void)
3999 {
4000 - xenbus_register_backend(&tpmback);
4001 + return xenbus_register_backend(&tpmback);
4002 }
4003
4004 void tpmif_xenbus_exit(void)
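(For reference only, not part of the patch: the three tpmback files above together change the init path from void to int so failures propagate. Condensed, the pattern the hunks implement looks like the sketch below — the function names are the driver's own, the wrapper function is illustrative.)

static int __init tpmback_setup_sketch(void)
{
	int rc;

	rc = tpmif_interface_init();	/* creates tpmif_cachep, -ENOMEM on failure */
	if (rc)
		return rc;

	rc = tpmif_xenbus_init();	/* returns xenbus_register_backend(&tpmback) */
	if (rc)
		tpmif_interface_exit();	/* unwind the first step */

	return rc;
}
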
4005 --- a/drivers/xen/xenbus/xenbus_probe.c
4006 +++ b/drivers/xen/xenbus/xenbus_probe.c
4007 @@ -362,7 +362,9 @@
4008 }
4009
4010 int xenbus_register_driver_common(struct xenbus_driver *drv,
4011 - struct xen_bus_type *bus)
4012 + struct xen_bus_type *bus,
4013 + struct module *owner,
4014 + const char *mod_name)
4015 {
4016 int ret;
4017
4018 @@ -372,7 +374,10 @@
4019 drv->driver.name = drv->name;
4020 drv->driver.bus = &bus->bus;
4021 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
4022 - drv->driver.owner = drv->owner;
4023 + drv->driver.owner = owner;
4024 +#endif
4025 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
4026 + drv->driver.mod_name = mod_name;
4027 #endif
4028 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
4029 drv->driver.probe = xenbus_dev_probe;
4030 @@ -386,13 +391,15 @@
4031 return ret;
4032 }
4033
4034 -int xenbus_register_frontend(struct xenbus_driver *drv)
4035 +int __xenbus_register_frontend(struct xenbus_driver *drv,
4036 + struct module *owner, const char *mod_name)
4037 {
4038 int ret;
4039
4040 drv->read_otherend_details = read_backend_details;
4041
4042 - ret = xenbus_register_driver_common(drv, &xenbus_frontend);
4043 + ret = xenbus_register_driver_common(drv, &xenbus_frontend,
4044 + owner, mod_name);
4045 if (ret)
4046 return ret;
4047
4048 @@ -401,7 +408,7 @@
4049
4050 return 0;
4051 }
4052 -EXPORT_SYMBOL_GPL(xenbus_register_frontend);
4053 +EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
4054
4055 void xenbus_unregister_driver(struct xenbus_driver *drv)
4056 {
4057 --- a/drivers/xen/xenbus/xenbus_probe.h
4058 +++ b/drivers/xen/xenbus/xenbus_probe.h
4059 @@ -63,7 +63,9 @@
4060 extern int xenbus_dev_probe(struct device *_dev);
4061 extern int xenbus_dev_remove(struct device *_dev);
4062 extern int xenbus_register_driver_common(struct xenbus_driver *drv,
4063 - struct xen_bus_type *bus);
4064 + struct xen_bus_type *bus,
4065 + struct module *owner,
4066 + const char *mod_name);
4067 extern int xenbus_probe_node(struct xen_bus_type *bus,
4068 const char *type,
4069 const char *nodename);
4070 --- a/drivers/xen/xenbus/xenbus_probe_backend.c
4071 +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
4072 @@ -172,13 +172,15 @@
4073 return 0;
4074 }
4075
4076 -int xenbus_register_backend(struct xenbus_driver *drv)
4077 +int __xenbus_register_backend(struct xenbus_driver *drv,
4078 + struct module *owner, const char *mod_name)
4079 {
4080 drv->read_otherend_details = read_frontend_details;
4081
4082 - return xenbus_register_driver_common(drv, &xenbus_backend);
4083 + return xenbus_register_driver_common(drv, &xenbus_backend,
4084 + owner, mod_name);
4085 }
4086 -EXPORT_SYMBOL_GPL(xenbus_register_backend);
4087 +EXPORT_SYMBOL_GPL(__xenbus_register_backend);
4088
4089 /* backend/<typename>/<frontend-uuid>/<name> */
4090 static int xenbus_probe_backend_unit(const char *dir,
4091 --- a/include/asm-x86/i8253.h
4092 +++ b/include/asm-x86/i8253.h
4093 @@ -8,10 +8,14 @@
4094
4095 extern spinlock_t i8253_lock;
4096
4097 +#ifdef CONFIG_GENERIC_CLOCKEVENTS
4098 +
4099 extern struct clock_event_device *global_clock_event;
4100
4101 extern void setup_pit_timer(void);
4102
4103 +#endif
4104 +
4105 #define inb_pit inb_p
4106 #define outb_pit outb_p
4107
4108 --- a/include/asm-x86/mach-xen/asm/desc_32.h
4109 +++ b/include/asm-x86/mach-xen/asm/desc_32.h
4110 @@ -21,7 +21,7 @@
4111
4112 extern struct Xgt_desc_struct idt_descr;
4113 DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
4114 -
4115 +extern struct Xgt_desc_struct early_gdt_descr;
4116
4117 static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
4118 {
4119 --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4120 +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h
4121 @@ -9,7 +9,6 @@
4122
4123 #include <asm/scatterlist.h>
4124 #include <asm/io.h>
4125 -#include <asm/swiotlb.h>
4126
4127 struct dma_mapping_ops {
4128 int (*mapping_error)(dma_addr_t dma_addr);
4129 @@ -66,6 +65,9 @@
4130
4131 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
4132 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
4133 +
4134 +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
4135 +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
4136
4137 extern void *dma_alloc_coherent(struct device *dev, size_t size,
4138 dma_addr_t *dma_handle, gfp_t gfp);
4139 --- a/include/asm-x86/mach-xen/asm/e820_64.h
4140 +++ b/include/asm-x86/mach-xen/asm/e820_64.h
4141 @@ -46,6 +46,7 @@
4142 extern void e820_print_map(char *who);
4143 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
4144 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
4145 +extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
4146
4147 extern void e820_setup_gap(struct e820entry *e820, int nr_map);
4148 extern void e820_register_active_regions(int nid,
4149 @@ -56,6 +57,7 @@
4150 extern struct e820map e820;
4151
4152 extern unsigned ebda_addr, ebda_size;
4153 +extern unsigned long nodemap_addr, nodemap_size;
4154 #endif/*!__ASSEMBLY__*/
4155
4156 #endif/*__E820_HEADER*/
4157 --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h
4158 +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h
4159 @@ -31,10 +31,32 @@
4160
4161 #define IA32_SYSCALL_VECTOR 0x80
4162
4163 +#ifndef CONFIG_XEN
4164 +
4165 +/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
4166 + * cleanup after irq migration.
4167 + */
4168 +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
4169
4170 /*
4171 - * Vectors 0x20-0x2f are used for ISA interrupts.
4172 + * Vectors 0x30-0x3f are used for ISA interrupts.
4173 */
4174 +#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10
4175 +#define IRQ1_VECTOR IRQ0_VECTOR + 1
4176 +#define IRQ2_VECTOR IRQ0_VECTOR + 2
4177 +#define IRQ3_VECTOR IRQ0_VECTOR + 3
4178 +#define IRQ4_VECTOR IRQ0_VECTOR + 4
4179 +#define IRQ5_VECTOR IRQ0_VECTOR + 5
4180 +#define IRQ6_VECTOR IRQ0_VECTOR + 6
4181 +#define IRQ7_VECTOR IRQ0_VECTOR + 7
4182 +#define IRQ8_VECTOR IRQ0_VECTOR + 8
4183 +#define IRQ9_VECTOR IRQ0_VECTOR + 9
4184 +#define IRQ10_VECTOR IRQ0_VECTOR + 10
4185 +#define IRQ11_VECTOR IRQ0_VECTOR + 11
4186 +#define IRQ12_VECTOR IRQ0_VECTOR + 12
4187 +#define IRQ13_VECTOR IRQ0_VECTOR + 13
4188 +#define IRQ14_VECTOR IRQ0_VECTOR + 14
4189 +#define IRQ15_VECTOR IRQ0_VECTOR + 15
4190
4191 /*
4192 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
4193 @@ -43,7 +65,6 @@
4194 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
4195 * TLB, reschedule and local APIC vectors are performance-critical.
4196 */
4197 -#ifndef CONFIG_XEN
4198 #define SPURIOUS_APIC_VECTOR 0xff
4199 #define ERROR_APIC_VECTOR 0xfe
4200 #define RESCHEDULE_VECTOR 0xfd
4201 @@ -57,7 +78,6 @@
4202 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
4203
4204 #define NUM_INVALIDATE_TLB_VECTORS 8
4205 -#endif
4206
4207 /*
4208 * Local APIC timer IRQ vector is on a different priority level,
4209 @@ -68,12 +88,13 @@
4210
4211 /*
4212 * First APIC vector available to drivers: (vectors 0x30-0xee)
4213 - * we start at 0x31 to spread out vectors evenly between priority
4214 + * we start at 0x41 to spread out vectors evenly between priority
4215 * levels. (0x80 is the syscall vector)
4216 */
4217 -#define FIRST_DEVICE_VECTOR 0x31
4218 +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
4219 #define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */
4220
4221 +#endif
4222
4223 #ifndef __ASSEMBLY__
4224 typedef int vector_irq_t[NR_VECTORS];
4225 @@ -93,7 +114,7 @@
4226 extern int i8259A_irq_pending(unsigned int irq);
4227 extern void make_8259A_irq(unsigned int irq);
4228 extern void init_8259A(int aeoi);
4229 -extern void FASTCALL(send_IPI_self(int vector));
4230 +extern void send_IPI_self(int vector);
4231 extern void init_VISWS_APIC_irqs(void);
4232 extern void setup_IO_APIC(void);
4233 extern void disable_IO_APIC(void);
4234 --- a/include/asm-x86/mach-xen/asm/hypervisor.h
4235 +++ b/include/asm-x86/mach-xen/asm/hypervisor.h
4236 @@ -171,7 +171,7 @@
4237 return rc;
4238 }
4239
4240 -static inline void /*__noreturn*/
4241 +static inline void __noreturn
4242 HYPERVISOR_shutdown(
4243 unsigned int reason)
4244 {
4245 --- a/include/asm-x86/mach-xen/asm/io_32.h
4246 +++ b/include/asm-x86/mach-xen/asm/io_32.h
4247 @@ -232,12 +232,6 @@
4248 #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4249
4250 /*
4251 - * Again, i386 does not require mem IO specific function.
4252 - */
4253 -
4254 -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
4255 -
4256 -/*
4257 * Cache management
4258 *
4259 * This needed for two cases
4260 --- a/include/asm-x86/mach-xen/asm/io_64.h
4261 +++ b/include/asm-x86/mach-xen/asm/io_64.h
4262 @@ -101,7 +101,7 @@
4263
4264 #define IO_SPACE_LIMIT 0xffff
4265
4266 -#if defined(__KERNEL__) && __x86_64__
4267 +#if defined(__KERNEL__) && defined(__x86_64__)
4268
4269 #include <linux/vmalloc.h>
4270
4271 @@ -267,12 +267,6 @@
4272 */
4273 #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
4274
4275 -/*
4276 - * Again, x86-64 does not require mem IO specific function.
4277 - */
4278 -
4279 -#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
4280 -
4281 /* Nothing to do */
4282
4283 #define dma_cache_inv(_start,_size) do { } while (0)
4284 --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h
4285 +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h
4286 @@ -27,13 +27,13 @@
4287 static inline void __prepare_arch_switch(void)
4288 {
4289 /*
4290 - * Save away %fs. No need to save %gs, as it was saved on the
4291 + * Save away %gs. No need to save %fs, as it was saved on the
4292 * stack on entry. No need to save %es and %ds, as those are
4293 * always kernel segments while inside the kernel.
4294 */
4295 - asm volatile ( "mov %%fs,%0"
4296 - : "=m" (current->thread.fs));
4297 - asm volatile ( "movl %0,%%fs"
4298 + asm volatile ( "mov %%gs,%0"
4299 + : "=m" (current->thread.gs));
4300 + asm volatile ( "movl %0,%%gs"
4301 : : "r" (0) );
4302 }
4303
4304 @@ -95,7 +95,7 @@
4305 }
4306
4307 #define deactivate_mm(tsk, mm) \
4308 - asm("movl %0,%%fs": :"r" (0));
4309 + asm("movl %0,%%gs": :"r" (0));
4310
4311 static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
4312 {
4313 --- a/include/asm-x86/mach-xen/asm/pgalloc_32.h
4314 +++ b/include/asm-x86/mach-xen/asm/pgalloc_32.h
4315 @@ -6,12 +6,23 @@
4316 #include <linux/mm.h> /* for struct page */
4317 #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
4318
4319 -#define pmd_populate_kernel(mm, pmd, pte) \
4320 - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte)))
4321 +#define paravirt_alloc_pt(pfn) do { } while (0)
4322 +#define paravirt_alloc_pd(pfn) do { } while (0)
4323 +#define paravirt_alloc_pd(pfn) do { } while (0)
4324 +#define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
4325 +#define paravirt_release_pt(pfn) do { } while (0)
4326 +#define paravirt_release_pd(pfn) do { } while (0)
4327 +
4328 +#define pmd_populate_kernel(mm, pmd, pte) \
4329 +do { \
4330 + paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
4331 + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
4332 +} while (0)
4333
4334 #define pmd_populate(mm, pmd, pte) \
4335 do { \
4336 unsigned long pfn = page_to_pfn(pte); \
4337 + paravirt_alloc_pt(pfn); \
4338 if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \
4339 if (!PageHighMem(pte)) \
4340 BUG_ON(HYPERVISOR_update_va_mapping( \
4341 @@ -42,7 +53,11 @@
4342
4343 extern void pte_free(struct page *pte);
4344
4345 -#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
4346 +#define __pte_free_tlb(tlb,pte) \
4347 +do { \
4348 + paravirt_release_pt(page_to_pfn(pte)); \
4349 + tlb_remove_page((tlb),(pte)); \
4350 +} while (0)
4351
4352 #ifdef CONFIG_X86_PAE
4353 /*
4354 --- a/include/asm-x86/mach-xen/asm/pgtable_32.h
4355 +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
4356 @@ -275,6 +275,7 @@
4357 */
4358 #define pte_update(mm, addr, ptep) do { } while (0)
4359 #define pte_update_defer(mm, addr, ptep) do { } while (0)
4360 +#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
4361
4362 /*
4363 * We only update the dirty/accessed state if we set
4364 @@ -490,12 +491,24 @@
4365 #endif
4366
4367 #if defined(CONFIG_HIGHPTE)
4368 -#define pte_offset_map(dir, address) \
4369 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
4370 - pte_index(address))
4371 -#define pte_offset_map_nested(dir, address) \
4372 - ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + \
4373 - pte_index(address))
4374 +#define pte_offset_map(dir, address) \
4375 +({ \
4376 + pte_t *__ptep; \
4377 + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4378 + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
4379 + paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
4380 + __ptep = __ptep + pte_index(address); \
4381 + __ptep; \
4382 +})
4383 +#define pte_offset_map_nested(dir, address) \
4384 +({ \
4385 + pte_t *__ptep; \
4386 + unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
4387 + __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
4388 + paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
4389 + __ptep = __ptep + pte_index(address); \
4390 + __ptep; \
4391 +})
4392 #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
4393 #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
4394 #else
4395 --- a/include/asm-x86/mach-xen/asm/pgtable_64.h
4396 +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
4397 @@ -416,15 +416,6 @@
4398 #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
4399 #define mk_pte_huge(entry) (__pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE)
4400
4401 -/* physical address -> PTE */
4402 -static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
4403 -{
4404 - unsigned long pteval;
4405 - pteval = physpage | pgprot_val(pgprot);
4406 - pteval &= __supported_pte_mask;
4407 - return __pte(pteval);
4408 -}
4409 -
4410 /* Change flags of a PTE */
4411 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
4412 {
4413 --- a/include/asm-x86/mach-xen/asm/processor_32.h
4414 +++ b/include/asm-x86/mach-xen/asm/processor_32.h
4415 @@ -431,7 +431,7 @@
4416 .vm86_info = NULL, \
4417 .sysenter_cs = __KERNEL_CS, \
4418 .io_bitmap_ptr = NULL, \
4419 - .gs = __KERNEL_PDA, \
4420 + .fs = __KERNEL_PDA, \
4421 }
4422
4423 /*
4424 @@ -449,8 +449,8 @@
4425 }
4426
4427 #define start_thread(regs, new_eip, new_esp) do { \
4428 - __asm__("movl %0,%%fs": :"r" (0)); \
4429 - regs->xgs = 0; \
4430 + __asm__("movl %0,%%gs": :"r" (0)); \
4431 + regs->xfs = 0; \
4432 set_fs(USER_DS); \
4433 regs->xds = __USER_DS; \
4434 regs->xes = __USER_DS; \
4435 --- a/include/asm-x86/mach-xen/asm/segment_32.h
4436 +++ b/include/asm-x86/mach-xen/asm/segment_32.h
4437 @@ -83,14 +83,8 @@
4438 * The GDT has 32 entries
4439 */
4440 #define GDT_ENTRIES 32
4441 -
4442 #define GDT_SIZE (GDT_ENTRIES * 8)
4443
4444 -/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4445 -#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
4446 -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4447 -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
4448 -
4449 /* Simple and small GDT entries for booting only */
4450
4451 #define GDT_ENTRY_BOOT_CS 2
4452 @@ -132,4 +126,21 @@
4453 #define SEGMENT_GDT 0x0
4454
4455 #define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
4456 +
4457 +/*
4458 + * Matching rules for certain types of segments.
4459 + */
4460 +
4461 +/* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */
4462 +#define SEGMENT_IS_KERNEL_CODE(x) (((x) & ~3) == GDT_ENTRY_KERNEL_CS * 8 \
4463 + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3))
4464 +
4465 +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
4466 +#define SEGMENT_IS_FLAT_CODE(x) (((x) & ~0x13) == GDT_ENTRY_KERNEL_CS * 8 \
4467 + || ((x) & ~3) == (FLAT_KERNEL_CS & ~3) \
4468 + || ((x) & ~3) == (FLAT_USER_CS & ~3))
4469 +
4470 +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
4471 +#define SEGMENT_IS_PNP_CODE(x) (((x) & ~0x0b) == GDT_ENTRY_PNPBIOS_BASE * 8)
4472 +
4473 #endif
4474 --- a/include/asm-x86/mach-xen/asm/smp_32.h
4475 +++ b/include/asm-x86/mach-xen/asm/smp_32.h
4476 @@ -52,6 +52,11 @@
4477 extern void cpu_uninit(void);
4478 #endif
4479
4480 +#ifndef CONFIG_PARAVIRT
4481 +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
4482 +do { } while (0)
4483 +#endif
4484 +
4485 /*
4486 * This function is needed by all SMP systems. It must _always_ be valid
4487 * from the initial startup. We map APIC_BASE very early in page_setup(),
4488 --- a/include/asm-x86/mach-xen/asm/smp_64.h
4489 +++ b/include/asm-x86/mach-xen/asm/smp_64.h
4490 @@ -7,6 +7,7 @@
4491 #include <linux/threads.h>
4492 #include <linux/cpumask.h>
4493 #include <linux/bitops.h>
4494 +#include <linux/init.h>
4495 extern int disable_apic;
4496
4497 #ifdef CONFIG_X86_LOCAL_APIC
4498 @@ -73,7 +74,7 @@
4499 extern void __cpu_die(unsigned int cpu);
4500 extern void prefill_possible_map(void);
4501 extern unsigned num_processors;
4502 -extern unsigned disabled_cpus;
4503 +extern unsigned __cpuinitdata disabled_cpus;
4504
4505 #define NO_PROC_ID 0xFF /* No processor magic marker */
4506
4507 --- a/include/xen/xenbus.h
4508 +++ b/include/xen/xenbus.h
4509 @@ -93,8 +93,7 @@
4510
4511 /* A xenbus driver. */
4512 struct xenbus_driver {
4513 - char *name;
4514 - struct module *owner;
4515 + const char *name;
4516 const struct xenbus_device_id *ids;
4517 int (*probe)(struct xenbus_device *dev,
4518 const struct xenbus_device_id *id);
4519 @@ -115,8 +114,25 @@
4520 return container_of(drv, struct xenbus_driver, driver);
4521 }
4522
4523 -int xenbus_register_frontend(struct xenbus_driver *drv);
4524 -int xenbus_register_backend(struct xenbus_driver *drv);
4525 +int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
4526 + struct module *owner,
4527 + const char *mod_name);
4528 +
4529 +static inline int __must_check
4530 +xenbus_register_frontend(struct xenbus_driver *drv)
4531 +{
4532 + return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
4533 +}
4534 +
4535 +int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
4536 + struct module *owner,
4537 + const char *mod_name);
4538 +static inline int __must_check
4539 +xenbus_register_backend(struct xenbus_driver *drv)
4540 +{
4541 + return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
4542 +}
4543 +
4544 void xenbus_unregister_driver(struct xenbus_driver *drv);
4545
4546 struct xenbus_transaction
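(For reference only, not part of the patch: with the declarations above, drivers drop the .owner field and keep calling xenbus_register_frontend()/xenbus_register_backend(); the static inlines supply THIS_MODULE and KBUILD_MODNAME, and __must_check forces the caller to consume the result, as the netback/netfront hunks earlier in this patch do. A minimal frontend sketch follows — the "demo" name, ID table and callbacks are hypothetical.)

#include <linux/module.h>
#include <xen/xenbus.h>

static int demo_probe(struct xenbus_device *dev,
		      const struct xenbus_device_id *id)
{
	return 0;
}

static int demo_remove(struct xenbus_device *dev)
{
	return 0;
}

static const struct xenbus_device_id demo_ids[] = {
	{ "demo" },
	{ "" }
};

static struct xenbus_driver demo_driver = {
	.name	= "demo",
	/* no .owner: the inline wrapper passes THIS_MODULE on our behalf */
	.ids	= demo_ids,
	.probe	= demo_probe,
	.remove	= demo_remove,
};

static int __init demo_init(void)
{
	return xenbus_register_frontend(&demo_driver);	/* result must be checked */
}
module_init(demo_init);
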
4547 --- a/lib/swiotlb-xen.c
4548 +++ b/lib/swiotlb-xen.c
4549 @@ -138,8 +138,8 @@
4550 * Statically reserve bounce buffer space and initialize bounce buffer data
4551 * structures for the software IO TLB used to implement the PCI DMA API.
4552 */
4553 -void
4554 -swiotlb_init_with_default_size (size_t default_size)
4555 +void __init
4556 +swiotlb_init_with_default_size(size_t default_size)
4557 {
4558 unsigned long i, bytes;
4559 int rc;
4560 @@ -227,7 +227,7 @@
4561 dma_bits);
4562 }
4563
4564 -void
4565 +void __init
4566 swiotlb_init(void)
4567 {
4568 long ram_end;
4569 @@ -463,7 +463,7 @@
4570 * When the mapping is small enough return a static buffer to limit
4571 * the damage, or panic when the transfer is too big.
4572 */
4573 - printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
4574 + printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
4575 "device %s\n", (unsigned long)size, dev ? dev->bus_id : "?");
4576
4577 if (size > io_tlb_overflow && do_panic) {
4578 @@ -608,7 +608,7 @@
4579 sg[0].dma_length = 0;
4580 return 0;
4581 }
4582 - sg->dma_address = (dma_addr_t)virt_to_bus(map);
4583 + sg->dma_address = virt_to_bus(map);
4584 } else
4585 sg->dma_address = dev_addr;
4586 sg->dma_length = sg->length;
4587 @@ -630,8 +630,7 @@
4588
4589 for (i = 0; i < nelems; i++, sg++)
4590 if (in_swiotlb_aperture(sg->dma_address))
4591 - unmap_single(hwdev,
4592 - (void *)bus_to_virt(sg->dma_address),
4593 + unmap_single(hwdev, bus_to_virt(sg->dma_address),
4594 sg->dma_length, dir);
4595 else
4596 gnttab_dma_unmap_page(sg->dma_address);
4597 @@ -654,8 +653,7 @@
4598
4599 for (i = 0; i < nelems; i++, sg++)
4600 if (in_swiotlb_aperture(sg->dma_address))
4601 - sync_single(hwdev,
4602 - (void *)bus_to_virt(sg->dma_address),
4603 + sync_single(hwdev, bus_to_virt(sg->dma_address),
4604 sg->dma_length, dir);
4605 }
4606
4607 @@ -669,8 +667,7 @@
4608
4609 for (i = 0; i < nelems; i++, sg++)
4610 if (in_swiotlb_aperture(sg->dma_address))
4611 - sync_single(hwdev,
4612 - (void *)bus_to_virt(sg->dma_address),
4613 + sync_single(hwdev, bus_to_virt(sg->dma_address),
4614 sg->dma_length, dir);
4615 }
4616