Magellan Linux

Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1025-2.6.25-xen-patch-2.6.24.patch

Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 204287 bytes
- using opensuse xen patchset, updated kernel configs

1 From: www.kernel.org
2 Subject: Update to 2.6.24
3 Patch-mainline: 2.6.24
4
5 Automatically created from "patches.kernel.org/patch-2.6.24" by xen-port-patches.py
6
7 Acked-by: jbeulich@novell.com
8
9 ---
10 include/asm-x86/mach-xen/asm/mmu_64.h | 31 --
11 include/asm-x86/mach-xen/asm/scatterlist_32.h | 24 -
12 arch/x86/Kconfig | 8
13 arch/x86/Makefile | 6
14 arch/x86/ia32/ia32entry-xen.S | 19 -
15 arch/x86/kernel/Makefile | 2
16 arch/x86/kernel/acpi/sleep_32-xen.c | 2
17 arch/x86/kernel/acpi/sleep_64-xen.c | 3
18 arch/x86/kernel/apic_64-xen.c | 46 ++-
19 arch/x86/kernel/cpu/common-xen.c | 2
20 arch/x86/kernel/e820_32-xen.c | 23 +
21 arch/x86/kernel/e820_64-xen.c | 41 ++-
22 arch/x86/kernel/early_printk-xen.c | 15 -
23 arch/x86/kernel/entry_32-xen.S | 8
24 arch/x86/kernel/entry_64-xen.S | 16 -
25 arch/x86/kernel/genapic_64-xen.c | 19 +
26 arch/x86/kernel/head64-xen.c | 24 -
27 arch/x86/kernel/init_task-xen.c | 15 -
28 arch/x86/kernel/io_apic_32-xen.c | 58 +++-
29 arch/x86/kernel/io_apic_64-xen.c | 114 +++++++-
30 arch/x86/kernel/ioport_32-xen.c | 2
31 arch/x86/kernel/ioport_64-xen.c | 2
32 arch/x86/kernel/irq_32-xen.c | 59 +++-
33 arch/x86/kernel/irq_64-xen.c | 57 +++-
34 arch/x86/kernel/ldt_32-xen.c | 16 -
35 arch/x86/kernel/ldt_64-xen.c | 16 -
36 arch/x86/kernel/mpparse_32-xen.c | 2
37 arch/x86/kernel/mpparse_64-xen.c | 19 +
38 arch/x86/kernel/pci-dma_32-xen.c | 42 +--
39 arch/x86/kernel/pci-swiotlb_64-xen.c | 2
40 arch/x86/kernel/process_32-xen.c | 71 +++--
41 arch/x86/kernel/process_64-xen.c | 20 +
42 arch/x86/kernel/quirks-xen.c | 352 +++++++++++++++++++++++++-
43 arch/x86/kernel/setup64-xen.c | 16 -
44 arch/x86/kernel/setup_32-xen.c | 139 ++++++----
45 arch/x86/kernel/setup_64-xen.c | 219 ++++++++++------
46 arch/x86/kernel/smp_32-xen.c | 9
47 arch/x86/kernel/smp_64-xen.c | 125 +++++----
48 arch/x86/kernel/time_32-xen.c | 27 +
49 arch/x86/kernel/traps_32-xen.c | 105 ++-----
50 arch/x86/kernel/traps_64-xen.c | 49 +--
51 arch/x86/kernel/vsyscall_64-xen.c | 34 +-
52 arch/x86/mm/fault_32-xen.c | 72 ++---
53 arch/x86/mm/fault_64-xen.c | 71 ++---
54 arch/x86/mm/init_32-xen.c | 47 +--
55 arch/x86/mm/init_64-xen.c | 61 +++-
56 arch/x86/mm/pageattr_64-xen.c | 18 -
57 arch/x86/mm/pgtable_32-xen.c | 5
58 arch/x86/pci/Makefile | 4
59 arch/x86/pci/Makefile_32 | 4
60 arch/x86/pci/Makefile_64 | 4
61 arch/x86/pci/irq-xen.c | 47 +++
62 drivers/xen/blkback/blkback.c | 5
63 drivers/xen/blkfront/blkfront.c | 55 +---
64 drivers/xen/core/machine_kexec.c | 4
65 drivers/xen/core/smpboot.c | 53 +--
66 drivers/xen/netback/loopback.c | 4
67 drivers/xen/netback/netback.c | 4
68 drivers/xen/netback/xenbus.c | 12
69 drivers/xen/netfront/accel.c | 8
70 drivers/xen/netfront/netfront.c | 26 -
71 drivers/xen/netfront/netfront.h | 2
72 drivers/xen/pciback/Makefile | 4
73 drivers/xen/pcifront/Makefile | 4
74 drivers/xen/sfc_netback/accel_fwd.c | 12
75 drivers/xen/sfc_netback/accel_msg.c | 4
76 drivers/xen/sfc_netfront/accel_msg.c | 23 +
77 drivers/xen/sfc_netfront/accel_vi.c | 11
78 drivers/xen/sfc_netutil/accel_util.h | 3
79 drivers/xen/xenbus/xenbus_probe.c | 13
80 drivers/xen/xenbus/xenbus_probe_backend.c | 27 -
81 fs/xfs/linux-2.6/xfs_buf.c | 2
82 include/asm-x86/mach-xen/asm/agp.h | 30 +-
83 include/asm-x86/mach-xen/asm/desc.h | 5
84 include/asm-x86/mach-xen/asm/desc_64.h | 34 ++
85 include/asm-x86/mach-xen/asm/dma-mapping.h | 5
86 include/asm-x86/mach-xen/asm/dma-mapping_32.h | 2
87 include/asm-x86/mach-xen/asm/dma-mapping_64.h | 5
88 include/asm-x86/mach-xen/asm/e820.h | 33 ++
89 include/asm-x86/mach-xen/asm/e820_64.h | 20 -
90 include/asm-x86/mach-xen/asm/fixmap.h | 5
91 include/asm-x86/mach-xen/asm/hw_irq.h | 5
92 include/asm-x86/mach-xen/asm/hw_irq_64.h | 35 +-
93 include/asm-x86/mach-xen/asm/hypercall.h | 5
94 include/asm-x86/mach-xen/asm/io.h | 5
95 include/asm-x86/mach-xen/asm/io_32.h | 28 --
96 include/asm-x86/mach-xen/asm/io_64.h | 6
97 include/asm-x86/mach-xen/asm/irq.h | 5
98 include/asm-x86/mach-xen/asm/irqflags.h | 5
99 include/asm-x86/mach-xen/asm/irqflags_32.h | 30 ++
100 include/asm-x86/mach-xen/asm/irqflags_64.h | 30 ++
101 include/asm-x86/mach-xen/asm/maddr.h | 5
102 include/asm-x86/mach-xen/asm/mmu.h | 28 +-
103 include/asm-x86/mach-xen/asm/mmu_context.h | 5
104 include/asm-x86/mach-xen/asm/nmi.h | 7
105 include/asm-x86/mach-xen/asm/page.h | 13
106 include/asm-x86/mach-xen/asm/page_64.h | 1
107 include/asm-x86/mach-xen/asm/pci.h | 100 +++++++
108 include/asm-x86/mach-xen/asm/pci_32.h | 65 ----
109 include/asm-x86/mach-xen/asm/pci_64.h | 62 ----
110 include/asm-x86/mach-xen/asm/pgalloc.h | 5
111 include/asm-x86/mach-xen/asm/pgtable.h | 5
112 include/asm-x86/mach-xen/asm/pgtable_32.h | 7
113 include/asm-x86/mach-xen/asm/pgtable_64.h | 3
114 include/asm-x86/mach-xen/asm/processor.h | 5
115 include/asm-x86/mach-xen/asm/processor_32.h | 47 ++-
116 include/asm-x86/mach-xen/asm/processor_64.h | 43 ++-
117 include/asm-x86/mach-xen/asm/scatterlist.h | 1
118 include/asm-x86/mach-xen/asm/segment.h | 5
119 include/asm-x86/mach-xen/asm/smp.h | 5
120 include/asm-x86/mach-xen/asm/smp_32.h | 12
121 include/asm-x86/mach-xen/asm/smp_64.h | 25 +
122 include/asm-x86/mach-xen/asm/swiotlb.h | 5
123 include/asm-x86/mach-xen/asm/system.h | 5
124 include/asm-x86/mach-xen/asm/system_32.h | 28 +-
125 include/asm-x86/mach-xen/asm/system_64.h | 27 +
126 include/asm-x86/mach-xen/asm/tlbflush.h | 5
127 include/asm-x86/mach-xen/asm/tlbflush_32.h | 7
128 include/asm-x86/mach-xen/asm/tlbflush_64.h | 9
129 include/asm-x86/mach-xen/asm/xor.h | 5
130 include/asm-x86/mach-xen/mach_time.h | 113 --------
131 include/asm-x86/mach-xen/mach_timer.h | 51 ---
132 include/linux/sysctl.h | 1
133 include/xen/pcifront.h | 20 -
134 include/xen/sysctl.h | 11
135 include/xen/xenbus.h | 2
136 kernel/kexec.c | 3
137 kernel/sysctl_check.c | 12
138 lib/swiotlb-xen.c | 35 +-
139 mm/memory.c | 2
140 130 files changed, 2202 insertions(+), 1349 deletions(-)
141
142 --- a/arch/x86/Kconfig
143 +++ b/arch/x86/Kconfig
144 @@ -48,15 +48,15 @@
145
146 config CLOCKSOURCE_WATCHDOG
147 def_bool y
148 - depends on !X86_XEN
149 + depends on !X86_XEN && !X86_64_XEN
150
151 config GENERIC_CLOCKEVENTS
152 def_bool y
153 - depends on !X86_XEN
154 + depends on !X86_XEN && !X86_64_XEN
155
156 config GENERIC_CLOCKEVENTS_BROADCAST
157 def_bool y
158 - depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
159 + depends on (X86_64 && !X86_64_XEN) || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
160
161 config LOCKDEP_SUPPORT
162 def_bool y
163 @@ -257,6 +257,7 @@
164
165 config X86_XEN
166 bool "Xen-compatible"
167 + depends on X86_32
168 select XEN
169 select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
170 select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
171 @@ -363,6 +364,7 @@
172
173 config X86_64_XEN
174 bool "Enable Xen compatible kernel"
175 + depends on X86_64
176 select XEN
177 select SWIOTLB
178 help
179 --- a/arch/x86/Makefile
180 +++ b/arch/x86/Makefile
181 @@ -216,8 +216,8 @@
182 zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
183
184 ifdef CONFIG_XEN
185 -CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
186 - -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
187 +KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
188 + -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS)
189
190 ifdef CONFIG_X86_64
191 LDFLAGS_vmlinux := -e startup_64
192 @@ -231,6 +231,8 @@
193
194 vmlinuz: vmlinux
195 $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
196 + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
197 + $(Q)ln -fsn ../../x86/boot/$@ $(objtree)/arch/$(UTS_MACHINE)/boot/$@
198 else
199 # Default kernel to build
200 all: bzImage
201 --- a/arch/x86/ia32/ia32entry-xen.S
202 +++ b/arch/x86/ia32/ia32entry-xen.S
203 @@ -125,20 +125,16 @@
204 jmp int_ret_from_sys_call
205
206 sysenter_tracesys:
207 + xchgl %r9d,%ebp
208 SAVE_REST
209 CLEAR_RREGS
210 + movq %r9,R9(%rsp)
211 movq $-ENOSYS,RAX(%rsp) /* really needed? */
212 movq %rsp,%rdi /* &pt_regs -> arg1 */
213 call syscall_trace_enter
214 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
215 RESTORE_REST
216 - movl %ebp, %ebp
217 - /* no need to do an access_ok check here because rbp has been
218 - 32bit zero extended */
219 -1: movl (%rbp),%r9d
220 - .section __ex_table,"a"
221 - .quad 1b,ia32_badarg
222 - .previous
223 + xchgl %ebp,%r9d
224 jmp sysenter_do_call
225 CFI_ENDPROC
226 ENDPROC(ia32_sysenter_target)
227 @@ -200,20 +196,17 @@
228 jmp int_ret_from_sys_call
229
230 cstar_tracesys:
231 + xchgl %r9d,%ebp
232 SAVE_REST
233 CLEAR_RREGS
234 + movq %r9,R9(%rsp)
235 movq $-ENOSYS,RAX(%rsp) /* really needed? */
236 movq %rsp,%rdi /* &pt_regs -> arg1 */
237 call syscall_trace_enter
238 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
239 RESTORE_REST
240 + xchgl %ebp,%r9d
241 movl RSP-ARGOFFSET(%rsp), %r8d
242 - /* no need to do an access_ok check here because r8 has been
243 - 32bit zero extended */
244 -1: movl (%r8),%r9d
245 - .section __ex_table,"a"
246 - .quad 1b,ia32_badarg
247 - .previous
248 jmp cstar_do_call
249 END(ia32_cstar_target)
250
251 --- a/arch/x86/kernel/Makefile
252 +++ b/arch/x86/kernel/Makefile
253 @@ -106,4 +106,4 @@
254
255 disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
256 smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
257 -%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
258 +%/head_64.o %/head_64.s: asflags-$(CONFIG_XEN) :=
259 --- a/arch/x86/kernel/acpi/sleep_32-xen.c
260 +++ b/arch/x86/kernel/acpi/sleep_32-xen.c
261 @@ -90,7 +90,7 @@
262
263 /* Ouch, we want to delete this. We already have better version in userspace, in
264 s2ram from suspend.sf.net project */
265 -static __init int reset_videomode_after_s3(struct dmi_system_id *d)
266 +static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
267 {
268 acpi_realmode_flags |= 2;
269 return 0;
270 --- a/arch/x86/kernel/acpi/sleep_64-xen.c
271 +++ b/arch/x86/kernel/acpi/sleep_64-xen.c
272 @@ -123,6 +123,3 @@
273 __setup("acpi_sleep=", acpi_sleep_setup);
274 #endif /* CONFIG_ACPI_PV_SLEEP */
275
276 -void acpi_pci_link_exit(void)
277 -{
278 -}
279 --- a/arch/x86/kernel/apic_64-xen.c
280 +++ b/arch/x86/kernel/apic_64-xen.c
281 @@ -63,22 +63,38 @@
282
283 void smp_local_timer_interrupt(void)
284 {
285 - profile_tick(CPU_PROFILING);
286 #ifndef CONFIG_XEN
287 -#ifdef CONFIG_SMP
288 - update_process_times(user_mode(get_irq_regs()));
289 -#endif
290 -#endif
291 + int cpu = smp_processor_id();
292 + struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
293 +
294 /*
295 - * We take the 'long' return path, and there every subsystem
296 - * grabs the appropriate locks (kernel lock/ irq lock).
297 + * Normally we should not be here till LAPIC has been initialized but
298 + * in some cases like kdump, its possible that there is a pending LAPIC
299 + * timer interrupt from previous kernel's context and is delivered in
300 + * new kernel the moment interrupts are enabled.
301 *
302 - * We might want to decouple profiling from the 'long path',
303 - * and do the profiling totally in assembly.
304 - *
305 - * Currently this isn't too much of an issue (performance wise),
306 - * we can take more than 100K local irqs per second on a 100 MHz P5.
307 + * Interrupts are enabled early and LAPIC is setup much later, hence
308 + * its possible that when we get here evt->event_handler is NULL.
309 + * Check for event_handler being NULL and discard the interrupt as
310 + * spurious.
311 + */
312 + if (!evt->event_handler) {
313 + printk(KERN_WARNING
314 + "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
315 + /* Switch it off */
316 + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
317 + return;
318 + }
319 +#endif
320 +
321 + /*
322 + * the NMI deadlock-detector uses this.
323 */
324 + add_pda(apic_timer_irqs, 1);
325 +
326 +#ifndef CONFIG_XEN
327 + evt->event_handler(evt);
328 +#endif
329 }
330
331 /*
332 @@ -94,11 +110,6 @@
333 struct pt_regs *old_regs = set_irq_regs(regs);
334
335 /*
336 - * the NMI deadlock-detector uses this.
337 - */
338 - add_pda(apic_timer_irqs, 1);
339 -
340 - /*
341 * NOTE! We'd better ACK the irq immediately,
342 * because timer handling can be slow.
343 */
344 @@ -132,6 +143,7 @@
345 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
346 ack_APIC_irq();
347
348 + add_pda(irq_spurious_count, 1);
349 irq_exit();
350 }
351
352 --- a/arch/x86/kernel/cpu/common-xen.c
353 +++ b/arch/x86/kernel/cpu/common-xen.c
354 @@ -214,7 +214,7 @@
355
356 static int __init x86_fxsr_setup(char * s)
357 {
358 - /* Tell all the other CPU's to not use it... */
359 + /* Tell all the other CPUs to not use it... */
360 disable_x86_fxsr = 1;
361
362 /*
363 --- a/arch/x86/kernel/e820_32-xen.c
364 +++ b/arch/x86/kernel/e820_32-xen.c
365 @@ -52,6 +52,13 @@
366 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
367 };
368
369 +struct resource bss_resource = {
370 + .name = "Kernel bss",
371 + .start = 0,
372 + .end = 0,
373 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
374 +};
375 +
376 static struct resource system_rom_resource = {
377 .name = "System ROM",
378 .start = 0xf0000,
379 @@ -266,7 +273,9 @@
380 * and also for regions reported as reserved by the e820.
381 */
382 static void __init
383 -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
384 +legacy_init_iomem_resources(struct resource *code_resource,
385 + struct resource *data_resource,
386 + struct resource *bss_resource)
387 {
388 int i;
389
390 @@ -300,9 +309,11 @@
391 #ifndef CONFIG_XEN
392 request_resource(res, code_resource);
393 request_resource(res, data_resource);
394 + request_resource(res, bss_resource);
395 #endif
396 #ifdef CONFIG_KEXEC
397 - request_resource(res, &crashk_res);
398 + if (crashk_res.start != crashk_res.end)
399 + request_resource(res, &crashk_res);
400 #ifdef CONFIG_XEN
401 xen_machine_kexec_register_resources(res);
402 #endif
403 @@ -329,9 +340,11 @@
404
405 printk("Setting up standard PCI resources\n");
406 if (efi_enabled)
407 - efi_initialize_iomem_resources(&code_resource, &data_resource);
408 + efi_initialize_iomem_resources(&code_resource,
409 + &data_resource, &bss_resource);
410 else
411 - legacy_init_iomem_resources(&code_resource, &data_resource);
412 + legacy_init_iomem_resources(&code_resource,
413 + &data_resource, &bss_resource);
414
415 /* EFI systems may still have VGA */
416 request_resource(&iomem_resource, &video_ram_resource);
417 @@ -759,7 +772,7 @@
418 #endif
419
420 /*
421 - * Search for the bigest gap in the low 32 bits of the e820
422 + * Search for the biggest gap in the low 32 bits of the e820
423 * memory space.
424 */
425 last = 0x100000000ull;
426 --- a/arch/x86/kernel/e820_64-xen.c
427 +++ b/arch/x86/kernel/e820_64-xen.c
428 @@ -24,7 +24,7 @@
429 #include <asm/page.h>
430 #include <asm/e820.h>
431 #include <asm/proto.h>
432 -#include <asm/bootsetup.h>
433 +#include <asm/setup.h>
434 #include <asm/sections.h>
435 #include <xen/interface/memory.h>
436
437 @@ -51,7 +51,7 @@
438 */
439 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
440
441 -extern struct resource code_resource, data_resource;
442 +extern struct resource code_resource, data_resource, bss_resource;
443
444 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
445 static inline int bad_addr(unsigned long *addrp, unsigned long size)
446 @@ -73,10 +73,15 @@
447
448 /* initrd */
449 #ifdef CONFIG_BLK_DEV_INITRD
450 - if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
451 - addr < INITRD_START+INITRD_SIZE) {
452 - *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
453 - return 1;
454 + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
455 + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
456 + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
457 + unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
458 +
459 + if (last >= ramdisk_image && addr < ramdisk_end) {
460 + *addrp = PAGE_ALIGN(ramdisk_end);
461 + return 1;
462 + }
463 }
464 #endif
465 /* kernel code */
466 @@ -251,6 +256,7 @@
467 #ifndef CONFIG_XEN
468 request_resource(res, &code_resource);
469 request_resource(res, &data_resource);
470 + request_resource(res, &bss_resource);
471 #endif
472 #ifdef CONFIG_KEXEC
473 if (crashk_res.start != crashk_res.end)
474 @@ -637,8 +643,8 @@
475 * Otherwise fake a memory map; one section from 0k->640k,
476 * the next section from 1mb->appropriate_mem_k
477 */
478 - sanitize_e820_map(E820_MAP, &E820_MAP_NR);
479 - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
480 + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
481 + if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
482 early_panic("Cannot find a valid memory map");
483 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
484 e820_print_map("BIOS-e820");
485 @@ -823,3 +829,22 @@
486 printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
487 pci_mem_start, gapstart, gapsize);
488 }
489 +
490 +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
491 +{
492 + int i;
493 +
494 + if (slot < 0 || slot >= e820.nr_map)
495 + return -1;
496 + for (i = slot; i < e820.nr_map; i++) {
497 + if (e820.map[i].type != E820_RAM)
498 + continue;
499 + break;
500 + }
501 + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
502 + return -1;
503 + *addr = e820.map[i].addr;
504 + *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
505 + max_pfn << PAGE_SHIFT) - *addr;
506 + return i + 1;
507 +}
508 --- a/arch/x86/kernel/early_printk-xen.c
509 +++ b/arch/x86/kernel/early_printk-xen.c
510 @@ -6,14 +6,9 @@
511 #include <asm/io.h>
512 #include <asm/processor.h>
513 #include <asm/fcntl.h>
514 +#include <asm/setup.h>
515
516 /* Simple VGA output */
517 -
518 -#ifdef __i386__
519 -#include <asm/setup.h>
520 -#else
521 -#include <asm/bootsetup.h>
522 -#endif
523 #define VGABASE (__ISA_IO_base + 0xb8000)
524
525 #ifndef CONFIG_XEN
526 @@ -264,10 +259,10 @@
527 early_console = &early_serial_console;
528 } else if (!strncmp(buf, "vga", 3)) {
529 #ifndef CONFIG_XEN
530 - && SCREEN_INFO.orig_video_isVGA == 1) {
531 - max_xpos = SCREEN_INFO.orig_video_cols;
532 - max_ypos = SCREEN_INFO.orig_video_lines;
533 - current_ypos = SCREEN_INFO.orig_y;
534 + && boot_params.screen_info.orig_video_isVGA == 1) {
535 + max_xpos = boot_params.screen_info.orig_video_cols;
536 + max_ypos = boot_params.screen_info.orig_video_lines;
537 + current_ypos = boot_params.screen_info.orig_y;
538 #endif
539 early_console = &early_vga_console;
540 } else if (!strncmp(buf, "simnow", 6)) {
541 --- a/arch/x86/kernel/entry_32-xen.S
542 +++ b/arch/x86/kernel/entry_32-xen.S
543 @@ -254,6 +254,7 @@
544 jb resume_kernel # not returning to v8086 or userspace
545
546 ENTRY(resume_userspace)
547 + LOCKDEP_SYS_EXIT
548 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
549 # setting need_resched or sigpending
550 # between sampling and the iret
551 @@ -341,6 +342,7 @@
552 jae syscall_badsys
553 call *sys_call_table(,%eax,4)
554 movl %eax,PT_EAX(%esp)
555 + LOCKDEP_SYS_EXIT
556 DISABLE_INTERRUPTS(CLBR_ANY)
557 TRACE_IRQS_OFF
558 movl TI_flags(%ebp), %ecx
559 @@ -406,6 +408,7 @@
560 call *sys_call_table(,%eax,4)
561 movl %eax,PT_EAX(%esp) # store the return value
562 syscall_exit:
563 + LOCKDEP_SYS_EXIT
564 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
565 # setting need_resched or sigpending
566 # between sampling and the iret
567 @@ -478,7 +481,7 @@
568 * is still available to implement the setting of the high
569 * 16-bits in the INTERRUPT_RETURN paravirt-op.
570 */
571 - cmpl $0, paravirt_ops+PARAVIRT_enabled
572 + cmpl $0, pv_info+PARAVIRT_enabled
573 jne restore_nocheck
574 #endif
575
576 @@ -540,6 +543,7 @@
577 jz work_notifysig
578 work_resched:
579 call schedule
580 + LOCKDEP_SYS_EXIT
581 DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
582 # setting need_resched or sigpending
583 # between sampling and the iret
584 @@ -1264,6 +1268,6 @@
585 ENDPROC(kernel_thread_helper)
586
587 .section .rodata,"a"
588 -#include "syscall_table.S"
589 +#include "syscall_table_32.S"
590
591 syscall_table_size=(.-sys_call_table)
592 --- a/arch/x86/kernel/entry_64-xen.S
593 +++ b/arch/x86/kernel/entry_64-xen.S
594 @@ -57,7 +57,7 @@
595 #include <xen/interface/arch-x86_64.h>
596 #include <xen/interface/features.h>
597
598 -#include "xen_entry.S"
599 +#include "xen_entry_64.S"
600
601 .code64
602
603 @@ -275,6 +275,7 @@
604 movl $_TIF_ALLWORK_MASK,%edi
605 /* edi: flagmask */
606 sysret_check:
607 + LOCKDEP_SYS_EXIT
608 GET_THREAD_INFO(%rcx)
609 XEN_BLOCK_EVENTS(%rsi)
610 TRACE_IRQS_OFF
611 @@ -365,6 +366,7 @@
612 movl $_TIF_ALLWORK_MASK,%edi
613 /* edi: mask to check */
614 int_with_check:
615 + LOCKDEP_SYS_EXIT_IRQ
616 GET_THREAD_INFO(%rcx)
617 movl threadinfo_flags(%rcx),%edx
618 andl %edi,%edx
619 @@ -516,11 +518,12 @@
620
621 retint_check:
622 CFI_DEFAULT_STACK adj=1
623 + LOCKDEP_SYS_EXIT_IRQ
624 movl threadinfo_flags(%rcx),%edx
625 andl %edi,%edx
626 CFI_REMEMBER_STATE
627 jnz retint_careful
628 -retint_restore_args:
629 +retint_restore_args: /* return to kernel space */
630 movl EFLAGS-REST_SKIP(%rsp), %eax
631 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
632 XEN_GET_VCPU_INFO(%rsi)
633 @@ -841,7 +844,7 @@
634 movq ORIG_RAX(%rsp),%rsi # get error code
635 movq $-1,ORIG_RAX(%rsp)
636 call *%rax
637 -error_exit:
638 +error_exit:
639 RESTORE_REST
640 /* cli */
641 XEN_BLOCK_EVENTS(%rsi)
642 @@ -849,14 +852,11 @@
643 GET_THREAD_INFO(%rcx)
644 testb $3,CS-ARGOFFSET(%rsp)
645 jz retint_kernel
646 + LOCKDEP_SYS_EXIT_IRQ
647 movl threadinfo_flags(%rcx),%edx
648 movl $_TIF_WORK_MASK,%edi
649 andl %edi,%edx
650 jnz retint_careful
651 - /*
652 - * The iret might restore flags:
653 - */
654 - TRACE_IRQS_IRETQ
655 jmp retint_restore_args
656
657 #if 0
658 @@ -1071,7 +1071,7 @@
659 movq %rsi, %rdi
660 call *%rax
661 # exit
662 - xorl %edi, %edi
663 + mov %eax, %edi
664 call do_exit
665 CFI_ENDPROC
666 ENDPROC(child_rip)
667 --- a/arch/x86/kernel/genapic_64-xen.c
668 +++ b/arch/x86/kernel/genapic_64-xen.c
669 @@ -24,12 +24,21 @@
670 #include <acpi/acpi_bus.h>
671 #endif
672
673 -/* which logical CPU number maps to which CPU (physical APIC ID) */
674 -u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
675 +/*
676 + * which logical CPU number maps to which CPU (physical APIC ID)
677 + *
678 + * The following static array is used during kernel startup
679 + * and the x86_cpu_to_apicid_ptr contains the address of the
680 + * array during this time. Is it zeroed when the per_cpu
681 + * data area is removed.
682 + */
683 +#ifndef CONFIG_XEN
684 +u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
685 = { [0 ... NR_CPUS-1] = BAD_APICID };
686 -EXPORT_SYMBOL(x86_cpu_to_apicid);
687 -
688 -u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
689 +void *x86_cpu_to_apicid_ptr;
690 +#endif
691 +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
692 +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
693
694 #ifndef CONFIG_XEN
695 struct genapic __read_mostly *genapic = &apic_flat;
696 --- a/arch/x86/kernel/head64-xen.c
697 +++ b/arch/x86/kernel/head64-xen.c
698 @@ -1,5 +1,5 @@
699 /*
700 - * linux/arch/x86_64/kernel/head64.c -- prepare to run common code
701 + * prepare to run common code
702 *
703 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
704 *
705 @@ -18,7 +18,6 @@
706 #include <asm/processor.h>
707 #include <asm/proto.h>
708 #include <asm/smp.h>
709 -#include <asm/bootsetup.h>
710 #include <asm/setup.h>
711 #include <asm/desc.h>
712 #include <asm/pgtable.h>
713 @@ -44,27 +43,16 @@
714 }
715 #endif
716
717 -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
718 -#define OLD_CL_MAGIC_ADDR 0x20
719 -#define OLD_CL_MAGIC 0xA33F
720 -#define OLD_CL_OFFSET 0x22
721 -
722 static void __init copy_bootdata(char *real_mode_data)
723 {
724 #ifndef CONFIG_XEN
725 - unsigned long new_data;
726 char * command_line;
727
728 - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
729 - new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
730 - if (!new_data) {
731 - if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
732 - return;
733 - }
734 - new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
735 + memcpy(&boot_params, real_mode_data, sizeof boot_params);
736 + if (boot_params.hdr.cmd_line_ptr) {
737 + command_line = __va(boot_params.hdr.cmd_line_ptr);
738 + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
739 }
740 - command_line = __va(new_data);
741 - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
742 #else
743 int max_cmdline;
744
745 @@ -114,7 +102,7 @@
746
747 for (i = 0; i < IDT_ENTRIES; i++)
748 set_intr_gate(i, early_idt_handler);
749 - asm volatile("lidt %0" :: "m" (idt_descr));
750 + load_idt((const struct desc_ptr *)&idt_descr);
751 #endif
752
753 early_printk("Kernel alive\n");
754 --- a/arch/x86/kernel/init_task-xen.c
755 +++ b/arch/x86/kernel/init_task-xen.c
756 @@ -14,11 +14,11 @@
757 static struct files_struct init_files = INIT_FILES;
758 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
759 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
760 -
761 +#ifdef CONFIG_X86_XEN
762 #define swapper_pg_dir ((pgd_t *)NULL)
763 +#endif
764 struct mm_struct init_mm = INIT_MM(init_mm);
765 #undef swapper_pg_dir
766 -
767 EXPORT_SYMBOL(init_mm);
768
769 /*
770 @@ -28,7 +28,7 @@
771 * way process stacks are handled. This is done by having a special
772 * "init_task" linker map entry..
773 */
774 -union thread_union init_thread_union
775 +union thread_union init_thread_union
776 __attribute__((__section__(".data.init_task"))) =
777 { INIT_THREAD_INFO(init_task) };
778
779 @@ -38,14 +38,15 @@
780 * All other task structs will be allocated on slabs in fork.c
781 */
782 struct task_struct init_task = INIT_TASK(init_task);
783 -
784 EXPORT_SYMBOL(init_task);
785
786 #ifndef CONFIG_X86_NO_TSS
787 /*
788 - * per-CPU TSS segments. Threads are completely 'soft' on Linux,
789 - * no more per-task TSS's.
790 - */
791 + * no more per-task TSS's. The TSS size is kept cacheline-aligned
792 + * so they are allowed to end up in the .data.cacheline_aligned
793 + * section. Since TSS's are completely CPU-local, we want them
794 + * on exact cacheline boundaries, to eliminate cacheline ping-pong.
795 + */
796 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
797 #endif
798
799 --- a/arch/x86/kernel/io_apic_32-xen.c
800 +++ b/arch/x86/kernel/io_apic_32-xen.c
801 @@ -422,7 +422,7 @@
802
803 #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
804
805 -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
806 +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
807
808 static cpumask_t balance_irq_affinity[NR_IRQS] = {
809 [0 ... NR_IRQS-1] = CPU_MASK_ALL
810 @@ -628,7 +628,7 @@
811
812 imbalance = move_this_load;
813
814 - /* For physical_balance case, we accumlated both load
815 + /* For physical_balance case, we accumulated both load
816 * values in the one of the siblings cpu_irq[],
817 * to use the same code for physical and logical processors
818 * as much as possible.
819 @@ -642,7 +642,7 @@
820 * (A+B)/2 vs B
821 */
822 load = CPU_IRQ(min_loaded) >> 1;
823 - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
824 + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
825 if (load > CPU_IRQ(j)) {
826 /* This won't change cpu_sibling_map[min_loaded] */
827 load = CPU_IRQ(j);
828 @@ -1011,7 +1011,7 @@
829 #define default_MCA_trigger(idx) (1)
830 #define default_MCA_polarity(idx) (0)
831
832 -static int __init MPBIOS_polarity(int idx)
833 +static int MPBIOS_polarity(int idx)
834 {
835 int bus = mp_irqs[idx].mpc_srcbus;
836 int polarity;
837 @@ -1337,6 +1337,11 @@
838 continue;
839 }
840
841 + if (!first_notcon) {
842 + apic_printk(APIC_VERBOSE, " not connected.\n");
843 + first_notcon = 1;
844 + }
845 +
846 entry.trigger = irq_trigger(idx);
847 entry.polarity = irq_polarity(idx);
848
849 @@ -1922,13 +1927,16 @@
850 static int __init timer_irq_works(void)
851 {
852 unsigned long t1 = jiffies;
853 + unsigned long flags;
854
855 if (no_timer_check)
856 return 1;
857
858 + local_save_flags(flags);
859 local_irq_enable();
860 /* Let ten ticks pass... */
861 mdelay((10 * 1000) / HZ);
862 + local_irq_restore(flags);
863
864 /*
865 * Expect a few ticks at least, to be sure some possible
866 @@ -2209,6 +2217,9 @@
867 {
868 int apic1, pin1, apic2, pin2;
869 int vector;
870 + unsigned long flags;
871 +
872 + local_irq_save(flags);
873
874 /*
875 * get/set the timer IRQ vector:
876 @@ -2254,7 +2265,7 @@
877 }
878 if (disable_timer_pin_1 > 0)
879 clear_IO_APIC_pin(0, pin1);
880 - return;
881 + goto out;
882 }
883 clear_IO_APIC_pin(apic1, pin1);
884 printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
885 @@ -2277,7 +2288,7 @@
886 if (nmi_watchdog == NMI_IO_APIC) {
887 setup_nmi();
888 }
889 - return;
890 + goto out;
891 }
892 /*
893 * Cleanup, just in case ...
894 @@ -2301,7 +2312,7 @@
895
896 if (timer_irq_works()) {
897 printk(" works.\n");
898 - return;
899 + goto out;
900 }
901 apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
902 printk(" failed.\n");
903 @@ -2317,11 +2328,13 @@
904
905 if (timer_irq_works()) {
906 printk(" works.\n");
907 - return;
908 + goto out;
909 }
910 printk(" failed :(.\n");
911 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
912 "report. Then try booting with the 'noapic' option");
913 +out:
914 + local_irq_restore(flags);
915 }
916 #else
917 int timer_uses_ioapic_pin_0 = 0;
918 @@ -2339,6 +2352,14 @@
919
920 void __init setup_IO_APIC(void)
921 {
922 +#ifndef CONFIG_XEN
923 + int i;
924 +
925 + /* Reserve all the system vectors. */
926 + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
927 + set_bit(i, used_vectors);
928 +#endif
929 +
930 enable_IO_APIC();
931
932 if (acpi_ioapic)
933 @@ -2526,7 +2547,7 @@
934 #endif
935
936 /*
937 - * MSI mesage composition
938 + * MSI message composition
939 */
940 #ifdef CONFIG_PCI_MSI
941 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
942 @@ -2883,6 +2904,25 @@
943 return 0;
944 }
945
946 +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
947 +{
948 + int i;
949 +
950 + if (skip_ioapic_setup)
951 + return -1;
952 +
953 + for (i = 0; i < mp_irq_entries; i++)
954 + if (mp_irqs[i].mpc_irqtype == mp_INT &&
955 + mp_irqs[i].mpc_srcbusirq == bus_irq)
956 + break;
957 + if (i >= mp_irq_entries)
958 + return -1;
959 +
960 + *trigger = irq_trigger(i);
961 + *polarity = irq_polarity(i);
962 + return 0;
963 +}
964 +
965 #endif /* CONFIG_ACPI */
966
967 static int __init parse_disable_timer_pin_1(char *arg)
968 --- a/arch/x86/kernel/io_apic_64-xen.c
969 +++ b/arch/x86/kernel/io_apic_64-xen.c
970 @@ -31,6 +31,7 @@
971 #include <linux/sysdev.h>
972 #include <linux/msi.h>
973 #include <linux/htirq.h>
974 +#include <linux/dmar.h>
975 #ifdef CONFIG_ACPI
976 #include <acpi/acpi_bus.h>
977 #endif
978 @@ -581,7 +582,7 @@
979 #define default_PCI_trigger(idx) (1)
980 #define default_PCI_polarity(idx) (1)
981
982 -static int __init MPBIOS_polarity(int idx)
983 +static int MPBIOS_polarity(int idx)
984 {
985 int bus = mp_irqs[idx].mpc_srcbus;
986 int polarity;
987 @@ -864,6 +865,10 @@
988 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
989 continue;
990 }
991 + if (!first_notcon) {
992 + apic_printk(APIC_VERBOSE, " not connected.\n");
993 + first_notcon = 1;
994 + }
995
996 irq = pin_2_irq(idx, apic, pin);
997 add_pin_to_irq(irq, apic, pin);
998 @@ -874,7 +879,7 @@
999 }
1000
1001 if (!first_notcon)
1002 - apic_printk(APIC_VERBOSE," not connected.\n");
1003 + apic_printk(APIC_VERBOSE, " not connected.\n");
1004 }
1005
1006 #ifndef CONFIG_XEN
1007 @@ -1270,10 +1275,13 @@
1008 static int __init timer_irq_works(void)
1009 {
1010 unsigned long t1 = jiffies;
1011 + unsigned long flags;
1012
1013 + local_save_flags(flags);
1014 local_irq_enable();
1015 /* Let ten ticks pass... */
1016 mdelay((10 * 1000) / HZ);
1017 + local_irq_restore(flags);
1018
1019 /*
1020 * Expect a few ticks at least, to be sure some possible
1021 @@ -1648,6 +1656,9 @@
1022 {
1023 struct irq_cfg *cfg = irq_cfg + 0;
1024 int apic1, pin1, apic2, pin2;
1025 + unsigned long flags;
1026 +
1027 + local_irq_save(flags);
1028
1029 /*
1030 * get/set the timer IRQ vector:
1031 @@ -1689,7 +1700,7 @@
1032 }
1033 if (disable_timer_pin_1 > 0)
1034 clear_IO_APIC_pin(0, pin1);
1035 - return;
1036 + goto out;
1037 }
1038 clear_IO_APIC_pin(apic1, pin1);
1039 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
1040 @@ -1711,7 +1722,7 @@
1041 if (nmi_watchdog == NMI_IO_APIC) {
1042 setup_nmi();
1043 }
1044 - return;
1045 + goto out;
1046 }
1047 /*
1048 * Cleanup, just in case ...
1049 @@ -1734,7 +1745,7 @@
1050
1051 if (timer_irq_works()) {
1052 apic_printk(APIC_VERBOSE," works.\n");
1053 - return;
1054 + goto out;
1055 }
1056 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
1057 apic_printk(APIC_VERBOSE," failed.\n");
1058 @@ -1749,10 +1760,12 @@
1059
1060 if (timer_irq_works()) {
1061 apic_printk(APIC_VERBOSE," works.\n");
1062 - return;
1063 + goto out;
1064 }
1065 apic_printk(APIC_VERBOSE," failed :(.\n");
1066 panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
1067 +out:
1068 + local_irq_restore(flags);
1069 }
1070 #else
1071 #define check_timer() ((void)0)
1072 @@ -1768,7 +1781,7 @@
1073
1074 /*
1075 *
1076 - * IRQ's that are handled by the PIC in the MPS IOAPIC case.
1077 + * IRQs that are handled by the PIC in the MPS IOAPIC case.
1078 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
1079 * Linux doesn't really care, as it's not actually used
1080 * for any interrupt handling anyway.
1081 @@ -1849,7 +1862,7 @@
1082 static int __init ioapic_init_sysfs(void)
1083 {
1084 struct sys_device * dev;
1085 - int i, size, error = 0;
1086 + int i, size, error;
1087
1088 error = sysdev_class_register(&ioapic_sysdev_class);
1089 if (error)
1090 @@ -1858,12 +1871,11 @@
1091 for (i = 0; i < nr_ioapics; i++ ) {
1092 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
1093 * sizeof(struct IO_APIC_route_entry);
1094 - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
1095 + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
1096 if (!mp_ioapic_data[i]) {
1097 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
1098 continue;
1099 }
1100 - memset(mp_ioapic_data[i], 0, size);
1101 dev = &mp_ioapic_data[i]->dev;
1102 dev->id = i;
1103 dev->cls = &ioapic_sysdev_class;
1104 @@ -1924,7 +1936,7 @@
1105 #endif
1106
1107 /*
1108 - * MSI mesage composition
1109 + * MSI message composition
1110 */
1111 #ifdef CONFIG_PCI_MSI
1112 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
1113 @@ -2034,8 +2046,64 @@
1114 destroy_irq(irq);
1115 }
1116
1117 -#endif /* CONFIG_PCI_MSI */
1118 +#ifdef CONFIG_DMAR
1119 +#ifdef CONFIG_SMP
1120 +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
1121 +{
1122 + struct irq_cfg *cfg = irq_cfg + irq;
1123 + struct msi_msg msg;
1124 + unsigned int dest;
1125 + cpumask_t tmp;
1126 +
1127 + cpus_and(tmp, mask, cpu_online_map);
1128 + if (cpus_empty(tmp))
1129 + return;
1130 +
1131 + if (assign_irq_vector(irq, mask))
1132 + return;
1133 +
1134 + cpus_and(tmp, cfg->domain, mask);
1135 + dest = cpu_mask_to_apicid(tmp);
1136 +
1137 + dmar_msi_read(irq, &msg);
1138 +
1139 + msg.data &= ~MSI_DATA_VECTOR_MASK;
1140 + msg.data |= MSI_DATA_VECTOR(cfg->vector);
1141 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
1142 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
1143 +
1144 + dmar_msi_write(irq, &msg);
1145 + irq_desc[irq].affinity = mask;
1146 +}
1147 +#endif /* CONFIG_SMP */
1148 +
1149 +struct irq_chip dmar_msi_type = {
1150 + .name = "DMAR_MSI",
1151 + .unmask = dmar_msi_unmask,
1152 + .mask = dmar_msi_mask,
1153 + .ack = ack_apic_edge,
1154 +#ifdef CONFIG_SMP
1155 + .set_affinity = dmar_msi_set_affinity,
1156 +#endif
1157 + .retrigger = ioapic_retrigger_irq,
1158 +};
1159 +
1160 +int arch_setup_dmar_msi(unsigned int irq)
1161 +{
1162 + int ret;
1163 + struct msi_msg msg;
1164 +
1165 + ret = msi_compose_msg(NULL, irq, &msg);
1166 + if (ret < 0)
1167 + return ret;
1168 + dmar_msi_write(irq, &msg);
1169 + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
1170 + "edge");
1171 + return 0;
1172 +}
1173 +#endif
1174
1175 +#endif /* CONFIG_PCI_MSI */
1176 /*
1177 * Hypertransport interrupt support
1178 */
1179 @@ -2168,8 +2236,27 @@
1180 return 0;
1181 }
1182
1183 -#endif /* CONFIG_ACPI */
1184
1185 +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
1186 +{
1187 + int i;
1188 +
1189 + if (skip_ioapic_setup)
1190 + return -1;
1191 +
1192 + for (i = 0; i < mp_irq_entries; i++)
1193 + if (mp_irqs[i].mpc_irqtype == mp_INT &&
1194 + mp_irqs[i].mpc_srcbusirq == bus_irq)
1195 + break;
1196 + if (i >= mp_irq_entries)
1197 + return -1;
1198 +
1199 + *trigger = irq_trigger(i);
1200 + *polarity = irq_polarity(i);
1201 + return 0;
1202 +}
1203 +
1204 +#endif /* CONFIG_ACPI */
1205
1206 #ifndef CONFIG_XEN
1207 /*
1208 @@ -2208,3 +2295,4 @@
1209 }
1210 #endif
1211 #endif /* !CONFIG_XEN */
1212 +
1213 --- a/arch/x86/kernel/ioport_32-xen.c
1214 +++ b/arch/x86/kernel/ioport_32-xen.c
1215 @@ -1,6 +1,4 @@
1216 /*
1217 - * linux/arch/i386/kernel/ioport.c
1218 - *
1219 * This contains the io-permission bitmap code - written by obz, with changes
1220 * by Linus.
1221 */
1222 --- a/arch/x86/kernel/ioport_64-xen.c
1223 +++ b/arch/x86/kernel/ioport_64-xen.c
1224 @@ -1,6 +1,4 @@
1225 /*
1226 - * linux/arch/x86_64/kernel/ioport.c
1227 - *
1228 * This contains the io-permission bitmap code - written by obz, with changes
1229 * by Linus.
1230 */
1231 --- a/arch/x86/kernel/irq_32-xen.c
1232 +++ b/arch/x86/kernel/irq_32-xen.c
1233 @@ -1,6 +1,4 @@
1234 /*
1235 - * linux/arch/i386/kernel/irq.c
1236 - *
1237 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
1238 *
1239 * This file contains the lowest level x86-specific interrupt
1240 @@ -231,8 +229,6 @@
1241
1242 local_irq_restore(flags);
1243 }
1244 -
1245 -EXPORT_SYMBOL(do_softirq);
1246 #endif
1247
1248 /*
1249 @@ -259,9 +255,17 @@
1250 }
1251
1252 if (i < NR_IRQS) {
1253 + unsigned any_count = 0;
1254 +
1255 spin_lock_irqsave(&irq_desc[i].lock, flags);
1256 +#ifndef CONFIG_SMP
1257 + any_count = kstat_irqs(i);
1258 +#else
1259 + for_each_online_cpu(j)
1260 + any_count |= kstat_cpu(j).irqs[i];
1261 +#endif
1262 action = irq_desc[i].action;
1263 - if (!action)
1264 + if (!action && !any_count)
1265 goto skip;
1266 seq_printf(p, "%3d: ",i);
1267 #ifndef CONFIG_SMP
1268 @@ -272,10 +276,12 @@
1269 #endif
1270 seq_printf(p, " %8s", irq_desc[i].chip->name);
1271 seq_printf(p, "-%-8s", irq_desc[i].name);
1272 - seq_printf(p, " %s", action->name);
1273
1274 - for (action=action->next; action; action = action->next)
1275 - seq_printf(p, ", %s", action->name);
1276 + if (action) {
1277 + seq_printf(p, " %s", action->name);
1278 + while ((action = action->next) != NULL)
1279 + seq_printf(p, ", %s", action->name);
1280 + }
1281
1282 seq_putc(p, '\n');
1283 skip:
1284 @@ -284,13 +290,46 @@
1285 seq_printf(p, "NMI: ");
1286 for_each_online_cpu(j)
1287 seq_printf(p, "%10u ", nmi_count(j));
1288 - seq_putc(p, '\n');
1289 + seq_printf(p, " Non-maskable interrupts\n");
1290 #ifdef CONFIG_X86_LOCAL_APIC
1291 seq_printf(p, "LOC: ");
1292 for_each_online_cpu(j)
1293 seq_printf(p, "%10u ",
1294 per_cpu(irq_stat,j).apic_timer_irqs);
1295 - seq_putc(p, '\n');
1296 + seq_printf(p, " Local timer interrupts\n");
1297 +#endif
1298 +#ifdef CONFIG_SMP
1299 + seq_printf(p, "RES: ");
1300 + for_each_online_cpu(j)
1301 + seq_printf(p, "%10u ",
1302 + per_cpu(irq_stat,j).irq_resched_count);
1303 + seq_printf(p, " Rescheduling interrupts\n");
1304 + seq_printf(p, "CAL: ");
1305 + for_each_online_cpu(j)
1306 + seq_printf(p, "%10u ",
1307 + per_cpu(irq_stat,j).irq_call_count);
1308 + seq_printf(p, " function call interrupts\n");
1309 +#ifndef CONFIG_XEN
1310 + seq_printf(p, "TLB: ");
1311 + for_each_online_cpu(j)
1312 + seq_printf(p, "%10u ",
1313 + per_cpu(irq_stat,j).irq_tlb_count);
1314 + seq_printf(p, " TLB shootdowns\n");
1315 +#endif
1316 +#endif
1317 +#ifdef CONFIG_X86_MCE
1318 + seq_printf(p, "TRM: ");
1319 + for_each_online_cpu(j)
1320 + seq_printf(p, "%10u ",
1321 + per_cpu(irq_stat,j).irq_thermal_count);
1322 + seq_printf(p, " Thermal event interrupts\n");
1323 +#endif
1324 +#ifdef CONFIG_X86_LOCAL_APIC
1325 + seq_printf(p, "SPU: ");
1326 + for_each_online_cpu(j)
1327 + seq_printf(p, "%10u ",
1328 + per_cpu(irq_stat,j).irq_spurious_count);
1329 + seq_printf(p, " Spurious interrupts\n");
1330 #endif
1331 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
1332 #if defined(CONFIG_X86_IO_APIC)
1333 --- a/arch/x86/kernel/irq_64-xen.c
1334 +++ b/arch/x86/kernel/irq_64-xen.c
1335 @@ -1,6 +1,4 @@
1336 /*
1337 - * linux/arch/x86_64/kernel/irq.c
1338 - *
1339 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
1340 *
1341 * This file contains the lowest level x86_64-specific interrupt
1342 @@ -64,9 +62,17 @@
1343 }
1344
1345 if (i < NR_IRQS) {
1346 + unsigned any_count = 0;
1347 +
1348 spin_lock_irqsave(&irq_desc[i].lock, flags);
1349 +#ifndef CONFIG_SMP
1350 + any_count = kstat_irqs(i);
1351 +#else
1352 + for_each_online_cpu(j)
1353 + any_count |= kstat_cpu(j).irqs[i];
1354 +#endif
1355 action = irq_desc[i].action;
1356 - if (!action)
1357 + if (!action && !any_count)
1358 goto skip;
1359 seq_printf(p, "%3d: ",i);
1360 #ifndef CONFIG_SMP
1361 @@ -78,9 +84,11 @@
1362 seq_printf(p, " %8s", irq_desc[i].chip->name);
1363 seq_printf(p, "-%-8s", irq_desc[i].name);
1364
1365 - seq_printf(p, " %s", action->name);
1366 - for (action=action->next; action; action = action->next)
1367 - seq_printf(p, ", %s", action->name);
1368 + if (action) {
1369 + seq_printf(p, " %s", action->name);
1370 + while ((action = action->next) != NULL)
1371 + seq_printf(p, ", %s", action->name);
1372 + }
1373 seq_putc(p, '\n');
1374 skip:
1375 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
1376 @@ -88,12 +96,44 @@
1377 seq_printf(p, "NMI: ");
1378 for_each_online_cpu(j)
1379 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
1380 - seq_putc(p, '\n');
1381 + seq_printf(p, " Non-maskable interrupts\n");
1382 #ifdef CONFIG_X86_LOCAL_APIC
1383 seq_printf(p, "LOC: ");
1384 for_each_online_cpu(j)
1385 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
1386 - seq_putc(p, '\n');
1387 + seq_printf(p, " Local timer interrupts\n");
1388 +#endif
1389 +#ifdef CONFIG_SMP
1390 + seq_printf(p, "RES: ");
1391 + for_each_online_cpu(j)
1392 + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
1393 + seq_printf(p, " Rescheduling interrupts\n");
1394 + seq_printf(p, "CAL: ");
1395 + for_each_online_cpu(j)
1396 + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
1397 + seq_printf(p, " function call interrupts\n");
1398 +#ifndef CONFIG_XEN
1399 + seq_printf(p, "TLB: ");
1400 + for_each_online_cpu(j)
1401 + seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
1402 + seq_printf(p, " TLB shootdowns\n");
1403 +#endif
1404 +#endif
1405 +#ifdef CONFIG_X86_MCE
1406 + seq_printf(p, "TRM: ");
1407 + for_each_online_cpu(j)
1408 + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
1409 + seq_printf(p, " Thermal event interrupts\n");
1410 + seq_printf(p, "THR: ");
1411 + for_each_online_cpu(j)
1412 + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
1413 + seq_printf(p, " Threshold APIC interrupts\n");
1414 +#endif
1415 +#ifdef CONFIG_X86_LOCAL_APIC
1416 + seq_printf(p, "SPU: ");
1417 + for_each_online_cpu(j)
1418 + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
1419 + seq_printf(p, " Spurious interrupts\n");
1420 #endif
1421 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
1422 }
1423 @@ -211,7 +251,6 @@
1424 }
1425 local_irq_restore(flags);
1426 }
1427 -EXPORT_SYMBOL(do_softirq);
1428
1429 #ifndef CONFIG_X86_LOCAL_APIC
1430 /*
1431 --- a/arch/x86/kernel/ldt_32-xen.c
1432 +++ b/arch/x86/kernel/ldt_32-xen.c
1433 @@ -1,6 +1,4 @@
1434 /*
1435 - * linux/arch/i386/kernel/ldt.c
1436 - *
1437 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
1438 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
1439 */
1440 @@ -106,14 +104,14 @@
1441 struct mm_struct * old_mm;
1442 int retval = 0;
1443
1444 - init_MUTEX(&mm->context.sem);
1445 + mutex_init(&mm->context.lock);
1446 mm->context.size = 0;
1447 mm->context.has_foreign_mappings = 0;
1448 old_mm = current->mm;
1449 if (old_mm && old_mm->context.size > 0) {
1450 - down(&old_mm->context.sem);
1451 + mutex_lock(&old_mm->context.lock);
1452 retval = copy_ldt(&mm->context, &old_mm->context);
1453 - up(&old_mm->context.sem);
1454 + mutex_unlock(&old_mm->context.lock);
1455 }
1456 return retval;
1457 }
1458 @@ -149,7 +147,7 @@
1459 if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
1460 bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
1461
1462 - down(&mm->context.sem);
1463 + mutex_lock(&mm->context.lock);
1464 size = mm->context.size*LDT_ENTRY_SIZE;
1465 if (size > bytecount)
1466 size = bytecount;
1467 @@ -157,7 +155,7 @@
1468 err = 0;
1469 if (copy_to_user(ptr, mm->context.ldt, size))
1470 err = -EFAULT;
1471 - up(&mm->context.sem);
1472 + mutex_unlock(&mm->context.lock);
1473 if (err < 0)
1474 goto error_return;
1475 if (size != bytecount) {
1476 @@ -213,7 +211,7 @@
1477 goto out;
1478 }
1479
1480 - down(&mm->context.sem);
1481 + mutex_lock(&mm->context.lock);
1482 if (ldt_info.entry_number >= mm->context.size) {
1483 error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
1484 if (error < 0)
1485 @@ -240,7 +238,7 @@
1486 entry_1, entry_2);
1487
1488 out_unlock:
1489 - up(&mm->context.sem);
1490 + mutex_unlock(&mm->context.lock);
1491 out:
1492 return error;
1493 }
1494 --- a/arch/x86/kernel/ldt_64-xen.c
1495 +++ b/arch/x86/kernel/ldt_64-xen.c
1496 @@ -1,6 +1,4 @@
1497 /*
1498 - * linux/arch/x86_64/kernel/ldt.c
1499 - *
1500 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
1501 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
1502 * Copyright (C) 2002 Andi Kleen
1503 @@ -112,14 +110,14 @@
1504 int retval = 0;
1505
1506 memset(&mm->context, 0, sizeof(mm->context));
1507 - init_MUTEX(&mm->context.sem);
1508 + mutex_init(&mm->context.lock);
1509 old_mm = current->mm;
1510 if (old_mm)
1511 mm->context.vdso = old_mm->context.vdso;
1512 if (old_mm && old_mm->context.size > 0) {
1513 - down(&old_mm->context.sem);
1514 + mutex_lock(&old_mm->context.lock);
1515 retval = copy_ldt(&mm->context, &old_mm->context);
1516 - up(&old_mm->context.sem);
1517 + mutex_unlock(&old_mm->context.lock);
1518 }
1519 if (retval == 0) {
1520 spin_lock(&mm_unpinned_lock);
1521 @@ -166,7 +164,7 @@
1522 if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
1523 bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
1524
1525 - down(&mm->context.sem);
1526 + mutex_lock(&mm->context.lock);
1527 size = mm->context.size*LDT_ENTRY_SIZE;
1528 if (size > bytecount)
1529 size = bytecount;
1530 @@ -174,7 +172,7 @@
1531 err = 0;
1532 if (copy_to_user(ptr, mm->context.ldt, size))
1533 err = -EFAULT;
1534 - up(&mm->context.sem);
1535 + mutex_unlock(&mm->context.lock);
1536 if (err < 0)
1537 goto error_return;
1538 if (size != bytecount) {
1539 @@ -227,7 +225,7 @@
1540 goto out;
1541 }
1542
1543 - down(&mm->context.sem);
1544 + mutex_lock(&mm->context.lock);
1545 if (ldt_info.entry_number >= (unsigned)mm->context.size) {
1546 error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
1547 if (error < 0)
1548 @@ -256,7 +254,7 @@
1549 error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32)));
1550
1551 out_unlock:
1552 - up(&mm->context.sem);
1553 + mutex_unlock(&mm->context.lock);
1554 out:
1555 return error;
1556 }
1557 --- a/arch/x86/kernel/mpparse_32-xen.c
1558 +++ b/arch/x86/kernel/mpparse_32-xen.c
1559 @@ -1023,7 +1023,7 @@
1560
1561 /*
1562 * Use the default configuration for the IRQs 0-15. Unless
1563 - * overriden by (MADT) interrupt source override entries.
1564 + * overridden by (MADT) interrupt source override entries.
1565 */
1566 for (i = 0; i < 16; i++) {
1567 int idx;
1568 --- a/arch/x86/kernel/mpparse_64-xen.c
1569 +++ b/arch/x86/kernel/mpparse_64-xen.c
1570 @@ -57,6 +57,8 @@
1571
1572 /* Processor that is doing the boot up */
1573 unsigned int boot_cpu_id = -1U;
1574 +EXPORT_SYMBOL(boot_cpu_id);
1575 +
1576 /* Internal processor count */
1577 unsigned int num_processors __cpuinitdata = 0;
1578
1579 @@ -87,7 +89,7 @@
1580 }
1581
1582 #ifndef CONFIG_XEN
1583 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
1584 +static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
1585 {
1586 int cpu;
1587 cpumask_t tmp_map;
1588 @@ -124,13 +126,24 @@
1589 cpu = 0;
1590 }
1591 bios_cpu_apicid[cpu] = m->mpc_apicid;
1592 - x86_cpu_to_apicid[cpu] = m->mpc_apicid;
1593 + /*
1594 + * We get called early in the the start_kernel initialization
1595 + * process when the per_cpu data area is not yet setup, so we
1596 + * use a static array that is removed after the per_cpu data
1597 + * area is created.
1598 + */
1599 + if (x86_cpu_to_apicid_ptr) {
1600 + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
1601 + x86_cpu_to_apicid[cpu] = m->mpc_apicid;
1602 + } else {
1603 + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
1604 + }
1605
1606 cpu_set(cpu, cpu_possible_map);
1607 cpu_set(cpu, cpu_present_map);
1608 }
1609 #else
1610 -static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
1611 +static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
1612 {
1613 num_processors++;
1614 }
1615 --- a/arch/x86/kernel/pci-dma_32-xen.c
1616 +++ b/arch/x86/kernel/pci-dma_32-xen.c
1617 @@ -13,14 +13,13 @@
1618 #include <linux/pci.h>
1619 #include <linux/module.h>
1620 #include <linux/version.h>
1621 -#include <linux/pci.h>
1622 #include <asm/io.h>
1623 #include <xen/balloon.h>
1624 #include <xen/gnttab.h>
1625 #include <asm/swiotlb.h>
1626 #include <asm/tlbflush.h>
1627 -#include <asm-i386/mach-xen/asm/swiotlb.h>
1628 -#include <asm-i386/mach-xen/asm/gnttab_dma.h>
1629 +#include <asm/swiotlb_32.h>
1630 +#include <asm/gnttab_dma.h>
1631 #include <asm/bug.h>
1632
1633 #ifdef __x86_64__
1634 @@ -112,27 +111,29 @@
1635 }
1636
1637 int
1638 -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
1639 +dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
1640 enum dma_data_direction direction)
1641 {
1642 int i, rc;
1643
1644 BUG_ON(!valid_dma_direction(direction));
1645 - WARN_ON(nents == 0 || sg[0].length == 0);
1646 + WARN_ON(nents == 0 || sgl->length == 0);
1647
1648 if (swiotlb) {
1649 - rc = swiotlb_map_sg(hwdev, sg, nents, direction);
1650 + rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
1651 } else {
1652 - for (i = 0; i < nents; i++ ) {
1653 - BUG_ON(!sg[i].page);
1654 - sg[i].dma_address =
1655 - gnttab_dma_map_page(sg[i].page) + sg[i].offset;
1656 - sg[i].dma_length = sg[i].length;
1657 + struct scatterlist *sg;
1658 +
1659 + for_each_sg(sgl, sg, nents, i) {
1660 + BUG_ON(!sg_page(sg));
1661 + sg->dma_address =
1662 + gnttab_dma_map_page(sg_page(sg)) + sg->offset;
1663 + sg->dma_length = sg->length;
1664 IOMMU_BUG_ON(address_needs_mapping(
1665 - hwdev, sg[i].dma_address));
1666 + hwdev, sg->dma_address));
1667 IOMMU_BUG_ON(range_straddles_page_boundary(
1668 - page_to_pseudophys(sg[i].page) + sg[i].offset,
1669 - sg[i].length));
1670 + page_to_pseudophys(sg_page(sg)) + sg->offset,
1671 + sg->length));
1672 }
1673 rc = nents;
1674 }
1675 @@ -143,17 +144,19 @@
1676 EXPORT_SYMBOL(dma_map_sg);
1677
1678 void
1679 -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
1680 +dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
1681 enum dma_data_direction direction)
1682 {
1683 int i;
1684
1685 BUG_ON(!valid_dma_direction(direction));
1686 if (swiotlb)
1687 - swiotlb_unmap_sg(hwdev, sg, nents, direction);
1688 + swiotlb_unmap_sg(hwdev, sgl, nents, direction);
1689 else {
1690 - for (i = 0; i < nents; i++ )
1691 - gnttab_dma_unmap_page(sg[i].dma_address);
1692 + struct scatterlist *sg;
1693 +
1694 + for_each_sg(sgl, sg, nents, i)
1695 + gnttab_dma_unmap_page(sg->dma_address);
1696 }
1697 }
1698 EXPORT_SYMBOL(dma_unmap_sg);
1699 @@ -267,7 +270,8 @@
1700 {
1701 struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
1702 int order = get_order(size);
1703 -
1704 +
1705 + WARN_ON(irqs_disabled()); /* for portability */
1706 if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
1707 int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
1708
1709 --- a/arch/x86/kernel/pci-swiotlb_64-xen.c
1710 +++ b/arch/x86/kernel/pci-swiotlb_64-xen.c
1711 @@ -5,7 +5,7 @@
1712 #include <linux/module.h>
1713 #include <linux/dma-mapping.h>
1714
1715 -#include <asm/iommu.h>
1716 +#include <asm/gart.h>
1717 #include <asm/swiotlb.h>
1718 #include <asm/dma.h>
1719
1720 --- a/arch/x86/kernel/process_32-xen.c
1721 +++ b/arch/x86/kernel/process_32-xen.c
1722 @@ -1,6 +1,4 @@
1723 /*
1724 - * linux/arch/i386/kernel/process.c
1725 - *
1726 * Copyright (C) 1995 Linus Torvalds
1727 *
1728 * Pentium III FXSR, SSE support
1729 @@ -190,6 +188,10 @@
1730 }
1731 }
1732
1733 +static void do_nothing(void *unused)
1734 +{
1735 +}
1736 +
1737 void cpu_idle_wait(void)
1738 {
1739 unsigned int cpu, this_cpu = get_cpu();
1740 @@ -214,13 +216,20 @@
1741 cpu_clear(cpu, map);
1742 }
1743 cpus_and(map, map, cpu_online_map);
1744 + /*
1745 + * We waited 1 sec, if a CPU still did not call idle
1746 + * it may be because it is in idle and not waking up
1747 + * because it has nothing to do.
1748 + * Give all the remaining CPUS a kick.
1749 + */
1750 + smp_call_function_mask(map, do_nothing, 0, 0);
1751 } while (!cpus_empty(map));
1752
1753 set_cpus_allowed(current, tmp);
1754 }
1755 EXPORT_SYMBOL_GPL(cpu_idle_wait);
1756
1757 -void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
1758 +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
1759 {
1760 }
1761
1762 @@ -238,34 +247,52 @@
1763 }
1764 early_param("idle", idle_setup);
1765
1766 -void show_regs(struct pt_regs * regs)
1767 +void __show_registers(struct pt_regs *regs, int all)
1768 {
1769 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
1770 unsigned long d0, d1, d2, d3, d6, d7;
1771 + unsigned long esp;
1772 + unsigned short ss, gs;
1773 +
1774 + if (user_mode_vm(regs)) {
1775 + esp = regs->esp;
1776 + ss = regs->xss & 0xffff;
1777 + savesegment(gs, gs);
1778 + } else {
1779 + esp = (unsigned long) (&regs->esp);
1780 + savesegment(ss, ss);
1781 + savesegment(gs, gs);
1782 + }
1783
1784 printk("\n");
1785 - printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
1786 - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
1787 + printk("Pid: %d, comm: %s %s (%s %.*s)\n",
1788 + task_pid_nr(current), current->comm,
1789 + print_tainted(), init_utsname()->release,
1790 + (int)strcspn(init_utsname()->version, " "),
1791 + init_utsname()->version);
1792 +
1793 + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
1794 + 0xffff & regs->xcs, regs->eip, regs->eflags,
1795 + smp_processor_id());
1796 print_symbol("EIP is at %s\n", regs->eip);
1797
1798 - if (user_mode_vm(regs))
1799 - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
1800 - printk(" EFLAGS: %08lx %s (%s %.*s)\n",
1801 - regs->eflags, print_tainted(), init_utsname()->release,
1802 - (int)strcspn(init_utsname()->version, " "),
1803 - init_utsname()->version);
1804 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
1805 - regs->eax,regs->ebx,regs->ecx,regs->edx);
1806 - printk("ESI: %08lx EDI: %08lx EBP: %08lx",
1807 - regs->esi, regs->edi, regs->ebp);
1808 - printk(" DS: %04x ES: %04x FS: %04x\n",
1809 - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
1810 + regs->eax, regs->ebx, regs->ecx, regs->edx);
1811 + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
1812 + regs->esi, regs->edi, regs->ebp, esp);
1813 + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
1814 + regs->xds & 0xffff, regs->xes & 0xffff,
1815 + regs->xfs & 0xffff, gs, ss);
1816 +
1817 + if (!all)
1818 + return;
1819
1820 cr0 = read_cr0();
1821 cr2 = read_cr2();
1822 cr3 = read_cr3();
1823 cr4 = read_cr4_safe();
1824 - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
1825 + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
1826 + cr0, cr2, cr3, cr4);
1827
1828 get_debugreg(d0, 0);
1829 get_debugreg(d1, 1);
1830 @@ -273,10 +300,16 @@
1831 get_debugreg(d3, 3);
1832 printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
1833 d0, d1, d2, d3);
1834 +
1835 get_debugreg(d6, 6);
1836 get_debugreg(d7, 7);
1837 - printk("DR6: %08lx DR7: %08lx\n", d6, d7);
1838 + printk("DR6: %08lx DR7: %08lx\n",
1839 + d6, d7);
1840 +}
1841
1842 +void show_regs(struct pt_regs *regs)
1843 +{
1844 + __show_registers(regs, 1);
1845 show_trace(NULL, regs, &regs->esp);
1846 }
1847
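
A note on the cpu_idle_wait() hunk above: the empty IPI callback is not dead code. Delivery of any interrupt pulls a halted CPU out of its idle sleep, so cross-calling do_nothing() is enough to force a CPU through the idle loop where it re-checks cpu_idle_state. A minimal sketch of the pattern, using the smp_call_function_mask(mask, func, info, wait) primitive this same patch introduces further down:

        static void do_nothing(void *unused)
        {
                /* empty on purpose: the IPI delivery itself is the wake-up */
        }

        static void kick_idle_cpus(cpumask_t map)
        {
                /* info = NULL, wait = 0: fire-and-forget */
                smp_call_function_mask(map, do_nothing, NULL, 0);
        }
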
1848 --- a/arch/x86/kernel/process_64-xen.c
1849 +++ b/arch/x86/kernel/process_64-xen.c
1850 @@ -1,6 +1,4 @@
1851 /*
1852 - * linux/arch/x86-64/kernel/process.c
1853 - *
1854 * Copyright (C) 1995 Linus Torvalds
1855 *
1856 * Pentium III FXSR, SSE support
1857 @@ -41,6 +39,7 @@
1858 #include <linux/notifier.h>
1859 #include <linux/kprobes.h>
1860 #include <linux/kdebug.h>
1861 +#include <linux/tick.h>
1862
1863 #include <asm/uaccess.h>
1864 #include <asm/pgtable.h>
1865 @@ -172,6 +171,9 @@
1866
1867 if (__get_cpu_var(cpu_idle_state))
1868 __get_cpu_var(cpu_idle_state) = 0;
1869 +
1870 + tick_nohz_stop_sched_tick();
1871 +
1872 rmb();
1873 idle = xen_idle; /* no alternatives */
1874 if (cpu_is_offline(smp_processor_id()))
1875 @@ -190,12 +192,17 @@
1876 __exit_idle();
1877 }
1878
1879 + tick_nohz_restart_sched_tick();
1880 preempt_enable_no_resched();
1881 schedule();
1882 preempt_disable();
1883 }
1884 }
1885
1886 +static void do_nothing(void *unused)
1887 +{
1888 +}
1889 +
1890 void cpu_idle_wait(void)
1891 {
1892 unsigned int cpu, this_cpu = get_cpu();
1893 @@ -221,6 +228,13 @@
1894 cpu_clear(cpu, map);
1895 }
1896 cpus_and(map, map, cpu_online_map);
1897 + /*
1898 + * We waited 1 sec, if a CPU still did not call idle
1899 + * it may be because it is in idle and not waking up
1900 + * because it has nothing to do.
1901 + * Give all the remaining CPUS a kick.
1902 + */
1903 + smp_call_function_mask(map, do_nothing, 0, 0);
1904 } while (!cpus_empty(map));
1905
1906 set_cpus_allowed(current, tmp);
1907 @@ -528,7 +542,7 @@
1908 *
1909 * Kprobes not supported here. Set the probe on schedule instead.
1910 */
1911 -__kprobes struct task_struct *
1912 +struct task_struct *
1913 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
1914 {
1915 struct thread_struct *prev = &prev_p->thread,
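
The tick_nohz_* calls added above bracket the halt so NO_HZ can stop the periodic tick while the CPU sleeps and re-arm it before scheduling resumes. A condensed sketch of the resulting idle-loop shape (simplified from the hunk; xen_idle stands in for the selected idle routine):

        while (1) {
                tick_nohz_stop_sched_tick();    /* stop the periodic tick */
                while (!need_resched())
                        xen_idle();             /* halt until an event arrives */
                tick_nohz_restart_sched_tick(); /* re-arm before scheduling */
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
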
1916 --- a/arch/x86/kernel/quirks-xen.c
1917 +++ b/arch/x86/kernel/quirks-xen.c
1918 @@ -41,7 +41,353 @@
1919 if (!(config & 0x2))
1920 pci_write_config_byte(dev, 0xf4, config);
1921 }
1922 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
1923 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
1924 -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
1925 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH,
1926 + quirk_intel_irqbalance);
1927 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH,
1928 + quirk_intel_irqbalance);
1929 +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH,
1930 + quirk_intel_irqbalance);
1931 +#endif
1932 +
1933 +#if defined(CONFIG_HPET_TIMER)
1934 +#include <asm/hpet.h>
1935 +
1936 +unsigned long force_hpet_address;
1937 +
1938 +static enum {
1939 + NONE_FORCE_HPET_RESUME,
1940 + OLD_ICH_FORCE_HPET_RESUME,
1941 + ICH_FORCE_HPET_RESUME,
1942 + VT8237_FORCE_HPET_RESUME,
1943 + NVIDIA_FORCE_HPET_RESUME,
1944 +} force_hpet_resume_type;
1945 +
1946 +static void __iomem *rcba_base;
1947 +
1948 +static void ich_force_hpet_resume(void)
1949 +{
1950 + u32 val;
1951 +
1952 + if (!force_hpet_address)
1953 + return;
1954 +
1955 + if (rcba_base == NULL)
1956 + BUG();
1957 +
1958 + /* read the Function Disable register, dword mode only */
1959 + val = readl(rcba_base + 0x3404);
1960 + if (!(val & 0x80)) {
1961 + /* HPET disabled in HPTC. Trying to enable */
1962 + writel(val | 0x80, rcba_base + 0x3404);
1963 + }
1964 +
1965 + val = readl(rcba_base + 0x3404);
1966 + if (!(val & 0x80))
1967 + BUG();
1968 + else
1969 + printk(KERN_DEBUG "Force enabled HPET at resume\n");
1970 +
1971 + return;
1972 +}
1973 +
1974 +static void ich_force_enable_hpet(struct pci_dev *dev)
1975 +{
1976 + u32 val;
1977 + u32 uninitialized_var(rcba);
1978 + int err = 0;
1979 +
1980 + if (hpet_address || force_hpet_address)
1981 + return;
1982 +
1983 + pci_read_config_dword(dev, 0xF0, &rcba);
1984 + rcba &= 0xFFFFC000;
1985 + if (rcba == 0) {
1986 + printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n");
1987 + return;
1988 + }
1989 +
1990 + /* use bits 31:14, 16 kB aligned */
1991 + rcba_base = ioremap_nocache(rcba, 0x4000);
1992 + if (rcba_base == NULL) {
1993 + printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n");
1994 + return;
1995 + }
1996 +
1997 + /* read the Function Disable register, dword mode only */
1998 + val = readl(rcba_base + 0x3404);
1999 +
2000 + if (val & 0x80) {
2001 + /* HPET is enabled in HPTC. Just not reported by BIOS */
2002 + val = val & 0x3;
2003 + force_hpet_address = 0xFED00000 | (val << 12);
2004 + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
2005 + force_hpet_address);
2006 + iounmap(rcba_base);
2007 + return;
2008 + }
2009 +
2010 + /* HPET disabled in HPTC. Trying to enable */
2011 + writel(val | 0x80, rcba_base + 0x3404);
2012 +
2013 + val = readl(rcba_base + 0x3404);
2014 + if (!(val & 0x80)) {
2015 + err = 1;
2016 + } else {
2017 + val = val & 0x3;
2018 + force_hpet_address = 0xFED00000 | (val << 12);
2019 + }
2020 +
2021 + if (err) {
2022 + force_hpet_address = 0;
2023 + iounmap(rcba_base);
2024 + printk(KERN_DEBUG "Failed to force enable HPET\n");
2025 + } else {
2026 + force_hpet_resume_type = ICH_FORCE_HPET_RESUME;
2027 + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
2028 + force_hpet_address);
2029 + }
2030 +}
2031 +
2032 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
2033 + ich_force_enable_hpet);
2034 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
2035 + ich_force_enable_hpet);
2036 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
2037 + ich_force_enable_hpet);
2038 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
2039 + ich_force_enable_hpet);
2040 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
2041 + ich_force_enable_hpet);
2042 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
2043 + ich_force_enable_hpet);
2044 +
2045 +
2046 +static struct pci_dev *cached_dev;
2047 +
2048 +static void old_ich_force_hpet_resume(void)
2049 +{
2050 + u32 val;
2051 + u32 uninitialized_var(gen_cntl);
2052 +
2053 + if (!force_hpet_address || !cached_dev)
2054 + return;
2055 +
2056 + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
2057 + gen_cntl &= (~(0x7 << 15));
2058 + gen_cntl |= (0x4 << 15);
2059 +
2060 + pci_write_config_dword(cached_dev, 0xD0, gen_cntl);
2061 + pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
2062 + val = gen_cntl >> 15;
2063 + val &= 0x7;
2064 + if (val == 0x4)
2065 + printk(KERN_DEBUG "Force enabled HPET at resume\n");
2066 + else
2067 + BUG();
2068 +}
2069 +
2070 +static void old_ich_force_enable_hpet(struct pci_dev *dev)
2071 +{
2072 + u32 val;
2073 + u32 uninitialized_var(gen_cntl);
2074 +
2075 + if (hpet_address || force_hpet_address)
2076 + return;
2077 +
2078 + pci_read_config_dword(dev, 0xD0, &gen_cntl);
2079 + /*
2080 + * Bit 17 is HPET enable bit.
2081 + * Bit 16:15 control the HPET base address.
2082 + */
2083 + val = gen_cntl >> 15;
2084 + val &= 0x7;
2085 + if (val & 0x4) {
2086 + val &= 0x3;
2087 + force_hpet_address = 0xFED00000 | (val << 12);
2088 + printk(KERN_DEBUG "HPET at base address 0x%lx\n",
2089 + force_hpet_address);
2090 + return;
2091 + }
2092 +
2093 + /*
2094 + * HPET is disabled. Trying enabling at FED00000 and check
2095 + * whether it sticks
2096 + */
2097 + gen_cntl &= (~(0x7 << 15));
2098 + gen_cntl |= (0x4 << 15);
2099 + pci_write_config_dword(dev, 0xD0, gen_cntl);
2100 +
2101 + pci_read_config_dword(dev, 0xD0, &gen_cntl);
2102 +
2103 + val = gen_cntl >> 15;
2104 + val &= 0x7;
2105 + if (val & 0x4) {
2106 + /* HPET is enabled in HPTC. Just not reported by BIOS */
2107 + val &= 0x3;
2108 + force_hpet_address = 0xFED00000 | (val << 12);
2109 + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
2110 + force_hpet_address);
2111 + cached_dev = dev;
2112 + force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME;
2113 + return;
2114 + }
2115 +
2116 + printk(KERN_DEBUG "Failed to force enable HPET\n");
2117 +}
2118 +
2119 +/*
2120 + * Undocumented chipset features. Make sure that the user enforced
2121 + * this.
2122 + */
2123 +static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
2124 +{
2125 + if (hpet_force_user)
2126 + old_ich_force_enable_hpet(dev);
2127 +}
2128 +
2129 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
2130 + old_ich_force_enable_hpet_user);
2131 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
2132 + old_ich_force_enable_hpet_user);
2133 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
2134 + old_ich_force_enable_hpet_user);
2135 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
2136 + old_ich_force_enable_hpet_user);
2137 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
2138 + old_ich_force_enable_hpet);
2139 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
2140 + old_ich_force_enable_hpet);
2141 +
2142 +
2143 +static void vt8237_force_hpet_resume(void)
2144 +{
2145 + u32 val;
2146 +
2147 + if (!force_hpet_address || !cached_dev)
2148 + return;
2149 +
2150 + val = 0xfed00000 | 0x80;
2151 + pci_write_config_dword(cached_dev, 0x68, val);
2152 +
2153 + pci_read_config_dword(cached_dev, 0x68, &val);
2154 + if (val & 0x80)
2155 + printk(KERN_DEBUG "Force enabled HPET at resume\n");
2156 + else
2157 + BUG();
2158 +}
2159 +
2160 +static void vt8237_force_enable_hpet(struct pci_dev *dev)
2161 +{
2162 + u32 uninitialized_var(val);
2163 +
2164 + if (!hpet_force_user || hpet_address || force_hpet_address)
2165 + return;
2166 +
2167 + pci_read_config_dword(dev, 0x68, &val);
2168 + /*
2169 + * Bit 7 is HPET enable bit.
2170 + * Bit 31:10 is HPET base address (contrary to what datasheet claims)
2171 + */
2172 + if (val & 0x80) {
2173 + force_hpet_address = (val & ~0x3ff);
2174 + printk(KERN_DEBUG "HPET at base address 0x%lx\n",
2175 + force_hpet_address);
2176 + return;
2177 + }
2178 +
2179 + /*
2180 + * HPET is disabled. Trying enabling at FED00000 and check
2181 + * whether it sticks
2182 + */
2183 + val = 0xfed00000 | 0x80;
2184 + pci_write_config_dword(dev, 0x68, val);
2185 +
2186 + pci_read_config_dword(dev, 0x68, &val);
2187 + if (val & 0x80) {
2188 + force_hpet_address = (val & ~0x3ff);
2189 + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
2190 + force_hpet_address);
2191 + cached_dev = dev;
2192 + force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
2193 + return;
2194 + }
2195 +
2196 + printk(KERN_DEBUG "Failed to force enable HPET\n");
2197 +}
2198 +
2199 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
2200 + vt8237_force_enable_hpet);
2201 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
2202 + vt8237_force_enable_hpet);
2203 +
2204 +/*
2205 + * Undocumented chipset feature taken from LinuxBIOS.
2206 + */
2207 +static void nvidia_force_hpet_resume(void)
2208 +{
2209 + pci_write_config_dword(cached_dev, 0x44, 0xfed00001);
2210 + printk(KERN_DEBUG "Force enabled HPET at resume\n");
2211 +}
2212 +
2213 +static void nvidia_force_enable_hpet(struct pci_dev *dev)
2214 +{
2215 + u32 uninitialized_var(val);
2216 +
2217 + if (!hpet_force_user || hpet_address || force_hpet_address)
2218 + return;
2219 +
2220 + pci_write_config_dword(dev, 0x44, 0xfed00001);
2221 + pci_read_config_dword(dev, 0x44, &val);
2222 + force_hpet_address = val & 0xfffffffe;
2223 + force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME;
2224 + printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
2225 + force_hpet_address);
2226 + cached_dev = dev;
2227 + return;
2228 +}
2229 +
2230 +/* ISA Bridges */
2231 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050,
2232 + nvidia_force_enable_hpet);
2233 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
2234 + nvidia_force_enable_hpet);
2235 +
2236 +/* LPC bridges */
2237 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
2238 + nvidia_force_enable_hpet);
2239 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
2240 + nvidia_force_enable_hpet);
2241 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362,
2242 + nvidia_force_enable_hpet);
2243 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363,
2244 + nvidia_force_enable_hpet);
2245 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364,
2246 + nvidia_force_enable_hpet);
2247 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365,
2248 + nvidia_force_enable_hpet);
2249 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366,
2250 + nvidia_force_enable_hpet);
2251 +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
2252 + nvidia_force_enable_hpet);
2253 +
2254 +void force_hpet_resume(void)
2255 +{
2256 + switch (force_hpet_resume_type) {
2257 + case ICH_FORCE_HPET_RESUME:
2258 + return ich_force_hpet_resume();
2259 +
2260 + case OLD_ICH_FORCE_HPET_RESUME:
2261 + return old_ich_force_hpet_resume();
2262 +
2263 + case VT8237_FORCE_HPET_RESUME:
2264 + return vt8237_force_hpet_resume();
2265 +
2266 + case NVIDIA_FORCE_HPET_RESUME:
2267 + return nvidia_force_hpet_resume();
2268 +
2269 + default:
2270 + break;
2271 + }
2272 +}
2273 +
2274 #endif
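
All of the ICH-style quirks above decode the HPET base the same way: HPTC bits 1:0 select one of four 4 KiB-apart windows starting at 0xFED00000, and bit 7 is the enable. A self-contained illustration of that bit arithmetic (plain userspace C, values illustrative):

        #include <stdio.h>
        #include <stdint.h>

        static uint32_t hptc_to_hpet_base(uint32_t hptc)
        {
                /* bits 1:0 pick the window; windows are 4 KiB apart */
                return 0xFED00000u | ((hptc & 0x3u) << 12);
        }

        int main(void)
        {
                uint32_t sel;

                for (sel = 0; sel < 4; sel++)
                        printf("HPTC select %u -> HPET at 0x%08x\n",
                               sel, hptc_to_hpet_base(sel));
                return 0;
        }

The VT8237 quirk differs: there the register at offset 0x68 carries the base directly in bits 31:10, hence its (val & ~0x3ff) mask instead.
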
2275 --- a/arch/x86/kernel/setup64-xen.c
2276 +++ b/arch/x86/kernel/setup64-xen.c
2277 @@ -15,7 +15,6 @@
2278 #include <linux/bootmem.h>
2279 #include <linux/bitops.h>
2280 #include <linux/module.h>
2281 -#include <asm/bootsetup.h>
2282 #include <asm/pda.h>
2283 #include <asm/pgtable.h>
2284 #include <asm/processor.h>
2285 @@ -27,11 +26,12 @@
2286 #include <asm/percpu.h>
2287 #include <asm/proto.h>
2288 #include <asm/sections.h>
2289 +#include <asm/setup.h>
2290 #ifdef CONFIG_XEN
2291 #include <asm/hypervisor.h>
2292 #endif
2293
2294 -char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
2295 +struct boot_params __initdata boot_params;
2296
2297 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
2298
2299 @@ -159,8 +159,8 @@
2300
2301 static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
2302 {
2303 - asm volatile("lgdt %0" :: "m" (*gdt_descr));
2304 - asm volatile("lidt %0" :: "m" (idt_descr));
2305 + load_gdt(gdt_descr);
2306 + load_idt(idt_descr);
2307 }
2308 #endif
2309
2310 @@ -252,6 +252,14 @@
2311
2312 unsigned long kernel_eflags;
2313
2314 +#ifndef CONFIG_X86_NO_TSS
2315 +/*
2316 + * Copies of the original ist values from the tss are only accessed during
2317 + * debugging, no special alignment required.
2318 + */
2319 +DEFINE_PER_CPU(struct orig_ist, orig_ist);
2320 +#endif
2321 +
2322 /*
2323 * cpu_init() initializes state that is per-CPU. Some data is already
2324 * initialized (naturally) in the bootstrap process, such as the GDT
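
On the lgdt/lidt hunk above: routing descriptor-table loads through load_gdt()/load_idt() funnels them into the common accessors, which is what allows a paravirtualized build to substitute hypercalls where the raw instructions would fault. On native hardware the wrappers reduce to roughly the original asm; a sketch of the native form (the native_* names here are an assumption, modeled on the era's headers):

        static inline void native_load_gdt(const struct desc_ptr *dtr)
        {
                asm volatile("lgdt %0" : : "m" (*dtr));
        }

        static inline void native_load_idt(const struct desc_ptr *dtr)
        {
                asm volatile("lidt %0" : : "m" (*dtr));
        }
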
2325 --- a/arch/x86/kernel/setup_32-xen.c
2326 +++ b/arch/x86/kernel/setup_32-xen.c
2327 @@ -1,6 +1,4 @@
2328 /*
2329 - * linux/arch/i386/kernel/setup.c
2330 - *
2331 * Copyright (C) 1995 Linus Torvalds
2332 *
2333 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
2334 @@ -70,6 +68,7 @@
2335 #include <xen/xencons.h>
2336 #include <setup_arch.h>
2337 #include <bios_ebda.h>
2338 +#include <asm/cacheflush.h>
2339
2340 #ifdef CONFIG_XEN
2341 #include <xen/interface/kexec.h>
2342 @@ -83,13 +82,14 @@
2343 extern char hypercall_page[PAGE_SIZE];
2344 EXPORT_SYMBOL(hypercall_page);
2345
2346 -int disable_pse __devinitdata = 0;
2347 +int disable_pse __cpuinitdata = 0;
2348
2349 /*
2350 * Machine setup..
2351 */
2352 extern struct resource code_resource;
2353 extern struct resource data_resource;
2354 +extern struct resource bss_resource;
2355
2356 /* cpu data as detected by the assembly code in head.S */
2357 struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
2358 @@ -101,9 +101,6 @@
2359
2360 /* for MCA, but anyone else can use it if they want */
2361 unsigned int machine_id;
2362 -#ifdef CONFIG_MCA
2363 -EXPORT_SYMBOL(machine_id);
2364 -#endif
2365 unsigned int machine_submodel_id;
2366 unsigned int BIOS_revision;
2367 unsigned int mca_pentium_flag;
2368 @@ -124,7 +121,7 @@
2369 struct edid_info edid_info;
2370 EXPORT_SYMBOL_GPL(edid_info);
2371 #ifndef CONFIG_XEN
2372 -#define copy_edid() (edid_info = EDID_INFO)
2373 +#define copy_edid() (edid_info = boot_params.edid_info)
2374 #endif
2375 struct ist_info ist_info;
2376 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
2377 @@ -173,10 +170,11 @@
2378 */
2379 static inline void copy_edd(void)
2380 {
2381 - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
2382 - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
2383 - edd.mbr_signature_nr = EDD_MBR_SIG_NR;
2384 - edd.edd_info_nr = EDD_NR;
2385 + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
2386 + sizeof(edd.mbr_signature));
2387 + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
2388 + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
2389 + edd.edd_info_nr = boot_params.eddbuf_entries;
2390 }
2391 #endif
2392 #else
2393 @@ -419,6 +417,53 @@
2394 extern void zone_sizes_init(void);
2395 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
2396
2397 +static inline unsigned long long get_total_mem(void)
2398 +{
2399 + unsigned long long total;
2400 +
2401 + total = max_low_pfn - min_low_pfn;
2402 +#ifdef CONFIG_HIGHMEM
2403 + total += highend_pfn - highstart_pfn;
2404 +#endif
2405 +
2406 + return total << PAGE_SHIFT;
2407 +}
2408 +
2409 +#ifdef CONFIG_KEXEC
2410 +#ifndef CONFIG_XEN
2411 +static void __init reserve_crashkernel(void)
2412 +{
2413 + unsigned long long total_mem;
2414 + unsigned long long crash_size, crash_base;
2415 + int ret;
2416 +
2417 + total_mem = get_total_mem();
2418 +
2419 + ret = parse_crashkernel(boot_command_line, total_mem,
2420 + &crash_size, &crash_base);
2421 + if (ret == 0 && crash_size > 0) {
2422 + if (crash_base > 0) {
2423 + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
2424 + "for crashkernel (System RAM: %ldMB)\n",
2425 + (unsigned long)(crash_size >> 20),
2426 + (unsigned long)(crash_base >> 20),
2427 + (unsigned long)(total_mem >> 20));
2428 + crashk_res.start = crash_base;
2429 + crashk_res.end = crash_base + crash_size - 1;
2430 + reserve_bootmem(crash_base, crash_size);
2431 + } else
2432 + printk(KERN_INFO "crashkernel reservation failed - "
2433 + "you have to specify a base address\n");
2434 + }
2435 +}
2436 +#else
2437 +#define reserve_crashkernel xen_machine_kexec_setup_resources
2438 +#endif
2439 +#else
2440 +static inline void __init reserve_crashkernel(void)
2441 +{}
2442 +#endif
2443 +
2444 void __init setup_bootmem_allocator(void)
2445 {
2446 unsigned long bootmap_size;
2447 @@ -474,30 +519,25 @@
2448
2449 #ifdef CONFIG_BLK_DEV_INITRD
2450 if (xen_start_info->mod_start) {
2451 - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
2452 - /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
2453 - initrd_start = INITRD_START + PAGE_OFFSET;
2454 - initrd_end = initrd_start+INITRD_SIZE;
2455 + unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
2456 + unsigned long ramdisk_size = xen_start_info->mod_len;
2457 + unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
2458 + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
2459 +
2460 + if (ramdisk_end <= end_of_lowmem) {
2461 + /*reserve_bootmem(ramdisk_image, ramdisk_size);*/
2462 + initrd_start = ramdisk_image + PAGE_OFFSET;
2463 + initrd_end = initrd_start+ramdisk_size;
2464 initrd_below_start_ok = 1;
2465 - }
2466 - else {
2467 + } else {
2468 printk(KERN_ERR "initrd extends beyond end of memory "
2469 - "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
2470 - INITRD_START + INITRD_SIZE,
2471 - max_low_pfn << PAGE_SHIFT);
2472 + "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
2473 + ramdisk_end, end_of_lowmem);
2474 initrd_start = 0;
2475 }
2476 }
2477 #endif
2478 -#ifdef CONFIG_KEXEC
2479 -#ifdef CONFIG_XEN
2480 - xen_machine_kexec_setup_resources();
2481 -#else
2482 - if (crashk_res.start != crashk_res.end)
2483 - reserve_bootmem(crashk_res.start,
2484 - crashk_res.end - crashk_res.start + 1);
2485 -#endif
2486 -#endif
2487 + reserve_crashkernel();
2488 }
2489
2490 /*
2491 @@ -575,7 +615,8 @@
2492 * the system table is valid. If not, then initialize normally.
2493 */
2494 #ifdef CONFIG_EFI
2495 - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
2496 + if ((boot_params.hdr.type_of_loader == 0x50) &&
2497 + boot_params.efi_info.efi_systab)
2498 efi_enabled = 1;
2499 #endif
2500
2501 @@ -583,18 +624,18 @@
2502 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
2503 */
2504 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
2505 - screen_info = SCREEN_INFO;
2506 + screen_info = boot_params.screen_info;
2507 copy_edid();
2508 - apm_info.bios = APM_BIOS_INFO;
2509 - ist_info = IST_INFO;
2510 - saved_videomode = VIDEO_MODE;
2511 - if( SYS_DESC_TABLE.length != 0 ) {
2512 - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
2513 - machine_id = SYS_DESC_TABLE.table[0];
2514 - machine_submodel_id = SYS_DESC_TABLE.table[1];
2515 - BIOS_revision = SYS_DESC_TABLE.table[2];
2516 + apm_info.bios = boot_params.apm_bios_info;
2517 + ist_info = boot_params.ist_info;
2518 + saved_videomode = boot_params.hdr.vid_mode;
2519 + if( boot_params.sys_desc_table.length != 0 ) {
2520 + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
2521 + machine_id = boot_params.sys_desc_table.table[0];
2522 + machine_submodel_id = boot_params.sys_desc_table.table[1];
2523 + BIOS_revision = boot_params.sys_desc_table.table[2];
2524 }
2525 - bootloader_type = LOADER_TYPE;
2526 + bootloader_type = boot_params.hdr.type_of_loader;
2527
2528 if (is_initial_xendomain()) {
2529 const struct dom0_vga_console_info *info =
2530 @@ -609,9 +650,9 @@
2531 screen_info.orig_video_isVGA = 0;
2532
2533 #ifdef CONFIG_BLK_DEV_RAM
2534 - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
2535 - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
2536 - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
2537 + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
2538 + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
2539 + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
2540 #endif
2541
2542 ARCH_SETUP
2543 @@ -624,7 +665,7 @@
2544
2545 copy_edd();
2546
2547 - if (!MOUNT_ROOT_RDONLY)
2548 + if (!boot_params.hdr.root_flags)
2549 root_mountflags &= ~MS_RDONLY;
2550 init_mm.start_code = (unsigned long) _text;
2551 init_mm.end_code = (unsigned long) _etext;
2552 @@ -636,6 +677,8 @@
2553 code_resource.end = virt_to_phys(_etext)-1;
2554 data_resource.start = virt_to_phys(_etext);
2555 data_resource.end = virt_to_phys(_edata)-1;
2556 + bss_resource.start = virt_to_phys(&__bss_start);
2557 + bss_resource.end = virt_to_phys(&__bss_stop)-1;
2558
2559 if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
2560 i = COMMAND_LINE_SIZE;
2561 @@ -664,7 +707,7 @@
2562 /*
2563 * NOTE: before this point _nobody_ is allowed to allocate
2564 * any memory using the bootmem allocator. Although the
2565 - * alloctor is now initialised only the first 8Mb of the kernel
2566 + * allocator is now initialised only the first 8Mb of the kernel
2567 * virtual address space has been mapped. All allocations before
2568 * paging_init() has completed must use the alloc_bootmem_low_pages()
2569 * variant (which allocates DMA'able memory) and care must be taken
2570 @@ -787,10 +830,8 @@
2571 acpi_boot_table_init();
2572 #endif
2573
2574 -#ifdef CONFIG_PCI
2575 -#ifdef CONFIG_X86_IO_APIC
2576 - check_acpi_pci(); /* Checks more than just ACPI actually */
2577 -#endif
2578 +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
2579 + early_quirks();
2580 #endif
2581
2582 #ifdef CONFIG_ACPI
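
The new reserve_crashkernel() above replaces the old unconditional crashk_res reservation with command-line-driven sizing: booting with e.g. crashkernel=64M@16M makes parse_crashkernel() return crash_size = 64 MiB and crash_base = 16 MiB, which are then pinned with reserve_bootmem(). A condensed sketch of the flow (same 2.6.24-era signature as the hunk):

        unsigned long long size, base;

        if (parse_crashkernel(boot_command_line, get_total_mem(),
                              &size, &base) == 0 && size > 0 && base > 0) {
                crashk_res.start = base;
                crashk_res.end   = base + size - 1;
                reserve_bootmem(base, size);    /* keep it out of bootmem */
        }

Note that at this point the base must be given explicitly on the command line; a missing base only produces the KERN_INFO complaint, not an automatic placement.
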
2583 --- a/arch/x86/kernel/setup_64-xen.c
2584 +++ b/arch/x86/kernel/setup_64-xen.c
2585 @@ -1,10 +1,5 @@
2586 /*
2587 - * linux/arch/x86-64/kernel/setup.c
2588 - *
2589 * Copyright (C) 1995 Linus Torvalds
2590 - *
2591 - * Nov 2001 Dave Jones <davej@suse.de>
2592 - * Forked from i386 setup code.
2593 */
2594
2595 /*
2596 @@ -57,13 +52,13 @@
2597 #include <asm/dma.h>
2598 #include <asm/mpspec.h>
2599 #include <asm/mmu_context.h>
2600 -#include <asm/bootsetup.h>
2601 #include <asm/proto.h>
2602 #include <asm/setup.h>
2603 #include <asm/mach_apic.h>
2604 #include <asm/numa.h>
2605 #include <asm/sections.h>
2606 #include <asm/dmi.h>
2607 +#include <asm/cacheflush.h>
2608 #ifdef CONFIG_XEN
2609 #include <linux/percpu.h>
2610 #include <xen/interface/physdev.h>
2611 @@ -183,6 +178,12 @@
2612 .end = 0,
2613 .flags = IORESOURCE_RAM,
2614 };
2615 +struct resource bss_resource = {
2616 + .name = "Kernel bss",
2617 + .start = 0,
2618 + .end = 0,
2619 + .flags = IORESOURCE_RAM,
2620 +};
2621
2622 #ifdef CONFIG_PROC_VMCORE
2623 /* elfcorehdr= specifies the location of elf core header
2624 @@ -234,10 +235,11 @@
2625 */
2626 static inline void copy_edd(void)
2627 {
2628 - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
2629 - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
2630 - edd.mbr_signature_nr = EDD_MBR_SIG_NR;
2631 - edd.edd_info_nr = EDD_NR;
2632 + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
2633 + sizeof(edd.mbr_signature));
2634 + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
2635 + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
2636 + edd.edd_info_nr = boot_params.eddbuf_entries;
2637 }
2638 #endif
2639 #else
2640 @@ -246,6 +248,41 @@
2641 }
2642 #endif
2643
2644 +#ifdef CONFIG_KEXEC
2645 +#ifndef CONFIG_XEN
2646 +static void __init reserve_crashkernel(void)
2647 +{
2648 + unsigned long long free_mem;
2649 + unsigned long long crash_size, crash_base;
2650 + int ret;
2651 +
2652 + free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
2653 +
2654 + ret = parse_crashkernel(boot_command_line, free_mem,
2655 + &crash_size, &crash_base);
2656 + if (ret == 0 && crash_size) {
2657 + if (crash_base > 0) {
2658 + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
2659 + "for crashkernel (System RAM: %ldMB)\n",
2660 + (unsigned long)(crash_size >> 20),
2661 + (unsigned long)(crash_base >> 20),
2662 + (unsigned long)(free_mem >> 20));
2663 + crashk_res.start = crash_base;
2664 + crashk_res.end = crash_base + crash_size - 1;
2665 + reserve_bootmem(crash_base, crash_size);
2666 + } else
2667 + printk(KERN_INFO "crashkernel reservation failed - "
2668 + "you have to specify a base address\n");
2669 + }
2670 +}
2671 +#else
2672 +#define reserve_crashkernel xen_machine_kexec_setup_resources
2673 +#endif
2674 +#else
2675 +static inline void __init reserve_crashkernel(void)
2676 +{}
2677 +#endif
2678 +
2679 #ifndef CONFIG_XEN
2680 #define EBDA_ADDR_POINTER 0x40E
2681
2682 @@ -286,7 +323,7 @@
2683 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
2684
2685 ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
2686 - screen_info = SCREEN_INFO;
2687 + screen_info = boot_params.screen_info;
2688
2689 if (is_initial_xendomain()) {
2690 const struct dom0_vga_console_info *info =
2691 @@ -309,22 +346,22 @@
2692 #else
2693 printk(KERN_INFO "Command line: %s\n", boot_command_line);
2694
2695 - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
2696 - screen_info = SCREEN_INFO;
2697 - edid_info = EDID_INFO;
2698 + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
2699 + screen_info = boot_params.screen_info;
2700 + edid_info = boot_params.edid_info;
2701 #endif /* !CONFIG_XEN */
2702 - saved_video_mode = SAVED_VIDEO_MODE;
2703 - bootloader_type = LOADER_TYPE;
2704 + saved_video_mode = boot_params.hdr.vid_mode;
2705 + bootloader_type = boot_params.hdr.type_of_loader;
2706
2707 #ifdef CONFIG_BLK_DEV_RAM
2708 - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
2709 - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
2710 - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
2711 + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
2712 + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
2713 + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
2714 #endif
2715 setup_memory_region();
2716 copy_edd();
2717
2718 - if (!MOUNT_ROOT_RDONLY)
2719 + if (!boot_params.hdr.root_flags)
2720 root_mountflags &= ~MS_RDONLY;
2721 init_mm.start_code = (unsigned long) &_text;
2722 init_mm.end_code = (unsigned long) &_etext;
2723 @@ -335,6 +372,8 @@
2724 code_resource.end = virt_to_phys(&_etext)-1;
2725 data_resource.start = virt_to_phys(&_etext);
2726 data_resource.end = virt_to_phys(&_edata)-1;
2727 + bss_resource.start = virt_to_phys(&__bss_start);
2728 + bss_resource.end = virt_to_phys(&__bss_stop)-1;
2729
2730 early_identify_cpu(&boot_cpu_data);
2731
2732 @@ -362,6 +401,11 @@
2733 if (is_initial_xendomain())
2734 dmi_scan_machine();
2735
2736 +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
2737 + /* setup to use the static apicid table during kernel startup */
2738 + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
2739 +#endif
2740 +
2741 /* How many end-of-memory variables you have, grandma! */
2742 max_low_pfn = end_pfn;
2743 max_pfn = end_pfn;
2744 @@ -426,52 +470,37 @@
2745 */
2746 acpi_reserve_bootmem();
2747 #endif
2748 -#ifdef CONFIG_XEN
2749 #ifdef CONFIG_BLK_DEV_INITRD
2750 +#ifdef CONFIG_XEN
2751 if (xen_start_info->mod_start) {
2752 - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
2753 - /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/
2754 - initrd_start = INITRD_START + PAGE_OFFSET;
2755 - initrd_end = initrd_start+INITRD_SIZE;
2756 + unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
2757 + unsigned long ramdisk_size = xen_start_info->mod_len;
2758 +#else
2759 + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
2760 + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
2761 + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
2762 +#endif
2763 + unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
2764 + unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
2765 +
2766 + if (ramdisk_end <= end_of_mem) {
2767 +#ifndef CONFIG_XEN
2768 + reserve_bootmem_generic(ramdisk_image, ramdisk_size);
2769 +#endif
2770 + initrd_start = ramdisk_image + PAGE_OFFSET;
2771 + initrd_end = initrd_start+ramdisk_size;
2772 +#ifdef CONFIG_XEN
2773 initrd_below_start_ok = 1;
2774 - } else {
2775 - printk(KERN_ERR "initrd extends beyond end of memory "
2776 - "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
2777 - (unsigned long)(INITRD_START + INITRD_SIZE),
2778 - (unsigned long)(end_pfn << PAGE_SHIFT));
2779 - initrd_start = 0;
2780 - }
2781 - }
2782 #endif
2783 -#else /* CONFIG_XEN */
2784 -#ifdef CONFIG_BLK_DEV_INITRD
2785 - if (LOADER_TYPE && INITRD_START) {
2786 - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
2787 - reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
2788 - initrd_start = INITRD_START + PAGE_OFFSET;
2789 - initrd_end = initrd_start+INITRD_SIZE;
2790 - }
2791 - else {
2792 + } else {
2793 printk(KERN_ERR "initrd extends beyond end of memory "
2794 - "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
2795 - (unsigned long)(INITRD_START + INITRD_SIZE),
2796 - (unsigned long)(end_pfn << PAGE_SHIFT));
2797 + "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
2798 + ramdisk_end, end_of_mem);
2799 initrd_start = 0;
2800 }
2801 }
2802 #endif
2803 -#endif /* !CONFIG_XEN */
2804 -#ifdef CONFIG_KEXEC
2805 -#ifdef CONFIG_XEN
2806 - xen_machine_kexec_setup_resources();
2807 -#else
2808 - if (crashk_res.start != crashk_res.end) {
2809 - reserve_bootmem_generic(crashk_res.start,
2810 - crashk_res.end - crashk_res.start + 1);
2811 - }
2812 -#endif
2813 -#endif
2814 -
2815 + reserve_crashkernel();
2816 paging_init();
2817 #ifdef CONFIG_X86_LOCAL_APIC
2818 /*
2819 @@ -796,7 +825,7 @@
2820 but in the same order as the HT nodeids.
2821 If that doesn't result in a usable node fall back to the
2822 path for the previous case. */
2823 - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
2824 + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
2825 if (ht_nodeid >= 0 &&
2826 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
2827 node = apicid_to_node[ht_nodeid];
2828 @@ -811,6 +840,39 @@
2829 #endif
2830 }
2831
2832 +#define ENABLE_C1E_MASK 0x18000000
2833 +#define CPUID_PROCESSOR_SIGNATURE 1
2834 +#define CPUID_XFAM 0x0ff00000
2835 +#define CPUID_XFAM_K8 0x00000000
2836 +#define CPUID_XFAM_10H 0x00100000
2837 +#define CPUID_XFAM_11H 0x00200000
2838 +#define CPUID_XMOD 0x000f0000
2839 +#define CPUID_XMOD_REV_F 0x00040000
2840 +
2841 +#ifndef CONFIG_XEN
2842 +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
2843 +static __cpuinit int amd_apic_timer_broken(void)
2844 +{
2845 + u32 lo, hi;
2846 + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
2847 + switch (eax & CPUID_XFAM) {
2848 + case CPUID_XFAM_K8:
2849 + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
2850 + break;
2851 + case CPUID_XFAM_10H:
2852 + case CPUID_XFAM_11H:
2853 + rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
2854 + if (lo & ENABLE_C1E_MASK)
2855 + return 1;
2856 + break;
2857 + default:
2858 + /* err on the side of caution */
2859 + return 1;
2860 + }
2861 + return 0;
2862 +}
2863 +#endif
2864 +
2865 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
2866 {
2867 unsigned level;
2868 @@ -840,7 +902,7 @@
2869 level = cpuid_eax(1);
2870 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
2871 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
2872 - if (c->x86 == 0x10)
2873 + if (c->x86 == 0x10 || c->x86 == 0x11)
2874 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
2875
2876 /* Enable workaround for FXSAVE leak */
2877 @@ -882,6 +944,11 @@
2878 /* Family 10 doesn't support C states in MWAIT so don't use it */
2879 if (c->x86 == 0x10 && !force_mwait)
2880 clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
2881 +
2882 +#ifndef CONFIG_XEN
2883 + if (amd_apic_timer_broken())
2884 + disable_apic_timer = 1;
2885 +#endif
2886 }
2887
2888 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
2889 @@ -1192,6 +1259,7 @@
2890 static int show_cpuinfo(struct seq_file *m, void *v)
2891 {
2892 struct cpuinfo_x86 *c = v;
2893 + int cpu = 0;
2894
2895 /*
2896 * These flag bits must match the definitions in <asm/cpufeature.h>.
2897 @@ -1201,7 +1269,7 @@
2898 * applications want to get the raw CPUID data, they should access
2899 * /dev/cpu/<cpu_nr>/cpuid instead.
2900 */
2901 - static char *x86_cap_flags[] = {
2902 + static const char *const x86_cap_flags[] = {
2903 /* Intel-defined */
2904 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
2905 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
2906 @@ -1232,7 +1300,7 @@
2907 /* Intel-defined (#2) */
2908 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
2909 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
2910 - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt",
2911 + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
2912 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2913
2914 /* VIA/Cyrix/Centaur-defined */
2915 @@ -1242,10 +1310,10 @@
2916 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2917
2918 /* AMD-defined (#2) */
2919 - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy",
2920 - "altmovcr8", "abm", "sse4a",
2921 - "misalignsse", "3dnowprefetch",
2922 - "osvw", "ibs", NULL, NULL, NULL, NULL,
2923 + "lahf_lm", "cmp_legacy", "svm", "extapic",
2924 + "cr8_legacy", "abm", "sse4a", "misalignsse",
2925 + "3dnowprefetch", "osvw", "ibs", "sse5",
2926 + "skinit", "wdt", NULL, NULL,
2927 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2928 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2929
2930 @@ -1255,7 +1323,7 @@
2931 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2932 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2933 };
2934 - static char *x86_power_flags[] = {
2935 + static const char *const x86_power_flags[] = {
2936 "ts", /* temperature sensor */
2937 "fid", /* frequency id control */
2938 "vid", /* voltage id control */
2939 @@ -1270,8 +1338,7 @@
2940
2941
2942 #ifdef CONFIG_SMP
2943 - if (!cpu_online(c-cpu_data))
2944 - return 0;
2945 + cpu = c->cpu_index;
2946 #endif
2947
2948 seq_printf(m,"processor\t: %u\n"
2949 @@ -1279,7 +1346,7 @@
2950 "cpu family\t: %d\n"
2951 "model\t\t: %d\n"
2952 "model name\t: %s\n",
2953 - (unsigned)(c-cpu_data),
2954 + (unsigned)cpu,
2955 c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
2956 c->x86,
2957 (int)c->x86_model,
2958 @@ -1291,7 +1358,7 @@
2959 seq_printf(m, "stepping\t: unknown\n");
2960
2961 if (cpu_has(c,X86_FEATURE_TSC)) {
2962 - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
2963 + unsigned int freq = cpufreq_quick_get((unsigned)cpu);
2964 if (!freq)
2965 freq = cpu_khz;
2966 seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
2967 @@ -1304,9 +1371,9 @@
2968
2969 #ifdef CONFIG_SMP
2970 if (smp_num_siblings * c->x86_max_cores > 1) {
2971 - int cpu = c - cpu_data;
2972 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
2973 - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
2974 + seq_printf(m, "siblings\t: %d\n",
2975 + cpus_weight(per_cpu(cpu_core_map, cpu)));
2976 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
2977 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
2978 }
2979 @@ -1361,12 +1428,16 @@
2980
2981 static void *c_start(struct seq_file *m, loff_t *pos)
2982 {
2983 - return *pos < NR_CPUS ? cpu_data + *pos : NULL;
2984 + if (*pos == 0) /* just in case, cpu 0 is not the first */
2985 + *pos = first_cpu(cpu_online_map);
2986 + if ((*pos) < NR_CPUS && cpu_online(*pos))
2987 + return &cpu_data(*pos);
2988 + return NULL;
2989 }
2990
2991 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
2992 {
2993 - ++*pos;
2994 + *pos = next_cpu(*pos, cpu_online_map);
2995 return c_start(m, pos);
2996 }
2997
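
amd_apic_timer_broken() above keys off the extended family/model fields packed into CPUID function 1's EAX. A self-contained illustration of the decode (userspace C; the signature value is hypothetical):

        #include <stdio.h>
        #include <stdint.h>

        #define CPUID_XFAM       0x0ff00000u   /* extended family, bits 27:20 */
        #define CPUID_XFAM_K8    0x00000000u
        #define CPUID_XMOD       0x000f0000u   /* extended model, bits 19:16 */
        #define CPUID_XMOD_REV_F 0x00040000u

        int main(void)
        {
                uint32_t eax = 0x00040f12;      /* hypothetical K8 rev-F part */
                int k8 = (eax & CPUID_XFAM) == CPUID_XFAM_K8;
                int rev_f_up = (eax & CPUID_XMOD) >= CPUID_XMOD_REV_F;

                printf("K8=%d, rev F or later=%d -> check C1E MSR: %d\n",
                       k8, rev_f_up, k8 && rev_f_up);
                return 0;
        }

Note the deliberate fall-through in the hunk's switch: only a pre-rev-F K8 breaks out early; rev-F-and-later K8 parts fall into the family-10h/11h case and read MSR_K8_ENABLE_C1E.
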
2998 --- a/arch/x86/kernel/smp_32-xen.c
2999 +++ b/arch/x86/kernel/smp_32-xen.c
3000 @@ -72,7 +72,7 @@
3001 *
3002 * B stepping CPUs may hang. There are hardware work arounds
3003 * for this. We warn about it in case your board doesn't have the work
3004 - * arounds. Basically thats so I can tell anyone with a B stepping
3005 + * arounds. Basically that's so I can tell anyone with a B stepping
3006 * CPU and SMP problems "tough".
3007 *
3008 * Specific items [From Pentium Processor Specification Update]
3009 @@ -241,7 +241,7 @@
3010 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
3011 * Stop ipi delivery for the old mm. This is not synchronized with
3012 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
3013 - * for the wrong mm, and in the worst case we perform a superflous
3014 + * for the wrong mm, and in the worst case we perform a superfluous
3015 * tlb flush.
3016 * 1a2) set cpu_tlbstate to TLBSTATE_OK
3017 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
3018 @@ -309,6 +309,7 @@
3019 smp_mb__after_clear_bit();
3020 out:
3021 put_cpu_no_resched();
3022 + __get_cpu_var(irq_stat).irq_tlb_count++;
3023
3024 return IRQ_HANDLED;
3025 }
3026 @@ -580,7 +581,7 @@
3027 */
3028 cpu_clear(smp_processor_id(), cpu_online_map);
3029 disable_all_local_evtchn();
3030 - if (cpu_data[smp_processor_id()].hlt_works_ok)
3031 + if (cpu_data(smp_processor_id()).hlt_works_ok)
3032 for(;;) halt();
3033 for (;;);
3034 }
3035 @@ -610,6 +611,7 @@
3036 */
3037 irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
3038 {
3039 + __get_cpu_var(irq_stat).irq_resched_count++;
3040
3041 return IRQ_HANDLED;
3042 }
3043 @@ -632,6 +634,7 @@
3044 */
3045 irq_enter();
3046 (*func)(info);
3047 + __get_cpu_var(irq_stat).irq_call_count++;
3048 irq_exit();
3049
3050 if (wait) {
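
A note on the counter hunks above: the new irq_resched_count, irq_call_count and irq_tlb_count fields exist so /proc/interrupts can print per-CPU summary rows for the reschedule, call-function and TLB-flush IPIs. A sketch of how such a row is assembled, assuming the same per_cpu(irq_stat, cpu) accessor the time_32 hunk below uses (illustrative, not the exact show_interrupts() code):

        seq_printf(p, "RES: ");
        for_each_online_cpu(cpu)
                seq_printf(p, "%10u ",
                           per_cpu(irq_stat, cpu).irq_resched_count);
        seq_printf(p, "  Rescheduling interrupts\n");
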
3051 --- a/arch/x86/kernel/smp_64-xen.c
3052 +++ b/arch/x86/kernel/smp_64-xen.c
3053 @@ -167,6 +167,7 @@
3054 out:
3055 ack_APIC_irq();
3056 cpu_clear(cpu, f->flush_cpumask);
3057 + add_pda(irq_tlb_count, 1);
3058 }
3059
3060 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
3061 @@ -326,17 +327,27 @@
3062 }
3063
3064 /*
3065 - * this function sends a 'generic call function' IPI to one other CPU
3066 - * in the system.
3067 - *
3068 - * cpu is a standard Linux logical CPU number.
3069 + * this function sends a 'generic call function' IPI to all other CPU
3070 + * of the system defined in the mask.
3071 */
3072 -static void
3073 -__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
3074 - int nonatomic, int wait)
3075 +
3076 +static int
3077 +__smp_call_function_mask(cpumask_t mask,
3078 + void (*func)(void *), void *info,
3079 + int wait)
3080 {
3081 struct call_data_struct data;
3082 - int cpus = 1;
3083 + cpumask_t allbutself;
3084 + int cpus;
3085 +
3086 + allbutself = cpu_online_map;
3087 + cpu_clear(smp_processor_id(), allbutself);
3088 +
3089 + cpus_and(mask, mask, allbutself);
3090 + cpus = cpus_weight(mask);
3091 +
3092 + if (!cpus)
3093 + return 0;
3094
3095 data.func = func;
3096 data.info = info;
3097 @@ -347,19 +358,55 @@
3098
3099 call_data = &data;
3100 wmb();
3101 - /* Send a message to all other CPUs and wait for them to respond */
3102 - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
3103 +
3104 + /* Send a message to other CPUs */
3105 + if (cpus_equal(mask, allbutself))
3106 + send_IPI_allbutself(CALL_FUNCTION_VECTOR);
3107 + else
3108 + send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
3109
3110 /* Wait for response */
3111 while (atomic_read(&data.started) != cpus)
3112 cpu_relax();
3113
3114 if (!wait)
3115 - return;
3116 + return 0;
3117
3118 while (atomic_read(&data.finished) != cpus)
3119 cpu_relax();
3120 +
3121 + return 0;
3122 }
3123 +/**
3124 + * smp_call_function_mask(): Run a function on a set of other CPUs.
3125 + * @mask: The set of cpus to run on. Must not include the current cpu.
3126 + * @func: The function to run. This must be fast and non-blocking.
3127 + * @info: An arbitrary pointer to pass to the function.
3128 + * @wait: If true, wait (atomically) until function has completed on other CPUs.
3129 + *
3130 + * Returns 0 on success, else a negative status code.
3131 + *
3132 + * If @wait is true, then returns once @func has returned; otherwise
3133 + * it returns just before the target cpu calls @func.
3134 + *
3135 + * You must not call this function with disabled interrupts or from a
3136 + * hardware interrupt handler or from a bottom half handler.
3137 + */
3138 +int smp_call_function_mask(cpumask_t mask,
3139 + void (*func)(void *), void *info,
3140 + int wait)
3141 +{
3142 + int ret;
3143 +
3144 + /* Can deadlock when called with interrupts disabled */
3145 + WARN_ON(irqs_disabled());
3146 +
3147 + spin_lock(&call_lock);
3148 + ret = __smp_call_function_mask(mask, func, info, wait);
3149 + spin_unlock(&call_lock);
3150 + return ret;
3151 +}
3152 +EXPORT_SYMBOL(smp_call_function_mask);
3153
3154 /*
3155 * smp_call_function_single - Run a function on a specific CPU
3156 @@ -378,6 +425,7 @@
3157 int nonatomic, int wait)
3158 {
3159 /* prevent preemption and reschedule on another processor */
3160 + int ret;
3161 int me = get_cpu();
3162
3163 /* Can deadlock when called with interrupts disabled */
3164 @@ -391,51 +439,14 @@
3165 return 0;
3166 }
3167
3168 - spin_lock(&call_lock);
3169 - __smp_call_function_single(cpu, func, info, nonatomic, wait);
3170 - spin_unlock(&call_lock);
3171 + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
3172 +
3173 put_cpu();
3174 - return 0;
3175 + return ret;
3176 }
3177 EXPORT_SYMBOL(smp_call_function_single);
3178
3179 /*
3180 - * this function sends a 'generic call function' IPI to all other CPUs
3181 - * in the system.
3182 - */
3183 -static void __smp_call_function (void (*func) (void *info), void *info,
3184 - int nonatomic, int wait)
3185 -{
3186 - struct call_data_struct data;
3187 - int cpus = num_online_cpus()-1;
3188 -
3189 - if (!cpus)
3190 - return;
3191 -
3192 - data.func = func;
3193 - data.info = info;
3194 - atomic_set(&data.started, 0);
3195 - data.wait = wait;
3196 - if (wait)
3197 - atomic_set(&data.finished, 0);
3198 -
3199 - call_data = &data;
3200 - wmb();
3201 - /* Send a message to all other CPUs and wait for them to respond */
3202 - send_IPI_allbutself(CALL_FUNCTION_VECTOR);
3203 -
3204 - /* Wait for response */
3205 - while (atomic_read(&data.started) != cpus)
3206 - cpu_relax();
3207 -
3208 - if (!wait)
3209 - return;
3210 -
3211 - while (atomic_read(&data.finished) != cpus)
3212 - cpu_relax();
3213 -}
3214 -
3215 -/*
3216 * smp_call_function - run a function on all other CPUs.
3217 * @func: The function to run. This must be fast and non-blocking.
3218 * @info: An arbitrary pointer to pass to the function.
3219 @@ -453,10 +464,7 @@
3220 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
3221 int wait)
3222 {
3223 - spin_lock(&call_lock);
3224 - __smp_call_function(func,info,nonatomic,wait);
3225 - spin_unlock(&call_lock);
3226 - return 0;
3227 + return smp_call_function_mask(cpu_online_map, func, info, wait);
3228 }
3229 EXPORT_SYMBOL(smp_call_function);
3230
3231 @@ -485,7 +493,7 @@
3232 /* Don't deadlock on the call lock in panic */
3233 nolock = !spin_trylock(&call_lock);
3234 local_irq_save(flags);
3235 - __smp_call_function(stop_this_cpu, NULL, 0, 0);
3236 + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
3237 if (!nolock)
3238 spin_unlock(&call_lock);
3239 disable_all_local_evtchn();
3240 @@ -505,7 +513,9 @@
3241 {
3242 #ifndef CONFIG_XEN
3243 ack_APIC_irq();
3244 -#else
3245 +#endif
3246 + add_pda(irq_resched_count, 1);
3247 +#ifdef CONFIG_XEN
3248 return IRQ_HANDLED;
3249 #endif
3250 }
3251 @@ -535,6 +545,7 @@
3252 exit_idle();
3253 irq_enter();
3254 (*func)(info);
3255 + add_pda(irq_call_count, 1);
3256 irq_exit();
3257 if (wait) {
3258 mb();
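
The refactor above collapses the single-target and all-CPU IPI paths into one mask-based primitive and makes smp_call_function()/smp_call_function_single() thin wrappers around it. A hedged usage sketch (the atomic-counter example is illustrative; per the WARN_ON in the hunk this must never run with interrupts disabled):

        static void bump(void *info)
        {
                atomic_inc(info);       /* must be fast and non-blocking */
        }

        static int poke_other_cpus(void)
        {
                static atomic_t hits = ATOMIC_INIT(0);

                /* run bump() on every other online CPU, wait for completion */
                smp_call_function_mask(cpu_online_map, bump, &hits, 1);
                return atomic_read(&hits);
        }

Note that __smp_call_function_mask() masks out the calling CPU itself (the cpus_and with allbutself), so passing cpu_online_map is safe; the cpu == me case in smp_call_function_single() is already handled locally before the mask call.
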
3259 --- a/arch/x86/kernel/time_32-xen.c
3260 +++ b/arch/x86/kernel/time_32-xen.c
3261 @@ -1,6 +1,4 @@
3262 /*
3263 - * linux/arch/i386/kernel/time.c
3264 - *
3265 * Copyright (C) 1991, 1992, 1995 Linus Torvalds
3266 *
3267 * This file contains the PC-specific time handling details:
3268 @@ -74,6 +72,7 @@
3269 #include <asm/arch_hooks.h>
3270
3271 #include <xen/evtchn.h>
3272 +#include <xen/sysctl.h>
3273 #include <xen/interface/vcpu.h>
3274
3275 #include <asm/i8253.h>
3276 @@ -539,6 +538,13 @@
3277 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
3278 struct vcpu_runstate_info runstate;
3279
3280 + /* Keep nmi watchdog up to date */
3281 +#ifdef __i386__
3282 + per_cpu(irq_stat, smp_processor_id()).irq0_irqs++;
3283 +#else
3284 + add_pda(irq0_irqs, 1);
3285 +#endif
3286 +
3287 /*
3288 * Here we are in the timer irq handler. We just have irqs locally
3289 * disabled but we don't know if the timer_bh is running on the other
3290 @@ -987,7 +993,7 @@
3291 struct cpufreq_freqs *freq = data;
3292 struct xen_platform_op op;
3293
3294 - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
3295 + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
3296 return 0;
3297
3298 if (val == CPUFREQ_PRECHANGE)
3299 @@ -1025,30 +1031,33 @@
3300 */
3301 static ctl_table xen_subtable[] = {
3302 {
3303 - .ctl_name = 1,
3304 + .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK,
3305 .procname = "independent_wallclock",
3306 .data = &independent_wallclock,
3307 .maxlen = sizeof(independent_wallclock),
3308 .mode = 0644,
3309 + .strategy = sysctl_data,
3310 .proc_handler = proc_dointvec
3311 },
3312 {
3313 - .ctl_name = 2,
3314 + .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER,
3315 .procname = "permitted_clock_jitter",
3316 .data = &permitted_clock_jitter,
3317 .maxlen = sizeof(permitted_clock_jitter),
3318 .mode = 0644,
3319 + .strategy = sysctl_data,
3320 .proc_handler = proc_doulongvec_minmax
3321 },
3322 - { 0 }
3323 + { }
3324 };
3325 static ctl_table xen_table[] = {
3326 {
3327 - .ctl_name = 123,
3328 + .ctl_name = CTL_XEN,
3329 .procname = "xen",
3330 .mode = 0555,
3331 - .child = xen_subtable},
3332 - { 0 }
3333 + .child = xen_subtable
3334 + },
3335 + { }
3336 };
3337 static int __init xen_sysctl_init(void)
3338 {
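
On the sysctl hunk above: the magic ctl_name values become named constants from the new xen/sysctl.h include, and the added .strategy = sysctl_data hooks the variables up to the binary sysctl(2) path alongside the existing proc handlers. The procfs view that results is /proc/sys/xen/independent_wallclock and /proc/sys/xen/permitted_clock_jitter; a trivial userspace reader, assuming a kernel with this table registered:

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/proc/sys/xen/independent_wallclock", "r");
                int v;

                if (f && fscanf(f, "%d", &v) == 1)
                        printf("independent_wallclock = %d\n", v);
                if (f)
                        fclose(f);
                return 0;
        }
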
3339 --- a/arch/x86/kernel/traps_32-xen.c
3340 +++ b/arch/x86/kernel/traps_32-xen.c
3341 @@ -1,6 +1,4 @@
3342 /*
3343 - * linux/arch/i386/traps.c
3344 - *
3345 * Copyright (C) 1991, 1992 Linus Torvalds
3346 *
3347 * Pentium III FXSR, SSE support
3348 @@ -65,6 +63,11 @@
3349
3350 int panic_on_unrecovered_nmi;
3351
3352 +#ifndef CONFIG_XEN
3353 +DECLARE_BITMAP(used_vectors, NR_VECTORS);
3354 +EXPORT_SYMBOL_GPL(used_vectors);
3355 +#endif
3356 +
3357 asmlinkage int system_call(void);
3358
3359 /* Do we ignore FPU interrupts ? */
3360 @@ -120,7 +123,7 @@
3361
3362 static inline unsigned long print_context_stack(struct thread_info *tinfo,
3363 unsigned long *stack, unsigned long ebp,
3364 - struct stacktrace_ops *ops, void *data)
3365 + const struct stacktrace_ops *ops, void *data)
3366 {
3367 #ifdef CONFIG_FRAME_POINTER
3368 struct stack_frame *frame = (struct stack_frame *)ebp;
3369 @@ -157,7 +160,7 @@
3370
3371 void dump_trace(struct task_struct *task, struct pt_regs *regs,
3372 unsigned long *stack,
3373 - struct stacktrace_ops *ops, void *data)
3374 + const struct stacktrace_ops *ops, void *data)
3375 {
3376 unsigned long ebp = 0;
3377
3378 @@ -229,7 +232,7 @@
3379 touch_nmi_watchdog();
3380 }
3381
3382 -static struct stacktrace_ops print_trace_ops = {
3383 +static const struct stacktrace_ops print_trace_ops = {
3384 .warning = print_trace_warning,
3385 .warning_symbol = print_trace_warning_symbol,
3386 .stack = print_trace_stack,
3387 @@ -288,6 +291,11 @@
3388 {
3389 unsigned long stack;
3390
3391 + printk("Pid: %d, comm: %.20s %s %s %.*s\n",
3392 + current->pid, current->comm, print_tainted(),
3393 + init_utsname()->release,
3394 + (int)strcspn(init_utsname()->version, " "),
3395 + init_utsname()->version);
3396 show_trace(current, NULL, &stack);
3397 }
3398
3399 @@ -296,48 +304,24 @@
3400 void show_registers(struct pt_regs *regs)
3401 {
3402 int i;
3403 - int in_kernel = 1;
3404 - unsigned long esp;
3405 - unsigned short ss, gs;
3406 -
3407 - esp = (unsigned long) (&regs->esp);
3408 - savesegment(ss, ss);
3409 - savesegment(gs, gs);
3410 - if (user_mode_vm(regs)) {
3411 - in_kernel = 0;
3412 - esp = regs->esp;
3413 - ss = regs->xss & 0xffff;
3414 - }
3415 +
3416 print_modules();
3417 - printk(KERN_EMERG "CPU: %d\n"
3418 - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
3419 - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
3420 - smp_processor_id(), 0xffff & regs->xcs, regs->eip,
3421 - print_tainted(), regs->eflags, init_utsname()->release,
3422 - (int)strcspn(init_utsname()->version, " "),
3423 - init_utsname()->version);
3424 - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
3425 - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
3426 - regs->eax, regs->ebx, regs->ecx, regs->edx);
3427 - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
3428 - regs->esi, regs->edi, regs->ebp, esp);
3429 - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
3430 - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
3431 + __show_registers(regs, 0);
3432 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
3433 - TASK_COMM_LEN, current->comm, current->pid,
3434 + TASK_COMM_LEN, current->comm, task_pid_nr(current),
3435 current_thread_info(), current, task_thread_info(current));
3436 /*
3437 * When in-kernel, we also print out the stack and code at the
3438 * time of the fault..
3439 */
3440 - if (in_kernel) {
3441 + if (!user_mode_vm(regs)) {
3442 u8 *eip;
3443 unsigned int code_prologue = code_bytes * 43 / 64;
3444 unsigned int code_len = code_bytes;
3445 unsigned char c;
3446
3447 printk("\n" KERN_EMERG "Stack: ");
3448 - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
3449 + show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
3450
3451 printk(KERN_EMERG "Code: ");
3452
3453 @@ -382,11 +366,11 @@
3454 void die(const char * str, struct pt_regs * regs, long err)
3455 {
3456 static struct {
3457 - spinlock_t lock;
3458 + raw_spinlock_t lock;
3459 u32 lock_owner;
3460 int lock_owner_depth;
3461 } die = {
3462 - .lock = __SPIN_LOCK_UNLOCKED(die.lock),
3463 + .lock = __RAW_SPIN_LOCK_UNLOCKED,
3464 .lock_owner = -1,
3465 .lock_owner_depth = 0
3466 };
3467 @@ -397,40 +381,33 @@
3468
3469 if (die.lock_owner != raw_smp_processor_id()) {
3470 console_verbose();
3471 - spin_lock_irqsave(&die.lock, flags);
3472 + raw_local_irq_save(flags);
3473 + __raw_spin_lock(&die.lock);
3474 die.lock_owner = smp_processor_id();
3475 die.lock_owner_depth = 0;
3476 bust_spinlocks(1);
3477 - }
3478 - else
3479 - local_save_flags(flags);
3480 + } else
3481 + raw_local_irq_save(flags);
3482
3483 if (++die.lock_owner_depth < 3) {
3484 - int nl = 0;
3485 unsigned long esp;
3486 unsigned short ss;
3487
3488 report_bug(regs->eip, regs);
3489
3490 - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
3491 + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff,
3492 + ++die_counter);
3493 #ifdef CONFIG_PREEMPT
3494 - printk(KERN_EMERG "PREEMPT ");
3495 - nl = 1;
3496 + printk("PREEMPT ");
3497 #endif
3498 #ifdef CONFIG_SMP
3499 - if (!nl)
3500 - printk(KERN_EMERG);
3501 printk("SMP ");
3502 - nl = 1;
3503 #endif
3504 #ifdef CONFIG_DEBUG_PAGEALLOC
3505 - if (!nl)
3506 - printk(KERN_EMERG);
3507 printk("DEBUG_PAGEALLOC");
3508 - nl = 1;
3509 #endif
3510 - if (nl)
3511 - printk("\n");
3512 + printk("\n");
3513 +
3514 if (notify_die(DIE_OOPS, str, regs, err,
3515 current->thread.trap_no, SIGSEGV) !=
3516 NOTIFY_STOP) {
3517 @@ -454,7 +431,8 @@
3518 bust_spinlocks(0);
3519 die.lock_owner = -1;
3520 add_taint(TAINT_DIE);
3521 - spin_unlock_irqrestore(&die.lock, flags);
3522 + __raw_spin_unlock(&die.lock);
3523 + raw_local_irq_restore(flags);
3524
3525 if (!regs)
3526 return;
3527 @@ -571,6 +549,7 @@
3528 info.si_errno = 0; \
3529 info.si_code = sicode; \
3530 info.si_addr = (void __user *)siaddr; \
3531 + trace_hardirqs_fixup(); \
3532 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
3533 == NOTIFY_STOP) \
3534 return; \
3535 @@ -606,7 +585,7 @@
3536 printk_ratelimit())
3537 printk(KERN_INFO
3538 "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
3539 - current->comm, current->pid,
3540 + current->comm, task_pid_nr(current),
3541 regs->eip, regs->esp, error_code);
3542
3543 force_sig(SIGSEGV, current);
3544 @@ -785,6 +764,8 @@
3545 #ifdef CONFIG_KPROBES
3546 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
3547 {
3548 + trace_hardirqs_fixup();
3549 +
3550 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
3551 == NOTIFY_STOP)
3552 return;
3553 @@ -822,6 +803,8 @@
3554 unsigned int condition;
3555 struct task_struct *tsk = current;
3556
3557 + trace_hardirqs_fixup();
3558 +
3559 get_debugreg(condition, 6);
3560
3561 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
3562 @@ -1084,20 +1067,6 @@
3563
3564 #endif /* CONFIG_MATH_EMULATION */
3565
3566 -#ifdef CONFIG_X86_F00F_BUG
3567 -void __init trap_init_f00f_bug(void)
3568 -{
3569 - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
3570 -
3571 - /*
3572 - * Update the IDT descriptor and reload the IDT so that
3573 - * it uses the read-only mapped virtual address.
3574 - */
3575 - idt_descr.address = fix_to_virt(FIX_F00F_IDT);
3576 - load_idt(&idt_descr);
3577 -}
3578 -#endif
3579 -
3580
3581 /*
3582 * NB. All these are "trap gates" (i.e. events_mask isn't set) except
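
The die() rework above swaps the lockdep-aware spinlock for a raw one (an oops path must keep working even when the lock-debugging machinery is itself suspect) while keeping the owner/depth fields so a recursive oops on the owning CPU does not self-deadlock. A condensed sketch of the 32-bit guard shape (the 64-bit file below uses a trylock-first variant of the same idea):

        if (die.lock_owner != raw_smp_processor_id()) {
                /* first oops on this CPU: take the raw lock */
                raw_local_irq_save(flags);
                __raw_spin_lock(&die.lock);
                die.lock_owner = smp_processor_id();
                die.lock_owner_depth = 0;
                bust_spinlocks(1);
        } else
                raw_local_irq_save(flags);      /* nested: already own it */

        if (++die.lock_owner_depth < 3) {
                /* report the oops; deeper nesting is abandoned */
        }
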
3583 --- a/arch/x86/kernel/traps_64-xen.c
3584 +++ b/arch/x86/kernel/traps_64-xen.c
3585 @@ -1,6 +1,4 @@
3586 /*
3587 - * linux/arch/x86-64/traps.c
3588 - *
3589 * Copyright (C) 1991, 1992 Linus Torvalds
3590 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
3591 *
3592 @@ -33,6 +31,7 @@
3593 #include <linux/uaccess.h>
3594 #include <linux/bug.h>
3595 #include <linux/kdebug.h>
3596 +#include <linux/utsname.h>
3597
3598 #if defined(CONFIG_EDAC)
3599 #include <linux/edac.h>
3600 @@ -205,7 +204,7 @@
3601 #define MSG(txt) ops->warning(data, txt)
3602
3603 /*
3604 - * x86-64 can have upto three kernel stacks:
3605 + * x86-64 can have up to three kernel stacks:
3606 * process stack
3607 * interrupt stack
3608 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
3609 @@ -219,7 +218,7 @@
3610
3611 void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
3612 unsigned long *stack,
3613 - struct stacktrace_ops *ops, void *data)
3614 + const struct stacktrace_ops *ops, void *data)
3615 {
3616 const unsigned cpu = get_cpu();
3617 unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
3618 @@ -340,7 +339,7 @@
3619 printk_address(addr);
3620 }
3621
3622 -static struct stacktrace_ops print_trace_ops = {
3623 +static const struct stacktrace_ops print_trace_ops = {
3624 .warning = print_trace_warning,
3625 .warning_symbol = print_trace_warning_symbol,
3626 .stack = print_trace_stack,
3627 @@ -404,6 +403,12 @@
3628 void dump_stack(void)
3629 {
3630 unsigned long dummy;
3631 +
3632 + printk("Pid: %d, comm: %.20s %s %s %.*s\n",
3633 + current->pid, current->comm, print_tainted(),
3634 + init_utsname()->release,
3635 + (int)strcspn(init_utsname()->version, " "),
3636 + init_utsname()->version);
3637 show_trace(NULL, NULL, &dummy);
3638 }
3639
3640 @@ -466,7 +471,7 @@
3641 EXPORT_SYMBOL(out_of_line_bug);
3642 #endif
3643
3644 -static DEFINE_SPINLOCK(die_lock);
3645 +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
3646 static int die_owner = -1;
3647 static unsigned int die_nest_count;
3648
3649 @@ -478,13 +483,13 @@
3650 oops_enter();
3651
3652 /* racy, but better than risking deadlock. */
3653 - local_irq_save(flags);
3654 + raw_local_irq_save(flags);
3655 cpu = smp_processor_id();
3656 - if (!spin_trylock(&die_lock)) {
3657 + if (!__raw_spin_trylock(&die_lock)) {
3658 if (cpu == die_owner)
3659 /* nested oops. should stop eventually */;
3660 else
3661 - spin_lock(&die_lock);
3662 + __raw_spin_lock(&die_lock);
3663 }
3664 die_nest_count++;
3665 die_owner = cpu;
3666 @@ -498,12 +503,10 @@
3667 die_owner = -1;
3668 bust_spinlocks(0);
3669 die_nest_count--;
3670 - if (die_nest_count)
3671 - /* We still own the lock */
3672 - local_irq_restore(flags);
3673 - else
3674 + if (!die_nest_count)
3675 /* Nest count reaches zero, release the lock. */
3676 - spin_unlock_irqrestore(&die_lock, flags);
3677 + __raw_spin_unlock(&die_lock);
3678 + raw_local_irq_restore(flags);
3679 if (panic_on_oops)
3680 panic("Fatal exception");
3681 oops_exit();
3682 @@ -636,6 +639,7 @@
3683 info.si_errno = 0; \
3684 info.si_code = sicode; \
3685 info.si_addr = (void __user *)siaddr; \
3686 + trace_hardirqs_fixup(); \
3687 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
3688 == NOTIFY_STOP) \
3689 return; \
3690 @@ -741,11 +745,8 @@
3691
3692 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
3693
3694 -#if 0 /* XEN */
3695 /* Clear and disable the memory parity error line. */
3696 - reason = (reason & 0xf) | 4;
3697 - outb(reason, 0x61);
3698 -#endif /* XEN */
3699 + clear_mem_error(reason);
3700 }
3701
3702 static __kprobes void
3703 @@ -754,14 +755,8 @@
3704 printk("NMI: IOCK error (debug interrupt?)\n");
3705 show_registers(regs);
3706
3707 -#if 0 /* XEN */
3708 /* Re-enable the IOCK line, wait for a few seconds */
3709 - reason = (reason & 0xf) | 8;
3710 - outb(reason, 0x61);
3711 - mdelay(2000);
3712 - reason &= ~8;
3713 - outb(reason, 0x61);
3714 -#endif /* XEN */
3715 + clear_io_check_error(reason);
3716 }
3717
3718 static __kprobes void
3719 @@ -821,6 +816,8 @@
3720 /* runs on IST stack. */
3721 asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
3722 {
3723 + trace_hardirqs_fixup();
3724 +
3725 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
3726 return;
3727 }
3728 @@ -858,6 +855,8 @@
3729 struct task_struct *tsk = current;
3730 siginfo_t info;
3731
3732 + trace_hardirqs_fixup();
3733 +
3734 get_debugreg(condition, 6);
3735
3736 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
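
The die()/oops_begin() hunks above convert the oops path from an ordinary spinlock to a raw spinlock taken via trylock, so a CPU that faults again while already printing an oops (a nested oops) recognises that it owns the lock instead of deadlocking, and so that lockdep and preemption bookkeeping, which may themselves be corrupted at this point, stay out of the way. A consolidated sketch of the idiom, with hypothetical my_* names, assuming the 2.6.24-era raw-spinlock primitives used in the hunks:

	/* Sketch of the nested-oops locking idiom above (names hypothetical). */
	static raw_spinlock_t my_die_lock = __RAW_SPIN_LOCK_UNLOCKED;
	static int my_die_owner = -1;
	static unsigned int my_die_nest_count;

	static unsigned long my_oops_begin(void)
	{
		unsigned long flags;
		int cpu;

		/* raw_* variants bypass lockdep/preempt code that may be broken */
		raw_local_irq_save(flags);
		cpu = smp_processor_id();
		if (!__raw_spin_trylock(&my_die_lock)) {
			if (cpu == my_die_owner)
				; /* nested oops on this CPU: we already own it */
			else
				__raw_spin_lock(&my_die_lock);
		}
		my_die_nest_count++;
		my_die_owner = cpu;
		return flags;
	}

	static void my_oops_end(unsigned long flags)
	{
		my_die_owner = -1;
		if (!--my_die_nest_count)
			__raw_spin_unlock(&my_die_lock); /* outermost oops releases */
		raw_local_irq_restore(flags);
	}
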
3737 --- a/arch/x86/kernel/vsyscall_64-xen.c
3738 +++ b/arch/x86/kernel/vsyscall_64-xen.c
3739 @@ -1,6 +1,4 @@
3740 /*
3741 - * linux/arch/x86_64/kernel/vsyscall.c
3742 - *
3743 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
3744 * Copyright 2003 Andi Kleen, SuSE Labs.
3745 *
3746 @@ -50,12 +48,12 @@
3747 ({unsigned long v; \
3748 extern char __vsyscall_0; \
3749 asm("" : "=r" (v) : "0" (x)); \
3750 - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
3751 + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
3752
3753 /*
3754 * vsyscall_gtod_data contains data that is :
3755 * - readonly from vsyscalls
3756 - * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
3757 + * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
3758 * Try to keep this structure as small as possible to avoid cache line ping pongs
3759 */
3760 int __vgetcpu_mode __section_vgetcpu_mode;
3761 @@ -66,6 +64,16 @@
3762 .sysctl_enabled = 1,
3763 };
3764
3765 +void update_vsyscall_tz(void)
3766 +{
3767 + unsigned long flags;
3768 +
3769 + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
3770 + /* sys_tz has changed */
3771 + vsyscall_gtod_data.sys_tz = sys_tz;
3772 + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3773 +}
3774 +
3775 void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
3776 {
3777 unsigned long flags;
3778 @@ -79,8 +87,6 @@
3779 vsyscall_gtod_data.clock.shift = clock->shift;
3780 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
3781 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
3782 - vsyscall_gtod_data.sys_tz = sys_tz;
3783 - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
3784 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
3785 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
3786 }
3787 @@ -166,7 +172,7 @@
3788 if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
3789 return time_syscall(t);
3790
3791 - vgettimeofday(&tv, 0);
3792 + vgettimeofday(&tv, NULL);
3793 result = tv.tv_sec;
3794 if (t)
3795 *t = result;
3796 @@ -260,18 +266,10 @@
3797 return ret;
3798 }
3799
3800 -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
3801 - void __user *oldval, size_t __user *oldlenp,
3802 - void __user *newval, size_t newlen)
3803 -{
3804 - return -ENOSYS;
3805 -}
3806 -
3807 static ctl_table kernel_table2[] = {
3808 - { .ctl_name = 99, .procname = "vsyscall64",
3809 + { .procname = "vsyscall64",
3810 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
3811 .mode = 0644,
3812 - .strategy = vsyscall_sysctl_nostrat,
3813 .proc_handler = vsyscall_sysctl_change },
3814 {}
3815 };
3816 @@ -291,9 +289,9 @@
3817 unsigned long d;
3818 unsigned long node = 0;
3819 #ifdef CONFIG_NUMA
3820 - node = cpu_to_node[cpu];
3821 + node = cpu_to_node(cpu);
3822 #endif
3823 - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
3824 + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
3825 write_rdtscp_aux((node << 12) | cpu);
3826
3827 /* Store cpu number in limit so that it can be loaded quickly
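
update_vsyscall_tz(), added above, publishes the new sys_tz under the same seqlock that guards the rest of vsyscall_gtod_data, and the now-duplicated sys_tz/wall_time_nsec stores are dropped from update_vsyscall(). The seqlock is what lets the lock-free vsyscall readers retry instead of observing a torn update. A minimal sketch of the writer/reader pairing, with hypothetical names:

	#include <linux/seqlock.h>

	static DEFINE_SEQLOCK(gtod_lock);
	static struct { long tz_minuteswest, tz_dsttime; } gtod_tz; /* hypothetical */

	static void writer_update(long west, long dst)
	{
		unsigned long flags;

		write_seqlock_irqsave(&gtod_lock, flags);
		gtod_tz.tz_minuteswest = west;
		gtod_tz.tz_dsttime = dst;
		write_sequnlock_irqrestore(&gtod_lock, flags);
	}

	static void reader_snapshot(long *west, long *dst)
	{
		unsigned seq;

		do {
			seq = read_seqbegin(&gtod_lock);
			*west = gtod_tz.tz_minuteswest;
			*dst = gtod_tz.tz_dsttime;
		} while (read_seqretry(&gtod_lock, seq)); /* retry on racing write */
	}
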
3828 --- a/arch/x86/mm/fault_32-xen.c
3829 +++ b/arch/x86/mm/fault_32-xen.c
3830 @@ -25,6 +25,7 @@
3831 #include <linux/kprobes.h>
3832 #include <linux/uaccess.h>
3833 #include <linux/kdebug.h>
3834 +#include <linux/kprobes.h>
3835
3836 #include <asm/system.h>
3837 #include <asm/desc.h>
3838 @@ -32,33 +33,27 @@
3839
3840 extern void die(const char *,struct pt_regs *,long);
3841
3842 -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
3843 -
3844 -int register_page_fault_notifier(struct notifier_block *nb)
3845 +#ifdef CONFIG_KPROBES
3846 +static inline int notify_page_fault(struct pt_regs *regs)
3847 {
3848 - vmalloc_sync_all();
3849 - return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
3850 -}
3851 -EXPORT_SYMBOL_GPL(register_page_fault_notifier);
3852 + int ret = 0;
3853
3854 -int unregister_page_fault_notifier(struct notifier_block *nb)
3855 -{
3856 - return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
3857 -}
3858 -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
3859 + /* kprobe_running() needs smp_processor_id() */
3860 + if (!user_mode_vm(regs)) {
3861 + preempt_disable();
3862 + if (kprobe_running() && kprobe_fault_handler(regs, 14))
3863 + ret = 1;
3864 + preempt_enable();
3865 + }
3866
3867 -static inline int notify_page_fault(struct pt_regs *regs, long err)
3868 + return ret;
3869 +}
3870 +#else
3871 +static inline int notify_page_fault(struct pt_regs *regs)
3872 {
3873 - struct die_args args = {
3874 - .regs = regs,
3875 - .str = "page fault",
3876 - .err = err,
3877 - .trapnr = 14,
3878 - .signr = SIGSEGV
3879 - };
3880 - return atomic_notifier_call_chain(&notify_page_fault_chain,
3881 - DIE_PAGE_FAULT, &args);
3882 + return 0;
3883 }
3884 +#endif
3885
3886 /*
3887 * Return EIP plus the CS segment base. The segment limit is also
3888 @@ -110,7 +105,7 @@
3889 LDT and other horrors are only used in user space. */
3890 if (seg & (1<<2)) {
3891 /* Must lock the LDT while reading it. */
3892 - down(&current->mm->context.sem);
3893 + mutex_lock(&current->mm->context.lock);
3894 desc = current->mm->context.ldt;
3895 desc = (void *)desc + (seg & ~7);
3896 } else {
3897 @@ -123,7 +118,7 @@
3898 base = get_desc_base((unsigned long *)desc);
3899
3900 if (seg & (1<<2)) {
3901 - up(&current->mm->context.sem);
3902 + mutex_unlock(&current->mm->context.lock);
3903 } else
3904 put_cpu();
3905
3906 @@ -244,7 +239,7 @@
3907 if (mfn_to_pfn(mfn) >= highstart_pfn)
3908 return;
3909 #endif
3910 - if (p[0] & _PAGE_PRESENT) {
3911 + if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) {
3912 page = mfn_to_pfn(mfn) << PAGE_SHIFT;
3913 p = (unsigned long *) __va(page);
3914 address &= 0x001fffff;
3915 @@ -270,7 +265,8 @@
3916 * it's allocated already.
3917 */
3918 if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
3919 - && (page & _PAGE_PRESENT)) {
3920 + && (page & _PAGE_PRESENT)
3921 + && !(page & _PAGE_PSE)) {
3922 page = machine_to_phys(page & PAGE_MASK);
3923 page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
3924 & (PTRS_PER_PTE - 1)];
3925 @@ -413,6 +409,11 @@
3926 int write, si_code;
3927 int fault;
3928
3929 + /*
3930 + * We can fault from pretty much anywhere, with unknown IRQ state.
3931 + */
3932 + trace_hardirqs_fixup();
3933 +
3934 /* get the address */
3935 address = read_cr2();
3936
3937 @@ -450,7 +451,7 @@
3938 /* Can take a spurious fault if mapping changes R/O -> R/W. */
3939 if (spurious_fault(regs, address, error_code))
3940 return;
3941 - if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3942 + if (notify_page_fault(regs))
3943 return;
3944 /*
3945 * Don't take the mm semaphore here. If we fixup a prefetch
3946 @@ -459,7 +460,7 @@
3947 goto bad_area_nosemaphore;
3948 }
3949
3950 - if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
3951 + if (notify_page_fault(regs))
3952 return;
3953
3954 /* It's safe to allow irq's after cr2 has been saved and the vmalloc
3955 @@ -478,7 +479,7 @@
3956
3957 /* When running in the kernel we expect faults to occur only to
3958 * addresses in user space. All other faults represent errors in the
3959 - * kernel and should generate an OOPS. Unfortunatly, in the case of an
3960 + * kernel and should generate an OOPS. Unfortunately, in the case of an
3961 * erroneous fault occurring in a code path which already holds mmap_sem
3962 * we will deadlock attempting to validate the fault against the
3963 * address space. Luckily the kernel only validly references user
3964 @@ -486,7 +487,7 @@
3965 * exceptions table.
3966 *
3967 * As the vast majority of faults will be valid we will only perform
3968 - * the source reference check when there is a possibilty of a deadlock.
3969 + * the source reference check when there is a possibility of a deadlock.
3970 * Attempt to lock the address space, if we cannot we then validate the
3971 * source. If this is invalid we can skip the address space check,
3972 * thus avoiding the deadlock.
3973 @@ -595,8 +596,8 @@
3974 printk_ratelimit()) {
3975 printk("%s%s[%d]: segfault at %08lx eip %08lx "
3976 "esp %08lx error %lx\n",
3977 - tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
3978 - tsk->comm, tsk->pid, address, regs->eip,
3979 + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
3980 + tsk->comm, task_pid_nr(tsk), address, regs->eip,
3981 regs->esp, error_code);
3982 }
3983 tsk->thread.cr2 = address;
3984 @@ -661,8 +662,7 @@
3985 printk(KERN_ALERT "BUG: unable to handle kernel paging"
3986 " request");
3987 printk(" at virtual address %08lx\n",address);
3988 - printk(KERN_ALERT " printing eip:\n");
3989 - printk("%08lx\n", regs->eip);
3990 + printk(KERN_ALERT "printing eip: %08lx\n", regs->eip);
3991 dump_fault_path(address);
3992 }
3993 tsk->thread.cr2 = address;
3994 @@ -678,14 +678,14 @@
3995 */
3996 out_of_memory:
3997 up_read(&mm->mmap_sem);
3998 - if (is_init(tsk)) {
3999 + if (is_global_init(tsk)) {
4000 yield();
4001 down_read(&mm->mmap_sem);
4002 goto survive;
4003 }
4004 printk("VM: killing process %s\n", tsk->comm);
4005 if (error_code & 4)
4006 - do_exit(SIGKILL);
4007 + do_group_exit(SIGKILL);
4008 goto no_context;
4009
4010 do_sigbus:
4011 --- a/arch/x86/mm/fault_64-xen.c
4012 +++ b/arch/x86/mm/fault_64-xen.c
4013 @@ -25,6 +25,7 @@
4014 #include <linux/kprobes.h>
4015 #include <linux/uaccess.h>
4016 #include <linux/kdebug.h>
4017 +#include <linux/kprobes.h>
4018
4019 #include <asm/system.h>
4020 #include <asm/pgalloc.h>
4021 @@ -40,34 +41,27 @@
4022 #define PF_RSVD (1<<3)
4023 #define PF_INSTR (1<<4)
4024
4025 -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
4026 -
4027 -/* Hook to register for page fault notifications */
4028 -int register_page_fault_notifier(struct notifier_block *nb)
4029 +#ifdef CONFIG_KPROBES
4030 +static inline int notify_page_fault(struct pt_regs *regs)
4031 {
4032 - vmalloc_sync_all();
4033 - return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
4034 -}
4035 -EXPORT_SYMBOL_GPL(register_page_fault_notifier);
4036 + int ret = 0;
4037
4038 -int unregister_page_fault_notifier(struct notifier_block *nb)
4039 -{
4040 - return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
4041 -}
4042 -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
4043 + /* kprobe_running() needs smp_processor_id() */
4044 + if (!user_mode(regs)) {
4045 + preempt_disable();
4046 + if (kprobe_running() && kprobe_fault_handler(regs, 14))
4047 + ret = 1;
4048 + preempt_enable();
4049 + }
4050
4051 -static inline int notify_page_fault(struct pt_regs *regs, long err)
4052 + return ret;
4053 +}
4054 +#else
4055 +static inline int notify_page_fault(struct pt_regs *regs)
4056 {
4057 - struct die_args args = {
4058 - .regs = regs,
4059 - .str = "page fault",
4060 - .err = err,
4061 - .trapnr = 14,
4062 - .signr = SIGSEGV
4063 - };
4064 - return atomic_notifier_call_chain(&notify_page_fault_chain,
4065 - DIE_PAGE_FAULT, &args);
4066 + return 0;
4067 }
4068 +#endif
4069
4070 /* Sometimes the CPU reports invalid exceptions on prefetch.
4071 Check that here and ignore.
4072 @@ -175,7 +169,7 @@
4073 pmd = pmd_offset(pud, address);
4074 if (bad_address(pmd)) goto bad;
4075 printk("PMD %lx ", pmd_val(*pmd));
4076 - if (!pmd_present(*pmd)) goto ret;
4077 + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
4078
4079 pte = pte_offset_kernel(pmd, address);
4080 if (bad_address(pte)) goto bad;
4081 @@ -294,7 +288,6 @@
4082 return 0;
4083 }
4084
4085 -static int page_fault_trace;
4086 int show_unhandled_signals = 1;
4087
4088
4089 @@ -371,6 +364,11 @@
4090 if (!user_mode(regs))
4091 error_code &= ~PF_USER; /* means kernel */
4092
4093 + /*
4094 + * We can fault from pretty much anywhere, with unknown IRQ state.
4095 + */
4096 + trace_hardirqs_fixup();
4097 +
4098 tsk = current;
4099 mm = tsk->mm;
4100 prefetchw(&mm->mmap_sem);
4101 @@ -408,7 +406,7 @@
4102 /* Can take a spurious fault if mapping changes R/O -> R/W. */
4103 if (spurious_fault(regs, address, error_code))
4104 return;
4105 - if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
4106 + if (notify_page_fault(regs))
4107 return;
4108 /*
4109 * Don't take the mm semaphore here. If we fixup a prefetch
4110 @@ -417,16 +415,12 @@
4111 goto bad_area_nosemaphore;
4112 }
4113
4114 - if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
4115 + if (notify_page_fault(regs))
4116 return;
4117
4118 if (likely(regs->eflags & X86_EFLAGS_IF))
4119 local_irq_enable();
4120
4121 - if (unlikely(page_fault_trace))
4122 - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
4123 - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
4124 -
4125 if (unlikely(error_code & PF_RSVD))
4126 pgtable_bad(address, regs, error_code);
4127
4128 @@ -447,7 +441,7 @@
4129 again:
4130 /* When running in the kernel we expect faults to occur only to
4131 * addresses in user space. All other faults represent errors in the
4132 - * kernel and should generate an OOPS. Unfortunatly, in the case of an
4133 + * kernel and should generate an OOPS. Unfortunately, in the case of an
4134 * erroneous fault occurring in a code path which already holds mmap_sem
4135 * we will deadlock attempting to validate the fault against the
4136 * address space. Luckily the kernel only validly references user
4137 @@ -455,7 +449,7 @@
4138 * exceptions table.
4139 *
4140 * As the vast majority of faults will be valid we will only perform
4141 - * the source reference check when there is a possibilty of a deadlock.
4142 + * the source reference check when there is a possibility of a deadlock.
4143 * Attempt to lock the address space, if we cannot we then validate the
4144 * source. If this is invalid we can skip the address space check,
4145 * thus avoiding the deadlock.
4146 @@ -557,7 +551,7 @@
4147 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
4148 printk_ratelimit()) {
4149 printk(
4150 - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
4151 + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
4152 tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
4153 tsk->comm, tsk->pid, address, regs->rip,
4154 regs->rsp, error_code);
4155 @@ -623,7 +617,7 @@
4156 */
4157 out_of_memory:
4158 up_read(&mm->mmap_sem);
4159 - if (is_init(current)) {
4160 + if (is_global_init(current)) {
4161 yield();
4162 goto again;
4163 }
4164 @@ -690,10 +684,3 @@
4165 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
4166 (__START_KERNEL & PGDIR_MASK)));
4167 }
4168 -
4169 -static int __init enable_pagefaulttrace(char *str)
4170 -{
4171 - page_fault_trace = 1;
4172 - return 1;
4173 -}
4174 -__setup("pagefaulttrace", enable_pagefaulttrace);
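
Both fault_32 and fault_64 above replace the old register/unregister_page_fault_notifier() chain with a kprobes check inlined straight into the fault handler; vector 14 is the page-fault trap number, and the preempt_disable() pair exists only because kprobe_running() reads a per-CPU variable. Pieced together from the + lines, the resulting helper reads:

	#include <linux/kprobes.h>
	#include <linux/preempt.h>

	#ifdef CONFIG_KPROBES
	static inline int notify_page_fault(struct pt_regs *regs)
	{
		int ret = 0;

		/* kprobe_running() needs smp_processor_id(), so pin the CPU */
		if (!user_mode(regs)) {
			preempt_disable();
			if (kprobe_running() && kprobe_fault_handler(regs, 14))
				ret = 1; /* a kprobe claimed this fault */
			preempt_enable();
		}
		return ret;
	}
	#else
	static inline int notify_page_fault(struct pt_regs *regs)
	{
		return 0; /* no kprobes configured: never swallow the fault */
	}
	#endif
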
4175 --- a/arch/x86/mm/init_32-xen.c
4176 +++ b/arch/x86/mm/init_32-xen.c
4177 @@ -96,7 +96,14 @@
4178 #else
4179 if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) {
4180 #endif
4181 - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
4182 + pte_t *page_table = NULL;
4183 +
4184 +#ifdef CONFIG_DEBUG_PAGEALLOC
4185 + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
4186 +#endif
4187 + if (!page_table)
4188 + page_table =
4189 + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
4190
4191 paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
4192 make_lowmem_page_readonly(page_table,
4193 @@ -104,7 +111,7 @@
4194 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
4195 BUG_ON(page_table != pte_offset_kernel(pmd, 0));
4196 }
4197 -
4198 +
4199 return pte_offset_kernel(pmd, 0);
4200 }
4201
4202 @@ -362,8 +369,13 @@
4203 static void __init set_highmem_pages_init(int bad_ppro)
4204 {
4205 int pfn;
4206 - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
4207 - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
4208 + for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) {
4209 + /*
4210 + * Holes under sparsemem might have no mem_map[]:
4211 + */
4212 + if (pfn_valid(pfn))
4213 + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
4214 + }
4215 totalram_pages += totalhigh_pages;
4216 }
4217 #endif /* CONFIG_FLATMEM */
4218 @@ -786,35 +798,18 @@
4219 return __add_pages(zone, start_pfn, nr_pages);
4220 }
4221
4222 -int remove_memory(u64 start, u64 size)
4223 -{
4224 - return -EINVAL;
4225 -}
4226 -EXPORT_SYMBOL_GPL(remove_memory);
4227 #endif
4228
4229 struct kmem_cache *pmd_cache;
4230
4231 void __init pgtable_cache_init(void)
4232 {
4233 - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
4234 -
4235 - if (PTRS_PER_PMD > 1) {
4236 + if (PTRS_PER_PMD > 1)
4237 pmd_cache = kmem_cache_create("pmd",
4238 - PTRS_PER_PMD*sizeof(pmd_t),
4239 - PTRS_PER_PMD*sizeof(pmd_t),
4240 - SLAB_PANIC,
4241 - pmd_ctor);
4242 - if (!SHARED_KERNEL_PMD) {
4243 - /* If we're in PAE mode and have a non-shared
4244 - kernel pmd, then the pgd size must be a
4245 - page size. This is because the pgd_list
4246 - links through the page structure, so there
4247 - can only be one pgd per page for this to
4248 - work. */
4249 - pgd_size = PAGE_SIZE;
4250 - }
4251 - }
4252 + PTRS_PER_PMD*sizeof(pmd_t),
4253 + PTRS_PER_PMD*sizeof(pmd_t),
4254 + SLAB_PANIC,
4255 + pmd_ctor);
4256 }
4257
4258 /*
4259 --- a/arch/x86/mm/init_64-xen.c
4260 +++ b/arch/x86/mm/init_64-xen.c
4261 @@ -761,7 +761,7 @@
4262 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
4263 This runs before bootmem is initialized and gets pages directly from the
4264 physical memory. To access them they are temporarily mapped. */
4265 -void __meminit init_memory_mapping(unsigned long start, unsigned long end)
4266 +void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
4267 {
4268 unsigned long next;
4269
4270 @@ -897,12 +897,6 @@
4271 }
4272 EXPORT_SYMBOL_GPL(arch_add_memory);
4273
4274 -int remove_memory(u64 start, u64 size)
4275 -{
4276 - return -EINVAL;
4277 -}
4278 -EXPORT_SYMBOL_GPL(remove_memory);
4279 -
4280 #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
4281 int memory_add_physaddr_to_nid(u64 start)
4282 {
4283 @@ -1176,14 +1170,6 @@
4284 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
4285 }
4286
4287 -#ifndef CONFIG_XEN
4288 -void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
4289 -{
4290 - return __alloc_bootmem_core(pgdat->bdata, size,
4291 - SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
4292 -}
4293 -#endif
4294 -
4295 const char *arch_vma_name(struct vm_area_struct *vma)
4296 {
4297 if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
4298 @@ -1192,3 +1178,48 @@
4299 return "[vsyscall]";
4300 return NULL;
4301 }
4302 +
4303 +#ifdef CONFIG_SPARSEMEM_VMEMMAP
4304 +/*
4305 + * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
4306 + */
4307 +int __meminit vmemmap_populate(struct page *start_page,
4308 + unsigned long size, int node)
4309 +{
4310 + unsigned long addr = (unsigned long)start_page;
4311 + unsigned long end = (unsigned long)(start_page + size);
4312 + unsigned long next;
4313 + pgd_t *pgd;
4314 + pud_t *pud;
4315 + pmd_t *pmd;
4316 +
4317 + for (; addr < end; addr = next) {
4318 + next = pmd_addr_end(addr, end);
4319 +
4320 + pgd = vmemmap_pgd_populate(addr, node);
4321 + if (!pgd)
4322 + return -ENOMEM;
4323 + pud = vmemmap_pud_populate(pgd, addr, node);
4324 + if (!pud)
4325 + return -ENOMEM;
4326 +
4327 + pmd = pmd_offset(pud, addr);
4328 + if (pmd_none(*pmd)) {
4329 + pte_t entry;
4330 + void *p = vmemmap_alloc_block(PMD_SIZE, node);
4331 + if (!p)
4332 + return -ENOMEM;
4333 +
4334 + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
4335 + mk_pte_huge(entry);
4336 + set_pmd(pmd, __pmd(pte_val(entry)));
4337 +
4338 + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
4339 + addr, addr + PMD_SIZE - 1, p, node);
4340 + } else
4341 + vmemmap_verify((pte_t *)pmd, node, addr, next);
4342 + }
4343 +
4344 + return 0;
4345 +}
4346 +#endif
4347 --- a/arch/x86/mm/pageattr_64-xen.c
4348 +++ b/arch/x86/mm/pageattr_64-xen.c
4349 @@ -324,11 +324,11 @@
4350 return base;
4351 }
4352
4353 -static void cache_flush_page(void *adr)
4354 +void clflush_cache_range(void *adr, int size)
4355 {
4356 int i;
4357 - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
4358 - asm volatile("clflush (%0)" :: "r" (adr + i));
4359 + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
4360 + clflush(adr+i);
4361 }
4362
4363 static void flush_kernel_map(void *arg)
4364 @@ -343,7 +343,7 @@
4365 asm volatile("wbinvd" ::: "memory");
4366 else list_for_each_entry(pg, l, lru) {
4367 void *adr = page_address(pg);
4368 - cache_flush_page(adr);
4369 + clflush_cache_range(adr, PAGE_SIZE);
4370 }
4371 __flush_tlb_all();
4372 }
4373 @@ -411,6 +411,7 @@
4374 split = split_large_page(address, prot, ref_prot2);
4375 if (!split)
4376 return -ENOMEM;
4377 + pgprot_val(ref_prot2) &= ~_PAGE_NX;
4378 set_pte(kpte, mk_pte(split, ref_prot2));
4379 kpte_page = split;
4380 }
4381 @@ -503,9 +504,14 @@
4382 struct page *pg, *next;
4383 struct list_head l;
4384
4385 - down_read(&init_mm.mmap_sem);
4386 + /*
4387 + * Write-protect the semaphore, to exclude two contexts
4388 + * doing a list_replace_init() call in parallel and to
4389 + * exclude new additions to the deferred_pages list:
4390 + */
4391 + down_write(&init_mm.mmap_sem);
4392 list_replace_init(&deferred_pages, &l);
4393 - up_read(&init_mm.mmap_sem);
4394 + up_write(&init_mm.mmap_sem);
4395
4396 flush_map(&l);
4397
4398 --- a/arch/x86/mm/pgtable_32-xen.c
4399 +++ b/arch/x86/mm/pgtable_32-xen.c
4400 @@ -6,6 +6,7 @@
4401 #include <linux/kernel.h>
4402 #include <linux/errno.h>
4403 #include <linux/mm.h>
4404 +#include <linux/nmi.h>
4405 #include <linux/swap.h>
4406 #include <linux/smp.h>
4407 #include <linux/highmem.h>
4408 @@ -46,6 +47,8 @@
4409 for_each_online_pgdat(pgdat) {
4410 pgdat_resize_lock(pgdat, &flags);
4411 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
4412 + if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
4413 + touch_nmi_watchdog();
4414 page = pgdat_page_nr(pgdat, i);
4415 total++;
4416 if (PageHighMem(page))
4417 @@ -200,7 +203,7 @@
4418 __free_page(pte);
4419 }
4420
4421 -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
4422 +void pmd_ctor(struct kmem_cache *cache, void *pmd)
4423 {
4424 memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
4425 }
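
The show_mem() loop above learns to poke the NMI watchdog every MAX_ORDER_NR_PAGES iterations, since walking every page of a large machine with the resize lock held takes long enough to trip the lockup detector. The idiom for any long kernel-side loop, sketched with hypothetical names:

	#include <linux/kernel.h>
	#include <linux/nmi.h>

	static void walk_many_items(unsigned long nr_items)
	{
		unsigned long i;

		for (i = 0; i < nr_items; i++) {
			if (unlikely(i % 1024 == 0))
				touch_nmi_watchdog(); /* tell the watchdog we're alive */
			/* ... per-item work ... */
		}
	}
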
4426 --- a/arch/x86/pci/Makefile
4427 +++ b/arch/x86/pci/Makefile
4428 @@ -3,3 +3,7 @@
4429 else
4430 include ${srctree}/arch/x86/pci/Makefile_64
4431 endif
4432 +
4433 +# pcifront should be after mmconfig.o and direct.o as it should only
4434 +# take over if direct access to the PCI bus is unavailable
4435 +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o
4436 --- a/arch/x86/pci/Makefile_32
4437 +++ b/arch/x86/pci/Makefile_32
4438 @@ -4,10 +4,6 @@
4439 obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o
4440 obj-$(CONFIG_PCI_DIRECT) += direct.o
4441
4442 -# pcifront should be after pcbios.o, mmconfig.o, and direct.o as it should only
4443 -# take over if direct access to the PCI bus is unavailable
4444 -obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o
4445 -
4446 pci-y := fixup.o
4447 pci-$(CONFIG_ACPI) += acpi.o
4448 pci-y += legacy.o irq.o
4449 --- a/arch/x86/pci/Makefile_64
4450 +++ b/arch/x86/pci/Makefile_64
4451 @@ -15,7 +15,3 @@
4452
4453 obj-$(CONFIG_NUMA) += k8-bus_64.o
4454
4455 -# pcifront should be after mmconfig.o and direct.o as it should only
4456 -# take over if direct access to the PCI bus is unavailable
4457 -obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront.o
4458 -
4459 --- a/arch/x86/pci/irq-xen.c
4460 +++ b/arch/x86/pci/irq-xen.c
4461 @@ -173,7 +173,7 @@
4462 }
4463
4464 /*
4465 - * Common IRQ routing practice: nybbles in config space,
4466 + * Common IRQ routing practice: nibbles in config space,
4467 * offset by some magic constant.
4468 */
4469 static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
4470 @@ -496,6 +496,26 @@
4471 return 1;
4472 }
4473
4474 +/*
4475 + * PicoPower PT86C523
4476 + */
4477 +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
4478 +{
4479 + outb(0x10 + ((pirq - 1) >> 1), 0x24);
4480 + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf);
4481 +}
4482 +
4483 +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq,
4484 + int irq)
4485 +{
4486 + unsigned int x;
4487 + outb(0x10 + ((pirq - 1) >> 1), 0x24);
4488 + x = inb(0x26);
4489 + x = ((pirq - 1) & 1) ? ((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq));
4490 + outb(x, 0x26);
4491 + return 1;
4492 +}
4493 +
4494 #ifdef CONFIG_PCI_BIOS
4495
4496 static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
4497 @@ -569,7 +589,7 @@
4498 /* FIXME: We should move some of the quirk fixup stuff here */
4499
4500 /*
4501 - * work arounds for some buggy BIOSes
4502 + * workarounds for some buggy BIOSes
4503 */
4504 if (device == PCI_DEVICE_ID_VIA_82C586_0) {
4505 switch(router->device) {
4506 @@ -725,6 +745,24 @@
4507 return 1;
4508 }
4509
4510 +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
4511 +{
4512 + switch (device) {
4513 + case PCI_DEVICE_ID_PICOPOWER_PT86C523:
4514 + r->name = "PicoPower PT86C523";
4515 + r->get = pirq_pico_get;
4516 + r->set = pirq_pico_set;
4517 + return 1;
4518 +
4519 + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP:
4520 + r->name = "PicoPower PT86C523 rev. BB+";
4521 + r->get = pirq_pico_get;
4522 + r->set = pirq_pico_set;
4523 + return 1;
4524 + }
4525 + return 0;
4526 +}
4527 +
4528 static __initdata struct irq_router_handler pirq_routers[] = {
4529 { PCI_VENDOR_ID_INTEL, intel_router_probe },
4530 { PCI_VENDOR_ID_AL, ali_router_probe },
4531 @@ -736,6 +774,7 @@
4532 { PCI_VENDOR_ID_VLSI, vlsi_router_probe },
4533 { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe },
4534 { PCI_VENDOR_ID_AMD, amd_router_probe },
4535 + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe },
4536 /* Someone with docs needs to add the ATI Radeon IGP */
4537 { 0, NULL }
4538 };
4539 @@ -1014,7 +1053,7 @@
4540 * Work around broken HP Pavilion Notebooks which assign USB to
4541 * IRQ 9 even though it is actually wired to IRQ 11
4542 */
4543 -static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d)
4544 +static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d)
4545 {
4546 if (!broken_hp_bios_irq9) {
4547 broken_hp_bios_irq9 = 1;
4548 @@ -1027,7 +1066,7 @@
4549 * Work around broken Acer TravelMate 360 Notebooks which assign
4550 * Cardbus to IRQ 11 even though it is actually wired to IRQ 10
4551 */
4552 -static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d)
4553 +static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
4554 {
4555 if (!acer_tm360_irqrouting) {
4556 acer_tm360_irqrouting = 1;
4557 --- a/drivers/xen/blkback/blkback.c
4558 +++ b/drivers/xen/blkback/blkback.c
4559 @@ -269,13 +269,10 @@
4560 }
4561 }
4562
4563 -static int end_block_io_op(struct bio *bio, unsigned int done, int error)
4564 +static void end_block_io_op(struct bio *bio, int error)
4565 {
4566 - if (bio->bi_size != 0)
4567 - return 1;
4568 __end_block_io_op(bio->bi_private, error);
4569 bio_put(bio);
4570 - return error;
4571 }
4572
4573
4574 --- a/drivers/xen/blkfront/blkfront.c
4575 +++ b/drivers/xen/blkfront/blkfront.c
4576 @@ -573,9 +573,8 @@
4577 struct blkfront_info *info = req->rq_disk->private_data;
4578 unsigned long buffer_mfn;
4579 blkif_request_t *ring_req;
4580 - struct bio *bio;
4581 struct bio_vec *bvec;
4582 - int idx;
4583 + struct req_iterator iter;
4584 unsigned long id;
4585 unsigned int fsect, lsect;
4586 int ref;
4587 @@ -609,34 +608,32 @@
4588 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
4589
4590 ring_req->nr_segments = 0;
4591 - rq_for_each_bio (bio, req) {
4592 - bio_for_each_segment (bvec, bio, idx) {
4593 - BUG_ON(ring_req->nr_segments
4594 - == BLKIF_MAX_SEGMENTS_PER_REQUEST);
4595 - buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
4596 - fsect = bvec->bv_offset >> 9;
4597 - lsect = fsect + (bvec->bv_len >> 9) - 1;
4598 - /* install a grant reference. */
4599 - ref = gnttab_claim_grant_reference(&gref_head);
4600 - BUG_ON(ref == -ENOSPC);
4601 -
4602 - gnttab_grant_foreign_access_ref(
4603 - ref,
4604 - info->xbdev->otherend_id,
4605 - buffer_mfn,
4606 - rq_data_dir(req) ? GTF_readonly : 0 );
4607 -
4608 - info->shadow[id].frame[ring_req->nr_segments] =
4609 - mfn_to_pfn(buffer_mfn);
4610 -
4611 - ring_req->seg[ring_req->nr_segments] =
4612 - (struct blkif_request_segment) {
4613 - .gref = ref,
4614 - .first_sect = fsect,
4615 - .last_sect = lsect };
4616 + rq_for_each_segment(bvec, req, iter) {
4617 + BUG_ON(ring_req->nr_segments
4618 + == BLKIF_MAX_SEGMENTS_PER_REQUEST);
4619 + buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
4620 + fsect = bvec->bv_offset >> 9;
4621 + lsect = fsect + (bvec->bv_len >> 9) - 1;
4622 + /* install a grant reference. */
4623 + ref = gnttab_claim_grant_reference(&gref_head);
4624 + BUG_ON(ref == -ENOSPC);
4625 +
4626 + gnttab_grant_foreign_access_ref(
4627 + ref,
4628 + info->xbdev->otherend_id,
4629 + buffer_mfn,
4630 + rq_data_dir(req) ? GTF_readonly : 0 );
4631 +
4632 + info->shadow[id].frame[ring_req->nr_segments] =
4633 + mfn_to_pfn(buffer_mfn);
4634 +
4635 + ring_req->seg[ring_req->nr_segments] =
4636 + (struct blkif_request_segment) {
4637 + .gref = ref,
4638 + .first_sect = fsect,
4639 + .last_sect = lsect };
4640
4641 - ring_req->nr_segments++;
4642 - }
4643 + ring_req->nr_segments++;
4644 }
4645
4646 info->ring.req_prod_pvt++;
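
The blkfront hunk above collapses the nested rq_for_each_bio()/bio_for_each_segment() walk into the single rq_for_each_segment() iterator introduced in 2.6.24; struct req_iterator carries the (bio, segment index) cursor that the old code kept in locals. A minimal sketch of the new iteration pattern (the sector-counting body is hypothetical):

	#include <linux/blkdev.h>
	#include <linux/bio.h>

	static unsigned count_request_sectors(struct request *req)
	{
		struct req_iterator iter;	/* tracks (bio, index) internally */
		struct bio_vec *bvec;
		unsigned sectors = 0;

		rq_for_each_segment(bvec, req, iter) {
			/* each bvec is one contiguous chunk: page + offset + length */
			sectors += bvec->bv_len >> 9;
		}
		return sectors;
	}
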
4647 --- a/drivers/xen/core/machine_kexec.c
4648 +++ b/drivers/xen/core/machine_kexec.c
4649 @@ -25,6 +25,10 @@
4650 struct resource *res;
4651 int k = 0;
4652
4653 + if (strstr(boot_command_line, "crashkernel="))
4654 + printk(KERN_WARNING "Ignoring crashkernel command line, "
4655 + "parameter will be supplied by xen\n");
4656 +
4657 if (!is_initial_xendomain())
4658 return;
4659
4660 --- a/drivers/xen/core/smpboot.c
4661 +++ b/drivers/xen/core/smpboot.c
4662 @@ -45,8 +45,8 @@
4663 EXPORT_SYMBOL(cpu_possible_map);
4664 cpumask_t cpu_initialized_map;
4665
4666 -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
4667 -EXPORT_SYMBOL(cpu_data);
4668 +DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
4669 +EXPORT_PER_CPU_SYMBOL(cpu_info);
4670
4671 #ifdef CONFIG_HOTPLUG_CPU
4672 DEFINE_PER_CPU(int, cpu_state) = { 0 };
4673 @@ -59,13 +59,13 @@
4674
4675 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
4676
4677 -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
4678 -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
4679 -EXPORT_SYMBOL(cpu_core_map);
4680 +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
4681 +DEFINE_PER_CPU(cpumask_t, cpu_core_map);
4682 +EXPORT_PER_CPU_SYMBOL(cpu_core_map);
4683
4684 #if defined(__i386__)
4685 -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
4686 -EXPORT_SYMBOL(x86_cpu_to_apicid);
4687 +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
4688 +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
4689 #endif
4690
4691 void __init prefill_possible_map(void)
4692 @@ -90,25 +90,25 @@
4693 static inline void
4694 set_cpu_sibling_map(unsigned int cpu)
4695 {
4696 - cpu_data[cpu].phys_proc_id = cpu;
4697 - cpu_data[cpu].cpu_core_id = 0;
4698 + cpu_data(cpu).phys_proc_id = cpu;
4699 + cpu_data(cpu).cpu_core_id = 0;
4700
4701 - cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
4702 - cpu_core_map[cpu] = cpumask_of_cpu(cpu);
4703 + per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu);
4704 + per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu);
4705
4706 - cpu_data[cpu].booted_cores = 1;
4707 + cpu_data(cpu).booted_cores = 1;
4708 }
4709
4710 static void
4711 remove_siblinginfo(unsigned int cpu)
4712 {
4713 - cpu_data[cpu].phys_proc_id = BAD_APICID;
4714 - cpu_data[cpu].cpu_core_id = BAD_APICID;
4715 + cpu_data(cpu).phys_proc_id = BAD_APICID;
4716 + cpu_data(cpu).cpu_core_id = BAD_APICID;
4717
4718 - cpus_clear(cpu_sibling_map[cpu]);
4719 - cpus_clear(cpu_core_map[cpu]);
4720 + cpus_clear(per_cpu(cpu_sibling_map, cpu));
4721 + cpus_clear(per_cpu(cpu_core_map, cpu));
4722
4723 - cpu_data[cpu].booted_cores = 0;
4724 + cpu_data(cpu).booted_cores = 0;
4725 }
4726
4727 static int __cpuinit xen_smp_intr_init(unsigned int cpu)
4728 @@ -167,9 +167,9 @@
4729 {
4730 cpu_init();
4731 #ifdef __i386__
4732 - identify_secondary_cpu(cpu_data + smp_processor_id());
4733 + identify_secondary_cpu(&current_cpu_data);
4734 #else
4735 - identify_cpu(cpu_data + smp_processor_id());
4736 + identify_cpu(&current_cpu_data);
4737 #endif
4738 touch_softlockup_watchdog();
4739 preempt_disable();
4740 @@ -270,16 +270,16 @@
4741 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
4742 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
4743 boot_cpu_data.apicid = apicid;
4744 - cpu_data[0] = boot_cpu_data;
4745 + cpu_data(0) = boot_cpu_data;
4746
4747 cpu_2_logical_apicid[0] = apicid;
4748 - x86_cpu_to_apicid[0] = apicid;
4749 + per_cpu(x86_cpu_to_apicid, 0) = apicid;
4750
4751 current_thread_info()->cpu = 0;
4752
4753 for (cpu = 0; cpu < NR_CPUS; cpu++) {
4754 - cpus_clear(cpu_sibling_map[cpu]);
4755 - cpus_clear(cpu_core_map[cpu]);
4756 + cpus_clear(per_cpu(cpu_sibling_map, cpu));
4757 + cpus_clear(per_cpu(cpu_core_map, cpu));
4758 }
4759
4760 set_cpu_sibling_map(0);
4761 @@ -324,11 +324,12 @@
4762 apicid = cpu;
4763 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
4764 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
4765 - cpu_data[cpu] = boot_cpu_data;
4766 - cpu_data[cpu].apicid = apicid;
4767 + cpu_data(cpu) = boot_cpu_data;
4768 + cpu_data(cpu).cpu_index = cpu;
4769 + cpu_data(cpu).apicid = apicid;
4770
4771 cpu_2_logical_apicid[cpu] = apicid;
4772 - x86_cpu_to_apicid[cpu] = apicid;
4773 + per_cpu(x86_cpu_to_apicid, cpu) = apicid;
4774
4775 #ifdef __x86_64__
4776 cpu_pda(cpu)->pcurrent = idle;
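
The smpboot.c hunks follow mainline 2.6.24 in moving cpu_data, cpu_sibling_map, cpu_core_map and x86_cpu_to_apicid out of NR_CPUS-sized arrays into per-CPU variables, with cpu_data(cpu) and current_cpu_data becoming accessor macros rather than array indexing. The declaration/access pattern, sketched with a hypothetical variable:

	#include <linux/percpu.h>
	#include <linux/cpumask.h>

	/* was: cpumask_t my_sibling_map[NR_CPUS] __cacheline_aligned; */
	DEFINE_PER_CPU(cpumask_t, my_sibling_map);
	EXPORT_PER_CPU_SYMBOL(my_sibling_map);

	static void clear_siblings(unsigned int cpu)
	{
		/* per_cpu(var, cpu) replaces var[cpu]; the data now lives in
		 * each CPU's own per-CPU area instead of one shared array */
		cpus_clear(per_cpu(my_sibling_map, cpu));
	}
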
4777 --- a/drivers/xen/netback/loopback.c
4778 +++ b/drivers/xen/netback/loopback.c
4779 @@ -285,9 +285,9 @@
4780 char dev_name[IFNAMSIZ];
4781
4782 sprintf(dev_name, "vif0.%d", i);
4783 - dev1 = dev_get_by_name(dev_name);
4784 + dev1 = dev_get_by_name(&init_net, dev_name);
4785 sprintf(dev_name, "veth%d", i);
4786 - dev2 = dev_get_by_name(dev_name);
4787 + dev2 = dev_get_by_name(&init_net, dev_name);
4788 if (dev1 && dev2) {
4789 unregister_netdev(dev2);
4790 unregister_netdev(dev1);
4791 --- a/drivers/xen/netback/netback.c
4792 +++ b/drivers/xen/netback/netback.c
4793 @@ -335,8 +335,8 @@
4794 {
4795 static struct net_device *eth0_dev = NULL;
4796 if (unlikely(eth0_dev == NULL))
4797 - eth0_dev = __dev_get_by_name("eth0");
4798 - netif_rx_schedule(eth0_dev);
4799 + eth0_dev = __dev_get_by_name(&init_net, "eth0");
4800 + netif_rx_schedule(eth0_dev, ???);
4801 }
4802 /*
4803 * Add following to poll() function in NAPI driver (Tigon3 is example):
4804 --- a/drivers/xen/netback/xenbus.c
4805 +++ b/drivers/xen/netback/xenbus.c
4806 @@ -148,12 +148,10 @@
4807 * and vif variables to the environment, for the benefit of the vif-* hotplug
4808 * scripts.
4809 */
4810 -static int netback_uevent(struct xenbus_device *xdev, char **envp,
4811 - int num_envp, char *buffer, int buffer_size)
4812 +static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
4813 {
4814 struct backend_info *be = xdev->dev.driver_data;
4815 netif_t *netif = be->netif;
4816 - int i = 0, length = 0;
4817 char *val;
4818
4819 DPRINTK("netback_uevent");
4820 @@ -165,15 +163,11 @@
4821 return err;
4822 }
4823 else {
4824 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
4825 - &length, "script=%s", val);
4826 + add_uevent_var(env, "script=%s", val);
4827 kfree(val);
4828 }
4829
4830 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
4831 - "vif=%s", netif->dev->name);
4832 -
4833 - envp[i] = NULL;
4834 + add_uevent_var(env, "vif=%s", netif->dev->name);
4835
4836 return 0;
4837 }
4838 --- a/drivers/xen/netfront/accel.c
4839 +++ b/drivers/xen/netfront/accel.c
4840 @@ -325,13 +325,13 @@
4841 DPRINTK("%p\n",vif_state);
4842
4843 /* Make sure there are no data path operations going on */
4844 - netif_poll_disable(vif_state->np->netdev);
4845 + napi_disable(&vif_state->np->napi);
4846 netif_tx_lock_bh(vif_state->np->netdev);
4847
4848 vif_state->hooks = vif_state->np->accelerator->hooks;
4849
4850 netif_tx_unlock_bh(vif_state->np->netdev);
4851 - netif_poll_enable(vif_state->np->netdev);
4852 + napi_enable(&vif_state->np->napi);
4853 }
4854
4855
4856 @@ -509,7 +509,7 @@
4857 struct netfront_accel_vif_state *vif_state)
4858 {
4859 /* Make sure there are no data path operations going on */
4860 - netif_poll_disable(vif_state->np->netdev);
4861 + napi_disable(&vif_state->np->napi);
4862 netif_tx_lock_bh(vif_state->np->netdev);
4863
4864 /*
4865 @@ -521,7 +521,7 @@
4866 vif_state->hooks = NULL;
4867
4868 netif_tx_unlock_bh(vif_state->np->netdev);
4869 - netif_poll_enable(vif_state->np->netdev);
4870 + napi_enable(&vif_state->np->napi);
4871 }
4872
4873
4874 --- a/drivers/xen/netfront/netfront.c
4875 +++ b/drivers/xen/netfront/netfront.c
4876 @@ -634,7 +634,7 @@
4877 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
4878 netfront_accelerator_call_stop_napi_irq(np, dev);
4879
4880 - netif_rx_schedule(dev);
4881 + netif_rx_schedule(dev, &np->napi);
4882 }
4883 }
4884 spin_unlock_bh(&np->rx_lock);
4885 @@ -706,7 +706,7 @@
4886
4887 netfront_accelerator_call_stop_napi_irq(np, dev);
4888
4889 - netif_rx_schedule(dev);
4890 + netif_rx_schedule(dev, &np->napi);
4891 }
4892
4893 static void network_alloc_rx_buffers(struct net_device *dev)
4894 @@ -1063,7 +1063,7 @@
4895 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
4896 netfront_accelerator_call_stop_napi_irq(np, dev);
4897
4898 - netif_rx_schedule(dev);
4899 + netif_rx_schedule(dev, &np->napi);
4900 dev->last_rx = jiffies;
4901 }
4902 }
4903 @@ -1316,16 +1316,17 @@
4904 #endif
4905 }
4906
4907 -static int netif_poll(struct net_device *dev, int *pbudget)
4908 +static int netif_poll(struct napi_struct *napi, int budget)
4909 {
4910 - struct netfront_info *np = netdev_priv(dev);
4911 + struct netfront_info *np = container_of(napi, struct netfront_info, napi);
4912 + struct net_device *dev = np->netdev;
4913 struct sk_buff *skb;
4914 struct netfront_rx_info rinfo;
4915 struct netif_rx_response *rx = &rinfo.rx;
4916 struct netif_extra_info *extras = rinfo.extras;
4917 RING_IDX i, rp;
4918 struct multicall_entry *mcl;
4919 - int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
4920 + int work_done, more_to_do = 1, accel_more_to_do = 1;
4921 struct sk_buff_head rxq;
4922 struct sk_buff_head errq;
4923 struct sk_buff_head tmpq;
4924 @@ -1345,8 +1346,6 @@
4925 skb_queue_head_init(&errq);
4926 skb_queue_head_init(&tmpq);
4927
4928 - if ((budget = *pbudget) > dev->quota)
4929 - budget = dev->quota;
4930 rp = np->rx.sring->rsp_prod;
4931 rmb(); /* Ensure we see queued responses up to 'rp'. */
4932
4933 @@ -1508,9 +1507,6 @@
4934 accel_more_to_do = 0;
4935 }
4936
4937 - *pbudget -= work_done;
4938 - dev->quota -= work_done;
4939 -
4940 if (work_done < budget) {
4941 local_irq_save(flags);
4942
4943 @@ -1527,14 +1523,14 @@
4944 }
4945
4946 if (!more_to_do && !accel_more_to_do)
4947 - __netif_rx_complete(dev);
4948 + __netif_rx_complete(dev, napi);
4949
4950 local_irq_restore(flags);
4951 }
4952
4953 spin_unlock(&np->rx_lock);
4954
4955 - return more_to_do | accel_more_to_do;
4956 + return work_done;
4957 }
4958
4959 static void netif_release_tx_bufs(struct netfront_info *np)
4960 @@ -2089,16 +2085,14 @@
4961 netdev->hard_start_xmit = network_start_xmit;
4962 netdev->stop = network_close;
4963 netdev->get_stats = network_get_stats;
4964 - netdev->poll = netif_poll;
4965 + netif_napi_add(netdev, &np->napi, netif_poll, 64);
4966 netdev->set_multicast_list = network_set_multicast_list;
4967 netdev->uninit = netif_uninit;
4968 netdev->set_mac_address = xennet_set_mac_address;
4969 netdev->change_mtu = xennet_change_mtu;
4970 - netdev->weight = 64;
4971 netdev->features = NETIF_F_IP_CSUM;
4972
4973 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
4974 - SET_MODULE_OWNER(netdev);
4975 SET_NETDEV_DEV(netdev, &dev->dev);
4976
4977 np->netdev = netdev;
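
The netfront changes above are the standard 2.6.24 NAPI conversion: the per-device poll hook and ->weight move into a struct napi_struct registered with netif_napi_add(), the poll callback takes (napi, budget) and returns the work done instead of adjusting *pbudget and dev->quota, and the schedule/complete calls gain the napi argument. A skeletal driver-side sketch under those assumptions, with hypothetical my_* names:

	#include <linux/netdevice.h>

	struct my_priv {
		struct napi_struct napi;
		struct net_device *netdev;
	};

	static int my_poll(struct napi_struct *napi, int budget)
	{
		struct my_priv *priv = container_of(napi, struct my_priv, napi);
		int work_done = 0;

		/* ... process up to 'budget' received packets, bumping work_done ... */

		if (work_done < budget)
			netif_rx_complete(priv->netdev, napi); /* re-arm interrupts */
		return work_done; /* no more *pbudget / dev->quota bookkeeping */
	}

	static void my_setup(struct net_device *dev, struct my_priv *priv)
	{
		priv->netdev = dev;
		netif_napi_add(dev, &priv->napi, my_poll, 64); /* 64 = old dev->weight */
	}

	/* in the interrupt handler: netif_rx_schedule(dev, &priv->napi); */
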
4978 --- a/drivers/xen/netfront/netfront.h
4979 +++ b/drivers/xen/netfront/netfront.h
4980 @@ -157,6 +157,8 @@
4981 spinlock_t tx_lock;
4982 spinlock_t rx_lock;
4983
4984 + struct napi_struct napi;
4985 +
4986 unsigned int irq;
4987 unsigned int copying_receiver;
4988 unsigned int carrier;
4989 --- a/drivers/xen/pciback/Makefile
4990 +++ b/drivers/xen/pciback/Makefile
4991 @@ -11,6 +11,4 @@
4992 pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
4993 pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
4994
4995 -ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
4996 -EXTRA_CFLAGS += -DDEBUG
4997 -endif
4998 +ccflags-$(CONFIG_XEN_PCIDEV_BE_DEBUG) += -DDEBUG
4999 --- a/drivers/xen/pcifront/Makefile
5000 +++ b/drivers/xen/pcifront/Makefile
5001 @@ -2,6 +2,4 @@
5002
5003 pcifront-y := pci_op.o xenbus.o pci.o
5004
5005 -ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y)
5006 -EXTRA_CFLAGS += -DDEBUG
5007 -endif
5008 +ccflags-$(CONFIG_XEN_PCIDEV_FE_DEBUG) += -DDEBUG
5009 --- a/drivers/xen/sfc_netback/accel_fwd.c
5010 +++ b/drivers/xen/sfc_netback/accel_fwd.c
5011 @@ -181,10 +181,11 @@
5012 unsigned long flags;
5013 cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
5014 struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
5015 + DECLARE_MAC_BUF(buf);
5016
5017 BUG_ON(fwd_priv == NULL);
5018
5019 - DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac));
5020 +	DPRINTK("Adding mac %s\n", print_mac(buf, mac));
5021
5022 spin_lock_irqsave(&fwd_set->fwd_lock, flags);
5023
5024 @@ -235,8 +236,9 @@
5025 unsigned long flags;
5026 cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
5027 struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
5028 + DECLARE_MAC_BUF(buf);
5029
5030 - DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac));
5031 +	DPRINTK("Removing mac %s\n", print_mac(buf, mac));
5032
5033 BUG_ON(fwd_priv == NULL);
5034
5035 @@ -394,14 +396,16 @@
5036
5037 if (is_broadcast_ether_addr(skb_mac_header(skb))
5038 && packet_is_arp_reply(skb)) {
5039 + DECLARE_MAC_BUF(buf);
5040 +
5041 /*
5042 * update our fast path forwarding to reflect this
5043 * gratuitous ARP
5044 */
5045 mac = skb_mac_header(skb)+ETH_ALEN;
5046
5047 - DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
5048 - __FUNCTION__, MAC_ARG(mac));
5049 +		DPRINTK("%s: found gratuitous ARP for %s\n",
5050 +			__FUNCTION__, print_mac(buf, mac));
5051
5052 spin_lock_irqsave(&fwd_set->fwd_lock, flags);
5053 /*
5054 --- a/drivers/xen/sfc_netback/accel_msg.c
5055 +++ b/drivers/xen/sfc_netback/accel_msg.c
5056 @@ -57,11 +57,11 @@
5057 {
5058 unsigned long lock_state;
5059 struct net_accel_msg *msg;
5060 + DECLARE_MAC_BUF(buf);
5061
5062 BUG_ON(bend == NULL || mac == NULL);
5063
5064 - VPRINTK("Sending local mac message: " MAC_FMT "\n",
5065 - MAC_ARG((const char *)mac));
5066 +	VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac));
5067
5068 msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
5069 &lock_state);
5070 --- a/drivers/xen/sfc_netfront/accel_msg.c
5071 +++ b/drivers/xen/sfc_netfront/accel_msg.c
5072 @@ -41,11 +41,13 @@
5073 /* Prime our interrupt */
5074 spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
5075 if (!netfront_accel_vi_enable_interrupts(vnic)) {
5076 + struct netfront_info *np = netdev_priv(vnic->net_dev);
5077 +
5078 /* Cripes, that was quick, better pass it up */
5079 netfront_accel_disable_net_interrupts(vnic);
5080 vnic->irq_enabled = 0;
5081 NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
5082 - netif_rx_schedule(vnic->net_dev);
5083 + netif_rx_schedule(vnic->net_dev, &np->napi);
5084 } else {
5085 /*
5086 * Nothing yet, make sure we get interrupts through
5087 @@ -72,6 +74,7 @@
5088 static void vnic_start_fastpath(netfront_accel_vnic *vnic)
5089 {
5090 struct net_device *net_dev = vnic->net_dev;
5091 + struct netfront_info *np = netdev_priv(net_dev);
5092 unsigned long flags;
5093
5094 DPRINTK("%s\n", __FUNCTION__);
5095 @@ -80,9 +83,9 @@
5096 vnic->tx_enabled = 1;
5097 spin_unlock_irqrestore(&vnic->tx_lock, flags);
5098
5099 - netif_poll_disable(net_dev);
5100 + napi_disable(&np->napi);
5101 vnic->poll_enabled = 1;
5102 - netif_poll_enable(net_dev);
5103 + napi_enable(&np->napi);
5104
5105 vnic_start_interrupts(vnic);
5106 }
5107 @@ -114,11 +117,11 @@
5108 spin_unlock_irqrestore(&vnic->tx_lock, flags1);
5109
5110 /* Must prevent polls and hold lock to modify poll_enabled */
5111 - netif_poll_disable(net_dev);
5112 + napi_disable(&np->napi);
5113 spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
5114 vnic->poll_enabled = 0;
5115 spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
5116 - netif_poll_enable(net_dev);
5117 + napi_enable(&np->napi);
5118 }
5119
5120
5121 @@ -326,8 +329,10 @@
5122 cuckoo_hash_mac_key key;
5123
5124 if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
5125 - DPRINTK("MAC has moved, could be local: " MAC_FMT "\n",
5126 - MAC_ARG(msg->u.localmac.mac));
5127 + DECLARE_MAC_BUF(buf);
5128 +
5129 +		DPRINTK("MAC has moved, could be local: %s\n",
5130 +			print_mac(buf, msg->u.localmac.mac));
5131 key = cuckoo_mac_to_key(msg->u.localmac.mac);
5132 spin_lock_irqsave(&vnic->table_lock, flags);
5133 /* Try to remove it, not a big deal if not there */
5134 @@ -515,6 +520,8 @@
5135
5136 spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
5137 if (vnic->irq_enabled) {
5138 + struct netfront_info *np = netdev_priv(net_dev);
5139 +
5140 netfront_accel_disable_net_interrupts(vnic);
5141 vnic->irq_enabled = 0;
5142 spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
5143 @@ -527,7 +534,7 @@
5144 vnic->stats.event_count_since_irq;
5145 vnic->stats.event_count_since_irq = 0;
5146 #endif
5147 - netif_rx_schedule(net_dev);
5148 + netif_rx_schedule(net_dev, &np->napi);
5149 }
5150 else {
5151 spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
5152 --- a/drivers/xen/sfc_netfront/accel_vi.c
5153 +++ b/drivers/xen/sfc_netfront/accel_vi.c
5154 @@ -641,8 +641,10 @@
5155 (cuckoo_hash_key *)(&key), &value);
5156
5157 if (!try_fastpath) {
5158 - VPRINTK("try fast path false for mac: " MAC_FMT "\n",
5159 - MAC_ARG(skb->data));
5160 + DECLARE_MAC_BUF(buf);
5161 +
5162 +		VPRINTK("try fast path false for mac: %s\n",
5163 +			print_mac(buf, skb->data));
5164
5165 return NETFRONT_ACCEL_STATUS_CANT;
5166 }
5167 @@ -768,9 +770,10 @@
5168 if (compare_ether_addr(skb->data, vnic->mac)) {
5169 struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
5170 u16 port;
5171 + DECLARE_MAC_BUF(buf);
5172
5173 - DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n",
5174 - __FUNCTION__, MAC_ARG(skb->data));
5175 +		DPRINTK("%s: saw wrong MAC address %s\n",
5176 +			__FUNCTION__, print_mac(buf, skb->data));
5177
5178 if (ip->protocol == IPPROTO_TCP) {
5179 struct tcphdr *tcp = (struct tcphdr *)
5180 --- a/drivers/xen/sfc_netutil/accel_util.h
5181 +++ b/drivers/xen/sfc_netutil/accel_util.h
5182 @@ -63,9 +63,6 @@
5183 DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
5184 } while(0)
5185
5186 -#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"
5187 -#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
5188 -
5189 #include <xen/xenbus.h>
5190
5191 /*! Map a set of pages from another domain
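
The sfc_netback/sfc_netfront hunks, together with the MAC_FMT/MAC_ARG removal just above, move these drivers onto the print_mac() helper added in 2.6.24: DECLARE_MAC_BUF() reserves an 18-byte on-stack buffer ("xx:xx:xx:xx:xx:xx" plus NUL) and print_mac() formats into it, so the address drops into a plain %s. A sketch, with a hypothetical caller:

	#include <linux/if_ether.h>
	#include <linux/kernel.h>

	static void log_station(const unsigned char *mac)
	{
		DECLARE_MAC_BUF(buf); /* char buf[18] */

		printk(KERN_DEBUG "station %s joined\n", print_mac(buf, mac));
	}
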
5192 --- a/drivers/xen/xenbus/xenbus_probe.c
5193 +++ b/drivers/xen/xenbus/xenbus_probe.c
5194 @@ -174,11 +174,9 @@
5195 }
5196
5197 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
5198 -static int xenbus_uevent_frontend(struct device *dev, char **envp,
5199 - int num_envp, char *buffer, int buffer_size)
5200 +static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env)
5201 {
5202 struct xenbus_device *xdev;
5203 - int length = 0, i = 0;
5204
5205 if (dev == NULL)
5206 return -ENODEV;
5207 @@ -187,12 +185,9 @@
5208 return -ENODEV;
5209
5210 /* stuff we want to pass to /sbin/hotplug */
5211 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5212 - "XENBUS_TYPE=%s", xdev->devicetype);
5213 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5214 - "XENBUS_PATH=%s", xdev->nodename);
5215 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5216 - "MODALIAS=xen:%s", xdev->devicetype);
5217 + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
5218 + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
5219 + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype);
5220
5221 return 0;
5222 }
5223 --- a/drivers/xen/xenbus/xenbus_probe_backend.c
5224 +++ b/drivers/xen/xenbus/xenbus_probe_backend.c
5225 @@ -60,8 +60,7 @@
5226 #include <xen/platform-compat.h>
5227 #endif
5228
5229 -static int xenbus_uevent_backend(struct device *dev, char **envp,
5230 - int num_envp, char *buffer, int buffer_size);
5231 +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env);
5232 static int xenbus_probe_backend(const char *type, const char *domid);
5233
5234 extern int read_otherend_details(struct xenbus_device *xendev,
5235 @@ -128,13 +127,10 @@
5236 },
5237 };
5238
5239 -static int xenbus_uevent_backend(struct device *dev, char **envp,
5240 - int num_envp, char *buffer, int buffer_size)
5241 +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env)
5242 {
5243 struct xenbus_device *xdev;
5244 struct xenbus_driver *drv;
5245 - int i = 0;
5246 - int length = 0;
5247
5248 DPRINTK("");
5249
5250 @@ -146,27 +142,16 @@
5251 return -ENODEV;
5252
5253 /* stuff we want to pass to /sbin/hotplug */
5254 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5255 - "XENBUS_TYPE=%s", xdev->devicetype);
5256 + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype);
5257
5258 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5259 - "XENBUS_PATH=%s", xdev->nodename);
5260 + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename);
5261
5262 - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
5263 - "XENBUS_BASE_PATH=%s", xenbus_backend.root);
5264 -
5265 - /* terminate, set to next free slot, shrink available space */
5266 - envp[i] = NULL;
5267 - envp = &envp[i];
5268 - num_envp -= i;
5269 - buffer = &buffer[length];
5270 - buffer_size -= length;
5271 + add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root);
5272
5273 if (dev->driver) {
5274 drv = to_xenbus_driver(dev->driver);
5275 if (drv && drv->uevent)
5276 - return drv->uevent(xdev, envp, num_envp, buffer,
5277 - buffer_size);
5278 + return drv->uevent(xdev, env);
5279 }
5280
5281 return 0;
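
The xenbus uevent hunks above track the 2.6.25 driver-core change to the uevent callback signature: the envp/num_envp/buffer/buffer_size juggling disappears behind an opaque struct kobj_uevent_env, and add_uevent_var() handles indexing, bounds checking and NUL termination itself. A sketch of a bus uevent callback under the new API (names hypothetical):

	#include <linux/device.h>
	#include <linux/kobject.h>

	static int my_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
	{
		/* add_uevent_var() returns nonzero when the env buffer is full */
		if (add_uevent_var(env, "MY_TYPE=%s", dev->bus_id))
			return -ENOMEM;
		return 0; /* variables are handed to /sbin/hotplug or netlink */
	}
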
5282 --- a/fs/xfs/linux-2.6/xfs_buf.c
5283 +++ b/fs/xfs/linux-2.6/xfs_buf.c
5284 @@ -187,7 +187,7 @@
5285 {
5286 a_list_t *aentry;
5287
5288 -#ifdef CONFIG_XEN
5289 +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5290 /*
5291 * Xen needs to be able to make sure it can get an exclusive
5292 * RO mapping of pages it wants to turn into a pagetable. If
5293 --- a/include/asm-x86/mach-xen/asm/agp.h
5294 +++ b/include/asm-x86/mach-xen/asm/agp.h
5295 @@ -1,20 +1,22 @@
5296 -#ifndef AGP_H
5297 -#define AGP_H 1
5298 +#ifndef _ASM_X86_AGP_H
5299 +#define _ASM_X86_AGP_H
5300
5301 #include <asm/pgtable.h>
5302 #include <asm/cacheflush.h>
5303 #include <asm/system.h>
5304
5305 -/*
5306 - * Functions to keep the agpgart mappings coherent with the MMU.
5307 - * The GART gives the CPU a physical alias of pages in memory. The alias region is
5308 - * mapped uncacheable. Make sure there are no conflicting mappings
5309 - * with different cachability attributes for the same page. This avoids
5310 - * data corruption on some CPUs.
5311 +/*
5312 + * Functions to keep the agpgart mappings coherent with the MMU. The
5313 + * GART gives the CPU a physical alias of pages in memory. The alias
5314 + * region is mapped uncacheable. Make sure there are no conflicting
5315 + * mappings with different cachability attributes for the same
5316 + * page. This avoids data corruption on some CPUs.
5317 */
5318
5319 -/* Caller's responsibility to call global_flush_tlb() for
5320 - * performance reasons */
5321 +/*
5322 + * Caller's responsibility to call global_flush_tlb() for performance
5323 + * reasons
5324 + */
5325 #define map_page_into_agp(page) ( \
5326 xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \
5327 ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE))
5328 @@ -24,9 +26,11 @@
5329 change_page_attr(page, 1, PAGE_KERNEL))
5330 #define flush_agp_mappings() global_flush_tlb()
5331
5332 -/* Could use CLFLUSH here if the cpu supports it. But then it would
5333 - need to be called for each cacheline of the whole page so it may not be
5334 - worth it. Would need a page for it. */
5335 +/*
5336 + * Could use CLFLUSH here if the cpu supports it. But then it would
5337 + * need to be called for each cacheline of the whole page so it may
5338 + * not be worth it. Would need a page for it.
5339 + */
5340 #define flush_agp_cache() wbinvd()
5341
5342 /* Convert a physical address to an address suitable for the GART. */
5343 --- /dev/null
5344 +++ b/include/asm-x86/mach-xen/asm/desc.h
5345 @@ -0,0 +1,5 @@
5346 +#ifdef CONFIG_X86_32
5347 +# include "desc_32.h"
5348 +#else
5349 +# include "desc_64.h"
5350 +#endif
5351 --- a/include/asm-x86/mach-xen/asm/desc_64.h
5352 +++ b/include/asm-x86/mach-xen/asm/desc_64.h
5353 @@ -34,6 +34,18 @@
5354 put_cpu();
5355 }
5356
5357 +#ifndef CONFIG_X86_NO_TSS
5358 +static inline unsigned long __store_tr(void)
5359 +{
5360 + unsigned long tr;
5361 +
5362 + asm volatile ("str %w0":"=r" (tr));
5363 + return tr;
5364 +}
5365 +
5366 +#define store_tr(tr) (tr) = __store_tr()
5367 +#endif
5368 +
5369 /*
5370 * This is the ldt that every process will get unless we need
5371 * something other than this.
5372 @@ -47,6 +59,18 @@
5373 /* the cpu gdt accessor */
5374 #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
5375
5376 +#ifndef CONFIG_XEN
5377 +static inline void load_gdt(const struct desc_ptr *ptr)
5378 +{
5379 + asm volatile("lgdt %w0"::"m" (*ptr));
5380 +}
5381 +
5382 +static inline void store_gdt(struct desc_ptr *ptr)
5383 +{
5384 + asm("sgdt %w0":"=m" (*ptr));
5385 +}
5386 +#endif
5387 +
5388 static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
5389 {
5390 struct gate_struct s;
5391 @@ -87,6 +111,16 @@
5392 {
5393 _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
5394 }
5395 +
5396 +static inline void load_idt(const struct desc_ptr *ptr)
5397 +{
5398 + asm volatile("lidt %w0"::"m" (*ptr));
5399 +}
5400 +
5401 +static inline void store_idt(struct desc_ptr *dtr)
5402 +{
5403 + asm("sidt %w0":"=m" (*dtr));
5404 +}
5405 #endif
5406
5407 static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type,
5408 --- /dev/null
5409 +++ b/include/asm-x86/mach-xen/asm/dma-mapping.h
5410 @@ -0,0 +1,5 @@
5411 +#ifdef CONFIG_X86_32
5412 +# include "dma-mapping_32.h"
5413 +#else
5414 +# include "dma-mapping_64.h"
5415 +#endif
5416 --- a/include/asm-x86/mach-xen/asm/dma-mapping_32.h
5417 +++ b/include/asm-x86/mach-xen/asm/dma-mapping_32.h
5418 @@ -7,9 +7,9 @@
5419 */
5420
5421 #include <linux/mm.h>
5422 +#include <linux/scatterlist.h>
5423 #include <asm/cache.h>
5424 #include <asm/io.h>
5425 -#include <asm/scatterlist.h>
5426 #include <asm/swiotlb.h>
5427
5428 static inline int
5429 --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h
5430 +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h
5431 @@ -6,8 +6,7 @@
5432 * documentation.
5433 */
5434
5435 -
5436 -#include <asm/scatterlist.h>
5437 +#include <linux/scatterlist.h>
5438 #include <asm/io.h>
5439
5440 struct dma_mapping_ops {
5441 @@ -203,4 +202,4 @@
5442
5443 #endif /* _X8664_DMA_MAPPING_H */
5444
5445 -#include <asm-i386/mach-xen/asm/dma-mapping.h>
5446 +#include "dma-mapping_32.h"
5447 --- /dev/null
5448 +++ b/include/asm-x86/mach-xen/asm/e820.h
5449 @@ -0,0 +1,33 @@
5450 +#ifndef __ASM_E820_H
5451 +#define __ASM_E820_H
5452 +#define E820MAP 0x2d0 /* our map */
5453 +#define E820MAX 128 /* number of entries in E820MAP */
5454 +#define E820NR 0x1e8 /* # entries in E820MAP */
5455 +
5456 +#define E820_RAM 1
5457 +#define E820_RESERVED 2
5458 +#define E820_ACPI 3
5459 +#define E820_NVS 4
5460 +
5461 +#ifndef __ASSEMBLY__
5462 +struct e820entry {
5463 + __u64 addr; /* start of memory segment */
5464 + __u64 size; /* size of memory segment */
5465 + __u32 type; /* type of memory segment */
5466 +} __attribute__((packed));
5467 +
5468 +struct e820map {
5469 + __u32 nr_map;
5470 + struct e820entry map[E820MAX];
5471 +};
5472 +#endif /* __ASSEMBLY__ */
5473 +
5474 +#ifdef __KERNEL__
5475 +#ifdef CONFIG_X86_32
5476 +# include "../../e820_32.h"
5477 +#else
5478 +# include "e820_64.h"
5479 +#endif
5480 +#endif /* __KERNEL__ */
5481 +
5482 +#endif /* __ASM_E820_H */
5483 --- a/include/asm-x86/mach-xen/asm/e820_64.h
5484 +++ b/include/asm-x86/mach-xen/asm/e820_64.h
5485 @@ -11,27 +11,7 @@
5486 #ifndef __E820_HEADER
5487 #define __E820_HEADER
5488
5489 -#define E820MAP 0x2d0 /* our map */
5490 -#define E820MAX 128 /* number of entries in E820MAP */
5491 -#define E820NR 0x1e8 /* # entries in E820MAP */
5492 -
5493 -#define E820_RAM 1
5494 -#define E820_RESERVED 2
5495 -#define E820_ACPI 3
5496 -#define E820_NVS 4
5497 -
5498 #ifndef __ASSEMBLY__
5499 -struct e820entry {
5500 - u64 addr; /* start of memory segment */
5501 - u64 size; /* size of memory segment */
5502 - u32 type; /* type of memory segment */
5503 -} __attribute__((packed));
5504 -
5505 -struct e820map {
5506 - u32 nr_map;
5507 - struct e820entry map[E820MAX];
5508 -};
5509 -
5510 extern unsigned long find_e820_area(unsigned long start, unsigned long end,
5511 unsigned size);
5512 extern void add_memory_region(unsigned long start, unsigned long size,
5513 --- /dev/null
5514 +++ b/include/asm-x86/mach-xen/asm/fixmap.h
5515 @@ -0,0 +1,5 @@
5516 +#ifdef CONFIG_X86_32
5517 +# include "fixmap_32.h"
5518 +#else
5519 +# include "fixmap_64.h"
5520 +#endif
5521 --- /dev/null
5522 +++ b/include/asm-x86/mach-xen/asm/hw_irq.h
5523 @@ -0,0 +1,5 @@
5524 +#ifdef CONFIG_X86_32
5525 +# include "hw_irq_32.h"
5526 +#else
5527 +# include "hw_irq_64.h"
5528 +#endif
5529 --- a/include/asm-x86/mach-xen/asm/hw_irq_64.h
5530 +++ b/include/asm-x86/mach-xen/asm/hw_irq_64.h
5531 @@ -41,22 +41,22 @@
5532 /*
5533 * Vectors 0x30-0x3f are used for ISA interrupts.
5534 */
5535 -#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10
5536 -#define IRQ1_VECTOR IRQ0_VECTOR + 1
5537 -#define IRQ2_VECTOR IRQ0_VECTOR + 2
5538 -#define IRQ3_VECTOR IRQ0_VECTOR + 3
5539 -#define IRQ4_VECTOR IRQ0_VECTOR + 4
5540 -#define IRQ5_VECTOR IRQ0_VECTOR + 5
5541 -#define IRQ6_VECTOR IRQ0_VECTOR + 6
5542 -#define IRQ7_VECTOR IRQ0_VECTOR + 7
5543 -#define IRQ8_VECTOR IRQ0_VECTOR + 8
5544 -#define IRQ9_VECTOR IRQ0_VECTOR + 9
5545 -#define IRQ10_VECTOR IRQ0_VECTOR + 10
5546 -#define IRQ11_VECTOR IRQ0_VECTOR + 11
5547 -#define IRQ12_VECTOR IRQ0_VECTOR + 12
5548 -#define IRQ13_VECTOR IRQ0_VECTOR + 13
5549 -#define IRQ14_VECTOR IRQ0_VECTOR + 14
5550 -#define IRQ15_VECTOR IRQ0_VECTOR + 15
5551 +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
5552 +#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
5553 +#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
5554 +#define IRQ3_VECTOR (IRQ0_VECTOR + 3)
5555 +#define IRQ4_VECTOR (IRQ0_VECTOR + 4)
5556 +#define IRQ5_VECTOR (IRQ0_VECTOR + 5)
5557 +#define IRQ6_VECTOR (IRQ0_VECTOR + 6)
5558 +#define IRQ7_VECTOR (IRQ0_VECTOR + 7)
5559 +#define IRQ8_VECTOR (IRQ0_VECTOR + 8)
5560 +#define IRQ9_VECTOR (IRQ0_VECTOR + 9)
5561 +#define IRQ10_VECTOR (IRQ0_VECTOR + 10)
5562 +#define IRQ11_VECTOR (IRQ0_VECTOR + 11)
5563 +#define IRQ12_VECTOR (IRQ0_VECTOR + 12)
5564 +#define IRQ13_VECTOR (IRQ0_VECTOR + 13)
5565 +#define IRQ14_VECTOR (IRQ0_VECTOR + 14)
5566 +#define IRQ15_VECTOR (IRQ0_VECTOR + 15)
5567
5568 /*
5569 * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
5570 @@ -150,9 +150,6 @@
5571
5572 #define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
5573
5574 -#define __STR(x) #x
5575 -#define STR(x) __STR(x)
5576 -
5577 #include <asm/ptrace.h>
5578
5579 #define IRQ_NAME2(nr) nr##_interrupt(void)
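The IRQn_VECTOR hunk above is a pure macro-hygiene fix: without parentheses the expansion binds incorrectly inside a larger expression. An illustrative comparison (BAD_VECTOR is hypothetical):

        #define BAD_VECTOR   FIRST_EXTERNAL_VECTOR + 0x10      /* old style */
        /* 2 * BAD_VECTOR   ->  2 * FIRST_EXTERNAL_VECTOR + 0x10    (wrong) */
        /* 2 * IRQ0_VECTOR  ->  2 * (FIRST_EXTERNAL_VECTOR + 0x10)  (right) */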
5580 --- /dev/null
5581 +++ b/include/asm-x86/mach-xen/asm/hypercall.h
5582 @@ -0,0 +1,5 @@
5583 +#ifdef CONFIG_X86_32
5584 +# include "hypercall_32.h"
5585 +#else
5586 +# include "hypercall_64.h"
5587 +#endif
5588 --- /dev/null
5589 +++ b/include/asm-x86/mach-xen/asm/io.h
5590 @@ -0,0 +1,5 @@
5591 +#ifdef CONFIG_X86_32
5592 +# include "io_32.h"
5593 +#else
5594 +# include "io_64.h"
5595 +#endif
5596 --- a/include/asm-x86/mach-xen/asm/io_32.h
5597 +++ b/include/asm-x86/mach-xen/asm/io_32.h
5598 @@ -212,17 +212,22 @@
5599
5600 #define mmiowb()
5601
5602 -static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
5603 +static inline void
5604 +memset_io(volatile void __iomem *addr, unsigned char val, int count)
5605 {
5606 - memset((void __force *) addr, val, count);
5607 + memset((void __force *)addr, val, count);
5608 }
5609 -static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
5610 +
5611 +static inline void
5612 +memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
5613 {
5614 - __memcpy(dst, (void __force *) src, count);
5615 + __memcpy(dst, (const void __force *)src, count);
5616 }
5617 -static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
5618 +
5619 +static inline void
5620 +memcpy_toio(volatile void __iomem *dst, const void *src, int count)
5621 {
5622 - __memcpy((void __force *) dst, src, count);
5623 + __memcpy((void __force *)dst, src, count);
5624 }
5625
5626 /*
5627 @@ -250,18 +255,9 @@
5628 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
5629 }
5630
5631 -#define dma_cache_inv(_start,_size) flush_write_buffers()
5632 -#define dma_cache_wback(_start,_size) flush_write_buffers()
5633 -#define dma_cache_wback_inv(_start,_size) flush_write_buffers()
5634 -
5635 #else
5636
5637 -/* Nothing to do */
5638 -
5639 -#define dma_cache_inv(_start,_size) do { } while (0)
5640 -#define dma_cache_wback(_start,_size) do { } while (0)
5641 -#define dma_cache_wback_inv(_start,_size) do { } while (0)
5642 -#define flush_write_buffers()
5643 +#define flush_write_buffers() do { } while (0)
5644
5645 #endif
5646
5647 --- a/include/asm-x86/mach-xen/asm/io_64.h
5648 +++ b/include/asm-x86/mach-xen/asm/io_64.h
5649 @@ -268,12 +268,6 @@
5650 */
5651 #define __ISA_IO_base ((char __iomem *)(fix_to_virt(FIX_ISAMAP_BEGIN)))
5652
5653 -/* Nothing to do */
5654 -
5655 -#define dma_cache_inv(_start,_size) do { } while (0)
5656 -#define dma_cache_wback(_start,_size) do { } while (0)
5657 -#define dma_cache_wback_inv(_start,_size) do { } while (0)
5658 -
5659 #define flush_write_buffers()
5660
5661 extern int iommu_bio_merge;
5662 --- /dev/null
5663 +++ b/include/asm-x86/mach-xen/asm/irq.h
5664 @@ -0,0 +1,5 @@
5665 +#ifdef CONFIG_X86_32
5666 +# include "../../irq_32.h"
5667 +#else
5668 +# include "irq_64.h"
5669 +#endif
5670 --- /dev/null
5671 +++ b/include/asm-x86/mach-xen/asm/irqflags.h
5672 @@ -0,0 +1,5 @@
5673 +#ifdef CONFIG_X86_32
5674 +# include "irqflags_32.h"
5675 +#else
5676 +# include "irqflags_64.h"
5677 +#endif
5678 --- a/include/asm-x86/mach-xen/asm/irqflags_32.h
5679 +++ b/include/asm-x86/mach-xen/asm/irqflags_32.h
5680 @@ -151,6 +151,23 @@
5681 \
5682 raw_irqs_disabled_flags(flags); \
5683 })
5684 +
5685 +/*
5686 + * makes the traced hardirq state match with the machine state
5687 + *
5688 + * should be a rarely used function, only in places where its
5689 + * otherwise impossible to know the irq state, like in traps.
5690 + */
5691 +static inline void trace_hardirqs_fixup_flags(unsigned long flags)
5692 +{
5693 + if (raw_irqs_disabled_flags(flags))
5694 + trace_hardirqs_off();
5695 + else
5696 + trace_hardirqs_on();
5697 +}
5698 +
5699 +#define trace_hardirqs_fixup() \
5700 + trace_hardirqs_fixup_flags(__raw_local_save_flags())
5701 #endif /* __ASSEMBLY__ */
5702
5703 /*
5704 @@ -182,4 +199,17 @@
5705 # define TRACE_IRQS_OFF
5706 #endif
5707
5708 +#ifdef CONFIG_DEBUG_LOCK_ALLOC
5709 +# define LOCKDEP_SYS_EXIT \
5710 + pushl %eax; \
5711 + pushl %ecx; \
5712 + pushl %edx; \
5713 + call lockdep_sys_exit; \
5714 + popl %edx; \
5715 + popl %ecx; \
5716 + popl %eax;
5717 +#else
5718 +# define LOCKDEP_SYS_EXIT
5719 +#endif
5720 +
5721 #endif
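trace_hardirqs_fixup(), added above (and in the 64-bit header below), resynchronizes lockdep's traced hardirq state with the real machine state. It is meant for entry points where the IRQ state at entry is otherwise unknowable, typically traps; a sketch of the call pattern (the handler name and body are illustrative, not from this patch):

        void example_trap_handler(struct pt_regs *regs)
        {
                /* flags at trap entry are unknown; resync lockdep first */
                trace_hardirqs_fixup();
                /* ... normal trap handling ... */
        }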
5722 --- a/include/asm-x86/mach-xen/asm/irqflags_64.h
5723 +++ b/include/asm-x86/mach-xen/asm/irqflags_64.h
5724 @@ -116,6 +116,22 @@
5725 })
5726
5727 /*
5728 + * makes the traced hardirq state match with the machine state
5729 + *
5730 + * should be a rarely used function, only in places where its
5731 + * otherwise impossible to know the irq state, like in traps.
5732 + */
5733 +static inline void trace_hardirqs_fixup_flags(unsigned long flags)
5734 +{
5735 + if (raw_irqs_disabled_flags(flags))
5736 + trace_hardirqs_off();
5737 + else
5738 + trace_hardirqs_on();
5739 +}
5740 +
5741 +#define trace_hardirqs_fixup() \
5742 + trace_hardirqs_fixup_flags(__raw_local_save_flags())
5743 +/*
5744 * Used in the idle loop; sti takes one instruction cycle
5745 * to complete:
5746 */
5747 @@ -143,6 +159,20 @@
5748 # define TRACE_IRQS_ON
5749 # define TRACE_IRQS_OFF
5750 # endif
5751 +# ifdef CONFIG_DEBUG_LOCK_ALLOC
5752 +# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
5753 +# define LOCKDEP_SYS_EXIT_IRQ \
5754 + TRACE_IRQS_ON; \
5755 + sti; \
5756 + SAVE_REST; \
5757 + LOCKDEP_SYS_EXIT; \
5758 + RESTORE_REST; \
5759 + cli; \
5760 + TRACE_IRQS_OFF;
5761 +# else
5762 +# define LOCKDEP_SYS_EXIT
5763 +# define LOCKDEP_SYS_EXIT_IRQ
5764 +# endif
5765 #endif
5766
5767 #endif
5768 --- /dev/null
5769 +++ b/include/asm-x86/mach-xen/asm/maddr.h
5770 @@ -0,0 +1,5 @@
5771 +#ifdef CONFIG_X86_32
5772 +# include "maddr_32.h"
5773 +#else
5774 +# include "maddr_64.h"
5775 +#endif
5776 --- a/include/asm-x86/mach-xen/asm/mmu.h
5777 +++ b/include/asm-x86/mach-xen/asm/mmu.h
5778 @@ -1,21 +1,33 @@
5779 -#ifndef __i386_MMU_H
5780 -#define __i386_MMU_H
5781 +#ifndef _ASM_X86_MMU_H
5782 +#define _ASM_X86_MMU_H
5783 +
5784 +#include <linux/spinlock.h>
5785 +#include <linux/mutex.h>
5786
5787 -#include <asm/semaphore.h>
5788 /*
5789 - * The i386 doesn't have a mmu context, but
5790 + * The x86 doesn't have a mmu context, but
5791 * we put the segment information here.
5792 *
5793 * cpu_vm_mask is used to optimize ldt flushing.
5794 */
5795 typedef struct {
5796 - int size;
5797 - struct semaphore sem;
5798 void *ldt;
5799 +#ifdef CONFIG_X86_64
5800 + rwlock_t ldtlock;
5801 +#endif
5802 + int size;
5803 + struct mutex lock;
5804 void *vdso;
5805 -#ifdef CONFIG_XEN
5806 - int has_foreign_mappings;
5807 + unsigned has_foreign_mappings:1;
5808 +#ifdef CONFIG_X86_64
5809 + unsigned pinned:1;
5810 + struct list_head unpinned;
5811 #endif
5812 } mm_context_t;
5813
5814 +#ifdef CONFIG_X86_64
5815 +extern struct list_head mm_unpinned;
5816 +extern spinlock_t mm_unpinned_lock;
5817 #endif
5818 +
5819 +#endif /* _ASM_X86_MMU_H */
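The mm_context_t rework above merges the 32- and 64-bit layouts and replaces the LDT semaphore with a mutex; callers change mechanically, per this sketch (abridged, following the pattern the 2.6.24 ldt code uses):

        mutex_lock(&mm->context.lock);          /* was: down(&mm->context.sem) */
        /* ... read or resize mm->context.ldt ... */
        mutex_unlock(&mm->context.lock);        /* was: up(&mm->context.sem) */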
5820 --- a/include/asm-x86/mach-xen/asm/mmu_64.h
5821 +++ /dev/null
5822 @@ -1,31 +0,0 @@
5823 -#ifndef __x86_64_MMU_H
5824 -#define __x86_64_MMU_H
5825 -
5826 -#include <linux/spinlock.h>
5827 -#include <asm/semaphore.h>
5828 -
5829 -/*
5830 - * The x86_64 doesn't have a mmu context, but
5831 - * we put the segment information here.
5832 - *
5833 - * cpu_vm_mask is used to optimize ldt flushing.
5834 - */
5835 -typedef struct {
5836 - void *ldt;
5837 - rwlock_t ldtlock;
5838 - int size;
5839 - struct semaphore sem;
5840 - void *vdso;
5841 -#ifdef CONFIG_XEN
5842 - unsigned pinned:1;
5843 - unsigned has_foreign_mappings:1;
5844 - struct list_head unpinned;
5845 -#endif
5846 -} mm_context_t;
5847 -
5848 -#ifdef CONFIG_XEN
5849 -extern struct list_head mm_unpinned;
5850 -extern spinlock_t mm_unpinned_lock;
5851 -#endif
5852 -
5853 -#endif
5854 --- /dev/null
5855 +++ b/include/asm-x86/mach-xen/asm/mmu_context.h
5856 @@ -0,0 +1,5 @@
5857 +#ifdef CONFIG_X86_32
5858 +# include "mmu_context_32.h"
5859 +#else
5860 +# include "mmu_context_64.h"
5861 +#endif
5862 --- /dev/null
5863 +++ b/include/asm-x86/mach-xen/asm/nmi.h
5864 @@ -0,0 +1,7 @@
5865 +#ifdef CONFIG_X86_32
5866 +# include "../../nmi_32.h"
5867 +#else
5868 +# include "../../nmi_64.h"
5869 +# undef get_nmi_reason
5870 +# include "../mach_traps.h"
5871 +#endif
5872 --- /dev/null
5873 +++ b/include/asm-x86/mach-xen/asm/page.h
5874 @@ -0,0 +1,13 @@
5875 +#ifdef __KERNEL__
5876 +# ifdef CONFIG_X86_32
5877 +# include "page_32.h"
5878 +# else
5879 +# include "page_64.h"
5880 +# endif
5881 +#else
5882 +# ifdef __i386__
5883 +# include "page_32.h"
5884 +# else
5885 +# include "page_64.h"
5886 +# endif
5887 +#endif
5888 --- a/include/asm-x86/mach-xen/asm/page_64.h
5889 +++ b/include/asm-x86/mach-xen/asm/page_64.h
5890 @@ -207,6 +207,7 @@
5891 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
5892
5893 #define __HAVE_ARCH_GATE_AREA 1
5894 +#define vmemmap ((struct page *)VMEMMAP_START)
5895
5896 #include <asm-generic/memory_model.h>
5897 #include <asm-generic/page.h>
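The vmemmap definition above supports the SPARSEMEM_VMEMMAP memory model, where page <-> pfn conversion is plain pointer arithmetic on a virtually contiguous struct page array; with it, asm-generic/memory_model.h boils down to essentially:

        #define __pfn_to_page(pfn)      (vmemmap + (pfn))
        #define __page_to_pfn(page)     (unsigned long)((page) - vmemmap)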
5898 --- /dev/null
5899 +++ b/include/asm-x86/mach-xen/asm/pci.h
5900 @@ -0,0 +1,100 @@
5901 +#ifndef __x86_PCI_H
5902 +#define __x86_PCI_H
5903 +
5904 +#include <linux/mm.h> /* for struct page */
5905 +#include <linux/types.h>
5906 +#include <linux/slab.h>
5907 +#include <linux/string.h>
5908 +#include <asm/scatterlist.h>
5909 +#include <asm/io.h>
5910 +
5911 +
5912 +#ifdef __KERNEL__
5913 +
5914 +struct pci_sysdata {
5915 + int domain; /* PCI domain */
5916 + int node; /* NUMA node */
5917 +#ifdef CONFIG_X86_64
5918 + void* iommu; /* IOMMU private data */
5919 +#endif
5920 +#ifdef CONFIG_XEN_PCIDEV_FRONTEND
5921 + struct pcifront_device *pdev;
5922 +#endif
5923 +};
5924 +
5925 +/* scan a bus after allocating a pci_sysdata for it */
5926 +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
5927 +
5928 +static inline int pci_domain_nr(struct pci_bus *bus)
5929 +{
5930 + struct pci_sysdata *sd = bus->sysdata;
5931 + return sd->domain;
5932 +}
5933 +
5934 +static inline int pci_proc_domain(struct pci_bus *bus)
5935 +{
5936 + return pci_domain_nr(bus);
5937 +}
5938 +
5939 +
5940 +/* Can be used to override the logic in pci_scan_bus for skipping
5941 + already-configured bus numbers - to be used for buggy BIOSes
5942 + or architectures with incomplete PCI setup by the loader */
5943 +
5944 +#ifdef CONFIG_PCI
5945 +extern unsigned int pcibios_assign_all_busses(void);
5946 +#else
5947 +#define pcibios_assign_all_busses() 0
5948 +#endif
5949 +
5950 +#include <asm/hypervisor.h>
5951 +#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain())
5952 +
5953 +extern unsigned long pci_mem_start;
5954 +#define PCIBIOS_MIN_IO 0x1000
5955 +#define PCIBIOS_MIN_MEM (pci_mem_start)
5956 +
5957 +#define PCIBIOS_MIN_CARDBUS_IO 0x4000
5958 +
5959 +void pcibios_config_init(void);
5960 +struct pci_bus * pcibios_scan_root(int bus);
5961 +
5962 +void pcibios_set_master(struct pci_dev *dev);
5963 +void pcibios_penalize_isa_irq(int irq, int active);
5964 +struct irq_routing_table *pcibios_get_irq_routing_table(void);
5965 +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
5966 +
5967 +
5968 +#define HAVE_PCI_MMAP
5969 +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
5970 + enum pci_mmap_state mmap_state, int write_combine);
5971 +
5972 +
5973 +#ifdef CONFIG_PCI
5974 +static inline void pci_dma_burst_advice(struct pci_dev *pdev,
5975 + enum pci_dma_burst_strategy *strat,
5976 + unsigned long *strategy_parameter)
5977 +{
5978 + *strat = PCI_DMA_BURST_INFINITY;
5979 + *strategy_parameter = ~0UL;
5980 +}
5981 +#endif
5982 +
5983 +
5984 +#endif /* __KERNEL__ */
5985 +
5986 +#ifdef CONFIG_X86_32
5987 +# include "pci_32.h"
5988 +#else
5989 +# include "pci_64.h"
5990 +#endif
5991 +
5992 +/* implement the pci_ DMA API in terms of the generic device dma_ one */
5993 +#include <asm-generic/pci-dma-compat.h>
5994 +
5995 +/* generic pci stuff */
5996 +#include <asm-generic/pci.h>
5997 +
5998 +
5999 +
6000 +#endif
6001 --- a/include/asm-x86/mach-xen/asm/pci_32.h
6002 +++ b/include/asm-x86/mach-xen/asm/pci_32.h
6003 @@ -4,52 +4,10 @@
6004
6005 #ifdef __KERNEL__
6006
6007 -struct pci_sysdata {
6008 - int node; /* NUMA node */
6009 -};
6010 -
6011 -/* scan a bus after allocating a pci_sysdata for it */
6012 -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
6013 -
6014 -#include <linux/mm.h> /* for struct page */
6015 -
6016 -/* Can be used to override the logic in pci_scan_bus for skipping
6017 - already-configured bus numbers - to be used for buggy BIOSes
6018 - or architectures with incomplete PCI setup by the loader */
6019 -
6020 -#ifdef CONFIG_PCI
6021 -extern unsigned int pcibios_assign_all_busses(void);
6022 -#else
6023 -#define pcibios_assign_all_busses() 0
6024 -#endif
6025 -
6026 -#include <asm/hypervisor.h>
6027 -#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain())
6028 -
6029 -extern unsigned long pci_mem_start;
6030 -#define PCIBIOS_MIN_IO 0x1000
6031 -#define PCIBIOS_MIN_MEM (pci_mem_start)
6032 -
6033 -#define PCIBIOS_MIN_CARDBUS_IO 0x4000
6034 -
6035 -void pcibios_config_init(void);
6036 -struct pci_bus * pcibios_scan_root(int bus);
6037 -
6038 -void pcibios_set_master(struct pci_dev *dev);
6039 -void pcibios_penalize_isa_irq(int irq, int active);
6040 -struct irq_routing_table *pcibios_get_irq_routing_table(void);
6041 -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
6042 -
6043 /* Dynamic DMA mapping stuff.
6044 * i386 has everything mapped statically.
6045 */
6046
6047 -#include <linux/types.h>
6048 -#include <linux/slab.h>
6049 -#include <asm/scatterlist.h>
6050 -#include <linux/string.h>
6051 -#include <asm/io.h>
6052 -
6053 struct pci_dev;
6054
6055 #ifdef CONFIG_SWIOTLB
6056 @@ -89,31 +47,8 @@
6057
6058 #endif
6059
6060 -#define HAVE_PCI_MMAP
6061 -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
6062 - enum pci_mmap_state mmap_state, int write_combine);
6063 -
6064 -
6065 -#ifdef CONFIG_PCI
6066 -static inline void pci_dma_burst_advice(struct pci_dev *pdev,
6067 - enum pci_dma_burst_strategy *strat,
6068 - unsigned long *strategy_parameter)
6069 -{
6070 - *strat = PCI_DMA_BURST_INFINITY;
6071 - *strategy_parameter = ~0UL;
6072 -}
6073 -#endif
6074
6075 #endif /* __KERNEL__ */
6076
6077 -#ifdef CONFIG_XEN_PCIDEV_FRONTEND
6078 -#include <xen/pcifront.h>
6079 -#endif /* CONFIG_XEN_PCIDEV_FRONTEND */
6080 -
6081 -/* implement the pci_ DMA API in terms of the generic device dma_ one */
6082 -#include <asm-generic/pci-dma-compat.h>
6083 -
6084 -/* generic pci stuff */
6085 -#include <asm-generic/pci.h>
6086
6087 #endif /* __i386_PCI_H */
6088 --- a/include/asm-x86/mach-xen/asm/pci_64.h
6089 +++ b/include/asm-x86/mach-xen/asm/pci_64.h
6090 @@ -1,16 +1,9 @@
6091 #ifndef __x8664_PCI_H
6092 #define __x8664_PCI_H
6093
6094 -#include <asm/io.h>
6095
6096 #ifdef __KERNEL__
6097
6098 -struct pci_sysdata {
6099 - int node; /* NUMA node */
6100 - void* iommu; /* IOMMU private data */
6101 -};
6102 -
6103 -extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
6104
6105 #ifdef CONFIG_CALGARY_IOMMU
6106 static inline void* pci_iommu(struct pci_bus *bus)
6107 @@ -26,42 +19,11 @@
6108 }
6109 #endif /* CONFIG_CALGARY_IOMMU */
6110
6111 -#include <linux/mm.h> /* for struct page */
6112 -
6113 -/* Can be used to override the logic in pci_scan_bus for skipping
6114 - already-configured bus numbers - to be used for buggy BIOSes
6115 - or architectures with incomplete PCI setup by the loader */
6116 -
6117 -#ifdef CONFIG_PCI
6118 -extern unsigned int pcibios_assign_all_busses(void);
6119 -#else
6120 -#define pcibios_assign_all_busses() 0
6121 -#endif
6122 -
6123 -#include <asm/hypervisor.h>
6124 -#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain())
6125 -
6126 -extern unsigned long pci_mem_start;
6127 -#define PCIBIOS_MIN_IO 0x1000
6128 -#define PCIBIOS_MIN_MEM (pci_mem_start)
6129 -
6130 -#define PCIBIOS_MIN_CARDBUS_IO 0x4000
6131
6132 -void pcibios_config_init(void);
6133 -struct pci_bus * pcibios_scan_root(int bus);
6134 extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value);
6135 extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value);
6136
6137 -void pcibios_set_master(struct pci_dev *dev);
6138 -void pcibios_penalize_isa_irq(int irq, int active);
6139 -struct irq_routing_table *pcibios_get_irq_routing_table(void);
6140 -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
6141 -
6142 -#include <linux/types.h>
6143 -#include <linux/slab.h>
6144 -#include <asm/scatterlist.h>
6145 -#include <linux/string.h>
6146 -#include <asm/page.h>
6147 +
6148
6149 extern void pci_iommu_alloc(void);
6150 extern int iommu_setup(char *opt);
6151 @@ -75,7 +37,7 @@
6152 */
6153 #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
6154
6155 -#if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
6156 +#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
6157
6158 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
6159 dma_addr_t ADDR_NAME;
6160 @@ -119,27 +81,7 @@
6161
6162 #endif
6163
6164 -#include <asm-generic/pci-dma-compat.h>
6165 -
6166 -#ifdef CONFIG_PCI
6167 -static inline void pci_dma_burst_advice(struct pci_dev *pdev,
6168 - enum pci_dma_burst_strategy *strat,
6169 - unsigned long *strategy_parameter)
6170 -{
6171 - *strat = PCI_DMA_BURST_INFINITY;
6172 - *strategy_parameter = ~0UL;
6173 -}
6174 -#endif
6175 -
6176 -#define HAVE_PCI_MMAP
6177 -extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
6178 - enum pci_mmap_state mmap_state, int write_combine);
6179 -
6180 #endif /* __KERNEL__ */
6181
6182 -/* generic pci stuff */
6183 -#ifdef CONFIG_PCI
6184 -#include <asm-generic/pci.h>
6185 -#endif
6186
6187 #endif /* __x8664_PCI_H */
6188 --- /dev/null
6189 +++ b/include/asm-x86/mach-xen/asm/pgalloc.h
6190 @@ -0,0 +1,5 @@
6191 +#ifdef CONFIG_X86_32
6192 +# include "pgalloc_32.h"
6193 +#else
6194 +# include "pgalloc_64.h"
6195 +#endif
6196 --- /dev/null
6197 +++ b/include/asm-x86/mach-xen/asm/pgtable.h
6198 @@ -0,0 +1,5 @@
6199 +#ifdef CONFIG_X86_32
6200 +# include "pgtable_32.h"
6201 +#else
6202 +# include "pgtable_64.h"
6203 +#endif
6204 --- a/include/asm-x86/mach-xen/asm/pgtable_32.h
6205 +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h
6206 @@ -17,10 +17,7 @@
6207 #include <asm/fixmap.h>
6208 #include <linux/threads.h>
6209
6210 -#ifndef _I386_BITOPS_H
6211 -#include <asm/bitops.h>
6212 -#endif
6213 -
6214 +#include <linux/bitops.h>
6215 #include <linux/slab.h>
6216 #include <linux/list.h>
6217 #include <linux/spinlock.h>
6218 @@ -40,7 +37,7 @@
6219 extern struct page *pgd_list;
6220 void check_pgt_cache(void);
6221
6222 -void pmd_ctor(void *, struct kmem_cache *, unsigned long);
6223 +void pmd_ctor(struct kmem_cache *, void *);
6224 void pgtable_cache_init(void);
6225 void paging_init(void);
6226
6227 --- a/include/asm-x86/mach-xen/asm/pgtable_64.h
6228 +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h
6229 @@ -9,7 +9,7 @@
6230 * the x86-64 page table tree.
6231 */
6232 #include <asm/processor.h>
6233 -#include <asm/bitops.h>
6234 +#include <linux/bitops.h>
6235 #include <linux/threads.h>
6236 #include <linux/sched.h>
6237 #include <asm/pda.h>
6238 @@ -138,6 +138,7 @@
6239 #define MAXMEM _AC(0x3fffffffffff, UL)
6240 #define VMALLOC_START _AC(0xffffc20000000000, UL)
6241 #define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
6242 +#define VMEMMAP_START _AC(0xffffe20000000000, UL)
6243 #define MODULES_VADDR _AC(0xffffffff88000000, UL)
6244 #define MODULES_END _AC(0xfffffffffff00000, UL)
6245 #define MODULES_LEN (MODULES_END - MODULES_VADDR)
6246 --- /dev/null
6247 +++ b/include/asm-x86/mach-xen/asm/processor.h
6248 @@ -0,0 +1,5 @@
6249 +#ifdef CONFIG_X86_32
6250 +# include "processor_32.h"
6251 +#else
6252 +# include "processor_64.h"
6253 +#endif
6254 --- a/include/asm-x86/mach-xen/asm/processor_32.h
6255 +++ b/include/asm-x86/mach-xen/asm/processor_32.h
6256 @@ -80,6 +80,7 @@
6257 unsigned char booted_cores; /* number of cores as seen by OS */
6258 __u8 phys_proc_id; /* Physical processor id. */
6259 __u8 cpu_core_id; /* Core id */
6260 + __u8 cpu_index; /* index into per_cpu list */
6261 #endif
6262 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
6263
6264 @@ -106,14 +107,19 @@
6265 #endif
6266
6267 #ifdef CONFIG_SMP
6268 -extern struct cpuinfo_x86 cpu_data[];
6269 -#define current_cpu_data cpu_data[smp_processor_id()]
6270 +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
6271 +#define cpu_data(cpu) per_cpu(cpu_info, cpu)
6272 +#define current_cpu_data cpu_data(smp_processor_id())
6273 #else
6274 -#define cpu_data (&boot_cpu_data)
6275 -#define current_cpu_data boot_cpu_data
6276 +#define cpu_data(cpu) boot_cpu_data
6277 +#define current_cpu_data boot_cpu_data
6278 #endif
6279
6280 -extern int cpu_llc_id[NR_CPUS];
6281 +/*
6282 + * the following now lives in the per cpu area:
6283 + * extern int cpu_llc_id[NR_CPUS];
6284 + */
6285 +DECLARE_PER_CPU(u8, cpu_llc_id);
6286 extern char ignore_fpu_irq;
6287
6288 void __init cpu_detect(struct cpuinfo_x86 *c);
6289 @@ -560,7 +566,9 @@
6290 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
6291 * resulting in stale register contents being returned.
6292 */
6293 -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
6294 +static inline void cpuid(unsigned int op,
6295 + unsigned int *eax, unsigned int *ebx,
6296 + unsigned int *ecx, unsigned int *edx)
6297 {
6298 *eax = op;
6299 *ecx = 0;
6300 @@ -568,8 +576,9 @@
6301 }
6302
6303 /* Some CPUID calls want 'count' to be placed in ecx */
6304 -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
6305 - int *edx)
6306 +static inline void cpuid_count(unsigned int op, int count,
6307 + unsigned int *eax, unsigned int *ebx,
6308 + unsigned int *ecx, unsigned int *edx)
6309 {
6310 *eax = op;
6311 *ecx = count;
6312 @@ -639,6 +648,17 @@
6313 #define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n"
6314 #define K7_NOP8 K7_NOP7 ASM_NOP1
6315
6316 +/* P6 nops */
6317 +/* uses eax dependencies (Intel-recommended choice) */
6318 +#define P6_NOP1 GENERIC_NOP1
6319 +#define P6_NOP2 ".byte 0x66,0x90\n"
6320 +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
6321 +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n"
6322 +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n"
6323 +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
6324 +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n"
6325 +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
6326 +
6327 #ifdef CONFIG_MK8
6328 #define ASM_NOP1 K8_NOP1
6329 #define ASM_NOP2 K8_NOP2
6330 @@ -657,6 +677,17 @@
6331 #define ASM_NOP6 K7_NOP6
6332 #define ASM_NOP7 K7_NOP7
6333 #define ASM_NOP8 K7_NOP8
6334 +#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
6335 + defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
6336 + defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
6337 +#define ASM_NOP1 P6_NOP1
6338 +#define ASM_NOP2 P6_NOP2
6339 +#define ASM_NOP3 P6_NOP3
6340 +#define ASM_NOP4 P6_NOP4
6341 +#define ASM_NOP5 P6_NOP5
6342 +#define ASM_NOP6 P6_NOP6
6343 +#define ASM_NOP7 P6_NOP7
6344 +#define ASM_NOP8 P6_NOP8
6345 #else
6346 #define ASM_NOP1 GENERIC_NOP1
6347 #define ASM_NOP2 GENERIC_NOP2
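The cpu_data conversion above (mirrored for 64-bit below) moves the NR_CPUS-sized array into the per-CPU area; array indexing becomes the cpu_data(cpu) accessor, so callers read, for example:

        struct cpuinfo_x86 *c = &cpu_data(cpu);        /* was: &cpu_data[cpu] */
        if (c->x86_vendor == X86_VENDOR_INTEL)
                /* ... vendor-specific setup ... */;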
6348 --- a/include/asm-x86/mach-xen/asm/processor_64.h
6349 +++ b/include/asm-x86/mach-xen/asm/processor_64.h
6350 @@ -74,6 +74,7 @@
6351 __u8 booted_cores; /* number of cores as seen by OS */
6352 __u8 phys_proc_id; /* Physical Processor id. */
6353 __u8 cpu_core_id; /* Core id. */
6354 + __u8 cpu_index; /* index into per_cpu list */
6355 #endif
6356 } ____cacheline_aligned;
6357
6358 @@ -88,11 +89,12 @@
6359 #define X86_VENDOR_UNKNOWN 0xff
6360
6361 #ifdef CONFIG_SMP
6362 -extern struct cpuinfo_x86 cpu_data[];
6363 -#define current_cpu_data cpu_data[smp_processor_id()]
6364 +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
6365 +#define cpu_data(cpu) per_cpu(cpu_info, cpu)
6366 +#define current_cpu_data cpu_data(smp_processor_id())
6367 #else
6368 -#define cpu_data (&boot_cpu_data)
6369 -#define current_cpu_data boot_cpu_data
6370 +#define cpu_data(cpu) boot_cpu_data
6371 +#define current_cpu_data boot_cpu_data
6372 #endif
6373
6374 extern char ignore_irq13;
6375 @@ -343,6 +345,16 @@
6376 };
6377
6378
6379 +#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2)
6380 +#define ASM_NOP1 P6_NOP1
6381 +#define ASM_NOP2 P6_NOP2
6382 +#define ASM_NOP3 P6_NOP3
6383 +#define ASM_NOP4 P6_NOP4
6384 +#define ASM_NOP5 P6_NOP5
6385 +#define ASM_NOP6 P6_NOP6
6386 +#define ASM_NOP7 P6_NOP7
6387 +#define ASM_NOP8 P6_NOP8
6388 +#else
6389 #define ASM_NOP1 K8_NOP1
6390 #define ASM_NOP2 K8_NOP2
6391 #define ASM_NOP3 K8_NOP3
6392 @@ -351,6 +363,7 @@
6393 #define ASM_NOP6 K8_NOP6
6394 #define ASM_NOP7 K8_NOP7
6395 #define ASM_NOP8 K8_NOP8
6396 +#endif
6397
6398 /* Opteron nops */
6399 #define K8_NOP1 ".byte 0x90\n"
6400 @@ -362,6 +375,17 @@
6401 #define K8_NOP7 K8_NOP4 K8_NOP3
6402 #define K8_NOP8 K8_NOP4 K8_NOP4
6403
6404 +/* P6 nops */
6405 +/* uses eax dependencies (Intel-recommended choice) */
6406 +#define P6_NOP1 ".byte 0x90\n"
6407 +#define P6_NOP2 ".byte 0x66,0x90\n"
6408 +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n"
6409 +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n"
6410 +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n"
6411 +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
6412 +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n"
6413 +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
6414 +
6415 #define ASM_NOP_MAX 8
6416
6417 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
6418 @@ -377,12 +401,6 @@
6419 asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
6420 }
6421
6422 -#define ARCH_HAS_PREFETCH
6423 -static inline void prefetch(void *x)
6424 -{
6425 - asm volatile("prefetcht0 (%0)" :: "r" (x));
6426 -}
6427 -
6428 #define ARCH_HAS_PREFETCHW 1
6429 static inline void prefetchw(void *x)
6430 {
6431 @@ -398,11 +416,6 @@
6432
6433 #define cpu_relax() rep_nop()
6434
6435 -static inline void serialize_cpu(void)
6436 -{
6437 - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
6438 -}
6439 -
6440 static inline void __monitor(const void *eax, unsigned long ecx,
6441 unsigned long edx)
6442 {
6443 --- /dev/null
6444 +++ b/include/asm-x86/mach-xen/asm/scatterlist.h
6445 @@ -0,0 +1 @@
6446 +#include "../../scatterlist_64.h"
6447 --- a/include/asm-x86/mach-xen/asm/scatterlist_32.h
6448 +++ /dev/null
6449 @@ -1,24 +0,0 @@
6450 -#ifndef _I386_SCATTERLIST_H
6451 -#define _I386_SCATTERLIST_H
6452 -
6453 -#include <asm/types.h>
6454 -
6455 -struct scatterlist {
6456 - struct page *page;
6457 - unsigned int offset;
6458 - unsigned int length;
6459 - dma_addr_t dma_address;
6460 - unsigned int dma_length;
6461 -};
6462 -
6463 -/* These macros should be used after a pci_map_sg call has been done
6464 - * to get bus addresses of each of the SG entries and their lengths.
6465 - * You should only work with the number of sg entries pci_map_sg
6466 - * returns.
6467 - */
6468 -#define sg_dma_address(sg) ((sg)->dma_address)
6469 -#define sg_dma_len(sg) ((sg)->dma_length)
6470 -
6471 -#define ISA_DMA_THRESHOLD (0x00ffffff)
6472 -
6473 -#endif /* !(_I386_SCATTERLIST_H) */
6474 --- /dev/null
6475 +++ b/include/asm-x86/mach-xen/asm/segment.h
6476 @@ -0,0 +1,5 @@
6477 +#ifdef CONFIG_X86_32
6478 +# include "segment_32.h"
6479 +#else
6480 +# include "../../segment_64.h"
6481 +#endif
6482 --- /dev/null
6483 +++ b/include/asm-x86/mach-xen/asm/smp.h
6484 @@ -0,0 +1,5 @@
6485 +#ifdef CONFIG_X86_32
6486 +# include "smp_32.h"
6487 +#else
6488 +# include "smp_64.h"
6489 +#endif
6490 --- a/include/asm-x86/mach-xen/asm/smp_32.h
6491 +++ b/include/asm-x86/mach-xen/asm/smp_32.h
6492 @@ -11,7 +11,7 @@
6493 #endif
6494
6495 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
6496 -#include <asm/bitops.h>
6497 +#include <linux/bitops.h>
6498 #include <asm/mpspec.h>
6499 #include <asm/apic.h>
6500 #ifdef CONFIG_X86_IO_APIC
6501 @@ -30,8 +30,8 @@
6502 extern void smp_alloc_memory(void);
6503 extern int pic_mode;
6504 extern int smp_num_siblings;
6505 -extern cpumask_t cpu_sibling_map[];
6506 -extern cpumask_t cpu_core_map[];
6507 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
6508 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
6509
6510 extern void (*mtrr_hook) (void);
6511 extern void zap_low_mappings (void);
6512 @@ -39,9 +39,11 @@
6513 extern void unlock_ipi_call_lock(void);
6514
6515 #define MAX_APICID 256
6516 -extern u8 x86_cpu_to_apicid[];
6517 +extern u8 __initdata x86_cpu_to_apicid_init[];
6518 +extern void *x86_cpu_to_apicid_ptr;
6519 +DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
6520
6521 -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
6522 +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
6523
6524 #ifdef CONFIG_HOTPLUG_CPU
6525 extern void cpu_exit_clear(void);
6526 --- a/include/asm-x86/mach-xen/asm/smp_64.h
6527 +++ b/include/asm-x86/mach-xen/asm/smp_64.h
6528 @@ -40,10 +40,19 @@
6529 extern void unlock_ipi_call_lock(void);
6530 extern int smp_num_siblings;
6531 extern void smp_send_reschedule(int cpu);
6532 +extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
6533 + void *info, int wait);
6534
6535 -extern cpumask_t cpu_sibling_map[NR_CPUS];
6536 -extern cpumask_t cpu_core_map[NR_CPUS];
6537 -extern u8 cpu_llc_id[NR_CPUS];
6538 +/*
6539 + * cpu_sibling_map and cpu_core_map now live
6540 + * in the per cpu area
6541 + *
6542 + * extern cpumask_t cpu_sibling_map[NR_CPUS];
6543 + * extern cpumask_t cpu_core_map[NR_CPUS];
6544 + */
6545 +DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
6546 +DECLARE_PER_CPU(cpumask_t, cpu_core_map);
6547 +DECLARE_PER_CPU(u8, cpu_llc_id);
6548
6549 #define SMP_TRAMPOLINE_BASE 0x6000
6550
6551 @@ -70,6 +79,8 @@
6552
6553 #endif /* CONFIG_SMP */
6554
6555 +#define safe_smp_processor_id() smp_processor_id()
6556 +
6557 #ifdef CONFIG_X86_LOCAL_APIC
6558 static inline int hard_smp_processor_id(void)
6559 {
6560 @@ -82,8 +93,9 @@
6561 * Some lowlevel functions might want to know about
6562 * the real APIC ID <-> CPU # mapping.
6563 */
6564 -extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */
6565 -extern u8 x86_cpu_to_log_apicid[NR_CPUS];
6566 +extern u8 __initdata x86_cpu_to_apicid_init[];
6567 +extern void *x86_cpu_to_apicid_ptr;
6568 +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */
6569 extern u8 bios_cpu_apicid[];
6570
6571 #ifdef CONFIG_X86_LOCAL_APIC
6572 @@ -118,8 +130,9 @@
6573 #endif
6574
6575 #ifdef CONFIG_SMP
6576 -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
6577 +#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
6578 #else
6579 +extern unsigned int boot_cpu_id;
6580 #define cpu_physical_id(cpu) boot_cpu_id
6581 #endif /* !CONFIG_SMP */
6582 #endif
6583 --- /dev/null
6584 +++ b/include/asm-x86/mach-xen/asm/swiotlb.h
6585 @@ -0,0 +1,5 @@
6586 +#ifdef CONFIG_X86_32
6587 +# include "swiotlb_32.h"
6588 +#else
6589 +# include "../../swiotlb.h"
6590 +#endif
6591 --- /dev/null
6592 +++ b/include/asm-x86/mach-xen/asm/system.h
6593 @@ -0,0 +1,5 @@
6594 +#ifdef CONFIG_X86_32
6595 +# include "system_32.h"
6596 +#else
6597 +# include "system_64.h"
6598 +#endif
6599 --- a/include/asm-x86/mach-xen/asm/system_32.h
6600 +++ b/include/asm-x86/mach-xen/asm/system_32.h
6601 @@ -9,6 +9,7 @@
6602 #include <asm/hypervisor.h>
6603
6604 #ifdef __KERNEL__
6605 +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
6606
6607 struct task_struct; /* one of the stranger aspects of C forward declarations.. */
6608 extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
6609 @@ -138,7 +139,7 @@
6610 {
6611 unsigned long val;
6612 /* This could fault if %cr4 does not exist */
6613 - asm("1: movl %%cr4, %0 \n"
6614 + asm volatile("1: movl %%cr4, %0 \n"
6615 "2: \n"
6616 ".section __ex_table,\"a\" \n"
6617 ".long 1b,2b \n"
6618 @@ -157,6 +158,11 @@
6619 asm volatile("wbinvd": : :"memory");
6620 }
6621
6622 +static inline void clflush(volatile void *__p)
6623 +{
6624 + asm volatile("clflush %0" : "+m" (*(char __force *)__p));
6625 +}
6626 +
6627 #define read_cr0() (xen_read_cr0())
6628 #define write_cr0(x) (xen_write_cr0(x))
6629 #define read_cr2() (xen_read_cr2())
6630 @@ -207,6 +213,7 @@
6631
6632 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
6633 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
6634 +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
6635
6636 /**
6637 * read_barrier_depends - Flush all pending reads that subsequents reads
6638 @@ -262,18 +269,18 @@
6639
6640 #define read_barrier_depends() do { } while(0)
6641
6642 +#ifdef CONFIG_SMP
6643 +#define smp_mb() mb()
6644 +#ifdef CONFIG_X86_PPRO_FENCE
6645 +# define smp_rmb() rmb()
6646 +#else
6647 +# define smp_rmb() barrier()
6648 +#endif
6649 #ifdef CONFIG_X86_OOSTORE
6650 -/* Actually there are no OOO store capable CPUs for now that do SSE,
6651 - but make it already an possibility. */
6652 -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
6653 +# define smp_wmb() wmb()
6654 #else
6655 -#define wmb() __asm__ __volatile__ ("": : :"memory")
6656 +# define smp_wmb() barrier()
6657 #endif
6658 -
6659 -#ifdef CONFIG_SMP
6660 -#define smp_mb() mb()
6661 -#define smp_rmb() rmb()
6662 -#define smp_wmb() wmb()
6663 #define smp_read_barrier_depends() read_barrier_depends()
6664 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
6665 #else
6666 @@ -300,5 +307,6 @@
6667 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
6668
6669 void default_idle(void);
6670 +void __show_registers(struct pt_regs *, int all);
6671
6672 #endif
6673 --- a/include/asm-x86/mach-xen/asm/system_64.h
6674 +++ b/include/asm-x86/mach-xen/asm/system_64.h
6675 @@ -11,8 +11,12 @@
6676
6677 #ifdef __KERNEL__
6678
6679 -#define __STR(x) #x
6680 -#define STR(x) __STR(x)
6681 +/* entries in ARCH_DLINFO: */
6682 +#ifdef CONFIG_IA32_EMULATION
6683 +# define AT_VECTOR_SIZE_ARCH 2
6684 +#else
6685 +# define AT_VECTOR_SIZE_ARCH 1
6686 +#endif
6687
6688 #define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
6689 #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
6690 @@ -92,7 +96,7 @@
6691
6692 #define read_cr3() ({ \
6693 unsigned long __dummy; \
6694 - asm("movq %%cr3,%0" : "=r" (__dummy)); \
6695 + asm volatile("movq %%cr3,%0" : "=r" (__dummy)); \
6696 machine_to_phys(__dummy); \
6697 })
6698
6699 @@ -105,7 +109,7 @@
6700 static inline unsigned long read_cr4(void)
6701 {
6702 unsigned long cr4;
6703 - asm("movq %%cr4,%0" : "=r" (cr4));
6704 + asm volatile("movq %%cr4,%0" : "=r" (cr4));
6705 return cr4;
6706 }
6707
6708 @@ -131,12 +135,17 @@
6709
6710 #endif /* __KERNEL__ */
6711
6712 +static inline void clflush(volatile void *__p)
6713 +{
6714 + asm volatile("clflush %0" : "+m" (*(char __force *)__p));
6715 +}
6716 +
6717 #define nop() __asm__ __volatile__ ("nop")
6718
6719 #ifdef CONFIG_SMP
6720 #define smp_mb() mb()
6721 -#define smp_rmb() rmb()
6722 -#define smp_wmb() wmb()
6723 +#define smp_rmb() barrier()
6724 +#define smp_wmb() barrier()
6725 #define smp_read_barrier_depends() do {} while(0)
6726 #else
6727 #define smp_mb() barrier()
6728 @@ -153,12 +162,8 @@
6729 */
6730 #define mb() asm volatile("mfence":::"memory")
6731 #define rmb() asm volatile("lfence":::"memory")
6732 -
6733 -#ifdef CONFIG_UNORDERED_IO
6734 #define wmb() asm volatile("sfence" ::: "memory")
6735 -#else
6736 -#define wmb() asm volatile("" ::: "memory")
6737 -#endif
6738 +
6739 #define read_barrier_depends() do {} while(0)
6740 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
6741
6742 --- /dev/null
6743 +++ b/include/asm-x86/mach-xen/asm/tlbflush.h
6744 @@ -0,0 +1,5 @@
6745 +#ifdef CONFIG_X86_32
6746 +# include "tlbflush_32.h"
6747 +#else
6748 +# include "tlbflush_64.h"
6749 +#endif
6750 --- a/include/asm-x86/mach-xen/asm/tlbflush_32.h
6751 +++ b/include/asm-x86/mach-xen/asm/tlbflush_32.h
6752 @@ -23,7 +23,6 @@
6753 * - flush_tlb_page(vma, vmaddr) flushes one page
6754 * - flush_tlb_range(vma, start, end) flushes a range of pages
6755 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
6756 - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
6757 *
6758 * ..but the i386 has somewhat limited tlb flushing capabilities,
6759 * and page-granular flushes are available only on i486 and up.
6760 @@ -97,10 +96,4 @@
6761 flush_tlb_all();
6762 }
6763
6764 -static inline void flush_tlb_pgtables(struct mm_struct *mm,
6765 - unsigned long start, unsigned long end)
6766 -{
6767 - /* i386 does not keep any page table caches in TLB */
6768 -}
6769 -
6770 #endif /* _I386_TLBFLUSH_H */
6771 --- a/include/asm-x86/mach-xen/asm/tlbflush_64.h
6772 +++ b/include/asm-x86/mach-xen/asm/tlbflush_64.h
6773 @@ -28,7 +28,6 @@
6774 * - flush_tlb_page(vma, vmaddr) flushes one page
6775 * - flush_tlb_range(vma, start, end) flushes a range of pages
6776 * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
6777 - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
6778 *
6779 * x86-64 can only flush individual pages or full VMs. For a range flush
6780 * we always do the full VM. Might be worth trying if for a small
6781 @@ -95,12 +94,4 @@
6782 flush_tlb_all();
6783 }
6784
6785 -static inline void flush_tlb_pgtables(struct mm_struct *mm,
6786 - unsigned long start, unsigned long end)
6787 -{
6788 - /* x86_64 does not keep any page table caches in a software TLB.
6789 - The CPUs do in their hardware TLBs, but they are handled
6790 - by the normal TLB flushing algorithms. */
6791 -}
6792 -
6793 #endif /* _X8664_TLBFLUSH_H */
6794 --- /dev/null
6795 +++ b/include/asm-x86/mach-xen/asm/xor.h
6796 @@ -0,0 +1,5 @@
6797 +#ifdef CONFIG_X86_32
6798 +# include "../../xor_32.h"
6799 +#else
6800 +# include "xor_64.h"
6801 +#endif
6802 --- a/include/asm-x86/mach-xen/mach_time.h
6803 +++ b/include/asm-x86/mach-xen/mach_time.h
6804 @@ -1,111 +1,2 @@
6805 -/*
6806 - * include/asm-i386/mach-default/mach_time.h
6807 - *
6808 - * Machine specific set RTC function for generic.
6809 - * Split out from time.c by Osamu Tomita <tomita@cinet.co.jp>
6810 - */
6811 -#ifndef _MACH_TIME_H
6812 -#define _MACH_TIME_H
6813 -
6814 -#include <asm-i386/mc146818rtc.h>
6815 -
6816 -/* for check timing call set_rtc_mmss() 500ms */
6817 -/* used in arch/i386/time.c::do_timer_interrupt() */
6818 -#define USEC_AFTER 500000
6819 -#define USEC_BEFORE 500000
6820 -
6821 -/*
6822 - * In order to set the CMOS clock precisely, set_rtc_mmss has to be
6823 - * called 500 ms after the second nowtime has started, because when
6824 - * nowtime is written into the registers of the CMOS clock, it will
6825 - * jump to the next second precisely 500 ms later. Check the Motorola
6826 - * MC146818A or Dallas DS12887 data sheet for details.
6827 - *
6828 - * BUG: This routine does not handle hour overflow properly; it just
6829 - * sets the minutes. Usually you'll only notice that after reboot!
6830 - */
6831 -static inline int mach_set_rtc_mmss(unsigned long nowtime)
6832 -{
6833 - int retval = 0;
6834 - int real_seconds, real_minutes, cmos_minutes;
6835 - unsigned char save_control, save_freq_select;
6836 -
6837 - save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
6838 - CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
6839 -
6840 - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
6841 - CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
6842 -
6843 - cmos_minutes = CMOS_READ(RTC_MINUTES);
6844 - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
6845 - BCD_TO_BIN(cmos_minutes);
6846 -
6847 - /*
6848 - * since we're only adjusting minutes and seconds,
6849 - * don't interfere with hour overflow. This avoids
6850 - * messing with unknown time zones but requires your
6851 - * RTC not to be off by more than 15 minutes
6852 - */
6853 - real_seconds = nowtime % 60;
6854 - real_minutes = nowtime / 60;
6855 - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
6856 - real_minutes += 30; /* correct for half hour time zone */
6857 - real_minutes %= 60;
6858 -
6859 - if (abs(real_minutes - cmos_minutes) < 30) {
6860 - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
6861 - BIN_TO_BCD(real_seconds);
6862 - BIN_TO_BCD(real_minutes);
6863 - }
6864 - CMOS_WRITE(real_seconds,RTC_SECONDS);
6865 - CMOS_WRITE(real_minutes,RTC_MINUTES);
6866 - } else {
6867 - printk(KERN_WARNING
6868 - "set_rtc_mmss: can't update from %d to %d\n",
6869 - cmos_minutes, real_minutes);
6870 - retval = -1;
6871 - }
6872 -
6873 - /* The following flags have to be released exactly in this order,
6874 - * otherwise the DS12887 (popular MC146818A clone with integrated
6875 - * battery and quartz) will not reset the oscillator and will not
6876 - * update precisely 500 ms later. You won't find this mentioned in
6877 - * the Dallas Semiconductor data sheets, but who believes data
6878 - * sheets anyway ... -- Markus Kuhn
6879 - */
6880 - CMOS_WRITE(save_control, RTC_CONTROL);
6881 - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
6882 -
6883 - return retval;
6884 -}
6885 -
6886 -static inline unsigned long mach_get_cmos_time(void)
6887 -{
6888 - unsigned int year, mon, day, hour, min, sec;
6889 -
6890 - do {
6891 - sec = CMOS_READ(RTC_SECONDS);
6892 - min = CMOS_READ(RTC_MINUTES);
6893 - hour = CMOS_READ(RTC_HOURS);
6894 - day = CMOS_READ(RTC_DAY_OF_MONTH);
6895 - mon = CMOS_READ(RTC_MONTH);
6896 - year = CMOS_READ(RTC_YEAR);
6897 - } while (sec != CMOS_READ(RTC_SECONDS));
6898 -
6899 - if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
6900 - BCD_TO_BIN(sec);
6901 - BCD_TO_BIN(min);
6902 - BCD_TO_BIN(hour);
6903 - BCD_TO_BIN(day);
6904 - BCD_TO_BIN(mon);
6905 - BCD_TO_BIN(year);
6906 - }
6907 -
6908 - year += 1900;
6909 - if (year < 1970)
6910 - year += 100;
6911 -
6912 - return mktime(year, mon, day, hour, min, sec);
6913 -}
6914 -
6915 -#endif /* !_MACH_TIME_H */
6916 +#include "../mc146818rtc_32.h"
6917 +#include "../mach-default/mach_time.h"
6918 --- a/include/asm-x86/mach-xen/mach_timer.h
6919 +++ b/include/asm-x86/mach-xen/mach_timer.h
6920 @@ -1,50 +1 @@
6921 -/*
6922 - * include/asm-i386/mach-default/mach_timer.h
6923 - *
6924 - * Machine specific calibrate_tsc() for generic.
6925 - * Split out from timer_tsc.c by Osamu Tomita <tomita@cinet.co.jp>
6926 - */
6927 -/* ------ Calibrate the TSC -------
6928 - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
6929 - * Too much 64-bit arithmetic here to do this cleanly in C, and for
6930 - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
6931 - * output busy loop as low as possible. We avoid reading the CTC registers
6932 - * directly because of the awkward 8-bit access mechanism of the 82C54
6933 - * device.
6934 - */
6935 -#ifndef _MACH_TIMER_H
6936 -#define _MACH_TIMER_H
6937 -
6938 -#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
6939 -#define CALIBRATE_LATCH \
6940 - ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
6941 -
6942 -static inline void mach_prepare_counter(void)
6943 -{
6944 - /* Set the Gate high, disable speaker */
6945 - outb((inb(0x61) & ~0x02) | 0x01, 0x61);
6946 -
6947 - /*
6948 - * Now let's take care of CTC channel 2
6949 - *
6950 - * Set the Gate high, program CTC channel 2 for mode 0,
6951 - * (interrupt on terminal count mode), binary count,
6952 - * load 5 * LATCH count, (LSB and MSB) to begin countdown.
6953 - *
6954 - * Some devices need a delay here.
6955 - */
6956 - outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */
6957 - outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
6958 - outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */
6959 -}
6960 -
6961 -static inline void mach_countup(unsigned long *count_p)
6962 -{
6963 - unsigned long count = 0;
6964 - do {
6965 - count++;
6966 - } while ((inb_p(0x61) & 0x20) == 0);
6967 - *count_p = count;
6968 -}
6969 -
6970 -#endif /* !_MACH_TIMER_H */
6971 +#include "../mach-default/mach_timer.h"
6972 --- a/include/linux/sysctl.h
6973 +++ b/include/linux/sysctl.h
6974 @@ -69,6 +69,7 @@
6975 CTL_BUS=8, /* Busses */
6976 CTL_ABI=9, /* Binary emulation */
6977 CTL_CPU=10, /* CPU stuff (speed scaling, etc) */
6978 + CTL_XEN=123, /* Xen info and control */
6979 CTL_ARLAN=254, /* arlan wireless driver */
6980 CTL_S390DBF=5677, /* s390 debug */
6981 CTL_SUNRPC=7249, /* sunrpc debug */
6982 --- a/include/xen/pcifront.h
6983 +++ b/include/xen/pcifront.h
6984 @@ -12,13 +12,11 @@
6985
6986 #ifndef __ia64__
6987
6988 +#include <asm/pci.h>
6989 +
6990 struct pcifront_device;
6991 struct pci_bus;
6992 -
6993 -struct pcifront_sd {
6994 - int domain;
6995 - struct pcifront_device *pdev;
6996 -};
6997 +#define pcifront_sd pci_sysdata
6998
6999 static inline struct pcifront_device *
7000 pcifront_get_pdev(struct pcifront_sd *sd)
7001 @@ -34,18 +32,6 @@
7002 sd->pdev = pdev;
7003 }
7004
7005 -#if defined(CONFIG_PCI_DOMAINS)
7006 -static inline int pci_domain_nr(struct pci_bus *bus)
7007 -{
7008 - struct pcifront_sd *sd = bus->sysdata;
7009 - return sd->domain;
7010 -}
7011 -static inline int pci_proc_domain(struct pci_bus *bus)
7012 -{
7013 - return pci_domain_nr(bus);
7014 -}
7015 -#endif /* CONFIG_PCI_DOMAINS */
7016 -
7017 static inline void pcifront_setup_root_resources(struct pci_bus *bus,
7018 struct pcifront_sd *sd)
7019 {
7020 --- /dev/null
7021 +++ b/include/xen/sysctl.h
7022 @@ -0,0 +1,11 @@
7023 +#ifndef _XEN_SYSCTL_H
7024 +#define _XEN_SYSCTL_H
7025 +
7026 +/* CTL_XEN names: */
7027 +enum
7028 +{
7029 + CTL_XEN_INDEPENDENT_WALLCLOCK=1,
7030 + CTL_XEN_PERMITTED_CLOCK_JITTER=2,
7031 +};
7032 +
7033 +#endif /* _XEN_SYSCTL_H */
7034 --- a/include/xen/xenbus.h
7035 +++ b/include/xen/xenbus.h
7036 @@ -107,7 +107,7 @@
7037 int (*suspend)(struct xenbus_device *dev);
7038 int (*suspend_cancel)(struct xenbus_device *dev);
7039 int (*resume)(struct xenbus_device *dev);
7040 - int (*uevent)(struct xenbus_device *, char **, int, char *, int);
7041 + int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
7042 struct device_driver driver;
7043 int (*read_otherend_details)(struct xenbus_device *dev);
7044 int (*is_ready)(struct xenbus_device *dev);
7045 --- a/kernel/kexec.c
7046 +++ b/kernel/kexec.c
7047 @@ -1190,6 +1190,7 @@
7048 module_init(crash_notes_memory_init)
7049
7050
7051 +#ifndef CONFIG_XEN
7052 /*
7053 * parsing the "crashkernel" commandline
7054 *
7055 @@ -1352,7 +1353,7 @@
7056
7057 return 0;
7058 }
7059 -
7060 +#endif
7061
7062
7063 void crash_save_vmcoreinfo(void)
7064 --- a/kernel/sysctl_check.c
7065 +++ b/kernel/sysctl_check.c
7066 @@ -4,6 +4,7 @@
7067 #include <linux/sunrpc/debug.h>
7068 #include <linux/string.h>
7069 #include <net/ip_vs.h>
7070 +#include <xen/sysctl.h>
7071
7072 struct trans_ctl_table {
7073 int ctl_name;
7074 @@ -895,6 +896,14 @@
7075 {}
7076 };
7077
7078 +#ifdef CONFIG_XEN
7079 +static struct trans_ctl_table trans_xen_table[] = {
7080 + { CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" },
7081 + { CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" },
7082 + {}
7083 +};
7084 +#endif
7085 +
7086 static const struct trans_ctl_table trans_arlan_conf_table0[] = {
7087 { 1, "spreadingCode" },
7088 { 2, "channelNumber" },
7089 @@ -1230,6 +1239,9 @@
7090 { CTL_BUS, "bus", trans_bus_table },
7091 { CTL_ABI, "abi" },
7092 /* CTL_CPU not used */
7093 +#ifdef CONFIG_XEN
7094 + { CTL_XEN, "xen", trans_xen_table },
7095 +#endif
7096 { CTL_ARLAN, "arlan", trans_arlan_table },
7097 { CTL_S390DBF, "s390dbf", trans_s390dbf_table },
7098 { CTL_SUNRPC, "sunrpc", trans_sunrpc_table },
7099 --- a/lib/swiotlb-xen.c
7100 +++ b/lib/swiotlb-xen.c
7101 @@ -27,7 +27,7 @@
7102 #include <asm/uaccess.h>
7103 #include <xen/gnttab.h>
7104 #include <xen/interface/memory.h>
7105 -#include <asm-i386/mach-xen/asm/gnttab_dma.h>
7106 +#include <asm/gnttab_dma.h>
7107
7108 int swiotlb;
7109 EXPORT_SYMBOL(swiotlb);
7110 @@ -580,9 +580,10 @@
7111 * same here.
7112 */
7113 int
7114 -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
7115 +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
7116 int dir)
7117 {
7118 + struct scatterlist *sg;
7119 struct phys_addr buffer;
7120 dma_addr_t dev_addr;
7121 char *map;
7122 @@ -590,22 +591,22 @@
7123
7124 BUG_ON(dir == DMA_NONE);
7125
7126 - for (i = 0; i < nelems; i++, sg++) {
7127 - dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;
7128 + for_each_sg(sgl, sg, nelems, i) {
7129 + dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset;
7130
7131 - if (range_straddles_page_boundary(page_to_pseudophys(sg->page)
7132 + if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg))
7133 + sg->offset, sg->length)
7134 || address_needs_mapping(hwdev, dev_addr)) {
7135 gnttab_dma_unmap_page(dev_addr);
7136 - buffer.page = sg->page;
7137 + buffer.page = sg_page(sg);
7138 buffer.offset = sg->offset;
7139 map = map_single(hwdev, buffer, sg->length, dir);
7140 if (!map) {
7141 /* Don't panic here, we expect map_sg users
7142 to do proper error handling. */
7143 swiotlb_full(hwdev, sg->length, dir, 0);
7144 - swiotlb_unmap_sg(hwdev, sg - i, i, dir);
7145 - sg[0].dma_length = 0;
7146 + swiotlb_unmap_sg(hwdev, sgl, i, dir);
7147 + sgl[0].dma_length = 0;
7148 return 0;
7149 }
7150 sg->dma_address = virt_to_bus(map);
7151 @@ -621,19 +622,21 @@
7152 * concerning calls here are the same as for swiotlb_unmap_single() above.
7153 */
7154 void
7155 -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
7156 +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
7157 int dir)
7158 {
7159 + struct scatterlist *sg;
7160 int i;
7161
7162 BUG_ON(dir == DMA_NONE);
7163
7164 - for (i = 0; i < nelems; i++, sg++)
7165 + for_each_sg(sgl, sg, nelems, i) {
7166 if (in_swiotlb_aperture(sg->dma_address))
7167 unmap_single(hwdev, bus_to_virt(sg->dma_address),
7168 sg->dma_length, dir);
7169 else
7170 gnttab_dma_unmap_page(sg->dma_address);
7171 + }
7172 }
7173
7174 /*
7175 @@ -644,31 +647,35 @@
7176 * and usage.
7177 */
7178 void
7179 -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
7180 +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl,
7181 int nelems, int dir)
7182 {
7183 + struct scatterlist *sg;
7184 int i;
7185
7186 BUG_ON(dir == DMA_NONE);
7187
7188 - for (i = 0; i < nelems; i++, sg++)
7189 + for_each_sg(sgl, sg, nelems, i) {
7190 if (in_swiotlb_aperture(sg->dma_address))
7191 sync_single(hwdev, bus_to_virt(sg->dma_address),
7192 sg->dma_length, dir);
7193 + }
7194 }
7195
7196 void
7197 -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
7198 +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl,
7199 int nelems, int dir)
7200 {
7201 + struct scatterlist *sg;
7202 int i;
7203
7204 BUG_ON(dir == DMA_NONE);
7205
7206 - for (i = 0; i < nelems; i++, sg++)
7207 + for_each_sg(sgl, sg, nelems, i) {
7208 if (in_swiotlb_aperture(sg->dma_address))
7209 sync_single(hwdev, bus_to_virt(sg->dma_address),
7210 sg->dma_length, dir);
7211 + }
7212 }
7213
7214 #ifdef CONFIG_HIGHMEM
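The swiotlb conversion above tracks the 2.6.24 scatterlist rework: entries may now be chained, so open-coded sg++ walks and direct sg->page accesses give way to the iterator and accessor, following the pattern:

        struct scatterlist *sg;
        int i;

        for_each_sg(sgl, sg, nelems, i) {       /* chain-safe; was: sg++ loop */
                struct page *p = sg_page(sg);   /* was: sg->page */
                /* ... map p + sg->offset for sg->length bytes ... */
        }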
7215 --- a/mm/memory.c
7216 +++ b/mm/memory.c
7217 @@ -408,7 +408,7 @@
7218 return NULL;
7219 #endif
7220
7221 -#ifdef CONFIG_DEBUG_VM
7222 +#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_XEN)
7223 /*
7224 * Add some anal sanity checks for now. Eventually,
7225 * we should just do "return pfn_to_page(pfn)", but