Magellan Linux

Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1020-2.6.25-xen-patch-2.6.19.patch

Revision 611
Fri May 23 18:49:01 2008 UTC by niro
File size: 335709 bytes
-fixed patch

1 diff -Naur linux-2.6.25/arch/x86/ia32/ia32entry-xen.S linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S
2 --- linux-2.6.25/arch/x86/ia32/ia32entry-xen.S 2008-05-23 20:51:11.000000000 +0200
3 +++ linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S 2008-05-23 20:39:03.000000000 +0200
4 @@ -83,6 +83,7 @@
5 */
6 ENTRY(ia32_sysenter_target)
7 CFI_STARTPROC32 simple
8 + CFI_SIGNAL_FRAME
9 CFI_DEF_CFA rsp,SS+8-RIP+16
10 /*CFI_REL_OFFSET ss,SS-RIP+16*/
11 CFI_REL_OFFSET rsp,RSP-RIP+16
12 @@ -164,6 +165,7 @@
13 */
14 ENTRY(ia32_cstar_target)
15 CFI_STARTPROC32 simple
16 + CFI_SIGNAL_FRAME
17 CFI_DEF_CFA rsp,SS+8-RIP+16
18 /*CFI_REL_OFFSET ss,SS-RIP+16*/
19 CFI_REL_OFFSET rsp,RSP-RIP+16
20 @@ -243,6 +245,7 @@
21
22 ENTRY(ia32_syscall)
23 CFI_STARTPROC simple
24 + CFI_SIGNAL_FRAME
25 CFI_DEF_CFA rsp,SS+8-RIP+16
26 /*CFI_REL_OFFSET ss,SS-RIP+16*/
27 CFI_REL_OFFSET rsp,RSP-RIP+16
28 @@ -320,6 +323,7 @@
29 popq %r11
30 CFI_ENDPROC
31 CFI_STARTPROC32 simple
32 + CFI_SIGNAL_FRAME
33 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
34 CFI_REL_OFFSET rax,RAX-ARGOFFSET
35 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
36 @@ -653,8 +657,8 @@
37 .quad sys_readlinkat /* 305 */
38 .quad sys_fchmodat
39 .quad sys_faccessat
40 - .quad quiet_ni_syscall /* pselect6 for now */
41 - .quad quiet_ni_syscall /* ppoll for now */
42 + .quad compat_sys_pselect6
43 + .quad compat_sys_ppoll
44 .quad sys_unshare /* 310 */
45 .quad compat_sys_set_robust_list
46 .quad compat_sys_get_robust_list
47 @@ -663,4 +667,5 @@
48 .quad sys_tee
49 .quad compat_sys_vmsplice
50 .quad compat_sys_move_pages
51 + .quad sys_getcpu
52 ia32_syscall_end:
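
The syscall-table hunks above stop routing pselect6 and ppoll through quiet_ni_syscall and append sys_getcpu, matching the i386 numbering (308, 309, and 318). A minimal userspace sketch, not part of the patch, showing how the new getcpu entry can be exercised from 32-bit code; the raw syscall is used because glibc of that era shipped no wrapper:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	unsigned int cpu, node;

	/* 318 is __NR_getcpu in the i386 table this hunk extends */
	if (syscall(318, &cpu, &node, NULL) == 0)
		printf("running on cpu %u, node %u\n", cpu, node);
	else
		perror("getcpu");	/* ENOSYS without the table entry */
	return 0;
}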
53 diff -Naur linux-2.6.25/arch/x86/Kconfig linux-2.6.25-xen/arch/x86/Kconfig
54 --- linux-2.6.25/arch/x86/Kconfig 2008-05-23 20:51:22.000000000 +0200
55 +++ linux-2.6.25-xen/arch/x86/Kconfig 2008-05-23 20:39:03.000000000 +0200
56 @@ -390,6 +390,7 @@
57
58 menuconfig PARAVIRT_GUEST
59 bool "Paravirtualized guest support"
60 + depends on !X86_XEN && !X86_64_XEN
61 help
62 Say Y here to get to see options related to running Linux under
63 various hypervisors. This option alone does not add any kernel code.
64 diff -Naur linux-2.6.25/arch/x86/kernel/apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_32-xen.c
65 --- linux-2.6.25/arch/x86/kernel/apic_32-xen.c 2008-05-23 20:51:11.000000000 +0200
66 +++ linux-2.6.25-xen/arch/x86/kernel/apic_32-xen.c 2008-05-23 20:39:03.000000000 +0200
67 @@ -54,7 +54,6 @@
68 /*
69 * Knob to control our willingness to enable the local APIC.
70 */
71 -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
72
73 /*
74 * Debug level
75 @@ -102,7 +101,7 @@
76
77 #ifndef CONFIG_XEN
78 #ifndef CONFIG_SMP
79 -static void up_apic_timer_interrupt_call(struct pt_regs *regs)
80 +static void up_apic_timer_interrupt_call(void)
81 {
82 int cpu = smp_processor_id();
83
84 @@ -111,11 +110,11 @@
85 */
86 per_cpu(irq_stat, cpu).apic_timer_irqs++;
87
88 - smp_local_timer_interrupt(regs);
89 + smp_local_timer_interrupt();
90 }
91 #endif
92
93 -void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
94 +void smp_send_timer_broadcast_ipi(void)
95 {
96 cpumask_t mask;
97
98 @@ -128,7 +127,7 @@
99 * We can directly call the apic timer interrupt handler
100 * in UP case. Minus all irq related functions
101 */
102 - up_apic_timer_interrupt_call(regs);
103 + up_apic_timer_interrupt_call();
104 #endif
105 }
106 }
107 diff -Naur linux-2.6.25/arch/x86/kernel/apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c
108 --- linux-2.6.25/arch/x86/kernel/apic_64-xen.c 2008-05-23 20:51:11.000000000 +0200
109 +++ linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c 2008-05-23 20:39:03.000000000 +0200
110 @@ -43,7 +43,7 @@
111 */
112 void ack_bad_irq(unsigned int irq)
113 {
114 - printk("unexpected IRQ trap at vector %02x\n", irq);
115 + printk("unexpected IRQ trap at irq %02x\n", irq);
116 /*
117 * Currently unexpected vectors happen only on SMP and APIC.
118 * We _must_ ack these because every local APIC has only N
119 @@ -62,19 +62,19 @@
120 return -EINVAL;
121 }
122
123 -void smp_local_timer_interrupt(struct pt_regs *regs)
124 +void smp_local_timer_interrupt(void)
125 {
126 - profile_tick(CPU_PROFILING, regs);
127 + profile_tick(CPU_PROFILING);
128 #ifndef CONFIG_XEN
129 #ifdef CONFIG_SMP
130 - update_process_times(user_mode(regs));
131 + update_process_times(user_mode(get_irq_regs()));
132 #endif
133 #endif
134 /*
135 * We take the 'long' return path, and there every subsystem
136 * grabs the appropriate locks (kernel lock/ irq lock).
137 *
138 - * we might want to decouple profiling from the 'long path',
139 + * We might want to decouple profiling from the 'long path',
140 * and do the profiling totally in assembly.
141 *
142 * Currently this isn't too much of an issue (performance wise),
143 @@ -92,6 +92,8 @@
144 */
145 void smp_apic_timer_interrupt(struct pt_regs *regs)
146 {
147 + struct pt_regs *old_regs = set_irq_regs(regs);
148 +
149 /*
150 * the NMI deadlock-detector uses this.
151 */
152 @@ -109,8 +111,9 @@
153 */
154 exit_idle();
155 irq_enter();
156 - smp_local_timer_interrupt(regs);
157 + smp_local_timer_interrupt();
158 irq_exit();
159 + set_irq_regs(old_regs);
160 }
161
162 /*
163 @@ -188,9 +191,8 @@
164 int __init APIC_init_uniprocessor (void)
165 {
166 #ifdef CONFIG_X86_IO_APIC
167 - if (smp_found_config)
168 - if (!skip_ioapic_setup && nr_ioapics)
169 - setup_IO_APIC();
170 + if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
171 + setup_IO_APIC();
172 #endif
173
174 return 1;
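
Both APIC files above track the 2.6.19 interrupt-handler signature change: instead of threading struct pt_regs through every callee, the frame is stashed per-CPU around the handler and fetched on demand. A condensed sketch of the pattern (accessor names from include/asm/irq_regs.h; handler bodies abbreviated):

void smp_apic_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);	/* publish the frame */

	irq_enter();
	smp_local_timer_interrupt();		/* no regs argument any more */
	irq_exit();
	set_irq_regs(old_regs);			/* restore for nested interrupts */
}

void smp_local_timer_interrupt(void)
{
	/* callees pull the frame from the per-CPU slot instead */
	update_process_times(user_mode(get_irq_regs()));
}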
175 diff -Naur linux-2.6.25/arch/x86/kernel/cpu/common-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c
176 --- linux-2.6.25/arch/x86/kernel/cpu/common-xen.c 2008-05-23 20:51:11.000000000 +0200
177 +++ linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c 2008-05-23 20:39:03.000000000 +0200
178 @@ -43,7 +43,7 @@
179
180 extern int disable_pse;
181
182 -static void default_init(struct cpuinfo_x86 * c)
183 +static void __cpuinit default_init(struct cpuinfo_x86 * c)
184 {
185 /* Not much we can do here... */
186 /* Check if at least it has cpuid */
187 @@ -56,7 +56,7 @@
188 }
189 }
190
191 -static struct cpu_dev default_cpu = {
192 +static struct cpu_dev __cpuinitdata default_cpu = {
193 .c_init = default_init,
194 .c_vendor = "Unknown",
195 };
196 @@ -191,7 +191,16 @@
197
198 static int __init x86_fxsr_setup(char * s)
199 {
200 + /* Tell all the other CPU's to not use it... */
201 disable_x86_fxsr = 1;
202 +
203 + /*
204 + * ... and clear the bits early in the boot_cpu_data
205 + * so that the bootup process doesn't try to do this
206 + * either.
207 + */
208 + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
209 + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
210 return 1;
211 }
212 __setup("nofxsr", x86_fxsr_setup);
213 @@ -272,7 +281,7 @@
214 }
215 }
216
217 -void __cpuinit generic_identify(struct cpuinfo_x86 * c)
218 +static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
219 {
220 u32 tfms, xlvl;
221 int ebx;
222 @@ -698,8 +707,7 @@
223 */
224 atomic_inc(&init_mm.mm_count);
225 current->active_mm = &init_mm;
226 - if (current->mm)
227 - BUG();
228 + BUG_ON(current->mm);
229 enter_lazy_tlb(&init_mm, current);
230
231 load_esp0(t, thread);
232 @@ -712,7 +720,7 @@
233 #endif
234
235 /* Clear %fs and %gs. */
236 - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
237 + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
238
239 /* Clear all 6 debug registers: */
240 set_debugreg(0, 0);
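
The %fs/%gs hunk above deserves a note: the old sequence zeroed %eax behind the compiler's back without declaring the clobber, while the new form passes 0 as an input operand so GCC may pick any free register. A side-by-side sketch of the two constructs:

/* before: silently destroys %eax, no clobber declared */
asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");

/* after: "r"(0) is a declared input, so register allocation stays honest */
asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));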
241 diff -Naur linux-2.6.25/arch/x86/kernel/e820_64-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c
242 --- linux-2.6.25/arch/x86/kernel/e820_64-xen.c 2008-05-23 20:51:11.000000000 +0200
243 +++ linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c 2008-05-23 20:39:03.000000000 +0200
244 @@ -16,6 +16,7 @@
245 #include <linux/string.h>
246 #include <linux/kexec.h>
247 #include <linux/module.h>
248 +#include <linux/mm.h>
249
250 #include <asm/pgtable.h>
251 #include <asm/page.h>
252 @@ -25,6 +26,11 @@
253 #include <asm/sections.h>
254 #include <xen/interface/memory.h>
255
256 +struct e820map e820 __initdata;
257 +#ifdef CONFIG_XEN
258 +struct e820map machine_e820 __initdata;
259 +#endif
260 +
261 /*
262 * PFN of last memory page.
263 */
264 @@ -41,7 +47,7 @@
265 /*
266 * Last pfn which the user wants to use.
267 */
268 -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
269 +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
270
271 extern struct resource code_resource, data_resource;
272
273 @@ -53,13 +59,13 @@
274 #ifndef CONFIG_XEN
275 /* various gunk below that needed for SMP startup */
276 if (addr < 0x8000) {
277 - *addrp = 0x8000;
278 + *addrp = PAGE_ALIGN(0x8000);
279 return 1;
280 }
281
282 /* direct mapping tables of the kernel */
283 if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
284 - *addrp = table_end << PAGE_SHIFT;
285 + *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
286 return 1;
287 }
288
289 @@ -67,23 +73,18 @@
290 #ifdef CONFIG_BLK_DEV_INITRD
291 if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
292 addr < INITRD_START+INITRD_SIZE) {
293 - *addrp = INITRD_START + INITRD_SIZE;
294 + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
295 return 1;
296 }
297 #endif
298 - /* kernel code + 640k memory hole (later should not be needed, but
299 - be paranoid for now) */
300 - if (last >= 640*1024 && addr < 1024*1024) {
301 - *addrp = 1024*1024;
302 - return 1;
303 - }
304 - if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
305 - *addrp = __pa_symbol(&_end);
306 + /* kernel code */
307 + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
308 + *addrp = PAGE_ALIGN(__pa_symbol(&_end));
309 return 1;
310 }
311
312 if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
313 - *addrp = ebda_addr + ebda_size;
314 + *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
315 return 1;
316 }
317
318 @@ -141,8 +142,6 @@
319 for (i = 0; i < e820.nr_map; i++) {
320 struct e820entry *ei = &e820.map[i];
321 #else
322 - extern struct e820map machine_e820;
323 -
324 if (!is_initial_xendomain())
325 return 0;
326 for (i = 0; i < machine_e820.nr_map; i++) {
327 @@ -184,7 +183,7 @@
328 continue;
329 while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
330 ;
331 - last = addr + size;
332 + last = PAGE_ALIGN(addr) + size;
333 if (last > ei->addr + ei->size)
334 continue;
335 if (last > end)
336 @@ -194,59 +193,14 @@
337 return -1UL;
338 }
339
340 -/*
341 - * Free bootmem based on the e820 table for a node.
342 - */
343 -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
344 -{
345 - int i;
346 - for (i = 0; i < e820.nr_map; i++) {
347 - struct e820entry *ei = &e820.map[i];
348 - unsigned long last, addr;
349 -
350 - if (ei->type != E820_RAM ||
351 - ei->addr+ei->size <= start ||
352 - ei->addr >= end)
353 - continue;
354 -
355 - addr = round_up(ei->addr, PAGE_SIZE);
356 - if (addr < start)
357 - addr = start;
358 -
359 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
360 - if (last >= end)
361 - last = end;
362 -
363 - if (last > addr && last-addr >= PAGE_SIZE)
364 - free_bootmem_node(pgdat, addr, last-addr);
365 - }
366 -}
367 -
368 /*
369 * Find the highest page frame number we have available
370 */
371 unsigned long __init e820_end_of_ram(void)
372 {
373 - int i;
374 unsigned long end_pfn = 0;
375 + end_pfn = find_max_pfn_with_active_regions();
376
377 - for (i = 0; i < e820.nr_map; i++) {
378 - struct e820entry *ei = &e820.map[i];
379 - unsigned long start, end;
380 -
381 - start = round_up(ei->addr, PAGE_SIZE);
382 - end = round_down(ei->addr + ei->size, PAGE_SIZE);
383 - if (start >= end)
384 - continue;
385 - if (ei->type == E820_RAM) {
386 - if (end > end_pfn<<PAGE_SHIFT)
387 - end_pfn = end>>PAGE_SHIFT;
388 - } else {
389 - if (end > end_pfn_map<<PAGE_SHIFT)
390 - end_pfn_map = end>>PAGE_SHIFT;
391 - }
392 - }
393 -
394 if (end_pfn > end_pfn_map)
395 end_pfn_map = end_pfn;
396 if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
397 @@ -256,43 +210,10 @@
398 if (end_pfn > end_pfn_map)
399 end_pfn = end_pfn_map;
400
401 + printk("end_pfn_map = %lu\n", end_pfn_map);
402 return end_pfn;
403 }
404
405 -/*
406 - * Compute how much memory is missing in a range.
407 - * Unlike the other functions in this file the arguments are in page numbers.
408 - */
409 -unsigned long __init
410 -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
411 -{
412 - unsigned long ram = 0;
413 - unsigned long start = start_pfn << PAGE_SHIFT;
414 - unsigned long end = end_pfn << PAGE_SHIFT;
415 - int i;
416 - for (i = 0; i < e820.nr_map; i++) {
417 - struct e820entry *ei = &e820.map[i];
418 - unsigned long last, addr;
419 -
420 - if (ei->type != E820_RAM ||
421 - ei->addr+ei->size <= start ||
422 - ei->addr >= end)
423 - continue;
424 -
425 - addr = round_up(ei->addr, PAGE_SIZE);
426 - if (addr < start)
427 - addr = start;
428 -
429 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
430 - if (last >= end)
431 - last = end;
432 -
433 - if (last > addr)
434 - ram += last - addr;
435 - }
436 - return ((end - start) - ram) >> PAGE_SHIFT;
437 -}
438 -
439 /*
440 * Mark e820 reserved areas as busy for the resource manager.
441 */
442 @@ -333,6 +254,98 @@
443 }
444 }
445
446 +#ifndef CONFIG_XEN
447 +/* Mark pages corresponding to given address range as nosave */
448 +static void __init
449 +e820_mark_nosave_range(unsigned long start, unsigned long end)
450 +{
451 + unsigned long pfn, max_pfn;
452 +
453 + if (start >= end)
454 + return;
455 +
456 + printk("Nosave address range: %016lx - %016lx\n", start, end);
457 + max_pfn = end >> PAGE_SHIFT;
458 + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
459 + if (pfn_valid(pfn))
460 + SetPageNosave(pfn_to_page(pfn));
461 +}
462 +
463 +/*
464 + * Find the ranges of physical addresses that do not correspond to
465 + * e820 RAM areas and mark the corresponding pages as nosave for software
466 + * suspend and suspend to RAM.
467 + *
468 + * This function requires the e820 map to be sorted and without any
469 + * overlapping entries and assumes the first e820 area to be RAM.
470 + */
471 +void __init e820_mark_nosave_regions(void)
472 +{
473 + int i;
474 + unsigned long paddr;
475 +
476 + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
477 + for (i = 1; i < e820.nr_map; i++) {
478 + struct e820entry *ei = &e820.map[i];
479 +
480 + if (paddr < ei->addr)
481 + e820_mark_nosave_range(paddr,
482 + round_up(ei->addr, PAGE_SIZE));
483 +
484 + paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
485 + if (ei->type != E820_RAM)
486 + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
487 + paddr);
488 +
489 + if (paddr >= (end_pfn << PAGE_SHIFT))
490 + break;
491 + }
492 +}
493 +#endif
494 +
495 +/* Walk the e820 map and register active regions within a node */
496 +void __init
497 +e820_register_active_regions(int nid, unsigned long start_pfn,
498 + unsigned long end_pfn)
499 +{
500 + int i;
501 + unsigned long ei_startpfn, ei_endpfn;
502 + for (i = 0; i < e820.nr_map; i++) {
503 + struct e820entry *ei = &e820.map[i];
504 + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
505 + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
506 + >> PAGE_SHIFT;
507 +
508 + /* Skip map entries smaller than a page */
509 + if (ei_startpfn >= ei_endpfn)
510 + continue;
511 +
512 + /* Check if end_pfn_map should be updated */
513 + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
514 + end_pfn_map = ei_endpfn;
515 +
516 + /* Skip if map is outside the node */
517 + if (ei->type != E820_RAM ||
518 + ei_endpfn <= start_pfn ||
519 + ei_startpfn >= end_pfn)
520 + continue;
521 +
522 + /* Check for overlaps */
523 + if (ei_startpfn < start_pfn)
524 + ei_startpfn = start_pfn;
525 + if (ei_endpfn > end_pfn)
526 + ei_endpfn = end_pfn;
527 +
528 + /* Obey end_user_pfn to save on memmap */
529 + if (ei_startpfn >= end_user_pfn)
530 + continue;
531 + if (ei_endpfn > end_user_pfn)
532 + ei_endpfn = end_user_pfn;
533 +
534 + add_active_range(nid, ei_startpfn, ei_endpfn);
535 + }
536 +}
537 +
538 /*
539 * Add a memory region to the kernel e820 map.
540 */
541 @@ -553,13 +566,6 @@
542 * If we're lucky and live on a modern system, the setup code
543 * will have given us a memory map that we can use to properly
544 * set up memory. If we aren't, we'll fake a memory map.
545 - *
546 - * We check to see that the memory map contains at least 2 elements
547 - * before we'll use it, because the detection code in setup.S may
548 - * not be perfect and most every PC known to man has two memory
549 - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
550 - * thinkpad 560x, for example, does not cooperate with the memory
551 - * detection code.)
552 */
553 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
554 {
555 @@ -581,37 +587,20 @@
556 if (start > end)
557 return -1;
558
559 -#ifndef CONFIG_XEN
560 - /*
561 - * Some BIOSes claim RAM in the 640k - 1M region.
562 - * Not right. Fix it up.
563 - *
564 - * This should be removed on Hammer which is supposed to not
565 - * have non e820 covered ISA mappings there, but I had some strange
566 - * problems so it stays for now. -AK
567 - */
568 - if (type == E820_RAM) {
569 - if (start < 0x100000ULL && end > 0xA0000ULL) {
570 - if (start < 0xA0000ULL)
571 - add_memory_region(start, 0xA0000ULL-start, type);
572 - if (end <= 0x100000ULL)
573 - continue;
574 - start = 0x100000ULL;
575 - size = end - start;
576 - }
577 - }
578 -#endif
579 -
580 add_memory_region(start, size, type);
581 } while (biosmap++,--nr_map);
582 return 0;
583 }
584
585 +void early_panic(char *msg)
586 +{
587 + early_printk(msg);
588 + panic(msg);
589 +}
590 +
591 #ifndef CONFIG_XEN
592 void __init setup_memory_region(void)
593 {
594 - char *who = "BIOS-e820";
595 -
596 /*
597 * Try to copy the BIOS-supplied E820-map.
598 *
599 @@ -619,24 +608,10 @@
600 * the next section from 1mb->appropriate_mem_k
601 */
602 sanitize_e820_map(E820_MAP, &E820_MAP_NR);
603 - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
604 - unsigned long mem_size;
605 -
606 - /* compare results from other methods and take the greater */
607 - if (ALT_MEM_K < EXT_MEM_K) {
608 - mem_size = EXT_MEM_K;
609 - who = "BIOS-88";
610 - } else {
611 - mem_size = ALT_MEM_K;
612 - who = "BIOS-e801";
613 - }
614 -
615 - e820.nr_map = 0;
616 - add_memory_region(0, LOWMEMSIZE(), E820_RAM);
617 - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
618 - }
619 + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
620 + early_panic("Cannot find a valid memory map");
621 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
622 - e820_print_map(who);
623 + e820_print_map("BIOS-e820");
624 }
625
626 #else /* CONFIG_XEN */
627 @@ -668,20 +643,23 @@
628
629 sanitize_e820_map(map, (char *)&memmap.nr_entries);
630
631 - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
632 + if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
633 + early_panic("Cannot find a valid memory map");
634
635 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
636 e820_print_map("Xen");
637 }
638 #endif
639
640 -void __init parse_memopt(char *p, char **from)
641 -{
642 +static int __init parse_memopt(char *p)
643 +{
644 int i;
645 unsigned long current_end;
646 unsigned long end;
647
648 - end_user_pfn = memparse(p, from);
649 + if (!p)
650 + return -EINVAL;
651 + end_user_pfn = memparse(p, &p);
652 end_user_pfn >>= PAGE_SHIFT;
653
654 end = end_user_pfn<<PAGE_SHIFT;
655 @@ -698,27 +676,61 @@
656 else
657 add_memory_region(current_end, end - current_end, E820_RAM);
658 }
659 +
660 + return 0;
661 }
662 +early_param("mem", parse_memopt);
663 +
664 +static int userdef __initdata;
665
666 -void __init parse_memmapopt(char *p, char **from)
667 +static int __init parse_memmap_opt(char *p)
668 {
669 + char *oldp;
670 unsigned long long start_at, mem_size;
671
672 - mem_size = memparse(p, from);
673 - p = *from;
674 + if (!strcmp(p, "exactmap")) {
675 +#ifdef CONFIG_CRASH_DUMP
676 + /* If we are doing a crash dump, we
677 + * still need to know the real mem
678 + * size before original memory map is
679 + * reset.
680 + */
681 + e820_register_active_regions(0, 0, -1UL);
682 + saved_max_pfn = e820_end_of_ram();
683 + remove_all_active_ranges();
684 +#endif
685 + end_pfn_map = 0;
686 + e820.nr_map = 0;
687 + userdef = 1;
688 + return 0;
689 + }
690 +
691 + oldp = p;
692 + mem_size = memparse(p, &p);
693 + if (p == oldp)
694 + return -EINVAL;
695 if (*p == '@') {
696 - start_at = memparse(p+1, from);
697 + start_at = memparse(p+1, &p);
698 add_memory_region(start_at, mem_size, E820_RAM);
699 } else if (*p == '#') {
700 - start_at = memparse(p+1, from);
701 + start_at = memparse(p+1, &p);
702 add_memory_region(start_at, mem_size, E820_ACPI);
703 } else if (*p == '$') {
704 - start_at = memparse(p+1, from);
705 + start_at = memparse(p+1, &p);
706 add_memory_region(start_at, mem_size, E820_RESERVED);
707 } else {
708 end_user_pfn = (mem_size >> PAGE_SHIFT);
709 }
710 - p = *from;
711 + return *p == '\0' ? 0 : -EINVAL;
712 +}
713 +early_param("memmap", parse_memmap_opt);
714 +
715 +void finish_e820_parsing(void)
716 +{
717 + if (userdef) {
718 + printk(KERN_INFO "user-defined physical RAM map:\n");
719 + e820_print_map("user");
720 + }
721 }
722
723 unsigned long pci_mem_start = 0xaeedbabe;
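
The command-line hunks above convert mem= and memmap= to early_param() handlers that parse with memparse() (lib/cmdline.c), which consumes K/M/G suffixes: "memmap=64M@16M" adds a RAM range, '#' marks ACPI data, '$' marks a reserved range, and a bare size truncates memory via end_user_pfn. A hedged userspace analogue of that grammar, for illustration only:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *arg = "64M@16M";	/* as in memmap=64M@16M */
	char *p;
	unsigned long long size, start;

	size = strtoull(arg, &p, 0);
	if (*p == 'K' || *p == 'M' || *p == 'G') {	/* memparse-style suffix */
		size <<= (*p == 'K') ? 10 : (*p == 'M') ? 20 : 30;
		p++;
	}
	if (*p == '@') {
		start = strtoull(p + 1, &p, 0);	/* suffix handling elided here */
		printf("RAM: %llu bytes at %llu\n", size, start);
	}
	return 0;
}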
724 diff -Naur linux-2.6.25/arch/x86/kernel/early_printk-xen.c linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c
725 --- linux-2.6.25/arch/x86/kernel/early_printk-xen.c 2008-05-23 20:51:11.000000000 +0200
726 +++ linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c 2008-05-23 20:39:03.000000000 +0200
727 @@ -244,20 +244,16 @@
728
729 static int __initdata keep_early;
730
731 -int __init setup_early_printk(char *opt)
732 +static int __init setup_early_printk(char *buf)
733 {
734 - char *space;
735 - char buf[256];
736 + if (!buf)
737 + return 0;
738
739 if (early_console_initialized)
740 - return 1;
741 -
742 - strlcpy(buf,opt,sizeof(buf));
743 - space = strchr(buf, ' ');
744 - if (space)
745 - *space = 0;
746 + return 0;
747 + early_console_initialized = 1;
748
749 - if (strstr(buf,"keep"))
750 + if (strstr(buf, "keep"))
751 keep_early = 1;
752
753 if (!strncmp(buf, "serial", 6)) {
754 @@ -281,11 +277,12 @@
755 early_console = &simnow_console;
756 keep_early = 1;
757 }
758 - early_console_initialized = 1;
759 register_console(early_console);
760 return 0;
761 }
762
763 +early_param("earlyprintk", setup_early_printk);
764 +
765 void __init disable_early_printk(void)
766 {
767 if (!early_console_initialized || !early_console)
768 @@ -299,4 +296,3 @@
769 }
770 }
771
772 -__setup("earlyprintk=", setup_early_printk);
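
The same __setup-to-early_param conversion appears here as in e820_64-xen.c: early_param() handlers run from parse_early_param() at the start of setup_arch(), which is what lets head64-xen.c further down drop its hand-rolled "earlyprintk=" scan of saved_command_line. A minimal registration sketch (macros from include/linux/init.h; the handler body is a placeholder):

static int __init parse_myopt(char *arg)
{
	if (!arg)
		return -EINVAL;	/* handlers must cope with a missing value */
	/* consume the option here */
	return 0;		/* 0 = consumed, as in setup_early_printk() */
}
early_param("myopt", parse_myopt);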
773 diff -Naur linux-2.6.25/arch/x86/kernel/entry_32-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S
774 --- linux-2.6.25/arch/x86/kernel/entry_32-xen.S 2008-05-23 20:51:22.000000000 +0200
775 +++ linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S 2008-05-23 20:39:03.000000000 +0200
776 @@ -80,8 +80,12 @@
777 NMI_MASK = 0x80000000
778
779 #ifndef CONFIG_XEN
780 -#define DISABLE_INTERRUPTS cli
781 -#define ENABLE_INTERRUPTS sti
782 +/* These are replacements for paravirtualization */
783 +#define DISABLE_INTERRUPTS cli
784 +#define ENABLE_INTERRUPTS sti
785 +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
786 +#define INTERRUPT_RETURN iret
787 +#define GET_CR0_INTO_EAX movl %cr0, %eax
788 #else
789 /* Offsets into shared_info_t. */
790 #define evtchn_upcall_pending /* 0 */
791 @@ -99,15 +103,29 @@
792
793 #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
794 #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
795 +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
796 #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
797 __DISABLE_INTERRUPTS
798 #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
799 __ENABLE_INTERRUPTS
800 -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
801 +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
802 +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
803 + __TEST_PENDING ; \
804 + jnz 14f # process more events if necessary... ; \
805 + movl ESI(%esp), %esi ; \
806 + sysexit ; \
807 +14: __DISABLE_INTERRUPTS ; \
808 + TRACE_IRQS_OFF ; \
809 +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
810 + push %esp ; \
811 + call evtchn_do_upcall ; \
812 + add $4,%esp ; \
813 + jmp ret_from_intr
814 +#define INTERRUPT_RETURN iret
815 #endif
816
817 #ifdef CONFIG_PREEMPT
818 -#define preempt_stop cli; TRACE_IRQS_OFF
819 +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
820 #else
821 #define preempt_stop
822 #define resume_kernel restore_nocheck
823 @@ -206,18 +224,21 @@
824
825 #define RING0_INT_FRAME \
826 CFI_STARTPROC simple;\
827 + CFI_SIGNAL_FRAME;\
828 CFI_DEF_CFA esp, 3*4;\
829 /*CFI_OFFSET cs, -2*4;*/\
830 CFI_OFFSET eip, -3*4
831
832 #define RING0_EC_FRAME \
833 CFI_STARTPROC simple;\
834 + CFI_SIGNAL_FRAME;\
835 CFI_DEF_CFA esp, 4*4;\
836 /*CFI_OFFSET cs, -2*4;*/\
837 CFI_OFFSET eip, -3*4
838
839 #define RING0_PTREGS_FRAME \
840 CFI_STARTPROC simple;\
841 + CFI_SIGNAL_FRAME;\
842 CFI_DEF_CFA esp, OLDESP-EBX;\
843 /*CFI_OFFSET cs, CS-OLDESP;*/\
844 CFI_OFFSET eip, EIP-OLDESP;\
845 @@ -263,8 +284,9 @@
846 check_userspace:
847 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
848 movb CS(%esp), %al
849 - testl $(VM_MASK | 2), %eax
850 - jz resume_kernel
851 + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
852 + cmpl $USER_RPL, %eax
853 + jb resume_kernel # not returning to v8086 or userspace
854 ENTRY(resume_userspace)
855 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
856 # setting need_resched or sigpending
857 @@ -277,7 +299,7 @@
858
859 #ifdef CONFIG_PREEMPT
860 ENTRY(resume_kernel)
861 - cli
862 + DISABLE_INTERRUPTS
863 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
864 jnz restore_nocheck
865 need_resched:
866 @@ -297,6 +319,7 @@
867 # sysenter call handler stub
868 ENTRY(sysenter_entry)
869 CFI_STARTPROC simple
870 + CFI_SIGNAL_FRAME
871 CFI_DEF_CFA esp, 0
872 CFI_REGISTER esp, ebp
873 movl SYSENTER_stack_esp0(%esp),%esp
874 @@ -305,7 +328,7 @@
875 * No need to follow this irqs on/off section: the syscall
876 * disabled irqs and here we enable it straight after entry:
877 */
878 - sti
879 + ENABLE_INTERRUPTS
880 pushl $(__USER_DS)
881 CFI_ADJUST_CFA_OFFSET 4
882 /*CFI_REL_OFFSET ss, 0*/
883 @@ -359,26 +382,8 @@
884 movl EIP(%esp), %edx
885 movl OLDESP(%esp), %ecx
886 xorl %ebp,%ebp
887 -#ifdef CONFIG_XEN
888 TRACE_IRQS_ON
889 - __ENABLE_INTERRUPTS
890 -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
891 - __TEST_PENDING
892 - jnz 14f # process more events if necessary...
893 - movl ESI(%esp), %esi
894 - sysexit
895 -14: __DISABLE_INTERRUPTS
896 - TRACE_IRQS_OFF
897 -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
898 - push %esp
899 - call evtchn_do_upcall
900 - add $4,%esp
901 - jmp ret_from_intr
902 -#else
903 - TRACE_IRQS_ON
904 - sti
905 - sysexit
906 -#endif /* !CONFIG_XEN */
907 + ENABLE_INTERRUPTS_SYSEXIT
908 CFI_ENDPROC
909
910 # pv sysenter call handler stub
911 @@ -444,8 +449,8 @@
912 # See comments in process.c:copy_thread() for details.
913 movb OLDSS(%esp), %ah
914 movb CS(%esp), %al
915 - andl $(VM_MASK | (4 << 8) | 3), %eax
916 - cmpl $((4 << 8) | 3), %eax
917 + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
918 + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
919 CFI_REMEMBER_STATE
920 je ldt_ss # returning to user-space with LDT SS
921 restore_nocheck:
922 @@ -467,12 +472,11 @@
923 RESTORE_REGS
924 addl $4, %esp
925 CFI_ADJUST_CFA_OFFSET -4
926 -1: iret
927 +1: INTERRUPT_RETURN
928 .section .fixup,"ax"
929 iret_exc:
930 #ifndef CONFIG_XEN
931 - TRACE_IRQS_ON
932 - sti
933 + ENABLE_INTERRUPTS
934 #endif
935 pushl $0 # no error code
936 pushl $do_iret_error
937 @@ -498,7 +502,7 @@
938 * dosemu and wine happy. */
939 subl $8, %esp # reserve space for switch16 pointer
940 CFI_ADJUST_CFA_OFFSET 8
941 - cli
942 + DISABLE_INTERRUPTS
943 TRACE_IRQS_OFF
944 movl %esp, %eax
945 /* Set up the 16bit stack frame with switch32 pointer on top,
946 @@ -508,7 +512,7 @@
947 TRACE_IRQS_IRET
948 RESTORE_REGS
949 lss 20+4(%esp), %esp # switch to 16bit stack
950 -1: iret
951 +1: INTERRUPT_RETURN
952 .section __ex_table,"a"
953 .align 4
954 .long 1b,iret_exc
955 @@ -524,7 +528,7 @@
956 RESTORE_REGS
957 addl $4, %esp
958 CFI_ADJUST_CFA_OFFSET -4
959 -1: iret
960 +1: INTERRUPT_RETURN
961 .section __ex_table,"a"
962 .align 4
963 .long 1b,iret_exc
964 @@ -713,11 +717,9 @@
965 #define UNWIND_ESPFIX_STACK
966 #endif
967
968 -ENTRY(divide_error)
969 - RING0_INT_FRAME
970 - pushl $0 # no error code
971 - CFI_ADJUST_CFA_OFFSET 4
972 - pushl $do_divide_error
973 +KPROBE_ENTRY(page_fault)
974 + RING0_EC_FRAME
975 + pushl $do_page_fault
976 CFI_ADJUST_CFA_OFFSET 4
977 ALIGN
978 error_code:
979 @@ -767,6 +769,7 @@
980 call *%edi
981 jmp ret_from_exception
982 CFI_ENDPROC
983 +KPROBE_END(page_fault)
984
985 #ifdef CONFIG_XEN
986 # A note on the "critical region" in our callback handler.
987 @@ -926,7 +929,7 @@
988 CFI_ADJUST_CFA_OFFSET 4
989 SAVE_ALL
990 #ifndef CONFIG_XEN
991 - movl %cr0, %eax
992 + GET_CR0_INTO_EAX
993 testl $0x4, %eax # EM (math emulation bit)
994 je device_available_emulate
995 pushl $0 # temporary storage for ORIG_EIP
996 @@ -961,9 +964,15 @@
997 jne ok; \
998 label: \
999 movl SYSENTER_stack_esp0+offset(%esp),%esp; \
1000 + CFI_DEF_CFA esp, 0; \
1001 + CFI_UNDEFINED eip; \
1002 pushfl; \
1003 + CFI_ADJUST_CFA_OFFSET 4; \
1004 pushl $__KERNEL_CS; \
1005 - pushl $sysenter_past_esp
1006 + CFI_ADJUST_CFA_OFFSET 4; \
1007 + pushl $sysenter_past_esp; \
1008 + CFI_ADJUST_CFA_OFFSET 4; \
1009 + CFI_REL_OFFSET eip, 0
1010 #endif /* CONFIG_XEN */
1011
1012 KPROBE_ENTRY(debug)
1013 @@ -982,7 +991,8 @@
1014 call do_debug
1015 jmp ret_from_exception
1016 CFI_ENDPROC
1017 - .previous .text
1018 +KPROBE_END(debug)
1019 +
1020 #ifndef CONFIG_XEN
1021 /*
1022 * NMI is doubly nasty. It can happen _while_ we're handling
1023 @@ -992,7 +1002,7 @@
1024 * check whether we got an NMI on the debug path where the debug
1025 * fault happened on the sysenter path.
1026 */
1027 -ENTRY(nmi)
1028 +KPROBE_ENTRY(nmi)
1029 RING0_INT_FRAME
1030 pushl %eax
1031 CFI_ADJUST_CFA_OFFSET 4
1032 @@ -1017,6 +1027,7 @@
1033 cmpl $sysenter_entry,12(%esp)
1034 je nmi_debug_stack_check
1035 nmi_stack_correct:
1036 + /* We have a RING0_INT_FRAME here */
1037 pushl %eax
1038 CFI_ADJUST_CFA_OFFSET 4
1039 SAVE_ALL
1040 @@ -1027,9 +1038,12 @@
1041 CFI_ENDPROC
1042
1043 nmi_stack_fixup:
1044 + RING0_INT_FRAME
1045 FIX_STACK(12,nmi_stack_correct, 1)
1046 jmp nmi_stack_correct
1047 +
1048 nmi_debug_stack_check:
1049 + /* We have a RING0_INT_FRAME here */
1050 cmpw $__KERNEL_CS,16(%esp)
1051 jne nmi_stack_correct
1052 cmpl $debug,(%esp)
1053 @@ -1040,8 +1054,10 @@
1054 jmp nmi_stack_correct
1055
1056 nmi_16bit_stack:
1057 - RING0_INT_FRAME
1058 - /* create the pointer to lss back */
1059 + /* We have a RING0_INT_FRAME here.
1060 + *
1061 + * create the pointer to lss back
1062 + */
1063 pushl %ss
1064 CFI_ADJUST_CFA_OFFSET 4
1065 pushl %esp
1066 @@ -1062,14 +1078,14 @@
1067 call do_nmi
1068 RESTORE_REGS
1069 lss 12+4(%esp), %esp # back to 16bit stack
1070 -1: iret
1071 +1: INTERRUPT_RETURN
1072 CFI_ENDPROC
1073 .section __ex_table,"a"
1074 .align 4
1075 .long 1b,iret_exc
1076 .previous
1077 #else
1078 -ENTRY(nmi)
1079 +KPROBE_ENTRY(nmi)
1080 RING0_INT_FRAME
1081 pushl %eax
1082 CFI_ADJUST_CFA_OFFSET 4
1083 @@ -1081,6 +1097,7 @@
1084 jmp restore_all
1085 CFI_ENDPROC
1086 #endif
1087 +KPROBE_END(nmi)
1088
1089 KPROBE_ENTRY(int3)
1090 RING0_INT_FRAME
1091 @@ -1092,7 +1109,7 @@
1092 call do_int3
1093 jmp ret_from_exception
1094 CFI_ENDPROC
1095 - .previous .text
1096 +KPROBE_END(int3)
1097
1098 ENTRY(overflow)
1099 RING0_INT_FRAME
1100 @@ -1157,7 +1174,7 @@
1101 CFI_ADJUST_CFA_OFFSET 4
1102 jmp error_code
1103 CFI_ENDPROC
1104 - .previous .text
1105 +KPROBE_END(general_protection)
1106
1107 ENTRY(alignment_check)
1108 RING0_EC_FRAME
1109 @@ -1166,13 +1183,14 @@
1110 jmp error_code
1111 CFI_ENDPROC
1112
1113 -KPROBE_ENTRY(page_fault)
1114 - RING0_EC_FRAME
1115 - pushl $do_page_fault
1116 +ENTRY(divide_error)
1117 + RING0_INT_FRAME
1118 + pushl $0 # no error code
1119 + CFI_ADJUST_CFA_OFFSET 4
1120 + pushl $do_divide_error
1121 CFI_ADJUST_CFA_OFFSET 4
1122 jmp error_code
1123 CFI_ENDPROC
1124 - .previous .text
1125
1126 #ifdef CONFIG_X86_MCE
1127 ENTRY(machine_check)
1128 @@ -1234,6 +1252,19 @@
1129 jmp error_code
1130 CFI_ENDPROC
1131
1132 +ENTRY(kernel_thread_helper)
1133 + pushl $0 # fake return address for unwinder
1134 + CFI_STARTPROC
1135 + movl %edx,%eax
1136 + push %edx
1137 + CFI_ADJUST_CFA_OFFSET 4
1138 + call *%ebx
1139 + push %eax
1140 + CFI_ADJUST_CFA_OFFSET 4
1141 + call do_exit
1142 + CFI_ENDPROC
1143 +ENDPROC(kernel_thread_helper)
1144 +
1145 .section .rodata,"a"
1146 #include "syscall_table.S"
1147
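The kernel_thread_helper stub added at the end of this file reduces to a one-liner in C terms: %ebx carries the thread function, %edx its argument, and the pushed 0 gives the unwinder a terminating return address. A sketch of the equivalent logic:

void kernel_thread_helper(int (*fn)(void *), void *arg)
{
	/* the thread function's return value becomes the exit code */
	do_exit(fn(arg));
}
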
1148 diff -Naur linux-2.6.25/arch/x86/kernel/entry_64-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S
1149 --- linux-2.6.25/arch/x86/kernel/entry_64-xen.S 2008-05-23 20:51:11.000000000 +0200
1150 +++ linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S 2008-05-23 20:47:55.000000000 +0200
1151 @@ -4,9 +4,6 @@
1152 * Copyright (C) 1991, 1992 Linus Torvalds
1153 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
1154 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
1155 - *
1156 - * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.2 2008-05-23 18:49:01 niro Exp $
1157 - *
1158 * Jun Nakajima <jun.nakajima@intel.com>
1159 * Asit Mallick <asit.k.mallick@intel.com>
1160 * Modified for Xen
1161 @@ -26,15 +23,25 @@
1162 * at the top of the kernel process stack.
1163 * - partial stack frame: partially saved registers upto R11.
1164 * - full stack frame: Like partial stack frame, but all register saved.
1165 - *
1166 - * TODO:
1167 - * - schedule it carefully for the final hardware.
1168 + *
1169 + * Some macro usage:
1170 + * - CFI macros are used to generate dwarf2 unwind information for better
1171 + * backtraces. They don't change any code.
1172 + * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
1173 + * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
1174 + * There are unfortunately lots of special cases where some registers are
1175 + * not touched. The macro is a big mess that should be cleaned up.
1176 + * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
1177 + * Gives a full stack frame.
1178 + * - ENTRY/END Define functions in the symbol table.
1179 + * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
1180 + * frame that is otherwise undefined after a SYSCALL
1181 + * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
1182 + * - errorentry/paranoidentry/zeroentry - Define exception entry points.
1183 */
1184
1185 -#define ASSEMBLY 1
1186 #include <linux/linkage.h>
1187 #include <asm/segment.h>
1188 -#include <asm/smp.h>
1189 #include <asm/cache.h>
1190 #include <asm/errno.h>
1191 #include <asm/dwarf2.h>
1192 @@ -117,6 +124,7 @@
1193 .macro CFI_DEFAULT_STACK start=1,adj=0
1194 .if \start
1195 CFI_STARTPROC simple
1196 + CFI_SIGNAL_FRAME
1197 CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
1198 .else
1199 CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
1200 @@ -207,6 +215,7 @@
1201 */
1202 .macro _frame ref
1203 CFI_STARTPROC simple
1204 + CFI_SIGNAL_FRAME
1205 CFI_DEF_CFA rsp,SS+8-\ref
1206 /*CFI_REL_OFFSET ss,SS-\ref*/
1207 CFI_REL_OFFSET rsp,RSP-\ref
1208 @@ -255,6 +264,8 @@
1209 CFI_REMEMBER_STATE
1210 jnz tracesys
1211 cmpq $__NR_syscall_max,%rax
1212 + movq $-ENOSYS,%rcx
1213 + cmova %rcx,%rax
1214 ja badsys
1215 movq %r10,%rcx
1216 call *sys_call_table(,%rax,8) # XXX: rip relative
1217 @@ -349,6 +360,7 @@
1218 */
1219 ENTRY(int_ret_from_sys_call)
1220 CFI_STARTPROC simple
1221 + CFI_SIGNAL_FRAME
1222 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
1223 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
1224 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
1225 @@ -583,8 +595,7 @@
1226 #ifdef CONFIG_PREEMPT
1227 /* Returning to kernel space. Check if we need preemption */
1228 /* rcx: threadinfo. interrupts off. */
1229 - .p2align
1230 -retint_kernel:
1231 +ENTRY(retint_kernel)
1232 cmpl $0,threadinfo_preempt_count(%rcx)
1233 jnz retint_restore_args
1234 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
1235 @@ -644,7 +655,6 @@
1236 END(call_function_interrupt)
1237 #endif
1238
1239 -#ifdef CONFIG_X86_LOCAL_APIC
1240 ENTRY(apic_timer_interrupt)
1241 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
1242 END(apic_timer_interrupt)
1243 @@ -656,7 +666,6 @@
1244 ENTRY(spurious_interrupt)
1245 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
1246 END(spurious_interrupt)
1247 -#endif
1248 #endif /* !CONFIG_XEN */
1249
1250 /*
1251 @@ -755,7 +764,9 @@
1252 testl $3,CS(%rsp)
1253 jnz paranoid_userspace\trace
1254 paranoid_swapgs\trace:
1255 + .if \trace
1256 TRACE_IRQS_IRETQ 0
1257 + .endif
1258 swapgs
1259 paranoid_restore\trace:
1260 RESTORE_ALL 8
1261 @@ -802,7 +813,7 @@
1262 * Exception entry point. This expects an error code/orig_rax on the stack
1263 * and the exception handler in %rax.
1264 */
1265 -ENTRY(error_entry)
1266 +KPROBE_ENTRY(error_entry)
1267 _frame RDI
1268 CFI_REL_OFFSET rax,0
1269 /* rdi slot contains rax, oldrax contains error code */
1270 @@ -896,7 +907,7 @@
1271 jmp error_sti
1272 #endif
1273 CFI_ENDPROC
1274 -END(error_entry)
1275 +KPROBE_END(error_entry)
1276
1277 ENTRY(hypervisor_callback)
1278 zeroentry do_hypervisor_callback
1279 @@ -936,26 +947,6 @@
1280 CFI_ENDPROC
1281 END(do_hypervisor_callback)
1282
1283 -#ifdef CONFIG_X86_LOCAL_APIC
1284 -KPROBE_ENTRY(nmi)
1285 - zeroentry do_nmi_callback
1286 -ENTRY(do_nmi_callback)
1287 - CFI_STARTPROC
1288 - addq $8, %rsp
1289 - CFI_ENDPROC
1290 - CFI_DEFAULT_STACK
1291 - call do_nmi
1292 - orl $NMI_MASK,EFLAGS(%rsp)
1293 - RESTORE_REST
1294 - XEN_BLOCK_EVENTS(%rsi)
1295 - TRACE_IRQS_OFF
1296 - GET_THREAD_INFO(%rcx)
1297 - jmp retint_restore_args
1298 - CFI_ENDPROC
1299 - .previous .text
1300 -END(nmi)
1301 -#endif
1302 -
1303 ALIGN
1304 restore_all_enable_events:
1305 CFI_DEFAULT_STACK adj=1
1306 @@ -1121,7 +1112,7 @@
1307 * do_sys_execve asm fallback arguments:
1308 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1309 */
1310 -ENTRY(execve)
1311 +ENTRY(kernel_execve)
1312 CFI_STARTPROC
1313 FAKE_STACK_FRAME $0
1314 SAVE_ALL
1315 @@ -1135,12 +1126,11 @@
1316 UNFAKE_STACK_FRAME
1317 ret
1318 CFI_ENDPROC
1319 -ENDPROC(execve)
1320 +ENDPROC(kernel_execve)
1321
1322 KPROBE_ENTRY(page_fault)
1323 errorentry do_page_fault
1324 -END(page_fault)
1325 - .previous .text
1326 +KPROBE_END(page_fault)
1327
1328 ENTRY(coprocessor_error)
1329 zeroentry do_coprocessor_error
1330 @@ -1162,25 +1152,25 @@
1331 zeroentry do_debug
1332 /* paranoidexit
1333 CFI_ENDPROC */
1334 -END(debug)
1335 - .previous .text
1336 +KPROBE_END(debug)
1337
1338 -#if 0
1339 - /* runs on exception stack */
1340 KPROBE_ENTRY(nmi)
1341 - INTR_FRAME
1342 - pushq $-1
1343 - CFI_ADJUST_CFA_OFFSET 8
1344 - paranoidentry do_nmi, 0, 0
1345 -#ifdef CONFIG_TRACE_IRQFLAGS
1346 - paranoidexit 0
1347 -#else
1348 - jmp paranoid_exit1
1349 - CFI_ENDPROC
1350 -#endif
1351 -END(nmi)
1352 - .previous .text
1353 -#endif
1354 + zeroentry do_nmi_callback
1355 +KPROBE_END(nmi)
1356 +do_nmi_callback:
1357 + CFI_STARTPROC
1358 + addq $8, %rsp
1359 + CFI_ENDPROC
1360 + CFI_DEFAULT_STACK
1361 + call do_nmi
1362 + orl $NMI_MASK,EFLAGS(%rsp)
1363 + RESTORE_REST
1364 + XEN_BLOCK_EVENTS(%rsi)
1365 + TRACE_IRQS_OFF
1366 + GET_THREAD_INFO(%rcx)
1367 + jmp retint_restore_args
1368 + CFI_ENDPROC
1369 +END(do_nmi_callback)
1370
1371 KPROBE_ENTRY(int3)
1372 /* INTR_FRAME
1373 @@ -1189,8 +1179,7 @@
1374 zeroentry do_int3
1375 /* jmp paranoid_exit1
1376 CFI_ENDPROC */
1377 -END(int3)
1378 - .previous .text
1379 +KPROBE_END(int3)
1380
1381 ENTRY(overflow)
1382 zeroentry do_overflow
1383 @@ -1241,8 +1230,7 @@
1384
1385 KPROBE_ENTRY(general_protection)
1386 errorentry do_general_protection
1387 -END(general_protection)
1388 - .previous .text
1389 +KPROBE_END(general_protection)
1390
1391 ENTRY(alignment_check)
1392 errorentry do_alignment_check
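
One subtle hunk above preloads -ENOSYS with cmova before the bounds branch in the system-call path, so an out-of-range syscall number already carries the right return value. In C terms the dispatch becomes roughly the following (a sketch, assuming a table of no-argument thunks for brevity):

long sys_dispatch(unsigned long nr)
{
	long ret = -ENOSYS;		/* what the cmova preloads */

	if (nr <= __NR_syscall_max)	/* "ja badsys" takes the else path */
		ret = sys_call_table[nr]();
	return ret;
}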
1393 diff -Naur linux-2.6.25/arch/x86/kernel/genapic_xen_64.c linux-2.6.25-xen/arch/x86/kernel/genapic_xen_64.c
1394 --- linux-2.6.25/arch/x86/kernel/genapic_xen_64.c 2008-05-23 20:51:11.000000000 +0200
1395 +++ linux-2.6.25-xen/arch/x86/kernel/genapic_xen_64.c 2008-05-23 20:39:03.000000000 +0200
1396 @@ -71,6 +71,13 @@
1397 return cpu_online_map;
1398 }
1399
1400 +static cpumask_t xen_vector_allocation_domain(int cpu)
1401 +{
1402 + cpumask_t domain = CPU_MASK_NONE;
1403 + cpu_set(cpu, domain);
1404 + return domain;
1405 +}
1406 +
1407 /*
1408 * Set up the logical destination ID.
1409 * Do nothing, not called now.
1410 @@ -147,8 +154,8 @@
1411 .int_delivery_mode = dest_LowestPrio,
1412 #endif
1413 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
1414 - .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
1415 .target_cpus = xen_target_cpus,
1416 + .vector_allocation_domain = xen_vector_allocation_domain,
1417 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1418 .apic_id_registered = xen_apic_id_registered,
1419 #endif
1420 diff -Naur linux-2.6.25/arch/x86/kernel/head_32-xen.S linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S
1421 --- linux-2.6.25/arch/x86/kernel/head_32-xen.S 2008-05-23 20:51:11.000000000 +0200
1422 +++ linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S 2008-05-23 20:39:03.000000000 +0200
1423 @@ -62,7 +62,7 @@
1424 movl %eax,%gs
1425 cld # gcc2 wants the direction flag cleared at all times
1426
1427 - pushl %eax # fake return address
1428 + pushl $0 # fake return address for unwinder
1429 jmp start_kernel
1430
1431 #define HYPERCALL_PAGE_OFFSET 0x1000
1432 diff -Naur linux-2.6.25/arch/x86/kernel/head64-xen.c linux-2.6.25-xen/arch/x86/kernel/head64-xen.c
1433 --- linux-2.6.25/arch/x86/kernel/head64-xen.c 2008-05-23 20:51:11.000000000 +0200
1434 +++ linux-2.6.25-xen/arch/x86/kernel/head64-xen.c 2008-05-23 20:39:03.000000000 +0200
1435 @@ -54,11 +54,9 @@
1436 new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
1437 if (!new_data) {
1438 if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
1439 - printk("so old bootloader that it does not support commandline?!\n");
1440 return;
1441 }
1442 new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
1443 - printk("old bootloader convention, maybe loadlin?\n");
1444 }
1445 command_line = (char *) ((u64)(new_data));
1446 memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
1447 @@ -70,25 +68,6 @@
1448 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
1449 saved_command_line[max_cmdline-1] = '\0';
1450 #endif
1451 - printk("Bootdata ok (command line is %s)\n", saved_command_line);
1452 -}
1453 -
1454 -static void __init setup_boot_cpu_data(void)
1455 -{
1456 - unsigned int dummy, eax;
1457 -
1458 - /* get vendor info */
1459 - cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
1460 - (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
1461 - (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
1462 - (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
1463 -
1464 - /* get cpu type */
1465 - cpuid(1, &eax, &dummy, &dummy,
1466 - (unsigned int *) &boot_cpu_data.x86_capability);
1467 - boot_cpu_data.x86 = (eax >> 8) & 0xf;
1468 - boot_cpu_data.x86_model = (eax >> 4) & 0xf;
1469 - boot_cpu_data.x86_mask = eax & 0xf;
1470 }
1471
1472 #include <xen/interface/memory.h>
1473 @@ -101,7 +80,6 @@
1474 {
1475 struct xen_machphys_mapping mapping;
1476 unsigned long machine_to_phys_nr_ents;
1477 - char *s;
1478 int i;
1479
1480 setup_xen_features();
1481 @@ -128,10 +106,7 @@
1482 asm volatile("lidt %0" :: "m" (idt_descr));
1483 #endif
1484
1485 - /*
1486 - * This must be called really, really early:
1487 - */
1488 - lockdep_init();
1489 + early_printk("Kernel alive\n");
1490
1491 for (i = 0; i < NR_CPUS; i++)
1492 cpu_pda(i) = &boot_cpu_pda[i];
1493 @@ -141,22 +116,5 @@
1494 #ifdef CONFIG_SMP
1495 cpu_set(0, cpu_online_map);
1496 #endif
1497 - s = strstr(saved_command_line, "earlyprintk=");
1498 - if (s != NULL)
1499 - setup_early_printk(strchr(s, '=') + 1);
1500 -#ifdef CONFIG_NUMA
1501 - s = strstr(saved_command_line, "numa=");
1502 - if (s != NULL)
1503 - numa_setup(s+5);
1504 -#endif
1505 -#ifdef CONFIG_X86_IO_APIC
1506 - if (strstr(saved_command_line, "disableapic"))
1507 - disable_apic = 1;
1508 -#endif
1509 - /* You need early console to see that */
1510 - if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
1511 - panic("Kernel too big for kernel mapping\n");
1512 -
1513 - setup_boot_cpu_data();
1514 start_kernel();
1515 }
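
The deleted setup_boot_cpu_data() above decoded CPUID leaf 1 by hand, work the generic CPU identification code now does later in boot. The bit layout it relied on, shown as a standalone sketch for reference (stepping in bits 3:0 of EAX, model in 7:4, family in 11:8):

#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	__asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			: "a" (1));
	printf("family %u, model %u, stepping %u\n",
	       (eax >> 8) & 0xf, (eax >> 4) & 0xf, eax & 0xf);
	return 0;
}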
1516 diff -Naur linux-2.6.25/arch/x86/kernel/head_64-xen.S linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S
1517 --- linux-2.6.25/arch/x86/kernel/head_64-xen.S 2008-05-23 20:51:11.000000000 +0200
1518 +++ linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S 2008-05-23 20:49:19.000000000 +0200
1519 @@ -5,9 +5,6 @@
1520 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
1521 * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
1522 * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
1523 - *
1524 - * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.2 2008-05-23 18:49:01 niro Exp $
1525 - *
1526 * Jun Nakajima <jun.nakajima@intel.com>
1527 * Modified for Xen
1528 */
1529 @@ -149,7 +146,7 @@
1530 .quad 0,0 /* TSS */
1531 .quad 0,0 /* LDT */
1532 .quad 0,0,0 /* three TLS descriptors */
1533 - .quad 0 /* unused */
1534 + .quad 0x0000f40000000000 /* node/CPU stored in limit */
1535 gdt_end:
1536 /* asm/segment.h:GDT_ENTRIES must match this */
1537 /* This should be a multiple of the cache line size */
1538 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c
1539 --- linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 20:51:11.000000000 +0200
1540 +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 20:39:03.000000000 +0200
1541 @@ -31,6 +31,9 @@
1542 #include <linux/acpi.h>
1543 #include <linux/module.h>
1544 #include <linux/sysdev.h>
1545 +#include <linux/pci.h>
1546 +#include <linux/msi.h>
1547 +#include <linux/htirq.h>
1548
1549 #include <asm/io.h>
1550 #include <asm/smp.h>
1551 @@ -38,13 +41,15 @@
1552 #include <asm/timer.h>
1553 #include <asm/i8259.h>
1554 #include <asm/nmi.h>
1555 +#include <asm/msidef.h>
1556 +#include <asm/hypertransport.h>
1557
1558 #include <mach_apic.h>
1559 +#include <mach_apicdef.h>
1560
1561 #include "io_ports.h"
1562
1563 #ifdef CONFIG_XEN
1564 -
1565 #include <xen/interface/xen.h>
1566 #include <xen/interface/physdev.h>
1567
1568 @@ -55,32 +60,7 @@
1569
1570 unsigned long io_apic_irqs;
1571
1572 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
1573 -{
1574 - struct physdev_apic apic_op;
1575 - int ret;
1576 -
1577 - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1578 - apic_op.reg = reg;
1579 - ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
1580 - if (ret)
1581 - return ret;
1582 - return apic_op.value;
1583 -}
1584 -
1585 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
1586 -{
1587 - struct physdev_apic apic_op;
1588 -
1589 - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1590 - apic_op.reg = reg;
1591 - apic_op.value = value;
1592 - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
1593 -}
1594 -
1595 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
1596 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
1597 -
1598 +#define clear_IO_APIC() ((void)0)
1599 #endif /* CONFIG_XEN */
1600
1601 int (*ioapic_renumber_irq)(int ioapic, int irq);
1602 @@ -105,7 +85,7 @@
1603 */
1604 int nr_ioapic_registers[MAX_IO_APICS];
1605
1606 -int disable_timer_pin_1 __initdata;
1607 +static int disable_timer_pin_1 __initdata;
1608
1609 /*
1610 * Rough estimation of how many shared IRQs there are, can
1611 @@ -125,12 +105,122 @@
1612 int apic, pin, next;
1613 } irq_2_pin[PIN_MAP_SIZE];
1614
1615 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
1616 -#ifdef CONFIG_PCI_MSI
1617 -#define vector_to_irq(vector) \
1618 - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
1619 +#ifndef CONFIG_XEN
1620 +struct io_apic {
1621 + unsigned int index;
1622 + unsigned int unused[3];
1623 + unsigned int data;
1624 +};
1625 +
1626 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
1627 +{
1628 + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
1629 + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
1630 +}
1631 +#endif
1632 +
1633 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
1634 +{
1635 +#ifndef CONFIG_XEN
1636 + struct io_apic __iomem *io_apic = io_apic_base(apic);
1637 + writel(reg, &io_apic->index);
1638 + return readl(&io_apic->data);
1639 +#else
1640 + struct physdev_apic apic_op;
1641 + int ret;
1642 +
1643 + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1644 + apic_op.reg = reg;
1645 + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
1646 + if (ret)
1647 + return ret;
1648 + return apic_op.value;
1649 +#endif
1650 +}
1651 +
1652 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
1653 +{
1654 +#ifndef CONFIG_XEN
1655 + struct io_apic __iomem *io_apic = io_apic_base(apic);
1656 + writel(reg, &io_apic->index);
1657 + writel(value, &io_apic->data);
1658 +#else
1659 + struct physdev_apic apic_op;
1660 +
1661 + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1662 + apic_op.reg = reg;
1663 + apic_op.value = value;
1664 + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
1665 +#endif
1666 +}
1667 +
1668 +#ifndef CONFIG_XEN
1669 +/*
1670 + * Re-write a value: to be used for read-modify-write
1671 + * cycles where the read already set up the index register.
1672 + *
1673 + * Older SiS APIC requires we rewrite the index register
1674 + */
1675 +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
1676 +{
1677 + volatile struct io_apic *io_apic = io_apic_base(apic);
1678 + if (sis_apic_bug)
1679 + writel(reg, &io_apic->index);
1680 + writel(value, &io_apic->data);
1681 +}
1682 #else
1683 -#define vector_to_irq(vector) (vector)
1684 +#define io_apic_modify io_apic_write
1685 +#endif
1686 +
1687 +union entry_union {
1688 + struct { u32 w1, w2; };
1689 + struct IO_APIC_route_entry entry;
1690 +};
1691 +
1692 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
1693 +{
1694 + union entry_union eu;
1695 + unsigned long flags;
1696 + spin_lock_irqsave(&ioapic_lock, flags);
1697 + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
1698 + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
1699 + spin_unlock_irqrestore(&ioapic_lock, flags);
1700 + return eu.entry;
1701 +}
1702 +
1703 +/*
1704 + * When we write a new IO APIC routing entry, we need to write the high
1705 + * word first! If the mask bit in the low word is clear, we will enable
1706 + * the interrupt, and we need to make sure the entry is fully populated
1707 + * before that happens.
1708 + */
1709 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
1710 +{
1711 + unsigned long flags;
1712 + union entry_union eu;
1713 + eu.entry = e;
1714 + spin_lock_irqsave(&ioapic_lock, flags);
1715 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1716 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1717 + spin_unlock_irqrestore(&ioapic_lock, flags);
1718 +}
1719 +
1720 +#ifndef CONFIG_XEN
1721 +/*
1722 + * When we mask an IO APIC routing entry, we need to write the low
1723 + * word first, in order to set the mask bit before we change the
1724 + * high bits!
1725 + */
1726 +static void ioapic_mask_entry(int apic, int pin)
1727 +{
1728 + unsigned long flags;
1729 + union entry_union eu = { .entry.mask = 1 };
1730 +
1731 + spin_lock_irqsave(&ioapic_lock, flags);
1732 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1733 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1734 + spin_unlock_irqrestore(&ioapic_lock, flags);
1735 +}
1736 #endif
1737
1738 /*
1739 @@ -156,9 +246,7 @@
1740 entry->pin = pin;
1741 }
1742
1743 -#ifdef CONFIG_XEN
1744 -#define clear_IO_APIC() ((void)0)
1745 -#else
1746 +#ifndef CONFIG_XEN
1747 /*
1748 * Reroute an IRQ to a different pin.
1749 */
1750 @@ -243,25 +331,16 @@
1751 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
1752 {
1753 struct IO_APIC_route_entry entry;
1754 - unsigned long flags;
1755
1756 /* Check delivery_mode to be sure we're not clearing an SMI pin */
1757 - spin_lock_irqsave(&ioapic_lock, flags);
1758 - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1759 - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1760 - spin_unlock_irqrestore(&ioapic_lock, flags);
1761 + entry = ioapic_read_entry(apic, pin);
1762 if (entry.delivery_mode == dest_SMI)
1763 return;
1764
1765 /*
1766 * Disable it in the IO-APIC irq-routing table:
1767 */
1768 - memset(&entry, 0, sizeof(entry));
1769 - entry.mask = 1;
1770 - spin_lock_irqsave(&ioapic_lock, flags);
1771 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
1772 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
1773 - spin_unlock_irqrestore(&ioapic_lock, flags);
1774 + ioapic_mask_entry(apic, pin);
1775 }
1776
1777 static void clear_IO_APIC (void)
1778 @@ -301,7 +380,7 @@
1779 break;
1780 entry = irq_2_pin + entry->next;
1781 }
1782 - set_irq_info(irq, cpumask);
1783 + set_native_irq_info(irq, cpumask);
1784 spin_unlock_irqrestore(&ioapic_lock, flags);
1785 }
1786
1787 @@ -1207,40 +1286,40 @@
1788 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1789 u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
1790
1791 -int assign_irq_vector(int irq)
1792 +static int __assign_irq_vector(int irq)
1793 {
1794 - unsigned long flags;
1795 int vector;
1796 struct physdev_irq irq_op;
1797
1798 - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1799 -
1800 - spin_lock_irqsave(&vector_lock, flags);
1801 + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1802
1803 - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1804 - spin_unlock_irqrestore(&vector_lock, flags);
1805 - return IO_APIC_VECTOR(irq);
1806 - }
1807 + if (irq_vector[irq] > 0)
1808 + return irq_vector[irq];
1809
1810 irq_op.irq = irq;
1811 - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1812 - spin_unlock_irqrestore(&vector_lock, flags);
1813 + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1814 return -ENOSPC;
1815 - }
1816
1817 vector = irq_op.vector;
1818 - vector_irq[vector] = irq;
1819 - if (irq != AUTO_ASSIGN)
1820 - IO_APIC_VECTOR(irq) = vector;
1821 + irq_vector[irq] = vector;
1822 +
1823 + return vector;
1824 +}
1825
1826 +static int assign_irq_vector(int irq)
1827 +{
1828 + unsigned long flags;
1829 + int vector;
1830 +
1831 + spin_lock_irqsave(&vector_lock, flags);
1832 + vector = __assign_irq_vector(irq);
1833 spin_unlock_irqrestore(&vector_lock, flags);
1834
1835 return vector;
1836 }
1837
1838 #ifndef CONFIG_XEN
1839 -static struct hw_interrupt_type ioapic_level_type;
1840 -static struct hw_interrupt_type ioapic_edge_type;
1841 +static struct irq_chip ioapic_chip;
1842
1843 #define IOAPIC_AUTO -1
1844 #define IOAPIC_EDGE 0
1845 @@ -1248,16 +1327,16 @@
1846
1847 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1848 {
1849 - unsigned idx;
1850 -
1851 - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1852 -
1853 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1854 trigger == IOAPIC_LEVEL)
1855 - irq_desc[idx].chip = &ioapic_level_type;
1856 - else
1857 - irq_desc[idx].chip = &ioapic_edge_type;
1858 - set_intr_gate(vector, interrupt[idx]);
1859 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1860 + handle_fasteoi_irq, "fasteoi");
1861 + else {
1862 + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1863 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1864 + handle_edge_irq, "edge");
1865 + }
1866 + set_intr_gate(vector, interrupt[irq]);
1867 }
1868 #else
1869 #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
1870 @@ -1328,9 +1407,8 @@
1871 if (!apic && (irq < 16))
1872 disable_8259A_irq(irq);
1873 }
1874 + ioapic_write_entry(apic, pin, entry);
1875 spin_lock_irqsave(&ioapic_lock, flags);
1876 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1877 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1878 set_native_irq_info(irq, TARGET_CPUS);
1879 spin_unlock_irqrestore(&ioapic_lock, flags);
1880 }
1881 @@ -1347,7 +1425,6 @@
1882 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
1883 {
1884 struct IO_APIC_route_entry entry;
1885 - unsigned long flags;
1886
1887 memset(&entry,0,sizeof(entry));
1888
1889 @@ -1372,15 +1449,13 @@
1890 * The timer IRQ doesn't have to know that behind the
1891 * scene we have a 8259A-master in AEOI mode ...
1892 */
1893 - irq_desc[0].chip = &ioapic_edge_type;
1894 + irq_desc[0].chip = &ioapic_chip;
1895 + set_irq_handler(0, handle_edge_irq);
1896
1897 /*
1898 * Add it to the IO-APIC irq-routing table:
1899 */
1900 - spin_lock_irqsave(&ioapic_lock, flags);
1901 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1902 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1903 - spin_unlock_irqrestore(&ioapic_lock, flags);
1904 + ioapic_write_entry(apic, pin, entry);
1905
1906 enable_8259A_irq(0);
1907 }
1908 @@ -1490,10 +1565,7 @@
1909 for (i = 0; i <= reg_01.bits.entries; i++) {
1910 struct IO_APIC_route_entry entry;
1911
1912 - spin_lock_irqsave(&ioapic_lock, flags);
1913 - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1914 - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1915 - spin_unlock_irqrestore(&ioapic_lock, flags);
1916 + entry = ioapic_read_entry(apic, i);
1917
1918 printk(KERN_DEBUG " %02x %03X %02X ",
1919 i,
1920 @@ -1513,17 +1585,12 @@
1921 );
1922 }
1923 }
1924 - if (use_pci_vector())
1925 - printk(KERN_INFO "Using vector-based indexing\n");
1926 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1927 for (i = 0; i < NR_IRQS; i++) {
1928 struct irq_pin_list *entry = irq_2_pin + i;
1929 if (entry->pin < 0)
1930 continue;
1931 - if (use_pci_vector() && !platform_legacy_irq(i))
1932 - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
1933 - else
1934 - printk(KERN_DEBUG "IRQ%d ", i);
1935 + printk(KERN_DEBUG "IRQ%d ", i);
1936 for (;;) {
1937 printk("-> %d:%d", entry->apic, entry->pin);
1938 if (!entry->next)
1939 @@ -1709,10 +1776,7 @@
1940 /* See if any of the pins is in ExtINT mode */
1941 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1942 struct IO_APIC_route_entry entry;
1943 - spin_lock_irqsave(&ioapic_lock, flags);
1944 - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1945 - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1946 - spin_unlock_irqrestore(&ioapic_lock, flags);
1947 + entry = ioapic_read_entry(apic, pin);
1948
1949
1950 /* If the interrupt line is enabled and in ExtInt mode
1951 @@ -1770,7 +1834,6 @@
1952 */
1953 if (ioapic_i8259.pin != -1) {
1954 struct IO_APIC_route_entry entry;
1955 - unsigned long flags;
1956
1957 memset(&entry, 0, sizeof(entry));
1958 entry.mask = 0; /* Enabled */
1959 @@ -1787,12 +1850,7 @@
1960 /*
1961 * Add it to the IO-APIC irq-routing table:
1962 */
1963 - spin_lock_irqsave(&ioapic_lock, flags);
1964 - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1965 - *(((int *)&entry)+1));
1966 - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1967 - *(((int *)&entry)+0));
1968 - spin_unlock_irqrestore(&ioapic_lock, flags);
1969 + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1970 }
1971 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1972 #endif
1973 @@ -1959,6 +2017,8 @@
1974 */
1975
1976 /*
1977 + * Startup quirk:
1978 + *
1979 * Starting up a edge-triggered IO-APIC interrupt is
1980 * nasty - we need to make sure that we get the edge.
1981 * If it is already asserted for some reason, we need
1982 @@ -1966,8 +2026,10 @@
1983 *
1984 * This is not complete - we should be able to fake
1985 * an edge even if it isn't on the 8259A...
1986 + *
1987 + * (We do this for level-triggered IRQs too - it cannot hurt.)
1988 */
1989 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1990 +static unsigned int startup_ioapic_irq(unsigned int irq)
1991 {
1992 int was_pending = 0;
1993 unsigned long flags;
1994 @@ -1984,47 +2046,18 @@
1995 return was_pending;
1996 }
1997
1998 -/*
1999 - * Once we have recorded IRQ_PENDING already, we can mask the
2000 - * interrupt for real. This prevents IRQ storms from unhandled
2001 - * devices.
2002 - */
2003 -static void ack_edge_ioapic_irq(unsigned int irq)
2004 -{
2005 - move_irq(irq);
2006 - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
2007 - == (IRQ_PENDING | IRQ_DISABLED))
2008 - mask_IO_APIC_irq(irq);
2009 - ack_APIC_irq();
2010 -}
2011 -
2012 -/*
2013 - * Level triggered interrupts can just be masked,
2014 - * and shutting down and starting up the interrupt
2015 - * is the same as enabling and disabling them -- except
2016 - * with a startup need to return a "was pending" value.
2017 - *
2018 - * Level triggered interrupts are special because we
2019 - * do not touch any IO-APIC register while handling
2020 - * them. We ack the APIC in the end-IRQ handler, not
2021 - * in the start-IRQ-handler. Protection against reentrance
2022 - * from the same interrupt is still provided, both by the
2023 - * generic IRQ layer and by the fact that an unacked local
2024 - * APIC does not accept IRQs.
2025 - */
2026 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
2027 +static void ack_ioapic_irq(unsigned int irq)
2028 {
2029 - unmask_IO_APIC_irq(irq);
2030 -
2031 - return 0; /* don't check for pending */
2032 + move_native_irq(irq);
2033 + ack_APIC_irq();
2034 }
2035
2036 -static void end_level_ioapic_irq (unsigned int irq)
2037 +static void ack_ioapic_quirk_irq(unsigned int irq)
2038 {
2039 unsigned long v;
2040 int i;
2041
2042 - move_irq(irq);
2043 + move_native_irq(irq);
2044 /*
2045 * It appears there is an erratum which affects at least version 0x11
2046 * of I/O APIC (that's the 82093AA and cores integrated into various
2047 @@ -2044,7 +2077,7 @@
2048 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2049 * The idea is from Manfred Spraul. --macro
2050 */
2051 - i = IO_APIC_VECTOR(irq);
2052 + i = irq_vector[irq];
2053
2054 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2055
2056 @@ -2059,104 +2092,24 @@
2057 }
2058 }
2059
2060 -#ifdef CONFIG_PCI_MSI
2061 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
2062 -{
2063 - int irq = vector_to_irq(vector);
2064 -
2065 - return startup_edge_ioapic_irq(irq);
2066 -}
2067 -
2068 -static void ack_edge_ioapic_vector(unsigned int vector)
2069 -{
2070 - int irq = vector_to_irq(vector);
2071 -
2072 - move_native_irq(vector);
2073 - ack_edge_ioapic_irq(irq);
2074 -}
2075 -
2076 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
2077 -{
2078 - int irq = vector_to_irq(vector);
2079 -
2080 - return startup_level_ioapic_irq (irq);
2081 -}
2082 -
2083 -static void end_level_ioapic_vector (unsigned int vector)
2084 -{
2085 - int irq = vector_to_irq(vector);
2086 -
2087 - move_native_irq(vector);
2088 - end_level_ioapic_irq(irq);
2089 -}
2090 -
2091 -static void mask_IO_APIC_vector (unsigned int vector)
2092 -{
2093 - int irq = vector_to_irq(vector);
2094 -
2095 - mask_IO_APIC_irq(irq);
2096 -}
2097 -
2098 -static void unmask_IO_APIC_vector (unsigned int vector)
2099 -{
2100 - int irq = vector_to_irq(vector);
2101 -
2102 - unmask_IO_APIC_irq(irq);
2103 -}
2104 -
2105 -#ifdef CONFIG_SMP
2106 -static void set_ioapic_affinity_vector (unsigned int vector,
2107 - cpumask_t cpu_mask)
2108 -{
2109 - int irq = vector_to_irq(vector);
2110 -
2111 - set_native_irq_info(vector, cpu_mask);
2112 - set_ioapic_affinity_irq(irq, cpu_mask);
2113 -}
2114 -#endif
2115 -#endif
2116 -
2117 -static int ioapic_retrigger(unsigned int irq)
2118 +static int ioapic_retrigger_irq(unsigned int irq)
2119 {
2120 - send_IPI_self(IO_APIC_VECTOR(irq));
2121 + send_IPI_self(irq_vector[irq]);
2122
2123 return 1;
2124 }
2125
2126 -/*
2127 - * Level and edge triggered IO-APIC interrupts need different handling,
2128 - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
2129 - * handled with the level-triggered descriptor, but that one has slightly
2130 - * more overhead. Level-triggered interrupts cannot be handled with the
2131 - * edge-triggered handler, without risking IRQ storms and other ugly
2132 - * races.
2133 - */
2134 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
2135 - .typename = "IO-APIC-edge",
2136 - .startup = startup_edge_ioapic,
2137 - .shutdown = shutdown_edge_ioapic,
2138 - .enable = enable_edge_ioapic,
2139 - .disable = disable_edge_ioapic,
2140 - .ack = ack_edge_ioapic,
2141 - .end = end_edge_ioapic,
2142 -#ifdef CONFIG_SMP
2143 - .set_affinity = set_ioapic_affinity,
2144 -#endif
2145 - .retrigger = ioapic_retrigger,
2146 -};
2147 -
2148 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
2149 - .typename = "IO-APIC-level",
2150 - .startup = startup_level_ioapic,
2151 - .shutdown = shutdown_level_ioapic,
2152 - .enable = enable_level_ioapic,
2153 - .disable = disable_level_ioapic,
2154 - .ack = mask_and_ack_level_ioapic,
2155 - .end = end_level_ioapic,
2156 +static struct irq_chip ioapic_chip __read_mostly = {
2157 + .name = "IO-APIC",
2158 + .startup = startup_ioapic_irq,
2159 + .mask = mask_IO_APIC_irq,
2160 + .unmask = unmask_IO_APIC_irq,
2161 + .ack = ack_ioapic_irq,
2162 + .eoi = ack_ioapic_quirk_irq,
2163 #ifdef CONFIG_SMP
2164 - .set_affinity = set_ioapic_affinity,
2165 + .set_affinity = set_ioapic_affinity_irq,
2166 #endif
2167 - .retrigger = ioapic_retrigger,
2168 + .retrigger = ioapic_retrigger_irq,
2169 };
2170 #endif /* !CONFIG_XEN */
2171
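The hunk above is the heart of the genirq conversion: the two trigger-specific hw_interrupt_type structures collapse into one irq_chip that only supplies primitives (mask/unmask/ack/eoi), while the generic flow handlers sequence them. A hedged sketch of how this API generation wires a chip to a flow handler (names hypothetical):

static struct irq_chip example_chip = {
        .name   = "example",
        .mask   = example_mask,         /* assumed low-level helpers */
        .unmask = example_unmask,
        .ack    = example_ack,
        .eoi    = example_eoi,
};

/* Edge-triggered lines take the edge flow (which uses .ack); level-
 * triggered lines take the fasteoi flow (which uses .eoi).  The final
 * string is the name shown in /proc/interrupts. */
set_irq_chip_and_handler_name(irq, &example_chip, handle_edge_irq, "edge");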
2172 @@ -2177,12 +2130,7 @@
2173 */
2174 for (irq = 0; irq < NR_IRQS ; irq++) {
2175 int tmp = irq;
2176 - if (use_pci_vector()) {
2177 - if (!platform_legacy_irq(tmp))
2178 - if ((tmp = vector_to_irq(tmp)) == -1)
2179 - continue;
2180 - }
2181 - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
2182 + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2183 /*
2184 * Hmm.. We don't have an entry for this,
2185 * so default to an old-fashioned 8259
2186 @@ -2193,22 +2141,23 @@
2187 #ifndef CONFIG_XEN
2188 else
2189 /* Strange. Oh, well.. */
2190 - irq_desc[irq].chip = &no_irq_type;
2191 + irq_desc[irq].chip = &no_irq_chip;
2192 #endif
2193 }
2194 }
2195 }
2196
2197 #ifndef CONFIG_XEN
2198 -static void enable_lapic_irq (unsigned int irq)
2199 -{
2200 - unsigned long v;
2201 +/*
2202 + * The local APIC irq-chip implementation:
2203 + */
2204
2205 - v = apic_read(APIC_LVT0);
2206 - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2207 +static void ack_apic(unsigned int irq)
2208 +{
2209 + ack_APIC_irq();
2210 }
2211
2212 -static void disable_lapic_irq (unsigned int irq)
2213 +static void mask_lapic_irq (unsigned int irq)
2214 {
2215 unsigned long v;
2216
2217 @@ -2216,21 +2165,19 @@
2218 apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2219 }
2220
2221 -static void ack_lapic_irq (unsigned int irq)
2222 +static void unmask_lapic_irq (unsigned int irq)
2223 {
2224 - ack_APIC_irq();
2225 -}
2226 + unsigned long v;
2227
2228 -static void end_lapic_irq (unsigned int i) { /* nothing */ }
2229 + v = apic_read(APIC_LVT0);
2230 + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2231 +}
2232
2233 -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
2234 - .typename = "local-APIC-edge",
2235 - .startup = NULL, /* startup_irq() not used for IRQ0 */
2236 - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
2237 - .enable = enable_lapic_irq,
2238 - .disable = disable_lapic_irq,
2239 - .ack = ack_lapic_irq,
2240 - .end = end_lapic_irq
2241 +static struct irq_chip lapic_chip __read_mostly = {
2242 + .name = "local-APIC-edge",
2243 + .mask = mask_lapic_irq,
2244 + .unmask = unmask_lapic_irq,
2245 + .eoi = ack_apic,
2246 };
2247
2248 static void setup_nmi (void)
2249 @@ -2263,17 +2210,13 @@
2250 int apic, pin, i;
2251 struct IO_APIC_route_entry entry0, entry1;
2252 unsigned char save_control, save_freq_select;
2253 - unsigned long flags;
2254
2255 pin = find_isa_irq_pin(8, mp_INT);
2256 apic = find_isa_irq_apic(8, mp_INT);
2257 if (pin == -1)
2258 return;
2259
2260 - spin_lock_irqsave(&ioapic_lock, flags);
2261 - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2262 - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2263 - spin_unlock_irqrestore(&ioapic_lock, flags);
2264 + entry0 = ioapic_read_entry(apic, pin);
2265 clear_IO_APIC_pin(apic, pin);
2266
2267 memset(&entry1, 0, sizeof(entry1));
2268 @@ -2286,10 +2229,7 @@
2269 entry1.trigger = 0;
2270 entry1.vector = 0;
2271
2272 - spin_lock_irqsave(&ioapic_lock, flags);
2273 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2274 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2275 - spin_unlock_irqrestore(&ioapic_lock, flags);
2276 + ioapic_write_entry(apic, pin, entry1);
2277
2278 save_control = CMOS_READ(RTC_CONTROL);
2279 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2280 @@ -2308,10 +2248,7 @@
2281 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2282 clear_IO_APIC_pin(apic, pin);
2283
2284 - spin_lock_irqsave(&ioapic_lock, flags);
2285 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2286 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2287 - spin_unlock_irqrestore(&ioapic_lock, flags);
2288 + ioapic_write_entry(apic, pin, entry0);
2289 }
2290
2291 int timer_uses_ioapic_pin_0;
2292 @@ -2411,7 +2348,8 @@
2293 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2294
2295 disable_8259A_irq(0);
2296 - irq_desc[0].chip = &lapic_irq_type;
2297 + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2298 + "fasteio");
2299 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2300 enable_8259A_irq(0);
2301
2302 @@ -2523,17 +2461,12 @@
2303 {
2304 struct IO_APIC_route_entry *entry;
2305 struct sysfs_ioapic_data *data;
2306 - unsigned long flags;
2307 int i;
2308
2309 data = container_of(dev, struct sysfs_ioapic_data, dev);
2310 entry = data->entry;
2311 - spin_lock_irqsave(&ioapic_lock, flags);
2312 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2313 - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2314 - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2315 - }
2316 - spin_unlock_irqrestore(&ioapic_lock, flags);
2317 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2318 + entry[i] = ioapic_read_entry(dev->id, i);
2319
2320 return 0;
2321 }
2322 @@ -2555,11 +2488,9 @@
2323 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2324 io_apic_write(dev->id, 0, reg_00.raw);
2325 }
2326 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2327 - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2328 - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2329 - }
2330 spin_unlock_irqrestore(&ioapic_lock, flags);
2331 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2332 + ioapic_write_entry(dev->id, i, entry[i]);
2333
2334 return 0;
2335 }
2336 @@ -2605,6 +2536,240 @@
2337
2338 device_initcall(ioapic_init_sysfs);
2339
2340 +#ifndef CONFIG_XEN
2341 +/*
2342 + * Dynamic irq allocate and deallocation
2343 + */
2344 +int create_irq(void)
2345 +{
2346 + /* Allocate an unused irq */
2347 + int irq, new, vector;
2348 + unsigned long flags;
2349 +
2350 + irq = -ENOSPC;
2351 + spin_lock_irqsave(&vector_lock, flags);
2352 + for (new = (NR_IRQS - 1); new >= 0; new--) {
2353 + if (platform_legacy_irq(new))
2354 + continue;
2355 + if (irq_vector[new] != 0)
2356 + continue;
2357 + vector = __assign_irq_vector(new);
2358 + if (likely(vector > 0))
2359 + irq = new;
2360 + break;
2361 + }
2362 + spin_unlock_irqrestore(&vector_lock, flags);
2363 +
2364 + if (irq >= 0) {
2365 + set_intr_gate(vector, interrupt[irq]);
2366 + dynamic_irq_init(irq);
2367 + }
2368 + return irq;
2369 +}
2370 +
2371 +void destroy_irq(unsigned int irq)
2372 +{
2373 + unsigned long flags;
2374 +
2375 + dynamic_irq_cleanup(irq);
2376 +
2377 + spin_lock_irqsave(&vector_lock, flags);
2378 + irq_vector[irq] = 0;
2379 + spin_unlock_irqrestore(&vector_lock, flags);
2380 +}
2381 +#endif
2382 +
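create_irq()/destroy_irq() above hand out a dynamically allocated IRQ with a vector already assigned; the MSI and Hypertransport code that follows is their consumer. A usage sketch under that assumption (handler name hypothetical, error handling trimmed):

static int example_setup(void)
{
        int irq = create_irq();         /* scans for a free non-legacy irq */

        if (irq < 0)
                return irq;             /* -ENOSPC: no vector available */

        if (request_irq(irq, example_handler, 0, "example", NULL)) {
                destroy_irq(irq);       /* give the vector back */
                return -EBUSY;
        }
        return irq;
}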
2383 +/*
2384 + * MSI message composition
2385 + */
2386 +#ifdef CONFIG_PCI_MSI
2387 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2388 +{
2389 + int vector;
2390 + unsigned dest;
2391 +
2392 + vector = assign_irq_vector(irq);
2393 + if (vector >= 0) {
2394 + dest = cpu_mask_to_apicid(TARGET_CPUS);
2395 +
2396 + msg->address_hi = MSI_ADDR_BASE_HI;
2397 + msg->address_lo =
2398 + MSI_ADDR_BASE_LO |
2399 + ((INT_DEST_MODE == 0) ?
2400 + MSI_ADDR_DEST_MODE_PHYSICAL:
2401 + MSI_ADDR_DEST_MODE_LOGICAL) |
2402 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2403 + MSI_ADDR_REDIRECTION_CPU:
2404 + MSI_ADDR_REDIRECTION_LOWPRI) |
2405 + MSI_ADDR_DEST_ID(dest);
2406 +
2407 + msg->data =
2408 + MSI_DATA_TRIGGER_EDGE |
2409 + MSI_DATA_LEVEL_ASSERT |
2410 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2411 + MSI_DATA_DELIVERY_FIXED:
2412 + MSI_DATA_DELIVERY_LOWPRI) |
2413 + MSI_DATA_VECTOR(vector);
2414 + }
2415 + return vector;
2416 +}
2417 +
2418 +#ifdef CONFIG_SMP
2419 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2420 +{
2421 + struct msi_msg msg;
2422 + unsigned int dest;
2423 + cpumask_t tmp;
2424 + int vector;
2425 +
2426 + cpus_and(tmp, mask, cpu_online_map);
2427 + if (cpus_empty(tmp))
2428 + tmp = TARGET_CPUS;
2429 +
2430 + vector = assign_irq_vector(irq);
2431 + if (vector < 0)
2432 + return;
2433 +
2434 + dest = cpu_mask_to_apicid(mask);
2435 +
2436 + read_msi_msg(irq, &msg);
2437 +
2438 + msg.data &= ~MSI_DATA_VECTOR_MASK;
2439 + msg.data |= MSI_DATA_VECTOR(vector);
2440 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2441 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2442 +
2443 + write_msi_msg(irq, &msg);
2444 + set_native_irq_info(irq, mask);
2445 +}
2446 +#endif /* CONFIG_SMP */
2447 +
2448 +/*
2449 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2450 + * which implement the MSI or MSI-X Capability Structure.
2451 + */
2452 +static struct irq_chip msi_chip = {
2453 + .name = "PCI-MSI",
2454 + .unmask = unmask_msi_irq,
2455 + .mask = mask_msi_irq,
2456 + .ack = ack_ioapic_irq,
2457 +#ifdef CONFIG_SMP
2458 + .set_affinity = set_msi_irq_affinity,
2459 +#endif
2460 + .retrigger = ioapic_retrigger_irq,
2461 +};
2462 +
2463 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
2464 +{
2465 + struct msi_msg msg;
2466 + int ret;
2467 + ret = msi_compose_msg(dev, irq, &msg);
2468 + if (ret < 0)
2469 + return ret;
2470 +
2471 + write_msi_msg(irq, &msg);
2472 +
2473 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2474 + "edge");
2475 +
2476 + return 0;
2477 +}
2478 +
2479 +void arch_teardown_msi_irq(unsigned int irq)
2480 +{
2481 + return;
2482 +}
2483 +
2484 +#endif /* CONFIG_PCI_MSI */
2485 +
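Note that drivers do not call arch_setup_msi_irq() themselves; the generic PCI MSI layer reaches it from pci_enable_msi(). Roughly, the driver side looks like this (a sketch; driver names hypothetical):

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        int err = pci_enable_msi(pdev); /* ends up in arch_setup_msi_irq() */

        if (err)
                return err;             /* could fall back to legacy INTx */

        /* pdev->irq now refers to the MSI interrupt composed above */
        return request_irq(pdev->irq, example_isr, 0, "example", pdev);
}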
2486 +/*
2487 + * Hypertransport interrupt support
2488 + */
2489 +#ifdef CONFIG_HT_IRQ
2490 +
2491 +#ifdef CONFIG_SMP
2492 +
2493 +static void target_ht_irq(unsigned int irq, unsigned int dest)
2494 +{
2495 + struct ht_irq_msg msg;
2496 + fetch_ht_irq_msg(irq, &msg);
2497 +
2498 + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2499 + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2500 +
2501 + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2502 + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2503 +
2504 + write_ht_irq_msg(irq, &msg);
2505 +}
2506 +
2507 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2508 +{
2509 + unsigned int dest;
2510 + cpumask_t tmp;
2511 +
2512 + cpus_and(tmp, mask, cpu_online_map);
2513 + if (cpus_empty(tmp))
2514 + tmp = TARGET_CPUS;
2515 +
2516 + cpus_and(mask, tmp, CPU_MASK_ALL);
2517 +
2518 + dest = cpu_mask_to_apicid(mask);
2519 +
2520 + target_ht_irq(irq, dest);
2521 + set_native_irq_info(irq, mask);
2522 +}
2523 +#endif
2524 +
2525 +static struct irq_chip ht_irq_chip = {
2526 + .name = "PCI-HT",
2527 + .mask = mask_ht_irq,
2528 + .unmask = unmask_ht_irq,
2529 + .ack = ack_ioapic_irq,
2530 +#ifdef CONFIG_SMP
2531 + .set_affinity = set_ht_irq_affinity,
2532 +#endif
2533 + .retrigger = ioapic_retrigger_irq,
2534 +};
2535 +
2536 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2537 +{
2538 + int vector;
2539 +
2540 + vector = assign_irq_vector(irq);
2541 + if (vector >= 0) {
2542 + struct ht_irq_msg msg;
2543 + unsigned dest;
2544 + cpumask_t tmp;
2545 +
2546 + cpus_clear(tmp);
2547 + cpu_set(vector >> 8, tmp);
2548 + dest = cpu_mask_to_apicid(tmp);
2549 +
2550 + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2551 +
2552 + msg.address_lo =
2553 + HT_IRQ_LOW_BASE |
2554 + HT_IRQ_LOW_DEST_ID(dest) |
2555 + HT_IRQ_LOW_VECTOR(vector) |
2556 + ((INT_DEST_MODE == 0) ?
2557 + HT_IRQ_LOW_DM_PHYSICAL :
2558 + HT_IRQ_LOW_DM_LOGICAL) |
2559 + HT_IRQ_LOW_RQEOI_EDGE |
2560 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2561 + HT_IRQ_LOW_MT_FIXED :
2562 + HT_IRQ_LOW_MT_ARBITRATED) |
2563 + HT_IRQ_LOW_IRQ_MASKED;
2564 +
2565 + write_ht_irq_msg(irq, &msg);
2566 +
2567 + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2568 + handle_edge_irq, "edge");
2569 + }
2570 + return vector;
2571 +}
2572 +#endif /* CONFIG_HT_IRQ */
2573 +
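Likewise, arch_setup_ht_irq() is reached through the generic Hypertransport IRQ layer; a consumer would presumably go through ht_create_irq()/ht_destroy_irq() from <linux/htirq.h> rather than calling it directly. A sketch (the index 0 is hypothetical):

int irq = ht_create_irq(pdev, 0);       /* idx selects the device's HT IRQ slot */
if (irq >= 0) {
        /* ... request_irq(irq, ...), use the interrupt ... */
        ht_destroy_irq(irq);
}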
2574 /* --------------------------------------------------------------------------
2575 ACPI-based IOAPIC Configuration
2576 -------------------------------------------------------------------------- */
2577 @@ -2758,13 +2923,34 @@
2578 if (!ioapic && (irq < 16))
2579 disable_8259A_irq(irq);
2580
2581 + ioapic_write_entry(ioapic, pin, entry);
2582 spin_lock_irqsave(&ioapic_lock, flags);
2583 - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2584 - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2585 - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2586 + set_native_irq_info(irq, TARGET_CPUS);
2587 spin_unlock_irqrestore(&ioapic_lock, flags);
2588
2589 return 0;
2590 }
2591
2592 #endif /* CONFIG_ACPI */
2593 +
2594 +static int __init parse_disable_timer_pin_1(char *arg)
2595 +{
2596 + disable_timer_pin_1 = 1;
2597 + return 0;
2598 +}
2599 +early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2600 +
2601 +static int __init parse_enable_timer_pin_1(char *arg)
2602 +{
2603 + disable_timer_pin_1 = -1;
2604 + return 0;
2605 +}
2606 +early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2607 +
2608 +static int __init parse_noapic(char *arg)
2609 +{
2610 + /* disable IO-APIC */
2611 + disable_ioapic_setup();
2612 + return 0;
2613 +}
2614 +early_param("noapic", parse_noapic);
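The three boot-option handlers added above share one shape: an early_param() callback runs during early command-line parsing, before ordinary __setup() handlers, and returns 0 once the option is consumed. A minimal sketch of the pattern (flag name hypothetical):

static int example_flag __initdata;

static int __init parse_example(char *arg)
{
        example_flag = 1;       /* the argument string is ignored, as above */
        return 0;
}
early_param("example", parse_example);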
2615 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c
2616 --- linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 20:51:11.000000000 +0200
2617 +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 20:39:03.000000000 +0200
2618 @@ -26,9 +26,12 @@
2619 #include <linux/delay.h>
2620 #include <linux/sched.h>
2621 #include <linux/smp_lock.h>
2622 +#include <linux/pci.h>
2623 #include <linux/mc146818rtc.h>
2624 #include <linux/acpi.h>
2625 #include <linux/sysdev.h>
2626 +#include <linux/msi.h>
2627 +#include <linux/htirq.h>
2628 #ifdef CONFIG_ACPI
2629 #include <acpi/acpi_bus.h>
2630 #endif
2631 @@ -41,6 +44,10 @@
2632 #include <asm/acpi.h>
2633 #include <asm/dma.h>
2634 #include <asm/nmi.h>
2635 +#include <asm/msidef.h>
2636 +#include <asm/hypertransport.h>
2637 +
2638 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
2639
2640 #define __apicdebuginit __init
2641
2642 @@ -48,17 +55,29 @@
2643
2644 static int no_timer_check;
2645
2646 -int disable_timer_pin_1 __initdata;
2647 +static int disable_timer_pin_1 __initdata;
2648
2649 -#ifndef CONFIG_XEN
2650 -int timer_over_8254 __initdata = 0;
2651 +#ifdef CONFIG_XEN
2652 +#include <xen/interface/xen.h>
2653 +#include <xen/interface/physdev.h>
2654 +
2655 +/* Fake i8259 */
2656 +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2657 +#define disable_8259A_irq(_irq) ((void)0)
2658 +#define i8259A_irq_pending(_irq) (0)
2659 +
2660 +unsigned long io_apic_irqs;
2661 +
2662 +#define clear_IO_APIC() ((void)0)
2663 +#else
2664 +int timer_over_8254 __initdata = 1;
2665
2666 /* Where if anywhere is the i8259 connect in external int mode */
2667 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
2668 #endif
2669
2670 static DEFINE_SPINLOCK(ioapic_lock);
2671 -static DEFINE_SPINLOCK(vector_lock);
2672 +DEFINE_SPINLOCK(vector_lock);
2673
2674 /*
2675 * # of IRQ routing registers
2676 @@ -83,28 +102,27 @@
2677 short apic, pin, next;
2678 } irq_2_pin[PIN_MAP_SIZE];
2679
2680 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
2681 -#ifdef CONFIG_PCI_MSI
2682 -#define vector_to_irq(vector) \
2683 - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
2684 -#else
2685 -#define vector_to_irq(vector) (vector)
2686 -#endif
2687 -
2688 -#ifdef CONFIG_XEN
2689 -
2690 -#include <xen/interface/xen.h>
2691 -#include <xen/interface/physdev.h>
2692 -
2693 -/* Fake i8259 */
2694 -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2695 -#define disable_8259A_irq(_irq) ((void)0)
2696 -#define i8259A_irq_pending(_irq) (0)
2697 +#ifndef CONFIG_XEN
2698 +struct io_apic {
2699 + unsigned int index;
2700 + unsigned int unused[3];
2701 + unsigned int data;
2702 +};
2703
2704 -unsigned long io_apic_irqs;
2705 +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
2706 +{
2707 + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
2708 + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
2709 +}
2710 +#endif
2711
2712 -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
2713 +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
2714 {
2715 +#ifndef CONFIG_XEN
2716 + struct io_apic __iomem *io_apic = io_apic_base(apic);
2717 + writel(reg, &io_apic->index);
2718 + return readl(&io_apic->data);
2719 +#else
2720 struct physdev_apic apic_op;
2721 int ret;
2722
2723 @@ -114,31 +132,131 @@
2724 if (ret)
2725 return ret;
2726 return apic_op.value;
2727 +#endif
2728 }
2729
2730 -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2731 +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2732 {
2733 +#ifndef CONFIG_XEN
2734 + struct io_apic __iomem *io_apic = io_apic_base(apic);
2735 + writel(reg, &io_apic->index);
2736 + writel(value, &io_apic->data);
2737 +#else
2738 struct physdev_apic apic_op;
2739
2740 apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
2741 apic_op.reg = reg;
2742 apic_op.value = value;
2743 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
2744 +#endif
2745 +}
2746 +
2747 +#ifndef CONFIG_XEN
2748 +/*
2749 + * Re-write a value: to be used for read-modify-write
2750 + * cycles where the read already set up the index register.
2751 + */
2752 +static inline void io_apic_modify(unsigned int apic, unsigned int value)
2753 +{
2754 + struct io_apic __iomem *io_apic = io_apic_base(apic);
2755 + writel(value, &io_apic->data);
2756 }
2757 +#else
2758 +#define io_apic_modify io_apic_write
2759 +#endif
2760
2761 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
2762 -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
2763 +/*
2764 + * Synchronize the IO-APIC and the CPU by doing
2765 + * a dummy read from the IO-APIC
2766 + */
2767 +static inline void io_apic_sync(unsigned int apic)
2768 +{
2769 +#ifndef CONFIG_XEN
2770 + struct io_apic __iomem *io_apic = io_apic_base(apic);
2771 + readl(&io_apic->data);
2772 +#endif
2773 +}
2774
2775 -#define clear_IO_APIC() ((void)0)
2776 +union entry_union {
2777 + struct { u32 w1, w2; };
2778 + struct IO_APIC_route_entry entry;
2779 +};
2780
2781 -#else
2782 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
2783 +{
2784 + union entry_union eu;
2785 + unsigned long flags;
2786 + spin_lock_irqsave(&ioapic_lock, flags);
2787 + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
2788 + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
2789 + spin_unlock_irqrestore(&ioapic_lock, flags);
2790 + return eu.entry;
2791 +}
2792 +
2793 +/*
2794 + * When we write a new IO APIC routing entry, we need to write the high
2795 + * word first! If the mask bit in the low word is clear, we will enable
2796 + * the interrupt, and we need to make sure the entry is fully populated
2797 + * before that happens.
2798 + */
2799 +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2800 +{
2801 + unsigned long flags;
2802 + union entry_union eu;
2803 + eu.entry = e;
2804 + spin_lock_irqsave(&ioapic_lock, flags);
2805 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2806 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2807 + spin_unlock_irqrestore(&ioapic_lock, flags);
2808 +}
2809 +
2810 +#ifndef CONFIG_XEN
2811 +/*
2812 + * When we mask an IO APIC routing entry, we need to write the low
2813 + * word first, in order to set the mask bit before we change the
2814 + * high bits!
2815 + */
2816 +static void ioapic_mask_entry(int apic, int pin)
2817 +{
2818 + unsigned long flags;
2819 + union entry_union eu = { .entry.mask = 1 };
2820 +
2821 + spin_lock_irqsave(&ioapic_lock, flags);
2822 + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2823 + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2824 + spin_unlock_irqrestore(&ioapic_lock, flags);
2825 +}
2826
2827 #ifdef CONFIG_SMP
2828 +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
2829 +{
2830 + int apic, pin;
2831 + struct irq_pin_list *entry = irq_2_pin + irq;
2832 +
2833 + BUG_ON(irq >= NR_IRQS);
2834 + for (;;) {
2835 + unsigned int reg;
2836 + apic = entry->apic;
2837 + pin = entry->pin;
2838 + if (pin == -1)
2839 + break;
2840 + io_apic_write(apic, 0x11 + pin*2, dest);
2841 + reg = io_apic_read(apic, 0x10 + pin*2);
2842 + reg &= ~0x000000ff;
2843 + reg |= vector;
2844 + io_apic_modify(apic, reg);
2845 + if (!entry->next)
2846 + break;
2847 + entry = irq_2_pin + entry->next;
2848 + }
2849 +}
2850 +
2851 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2852 {
2853 unsigned long flags;
2854 unsigned int dest;
2855 cpumask_t tmp;
2856 + int vector;
2857
2858 cpus_and(tmp, mask, cpu_online_map);
2859 if (cpus_empty(tmp))
2860 @@ -146,7 +264,11 @@
2861
2862 cpus_and(mask, tmp, CPU_MASK_ALL);
2863
2864 - dest = cpu_mask_to_apicid(mask);
2865 + vector = assign_irq_vector(irq, mask, &tmp);
2866 + if (vector < 0)
2867 + return;
2868 +
2869 + dest = cpu_mask_to_apicid(tmp);
2870
2871 /*
2872 * Only the high 8 bits are valid.
2873 @@ -154,13 +276,12 @@
2874 dest = SET_APIC_LOGICAL_ID(dest);
2875
2876 spin_lock_irqsave(&ioapic_lock, flags);
2877 - __DO_ACTION(1, = dest, )
2878 - set_irq_info(irq, mask);
2879 + __target_IO_APIC_irq(irq, dest, vector);
2880 + set_native_irq_info(irq, mask);
2881 spin_unlock_irqrestore(&ioapic_lock, flags);
2882 }
2883 #endif
2884 -
2885 -#endif /* !CONFIG_XEN */
2886 +#endif
2887
2888 /*
2889 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
2890 @@ -240,24 +361,15 @@
2891 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
2892 {
2893 struct IO_APIC_route_entry entry;
2894 - unsigned long flags;
2895
2896 /* Check delivery_mode to be sure we're not clearing an SMI pin */
2897 - spin_lock_irqsave(&ioapic_lock, flags);
2898 - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2899 - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2900 - spin_unlock_irqrestore(&ioapic_lock, flags);
2901 + entry = ioapic_read_entry(apic, pin);
2902 if (entry.delivery_mode == dest_SMI)
2903 return;
2904 /*
2905 * Disable it in the IO-APIC irq-routing table:
2906 */
2907 - memset(&entry, 0, sizeof(entry));
2908 - entry.mask = 1;
2909 - spin_lock_irqsave(&ioapic_lock, flags);
2910 - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
2911 - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
2912 - spin_unlock_irqrestore(&ioapic_lock, flags);
2913 + ioapic_mask_entry(apic, pin);
2914 }
2915
2916 static void clear_IO_APIC (void)
2917 @@ -271,16 +383,6 @@
2918
2919 #endif /* !CONFIG_XEN */
2920
2921 -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
2922 -
2923 -/*
2924 - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
2925 - * specific CPU-side IRQs.
2926 - */
2927 -
2928 -#define MAX_PIRQS 8
2929 -static int pirq_entries [MAX_PIRQS];
2930 -static int pirqs_enabled;
2931 int skip_ioapic_setup;
2932 int ioapic_force;
2933
2934 @@ -289,18 +391,17 @@
2935 static int __init disable_ioapic_setup(char *str)
2936 {
2937 skip_ioapic_setup = 1;
2938 - return 1;
2939 + return 0;
2940 }
2941 +early_param("noapic", disable_ioapic_setup);
2942
2943 -static int __init enable_ioapic_setup(char *str)
2944 +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2945 +static int __init disable_timer_pin_setup(char *arg)
2946 {
2947 - ioapic_force = 1;
2948 - skip_ioapic_setup = 0;
2949 + disable_timer_pin_1 = 1;
2950 return 1;
2951 }
2952 -
2953 -__setup("noapic", disable_ioapic_setup);
2954 -__setup("apic", enable_ioapic_setup);
2955 +__setup("disable_timer_pin_1", disable_timer_pin_setup);
2956
2957 #ifndef CONFIG_XEN
2958 static int __init setup_disable_8254_timer(char *s)
2959 @@ -318,137 +419,6 @@
2960 __setup("enable_8254_timer", setup_enable_8254_timer);
2961 #endif /* !CONFIG_XEN */
2962
2963 -#include <asm/pci-direct.h>
2964 -#include <linux/pci_ids.h>
2965 -#include <linux/pci.h>
2966 -
2967 -
2968 -#ifdef CONFIG_ACPI
2969 -
2970 -static int nvidia_hpet_detected __initdata;
2971 -
2972 -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
2973 -{
2974 - nvidia_hpet_detected = 1;
2975 - return 0;
2976 -}
2977 -#endif
2978 -
2979 -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
2980 - off. Check for an Nvidia or VIA PCI bridge and turn it off.
2981 - Use pci direct infrastructure because this runs before the PCI subsystem.
2982 -
2983 - Can be overwritten with "apic"
2984 -
2985 - And another hack to disable the IOMMU on VIA chipsets.
2986 -
2987 - ... and others. Really should move this somewhere else.
2988 -
2989 - Kludge-O-Rama. */
2990 -void __init check_ioapic(void)
2991 -{
2992 - int num,slot,func;
2993 - /* Poor man's PCI discovery */
2994 - for (num = 0; num < 32; num++) {
2995 - for (slot = 0; slot < 32; slot++) {
2996 - for (func = 0; func < 8; func++) {
2997 - u32 class;
2998 - u32 vendor;
2999 - u8 type;
3000 - class = read_pci_config(num,slot,func,
3001 - PCI_CLASS_REVISION);
3002 - if (class == 0xffffffff)
3003 - break;
3004 -
3005 - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
3006 - continue;
3007 -
3008 - vendor = read_pci_config(num, slot, func,
3009 - PCI_VENDOR_ID);
3010 - vendor &= 0xffff;
3011 - switch (vendor) {
3012 - case PCI_VENDOR_ID_VIA:
3013 -#ifdef CONFIG_IOMMU
3014 - if ((end_pfn > MAX_DMA32_PFN ||
3015 - force_iommu) &&
3016 - !iommu_aperture_allowed) {
3017 - printk(KERN_INFO
3018 - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
3019 - iommu_aperture_disabled = 1;
3020 - }
3021 -#endif
3022 - return;
3023 - case PCI_VENDOR_ID_NVIDIA:
3024 -#ifdef CONFIG_ACPI
3025 - /*
3026 - * All timer overrides on Nvidia are
3027 - * wrong unless HPET is enabled.
3028 - */
3029 - nvidia_hpet_detected = 0;
3030 - acpi_table_parse(ACPI_HPET,
3031 - nvidia_hpet_check);
3032 - if (nvidia_hpet_detected == 0) {
3033 - acpi_skip_timer_override = 1;
3034 - printk(KERN_INFO "Nvidia board "
3035 - "detected. Ignoring ACPI "
3036 - "timer override.\n");
3037 - }
3038 -#endif
3039 - /* RED-PEN skip them on mptables too? */
3040 - return;
3041 - case PCI_VENDOR_ID_ATI:
3042 -
3043 - /* This should be actually default, but
3044 - for 2.6.16 let's do it for ATI only where
3045 - it's really needed. */
3046 -#ifndef CONFIG_XEN
3047 - if (timer_over_8254 == 1) {
3048 - timer_over_8254 = 0;
3049 - printk(KERN_INFO
3050 - "ATI board detected. Disabling timer routing over 8254.\n");
3051 - }
3052 -#endif
3053 - return;
3054 - }
3055 -
3056 -
3057 - /* No multi-function device? */
3058 - type = read_pci_config_byte(num,slot,func,
3059 - PCI_HEADER_TYPE);
3060 - if (!(type & 0x80))
3061 - break;
3062 - }
3063 - }
3064 - }
3065 -}
3066 -
3067 -static int __init ioapic_pirq_setup(char *str)
3068 -{
3069 - int i, max;
3070 - int ints[MAX_PIRQS+1];
3071 -
3072 - get_options(str, ARRAY_SIZE(ints), ints);
3073 -
3074 - for (i = 0; i < MAX_PIRQS; i++)
3075 - pirq_entries[i] = -1;
3076 -
3077 - pirqs_enabled = 1;
3078 - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
3079 - max = MAX_PIRQS;
3080 - if (ints[0] < MAX_PIRQS)
3081 - max = ints[0];
3082 -
3083 - for (i = 0; i < max; i++) {
3084 - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
3085 - /*
3086 - * PIRQs are mapped upside down, usually.
3087 - */
3088 - pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
3089 - }
3090 - return 1;
3091 -}
3092 -
3093 -__setup("pirq=", ioapic_pirq_setup);
3094
3095 /*
3096 * Find the IRQ entry number of a certain pin.
3097 @@ -478,9 +448,7 @@
3098 for (i = 0; i < mp_irq_entries; i++) {
3099 int lbus = mp_irqs[i].mpc_srcbus;
3100
3101 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3102 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3103 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3104 + if (test_bit(lbus, mp_bus_not_pci) &&
3105 (mp_irqs[i].mpc_irqtype == type) &&
3106 (mp_irqs[i].mpc_srcbusirq == irq))
3107
3108 @@ -496,9 +464,7 @@
3109 for (i = 0; i < mp_irq_entries; i++) {
3110 int lbus = mp_irqs[i].mpc_srcbus;
3111
3112 - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3113 - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3114 - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3115 + if (test_bit(lbus, mp_bus_not_pci) &&
3116 (mp_irqs[i].mpc_irqtype == type) &&
3117 (mp_irqs[i].mpc_srcbusirq == irq))
3118 break;
3119 @@ -539,7 +505,7 @@
3120 mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
3121 break;
3122
3123 - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
3124 + if (!test_bit(lbus, mp_bus_not_pci) &&
3125 !mp_irqs[i].mpc_irqtype &&
3126 (bus == lbus) &&
3127 (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
3128 @@ -562,27 +528,6 @@
3129 return best_guess;
3130 }
3131
3132 -/*
3133 - * EISA Edge/Level control register, ELCR
3134 - */
3135 -static int EISA_ELCR(unsigned int irq)
3136 -{
3137 - if (irq < 16) {
3138 - unsigned int port = 0x4d0 + (irq >> 3);
3139 - return (inb(port) >> (irq & 7)) & 1;
3140 - }
3141 - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
3142 - return 0;
3143 -}
3144 -
3145 -/* EISA interrupts are always polarity zero and can be edge or level
3146 - * trigger depending on the ELCR value. If an interrupt is listed as
3147 - * EISA conforming in the MP table, that means its trigger type must
3148 - * be read in from the ELCR */
3149 -
3150 -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
3151 -#define default_EISA_polarity(idx) (0)
3152 -
3153 /* ISA interrupts are always polarity zero edge triggered,
3154 * when listed as conforming in the MP table. */
3155
3156 @@ -595,12 +540,6 @@
3157 #define default_PCI_trigger(idx) (1)
3158 #define default_PCI_polarity(idx) (1)
3159
3160 -/* MCA interrupts are always polarity zero level triggered,
3161 - * when listed as conforming in the MP table. */
3162 -
3163 -#define default_MCA_trigger(idx) (1)
3164 -#define default_MCA_polarity(idx) (0)
3165 -
3166 static int __init MPBIOS_polarity(int idx)
3167 {
3168 int bus = mp_irqs[idx].mpc_srcbus;
3169 @@ -612,38 +551,11 @@
3170 switch (mp_irqs[idx].mpc_irqflag & 3)
3171 {
3172 case 0: /* conforms, ie. bus-type dependent polarity */
3173 - {
3174 - switch (mp_bus_id_to_type[bus])
3175 - {
3176 - case MP_BUS_ISA: /* ISA pin */
3177 - {
3178 - polarity = default_ISA_polarity(idx);
3179 - break;
3180 - }
3181 - case MP_BUS_EISA: /* EISA pin */
3182 - {
3183 - polarity = default_EISA_polarity(idx);
3184 - break;
3185 - }
3186 - case MP_BUS_PCI: /* PCI pin */
3187 - {
3188 - polarity = default_PCI_polarity(idx);
3189 - break;
3190 - }
3191 - case MP_BUS_MCA: /* MCA pin */
3192 - {
3193 - polarity = default_MCA_polarity(idx);
3194 - break;
3195 - }
3196 - default:
3197 - {
3198 - printk(KERN_WARNING "broken BIOS!!\n");
3199 - polarity = 1;
3200 - break;
3201 - }
3202 - }
3203 + if (test_bit(bus, mp_bus_not_pci))
3204 + polarity = default_ISA_polarity(idx);
3205 + else
3206 + polarity = default_PCI_polarity(idx);
3207 break;
3208 - }
3209 case 1: /* high active */
3210 {
3211 polarity = 0;
3212 @@ -681,38 +593,11 @@
3213 switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
3214 {
3215 case 0: /* conforms, ie. bus-type dependent */
3216 - {
3217 - switch (mp_bus_id_to_type[bus])
3218 - {
3219 - case MP_BUS_ISA: /* ISA pin */
3220 - {
3221 - trigger = default_ISA_trigger(idx);
3222 - break;
3223 - }
3224 - case MP_BUS_EISA: /* EISA pin */
3225 - {
3226 - trigger = default_EISA_trigger(idx);
3227 - break;
3228 - }
3229 - case MP_BUS_PCI: /* PCI pin */
3230 - {
3231 - trigger = default_PCI_trigger(idx);
3232 - break;
3233 - }
3234 - case MP_BUS_MCA: /* MCA pin */
3235 - {
3236 - trigger = default_MCA_trigger(idx);
3237 - break;
3238 - }
3239 - default:
3240 - {
3241 - printk(KERN_WARNING "broken BIOS!!\n");
3242 - trigger = 1;
3243 - break;
3244 - }
3245 - }
3246 + if (test_bit(bus, mp_bus_not_pci))
3247 + trigger = default_ISA_trigger(idx);
3248 + else
3249 + trigger = default_PCI_trigger(idx);
3250 break;
3251 - }
3252 case 1: /* edge */
3253 {
3254 trigger = 0;
3255 @@ -749,64 +634,6 @@
3256 return MPBIOS_trigger(idx);
3257 }
3258
3259 -static int next_irq = 16;
3260 -
3261 -/*
3262 - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
3263 - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
3264 - * from ACPI, which can reach 800 in large boxen.
3265 - *
3266 - * Compact the sparse GSI space into a sequential IRQ series and reuse
3267 - * vectors if possible.
3268 - */
3269 -int gsi_irq_sharing(int gsi)
3270 -{
3271 - int i, tries, vector;
3272 -
3273 - BUG_ON(gsi >= NR_IRQ_VECTORS);
3274 -
3275 - if (platform_legacy_irq(gsi))
3276 - return gsi;
3277 -
3278 - if (gsi_2_irq[gsi] != 0xFF)
3279 - return (int)gsi_2_irq[gsi];
3280 -
3281 - tries = NR_IRQS;
3282 - try_again:
3283 - vector = assign_irq_vector(gsi);
3284 -
3285 - /*
3286 - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
3287 - * use of vector and if found, return that IRQ. However, we never want
3288 - * to share legacy IRQs, which usually have a different trigger mode
3289 - * than PCI.
3290 - */
3291 - for (i = 0; i < NR_IRQS; i++)
3292 - if (IO_APIC_VECTOR(i) == vector)
3293 - break;
3294 - if (platform_legacy_irq(i)) {
3295 - if (--tries >= 0) {
3296 - IO_APIC_VECTOR(i) = 0;
3297 - goto try_again;
3298 - }
3299 - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
3300 - }
3301 - if (i < NR_IRQS) {
3302 - gsi_2_irq[gsi] = i;
3303 - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
3304 - gsi, vector, i);
3305 - return i;
3306 - }
3307 -
3308 - i = next_irq++;
3309 - BUG_ON(i >= NR_IRQS);
3310 - gsi_2_irq[gsi] = i;
3311 - IO_APIC_VECTOR(i) = vector;
3312 - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
3313 - gsi, vector, i);
3314 - return i;
3315 -}
3316 -
3317 static int pin_2_irq(int idx, int apic, int pin)
3318 {
3319 int irq, i;
3320 @@ -818,49 +645,16 @@
3321 if (mp_irqs[idx].mpc_dstirq != pin)
3322 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
3323
3324 - switch (mp_bus_id_to_type[bus])
3325 - {
3326 - case MP_BUS_ISA: /* ISA pin */
3327 - case MP_BUS_EISA:
3328 - case MP_BUS_MCA:
3329 - {
3330 - irq = mp_irqs[idx].mpc_srcbusirq;
3331 - break;
3332 - }
3333 - case MP_BUS_PCI: /* PCI pin */
3334 - {
3335 - /*
3336 - * PCI IRQs are mapped in order
3337 - */
3338 - i = irq = 0;
3339 - while (i < apic)
3340 - irq += nr_ioapic_registers[i++];
3341 - irq += pin;
3342 - irq = gsi_irq_sharing(irq);
3343 - break;
3344 - }
3345 - default:
3346 - {
3347 - printk(KERN_ERR "unknown bus type %d.\n",bus);
3348 - irq = 0;
3349 - break;
3350 - }
3351 - }
3352 - BUG_ON(irq >= NR_IRQS);
3353 -
3354 - /*
3355 - * PCI IRQ command line redirection. Yes, limits are hardcoded.
3356 - */
3357 - if ((pin >= 16) && (pin <= 23)) {
3358 - if (pirq_entries[pin-16] != -1) {
3359 - if (!pirq_entries[pin-16]) {
3360 - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
3361 - } else {
3362 - irq = pirq_entries[pin-16];
3363 - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
3364 - pin-16, irq);
3365 - }
3366 - }
3367 + if (test_bit(bus, mp_bus_not_pci)) {
3368 + irq = mp_irqs[idx].mpc_srcbusirq;
3369 + } else {
3370 + /*
3371 + * PCI IRQs are mapped in order
3372 + */
3373 + i = irq = 0;
3374 + while (i < apic)
3375 + irq += nr_ioapic_registers[i++];
3376 + irq += pin;
3377 }
3378 BUG_ON(irq >= NR_IRQS);
3379 return irq;
3380 @@ -884,43 +678,68 @@
3381 }
3382
3383 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
3384 -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3385 +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3386
3387 -int assign_irq_vector(int irq)
3388 +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3389 {
3390 - unsigned long flags;
3391 int vector;
3392 struct physdev_irq irq_op;
3393
3394 - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
3395 + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
3396
3397 - spin_lock_irqsave(&vector_lock, flags);
3398 + cpus_and(*result, mask, cpu_online_map);
3399
3400 - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
3401 - spin_unlock_irqrestore(&vector_lock, flags);
3402 - return IO_APIC_VECTOR(irq);
3403 - }
3404 + if (irq_vector[irq] > 0)
3405 + return irq_vector[irq];
3406
3407 irq_op.irq = irq;
3408 - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
3409 - spin_unlock_irqrestore(&vector_lock, flags);
3410 + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
3411 return -ENOSPC;
3412 - }
3413
3414 vector = irq_op.vector;
3415 - vector_irq[vector] = irq;
3416 - if (irq != AUTO_ASSIGN)
3417 - IO_APIC_VECTOR(irq) = vector;
3418 + irq_vector[irq] = vector;
3419
3420 - spin_unlock_irqrestore(&vector_lock, flags);
3421 + return vector;
3422 +}
3423
3424 +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3425 +{
3426 + int vector;
3427 + unsigned long flags;
3428 +
3429 + spin_lock_irqsave(&vector_lock, flags);
3430 + vector = __assign_irq_vector(irq, mask, result);
3431 + spin_unlock_irqrestore(&vector_lock, flags);
3432 return vector;
3433 }
3434
3435 -extern void (*interrupt[NR_IRQS])(void);
3436 #ifndef CONFIG_XEN
3437 -static struct hw_interrupt_type ioapic_level_type;
3438 -static struct hw_interrupt_type ioapic_edge_type;
3439 +void __setup_vector_irq(int cpu)
3440 +{
3441 + /* Initialize vector_irq on a new cpu */
3442 + /* This function must be called with vector_lock held */
3443 + int irq, vector;
3444 +
3445 + /* Mark the inuse vectors */
3446 + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
3447 + if (!cpu_isset(cpu, irq_domain[irq]))
3448 + continue;
3449 + vector = irq_vector[irq];
3450 + per_cpu(vector_irq, cpu)[vector] = irq;
3451 + }
3452 + /* Mark the free vectors */
3453 + for (vector = 0; vector < NR_VECTORS; ++vector) {
3454 + irq = per_cpu(vector_irq, cpu)[vector];
3455 + if (irq < 0)
3456 + continue;
3457 + if (!cpu_isset(cpu, irq_domain[irq]))
3458 + per_cpu(vector_irq, cpu)[vector] = -1;
3459 + }
3460 +}
3461 +
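__setup_vector_irq() above populates the per-CPU vector-to-IRQ translation table when a CPU comes online. On this kernel generation the 64-bit interrupt entry path consults the same table, roughly along these lines (a sketch of the lookup, not of this patch):

/* inside do_IRQ(), 2.6.19-era x86-64 (approximate) */
unsigned vector = ~regs->orig_rax;              /* vector pushed by the entry stub */
int irq = __get_cpu_var(vector_irq)[vector];    /* per-cpu table filled above */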
3462 +extern void (*interrupt[NR_IRQS])(void);
3463 +
3464 +static struct irq_chip ioapic_chip;
3465
3466 #define IOAPIC_AUTO -1
3467 #define IOAPIC_EDGE 0
3468 @@ -928,16 +747,15 @@
3469
3470 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
3471 {
3472 - unsigned idx;
3473 -
3474 - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
3475 -
3476 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
3477 trigger == IOAPIC_LEVEL)
3478 - irq_desc[idx].chip = &ioapic_level_type;
3479 - else
3480 - irq_desc[idx].chip = &ioapic_edge_type;
3481 - set_intr_gate(vector, interrupt[idx]);
3482 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3483 + handle_fasteoi_irq, "fasteoi");
3484 + else {
3485 + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
3486 + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3487 + handle_edge_irq, "edge");
3488 + }
3489 }
3490 #else
3491 #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
3492 @@ -990,16 +808,21 @@
3493 continue;
3494
3495 if (IO_APIC_IRQ(irq)) {
3496 - vector = assign_irq_vector(irq);
3497 + cpumask_t mask;
3498 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
3499 + if (vector < 0)
3500 + continue;
3501 +
3502 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
3503 entry.vector = vector;
3504
3505 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
3506 if (!apic && (irq < 16))
3507 disable_8259A_irq(irq);
3508 }
3509 + ioapic_write_entry(apic, pin, entry);
3510 +
3511 spin_lock_irqsave(&ioapic_lock, flags);
3512 - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
3513 - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
3514 set_native_irq_info(irq, TARGET_CPUS);
3515 spin_unlock_irqrestore(&ioapic_lock, flags);
3516 }
3517 @@ -1042,7 +865,7 @@
3518 * The timer IRQ doesn't have to know that behind the
3519 * scene we have a 8259A-master in AEOI mode ...
3520 */
3521 - irq_desc[0].chip = &ioapic_edge_type;
3522 + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
3523
3524 /*
3525 * Add it to the IO-APIC irq-routing table:
3526 @@ -1138,10 +961,7 @@
3527 for (i = 0; i <= reg_01.bits.entries; i++) {
3528 struct IO_APIC_route_entry entry;
3529
3530 - spin_lock_irqsave(&ioapic_lock, flags);
3531 - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
3532 - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
3533 - spin_unlock_irqrestore(&ioapic_lock, flags);
3534 + entry = ioapic_read_entry(apic, i);
3535
3536 printk(KERN_DEBUG " %02x %03X %02X ",
3537 i,
3538 @@ -1161,17 +981,12 @@
3539 );
3540 }
3541 }
3542 - if (use_pci_vector())
3543 - printk(KERN_INFO "Using vector-based indexing\n");
3544 printk(KERN_DEBUG "IRQ to pin mappings:\n");
3545 for (i = 0; i < NR_IRQS; i++) {
3546 struct irq_pin_list *entry = irq_2_pin + i;
3547 if (entry->pin < 0)
3548 continue;
3549 - if (use_pci_vector() && !platform_legacy_irq(i))
3550 - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
3551 - else
3552 - printk(KERN_DEBUG "IRQ%d ", i);
3553 + printk(KERN_DEBUG "IRQ%d ", i);
3554 for (;;) {
3555 printk("-> %d:%d", entry->apic, entry->pin);
3556 if (!entry->next)
3557 @@ -1335,9 +1150,6 @@
3558 irq_2_pin[i].pin = -1;
3559 irq_2_pin[i].next = 0;
3560 }
3561 - if (!pirqs_enabled)
3562 - for (i = 0; i < MAX_PIRQS; i++)
3563 - pirq_entries[i] = -1;
3564
3565 /*
3566 * The number of IO-APIC IRQ registers (== #pins):
3567 @@ -1354,11 +1166,7 @@
3568 /* See if any of the pins is in ExtINT mode */
3569 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
3570 struct IO_APIC_route_entry entry;
3571 - spin_lock_irqsave(&ioapic_lock, flags);
3572 - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
3573 - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
3574 - spin_unlock_irqrestore(&ioapic_lock, flags);
3575 -
3576 + entry = ioapic_read_entry(apic, pin);
3577
3578 /* If the interrupt line is enabled and in ExtInt mode
3579 * I have found the pin where the i8259 is connected.
3580 @@ -1412,7 +1220,6 @@
3581 */
3582 if (ioapic_i8259.pin != -1) {
3583 struct IO_APIC_route_entry entry;
3584 - unsigned long flags;
3585
3586 memset(&entry, 0, sizeof(entry));
3587 entry.mask = 0; /* Enabled */
3588 @@ -1429,12 +1236,7 @@
3589 /*
3590 * Add it to the IO-APIC irq-routing table:
3591 */
3592 - spin_lock_irqsave(&ioapic_lock, flags);
3593 - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
3594 - *(((int *)&entry)+1));
3595 - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
3596 - *(((int *)&entry)+0));
3597 - spin_unlock_irqrestore(&ioapic_lock, flags);
3598 + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
3599 }
3600
3601 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
3602 @@ -1442,76 +1244,6 @@
3603 }
3604
3605 /*
3606 - * function to set the IO-APIC physical IDs based on the
3607 - * values stored in the MPC table.
3608 - *
3609 - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
3610 - */
3611 -
3612 -#ifndef CONFIG_XEN
3613 -static void __init setup_ioapic_ids_from_mpc (void)
3614 -{
3615 - union IO_APIC_reg_00 reg_00;
3616 - int apic;
3617 - int i;
3618 - unsigned char old_id;
3619 - unsigned long flags;
3620 -
3621 - /*
3622 - * Set the IOAPIC ID to the value stored in the MPC table.
3623 - */
3624 - for (apic = 0; apic < nr_ioapics; apic++) {
3625 -
3626 - /* Read the register 0 value */
3627 - spin_lock_irqsave(&ioapic_lock, flags);
3628 - reg_00.raw = io_apic_read(apic, 0);
3629 - spin_unlock_irqrestore(&ioapic_lock, flags);
3630 -
3631 - old_id = mp_ioapics[apic].mpc_apicid;
3632 -
3633 -
3634 - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
3635 -
3636 -
3637 - /*
3638 - * We need to adjust the IRQ routing table
3639 - * if the ID changed.
3640 - */
3641 - if (old_id != mp_ioapics[apic].mpc_apicid)
3642 - for (i = 0; i < mp_irq_entries; i++)
3643 - if (mp_irqs[i].mpc_dstapic == old_id)
3644 - mp_irqs[i].mpc_dstapic
3645 - = mp_ioapics[apic].mpc_apicid;
3646 -
3647 - /*
3648 - * Read the right value from the MPC table and
3649 - * write it into the ID register.
3650 - */
3651 - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
3652 - mp_ioapics[apic].mpc_apicid);
3653 -
3654 - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
3655 - spin_lock_irqsave(&ioapic_lock, flags);
3656 - io_apic_write(apic, 0, reg_00.raw);
3657 - spin_unlock_irqrestore(&ioapic_lock, flags);
3658 -
3659 - /*
3660 - * Sanity check
3661 - */
3662 - spin_lock_irqsave(&ioapic_lock, flags);
3663 - reg_00.raw = io_apic_read(apic, 0);
3664 - spin_unlock_irqrestore(&ioapic_lock, flags);
3665 - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
3666 - printk("could not set ID!\n");
3667 - else
3668 - apic_printk(APIC_VERBOSE," ok.\n");
3669 - }
3670 -}
3671 -#else
3672 -static void __init setup_ioapic_ids_from_mpc(void) { }
3673 -#endif
3674 -
3675 -/*
3676 * There is a nasty bug in some older SMP boards, their mptable lies
3677 * about the timer IRQ. We do the following to work around the situation:
3678 *
3679 @@ -1565,7 +1297,7 @@
3680 * an edge even if it isn't on the 8259A...
3681 */
3682
3683 -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
3684 +static unsigned int startup_ioapic_irq(unsigned int irq)
3685 {
3686 int was_pending = 0;
3687 unsigned long flags;
3688 @@ -1582,107 +1314,19 @@
3689 return was_pending;
3690 }
3691
3692 -/*
3693 - * Once we have recorded IRQ_PENDING already, we can mask the
3694 - * interrupt for real. This prevents IRQ storms from unhandled
3695 - * devices.
3696 - */
3697 -static void ack_edge_ioapic_irq(unsigned int irq)
3698 -{
3699 - move_irq(irq);
3700 - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
3701 - == (IRQ_PENDING | IRQ_DISABLED))
3702 - mask_IO_APIC_irq(irq);
3703 - ack_APIC_irq();
3704 -}
3705 -
3706 -/*
3707 - * Level triggered interrupts can just be masked,
3708 - * and shutting down and starting up the interrupt
3709 - * is the same as enabling and disabling them -- except
3710 - * with a startup need to return a "was pending" value.
3711 - *
3712 - * Level triggered interrupts are special because we
3713 - * do not touch any IO-APIC register while handling
3714 - * them. We ack the APIC in the end-IRQ handler, not
3715 - * in the start-IRQ-handler. Protection against reentrance
3716 - * from the same interrupt is still provided, both by the
3717 - * generic IRQ layer and by the fact that an unacked local
3718 - * APIC does not accept IRQs.
3719 - */
3720 -static unsigned int startup_level_ioapic_irq (unsigned int irq)
3721 -{
3722 - unmask_IO_APIC_irq(irq);
3723 -
3724 - return 0; /* don't check for pending */
3725 -}
3726 -
3727 -static void end_level_ioapic_irq (unsigned int irq)
3728 -{
3729 - move_irq(irq);
3730 - ack_APIC_irq();
3731 -}
3732 -
3733 -#ifdef CONFIG_PCI_MSI
3734 -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
3735 -{
3736 - int irq = vector_to_irq(vector);
3737 -
3738 - return startup_edge_ioapic_irq(irq);
3739 -}
3740 -
3741 -static void ack_edge_ioapic_vector(unsigned int vector)
3742 -{
3743 - int irq = vector_to_irq(vector);
3744 -
3745 - move_native_irq(vector);
3746 - ack_edge_ioapic_irq(irq);
3747 -}
3748 -
3749 -static unsigned int startup_level_ioapic_vector (unsigned int vector)
3750 -{
3751 - int irq = vector_to_irq(vector);
3752 -
3753 - return startup_level_ioapic_irq (irq);
3754 -}
3755 -
3756 -static void end_level_ioapic_vector (unsigned int vector)
3757 -{
3758 - int irq = vector_to_irq(vector);
3759 -
3760 - move_native_irq(vector);
3761 - end_level_ioapic_irq(irq);
3762 -}
3763 -
3764 -static void mask_IO_APIC_vector (unsigned int vector)
3765 -{
3766 - int irq = vector_to_irq(vector);
3767 -
3768 - mask_IO_APIC_irq(irq);
3769 -}
3770 -
3771 -static void unmask_IO_APIC_vector (unsigned int vector)
3772 -{
3773 - int irq = vector_to_irq(vector);
3774 -
3775 - unmask_IO_APIC_irq(irq);
3776 -}
3777 -
3778 -#ifdef CONFIG_SMP
3779 -static void set_ioapic_affinity_vector (unsigned int vector,
3780 - cpumask_t cpu_mask)
3781 +static int ioapic_retrigger_irq(unsigned int irq)
3782 {
3783 - int irq = vector_to_irq(vector);
3784 + cpumask_t mask;
3785 + unsigned vector;
3786 + unsigned long flags;
3787
3788 - set_native_irq_info(vector, cpu_mask);
3789 - set_ioapic_affinity_irq(irq, cpu_mask);
3790 -}
3791 -#endif // CONFIG_SMP
3792 -#endif // CONFIG_PCI_MSI
3793 + spin_lock_irqsave(&vector_lock, flags);
3794 + vector = irq_vector[irq];
3795 + cpus_clear(mask);
3796 + cpu_set(first_cpu(irq_domain[irq]), mask);
3797
3798 -static int ioapic_retrigger(unsigned int irq)
3799 -{
3800 - send_IPI_self(IO_APIC_VECTOR(irq));
3801 + send_IPI_mask(mask, vector);
3802 + spin_unlock_irqrestore(&vector_lock, flags);
3803
3804 return 1;
3805 }
3806 @@ -1696,32 +1340,47 @@
3807 * races.
3808 */
3809
3810 -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
3811 - .typename = "IO-APIC-edge",
3812 - .startup = startup_edge_ioapic,
3813 - .shutdown = shutdown_edge_ioapic,
3814 - .enable = enable_edge_ioapic,
3815 - .disable = disable_edge_ioapic,
3816 - .ack = ack_edge_ioapic,
3817 - .end = end_edge_ioapic,
3818 -#ifdef CONFIG_SMP
3819 - .set_affinity = set_ioapic_affinity,
3820 +static void ack_apic_edge(unsigned int irq)
3821 +{
3822 + move_native_irq(irq);
3823 + ack_APIC_irq();
3824 +}
3825 +
3826 +static void ack_apic_level(unsigned int irq)
3827 +{
3828 + int do_unmask_irq = 0;
3829 +
3830 +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
3831 + /* If we are moving the irq we need to mask it */
3832 + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
3833 + do_unmask_irq = 1;
3834 + mask_IO_APIC_irq(irq);
3835 + }
3836 #endif
3837 - .retrigger = ioapic_retrigger,
3838 -};
3839
3840 -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
3841 - .typename = "IO-APIC-level",
3842 - .startup = startup_level_ioapic,
3843 - .shutdown = shutdown_level_ioapic,
3844 - .enable = enable_level_ioapic,
3845 - .disable = disable_level_ioapic,
3846 - .ack = mask_and_ack_level_ioapic,
3847 - .end = end_level_ioapic,
3848 + /*
3849 + * We must acknowledge the irq before we move it or the acknowledge will
3850 + * not propagate properly.
3851 + */
3852 + ack_APIC_irq();
3853 +
3854 + /* Now we can move and re-enable the irq */
3855 + move_masked_irq(irq);
3856 + if (unlikely(do_unmask_irq))
3857 + unmask_IO_APIC_irq(irq);
3858 +}
3859 +
3860 +static struct irq_chip ioapic_chip __read_mostly = {
3861 + .name = "IO-APIC",
3862 + .startup = startup_ioapic_irq,
3863 + .mask = mask_IO_APIC_irq,
3864 + .unmask = unmask_IO_APIC_irq,
3865 + .ack = ack_apic_edge,
3866 + .eoi = ack_apic_level,
3867 #ifdef CONFIG_SMP
3868 - .set_affinity = set_ioapic_affinity,
3869 + .set_affinity = set_ioapic_affinity_irq,
3870 #endif
3871 - .retrigger = ioapic_retrigger,
3872 + .retrigger = ioapic_retrigger_irq,
3873 };
3874 #endif /* !CONFIG_XEN */
3875
3876 @@ -1742,12 +1401,7 @@
3877 */
3878 for (irq = 0; irq < NR_IRQS ; irq++) {
3879 int tmp = irq;
3880 - if (use_pci_vector()) {
3881 - if (!platform_legacy_irq(tmp))
3882 - if ((tmp = vector_to_irq(tmp)) == -1)
3883 - continue;
3884 - }
3885 - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
3886 + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
3887 /*
3888 * Hmm.. We don't have an entry for this,
3889 * so default to an old-fashioned 8259
3890 @@ -1758,7 +1412,7 @@
3891 #ifndef CONFIG_XEN
3892 else
3893 /* Strange. Oh, well.. */
3894 - irq_desc[irq].chip = &no_irq_type;
3895 + irq_desc[irq].chip = &no_irq_chip;
3896 #endif
3897 }
3898 }
3899 @@ -1879,8 +1533,6 @@
3900 spin_unlock_irqrestore(&ioapic_lock, flags);
3901 }
3902
3903 -int timer_uses_ioapic_pin_0;
3904 -
3905 /*
3906 * This code may look a bit paranoid, but it's supposed to cooperate with
3907 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
3908 @@ -1893,13 +1545,13 @@
3909 {
3910 int apic1, pin1, apic2, pin2;
3911 int vector;
3912 + cpumask_t mask;
3913
3914 /*
3915 * get/set the timer IRQ vector:
3916 */
3917 disable_8259A_irq(0);
3918 - vector = assign_irq_vector(0);
3919 - set_intr_gate(vector, interrupt[0]);
3920 + vector = assign_irq_vector(0, TARGET_CPUS, &mask);
3921
3922 /*
3923 * Subtle, code in do_timer_interrupt() expects an AEOI
3924 @@ -1918,9 +1570,6 @@
3925 pin2 = ioapic_i8259.pin;
3926 apic2 = ioapic_i8259.apic;
3927
3928 - if (pin1 == 0)
3929 - timer_uses_ioapic_pin_0 = 1;
3930 -
3931 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
3932 vector, apic1, pin1, apic2, pin2);
3933
3934 @@ -2035,11 +1684,6 @@
3935
3936 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3937
3938 - /*
3939 - * Set up the IO-APIC IRQ routing table.
3940 - */
3941 - if (!acpi_ioapic)
3942 - setup_ioapic_ids_from_mpc();
3943 #ifndef CONFIG_XEN
3944 sync_Arb_IDs();
3945 #endif /* !CONFIG_XEN */
3946 @@ -2060,17 +1704,12 @@
3947 {
3948 struct IO_APIC_route_entry *entry;
3949 struct sysfs_ioapic_data *data;
3950 - unsigned long flags;
3951 int i;
3952
3953 data = container_of(dev, struct sysfs_ioapic_data, dev);
3954 entry = data->entry;
3955 - spin_lock_irqsave(&ioapic_lock, flags);
3956 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
3957 - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
3958 - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
3959 - }
3960 - spin_unlock_irqrestore(&ioapic_lock, flags);
3961 + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
3962 + *entry = ioapic_read_entry(dev->id, i);
3963
3964 return 0;
3965 }
3966 @@ -2092,11 +1731,9 @@
3967 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
3968 io_apic_write(dev->id, 0, reg_00.raw);
3969 }
3970 - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
3971 - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
3972 - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
3973 - }
3974 spin_unlock_irqrestore(&ioapic_lock, flags);
3975 + for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3976 + ioapic_write_entry(dev->id, i, entry[i]);
3977
3978 return 0;
3979 }
3980 @@ -2142,26 +1779,254 @@
3981
3982 device_initcall(ioapic_init_sysfs);
3983
3984 -/* --------------------------------------------------------------------------
3985 - ACPI-based IOAPIC Configuration
3986 - -------------------------------------------------------------------------- */
3987 +#ifndef CONFIG_XEN
3988 +/*
3989 + * Dynamic irq allocate and deallocation
3990 + */
3991 +int create_irq(void)
3992 +{
3993 + /* Allocate an unused irq */
3994 + int irq;
3995 + int new;
3996 + int vector = 0;
3997 + unsigned long flags;
3998 + cpumask_t mask;
3999
4000 -#ifdef CONFIG_ACPI
4001 + irq = -ENOSPC;
4002 + spin_lock_irqsave(&vector_lock, flags);
4003 + for (new = (NR_IRQS - 1); new >= 0; new--) {
4004 + if (platform_legacy_irq(new))
4005 + continue;
4006 + if (irq_vector[new] != 0)
4007 + continue;
4008 + vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
4009 + if (likely(vector > 0))
4010 + irq = new;
4011 + break;
4012 + }
4013 + spin_unlock_irqrestore(&vector_lock, flags);
4014
4015 -#define IO_APIC_MAX_ID 0xFE
4016 + if (irq >= 0) {
4017 + dynamic_irq_init(irq);
4018 + }
4019 + return irq;
4020 +}
4021
4022 -int __init io_apic_get_version (int ioapic)
4023 +void destroy_irq(unsigned int irq)
4024 {
4025 - union IO_APIC_reg_01 reg_01;
4026 unsigned long flags;
4027
4028 - spin_lock_irqsave(&ioapic_lock, flags);
4029 - reg_01.raw = io_apic_read(ioapic, 1);
4030 - spin_unlock_irqrestore(&ioapic_lock, flags);
4031 + dynamic_irq_cleanup(irq);
4032 +
4033 + spin_lock_irqsave(&vector_lock, flags);
4034 + irq_vector[irq] = 0;
4035 + spin_unlock_irqrestore(&vector_lock, flags);
4036 +}
4037 +#endif
4038 +
4039 +/*
4040 + * MSI message composition
4041 + */
4042 +#ifdef CONFIG_PCI_MSI
4043 +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
4044 +{
4045 + int vector;
4046 + unsigned dest;
4047 + cpumask_t tmp;
4048 +
4049 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4050 + if (vector >= 0) {
4051 + dest = cpu_mask_to_apicid(tmp);
4052 +
4053 + msg->address_hi = MSI_ADDR_BASE_HI;
4054 + msg->address_lo =
4055 + MSI_ADDR_BASE_LO |
4056 + ((INT_DEST_MODE == 0) ?
4057 + MSI_ADDR_DEST_MODE_PHYSICAL:
4058 + MSI_ADDR_DEST_MODE_LOGICAL) |
4059 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4060 + MSI_ADDR_REDIRECTION_CPU:
4061 + MSI_ADDR_REDIRECTION_LOWPRI) |
4062 + MSI_ADDR_DEST_ID(dest);
4063 +
4064 + msg->data =
4065 + MSI_DATA_TRIGGER_EDGE |
4066 + MSI_DATA_LEVEL_ASSERT |
4067 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4068 + MSI_DATA_DELIVERY_FIXED:
4069 + MSI_DATA_DELIVERY_LOWPRI) |
4070 + MSI_DATA_VECTOR(vector);
4071 + }
4072 + return vector;
4073 +}
4074 +
4075 +#ifdef CONFIG_SMP
4076 +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
4077 +{
4078 + struct msi_msg msg;
4079 + unsigned int dest;
4080 + cpumask_t tmp;
4081 + int vector;
4082 +
4083 + cpus_and(tmp, mask, cpu_online_map);
4084 + if (cpus_empty(tmp))
4085 + tmp = TARGET_CPUS;
4086 +
4087 + cpus_and(mask, tmp, CPU_MASK_ALL);
4088 +
4089 + vector = assign_irq_vector(irq, mask, &tmp);
4090 + if (vector < 0)
4091 + return;
4092 +
4093 + dest = cpu_mask_to_apicid(tmp);
4094 +
4095 + read_msi_msg(irq, &msg);
4096 +
4097 + msg.data &= ~MSI_DATA_VECTOR_MASK;
4098 + msg.data |= MSI_DATA_VECTOR(vector);
4099 + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
4100 + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
4101 +
4102 + write_msi_msg(irq, &msg);
4103 + set_native_irq_info(irq, mask);
4104 +}
4105 +#endif /* CONFIG_SMP */
4106 +
4107 +/*
4108 + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
4109 + * which implement the MSI or MSI-X Capability Structure.
4110 + */
4111 +static struct irq_chip msi_chip = {
4112 + .name = "PCI-MSI",
4113 + .unmask = unmask_msi_irq,
4114 + .mask = mask_msi_irq,
4115 + .ack = ack_apic_edge,
4116 +#ifdef CONFIG_SMP
4117 + .set_affinity = set_msi_irq_affinity,
4118 +#endif
4119 + .retrigger = ioapic_retrigger_irq,
4120 +};
4121 +
4122 +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
4123 +{
4124 + struct msi_msg msg;
4125 + int ret;
4126 + ret = msi_compose_msg(dev, irq, &msg);
4127 + if (ret < 0)
4128 + return ret;
4129 +
4130 + write_msi_msg(irq, &msg);
4131 +
4132 + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
4133 +
4134 + return 0;
4135 +}
4136 +
4137 +void arch_teardown_msi_irq(unsigned int irq)
4138 +{
4139 + return;
4140 +}
4141 +
4142 +#endif /* CONFIG_PCI_MSI */
4143 +
4144 +/*
4145 + * Hypertransport interrupt support
4146 + */
4147 +#ifdef CONFIG_HT_IRQ
4148 +
4149 +#ifdef CONFIG_SMP
4150 +
4151 +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
4152 +{
4153 + struct ht_irq_msg msg;
4154 + fetch_ht_irq_msg(irq, &msg);
4155 +
4156 + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
4157 + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
4158
4159 - return reg_01.bits.version;
4160 + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
4161 + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
4162 +
4163 + write_ht_irq_msg(irq, &msg);
4164 }
4165
4166 +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
4167 +{
4168 + unsigned int dest;
4169 + cpumask_t tmp;
4170 + int vector;
4171 +
4172 + cpus_and(tmp, mask, cpu_online_map);
4173 + if (cpus_empty(tmp))
4174 + tmp = TARGET_CPUS;
4175 +
4176 + cpus_and(mask, tmp, CPU_MASK_ALL);
4177 +
4178 + vector = assign_irq_vector(irq, mask, &tmp);
4179 + if (vector < 0)
4180 + return;
4181 +
4182 + dest = cpu_mask_to_apicid(tmp);
4183 +
4184 + target_ht_irq(irq, dest, vector);
4185 + set_native_irq_info(irq, mask);
4186 +}
4187 +#endif
4188 +
4189 +static struct irq_chip ht_irq_chip = {
4190 + .name = "PCI-HT",
4191 + .mask = mask_ht_irq,
4192 + .unmask = unmask_ht_irq,
4193 + .ack = ack_apic_edge,
4194 +#ifdef CONFIG_SMP
4195 + .set_affinity = set_ht_irq_affinity,
4196 +#endif
4197 + .retrigger = ioapic_retrigger_irq,
4198 +};
4199 +
4200 +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
4201 +{
4202 + int vector;
4203 + cpumask_t tmp;
4204 +
4205 + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4206 + if (vector >= 0) {
4207 + struct ht_irq_msg msg;
4208 + unsigned dest;
4209 +
4210 + dest = cpu_mask_to_apicid(tmp);
4211 +
4212 + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
4213 +
4214 + msg.address_lo =
4215 + HT_IRQ_LOW_BASE |
4216 + HT_IRQ_LOW_DEST_ID(dest) |
4217 + HT_IRQ_LOW_VECTOR(vector) |
4218 + ((INT_DEST_MODE == 0) ?
4219 + HT_IRQ_LOW_DM_PHYSICAL :
4220 + HT_IRQ_LOW_DM_LOGICAL) |
4221 + HT_IRQ_LOW_RQEOI_EDGE |
4222 + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4223 + HT_IRQ_LOW_MT_FIXED :
4224 + HT_IRQ_LOW_MT_ARBITRATED) |
4225 + HT_IRQ_LOW_IRQ_MASKED;
4226 +
4227 + write_ht_irq_msg(irq, &msg);
4228 +
4229 + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
4230 + handle_edge_irq, "edge");
4231 + }
4232 + return vector;
4233 +}
4234 +#endif /* CONFIG_HT_IRQ */
4235 +
4236 +/* --------------------------------------------------------------------------
4237 + ACPI-based IOAPIC Configuration
4238 + -------------------------------------------------------------------------- */
4239 +
4240 +#ifdef CONFIG_ACPI
4241 +
4242 +#define IO_APIC_MAX_ID 0xFE
4243
4244 int __init io_apic_get_redir_entries (int ioapic)
4245 {
4246 @@ -2180,6 +2045,8 @@
4247 {
4248 struct IO_APIC_route_entry entry;
4249 unsigned long flags;
4250 + int vector;
4251 + cpumask_t mask;
4252
4253 if (!IO_APIC_IRQ(irq)) {
4254 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
4255 @@ -2188,6 +2055,17 @@
4256 }
4257
4258 /*
4259 + * IRQs < 16 are already in the irq_2_pin[] map
4260 + */
4261 + if (irq >= 16)
4262 + add_pin_to_irq(irq, ioapic, pin);
4263 +
4264 +
4265 + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4266 + if (vector < 0)
4267 + return vector;
4268 +
4269 + /*
4270 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
4271 * Note that we mask (disable) IRQs now -- these get enabled when the
4272 * corresponding device driver registers for this IRQ.
4273 @@ -2197,19 +2075,11 @@
4274
4275 entry.delivery_mode = INT_DELIVERY_MODE;
4276 entry.dest_mode = INT_DEST_MODE;
4277 - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4278 + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4279 entry.trigger = edge_level;
4280 entry.polarity = active_high_low;
4281 entry.mask = 1; /* Disabled (masked) */
4282 -
4283 - irq = gsi_irq_sharing(irq);
4284 - /*
4285 - * IRQs < 16 are already in the irq_2_pin[] map
4286 - */
4287 - if (irq >= 16)
4288 - add_pin_to_irq(irq, ioapic, pin);
4289 -
4290 - entry.vector = assign_irq_vector(irq);
4291 + entry.vector = vector & 0xff;
4292
4293 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
4294 "IRQ %d Mode:%i Active:%i)\n", ioapic,
4295 @@ -2221,10 +2091,10 @@
4296 if (!ioapic && (irq < 16))
4297 disable_8259A_irq(irq);
4298
4299 + ioapic_write_entry(ioapic, pin, entry);
4300 +
4301 spin_lock_irqsave(&ioapic_lock, flags);
4302 - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
4303 - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
4304 - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
4305 + set_native_irq_info(irq, TARGET_CPUS);
4306 spin_unlock_irqrestore(&ioapic_lock, flags);
4307
4308 return 0;
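
The io_apic hunks above track the 2.6.19 genirq rework: the separate
edge/level "hw_interrupt_type" tables collapse into single irq_chip
instances (ioapic_chip, msi_chip, ht_irq_chip), and the trigger type is
expressed by the flow handler installed at setup time rather than by the
chip itself. A minimal sketch of that pattern, assuming the 2.6.19-era
genirq API; my_chip and the my_*_irq() helpers are hypothetical
placeholders, not code from this patch:

  #include <linux/irq.h>

  static void my_mask_irq(unsigned int irq)   { /* mask at the controller */ }
  static void my_unmask_irq(unsigned int irq) { /* unmask at the controller */ }
  static void my_ack_irq(unsigned int irq)    { /* ack at the controller */ }

  static struct irq_chip my_chip = {
          .name   = "MY-CHIP",
          .mask   = my_mask_irq,
          .unmask = my_unmask_irq,
          .ack    = my_ack_irq,
  };

  /* Edge-triggered lines get handle_edge_irq, level-triggered lines
   * handle_level_irq; the chip no longer encodes the trigger type. */
  static void my_setup_irq(unsigned int irq, int level)
  {
          set_irq_chip_and_handler_name(irq, &my_chip,
                                        level ? handle_level_irq
                                              : handle_edge_irq,
                                        level ? "level" : "edge");
  }
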
4309 diff -Naur linux-2.6.25/arch/x86/kernel/ioport_64-xen.c linux-2.6.25-xen/arch/x86/kernel/ioport_64-xen.c
4310 --- linux-2.6.25/arch/x86/kernel/ioport_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4311 +++ linux-2.6.25-xen/arch/x86/kernel/ioport_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4312 @@ -58,6 +58,7 @@
4313
4314 memset(bitmap, 0xff, IO_BITMAP_BYTES);
4315 t->io_bitmap_ptr = bitmap;
4316 + set_thread_flag(TIF_IO_BITMAP);
4317
4318 set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
4319 set_iobitmap.nr_ports = IO_BITMAP_BITS;
4320 diff -Naur linux-2.6.25/arch/x86/kernel/irq_32-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c
4321 --- linux-2.6.25/arch/x86/kernel/irq_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4322 +++ linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4323 @@ -53,8 +53,10 @@
4324 */
4325 fastcall unsigned int do_IRQ(struct pt_regs *regs)
4326 {
4327 + struct pt_regs *old_regs;
4328 /* high bit used in ret_from_ code */
4329 int irq = ~regs->orig_eax;
4330 + struct irq_desc *desc = irq_desc + irq;
4331 #ifdef CONFIG_4KSTACKS
4332 union irq_ctx *curctx, *irqctx;
4333 u32 *isp;
4334 @@ -66,6 +68,7 @@
4335 BUG();
4336 }
4337
4338 + old_regs = set_irq_regs(regs);
4339 irq_enter();
4340 #ifdef CONFIG_DEBUG_STACKOVERFLOW
4341 /* Debugging check for stack overflow: is there less than 1KB free? */
4342 @@ -110,19 +113,20 @@
4343 (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
4344
4345 asm volatile(
4346 - " xchgl %%ebx,%%esp \n"
4347 - " call __do_IRQ \n"
4348 + " xchgl %%ebx,%%esp \n"
4349 + " call *%%edi \n"
4350 " movl %%ebx,%%esp \n"
4351 : "=a" (arg1), "=d" (arg2), "=b" (ebx)
4352 - : "0" (irq), "1" (regs), "2" (isp)
4353 - : "memory", "cc", "ecx"
4354 + : "0" (irq), "1" (desc), "2" (isp),
4355 + "D" (desc->handle_irq)
4356 + : "memory", "cc"
4357 );
4358 } else
4359 #endif
4360 - __do_IRQ(irq, regs);
4361 + desc->handle_irq(irq, desc);
4362
4363 irq_exit();
4364 -
4365 + set_irq_regs(old_regs);
4366 return 1;
4367 }
4368
4369 @@ -253,7 +257,8 @@
4370 for_each_online_cpu(j)
4371 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4372 #endif
4373 - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4374 + seq_printf(p, " %8s", irq_desc[i].chip->name);
4375 + seq_printf(p, "-%-8s", irq_desc[i].name);
4376 seq_printf(p, " %s", action->name);
4377
4378 for (action=action->next; action; action = action->next)
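
The irq_32 hunk above replaces the generic __do_IRQ() dispatch with the
per-descriptor flow handler and brackets it with set_irq_regs(), which
stashes the interrupted register state per-CPU instead of threading
pt_regs through every handler. A sketch of that shape under the same
2.6.19-era API -- my_demux_irq() is illustrative, not the patched
function itself:

  #include <linux/irq.h>
  #include <linux/interrupt.h>
  #include <asm/irq_regs.h>

  static unsigned int my_demux_irq(struct pt_regs *regs, unsigned int irq)
  {
          struct pt_regs *old_regs = set_irq_regs(regs);
          struct irq_desc *desc = irq_desc + irq;

          irq_enter();
          desc->handle_irq(irq, desc);    /* flow handler installed via irq_chip setup */
          irq_exit();

          set_irq_regs(old_regs);         /* restore on the way out (handles nesting) */
          return 1;
  }
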
4379 diff -Naur linux-2.6.25/arch/x86/kernel/irq_64-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_64-xen.c
4380 --- linux-2.6.25/arch/x86/kernel/irq_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4381 +++ linux-2.6.25-xen/arch/x86/kernel/irq_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4382 @@ -20,11 +20,6 @@
4383 #include <asm/idle.h>
4384
4385 atomic_t irq_err_count;
4386 -#ifdef CONFIG_X86_IO_APIC
4387 -#ifdef APIC_MISMATCH_DEBUG
4388 -atomic_t irq_mis_count;
4389 -#endif
4390 -#endif
4391
4392 #ifdef CONFIG_DEBUG_STACKOVERFLOW
4393 /*
4394 @@ -79,7 +74,8 @@
4395 for_each_online_cpu(j)
4396 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4397 #endif
4398 - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4399 + seq_printf(p, " %8s", irq_desc[i].chip->name);
4400 + seq_printf(p, "-%-8s", irq_desc[i].name);
4401
4402 seq_printf(p, " %s", action->name);
4403 for (action=action->next; action; action = action->next)
4404 @@ -99,11 +95,6 @@
4405 seq_putc(p, '\n');
4406 #endif
4407 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
4408 -#ifdef CONFIG_X86_IO_APIC
4409 -#ifdef APIC_MISMATCH_DEBUG
4410 - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
4411 -#endif
4412 -#endif
4413 }
4414 return 0;
4415 }
4416 @@ -114,24 +105,28 @@
4417 * handlers).
4418 */
4419 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
4420 -{
4421 +{
4422 + struct pt_regs *old_regs = set_irq_regs(regs);
4423 +
4424 /* high bit used in ret_from_ code */
4425 unsigned irq = ~regs->orig_rax;
4426
4427 - if (unlikely(irq >= NR_IRQS)) {
4428 - printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
4429 - __FUNCTION__, irq);
4430 - BUG();
4431 - }
4432 -
4433 exit_idle();
4434 irq_enter();
4435 +
4436 #ifdef CONFIG_DEBUG_STACKOVERFLOW
4437 stack_overflow_check(regs);
4438 #endif
4439 - __do_IRQ(irq, regs);
4440 +
4441 + if (likely(irq < NR_IRQS))
4442 + generic_handle_irq(irq);
4443 + else
4444 + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
4445 + __func__, smp_processor_id(), irq);
4446 +
4447 irq_exit();
4448
4449 + set_irq_regs(old_regs);
4450 return 1;
4451 }
4452
4453 @@ -192,6 +187,6 @@
4454 */
4455 void ack_bad_irq(unsigned int irq)
4456 {
4457 - printk("unexpected IRQ trap at vector %02x\n", irq);
4458 + printk("unexpected IRQ trap at irq %02x\n", irq);
4459 }
4460 #endif
4461 diff -Naur linux-2.6.25/arch/x86/kernel/ldt_32-xen.c linux-2.6.25-xen/arch/x86/kernel/ldt_32-xen.c
4462 --- linux-2.6.25/arch/x86/kernel/ldt_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4463 +++ linux-2.6.25-xen/arch/x86/kernel/ldt_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4464 @@ -1,5 +1,5 @@
4465 /*
4466 - * linux/kernel/ldt.c
4467 + * linux/arch/i386/kernel/ldt.c
4468 *
4469 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
4470 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
4471 diff -Naur linux-2.6.25/arch/x86/kernel/Makefile linux-2.6.25-xen/arch/x86/kernel/Makefile
4472 --- linux-2.6.25/arch/x86/kernel/Makefile 2008-05-23 20:51:22.000000000 +0200
4473 +++ linux-2.6.25-xen/arch/x86/kernel/Makefile 2008-05-23 20:39:03.000000000 +0200
4474 @@ -91,7 +91,7 @@
4475 ###
4476 # 64 bit specific files
4477 ifeq ($(CONFIG_X86_64),y)
4478 - obj-y += genapic_64.o genapic_flat_64.o
4479 + obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o
4480 obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
4481 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
4482 obj-$(CONFIG_AUDIT) += audit_64.o
4483 @@ -104,5 +104,6 @@
4484 pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
4485 endif
4486
4487 -disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
4488 +disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
4489 + smpboot_$(BITS).o tsc_$(BITS).o
4490 %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
4491 diff -Naur linux-2.6.25/arch/x86/kernel/microcode-xen.c linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c
4492 --- linux-2.6.25/arch/x86/kernel/microcode-xen.c 2008-05-23 20:51:11.000000000 +0200
4493 +++ linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c 2008-05-23 20:39:03.000000000 +0200
4494 @@ -2,6 +2,7 @@
4495 * Intel CPU Microcode Update Driver for Linux
4496 *
4497 * Copyright (C) 2000-2004 Tigran Aivazian
4498 + * 2006 Shaohua Li <shaohua.li@intel.com>
4499 *
4500 * This driver allows to upgrade microcode on Intel processors
4501 * belonging to IA-32 family - PentiumPro, Pentium II,
4502 @@ -33,7 +34,9 @@
4503 #include <linux/spinlock.h>
4504 #include <linux/mm.h>
4505 #include <linux/mutex.h>
4506 -#include <linux/syscalls.h>
4507 +#include <linux/cpu.h>
4508 +#include <linux/firmware.h>
4509 +#include <linux/platform_device.h>
4510
4511 #include <asm/msr.h>
4512 #include <asm/uaccess.h>
4513 @@ -55,12 +58,7 @@
4514 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
4515 static DEFINE_MUTEX(microcode_mutex);
4516
4517 -static int microcode_open (struct inode *unused1, struct file *unused2)
4518 -{
4519 - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4520 -}
4521 -
4522 -
4523 +#ifdef CONFIG_MICROCODE_OLD_INTERFACE
4524 static int do_microcode_update (const void __user *ubuf, size_t len)
4525 {
4526 int err;
4527 @@ -85,6 +83,11 @@
4528 return err;
4529 }
4530
4531 +static int microcode_open (struct inode *unused1, struct file *unused2)
4532 +{
4533 + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4534 +}
4535 +
4536 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
4537 {
4538 ssize_t ret;
4539 @@ -117,7 +120,7 @@
4540 .fops = &microcode_fops,
4541 };
4542
4543 -static int __init microcode_init (void)
4544 +static int __init microcode_dev_init (void)
4545 {
4546 int error;
4547
4548 @@ -129,6 +132,68 @@
4549 return error;
4550 }
4551
4552 + return 0;
4553 +}
4554 +
4555 +static void __exit microcode_dev_exit (void)
4556 +{
4557 + misc_deregister(&microcode_dev);
4558 +}
4559 +
4560 +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
4561 +#else
4562 +#define microcode_dev_init() 0
4563 +#define microcode_dev_exit() do { } while(0)
4564 +#endif
4565 +
4566 +/* fake device for request_firmware */
4567 +static struct platform_device *microcode_pdev;
4568 +
4569 +static int request_microcode(void)
4570 +{
4571 + char name[30];
4572 + const struct cpuinfo_x86 *c = &boot_cpu_data;
4573 + const struct firmware *firmware;
4574 + int error;
4575 + struct xen_platform_op op;
4576 +
4577 + sprintf(name,"intel-ucode/%02x-%02x-%02x",
4578 + c->x86, c->x86_model, c->x86_mask);
4579 + error = request_firmware(&firmware, name, &microcode_pdev->dev);
4580 + if (error) {
4581 + pr_debug("ucode data file %s load failed\n", name);
4582 + return error;
4583 + }
4584 +
4585 + op.cmd = XENPF_microcode_update;
4586 + set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
4587 + op.u.microcode.length = firmware->size;
4588 + error = HYPERVISOR_platform_op(&op);
4589 +
4590 + release_firmware(firmware);
4591 +
4592 + if (error)
4593 + pr_debug("ucode load failed\n");
4594 +
4595 + return error;
4596 +}
4597 +
4598 +static int __init microcode_init (void)
4599 +{
4600 + int error;
4601 +
4602 + error = microcode_dev_init();
4603 + if (error)
4604 + return error;
4605 + microcode_pdev = platform_device_register_simple("microcode", -1,
4606 + NULL, 0);
4607 + if (IS_ERR(microcode_pdev)) {
4608 + microcode_dev_exit();
4609 + return PTR_ERR(microcode_pdev);
4610 + }
4611 +
4612 + request_microcode();
4613 +
4614 printk(KERN_INFO
4615 "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
4616 return 0;
4617 @@ -136,9 +201,9 @@
4618
4619 static void __exit microcode_exit (void)
4620 {
4621 - misc_deregister(&microcode_dev);
4622 + microcode_dev_exit();
4623 + platform_device_unregister(microcode_pdev);
4624 }
4625
4626 module_init(microcode_init)
4627 module_exit(microcode_exit)
4628 -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
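
The microcode hunks above move the driver from the /dev/cpu/microcode
write path to request_firmware() against a fake platform device, handing
the blob to the hypervisor via XENPF_microcode_update. A sketch of the
generic request_firmware() flow, assuming only the stock firmware-loader
API; my_load_blob(), my_pdev and "my-blob.bin" are hypothetical:

  #include <linux/firmware.h>
  #include <linux/platform_device.h>

  static int my_load_blob(struct platform_device *my_pdev)
  {
          const struct firmware *fw;
          int err;

          /* userspace resolves "my-blob.bin" under /lib/firmware */
          err = request_firmware(&fw, "my-blob.bin", &my_pdev->dev);
          if (err)
                  return err;

          /* ... consume fw->data / fw->size (here: the
           * XENPF_microcode_update platform hypercall) ... */

          release_firmware(fw);
          return 0;
  }
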
4629 diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_32-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_32-xen.c
4630 --- linux-2.6.25/arch/x86/kernel/mpparse_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4631 +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4632 @@ -30,6 +30,7 @@
4633 #include <asm/io_apic.h>
4634
4635 #include <mach_apic.h>
4636 +#include <mach_apicdef.h>
4637 #include <mach_mpparse.h>
4638 #include <bios_ebda.h>
4639
4640 @@ -68,7 +69,7 @@
4641 /* Processor that is doing the boot up */
4642 unsigned int boot_cpu_physical_apicid = -1U;
4643 /* Internal processor count */
4644 -static unsigned int __devinitdata num_processors;
4645 +unsigned int __cpuinitdata num_processors;
4646
4647 /* Bitmask of physically existing CPUs */
4648 physid_mask_t phys_cpu_present_map;
4649 @@ -235,12 +236,14 @@
4650
4651 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
4652
4653 +#if MAX_MP_BUSSES < 256
4654 if (m->mpc_busid >= MAX_MP_BUSSES) {
4655 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
4656 " is too large, max. supported is %d\n",
4657 m->mpc_busid, str, MAX_MP_BUSSES - 1);
4658 return;
4659 }
4660 +#endif
4661
4662 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
4663 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4664 @@ -300,19 +303,6 @@
4665 m->mpc_irqtype, m->mpc_irqflag & 3,
4666 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4667 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4668 - /*
4669 - * Well it seems all SMP boards in existence
4670 - * use ExtINT/LVT1 == LINT0 and
4671 - * NMI/LVT2 == LINT1 - the following check
4672 - * will show us if this assumptions is false.
4673 - * Until then we do not have to add baggage.
4674 - */
4675 - if ((m->mpc_irqtype == mp_ExtINT) &&
4676 - (m->mpc_destapiclint != 0))
4677 - BUG();
4678 - if ((m->mpc_irqtype == mp_NMI) &&
4679 - (m->mpc_destapiclint != 1))
4680 - BUG();
4681 }
4682
4683 #ifdef CONFIG_X86_NUMAQ
4684 @@ -838,8 +828,7 @@
4685
4686 #ifdef CONFIG_ACPI
4687
4688 -void __init mp_register_lapic_address (
4689 - u64 address)
4690 +void __init mp_register_lapic_address(u64 address)
4691 {
4692 #ifndef CONFIG_XEN
4693 mp_lapic_addr = (unsigned long) address;
4694 @@ -853,13 +842,10 @@
4695 #endif
4696 }
4697
4698 -
4699 -void __devinit mp_register_lapic (
4700 - u8 id,
4701 - u8 enabled)
4702 +void __devinit mp_register_lapic (u8 id, u8 enabled)
4703 {
4704 struct mpc_config_processor processor;
4705 - int boot_cpu = 0;
4706 + int boot_cpu = 0;
4707
4708 if (MAX_APICS - id <= 0) {
4709 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
4710 @@ -898,11 +884,9 @@
4711 u32 pin_programmed[4];
4712 } mp_ioapic_routing[MAX_IO_APICS];
4713
4714 -
4715 -static int mp_find_ioapic (
4716 - int gsi)
4717 +static int mp_find_ioapic (int gsi)
4718 {
4719 - int i = 0;
4720 + int i = 0;
4721
4722 /* Find the IOAPIC that manages this GSI. */
4723 for (i = 0; i < nr_ioapics; i++) {
4724 @@ -915,15 +899,11 @@
4725
4726 return -1;
4727 }
4728 -
4729
4730 -void __init mp_register_ioapic (
4731 - u8 id,
4732 - u32 address,
4733 - u32 gsi_base)
4734 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
4735 {
4736 - int idx = 0;
4737 - int tmpid;
4738 + int idx = 0;
4739 + int tmpid;
4740
4741 if (nr_ioapics >= MAX_IO_APICS) {
4742 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4743 @@ -971,16 +951,10 @@
4744 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
4745 mp_ioapic_routing[idx].gsi_base,
4746 mp_ioapic_routing[idx].gsi_end);
4747 -
4748 - return;
4749 }
4750
4751 -
4752 -void __init mp_override_legacy_irq (
4753 - u8 bus_irq,
4754 - u8 polarity,
4755 - u8 trigger,
4756 - u32 gsi)
4757 +void __init
4758 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
4759 {
4760 struct mpc_config_intsrc intsrc;
4761 int ioapic = -1;
4762 @@ -1018,15 +992,13 @@
4763 mp_irqs[mp_irq_entries] = intsrc;
4764 if (++mp_irq_entries == MAX_IRQ_SOURCES)
4765 panic("Max # of irq sources exceeded!\n");
4766 -
4767 - return;
4768 }
4769
4770 void __init mp_config_acpi_legacy_irqs (void)
4771 {
4772 struct mpc_config_intsrc intsrc;
4773 - int i = 0;
4774 - int ioapic = -1;
4775 + int i = 0;
4776 + int ioapic = -1;
4777
4778 /*
4779 * Fabricate the legacy ISA bus (bus #31).
4780 @@ -1095,12 +1067,12 @@
4781
4782 #define MAX_GSI_NUM 4096
4783
4784 -int mp_register_gsi (u32 gsi, int triggering, int polarity)
4785 +int mp_register_gsi(u32 gsi, int triggering, int polarity)
4786 {
4787 - int ioapic = -1;
4788 - int ioapic_pin = 0;
4789 - int idx, bit = 0;
4790 - static int pci_irq = 16;
4791 + int ioapic = -1;
4792 + int ioapic_pin = 0;
4793 + int idx, bit = 0;
4794 + static int pci_irq = 16;
4795 /*
4796 * Mapping between Global System Interrups, which
4797 * represent all possible interrupts, and IRQs
4798 diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c
4799 --- linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4800 +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4801 @@ -41,8 +41,7 @@
4802 * Various Linux-internal data structures created from the
4803 * MP-table.
4804 */
4805 -unsigned char apic_version [MAX_APICS];
4806 -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4807 +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
4808 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4809
4810 static int mp_current_pci_id = 0;
4811 @@ -56,7 +55,6 @@
4812 int mp_irq_entries;
4813
4814 int nr_ioapics;
4815 -int pic_mode;
4816 unsigned long mp_lapic_addr = 0;
4817
4818
4819 @@ -71,19 +69,6 @@
4820 /* Bitmask of physically existing CPUs */
4821 physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
4822
4823 -/* ACPI MADT entry parsing functions */
4824 -#ifdef CONFIG_ACPI
4825 -extern struct acpi_boot_flags acpi_boot;
4826 -#ifdef CONFIG_X86_LOCAL_APIC
4827 -extern int acpi_parse_lapic (acpi_table_entry_header *header);
4828 -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
4829 -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
4830 -#endif /*CONFIG_X86_LOCAL_APIC*/
4831 -#ifdef CONFIG_X86_IO_APIC
4832 -extern int acpi_parse_ioapic (acpi_table_entry_header *header);
4833 -#endif /*CONFIG_X86_IO_APIC*/
4834 -#endif /*CONFIG_ACPI*/
4835 -
4836 u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
4837
4838
4839 @@ -109,24 +94,20 @@
4840 static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
4841 {
4842 int cpu;
4843 - unsigned char ver;
4844 cpumask_t tmp_map;
4845 + char *bootup_cpu = "";
4846
4847 if (!(m->mpc_cpuflag & CPU_ENABLED)) {
4848 disabled_cpus++;
4849 return;
4850 }
4851 -
4852 - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
4853 - m->mpc_apicid,
4854 - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
4855 - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
4856 - m->mpc_apicver);
4857 -
4858 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4859 - Dprintk(" Bootup CPU\n");
4860 + bootup_cpu = " (Bootup-CPU)";
4861 boot_cpu_id = m->mpc_apicid;
4862 }
4863 +
4864 + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
4865 +
4866 if (num_processors >= NR_CPUS) {
4867 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
4868 " Processor ignored.\n", NR_CPUS);
4869 @@ -137,24 +118,7 @@
4870 cpus_complement(tmp_map, cpu_present_map);
4871 cpu = first_cpu(tmp_map);
4872
4873 -#if MAX_APICS < 255
4874 - if ((int)m->mpc_apicid > MAX_APICS) {
4875 - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
4876 - m->mpc_apicid, MAX_APICS);
4877 - return;
4878 - }
4879 -#endif
4880 - ver = m->mpc_apicver;
4881 -
4882 physid_set(m->mpc_apicid, phys_cpu_present_map);
4883 - /*
4884 - * Validate version
4885 - */
4886 - if (ver == 0x0) {
4887 - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
4888 - ver = 0x10;
4889 - }
4890 - apic_version[m->mpc_apicid] = ver;
4891 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4892 /*
4893 * bios_cpu_apicid is required to have processors listed
4894 @@ -185,37 +149,42 @@
4895 Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
4896
4897 if (strncmp(str, "ISA", 3) == 0) {
4898 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4899 - } else if (strncmp(str, "EISA", 4) == 0) {
4900 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
4901 + set_bit(m->mpc_busid, mp_bus_not_pci);
4902 } else if (strncmp(str, "PCI", 3) == 0) {
4903 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
4904 + clear_bit(m->mpc_busid, mp_bus_not_pci);
4905 mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
4906 mp_current_pci_id++;
4907 - } else if (strncmp(str, "MCA", 3) == 0) {
4908 - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
4909 } else {
4910 printk(KERN_ERR "Unknown bustype %s\n", str);
4911 }
4912 }
4913
4914 +static int bad_ioapic(unsigned long address)
4915 +{
4916 + if (nr_ioapics >= MAX_IO_APICS) {
4917 + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4918 + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
4919 + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
4920 + }
4921 + if (!address) {
4922 + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
4923 + " found in table, skipping!\n");
4924 + return 1;
4925 + }
4926 + return 0;
4927 +}
4928 +
4929 static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
4930 {
4931 if (!(m->mpc_flags & MPC_APIC_USABLE))
4932 return;
4933
4934 - printk("I/O APIC #%d Version %d at 0x%X.\n",
4935 - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
4936 - if (nr_ioapics >= MAX_IO_APICS) {
4937 - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
4938 - MAX_IO_APICS, nr_ioapics);
4939 - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
4940 - }
4941 - if (!m->mpc_apicaddr) {
4942 - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
4943 - " found in MP table, skipping!\n");
4944 + printk("I/O APIC #%d at 0x%X.\n",
4945 + m->mpc_apicid, m->mpc_apicaddr);
4946 +
4947 + if (bad_ioapic(m->mpc_apicaddr))
4948 return;
4949 - }
4950 +
4951 mp_ioapics[nr_ioapics] = *m;
4952 nr_ioapics++;
4953 }
4954 @@ -239,19 +208,6 @@
4955 m->mpc_irqtype, m->mpc_irqflag & 3,
4956 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4957 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4958 - /*
4959 - * Well it seems all SMP boards in existence
4960 - * use ExtINT/LVT1 == LINT0 and
4961 - * NMI/LVT2 == LINT1 - the following check
4962 - * will show us if this assumptions is false.
4963 - * Until then we do not have to add baggage.
4964 - */
4965 - if ((m->mpc_irqtype == mp_ExtINT) &&
4966 - (m->mpc_destapiclint != 0))
4967 - BUG();
4968 - if ((m->mpc_irqtype == mp_NMI) &&
4969 - (m->mpc_destapiclint != 1))
4970 - BUG();
4971 }
4972
4973 /*
4974 @@ -265,7 +221,7 @@
4975 unsigned char *mpt=((unsigned char *)mpc)+count;
4976
4977 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
4978 - printk("SMP mptable: bad signature [%c%c%c%c]!\n",
4979 + printk("MPTABLE: bad signature [%c%c%c%c]!\n",
4980 mpc->mpc_signature[0],
4981 mpc->mpc_signature[1],
4982 mpc->mpc_signature[2],
4983 @@ -273,31 +229,31 @@
4984 return 0;
4985 }
4986 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
4987 - printk("SMP mptable: checksum error!\n");
4988 + printk("MPTABLE: checksum error!\n");
4989 return 0;
4990 }
4991 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
4992 - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
4993 + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
4994 mpc->mpc_spec);
4995 return 0;
4996 }
4997 if (!mpc->mpc_lapic) {
4998 - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
4999 + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
5000 return 0;
5001 }
5002 memcpy(str,mpc->mpc_oem,8);
5003 - str[8]=0;
5004 - printk(KERN_INFO "OEM ID: %s ",str);
5005 + str[8] = 0;
5006 + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
5007
5008 memcpy(str,mpc->mpc_productid,12);
5009 - str[12]=0;
5010 - printk("Product ID: %s ",str);
5011 + str[12] = 0;
5012 + printk("MPTABLE: Product ID: %s ",str);
5013
5014 - printk("APIC at: 0x%X\n",mpc->mpc_lapic);
5015 + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
5016
5017 /* save the local APIC address, it might be non-default */
5018 if (!acpi_lapic)
5019 - mp_lapic_addr = mpc->mpc_lapic;
5020 + mp_lapic_addr = mpc->mpc_lapic;
5021
5022 /*
5023 * Now process the configuration blocks.
5024 @@ -309,7 +265,7 @@
5025 struct mpc_config_processor *m=
5026 (struct mpc_config_processor *)mpt;
5027 if (!acpi_lapic)
5028 - MP_processor_info(m);
5029 + MP_processor_info(m);
5030 mpt += sizeof(*m);
5031 count += sizeof(*m);
5032 break;
5033 @@ -328,8 +284,8 @@
5034 struct mpc_config_ioapic *m=
5035 (struct mpc_config_ioapic *)mpt;
5036 MP_ioapic_info(m);
5037 - mpt+=sizeof(*m);
5038 - count+=sizeof(*m);
5039 + mpt += sizeof(*m);
5040 + count += sizeof(*m);
5041 break;
5042 }
5043 case MP_INTSRC:
5044 @@ -338,8 +294,8 @@
5045 (struct mpc_config_intsrc *)mpt;
5046
5047 MP_intsrc_info(m);
5048 - mpt+=sizeof(*m);
5049 - count+=sizeof(*m);
5050 + mpt += sizeof(*m);
5051 + count += sizeof(*m);
5052 break;
5053 }
5054 case MP_LINTSRC:
5055 @@ -347,15 +303,15 @@
5056 struct mpc_config_lintsrc *m=
5057 (struct mpc_config_lintsrc *)mpt;
5058 MP_lintsrc_info(m);
5059 - mpt+=sizeof(*m);
5060 - count+=sizeof(*m);
5061 + mpt += sizeof(*m);
5062 + count += sizeof(*m);
5063 break;
5064 }
5065 }
5066 }
5067 clustered_apic_check();
5068 if (!num_processors)
5069 - printk(KERN_ERR "SMP mptable: no processors registered!\n");
5070 + printk(KERN_ERR "MPTABLE: no processors registered!\n");
5071 return num_processors;
5072 }
5073
5074 @@ -451,13 +407,10 @@
5075 * 2 CPUs, numbered 0 & 1.
5076 */
5077 processor.mpc_type = MP_PROCESSOR;
5078 - /* Either an integrated APIC or a discrete 82489DX. */
5079 - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5080 + processor.mpc_apicver = 0;
5081 processor.mpc_cpuflag = CPU_ENABLED;
5082 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5083 - (boot_cpu_data.x86_model << 4) |
5084 - boot_cpu_data.x86_mask;
5085 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5086 + processor.mpc_cpufeature = 0;
5087 + processor.mpc_featureflag = 0;
5088 processor.mpc_reserved[0] = 0;
5089 processor.mpc_reserved[1] = 0;
5090 for (i = 0; i < 2; i++) {
5091 @@ -476,14 +429,6 @@
5092 case 5:
5093 memcpy(bus.mpc_bustype, "ISA ", 6);
5094 break;
5095 - case 2:
5096 - case 6:
5097 - case 3:
5098 - memcpy(bus.mpc_bustype, "EISA ", 6);
5099 - break;
5100 - case 4:
5101 - case 7:
5102 - memcpy(bus.mpc_bustype, "MCA ", 6);
5103 }
5104 MP_bus_info(&bus);
5105 if (mpc_default_type > 4) {
5106 @@ -494,7 +439,7 @@
5107
5108 ioapic.mpc_type = MP_IOAPIC;
5109 ioapic.mpc_apicid = 2;
5110 - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5111 + ioapic.mpc_apicver = 0;
5112 ioapic.mpc_flags = MPC_APIC_USABLE;
5113 ioapic.mpc_apicaddr = 0xFEC00000;
5114 MP_ioapic_info(&ioapic);
5115 @@ -537,13 +482,6 @@
5116 printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
5117
5118 printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
5119 - if (mpf->mpf_feature2 & (1<<7)) {
5120 - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
5121 - pic_mode = 1;
5122 - } else {
5123 - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
5124 - pic_mode = 0;
5125 - }
5126
5127 /*
5128 * Now see if we need to read further.
5129 @@ -620,7 +558,7 @@
5130 return 0;
5131 }
5132
5133 -void __init find_intel_smp (void)
5134 +void __init find_smp_config(void)
5135 {
5136 unsigned int address;
5137
5138 @@ -637,9 +575,7 @@
5139 smp_scan_config(0xF0000,0x10000))
5140 return;
5141 /*
5142 - * If it is an SMP machine we should know now, unless the
5143 - * configuration is in an EISA/MCA bus machine with an
5144 - * extended bios data area.
5145 + * If it is an SMP machine we should know now.
5146 *
5147 * there is a real-mode segmented pointer pointing to the
5148 * 4K EBDA area at 0x40E, calculate and scan it here.
5149 @@ -660,64 +596,38 @@
5150 printk(KERN_INFO "No mptable found.\n");
5151 }
5152
5153 -/*
5154 - * - Intel MP Configuration Table
5155 - */
5156 -void __init find_smp_config (void)
5157 -{
5158 -#ifdef CONFIG_X86_LOCAL_APIC
5159 - find_intel_smp();
5160 -#endif
5161 -}
5162 -
5163 -
5164 /* --------------------------------------------------------------------------
5165 ACPI-based MP Configuration
5166 -------------------------------------------------------------------------- */
5167
5168 #ifdef CONFIG_ACPI
5169
5170 -void __init mp_register_lapic_address (
5171 - u64 address)
5172 +void __init mp_register_lapic_address(u64 address)
5173 {
5174 #ifndef CONFIG_XEN
5175 mp_lapic_addr = (unsigned long) address;
5176 -
5177 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
5178 -
5179 if (boot_cpu_id == -1U)
5180 boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
5181 -
5182 - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
5183 #endif
5184 }
5185
5186 -
5187 -void __cpuinit mp_register_lapic (
5188 - u8 id,
5189 - u8 enabled)
5190 +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
5191 {
5192 struct mpc_config_processor processor;
5193 int boot_cpu = 0;
5194
5195 - if (id >= MAX_APICS) {
5196 - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
5197 - id, MAX_APICS);
5198 - return;
5199 - }
5200 -
5201 - if (id == boot_cpu_physical_apicid)
5202 + if (id == boot_cpu_id)
5203 boot_cpu = 1;
5204
5205 #ifndef CONFIG_XEN
5206 processor.mpc_type = MP_PROCESSOR;
5207 processor.mpc_apicid = id;
5208 - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
5209 + processor.mpc_apicver = 0;
5210 processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
5211 processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
5212 - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5213 - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
5214 - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5215 + processor.mpc_cpufeature = 0;
5216 + processor.mpc_featureflag = 0;
5217 processor.mpc_reserved[0] = 0;
5218 processor.mpc_reserved[1] = 0;
5219 #endif
5220 @@ -725,8 +635,6 @@
5221 MP_processor_info(&processor);
5222 }
5223
5224 -#ifdef CONFIG_X86_IO_APIC
5225 -
5226 #define MP_ISA_BUS 0
5227 #define MP_MAX_IOAPIC_PIN 127
5228
5229 @@ -737,11 +645,9 @@
5230 u32 pin_programmed[4];
5231 } mp_ioapic_routing[MAX_IO_APICS];
5232
5233 -
5234 -static int mp_find_ioapic (
5235 - int gsi)
5236 +static int mp_find_ioapic(int gsi)
5237 {
5238 - int i = 0;
5239 + int i = 0;
5240
5241 /* Find the IOAPIC that manages this GSI. */
5242 for (i = 0; i < nr_ioapics; i++) {
5243 @@ -751,28 +657,15 @@
5244 }
5245
5246 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
5247 -
5248 return -1;
5249 }
5250 -
5251
5252 -void __init mp_register_ioapic (
5253 - u8 id,
5254 - u32 address,
5255 - u32 gsi_base)
5256 +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
5257 {
5258 - int idx = 0;
5259 + int idx = 0;
5260
5261 - if (nr_ioapics >= MAX_IO_APICS) {
5262 - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5263 - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5264 - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5265 - }
5266 - if (!address) {
5267 - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5268 - " found in MADT table, skipping!\n");
5269 + if (bad_ioapic(address))
5270 return;
5271 - }
5272
5273 idx = nr_ioapics++;
5274
5275 @@ -784,7 +677,7 @@
5276 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
5277 #endif
5278 mp_ioapics[idx].mpc_apicid = id;
5279 - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
5280 + mp_ioapics[idx].mpc_apicver = 0;
5281
5282 /*
5283 * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
5284 @@ -795,21 +688,15 @@
5285 mp_ioapic_routing[idx].gsi_end = gsi_base +
5286 io_apic_get_redir_entries(idx);
5287
5288 - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
5289 + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
5290 "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
5291 - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
5292 + mp_ioapics[idx].mpc_apicaddr,
5293 mp_ioapic_routing[idx].gsi_start,
5294 mp_ioapic_routing[idx].gsi_end);
5295 -
5296 - return;
5297 }
5298
5299 -
5300 -void __init mp_override_legacy_irq (
5301 - u8 bus_irq,
5302 - u8 polarity,
5303 - u8 trigger,
5304 - u32 gsi)
5305 +void __init
5306 +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
5307 {
5308 struct mpc_config_intsrc intsrc;
5309 int ioapic = -1;
5310 @@ -847,22 +734,18 @@
5311 mp_irqs[mp_irq_entries] = intsrc;
5312 if (++mp_irq_entries == MAX_IRQ_SOURCES)
5313 panic("Max # of irq sources exceeded!\n");
5314 -
5315 - return;
5316 }
5317
5318 -
5319 -void __init mp_config_acpi_legacy_irqs (void)
5320 +void __init mp_config_acpi_legacy_irqs(void)
5321 {
5322 struct mpc_config_intsrc intsrc;
5323 - int i = 0;
5324 - int ioapic = -1;
5325 + int i = 0;
5326 + int ioapic = -1;
5327
5328 /*
5329 * Fabricate the legacy ISA bus (bus #31).
5330 */
5331 - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
5332 - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
5333 + set_bit(MP_ISA_BUS, mp_bus_not_pci);
5334
5335 /*
5336 * Locate the IOAPIC that manages the ISA IRQs (0-15).
5337 @@ -915,24 +798,13 @@
5338 if (++mp_irq_entries == MAX_IRQ_SOURCES)
5339 panic("Max # of irq sources exceeded!\n");
5340 }
5341 -
5342 - return;
5343 }
5344
5345 -#define MAX_GSI_NUM 4096
5346 -
5347 int mp_register_gsi(u32 gsi, int triggering, int polarity)
5348 {
5349 - int ioapic = -1;
5350 - int ioapic_pin = 0;
5351 - int idx, bit = 0;
5352 - static int pci_irq = 16;
5353 - /*
5354 - * Mapping between Global System Interrupts, which
5355 - * represent all possible interrupts, to the IRQs
5356 - * assigned to actual devices.
5357 - */
5358 - static int gsi_to_irq[MAX_GSI_NUM];
5359 + int ioapic = -1;
5360 + int ioapic_pin = 0;
5361 + int idx, bit = 0;
5362
5363 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
5364 return gsi;
5365 @@ -965,47 +837,14 @@
5366 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
5367 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
5368 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
5369 - return gsi_to_irq[gsi];
5370 + return gsi;
5371 }
5372
5373 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
5374
5375 - if (triggering == ACPI_LEVEL_SENSITIVE) {
5376 - /*
5377 - * For PCI devices assign IRQs in order, avoiding gaps
5378 - * due to unused I/O APIC pins.
5379 - */
5380 - int irq = gsi;
5381 - if (gsi < MAX_GSI_NUM) {
5382 - /*
5383 - * Retain the VIA chipset work-around (gsi > 15), but
5384 - * avoid a problem where the 8254 timer (IRQ0) is setup
5385 - * via an override (so it's not on pin 0 of the ioapic),
5386 - * and at the same time, the pin 0 interrupt is a PCI
5387 - * type. The gsi > 15 test could cause these two pins
5388 - * to be shared as IRQ0, and they are not shareable.
5389 - * So test for this condition, and if necessary, avoid
5390 - * the pin collision.
5391 - */
5392 - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
5393 - gsi = pci_irq++;
5394 - /*
5395 - * Don't assign IRQ used by ACPI SCI
5396 - */
5397 - if (gsi == acpi_fadt.sci_int)
5398 - gsi = pci_irq++;
5399 - gsi_to_irq[irq] = gsi;
5400 - } else {
5401 - printk(KERN_ERR "GSI %u is too high\n", gsi);
5402 - return gsi;
5403 - }
5404 - }
5405 -
5406 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
5407 triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
5408 polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
5409 return gsi;
5410 }
5411 -
5412 -#endif /*CONFIG_X86_IO_APIC*/
5413 #endif /*CONFIG_ACPI*/
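
The mpparse_64 hunks above drop the EISA/MCA bus types and shrink
mp_bus_id_to_type[] to a single DECLARE_BITMAP(mp_bus_not_pci, ...) --
one bit per bus answering "is this something other than PCI?". A sketch
of that bitmap pattern; my_record_bus() and MY_MAX_BUSSES are
hypothetical stand-ins:

  #include <linux/bitmap.h>
  #include <linux/string.h>

  #define MY_MAX_BUSSES 256
  static DECLARE_BITMAP(my_bus_not_pci, MY_MAX_BUSSES);

  static void my_record_bus(int busid, const char *type)
  {
          if (strncmp(type, "PCI", 3) == 0)
                  clear_bit(busid, my_bus_not_pci);       /* PCI bus */
          else
                  set_bit(busid, my_bus_not_pci);         /* ISA or anything else */
  }
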
5414 diff -Naur linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c
5415 --- linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5416 +++ linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5417 @@ -116,8 +116,7 @@
5418 {
5419 int i, rc;
5420
5421 - if (direction == DMA_NONE)
5422 - BUG();
5423 + BUG_ON(!valid_dma_direction(direction));
5424 WARN_ON(nents == 0 || sg[0].length == 0);
5425
5426 if (swiotlb) {
5427 @@ -148,7 +147,7 @@
5428 {
5429 int i;
5430
5431 - BUG_ON(direction == DMA_NONE);
5432 + BUG_ON(!valid_dma_direction(direction));
5433 if (swiotlb)
5434 swiotlb_unmap_sg(hwdev, sg, nents, direction);
5435 else {
5436 @@ -165,8 +164,7 @@
5437 {
5438 dma_addr_t dma_addr;
5439
5440 - BUG_ON(direction == DMA_NONE);
5441 -
5442 + BUG_ON(!valid_dma_direction(direction));
5443 if (swiotlb) {
5444 dma_addr = swiotlb_map_page(
5445 dev, page, offset, size, direction);
5446 @@ -183,7 +181,7 @@
5447 dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
5448 enum dma_data_direction direction)
5449 {
5450 - BUG_ON(direction == DMA_NONE);
5451 + BUG_ON(!valid_dma_direction(direction));
5452 if (swiotlb)
5453 swiotlb_unmap_page(dev, dma_address, size, direction);
5454 else
5455 @@ -365,8 +363,7 @@
5456 {
5457 dma_addr_t dma;
5458
5459 - if (direction == DMA_NONE)
5460 - BUG();
5461 + BUG_ON(!valid_dma_direction(direction));
5462 WARN_ON(size == 0);
5463
5464 if (swiotlb) {
5465 @@ -387,8 +384,7 @@
5466 dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
5467 enum dma_data_direction direction)
5468 {
5469 - if (direction == DMA_NONE)
5470 - BUG();
5471 + BUG_ON(!valid_dma_direction(direction));
5472 if (swiotlb)
5473 swiotlb_unmap_single(dev, dma_addr, size, direction);
5474 else
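
The pci-dma_32 hunks above replace the open-coded DMA_NONE checks with
BUG_ON(!valid_dma_direction(direction)). valid_dma_direction(), added to
linux/dma-mapping.h in this kernel generation, whitelists only the three
real directions, so out-of-range junk trips the check too, not just an
explicit DMA_NONE. A minimal sketch (my_check_direction() is a
hypothetical caller):

  #include <linux/kernel.h>
  #include <linux/dma-mapping.h>

  static void my_check_direction(enum dma_data_direction dir)
  {
          /* accepts DMA_BIDIRECTIONAL, DMA_TO_DEVICE, DMA_FROM_DEVICE */
          BUG_ON(!valid_dma_direction(dir));
  }
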
5475 diff -Naur linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c
5476 --- linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 20:51:21.000000000 +0200
5477 +++ linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 20:39:03.000000000 +0200
5478 @@ -3,7 +3,8 @@
5479 #include <linux/pci.h>
5480 #include <linux/cache.h>
5481 #include <linux/module.h>
5482 -#include <asm/dma-mapping.h>
5483 +#include <linux/dma-mapping.h>
5484 +
5485 #include <asm/proto.h>
5486 #include <asm/swiotlb.h>
5487 #include <asm/dma.h>
5488 diff -Naur linux-2.6.25/arch/x86/kernel/process_32-xen.c linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c
5489 --- linux-2.6.25/arch/x86/kernel/process_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5490 +++ linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5491 @@ -37,6 +37,7 @@
5492 #include <linux/kallsyms.h>
5493 #include <linux/ptrace.h>
5494 #include <linux/random.h>
5495 +#include <linux/personality.h>
5496
5497 #include <asm/uaccess.h>
5498 #include <asm/pgtable.h>
5499 @@ -186,7 +187,7 @@
5500 void cpu_idle_wait(void)
5501 {
5502 unsigned int cpu, this_cpu = get_cpu();
5503 - cpumask_t map;
5504 + cpumask_t map, tmp = current->cpus_allowed;
5505
5506 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5507 put_cpu();
5508 @@ -208,6 +209,8 @@
5509 }
5510 cpus_and(map, map, cpu_online_map);
5511 } while (!cpus_empty(map));
5512 +
5513 + set_cpus_allowed(current, tmp);
5514 }
5515 EXPORT_SYMBOL_GPL(cpu_idle_wait);
5516
5517 @@ -240,9 +243,9 @@
5518 if (user_mode_vm(regs))
5519 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
5520 printk(" EFLAGS: %08lx %s (%s %.*s)\n",
5521 - regs->eflags, print_tainted(), system_utsname.release,
5522 - (int)strcspn(system_utsname.version, " "),
5523 - system_utsname.version);
5524 + regs->eflags, print_tainted(), init_utsname()->release,
5525 + (int)strcspn(init_utsname()->version, " "),
5526 + init_utsname()->version);
5527 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
5528 regs->eax,regs->ebx,regs->ecx,regs->edx);
5529 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
5530 @@ -264,15 +267,6 @@
5531 * the "args".
5532 */
5533 extern void kernel_thread_helper(void);
5534 -__asm__(".section .text\n"
5535 - ".align 4\n"
5536 - "kernel_thread_helper:\n\t"
5537 - "movl %edx,%eax\n\t"
5538 - "pushl %edx\n\t"
5539 - "call *%ebx\n\t"
5540 - "pushl %eax\n\t"
5541 - "call do_exit\n"
5542 - ".previous");
5543
5544 /*
5545 * Create a kernel thread
5546 @@ -290,7 +284,7 @@
5547 regs.xes = __USER_DS;
5548 regs.orig_eax = -1;
5549 regs.eip = (unsigned long) kernel_thread_helper;
5550 - regs.xcs = GET_KERNEL_CS();
5551 + regs.xcs = __KERNEL_CS | get_kernel_rpl();
5552 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
5553
5554 /* Ok, create the new process.. */
5555 @@ -369,13 +363,12 @@
5556
5557 tsk = current;
5558 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
5559 - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5560 + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
5561 + IO_BITMAP_BYTES, GFP_KERNEL);
5562 if (!p->thread.io_bitmap_ptr) {
5563 p->thread.io_bitmap_max = 0;
5564 return -ENOMEM;
5565 }
5566 - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
5567 - IO_BITMAP_BYTES);
5568 set_tsk_thread_flag(p, TIF_IO_BITMAP);
5569 }
5570
5571 @@ -850,7 +843,7 @@
5572
5573 unsigned long arch_align_stack(unsigned long sp)
5574 {
5575 - if (randomize_va_space)
5576 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5577 sp -= get_random_int() % 8192;
5578 return sp & ~0xf;
5579 }
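
Two small patterns in the process_32 hunks above: cpu_idle_wait() now
saves current->cpus_allowed before pinning itself and restores it on
exit, and the io-bitmap copy collapses kmalloc()+memcpy() into
kmemdup(). A sketch of the latter, assuming only the stock
linux/string.h helper; my_copy_io_bitmap() is hypothetical:

  #include <linux/slab.h>
  #include <linux/string.h>

  /* kmemdup() = kmalloc() + memcpy() in one call; returns NULL on
   * allocation failure, exactly as the open-coded pair did. */
  static void *my_copy_io_bitmap(const void *src, size_t bytes)
  {
          return kmemdup(src, bytes, GFP_KERNEL);
  }
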
5580 diff -Naur linux-2.6.25/arch/x86/kernel/process_64-xen.c linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c
5581 --- linux-2.6.25/arch/x86/kernel/process_64-xen.c 2008-05-23 20:51:11.000000000 +0200
5582 +++ linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c 2008-05-23 20:39:03.000000000 +0200
5583 @@ -89,25 +89,24 @@
5584 }
5585 EXPORT_SYMBOL(idle_notifier_unregister);
5586
5587 -enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
5588 -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
5589 -
5590 void enter_idle(void)
5591 {
5592 - __get_cpu_var(idle_state) = CPU_IDLE;
5593 + write_pda(isidle, 1);
5594 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
5595 }
5596
5597 static void __exit_idle(void)
5598 {
5599 - __get_cpu_var(idle_state) = CPU_NOT_IDLE;
5600 + if (test_and_clear_bit_pda(0, isidle) == 0)
5601 + return;
5602 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
5603 }
5604
5605 /* Called from interrupts to signify idle end */
5606 void exit_idle(void)
5607 {
5608 - if (current->pid | read_pda(irqcount))
5609 + /* idle loop has pid 0 */
5610 + if (current->pid)
5611 return;
5612 __exit_idle();
5613 }
5614 @@ -184,6 +183,9 @@
5615 play_dead();
5616 enter_idle();
5617 idle();
5618 + /* In many cases the interrupt that ended idle
5619 + has already called exit_idle. But some idle
5620 + loops can be woken up without an interrupt. */
5621 __exit_idle();
5622 }
5623
5624 @@ -196,7 +198,7 @@
5625 void cpu_idle_wait(void)
5626 {
5627 unsigned int cpu, this_cpu = get_cpu();
5628 - cpumask_t map;
5629 + cpumask_t map, tmp = current->cpus_allowed;
5630
5631 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5632 put_cpu();
5633 @@ -219,6 +221,8 @@
5634 }
5635 cpus_and(map, map, cpu_online_map);
5636 } while (!cpus_empty(map));
5637 +
5638 + set_cpus_allowed(current, tmp);
5639 }
5640 EXPORT_SYMBOL_GPL(cpu_idle_wait);
5641
5642 @@ -250,9 +254,9 @@
5643 print_modules();
5644 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
5645 current->pid, current->comm, print_tainted(),
5646 - system_utsname.release,
5647 - (int)strcspn(system_utsname.version, " "),
5648 - system_utsname.version);
5649 + init_utsname()->release,
5650 + (int)strcspn(init_utsname()->version, " "),
5651 + init_utsname()->version);
5652 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
5653 printk_address(regs->rip);
5654 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
5655 @@ -310,6 +314,7 @@
5656
5657 kfree(t->io_bitmap_ptr);
5658 t->io_bitmap_ptr = NULL;
5659 + clear_thread_flag(TIF_IO_BITMAP);
5660 /*
5661 * Careful, clear this in the TSS too:
5662 */
5663 @@ -340,6 +345,7 @@
5664 if (t->flags & _TIF_IA32)
5665 current_thread_info()->status |= TS_COMPAT;
5666 }
5667 + t->flags &= ~_TIF_DEBUG;
5668
5669 tsk->thread.debugreg0 = 0;
5670 tsk->thread.debugreg1 = 0;
5671 @@ -432,7 +438,7 @@
5672 asm("mov %%es,%0" : "=m" (p->thread.es));
5673 asm("mov %%ds,%0" : "=m" (p->thread.ds));
5674
5675 - if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
5676 + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
5677 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5678 if (!p->thread.io_bitmap_ptr) {
5679 p->thread.io_bitmap_max = 0;
5680 @@ -440,6 +446,7 @@
5681 }
5682 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
5683 IO_BITMAP_BYTES);
5684 + set_tsk_thread_flag(p, TIF_IO_BITMAP);
5685 }
5686
5687 /*
5688 @@ -474,6 +481,30 @@
5689 }
5690
5691 /*
5692 + * This special macro can be used to load a debugging register
5693 + */
5694 +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
5695 +
5696 +static inline void __switch_to_xtra(struct task_struct *prev_p,
5697 + struct task_struct *next_p)
5698 +{
5699 + struct thread_struct *prev, *next;
5700 +
5701 + prev = &prev_p->thread;
5702 + next = &next_p->thread;
5703 +
5704 + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
5705 + loaddebug(next, 0);
5706 + loaddebug(next, 1);
5707 + loaddebug(next, 2);
5708 + loaddebug(next, 3);
5709 + /* no 4 and 5 */
5710 + loaddebug(next, 6);
5711 + loaddebug(next, 7);
5712 + }
5713 +}
5714 +
5715 +/*
5716 * switch_to(x,y) should switch tasks from x to y.
5717 *
5718 * This could still be optimized:
5719 @@ -495,6 +526,10 @@
5720 struct physdev_set_iobitmap iobmp_op;
5721 multicall_entry_t _mcl[8], *mcl = _mcl;
5722
5723 + /* we're going to use this soon, after a few expensive things */
5724 + if (next_p->fpu_counter>5)
5725 + prefetch(&next->i387.fxsave);
5726 +
5727 /*
5728 * This is basically '__unlazy_fpu', except that we queue a
5729 * multicall to indicate FPU task switch, rather than
5730 @@ -507,7 +542,8 @@
5731 mcl->op = __HYPERVISOR_fpu_taskswitch;
5732 mcl->args[0] = 1;
5733 mcl++;
5734 - }
5735 + } else
5736 + prev_p->fpu_counter = 0;
5737
5738 /*
5739 * Reload esp0, LDT and the page table pointer:
5740 @@ -587,21 +623,29 @@
5741 write_pda(oldrsp, next->userrsp);
5742 write_pda(pcurrent, next_p);
5743 write_pda(kernelstack,
5744 - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5745 + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5746 +#ifdef CONFIG_CC_STACKPROTECTOR
5747 + write_pda(stack_canary, next_p->stack_canary);
5748 +
5749 + /*
5750 + * Build time only check to make sure the stack_canary is at
5751 + * offset 40 in the pda; this is a gcc ABI requirement
5752 + */
5753 + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
5754 +#endif
5755
5756 /*
5757 * Now maybe reload the debug registers
5758 */
5759 - if (unlikely(next->debugreg7)) {
5760 - set_debugreg(next->debugreg0, 0);
5761 - set_debugreg(next->debugreg1, 1);
5762 - set_debugreg(next->debugreg2, 2);
5763 - set_debugreg(next->debugreg3, 3);
5764 - /* no 4 and 5 */
5765 - set_debugreg(next->debugreg6, 6);
5766 - set_debugreg(next->debugreg7, 7);
5767 - }
5768 + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
5769 + __switch_to_xtra(prev_p, next_p);
5770
5771 + /* If the task has used fpu the last 5 timeslices, just do a full
5772 + * restore of the math state immediately to avoid the trap; the
5773 + * chances of needing FPU soon are obviously high now
5774 + */
5775 + if (next_p->fpu_counter>5)
5776 + math_state_restore();
5777 return prev_p;
5778 }
5779
5780 @@ -821,7 +865,7 @@
5781
5782 unsigned long arch_align_stack(unsigned long sp)
5783 {
5784 - if (randomize_va_space)
5785 + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5786 sp -= get_random_int() % 8192;
5787 return sp & ~0xf;
5788 }
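
The __switch_to_xtra() hunk above gates the debug-register reload behind a per-task flag test and indexes the numbered debugreg fields with a token-pasting macro: loaddebug(next, 6) expands to set_debugreg(next->debugreg6, 6). A runnable userspace sketch of the same preprocessor trick (struct and function names are illustrative):

        #include <stdio.h>

        struct thread_sketch {
                unsigned long debugreg0, debugreg6, debugreg7;
        };

        /* "r" is pasted onto the field name: loaddebug(t, 6) -> t->debugreg6 */
        #define loaddebug(thread, r) load_one((thread)->debugreg##r, r)

        static void load_one(unsigned long val, int reg)
        {
                printf("dr%d <- %#lx\n", reg, val);  /* stands in for set_debugreg() */
        }

        int main(void)
        {
                struct thread_sketch t = { 0x1, 0xffff0ff0, 0x400 };

                loaddebug(&t, 0);
                loaddebug(&t, 6);
                loaddebug(&t, 7);
                return 0;
        }
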
5789 diff -Naur linux-2.6.25/arch/x86/kernel/setup_32-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c
5790 --- linux-2.6.25/arch/x86/kernel/setup_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5791 +++ linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5792 @@ -56,6 +56,7 @@
5793 #include <asm/apic.h>
5794 #include <asm/e820.h>
5795 #include <asm/mpspec.h>
5796 +#include <asm/mmzone.h>
5797 #include <asm/setup.h>
5798 #include <asm/arch_hooks.h>
5799 #include <asm/sections.h>
5800 @@ -105,18 +106,6 @@
5801
5802 unsigned long mmu_cr4_features;
5803
5804 -#ifdef CONFIG_ACPI
5805 - int acpi_disabled = 0;
5806 -#else
5807 - int acpi_disabled = 1;
5808 -#endif
5809 -EXPORT_SYMBOL(acpi_disabled);
5810 -
5811 -#ifdef CONFIG_ACPI
5812 -int __initdata acpi_force = 0;
5813 -extern acpi_interrupt_flags acpi_sci_flags;
5814 -#endif
5815 -
5816 /* for MCA, but anyone else can use it if they want */
5817 unsigned int machine_id;
5818 #ifdef CONFIG_MCA
5819 @@ -170,7 +159,6 @@
5820 #endif
5821
5822 extern void early_cpu_init(void);
5823 -extern void generic_apic_probe(char *);
5824 extern int root_mountflags;
5825
5826 unsigned long saved_videomode;
5827 @@ -243,9 +231,6 @@
5828 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
5829 } };
5830
5831 -#define ADAPTER_ROM_RESOURCES \
5832 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
5833 -
5834 static struct resource video_rom_resource = {
5835 .name = "Video ROM",
5836 .start = 0xc0000,
5837 @@ -307,9 +292,6 @@
5838 .flags = IORESOURCE_BUSY | IORESOURCE_IO
5839 } };
5840
5841 -#define STANDARD_IO_RESOURCES \
5842 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
5843 -
5844 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
5845
5846 static int __init romchecksum(unsigned char *rom, unsigned long length)
5847 @@ -372,7 +354,7 @@
5848 }
5849
5850 /* check for adapter roms on 2k boundaries */
5851 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
5852 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
5853 rom = isa_bus_to_virt(start);
5854 if (!romsignature(rom))
5855 continue;
5856 @@ -764,246 +746,152 @@
5857 }
5858 #endif
5859
5860 -static void __init parse_cmdline_early (char ** cmdline_p)
5861 +static int __initdata user_defined_memmap = 0;
5862 +
5863 +/*
5864 + * "mem=nopentium" disables the 4MB page tables.
5865 + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
5866 + * to <mem>, overriding the bios size.
5867 + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
5868 + * <start> to <start>+<mem>, overriding the bios size.
5869 + *
5870 + * HPA tells me bootloaders need to parse mem=, so no new
5871 + * option should be mem= [also see Documentation/i386/boot.txt]
5872 + */
5873 +static int __init parse_mem(char *arg)
5874 {
5875 - char c = ' ', *to = command_line, *from = saved_command_line;
5876 - int len = 0, max_cmdline;
5877 - int userdef = 0;
5878 -
5879 - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
5880 - max_cmdline = COMMAND_LINE_SIZE;
5881 - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
5882 - /* Save unparsed command line copy for /proc/cmdline */
5883 - saved_command_line[max_cmdline-1] = '\0';
5884 -
5885 - for (;;) {
5886 - if (c != ' ')
5887 - goto next_char;
5888 - /*
5889 - * "mem=nopentium" disables the 4MB page tables.
5890 - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
5891 - * to <mem>, overriding the bios size.
5892 - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
5893 - * <start> to <start>+<mem>, overriding the bios size.
5894 - *
5895 - * HPA tells me bootloaders need to parse mem=, so no new
5896 - * option should be mem= [also see Documentation/i386/boot.txt]
5897 - */
5898 - if (!memcmp(from, "mem=", 4)) {
5899 - if (to != command_line)
5900 - to--;
5901 - if (!memcmp(from+4, "nopentium", 9)) {
5902 - from += 9+4;
5903 - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
5904 - disable_pse = 1;
5905 - } else {
5906 - /* If the user specifies memory size, we
5907 - * limit the BIOS-provided memory map to
5908 - * that size. exactmap can be used to specify
5909 - * the exact map. mem=number can be used to
5910 - * trim the existing memory map.
5911 - */
5912 - unsigned long long mem_size;
5913 -
5914 - mem_size = memparse(from+4, &from);
5915 - limit_regions(mem_size);
5916 - userdef=1;
5917 - }
5918 - }
5919 + if (!arg)
5920 + return -EINVAL;
5921
5922 - else if (!memcmp(from, "memmap=", 7)) {
5923 - if (to != command_line)
5924 - to--;
5925 - if (!memcmp(from+7, "exactmap", 8)) {
5926 -#ifdef CONFIG_CRASH_DUMP
5927 - /* If we are doing a crash dump, we
5928 - * still need to know the real mem
5929 - * size before original memory map is
5930 - * reset.
5931 - */
5932 - find_max_pfn();
5933 - saved_max_pfn = max_pfn;
5934 -#endif
5935 - from += 8+7;
5936 - e820.nr_map = 0;
5937 - userdef = 1;
5938 - } else {
5939 - /* If the user specifies memory size, we
5940 - * limit the BIOS-provided memory map to
5941 - * that size. exactmap can be used to specify
5942 - * the exact map. mem=number can be used to
5943 - * trim the existing memory map.
5944 - */
5945 - unsigned long long start_at, mem_size;
5946 + if (strcmp(arg, "nopentium") == 0) {
5947 + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
5948 + disable_pse = 1;
5949 + } else {
5950 + /* If the user specifies memory size, we
5951 + * limit the BIOS-provided memory map to
5952 + * that size. exactmap can be used to specify
5953 + * the exact map. mem=number can be used to
5954 + * trim the existing memory map.
5955 + */
5956 + unsigned long long mem_size;
5957
5958 - mem_size = memparse(from+7, &from);
5959 - if (*from == '@') {
5960 - start_at = memparse(from+1, &from);
5961 - add_memory_region(start_at, mem_size, E820_RAM);
5962 - } else if (*from == '#') {
5963 - start_at = memparse(from+1, &from);
5964 - add_memory_region(start_at, mem_size, E820_ACPI);
5965 - } else if (*from == '$') {
5966 - start_at = memparse(from+1, &from);
5967 - add_memory_region(start_at, mem_size, E820_RESERVED);
5968 - } else {
5969 - limit_regions(mem_size);
5970 - userdef=1;
5971 - }
5972 - }
5973 - }
5974 -
5975 - else if (!memcmp(from, "noexec=", 7))
5976 - noexec_setup(from + 7);
5977 + mem_size = memparse(arg, &arg);
5978 + limit_regions(mem_size);
5979 + user_defined_memmap = 1;
5980 + }
5981 + return 0;
5982 +}
5983 +early_param("mem", parse_mem);
5984
5985 +static int __init parse_memmap(char *arg)
5986 +{
5987 + if (!arg)
5988 + return -EINVAL;
5989
5990 -#ifdef CONFIG_X86_MPPARSE
5991 - /*
5992 - * If the BIOS enumerates physical processors before logical,
5993 - * maxcpus=N at enumeration-time can be used to disable HT.
5994 + if (strcmp(arg, "exactmap") == 0) {
5995 +#ifdef CONFIG_CRASH_DUMP
5996 + /* If we are doing a crash dump, we
5997 + * still need to know the real mem
5998 + * size before original memory map is
5999 + * reset.
6000 */
6001 - else if (!memcmp(from, "maxcpus=", 8)) {
6002 - extern unsigned int maxcpus;
6003 -
6004 - maxcpus = simple_strtoul(from + 8, NULL, 0);
6005 - }
6006 + find_max_pfn();
6007 + saved_max_pfn = max_pfn;
6008 #endif
6009 + e820.nr_map = 0;
6010 + user_defined_memmap = 1;
6011 + } else {
6012 + /* If the user specifies memory size, we
6013 + * limit the BIOS-provided memory map to
6014 + * that size. exactmap can be used to specify
6015 + * the exact map. mem=number can be used to
6016 + * trim the existing memory map.
6017 + */
6018 + unsigned long long start_at, mem_size;
6019
6020 -#ifdef CONFIG_ACPI
6021 - /* "acpi=off" disables both ACPI table parsing and interpreter */
6022 - else if (!memcmp(from, "acpi=off", 8)) {
6023 - disable_acpi();
6024 - }
6025 -
6026 - /* acpi=force to over-ride black-list */
6027 - else if (!memcmp(from, "acpi=force", 10)) {
6028 - acpi_force = 1;
6029 - acpi_ht = 1;
6030 - acpi_disabled = 0;
6031 - }
6032 -
6033 - /* acpi=strict disables out-of-spec workarounds */
6034 - else if (!memcmp(from, "acpi=strict", 11)) {
6035 - acpi_strict = 1;
6036 - }
6037 -
6038 - /* Limit ACPI just to boot-time to enable HT */
6039 - else if (!memcmp(from, "acpi=ht", 7)) {
6040 - if (!acpi_force)
6041 - disable_acpi();
6042 - acpi_ht = 1;
6043 - }
6044 -
6045 - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
6046 - else if (!memcmp(from, "pci=noacpi", 10)) {
6047 - acpi_disable_pci();
6048 - }
6049 - /* "acpi=noirq" disables ACPI interrupt routing */
6050 - else if (!memcmp(from, "acpi=noirq", 10)) {
6051 - acpi_noirq_set();
6052 + mem_size = memparse(arg, &arg);
6053 + if (*arg == '@') {
6054 + start_at = memparse(arg+1, &arg);
6055 + add_memory_region(start_at, mem_size, E820_RAM);
6056 + } else if (*arg == '#') {
6057 + start_at = memparse(arg+1, &arg);
6058 + add_memory_region(start_at, mem_size, E820_ACPI);
6059 + } else if (*arg == '$') {
6060 + start_at = memparse(arg+1, &arg);
6061 + add_memory_region(start_at, mem_size, E820_RESERVED);
6062 + } else {
6063 + limit_regions(mem_size);
6064 + user_defined_memmap = 1;
6065 }
6066 + }
6067 + return 0;
6068 +}
6069 +early_param("memmap", parse_memmap);
6070
6071 - else if (!memcmp(from, "acpi_sci=edge", 13))
6072 - acpi_sci_flags.trigger = 1;
6073 -
6074 - else if (!memcmp(from, "acpi_sci=level", 14))
6075 - acpi_sci_flags.trigger = 3;
6076 +#ifdef CONFIG_PROC_VMCORE
6077 +/* elfcorehdr= specifies the location of elf core header
6078 + * stored by the crashed kernel.
6079 + */
6080 +static int __init parse_elfcorehdr(char *arg)
6081 +{
6082 + if (!arg)
6083 + return -EINVAL;
6084
6085 - else if (!memcmp(from, "acpi_sci=high", 13))
6086 - acpi_sci_flags.polarity = 1;
6087 + elfcorehdr_addr = memparse(arg, &arg);
6088 + return 0;
6089 +}
6090 +early_param("elfcorehdr", parse_elfcorehdr);
6091 +#endif /* CONFIG_PROC_VMCORE */
6092
6093 - else if (!memcmp(from, "acpi_sci=low", 12))
6094 - acpi_sci_flags.polarity = 3;
6095 +/*
6096 + * highmem=size forces highmem to be exactly 'size' bytes.
6097 + * This works even on boxes that have no highmem otherwise.
6098 + * This also works to reduce highmem size on bigger boxes.
6099 + */
6100 +static int __init parse_highmem(char *arg)
6101 +{
6102 + if (!arg)
6103 + return -EINVAL;
6104
6105 -#ifdef CONFIG_X86_IO_APIC
6106 - else if (!memcmp(from, "acpi_skip_timer_override", 24))
6107 - acpi_skip_timer_override = 1;
6108 + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
6109 + return 0;
6110 +}
6111 +early_param("highmem", parse_highmem);
6112
6113 - if (!memcmp(from, "disable_timer_pin_1", 19))
6114 - disable_timer_pin_1 = 1;
6115 - if (!memcmp(from, "enable_timer_pin_1", 18))
6116 - disable_timer_pin_1 = -1;
6117 -
6118 - /* disable IO-APIC */
6119 - else if (!memcmp(from, "noapic", 6))
6120 - disable_ioapic_setup();
6121 -#endif /* CONFIG_X86_IO_APIC */
6122 -#endif /* CONFIG_ACPI */
6123 +/*
6124 + * vmalloc=size forces the vmalloc area to be exactly 'size'
6125 + * bytes. This can be used to increase (or decrease) the
6126 + * vmalloc area - the default is 128m.
6127 + */
6128 +static int __init parse_vmalloc(char *arg)
6129 +{
6130 + if (!arg)
6131 + return -EINVAL;
6132
6133 -#ifdef CONFIG_X86_LOCAL_APIC
6134 - /* enable local APIC */
6135 - else if (!memcmp(from, "lapic", 5))
6136 - lapic_enable();
6137 -
6138 - /* disable local APIC */
6139 - else if (!memcmp(from, "nolapic", 6))
6140 - lapic_disable();
6141 -#endif /* CONFIG_X86_LOCAL_APIC */
6142 + __VMALLOC_RESERVE = memparse(arg, &arg);
6143 + return 0;
6144 +}
6145 +early_param("vmalloc", parse_vmalloc);
6146
6147 -#ifdef CONFIG_KEXEC
6148 - /* crashkernel=size@addr specifies the location to reserve for
6149 - * a crash kernel. By reserving this memory we guarantee
6150 - * that linux never set's it up as a DMA target.
6151 - * Useful for holding code to do something appropriate
6152 - * after a kernel panic.
6153 - */
6154 - else if (!memcmp(from, "crashkernel=", 12)) {
6155 #ifndef CONFIG_XEN
6156 - unsigned long size, base;
6157 - size = memparse(from+12, &from);
6158 - if (*from == '@') {
6159 - base = memparse(from+1, &from);
6160 - /* FIXME: Do I want a sanity check
6161 - * to validate the memory range?
6162 - */
6163 - crashk_res.start = base;
6164 - crashk_res.end = base + size - 1;
6165 - }
6166 -#else
6167 - printk("Ignoring crashkernel command line, "
6168 - "parameter will be supplied by xen\n");
6169 -#endif
6170 - }
6171 -#endif
6172 -#ifdef CONFIG_PROC_VMCORE
6173 - /* elfcorehdr= specifies the location of elf core header
6174 - * stored by the crashed kernel.
6175 - */
6176 - else if (!memcmp(from, "elfcorehdr=", 11))
6177 - elfcorehdr_addr = memparse(from+11, &from);
6178 -#endif
6179 +/*
6180 + * reservetop=size reserves a hole at the top of the kernel address space which
6181 + * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
6182 + * so relocating the fixmap can be done before paging initialization.
6183 + */
6184 +static int __init parse_reservetop(char *arg)
6185 +{
6186 + unsigned long address;
6187
6188 - /*
6189 - * highmem=size forces highmem to be exactly 'size' bytes.
6190 - * This works even on boxes that have no highmem otherwise.
6191 - * This also works to reduce highmem size on bigger boxes.
6192 - */
6193 - else if (!memcmp(from, "highmem=", 8))
6194 - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
6195 -
6196 - /*
6197 - * vmalloc=size forces the vmalloc area to be exactly 'size'
6198 - * bytes. This can be used to increase (or decrease) the
6199 - * vmalloc area - the default is 128m.
6200 - */
6201 - else if (!memcmp(from, "vmalloc=", 8))
6202 - __VMALLOC_RESERVE = memparse(from+8, &from);
6203 + if (!arg)
6204 + return -EINVAL;
6205
6206 - next_char:
6207 - c = *(from++);
6208 - if (!c)
6209 - break;
6210 - if (COMMAND_LINE_SIZE <= ++len)
6211 - break;
6212 - *(to++) = c;
6213 - }
6214 - *to = '\0';
6215 - *cmdline_p = command_line;
6216 - if (userdef) {
6217 - printk(KERN_INFO "user-defined physical RAM map:\n");
6218 - print_memory_map("user");
6219 - }
6220 + address = memparse(arg, &arg);
6221 + reserve_top_address(address);
6222 + return 0;
6223 }
6224 +early_param("reservetop", parse_reservetop);
6225 +#endif
6226
6227 /*
6228 * Callback for efi_memory_walk.
6229 @@ -1024,7 +912,7 @@
6230 static int __init
6231 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
6232 {
6233 - memory_present(0, start, end);
6234 + memory_present(0, PFN_UP(start), PFN_DOWN(end));
6235 return 0;
6236 }
6237
6238 @@ -1291,6 +1179,14 @@
6239 }
6240 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
6241 pages_to_mb(highend_pfn - highstart_pfn));
6242 + num_physpages = highend_pfn;
6243 + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
6244 +#else
6245 + num_physpages = max_low_pfn;
6246 + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
6247 +#endif
6248 +#ifdef CONFIG_FLATMEM
6249 + max_mapnr = num_physpages;
6250 #endif
6251 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
6252 pages_to_mb(max_low_pfn));
6253 @@ -1302,22 +1198,19 @@
6254
6255 void __init zone_sizes_init(void)
6256 {
6257 - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
6258 - unsigned int max_dma, low;
6259 -
6260 - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6261 - low = max_low_pfn;
6262 -
6263 - if (low < max_dma)
6264 - zones_size[ZONE_DMA] = low;
6265 - else {
6266 - zones_size[ZONE_DMA] = max_dma;
6267 - zones_size[ZONE_NORMAL] = low - max_dma;
6268 + unsigned long max_zone_pfns[MAX_NR_ZONES];
6269 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
6270 + max_zone_pfns[ZONE_DMA] =
6271 + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6272 + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
6273 #ifdef CONFIG_HIGHMEM
6274 - zones_size[ZONE_HIGHMEM] = highend_pfn - low;
6275 + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
6276 + add_active_range(0, 0, highend_pfn);
6277 +#else
6278 + add_active_range(0, 0, max_low_pfn);
6279 #endif
6280 - }
6281 - free_area_init(zones_size);
6282 +
6283 + free_area_init_nodes(max_zone_pfns);
6284 }
6285 #else
6286 extern unsigned long __init setup_memory(void);
6287 @@ -1374,6 +1267,7 @@
6288 */
6289 acpi_reserve_bootmem();
6290 #endif
6291 + numa_kva_reserve();
6292 #endif /* !CONFIG_XEN */
6293
6294 #ifdef CONFIG_BLK_DEV_INITRD
6295 @@ -1559,7 +1453,7 @@
6296 request_resource(&iomem_resource, &video_ram_resource);
6297
6298 /* request I/O space for devices used on all i[345]86 PCs */
6299 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6300 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6301 request_resource(&ioport_resource, &standard_io_resources[i]);
6302 return 0;
6303 }
6304 @@ -1700,17 +1594,19 @@
6305 data_resource.start = virt_to_phys(_etext);
6306 data_resource.end = virt_to_phys(_edata)-1;
6307
6308 - parse_cmdline_early(cmdline_p);
6309 + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
6310 + i = COMMAND_LINE_SIZE;
6311 + memcpy(saved_command_line, xen_start_info->cmd_line, i);
6312 + saved_command_line[i - 1] = '\0';
6313 + parse_early_param();
6314
6315 -#ifdef CONFIG_EARLY_PRINTK
6316 - {
6317 - char *s = strstr(*cmdline_p, "earlyprintk=");
6318 - if (s) {
6319 - setup_early_printk(strchr(s, '=') + 1);
6320 - printk("early console enabled\n");
6321 - }
6322 + if (user_defined_memmap) {
6323 + printk(KERN_INFO "user-defined physical RAM map:\n");
6324 + print_memory_map("user");
6325 }
6326 -#endif
6327 +
6328 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6329 + *cmdline_p = command_line;
6330
6331 max_low_pfn = setup_memory();
6332
6333 @@ -1817,7 +1713,7 @@
6334 dmi_scan_machine();
6335
6336 #ifdef CONFIG_X86_GENERICARCH
6337 - generic_apic_probe(*cmdline_p);
6338 + generic_apic_probe();
6339 #endif
6340 if (efi_enabled)
6341 efi_map_memmap();
6342 @@ -1838,9 +1734,11 @@
6343 acpi_boot_table_init();
6344 #endif
6345
6346 +#ifdef CONFIG_PCI
6347 #ifdef CONFIG_X86_IO_APIC
6348 check_acpi_pci(); /* Checks more than just ACPI actually */
6349 #endif
6350 +#endif
6351
6352 #ifdef CONFIG_ACPI
6353 acpi_boot_init();
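
The rewrite above splits one monolithic parse_cmdline_early() scanner into small per-option handlers registered with early_param(); parse_early_param(), called from setup_arch() as the hunk above shows, feeds each name=value pair from the command line to its handler. A sketch of the handler shape, in kernel context only (the option name "widget" is hypothetical):

        #include <linux/init.h>
        #include <linux/kernel.h>

        /* Hypothetical boot option "widget=<value>": returning 0 marks the
         * option as consumed, -EINVAL flags a missing or malformed value,
         * matching the parse_mem()/parse_memmap() handlers above. */
        static int __init parse_widget(char *arg)
        {
                if (!arg)
                        return -EINVAL;
                /* ... record the value, e.g. with memparse(arg, &arg) ... */
                return 0;
        }
        early_param("widget", parse_widget);
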
6354 diff -Naur linux-2.6.25/arch/x86/kernel/setup64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c
6355 --- linux-2.6.25/arch/x86/kernel/setup64-xen.c 2008-05-23 20:51:11.000000000 +0200
6356 +++ linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c 2008-05-23 20:39:03.000000000 +0200
6357 @@ -31,7 +31,7 @@
6358 #include <asm/hypervisor.h>
6359 #endif
6360
6361 -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
6362 +char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
6363
6364 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
6365
6366 @@ -55,8 +55,10 @@
6367 on Enable(default)
6368 off Disable
6369 */
6370 -int __init nonx_setup(char *str)
6371 +static int __init nonx_setup(char *str)
6372 {
6373 + if (!str)
6374 + return -EINVAL;
6375 if (!strncmp(str, "on", 2)) {
6376 __supported_pte_mask |= _PAGE_NX;
6377 do_not_nx = 0;
6378 @@ -64,9 +66,9 @@
6379 do_not_nx = 1;
6380 __supported_pte_mask &= ~_PAGE_NX;
6381 }
6382 - return 1;
6383 + return 0;
6384 }
6385 -__setup("noexec=", nonx_setup); /* parsed early actually */
6386 +early_param("noexec", nonx_setup);
6387
6388 int force_personality32 = 0;
6389
6390 @@ -102,12 +104,9 @@
6391 #endif
6392
6393 /* Copy section for each CPU (we discard the original) */
6394 - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
6395 -#ifdef CONFIG_MODULES
6396 - if (size < PERCPU_ENOUGH_ROOM)
6397 - size = PERCPU_ENOUGH_ROOM;
6398 -#endif
6399 + size = PERCPU_ENOUGH_ROOM;
6400
6401 + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
6402 for_each_cpu_mask (i, cpu_possible_map) {
6403 char *ptr;
6404
6405 @@ -169,7 +168,10 @@
6406 /* Setup up data that may be needed in __get_free_pages early */
6407 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
6408 #ifndef CONFIG_XEN
6409 + /* Memory clobbers used to order PDA accesses */
6410 + mb();
6411 wrmsrl(MSR_GS_BASE, pda);
6412 + mb();
6413 #else
6414 if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
6415 (unsigned long)pda))
6416 @@ -302,28 +304,17 @@
6417 * set up and load the per-CPU TSS
6418 */
6419 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
6420 + static const unsigned int order[N_EXCEPTION_STACKS] = {
6421 + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
6422 + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
6423 + };
6424 if (cpu) {
6425 - static const unsigned int order[N_EXCEPTION_STACKS] = {
6426 - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
6427 - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
6428 - };
6429 -
6430 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
6431 if (!estacks)
6432 panic("Cannot allocate exception stack %ld %d\n",
6433 v, cpu);
6434 }
6435 - switch (v + 1) {
6436 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
6437 - case DEBUG_STACK:
6438 - cpu_pda(cpu)->debugstack = (unsigned long)estacks;
6439 - estacks += DEBUG_STKSZ;
6440 - break;
6441 -#endif
6442 - default:
6443 - estacks += EXCEPTION_STKSZ;
6444 - break;
6445 - }
6446 + estacks += PAGE_SIZE << order[v];
6447 orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
6448 }
6449
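
The exception-stack hunk above sizes every stack from one lookup table built with gcc's range-designator extension: all slots get EXCEPTION_STACK_ORDER, then the [DEBUG_STACK - 1] entry overrides it, since later initializers win; hoisting the table out of the if (cpu) block lets "estacks += PAGE_SIZE << order[v]" use it for every stack. A runnable sketch of that initializer style (constants are illustrative):

        #include <stdio.h>

        #define N_STACKS        5
        #define DFL_ORDER       0
        #define DBG_SLOT        2
        #define DBG_ORDER       1

        int main(void)
        {
                /* gcc extension: fill the whole range, then override one slot */
                static const unsigned int order[N_STACKS] = {
                        [0 ... N_STACKS - 1] = DFL_ORDER,
                        [DBG_SLOT] = DBG_ORDER,
                };
                unsigned int i;

                for (i = 0; i < N_STACKS; i++)
                        printf("stack %u: order %u\n", i, order[i]);
                return 0;
        }
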
6450 diff -Naur linux-2.6.25/arch/x86/kernel/setup_64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c
6451 --- linux-2.6.25/arch/x86/kernel/setup_64-xen.c 2008-05-23 20:51:11.000000000 +0200
6452 +++ linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c 2008-05-23 20:39:03.000000000 +0200
6453 @@ -118,16 +118,6 @@
6454
6455 unsigned long mmu_cr4_features;
6456
6457 -int acpi_disabled;
6458 -EXPORT_SYMBOL(acpi_disabled);
6459 -#ifdef CONFIG_ACPI
6460 -extern int __initdata acpi_ht;
6461 -extern acpi_interrupt_flags acpi_sci_flags;
6462 -int __initdata acpi_force = 0;
6463 -#endif
6464 -
6465 -int acpi_numa __initdata;
6466 -
6467 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
6468 int bootloader_type;
6469
6470 @@ -151,10 +141,6 @@
6471
6472 struct edid_info edid_info;
6473 EXPORT_SYMBOL_GPL(edid_info);
6474 -struct e820map e820;
6475 -#ifdef CONFIG_XEN
6476 -struct e820map machine_e820;
6477 -#endif
6478
6479 extern int root_mountflags;
6480
6481 @@ -181,9 +167,6 @@
6482 .flags = IORESOURCE_BUSY | IORESOURCE_IO }
6483 };
6484
6485 -#define STANDARD_IO_RESOURCES \
6486 - (sizeof standard_io_resources / sizeof standard_io_resources[0])
6487 -
6488 #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
6489
6490 struct resource data_resource = {
6491 @@ -230,9 +213,6 @@
6492 .flags = IORESOURCE_ROM }
6493 };
6494
6495 -#define ADAPTER_ROM_RESOURCES \
6496 - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
6497 -
6498 static struct resource video_rom_resource = {
6499 .name = "Video ROM",
6500 .start = 0xc0000,
6501 @@ -309,7 +289,8 @@
6502 }
6503
6504 /* check for adapter roms on 2k boundaries */
6505 - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
6506 + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
6507 + start += 2048) {
6508 rom = isa_bus_to_virt(start);
6509 if (!romsignature(rom))
6510 continue;
6511 @@ -329,187 +310,22 @@
6512 }
6513 }
6514
6515 -/* Check for full argument with no trailing characters */
6516 -static int fullarg(char *p, char *arg)
6517 +#ifdef CONFIG_PROC_VMCORE
6518 +/* elfcorehdr= specifies the location of elf core header
6519 + * stored by the crashed kernel. This option will be passed
6520 + * by kexec loader to the capture kernel.
6521 + */
6522 +static int __init setup_elfcorehdr(char *arg)
6523 {
6524 - int l = strlen(arg);
6525 - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
6526 + char *end;
6527 + if (!arg)
6528 + return -EINVAL;
6529 + elfcorehdr_addr = memparse(arg, &end);
6530 + return end > arg ? 0 : -EINVAL;
6531 }
6532 -
6533 -static __init void parse_cmdline_early (char ** cmdline_p)
6534 -{
6535 - char c = ' ', *to = command_line, *from = COMMAND_LINE;
6536 - int len = 0;
6537 - int userdef = 0;
6538 -
6539 - for (;;) {
6540 - if (c != ' ')
6541 - goto next_char;
6542 -
6543 -#ifdef CONFIG_SMP
6544 - /*
6545 - * If the BIOS enumerates physical processors before logical,
6546 - * maxcpus=N at enumeration-time can be used to disable HT.
6547 - */
6548 - else if (!memcmp(from, "maxcpus=", 8)) {
6549 - extern unsigned int maxcpus;
6550 -
6551 - maxcpus = simple_strtoul(from + 8, NULL, 0);
6552 - }
6553 -#endif
6554 -#ifdef CONFIG_ACPI
6555 - /* "acpi=off" disables both ACPI table parsing and interpreter init */
6556 - if (fullarg(from,"acpi=off"))
6557 - disable_acpi();
6558 -
6559 - if (fullarg(from, "acpi=force")) {
6560 - /* add later when we do DMI horrors: */
6561 - acpi_force = 1;
6562 - acpi_disabled = 0;
6563 - }
6564 -
6565 - /* acpi=ht just means: do ACPI MADT parsing
6566 - at bootup, but don't enable the full ACPI interpreter */
6567 - if (fullarg(from, "acpi=ht")) {
6568 - if (!acpi_force)
6569 - disable_acpi();
6570 - acpi_ht = 1;
6571 - }
6572 - else if (fullarg(from, "pci=noacpi"))
6573 - acpi_disable_pci();
6574 - else if (fullarg(from, "acpi=noirq"))
6575 - acpi_noirq_set();
6576 -
6577 - else if (fullarg(from, "acpi_sci=edge"))
6578 - acpi_sci_flags.trigger = 1;
6579 - else if (fullarg(from, "acpi_sci=level"))
6580 - acpi_sci_flags.trigger = 3;
6581 - else if (fullarg(from, "acpi_sci=high"))
6582 - acpi_sci_flags.polarity = 1;
6583 - else if (fullarg(from, "acpi_sci=low"))
6584 - acpi_sci_flags.polarity = 3;
6585 -
6586 - /* acpi=strict disables out-of-spec workarounds */
6587 - else if (fullarg(from, "acpi=strict")) {
6588 - acpi_strict = 1;
6589 - }
6590 -#ifdef CONFIG_X86_IO_APIC
6591 - else if (fullarg(from, "acpi_skip_timer_override"))
6592 - acpi_skip_timer_override = 1;
6593 -#endif
6594 -#endif
6595 -
6596 -#ifndef CONFIG_XEN
6597 - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
6598 - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
6599 - disable_apic = 1;
6600 - }
6601 -
6602 - if (fullarg(from, "noapic"))
6603 - skip_ioapic_setup = 1;
6604 -
6605 - if (fullarg(from,"apic")) {
6606 - skip_ioapic_setup = 0;
6607 - ioapic_force = 1;
6608 - }
6609 -#endif
6610 -
6611 - if (!memcmp(from, "mem=", 4))
6612 - parse_memopt(from+4, &from);
6613 -
6614 - if (!memcmp(from, "memmap=", 7)) {
6615 - /* exactmap option is for used defined memory */
6616 - if (!memcmp(from+7, "exactmap", 8)) {
6617 -#ifdef CONFIG_CRASH_DUMP
6618 - /* If we are doing a crash dump, we
6619 - * still need to know the real mem
6620 - * size before original memory map is
6621 - * reset.
6622 - */
6623 - saved_max_pfn = e820_end_of_ram();
6624 -#endif
6625 - from += 8+7;
6626 - end_pfn_map = 0;
6627 - e820.nr_map = 0;
6628 - userdef = 1;
6629 - }
6630 - else {
6631 - parse_memmapopt(from+7, &from);
6632 - userdef = 1;
6633 - }
6634 - }
6635 -
6636 -#ifdef CONFIG_NUMA
6637 - if (!memcmp(from, "numa=", 5))
6638 - numa_setup(from+5);
6639 +early_param("elfcorehdr", setup_elfcorehdr);
6640 #endif
6641
6642 - if (!memcmp(from,"iommu=",6)) {
6643 - iommu_setup(from+6);
6644 - }
6645 -
6646 - if (fullarg(from,"oops=panic"))
6647 - panic_on_oops = 1;
6648 -
6649 - if (!memcmp(from, "noexec=", 7))
6650 - nonx_setup(from + 7);
6651 -
6652 -#ifdef CONFIG_KEXEC
6653 - /* crashkernel=size@addr specifies the location to reserve for
6654 - * a crash kernel. By reserving this memory we guarantee
6655 - * that linux never set's it up as a DMA target.
6656 - * Useful for holding code to do something appropriate
6657 - * after a kernel panic.
6658 - */
6659 - else if (!memcmp(from, "crashkernel=", 12)) {
6660 -#ifndef CONFIG_XEN
6661 - unsigned long size, base;
6662 - size = memparse(from+12, &from);
6663 - if (*from == '@') {
6664 - base = memparse(from+1, &from);
6665 - /* FIXME: Do I want a sanity check
6666 - * to validate the memory range?
6667 - */
6668 - crashk_res.start = base;
6669 - crashk_res.end = base + size - 1;
6670 - }
6671 -#else
6672 - printk("Ignoring crashkernel command line, "
6673 - "parameter will be supplied by xen\n");
6674 -#endif
6675 - }
6676 -#endif
6677 -
6678 -#ifdef CONFIG_PROC_VMCORE
6679 - /* elfcorehdr= specifies the location of elf core header
6680 - * stored by the crashed kernel. This option will be passed
6681 - * by kexec loader to the capture kernel.
6682 - */
6683 - else if(!memcmp(from, "elfcorehdr=", 11))
6684 - elfcorehdr_addr = memparse(from+11, &from);
6685 -#endif
6686 -
6687 -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
6688 - else if (!memcmp(from, "additional_cpus=", 16))
6689 - setup_additional_cpus(from+16);
6690 -#endif
6691 -
6692 - next_char:
6693 - c = *(from++);
6694 - if (!c)
6695 - break;
6696 - if (COMMAND_LINE_SIZE <= ++len)
6697 - break;
6698 - *(to++) = c;
6699 - }
6700 - if (userdef) {
6701 - printk(KERN_INFO "user-defined physical RAM map:\n");
6702 - e820_print_map("user");
6703 - }
6704 - *to = '\0';
6705 - *cmdline_p = command_line;
6706 -}
6707 -
6708 #ifndef CONFIG_NUMA
6709 static void __init
6710 contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
6711 @@ -521,10 +337,11 @@
6712 if (bootmap == -1L)
6713 panic("Cannot find bootmem map of size %ld\n",bootmap_size);
6714 bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
6715 + e820_register_active_regions(0, start_pfn, end_pfn);
6716 #ifdef CONFIG_XEN
6717 - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
6718 + free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
6719 #else
6720 - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
6721 + free_bootmem_with_active_regions(0, end_pfn);
6722 #endif
6723 reserve_bootmem(bootmap, bootmap_size);
6724 }
6725 @@ -587,6 +404,10 @@
6726 void __init setup_arch(char **cmdline_p)
6727 {
6728 #ifdef CONFIG_XEN
6729 + extern struct e820map machine_e820;
6730 +
6731 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6732 +
6733 /* Register a call for panic conditions. */
6734 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
6735
6736 @@ -612,6 +433,8 @@
6737
6738 ARCH_SETUP
6739 #else
6740 + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6741 +
6742 ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
6743 screen_info = SCREEN_INFO;
6744 edid_info = EDID_INFO;
6745 @@ -639,16 +462,22 @@
6746 data_resource.start = virt_to_phys(&_etext);
6747 data_resource.end = virt_to_phys(&_edata)-1;
6748
6749 - parse_cmdline_early(cmdline_p);
6750 -
6751 early_identify_cpu(&boot_cpu_data);
6752
6753 + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6754 + *cmdline_p = command_line;
6755 +
6756 + parse_early_param();
6757 +
6758 + finish_e820_parsing();
6759 +
6760 + e820_register_active_regions(0, 0, -1UL);
6761 /*
6762 * partially used pages are not usable - thus
6763 * we are rounding upwards:
6764 */
6765 end_pfn = e820_end_of_ram();
6766 - num_physpages = end_pfn; /* for pfn_valid */
6767 + num_physpages = end_pfn;
6768
6769 check_efer();
6770
6771 @@ -659,6 +488,14 @@
6772 if (is_initial_xendomain())
6773 dmi_scan_machine();
6774
6775 + /* How many end-of-memory variables you have, grandma! */
6776 + max_low_pfn = end_pfn;
6777 + max_pfn = end_pfn;
6778 + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
6779 +
6780 + /* Remove active ranges so rediscovery with NUMA-awareness happens */
6781 + remove_all_active_ranges();
6782 +
6783 #ifdef CONFIG_ACPI_NUMA
6784 /*
6785 * Parse SRAT to discover nodes.
6786 @@ -848,16 +685,16 @@
6787 BUG();
6788 }
6789
6790 +#ifdef CONFIG_ACPI
6791 if (!is_initial_xendomain()) {
6792 acpi_disabled = 1;
6793 -#ifdef CONFIG_ACPI
6794 acpi_ht = 0;
6795 -#endif
6796 }
6797 #endif
6798 +#endif
6799
6800 -#ifndef CONFIG_XEN
6801 - check_ioapic();
6802 +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
6803 + early_quirks();
6804 #endif
6805
6806 zap_low_mappings(0);
6807 @@ -917,6 +754,7 @@
6808 }
6809 #else
6810 e820_reserve_resources(e820.map, e820.nr_map);
6811 + e820_mark_nosave_regions();
6812 #endif
6813
6814 request_resource(&iomem_resource, &video_ram_resource);
6815 @@ -924,7 +762,7 @@
6816 {
6817 unsigned i;
6818 /* request I/O space for devices used on all i[345]86 PCs */
6819 - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6820 + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6821 request_resource(&ioport_resource, &standard_io_resources[i]);
6822 }
6823
6824 @@ -1108,7 +946,7 @@
6825 #endif
6826 }
6827
6828 -static void __init init_amd(struct cpuinfo_x86 *c)
6829 +static void __cpuinit init_amd(struct cpuinfo_x86 *c)
6830 {
6831 unsigned level;
6832
6833 @@ -1164,6 +1002,12 @@
6834
6835 /* Fix cpuid4 emulation for more */
6836 num_cache_leaves = 3;
6837 +
6838 + /* When there is only one core no need to synchronize RDTSC */
6839 + if (num_possible_cpus() == 1)
6840 + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6841 + else
6842 + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6843 }
6844
6845 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
6846 @@ -1245,8 +1089,7 @@
6847 node = first_node(node_online_map);
6848 numa_set_node(cpu, node);
6849
6850 - if (acpi_numa > 0)
6851 - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6852 + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6853 #endif
6854 }
6855
6856 @@ -1280,6 +1123,8 @@
6857 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
6858 (c->x86 == 0x6 && c->x86_model >= 0x0e))
6859 set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
6860 + if (c->x86 == 6)
6861 + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
6862 set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6863 c->x86_max_cores = intel_num_cpu_cores(c);
6864
6865 @@ -1498,8 +1343,8 @@
6866
6867 /* Intel-defined (#2) */
6868 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
6869 - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
6870 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6871 + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
6872 + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
6873 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6874
6875 /* VIA/Cyrix/Centaur-defined */
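
Several hunks in the two setup files above drop the hand-rolled STANDARD_IO_RESOURCES and ADAPTER_ROM_RESOURCES macros in favour of ARRAY_SIZE(), so each loop bound is derived from the array itself. A runnable sketch using the same definition the kernel keeps in <linux/kernel.h> (the port list is illustrative):

        #include <stdio.h>

        #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

        int main(void)
        {
                static const unsigned int ports[] = { 0x20, 0x40, 0x60, 0x80 };
                size_t i;

                /* adding or removing an entry cannot desynchronize the bound */
                for (i = 0; i < ARRAY_SIZE(ports); i++)
                        printf("port %#x\n", ports[i]);
                return 0;
        }
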
6876 diff -Naur linux-2.6.25/arch/x86/kernel/smp_32-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c
6877 --- linux-2.6.25/arch/x86/kernel/smp_32-xen.c 2008-05-23 20:51:11.000000000 +0200
6878 +++ linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c 2008-05-23 20:39:03.000000000 +0200
6879 @@ -279,8 +279,7 @@
6880 * 2) Leave the mm if we are in the lazy tlb mode.
6881 */
6882
6883 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
6884 - struct pt_regs *regs)
6885 +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
6886 {
6887 unsigned long cpu;
6888
6889 @@ -567,16 +566,14 @@
6890 * all the work is done automatically when
6891 * we return from the interrupt.
6892 */
6893 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
6894 - struct pt_regs *regs)
6895 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
6896 {
6897
6898 return IRQ_HANDLED;
6899 }
6900
6901 #include <linux/kallsyms.h>
6902 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
6903 - struct pt_regs *regs)
6904 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
6905 {
6906 void (*func) (void *info) = call_data->func;
6907 void *info = call_data->info;
6908 @@ -603,3 +600,69 @@
6909 return IRQ_HANDLED;
6910 }
6911
6912 +/*
6913 + * this function sends a 'generic call function' IPI to one other CPU
6914 + * in the system.
6915 + *
6916 + * cpu is a standard Linux logical CPU number.
6917 + */
6918 +static void
6919 +__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
6920 + int nonatomic, int wait)
6921 +{
6922 + struct call_data_struct data;
6923 + int cpus = 1;
6924 +
6925 + data.func = func;
6926 + data.info = info;
6927 + atomic_set(&data.started, 0);
6928 + data.wait = wait;
6929 + if (wait)
6930 + atomic_set(&data.finished, 0);
6931 +
6932 + call_data = &data;
6933 + wmb();
6934 + /* Send a message to the target CPU and wait for it to respond */
6935 + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
6936 +
6937 + /* Wait for response */
6938 + while (atomic_read(&data.started) != cpus)
6939 + cpu_relax();
6940 +
6941 + if (!wait)
6942 + return;
6943 +
6944 + while (atomic_read(&data.finished) != cpus)
6945 + cpu_relax();
6946 +}
6947 +
6948 +/*
6949 + * smp_call_function_single - Run a function on another CPU
6950 + * @func: The function to run. This must be fast and non-blocking.
6951 + * @info: An arbitrary pointer to pass to the function.
6952 + * @nonatomic: Currently unused.
6953 + * @wait: If true, wait until function has completed on other CPUs.
6954 + *
6955 + * Returns 0 on success, else a negative status code.
6956 + *
6957 + * Does not return until the remote CPU is nearly ready to execute <func>
6958 + * or is executing it or has already executed it.
6959 + */
6960 +
6961 +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
6962 + int nonatomic, int wait)
6963 +{
6964 + /* prevent preemption and reschedule on another processor */
6965 + int me = get_cpu();
6966 + if (cpu == me) {
6967 + WARN_ON(1);
6968 + put_cpu();
6969 + return -EBUSY;
6970 + }
6971 + spin_lock_bh(&call_lock);
6972 + __smp_call_function_single(cpu, func, info, nonatomic, wait);
6973 + spin_unlock_bh(&call_lock);
6974 + put_cpu();
6975 + return 0;
6976 +}
6977 +EXPORT_SYMBOL(smp_call_function_single);
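
The i386 smp_call_function_single() added above runs a callback on exactly one remote CPU, optionally waiting for completion, and refuses to target the calling CPU (it WARNs and returns -EBUSY). A kernel-context usage sketch against that signature (callback and variable names are hypothetical; later kernels dropped the unused nonatomic argument):

        #include <linux/smp.h>

        static void poke(void *info)
        {
                /* runs on the target CPU; must be fast and non-blocking */
                (*(int *)info)++;
        }

        static int poke_cpu(int target_cpu)
        {
                int hits = 0;

                /* nonatomic = 0 (unused), wait = 1: block until poke() ran */
                return smp_call_function_single(target_cpu, poke, &hits, 0, 1);
        }
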
6978 diff -Naur linux-2.6.25/arch/x86/kernel/smp_64-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c
6979 --- linux-2.6.25/arch/x86/kernel/smp_64-xen.c 2008-05-23 20:51:11.000000000 +0200
6980 +++ linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c 2008-05-23 20:39:03.000000000 +0200
6981 @@ -381,9 +381,8 @@
6982 /* prevent preemption and reschedule on another processor */
6983 int me = get_cpu();
6984 if (cpu == me) {
6985 - WARN_ON(1);
6986 put_cpu();
6987 - return -EBUSY;
6988 + return 0;
6989 }
6990 spin_lock_bh(&call_lock);
6991 __smp_call_function_single(cpu, func, info, nonatomic, wait);
6992 @@ -501,7 +500,7 @@
6993 #ifndef CONFIG_XEN
6994 asmlinkage void smp_reschedule_interrupt(void)
6995 #else
6996 -asmlinkage irqreturn_t smp_reschedule_interrupt(void)
6997 +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
6998 #endif
6999 {
7000 #ifndef CONFIG_XEN
7001 @@ -514,7 +513,7 @@
7002 #ifndef CONFIG_XEN
7003 asmlinkage void smp_call_function_interrupt(void)
7004 #else
7005 -asmlinkage irqreturn_t smp_call_function_interrupt(void)
7006 +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
7007 #endif
7008 {
7009 void (*func) (void *info) = call_data->func;
7010 @@ -545,31 +544,3 @@
7011 return IRQ_HANDLED;
7012 #endif
7013 }
7014 -
7015 -int safe_smp_processor_id(void)
7016 -{
7017 -#ifdef CONFIG_XEN
7018 - return smp_processor_id();
7019 -#else
7020 - unsigned apicid, i;
7021 -
7022 - if (disable_apic)
7023 - return 0;
7024 -
7025 - apicid = hard_smp_processor_id();
7026 - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
7027 - return apicid;
7028 -
7029 - for (i = 0; i < NR_CPUS; ++i) {
7030 - if (x86_cpu_to_apicid[i] == apicid)
7031 - return i;
7032 - }
7033 -
7034 - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
7035 - * or called too early. Either way, we must be CPU 0. */
7036 - if (x86_cpu_to_apicid[0] == BAD_APICID)
7037 - return 0;
7038 -
7039 - return 0; /* Should not happen */
7040 -#endif
7041 -}
7042 diff -Naur linux-2.6.25/arch/x86/kernel/time_32-xen.c linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c
7043 --- linux-2.6.25/arch/x86/kernel/time_32-xen.c 2008-05-23 20:51:22.000000000 +0200
7044 +++ linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c 2008-05-23 20:39:03.000000000 +0200
7045 @@ -89,7 +89,6 @@
7046 unsigned long vxtime_hz = PIT_TICK_RATE;
7047 struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
7048 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
7049 -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
7050 struct timespec __xtime __section_xtime;
7051 struct timezone __sys_tz __section_sys_tz;
7052 #endif
7053 @@ -97,8 +96,6 @@
7054 unsigned int cpu_khz; /* Detected as we calibrate the TSC */
7055 EXPORT_SYMBOL(cpu_khz);
7056
7057 -extern unsigned long wall_jiffies;
7058 -
7059 DEFINE_SPINLOCK(rtc_lock);
7060 EXPORT_SYMBOL(rtc_lock);
7061
7062 @@ -265,11 +262,10 @@
7063 time_t wtm_sec, xtime_sec;
7064 u64 tmp, wc_nsec;
7065
7066 - /* Adjust wall-clock time base based on wall_jiffies ticks. */
7067 + /* Adjust wall-clock time base. */
7068 wc_nsec = processed_system_time;
7069 wc_nsec += sec * (u64)NSEC_PER_SEC;
7070 wc_nsec += nsec;
7071 - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
7072
7073 /* Split wallclock base into seconds and nanoseconds. */
7074 tmp = wc_nsec;
7075 @@ -387,16 +383,10 @@
7076 shadow = &per_cpu(shadow_time, cpu);
7077
7078 do {
7079 - unsigned long lost;
7080 -
7081 local_time_version = shadow->version;
7082 seq = read_seqbegin(&xtime_lock);
7083
7084 usec = get_usec_offset(shadow);
7085 - lost = jiffies - wall_jiffies;
7086 -
7087 - if (unlikely(lost))
7088 - usec += lost * (USEC_PER_SEC / HZ);
7089
7090 sec = xtime.tv_sec;
7091 usec += (xtime.tv_nsec / NSEC_PER_USEC);
7092 @@ -519,7 +509,7 @@
7093 write_seqlock_irq(&xtime_lock);
7094
7095 sec = xtime.tv_sec;
7096 - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
7097 + nsec = xtime.tv_nsec;
7098 __normalize_time(&sec, &nsec);
7099
7100 op.cmd = XENPF_settime;
7101 @@ -593,42 +583,49 @@
7102 }
7103 #endif
7104
7105 -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
7106 unsigned long profile_pc(struct pt_regs *regs)
7107 {
7108 unsigned long pc = instruction_pointer(regs);
7109
7110 -#ifdef __x86_64__
7111 - /* Assume the lock function has either no stack frame or only a single word.
7112 - This checks if the address on the stack looks like a kernel text address.
7113 - There is a small window for false hits, but in that case the tick
7114 - is just accounted to the spinlock function.
7115 - Better would be to write these functions in assembler again
7116 - and check exactly. */
7117 +#if defined(CONFIG_SMP) || defined(__x86_64__)
7118 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
7119 - char *v = *(char **)regs->rsp;
7120 - if ((v >= _stext && v <= _etext) ||
7121 - (v >= _sinittext && v <= _einittext) ||
7122 - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
7123 - return (unsigned long)v;
7124 - return ((unsigned long *)regs->rsp)[1];
7125 +# ifdef CONFIG_FRAME_POINTER
7126 +# ifdef __i386__
7127 + return ((unsigned long *)regs->ebp)[1];
7128 +# else
7129 + return ((unsigned long *)regs->rbp)[1];
7130 +# endif
7131 +# else
7132 +# ifdef __i386__
7133 + unsigned long *sp;
7134 + if ((regs->xcs & 2) == 0)
7135 + sp = (unsigned long *)&regs->esp;
7136 + else
7137 + sp = (unsigned long *)regs->esp;
7138 +# else
7139 + unsigned long *sp = (unsigned long *)regs->rsp;
7140 +# endif
7141 + /* Return address is either directly at stack pointer
7142 + or above a saved eflags. Eflags has bits 22-31 zero,
7143 + kernel addresses don't. */
7144 + if (sp[0] >> 22)
7145 + return sp[0];
7146 + if (sp[1] >> 22)
7147 + return sp[1];
7148 +# endif
7149 }
7150 -#else
7151 - if (!user_mode_vm(regs) && in_lock_functions(pc))
7152 - return *(unsigned long *)(regs->ebp + 4);
7153 #endif
7154
7155 return pc;
7156 }
7157 EXPORT_SYMBOL(profile_pc);
7158 -#endif
7159
7160 /*
7161 * This is the same as the above, except we _also_ save the current
7162 * Time Stamp Counter value at the time of the timer interrupt, so that
7163 * we later on can estimate the time of day more exactly.
7164 */
7165 -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
7166 +irqreturn_t timer_interrupt(int irq, void *dev_id)
7167 {
7168 s64 delta, delta_cpu, stolen, blocked;
7169 u64 sched_time;
7170 @@ -686,10 +683,14 @@
7171 }
7172
7173 /* System-wide jiffy work. */
7174 - while (delta >= NS_PER_TICK) {
7175 - delta -= NS_PER_TICK;
7176 - processed_system_time += NS_PER_TICK;
7177 - do_timer(regs);
7178 + if (delta >= NS_PER_TICK) {
7179 + do_div(delta, NS_PER_TICK);
7180 + processed_system_time += delta * NS_PER_TICK;
7181 + while (delta > HZ) {
7182 + do_timer(HZ);
7183 + delta -= HZ;
7184 + }
7185 + do_timer(delta);
7186 }
7187
7188 if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
7189 @@ -734,7 +735,7 @@
7190 if (delta_cpu > 0) {
7191 do_div(delta_cpu, NS_PER_TICK);
7192 per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
7193 - if (user_mode_vm(regs))
7194 + if (user_mode_vm(get_irq_regs()))
7195 account_user_time(current, (cputime_t)delta_cpu);
7196 else
7197 account_system_time(current, HARDIRQ_OFFSET,
7198 @@ -748,10 +749,10 @@
7199 /* Local timer processing (see update_process_times()). */
7200 run_local_timers();
7201 if (rcu_pending(cpu))
7202 - rcu_check_callbacks(cpu, user_mode_vm(regs));
7203 + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
7204 scheduler_tick();
7205 run_posix_cpu_timers(current);
7206 - profile_tick(CPU_PROFILING, regs);
7207 + profile_tick(CPU_PROFILING);
7208
7209 return IRQ_HANDLED;
7210 }
7211 @@ -959,10 +960,11 @@
7212 /* Duplicate of time_init() below, with hpet_enable part added */
7213 static void __init hpet_time_init(void)
7214 {
7215 - xtime.tv_sec = get_cmos_time();
7216 - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7217 - set_normalized_timespec(&wall_to_monotonic,
7218 - -xtime.tv_sec, -xtime.tv_nsec);
7219 + struct timespec ts;
7220 + ts.tv_sec = get_cmos_time();
7221 + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7222 +
7223 + do_settimeofday(&ts);
7224
7225 if ((hpet_enable() >= 0) && hpet_use_timer) {
7226 printk("Using HPET for base-timer\n");
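
timer_interrupt() above illustrates the 2.6.19 interrupt-handler prototype change: handlers now receive only (int irq, void *dev_id), and code that still needs the interrupted register state fetches it with get_irq_regs(), as the user_mode_vm(get_irq_regs()) accounting checks do. A kernel-context sketch of the new handler shape (the device name is hypothetical):

        #include <linux/interrupt.h>
        #include <asm/irq_regs.h>
        #include <asm/ptrace.h>

        static irqreturn_t widget_isr(int irq, void *dev_id)
        {
                /* pt_regs no longer arrives as a parameter; fetch on demand */
                struct pt_regs *regs = get_irq_regs();

                if (user_mode_vm(regs))
                        pr_debug("widget irq %d interrupted user space\n", irq);
                return IRQ_HANDLED;
        }
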
7227 diff -Naur linux-2.6.25/arch/x86/kernel/traps_32-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c
7228 --- linux-2.6.25/arch/x86/kernel/traps_32-xen.c 2008-05-23 20:51:11.000000000 +0200
7229 +++ linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c 2008-05-23 20:39:03.000000000 +0200
7230 @@ -28,6 +28,7 @@
7231 #include <linux/kprobes.h>
7232 #include <linux/kexec.h>
7233 #include <linux/unwind.h>
7234 +#include <linux/uaccess.h>
7235
7236 #ifdef CONFIG_EISA
7237 #include <linux/ioport.h>
7238 @@ -40,7 +41,6 @@
7239
7240 #include <asm/processor.h>
7241 #include <asm/system.h>
7242 -#include <asm/uaccess.h>
7243 #include <asm/io.h>
7244 #include <asm/atomic.h>
7245 #include <asm/debugreg.h>
7246 @@ -51,11 +51,14 @@
7247 #include <asm/smp.h>
7248 #include <asm/arch_hooks.h>
7249 #include <asm/kdebug.h>
7250 +#include <asm/stacktrace.h>
7251
7252 #include <linux/module.h>
7253
7254 #include "mach_traps.h"
7255
7256 +int panic_on_unrecovered_nmi;
7257 +
7258 asmlinkage int system_call(void);
7259
7260 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
7261 @@ -124,62 +127,63 @@
7262 p < (void *)tinfo + THREAD_SIZE - 3;
7263 }
7264
7265 -/*
7266 - * Print one address/symbol entries per line.
7267 - */
7268 -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
7269 -{
7270 - printk(" [<%08lx>] ", addr);
7271 -
7272 - print_symbol("%s\n", addr);
7273 -}
7274 -
7275 static inline unsigned long print_context_stack(struct thread_info *tinfo,
7276 unsigned long *stack, unsigned long ebp,
7277 - char *log_lvl)
7278 + struct stacktrace_ops *ops, void *data)
7279 {
7280 unsigned long addr;
7281
7282 #ifdef CONFIG_FRAME_POINTER
7283 while (valid_stack_ptr(tinfo, (void *)ebp)) {
7284 + unsigned long new_ebp;
7285 addr = *(unsigned long *)(ebp + 4);
7286 - print_addr_and_symbol(addr, log_lvl);
7287 + ops->address(data, addr);
7288 /*
7289 * break out of recursive entries (such as
7290 - * end_of_stack_stop_unwind_function):
7291 + * end_of_stack_stop_unwind_function). Also,
7292 + * we can never allow a frame pointer to
7293 + * move downwards!
7294 */
7295 - if (ebp == *(unsigned long *)ebp)
7296 + new_ebp = *(unsigned long *)ebp;
7297 + if (new_ebp <= ebp)
7298 break;
7299 - ebp = *(unsigned long *)ebp;
7300 + ebp = new_ebp;
7301 }
7302 #else
7303 while (valid_stack_ptr(tinfo, stack)) {
7304 addr = *stack++;
7305 if (__kernel_text_address(addr))
7306 - print_addr_and_symbol(addr, log_lvl);
7307 + ops->address(data, addr);
7308 }
7309 #endif
7310 return ebp;
7311 }
7312
7313 +struct ops_and_data {
7314 + struct stacktrace_ops *ops;
7315 + void *data;
7316 +};
7317 +
7318 static asmlinkage int
7319 -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
7320 +dump_trace_unwind(struct unwind_frame_info *info, void *data)
7321 {
7322 + struct ops_and_data *oad = (struct ops_and_data *)data;
7323 int n = 0;
7324
7325 while (unwind(info) == 0 && UNW_PC(info)) {
7326 n++;
7327 - print_addr_and_symbol(UNW_PC(info), log_lvl);
7328 + oad->ops->address(oad->data, UNW_PC(info));
7329 if (arch_unw_user_mode(info))
7330 break;
7331 }
7332 return n;
7333 }
7334
7335 -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7336 - unsigned long *stack, char *log_lvl)
7337 +void dump_trace(struct task_struct *task, struct pt_regs *regs,
7338 + unsigned long *stack,
7339 + struct stacktrace_ops *ops, void *data)
7340 {
7341 - unsigned long ebp;
7342 + unsigned long ebp = 0;
7343
7344 if (!task)
7345 task = current;
7346 @@ -187,54 +191,116 @@
7347 if (call_trace >= 0) {
7348 int unw_ret = 0;
7349 struct unwind_frame_info info;
7350 + struct ops_and_data oad = { .ops = ops, .data = data };
7351
7352 if (regs) {
7353 if (unwind_init_frame_info(&info, task, regs) == 0)
7354 - unw_ret = show_trace_unwind(&info, log_lvl);
7355 + unw_ret = dump_trace_unwind(&info, &oad);
7356 } else if (task == current)
7357 - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
7358 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7359 else {
7360 if (unwind_init_blocked(&info, task) == 0)
7361 - unw_ret = show_trace_unwind(&info, log_lvl);
7362 + unw_ret = dump_trace_unwind(&info, &oad);
7363 }
7364 if (unw_ret > 0) {
7365 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7366 - print_symbol("DWARF2 unwinder stuck at %s\n",
7367 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7368 UNW_PC(&info));
7369 if (UNW_SP(&info) >= PAGE_OFFSET) {
7370 - printk("Leftover inexact backtrace:\n");
7371 + ops->warning(data, "Leftover inexact backtrace:\n");
7372 stack = (void *)UNW_SP(&info);
7373 + if (!stack)
7374 + return;
7375 + ebp = UNW_FP(&info);
7376 } else
7377 - printk("Full inexact backtrace again:\n");
7378 + ops->warning(data, "Full inexact backtrace again:\n");
7379 } else if (call_trace >= 1)
7380 return;
7381 else
7382 - printk("Full inexact backtrace again:\n");
7383 + ops->warning(data, "Full inexact backtrace again:\n");
7384 } else
7385 - printk("Inexact backtrace:\n");
7386 + ops->warning(data, "Inexact backtrace:\n");
7387 }
7388 -
7389 - if (task == current) {
7390 - /* Grab ebp right from our regs */
7391 - asm ("movl %%ebp, %0" : "=r" (ebp) : );
7392 - } else {
7393 - /* ebp is the last reg pushed by switch_to */
7394 - ebp = *(unsigned long *) task->thread.esp;
7395 + if (!stack) {
7396 + unsigned long dummy;
7397 + stack = &dummy;
7398 + if (task && task != current)
7399 + stack = (unsigned long *)task->thread.esp;
7400 + }
7401 +
7402 +#ifdef CONFIG_FRAME_POINTER
7403 + if (!ebp) {
7404 + if (task == current) {
7405 + /* Grab ebp right from our regs */
7406 + asm ("movl %%ebp, %0" : "=r" (ebp) : );
7407 + } else {
7408 + /* ebp is the last reg pushed by switch_to */
7409 + ebp = *(unsigned long *) task->thread.esp;
7410 + }
7411 }
7412 +#endif
7413
7414 while (1) {
7415 struct thread_info *context;
7416 context = (struct thread_info *)
7417 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
7418 - ebp = print_context_stack(context, stack, ebp, log_lvl);
7419 + ebp = print_context_stack(context, stack, ebp, ops, data);
7420 + /* Should be after the line below, but somewhere
7421 + in early boot context comes out corrupted and we
7422 + can't reference it -AK */
7423 + if (ops->stack(data, "IRQ") < 0)
7424 + break;
7425 stack = (unsigned long*)context->previous_esp;
7426 if (!stack)
7427 break;
7428 - printk("%s =======================\n", log_lvl);
7429 }
7430 }
7431 +EXPORT_SYMBOL(dump_trace);
7432 +
7433 +static void
7434 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
7435 +{
7436 + printk(data);
7437 + print_symbol(msg, symbol);
7438 + printk("\n");
7439 +}
7440 +
7441 +static void print_trace_warning(void *data, char *msg)
7442 +{
7443 + printk("%s%s\n", (char *)data, msg);
7444 +}
7445
7446 -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
7447 +static int print_trace_stack(void *data, char *name)
7448 +{
7449 + return 0;
7450 +}
7451 +
7452 +/*
7453 + * Print one address/symbol entry per line.
7454 + */
7455 +static void print_trace_address(void *data, unsigned long addr)
7456 +{
7457 + printk("%s [<%08lx>] ", (char *)data, addr);
7458 + print_symbol("%s\n", addr);
7459 +}
7460 +
7461 +static struct stacktrace_ops print_trace_ops = {
7462 + .warning = print_trace_warning,
7463 + .warning_symbol = print_trace_warning_symbol,
7464 + .stack = print_trace_stack,
7465 + .address = print_trace_address,
7466 +};
7467 +
7468 +static void
7469 +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7470 + unsigned long * stack, char *log_lvl)
7471 +{
7472 + dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
7473 + printk("%s =======================\n", log_lvl);
7474 +}
7475 +
7476 +void show_trace(struct task_struct *task, struct pt_regs *regs,
7477 + unsigned long * stack)
7478 {
7479 show_trace_log_lvl(task, regs, stack, "");
7480 }
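
The hunk above splits the old printk-based walker into a reusable dump_trace() core driven by stacktrace_ops callbacks, with show_trace_log_lvl() reduced to just one consumer of it. A minimal sketch of another consumer, one that collects raw return addresses instead of printing them, follows; only the callback signatures are taken from the patch, while struct save_ctx and its field names are illustrative:

    /* Hedged sketch: a stacktrace_ops consumer that records addresses.
     * The context structure and entry limit here are illustrative. */
    struct save_ctx {
            unsigned long *entries;
            unsigned int nr, max;
    };

    static void save_warning(void *data, char *msg) { }
    static void save_warning_symbol(void *data, char *msg,
                                    unsigned long symbol) { }

    static int save_stack(void *data, char *name)
    {
            return 0;       /* >= 0 means: keep walking this stack */
    }

    static void save_address(void *data, unsigned long addr)
    {
            struct save_ctx *ctx = data;

            if (ctx->nr < ctx->max)
                    ctx->entries[ctx->nr++] = addr;
    }

    static struct stacktrace_ops save_ops = {
            .warning        = save_warning,
            .warning_symbol = save_warning_symbol,
            .stack          = save_stack,
            .address        = save_address,
    };

    /* usage: dump_trace(current, NULL, NULL, &save_ops, &ctx); */
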
7481 @@ -297,12 +363,13 @@
7482 ss = regs->xss & 0xffff;
7483 }
7484 print_modules();
7485 - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
7486 - "EFLAGS: %08lx (%s %.*s) \n",
7487 + printk(KERN_EMERG "CPU: %d\n"
7488 + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
7489 + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
7490 smp_processor_id(), 0xffff & regs->xcs, regs->eip,
7491 - print_tainted(), regs->eflags, system_utsname.release,
7492 - (int)strcspn(system_utsname.version, " "),
7493 - system_utsname.version);
7494 + print_tainted(), regs->eflags, init_utsname()->release,
7495 + (int)strcspn(init_utsname()->version, " "),
7496 + init_utsname()->version);
7497 print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
7498 printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
7499 regs->eax, regs->ebx, regs->ecx, regs->edx);
7500 @@ -319,6 +386,8 @@
7501 */
7502 if (in_kernel) {
7503 u8 __user *eip;
7504 + int code_bytes = 64;
7505 + unsigned char c;
7506
7507 printk("\n" KERN_EMERG "Stack: ");
7508 show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
7509 @@ -326,9 +395,12 @@
7510 printk(KERN_EMERG "Code: ");
7511
7512 eip = (u8 __user *)regs->eip - 43;
7513 - for (i = 0; i < 64; i++, eip++) {
7514 - unsigned char c;
7515 -
7516 + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7517 + /* try starting at EIP */
7518 + eip = (u8 __user *)regs->eip;
7519 + code_bytes = 32;
7520 + }
7521 + for (i = 0; i < code_bytes; i++, eip++) {
7522 if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7523 printk(" Bad EIP value.");
7524 break;
7525 @@ -349,7 +421,7 @@
7526
7527 if (eip < PAGE_OFFSET)
7528 return;
7529 - if (__get_user(ud2, (unsigned short __user *)eip))
7530 + if (probe_kernel_address((unsigned short __user *)eip, ud2))
7531 return;
7532 if (ud2 != 0x0b0f)
7533 return;
7534 @@ -362,7 +434,8 @@
7535 char *file;
7536 char c;
7537
7538 - if (__get_user(line, (unsigned short __user *)(eip + 2)))
7539 + if (probe_kernel_address((unsigned short __user *)(eip + 2),
7540 + line))
7541 break;
7542 if (__get_user(file, (char * __user *)(eip + 4)) ||
7543 (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
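
The two hunks above move the BUG/ud2 decoder from __get_user() to probe_kernel_address(), which disables pagefault handling around the copy so a stale or bogus kernel pointer fails the read instead of faulting recursively in the middle of an oops. A minimal sketch of the pattern, assuming the linux/uaccess.h helper of this kernel generation (the 'where' pointer is illustrative):

    unsigned short word;

    /* returns 0 on success, non-zero if the address is unreadable */
    if (probe_kernel_address((unsigned short *)where, word))
            return;
    /* 'word' now holds the value and is safe to inspect */
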
7544 @@ -604,18 +677,24 @@
7545 }
7546 }
7547
7548 -static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
7549 +static __kprobes void
7550 +mem_parity_error(unsigned char reason, struct pt_regs * regs)
7551 {
7552 - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
7553 - "to continue\n");
7554 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7555 + "CPU %d.\n", reason, smp_processor_id());
7556 printk(KERN_EMERG "You probably have a hardware problem with your RAM "
7557 "chips\n");
7558 + if (panic_on_unrecovered_nmi)
7559 + panic("NMI: Not continuing");
7560 +
7561 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7562
7563 /* Clear and disable the memory parity error line. */
7564 clear_mem_error(reason);
7565 }
7566
7567 -static void io_check_error(unsigned char reason, struct pt_regs * regs)
7568 +static __kprobes void
7569 +io_check_error(unsigned char reason, struct pt_regs * regs)
7570 {
7571 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
7572 show_registers(regs);
7573 @@ -624,7 +703,8 @@
7574 clear_io_check_error(reason);
7575 }
7576
7577 -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7578 +static __kprobes void
7579 +unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7580 {
7581 #ifdef CONFIG_MCA
7582 /* Might actually be able to figure out what the guilty party
7583 @@ -634,15 +714,18 @@
7584 return;
7585 }
7586 #endif
7587 - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
7588 - reason, smp_processor_id());
7589 - printk("Dazed and confused, but trying to continue\n");
7590 - printk("Do you have a strange power saving mode enabled?\n");
7591 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7592 + "CPU %d.\n", reason, smp_processor_id());
7593 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
7594 + if (panic_on_unrecovered_nmi)
7595 + panic("NMI: Not continuing");
7596 +
7597 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7598 }
7599
7600 static DEFINE_SPINLOCK(nmi_print_lock);
7601
7602 -void die_nmi (struct pt_regs *regs, const char *msg)
7603 +void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
7604 {
7605 if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
7606 NOTIFY_STOP)
7607 @@ -674,7 +757,7 @@
7608 do_exit(SIGSEGV);
7609 }
7610
7611 -static void default_do_nmi(struct pt_regs * regs)
7612 +static __kprobes void default_do_nmi(struct pt_regs * regs)
7613 {
7614 unsigned char reason = 0;
7615
7616 @@ -691,12 +774,12 @@
7617 * Ok, so this is none of the documented NMI sources,
7618 * so it must be the NMI watchdog.
7619 */
7620 - if (nmi_watchdog) {
7621 - nmi_watchdog_tick(regs);
7622 + if (nmi_watchdog_tick(regs, reason))
7623 return;
7624 - }
7625 + if (!do_nmi_callback(regs, smp_processor_id()))
7626 #endif
7627 - unknown_nmi_error(reason, regs);
7628 + unknown_nmi_error(reason, regs);
7629 +
7630 return;
7631 }
7632 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
7633 @@ -712,14 +795,7 @@
7634 reassert_nmi();
7635 }
7636
7637 -static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
7638 -{
7639 - return 0;
7640 -}
7641 -
7642 -static nmi_callback_t nmi_callback = dummy_nmi_callback;
7643 -
7644 -fastcall void do_nmi(struct pt_regs * regs, long error_code)
7645 +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
7646 {
7647 int cpu;
7648
7649 @@ -729,25 +805,11 @@
7650
7651 ++nmi_count(cpu);
7652
7653 - if (!rcu_dereference(nmi_callback)(regs, cpu))
7654 - default_do_nmi(regs);
7655 + default_do_nmi(regs);
7656
7657 nmi_exit();
7658 }
7659
7660 -void set_nmi_callback(nmi_callback_t callback)
7661 -{
7662 - vmalloc_sync_all();
7663 - rcu_assign_pointer(nmi_callback, callback);
7664 -}
7665 -EXPORT_SYMBOL_GPL(set_nmi_callback);
7666 -
7667 -void unset_nmi_callback(void)
7668 -{
7669 - nmi_callback = dummy_nmi_callback;
7670 -}
7671 -EXPORT_SYMBOL_GPL(unset_nmi_callback);
7672 -
7673 #ifdef CONFIG_KPROBES
7674 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
7675 {
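
The hunk above removes the single-slot set_nmi_callback()/unset_nmi_callback() API: default_do_nmi() now always runs, the watchdog is polled through nmi_watchdog_tick(), and remaining consumers hang off do_nmi_callback() or the die-notifier chain. A sketch of the notifier-based hook that replaces set_nmi_callback() for users such as oprofile, assuming the standard notify_die() path (handler and block names are illustrative):

    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int my_nmi_notify(struct notifier_block *self,
                             unsigned long val, void *data)
    {
            struct die_args *args = data;

            if (val != DIE_NMI)
                    return NOTIFY_DONE;
            /* inspect args->regs here */
            return NOTIFY_STOP;     /* claim this NMI */
    }

    static struct notifier_block my_nmi_nb = {
            .notifier_call = my_nmi_notify,
    };

    /* register_die_notifier(&my_nmi_nb); at init time */
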
7676 diff -Naur linux-2.6.25/arch/x86/kernel/traps_64-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c
7677 --- linux-2.6.25/arch/x86/kernel/traps_64-xen.c 2008-05-23 20:51:11.000000000 +0200
7678 +++ linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c 2008-05-23 20:39:03.000000000 +0200
7679 @@ -23,6 +23,7 @@
7680 #include <linux/delay.h>
7681 #include <linux/spinlock.h>
7682 #include <linux/interrupt.h>
7683 +#include <linux/kallsyms.h>
7684 #include <linux/module.h>
7685 #include <linux/moduleparam.h>
7686 #include <linux/nmi.h>
7687 @@ -45,6 +46,7 @@
7688 #include <asm/pda.h>
7689 #include <asm/proto.h>
7690 #include <asm/nmi.h>
7691 +#include <asm/stacktrace.h>
7692
7693 asmlinkage void divide_error(void);
7694 asmlinkage void debug(void);
7695 @@ -114,7 +116,6 @@
7696 #endif
7697
7698 #ifdef CONFIG_KALLSYMS
7699 -# include <linux/kallsyms.h>
7700 void printk_address(unsigned long address)
7701 {
7702 unsigned long offset = 0, symsize;
7703 @@ -142,7 +143,7 @@
7704 #endif
7705
7706 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
7707 - unsigned *usedp, const char **idp)
7708 + unsigned *usedp, char **idp)
7709 {
7710 #ifndef CONFIG_X86_NO_TSS
7711 static char ids[][8] = {
7712 @@ -162,26 +163,7 @@
7713 * 'stack' is in one of them:
7714 */
7715 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
7716 - unsigned long end;
7717 -
7718 - /*
7719 - * set 'end' to the end of the exception stack.
7720 - */
7721 - switch (k + 1) {
7722 - /*
7723 - * TODO: this block is not needed i think, because
7724 - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
7725 - * properly too.
7726 - */
7727 -#if DEBUG_STKSZ > EXCEPTION_STKSZ
7728 - case DEBUG_STACK:
7729 - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
7730 - break;
7731 -#endif
7732 - default:
7733 - end = per_cpu(orig_ist, cpu).ist[k];
7734 - break;
7735 - }
7736 + unsigned long end = per_cpu(orig_ist, cpu).ist[k];
7737 /*
7738 * Is 'stack' above this exception frame's end?
7739 * If yes then skip to the next frame.
7740 @@ -236,13 +218,19 @@
7741 return NULL;
7742 }
7743
7744 -static int show_trace_unwind(struct unwind_frame_info *info, void *context)
7745 +struct ops_and_data {
7746 + struct stacktrace_ops *ops;
7747 + void *data;
7748 +};
7749 +
7750 +static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
7751 {
7752 + struct ops_and_data *oad = (struct ops_and_data *)context;
7753 int n = 0;
7754
7755 while (unwind(info) == 0 && UNW_PC(info)) {
7756 n++;
7757 - printk_address(UNW_PC(info));
7758 + oad->ops->address(oad->data, UNW_PC(info));
7759 if (arch_unw_user_mode(info))
7760 break;
7761 }
7762 @@ -256,13 +244,19 @@
7763 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
7764 */
7765
7766 -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
7767 +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
7768 {
7769 - const unsigned cpu = safe_smp_processor_id();
7770 + void *t = (void *)tinfo;
7771 + return p > t && p < t + THREAD_SIZE - 3;
7772 +}
7773 +
7774 +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
7775 + struct stacktrace_ops *ops, void *data)
7776 +{
7777 + const unsigned cpu = smp_processor_id();
7778 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
7779 unsigned used = 0;
7780 -
7781 - printk("\nCall Trace:\n");
7782 + struct thread_info *tinfo;
7783
7784 if (!tsk)
7785 tsk = current;
7786 @@ -270,32 +264,47 @@
7787 if (call_trace >= 0) {
7788 int unw_ret = 0;
7789 struct unwind_frame_info info;
7790 + struct ops_and_data oad = { .ops = ops, .data = data };
7791
7792 if (regs) {
7793 if (unwind_init_frame_info(&info, tsk, regs) == 0)
7794 - unw_ret = show_trace_unwind(&info, NULL);
7795 + unw_ret = dump_trace_unwind(&info, &oad);
7796 } else if (tsk == current)
7797 - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
7798 + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7799 else {
7800 if (unwind_init_blocked(&info, tsk) == 0)
7801 - unw_ret = show_trace_unwind(&info, NULL);
7802 + unw_ret = dump_trace_unwind(&info, &oad);
7803 }
7804 if (unw_ret > 0) {
7805 if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7806 - print_symbol("DWARF2 unwinder stuck at %s\n",
7807 + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7808 UNW_PC(&info));
7809 if ((long)UNW_SP(&info) < 0) {
7810 - printk("Leftover inexact backtrace:\n");
7811 + ops->warning(data, "Leftover inexact backtrace:\n");
7812 stack = (unsigned long *)UNW_SP(&info);
7813 + if (!stack)
7814 + return;
7815 } else
7816 - printk("Full inexact backtrace again:\n");
7817 + ops->warning(data, "Full inexact backtrace again:\n");
7818 } else if (call_trace >= 1)
7819 return;
7820 else
7821 - printk("Full inexact backtrace again:\n");
7822 + ops->warning(data, "Full inexact backtrace again:\n");
7823 } else
7824 - printk("Inexact backtrace:\n");
7825 + ops->warning(data, "Inexact backtrace:\n");
7826 + }
7827 + if (!stack) {
7828 + unsigned long dummy;
7829 + stack = &dummy;
7830 + if (tsk && tsk != current)
7831 + stack = (unsigned long *)tsk->thread.rsp;
7832 }
7833 + /*
7834 + * Align the stack pointer on word boundary, later loops
7835 + * rely on that (and corruption / debug info bugs can cause
7836 + * unaligned values here):
7837 + */
7838 + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
7839
7840 /*
7841 * Print function call entries within a stack. 'cond' is the
7842 @@ -305,7 +314,9 @@
7843 #define HANDLE_STACK(cond) \
7844 do while (cond) { \
7845 unsigned long addr = *stack++; \
7846 - if (kernel_text_address(addr)) { \
7847 + if (oops_in_progress ? \
7848 + __kernel_text_address(addr) : \
7849 + kernel_text_address(addr)) { \
7850 /* \
7851 * If the address is either in the text segment of the \
7852 * kernel, or in the region which contains vmalloc'ed \
7853 @@ -314,7 +325,7 @@
7854 * down the cause of the crash will be able to figure \
7855 * out the call path that was taken. \
7856 */ \
7857 - printk_address(addr); \
7858 + ops->address(data, addr); \
7859 } \
7860 } while (0)
7861
7862 @@ -323,16 +334,17 @@
7863 * current stack address. If the stacks consist of nested
7864 * exceptions
7865 */
7866 - for ( ; ; ) {
7867 - const char *id;
7868 + for (;;) {
7869 + char *id;
7870 unsigned long *estack_end;
7871 estack_end = in_exception_stack(cpu, (unsigned long)stack,
7872 &used, &id);
7873
7874 if (estack_end) {
7875 - printk(" <%s>", id);
7876 + if (ops->stack(data, id) < 0)
7877 + break;
7878 HANDLE_STACK (stack < estack_end);
7879 - printk(" <EOE>");
7880 + ops->stack(data, "<EOE>");
7881 /*
7882 * We link to the next stack via the
7883 * second-to-last pointer (index -2 to end) in the
7884 @@ -347,7 +359,8 @@
7885 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
7886
7887 if (stack >= irqstack && stack < irqstack_end) {
7888 - printk(" <IRQ>");
7889 + if (ops->stack(data, "IRQ") < 0)
7890 + break;
7891 HANDLE_STACK (stack < irqstack_end);
7892 /*
7893 * We link to the next stack (which would be
7894 @@ -356,7 +369,7 @@
7895 */
7896 stack = (unsigned long *) (irqstack_end[-1]);
7897 irqstack_end = NULL;
7898 - printk(" <EOI>");
7899 + ops->stack(data, "EOI");
7900 continue;
7901 }
7902 }
7903 @@ -364,19 +377,58 @@
7904 }
7905
7906 /*
7907 - * This prints the process stack:
7908 + * This handles the process stack:
7909 */
7910 - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
7911 + tinfo = current_thread_info();
7912 + HANDLE_STACK (valid_stack_ptr(tinfo, stack));
7913 #undef HANDLE_STACK
7914 +}
7915 +EXPORT_SYMBOL(dump_trace);
7916 +
7917 +static void
7918 +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
7919 +{
7920 + print_symbol(msg, symbol);
7921 + printk("\n");
7922 +}
7923 +
7924 +static void print_trace_warning(void *data, char *msg)
7925 +{
7926 + printk("%s\n", msg);
7927 +}
7928 +
7929 +static int print_trace_stack(void *data, char *name)
7930 +{
7931 + printk(" <%s> ", name);
7932 + return 0;
7933 +}
7934 +
7935 +static void print_trace_address(void *data, unsigned long addr)
7936 +{
7937 + printk_address(addr);
7938 +}
7939 +
7940 +static struct stacktrace_ops print_trace_ops = {
7941 + .warning = print_trace_warning,
7942 + .warning_symbol = print_trace_warning_symbol,
7943 + .stack = print_trace_stack,
7944 + .address = print_trace_address,
7945 +};
7946
7947 +void
7948 +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
7949 +{
7950 + printk("\nCall Trace:\n");
7951 + dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
7952 printk("\n");
7953 }
7954
7955 -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
7956 +static void
7957 +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
7958 {
7959 unsigned long *stack;
7960 int i;
7961 - const int cpu = safe_smp_processor_id();
7962 + const int cpu = smp_processor_id();
7963 unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
7964 unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
7965
7966 @@ -430,7 +482,7 @@
7967 int i;
7968 int in_kernel = !user_mode(regs);
7969 unsigned long rsp;
7970 - const int cpu = safe_smp_processor_id();
7971 + const int cpu = smp_processor_id();
7972 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
7973
7974 rsp = regs->rsp;
7975 @@ -505,9 +557,11 @@
7976
7977 unsigned __kprobes long oops_begin(void)
7978 {
7979 - int cpu = safe_smp_processor_id();
7980 + int cpu = smp_processor_id();
7981 unsigned long flags;
7982
7983 + oops_enter();
7984 +
7985 /* racy, but better than risking deadlock. */
7986 local_irq_save(flags);
7987 if (!spin_trylock(&die_lock)) {
7988 @@ -536,6 +590,7 @@
7989 spin_unlock_irqrestore(&die_lock, flags);
7990 if (panic_on_oops)
7991 panic("Fatal exception");
7992 + oops_exit();
7993 }
7994
7995 void __kprobes __die(const char * str, struct pt_regs * regs, long err)
7996 @@ -573,7 +628,7 @@
7997 }
7998
7999 #ifdef CONFIG_X86_LOCAL_APIC
8000 -void __kprobes die_nmi(char *str, struct pt_regs *regs)
8001 +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8002 {
8003 unsigned long flags = oops_begin();
8004
8005 @@ -581,13 +636,12 @@
8006 * We are in trouble anyway, lets at least try
8007 * to get a message out.
8008 */
8009 - printk(str, safe_smp_processor_id());
8010 + printk(str, smp_processor_id());
8011 show_registers(regs);
8012 if (kexec_should_crash(current))
8013 crash_kexec(regs);
8014 - if (panic_on_timeout || panic_on_oops)
8015 - panic("nmi watchdog");
8016 - printk("console shuts up ...\n");
8017 + if (do_panic || panic_on_oops)
8018 + panic("Non maskable interrupt");
8019 oops_end(flags);
8020 nmi_exit();
8021 local_irq_enable();
8022 @@ -734,8 +788,15 @@
8023 static __kprobes void
8024 mem_parity_error(unsigned char reason, struct pt_regs * regs)
8025 {
8026 - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8027 - printk("You probably have a hardware problem with your RAM chips\n");
8028 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8029 + reason);
8030 + printk(KERN_EMERG "You probably have a hardware problem with your "
8031 + "RAM chips\n");
8032 +
8033 + if (panic_on_unrecovered_nmi)
8034 + panic("NMI: Not continuing");
8035 +
8036 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8037
8038 #if 0 /* XEN */
8039 /* Clear and disable the memory parity error line. */
8040 @@ -762,9 +823,15 @@
8041
8042 static __kprobes void
8043 unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8044 -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8045 - printk("Dazed and confused, but trying to continue\n");
8046 - printk("Do you have a strange power saving mode enabled?\n");
8047 +{
8048 + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8049 + reason);
8050 + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8051 +
8052 + if (panic_on_unrecovered_nmi)
8053 + panic("NMI: Not continuing");
8054 +
8055 + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8056 }
8057
8058 /* Runs on IST stack. This code must keep interrupts off all the time.
8059 @@ -789,12 +856,12 @@
8060 * Ok, so this is none of the documented NMI sources,
8061 * so it must be the NMI watchdog.
8062 */
8063 - if (nmi_watchdog > 0) {
8064 - nmi_watchdog_tick(regs,reason);
8065 + if (nmi_watchdog_tick(regs,reason))
8066 return;
8067 - }
8068 #endif
8069 - unknown_nmi_error(reason, regs);
8070 + if (!do_nmi_callback(regs,cpu))
8071 + unknown_nmi_error(reason, regs);
8072 +
8073 return;
8074 }
8075 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
8076 @@ -1081,6 +1148,7 @@
8077 init_fpu(me);
8078 restore_fpu_checking(&me->thread.i387.fxsave);
8079 task_thread_info(me)->status |= TS_USEDFPU;
8080 + me->fpu_counter++;
8081 }
8082
8083
8084 @@ -1141,24 +1209,30 @@
8085 }
8086
8087
8088 -/* Actual parsing is done early in setup.c. */
8089 -static int __init oops_dummy(char *s)
8090 +static int __init oops_setup(char *s)
8091 {
8092 - panic_on_oops = 1;
8093 - return 1;
8094 + if (!s)
8095 + return -EINVAL;
8096 + if (!strcmp(s, "panic"))
8097 + panic_on_oops = 1;
8098 + return 0;
8099 }
8100 -__setup("oops=", oops_dummy);
8101 +early_param("oops", oops_setup);
8102
8103 static int __init kstack_setup(char *s)
8104 {
8105 + if (!s)
8106 + return -EINVAL;
8107 kstack_depth_to_print = simple_strtoul(s,NULL,0);
8108 - return 1;
8109 + return 0;
8110 }
8111 -__setup("kstack=", kstack_setup);
8112 +early_param("kstack", kstack_setup);
8113
8114 #ifdef CONFIG_STACK_UNWIND
8115 static int __init call_trace_setup(char *s)
8116 {
8117 + if (!s)
8118 + return -EINVAL;
8119 if (strcmp(s, "old") == 0)
8120 call_trace = -1;
8121 else if (strcmp(s, "both") == 0)
8122 @@ -1167,7 +1241,7 @@
8123 call_trace = 1;
8124 else if (strcmp(s, "new") == 0)
8125 call_trace = 2;
8126 - return 1;
8127 + return 0;
8128 }
8129 -__setup("call_trace=", call_trace_setup);
8130 +early_param("call_trace", call_trace_setup);
8131 #endif
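
The last three hunks convert the oops=, kstack= and call_trace= handlers from __setup() to early_param(), so they are parsed by parse_early_param() before the unwinder or oops paths can run. Note the changed contract: early_param() handlers return 0 on success and a negative errno on bad input, whereas __setup() handlers returned 1 to consume the option. A minimal sketch under those assumptions (the option name and variable are illustrative, not from the patch):

    static unsigned int example_depth = 64;

    static int __init example_depth_setup(char *s)
    {
            if (!s)
                    return -EINVAL; /* "exampledepth=" with no value */
            example_depth = simple_strtoul(s, NULL, 0);
            return 0;
    }
    early_param("exampledepth", example_depth_setup);
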
8132 diff -Naur linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c
8133 --- linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8134 +++ linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8135 @@ -26,6 +26,10 @@
8136 #include <linux/seqlock.h>
8137 #include <linux/jiffies.h>
8138 #include <linux/sysctl.h>
8139 +#include <linux/getcpu.h>
8140 +#include <linux/cpu.h>
8141 +#include <linux/smp.h>
8142 +#include <linux/notifier.h>
8143
8144 #include <asm/vsyscall.h>
8145 #include <asm/pgtable.h>
8146 @@ -33,11 +37,15 @@
8147 #include <asm/fixmap.h>
8148 #include <asm/errno.h>
8149 #include <asm/io.h>
8150 +#include <asm/segment.h>
8151 +#include <asm/desc.h>
8152 +#include <asm/topology.h>
8153
8154 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
8155
8156 int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
8157 seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
8158 +int __vgetcpu_mode __section_vgetcpu_mode;
8159
8160 #include <asm/unistd.h>
8161
8162 @@ -61,8 +69,7 @@
8163 sequence = read_seqbegin(&__xtime_lock);
8164
8165 sec = __xtime.tv_sec;
8166 - usec = (__xtime.tv_nsec / 1000) +
8167 - (__jiffies - __wall_jiffies) * (1000000 / HZ);
8168 + usec = __xtime.tv_nsec / 1000;
8169
8170 if (__vxtime.mode != VXTIME_HPET) {
8171 t = get_cycles_sync();
8172 @@ -72,7 +79,8 @@
8173 __vxtime.tsc_quot) >> 32;
8174 /* See comment in x86_64 do_gettimeofday. */
8175 } else {
8176 - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8177 + usec += ((readl((void __iomem *)
8178 + fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8179 __vxtime.last) * __vxtime.quot) >> 32;
8180 }
8181 } while (read_seqretry(&__xtime_lock, sequence));
8182 @@ -127,9 +135,46 @@
8183 return __xtime.tv_sec;
8184 }
8185
8186 -long __vsyscall(2) venosys_0(void)
8187 -{
8188 - return -ENOSYS;
8189 +/* Fast way to get current CPU and node.
8190 + This helps to do per node and per CPU caches in user space.
8191 + The result is not guaranteed without CPU affinity, but usually
8192 + works out because the scheduler tries to keep a thread on the same
8193 + CPU.
8194 +
8195 + tcache must point to a two-element long array.
8196 + All arguments can be NULL. */
8197 +long __vsyscall(2)
8198 +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
8199 +{
8200 + unsigned int dummy, p;
8201 + unsigned long j = 0;
8202 +
8203 + /* Fast cache - only recompute value once per jiffies and avoid
8204 + relatively costly rdtscp/cpuid otherwise.
8205 + This works because the scheduler usually keeps the process
8206 + on the same CPU and this syscall doesn't guarantee its
8207 + results anyway.
8208 + We do this here because otherwise user space would do it on
8209 + its own in a likely inferior way (no access to jiffies).
8210 + If you don't like it pass NULL. */
8211 + if (tcache && tcache->blob[0] == (j = __jiffies)) {
8212 + p = tcache->blob[1];
8213 + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
8214 + /* Load per CPU data from RDTSCP */
8215 + rdtscp(dummy, dummy, p);
8216 + } else {
8217 + /* Load per CPU data from GDT */
8218 + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
8219 + }
8220 + if (tcache) {
8221 + tcache->blob[0] = j;
8222 + tcache->blob[1] = p;
8223 + }
8224 + if (cpu)
8225 + *cpu = p & 0xfff;
8226 + if (node)
8227 + *node = p >> 12;
8228 + return 0;
8229 }
8230
8231 long __vsyscall(3) venosys_1(void)
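
The hunk above replaces the venosys_0 stub with vgetcpu, letting userspace read its current CPU and NUMA node without entering the kernel; __vgetcpu_mode selects either RDTSCP or the GDT-limit LSL probe set up later in this file. A hedged userspace sketch follows; the call address assumes the conventional x86-64 vsyscall layout (slot 2 at a 1 kB stride from 0xffffffffff600000) and is not taken from the patch:

    #include <stdio.h>

    struct getcpu_cache { unsigned long blob[128 / sizeof(long)]; };
    typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node,
                              struct getcpu_cache *tcache);

    int main(void)
    {
            /* assumed: VSYSCALL_ADDR(__NR_vgetcpu) == base + 2 * 1024 */
            vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL;
            unsigned cpu, node;

            if (vgetcpu(&cpu, &node, NULL) == 0)
                    printf("running on cpu %u, node %u\n", cpu, node);
            return 0;
    }
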
8232 @@ -149,7 +194,8 @@
8233 void __user *buffer, size_t *lenp, loff_t *ppos)
8234 {
8235 extern u16 vsysc1, vsysc2;
8236 - u16 *map1, *map2;
8237 + u16 __iomem *map1;
8238 + u16 __iomem *map2;
8239 int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
8240 if (!write)
8241 return ret;
8242 @@ -164,11 +210,11 @@
8243 goto out;
8244 }
8245 if (!sysctl_vsyscall) {
8246 - *map1 = SYSCALL;
8247 - *map2 = SYSCALL;
8248 + writew(SYSCALL, map1);
8249 + writew(SYSCALL, map2);
8250 } else {
8251 - *map1 = NOP2;
8252 - *map2 = NOP2;
8253 + writew(NOP2, map1);
8254 + writew(NOP2, map2);
8255 }
8256 iounmap(map2);
8257 out:
8258 @@ -200,6 +246,48 @@
8259
8260 #endif
8261
8262 +/* Assume __initcall executes before all user space. Hopefully kmod
8263 + doesn't violate that. We'll find out if it does. */
8264 +static void __cpuinit vsyscall_set_cpu(int cpu)
8265 +{
8266 + unsigned long d;
8267 + unsigned long node = 0;
8268 +#ifdef CONFIG_NUMA
8269 + node = cpu_to_node[cpu];
8270 +#endif
8271 + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
8272 + write_rdtscp_aux((node << 12) | cpu);
8273 +
8274 + /* Store cpu number in limit so that it can be loaded quickly
8275 + in user space in vgetcpu.
8276 + 12 bits for the CPU and 8 bits for the node. */
8277 + d = 0x0f40000000000ULL;
8278 + d |= cpu;
8279 + d |= (node & 0xf) << 12;
8280 + d |= (node >> 4) << 48;
8281 + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
8282 + + GDT_ENTRY_PER_CPU),
8283 + d))
8284 + BUG();
8285 +}
8286 +
8287 +static void __cpuinit cpu_vsyscall_init(void *arg)
8288 +{
8289 + /* preemption should be already off */
8290 + vsyscall_set_cpu(raw_smp_processor_id());
8291 +}
8292 +
8293 +#ifdef CONFIG_HOTPLUG_CPU
8294 +static int __cpuinit
8295 +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
8296 +{
8297 + long cpu = (long)arg;
8298 + if (action == CPU_ONLINE)
8299 + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
8300 + return NOTIFY_DONE;
8301 +}
8302 +#endif
8303 +
8304 static void __init map_vsyscall(void)
8305 {
8306 extern char __vsyscall_0;
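
vsyscall_set_cpu() above hides the CPU and node in the segment limit of the new GDT_ENTRY_PER_CPU descriptor, so the unprivileged lsl instruction in vgetcpu can recover both without touching memory: limit bits 0-11 carry the CPU and bits 12-19 the node, split across the descriptor's two limit fields. A small sketch of that encoding, mirroring the constants in the hunk (the helper name is illustrative):

    static unsigned long encode_cpu_node(unsigned cpu, unsigned node)
    {
            unsigned long d = 0x0f40000000000ULL;   /* descriptor type/flags */

            d |= cpu;                               /* limit bits 0-11  */
            d |= (unsigned long)(node & 0xf) << 12; /* limit bits 12-15 */
            d |= (unsigned long)(node >> 4) << 48;  /* limit bits 16-19 */
            return d;
    }

    /* vgetcpu reads the limit via "lsl" into p, then:
     *      cpu  = p & 0xfff;
     *      node = p >> 12;
     */
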
8307 @@ -214,13 +302,20 @@
8308 VSYSCALL_ADDR(__NR_vgettimeofday)));
8309 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
8310 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
8311 + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
8312 map_vsyscall();
8313 #ifdef CONFIG_XEN
8314 sysctl_vsyscall = 0; /* disable vgettimeofday() */
8315 + if (boot_cpu_has(X86_FEATURE_RDTSCP))
8316 + vgetcpu_mode = VGETCPU_RDTSCP;
8317 + else
8318 + vgetcpu_mode = VGETCPU_LSL;
8319 #endif
8320 #ifdef CONFIG_SYSCTL
8321 register_sysctl_table(kernel_root_table2, 0);
8322 #endif
8323 + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
8324 + hotcpu_notifier(cpu_vsyscall_notifier, 0);
8325 return 0;
8326 }
8327
8328 diff -Naur linux-2.6.25/arch/x86/mach-xen/setup.c linux-2.6.25-xen/arch/x86/mach-xen/setup.c
8329 --- linux-2.6.25/arch/x86/mach-xen/setup.c 2008-05-23 20:51:11.000000000 +0200
8330 +++ linux-2.6.25-xen/arch/x86/mach-xen/setup.c 2008-05-23 20:39:03.000000000 +0200
8331 @@ -103,8 +103,10 @@
8332
8333 setup_xen_features();
8334
8335 - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
8336 - set_fixaddr_top(pp.virt_start);
8337 + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
8338 + hypervisor_virt_start = pp.virt_start;
8339 + reserve_top_address(0UL - pp.virt_start);
8340 + }
8341
8342 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
8343 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
8344 diff -Naur linux-2.6.25/arch/x86/mm/fault_32-xen.c linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c
8345 --- linux-2.6.25/arch/x86/mm/fault_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8346 +++ linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8347 @@ -27,21 +27,24 @@
8348 #include <asm/uaccess.h>
8349 #include <asm/desc.h>
8350 #include <asm/kdebug.h>
8351 +#include <asm/segment.h>
8352
8353 extern void die(const char *,struct pt_regs *,long);
8354
8355 -#ifdef CONFIG_KPROBES
8356 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8357 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8358 +
8359 int register_page_fault_notifier(struct notifier_block *nb)
8360 {
8361 vmalloc_sync_all();
8362 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8363 }
8364 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8365
8366 int unregister_page_fault_notifier(struct notifier_block *nb)
8367 {
8368 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8369 }
8370 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8371
8372 static inline int notify_page_fault(enum die_val val, const char *str,
8373 struct pt_regs *regs, long err, int trap, int sig)
8374 @@ -55,14 +58,6 @@
8375 };
8376 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8377 }
8378 -#else
8379 -static inline int notify_page_fault(enum die_val val, const char *str,
8380 - struct pt_regs *regs, long err, int trap, int sig)
8381 -{
8382 - return NOTIFY_DONE;
8383 -}
8384 -#endif
8385 -
8386
8387 /*
8388 * Unlock any spinlocks which will prevent us from getting the
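
The hunks above build the page-fault notifier chain unconditionally instead of only under CONFIG_KPROBES, and export the register/unregister entry points so modules can subscribe. A hedged sketch of a module-side consumer, assuming the DIE_PAGE_FAULT event delivered through notify_page_fault() above (handler names are illustrative):

    static int my_pf_notify(struct notifier_block *self,
                            unsigned long val, void *data)
    {
            struct die_args *args = data;

            if (val == DIE_PAGE_FAULT) {
                    /* args->regs and args->err describe the fault */
            }
            return NOTIFY_DONE;
    }

    static struct notifier_block my_pf_nb = {
            .notifier_call = my_pf_notify,
    };

    /* register_page_fault_notifier(&my_pf_nb) on load,
     * unregister_page_fault_notifier(&my_pf_nb) on unload */
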
8389 @@ -119,10 +114,10 @@
8390 }
8391
8392 /* The standard kernel/user address space limit. */
8393 - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
8394 + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
8395
8396 /* By far the most common cases. */
8397 - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
8398 + if (likely(SEGMENT_IS_FLAT_CODE(seg)))
8399 return eip;
8400
8401 /* Check the segment exists, is within the current LDT/GDT size,
8402 @@ -559,11 +554,7 @@
8403 write = 0;
8404 switch (error_code & 3) {
8405 default: /* 3: write, present */
8406 -#ifdef TEST_VERIFY_AREA
8407 - if (regs->cs == GET_KERNEL_CS())
8408 - printk("WP fault at %08lx\n", regs->eip);
8409 -#endif
8410 - /* fall through */
8411 + /* fall through */
8412 case 2: /* write, not present */
8413 if (!(vma->vm_flags & VM_WRITE))
8414 goto bad_area;
8415 @@ -572,7 +563,7 @@
8416 case 1: /* read, present */
8417 goto bad_area;
8418 case 0: /* read, not present */
8419 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8420 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8421 goto bad_area;
8422 }
8423
8424 @@ -704,7 +695,7 @@
8425 */
8426 out_of_memory:
8427 up_read(&mm->mmap_sem);
8428 - if (tsk->pid == 1) {
8429 + if (is_init(tsk)) {
8430 yield();
8431 down_read(&mm->mmap_sem);
8432 goto survive;
8433 diff -Naur linux-2.6.25/arch/x86/mm/fault_64-xen.c linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c
8434 --- linux-2.6.25/arch/x86/mm/fault_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8435 +++ linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8436 @@ -40,8 +40,7 @@
8437 #define PF_RSVD (1<<3)
8438 #define PF_INSTR (1<<4)
8439
8440 -#ifdef CONFIG_KPROBES
8441 -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8442 +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8443
8444 /* Hook to register for page fault notifications */
8445 int register_page_fault_notifier(struct notifier_block *nb)
8446 @@ -49,11 +48,13 @@
8447 vmalloc_sync_all();
8448 return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8449 }
8450 +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8451
8452 int unregister_page_fault_notifier(struct notifier_block *nb)
8453 {
8454 return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8455 }
8456 +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8457
8458 static inline int notify_page_fault(enum die_val val, const char *str,
8459 struct pt_regs *regs, long err, int trap, int sig)
8460 @@ -67,13 +68,6 @@
8461 };
8462 return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8463 }
8464 -#else
8465 -static inline int notify_page_fault(enum die_val val, const char *str,
8466 - struct pt_regs *regs, long err, int trap, int sig)
8467 -{
8468 - return NOTIFY_DONE;
8469 -}
8470 -#endif
8471
8472 void bust_spinlocks(int yes)
8473 {
8474 @@ -102,7 +96,7 @@
8475 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
8476 unsigned long error_code)
8477 {
8478 - unsigned char *instr;
8479 + unsigned char __user *instr;
8480 int scan_more = 1;
8481 int prefetch = 0;
8482 unsigned char *max_instr;
8483 @@ -111,7 +105,7 @@
8484 if (error_code & PF_INSTR)
8485 return 0;
8486
8487 - instr = (unsigned char *)convert_rip_to_linear(current, regs);
8488 + instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
8489 max_instr = instr + 15;
8490
8491 if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
8492 @@ -122,7 +116,7 @@
8493 unsigned char instr_hi;
8494 unsigned char instr_lo;
8495
8496 - if (__get_user(opcode, instr))
8497 + if (__get_user(opcode, (char __user *)instr))
8498 break;
8499
8500 instr_hi = opcode & 0xf0;
8501 @@ -160,7 +154,7 @@
8502 case 0x00:
8503 /* Prefetch instruction is 0x0F0D or 0x0F18 */
8504 scan_more = 0;
8505 - if (__get_user(opcode, instr))
8506 + if (__get_user(opcode, (char __user *)instr))
8507 break;
8508 prefetch = (instr_lo == 0xF) &&
8509 (opcode == 0x0D || opcode == 0x18);
8510 @@ -176,7 +170,7 @@
8511 static int bad_address(void *p)
8512 {
8513 unsigned long dummy;
8514 - return __get_user(dummy, (unsigned long *)p);
8515 + return __get_user(dummy, (unsigned long __user *)p);
8516 }
8517
8518 void dump_pagetable(unsigned long address)
8519 @@ -248,7 +242,7 @@
8520
8521 int unhandled_signal(struct task_struct *tsk, int sig)
8522 {
8523 - if (tsk->pid == 1)
8524 + if (is_init(tsk))
8525 return 1;
8526 if (tsk->ptrace & PT_PTRACED)
8527 return 0;
8528 @@ -300,7 +294,7 @@
8529 if (pgd_none(*pgd))
8530 set_pgd(pgd, *pgd_ref);
8531 else
8532 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8533 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8534
8535 /* Below here mismatches are bugs because these lower tables
8536 are shared */
8537 @@ -309,7 +303,7 @@
8538 pud_ref = pud_offset(pgd_ref, address);
8539 if (pud_none(*pud_ref))
8540 return -1;
8541 - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
8542 + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
8543 BUG();
8544 pmd = pmd_offset(pud, address);
8545 pmd_ref = pmd_offset(pud_ref, address);
8546 @@ -531,7 +525,7 @@
8547 case PF_PROT: /* read, present */
8548 goto bad_area;
8549 case 0: /* read, not present */
8550 - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8551 + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8552 goto bad_area;
8553 }
8554
8555 @@ -647,7 +641,7 @@
8556 */
8557 out_of_memory:
8558 up_read(&mm->mmap_sem);
8559 - if (current->pid == 1) {
8560 + if (is_init(current)) {
8561 yield();
8562 goto again;
8563 }
8564 @@ -702,7 +696,7 @@
8565 if (pgd_none(*pgd))
8566 set_pgd(pgd, *pgd_ref);
8567 else
8568 - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8569 + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8570 }
8571 spin_unlock(&pgd_lock);
8572 set_bit(pgd_index(address), insync);
8573 diff -Naur linux-2.6.25/arch/x86/mm/highmem_32-xen.c linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c
8574 --- linux-2.6.25/arch/x86/mm/highmem_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8575 +++ linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8576 @@ -38,11 +38,9 @@
8577
8578 idx = type + KM_TYPE_NR*smp_processor_id();
8579 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8580 -#ifdef CONFIG_DEBUG_HIGHMEM
8581 if (!pte_none(*(kmap_pte-idx)))
8582 BUG();
8583 -#endif
8584 - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8585 + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8586
8587 return (void*) vaddr;
8588 }
8589 @@ -62,36 +60,26 @@
8590
8591 void kunmap_atomic(void *kvaddr, enum km_type type)
8592 {
8593 -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
8594 unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
8595 enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
8596
8597 - if (vaddr < FIXADDR_START) { // FIXME
8598 +#ifdef CONFIG_DEBUG_HIGHMEM
8599 + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
8600 dec_preempt_count();
8601 preempt_check_resched();
8602 return;
8603 }
8604 -#endif
8605
8606 -#if defined(CONFIG_DEBUG_HIGHMEM)
8607 if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
8608 BUG();
8609 -
8610 - /*
8611 - * force other mappings to Oops if they'll try to access
8612 - * this pte without first remap it
8613 - */
8614 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8615 - __flush_tlb_one(vaddr);
8616 -#elif defined(CONFIG_XEN)
8617 +#endif
8618 /*
8619 - * We must ensure there are no dangling pagetable references when
8620 - * returning memory to Xen (decrease_reservation).
8621 - * XXX TODO: We could make this faster by only zapping when
8622 - * kmap_flush_unused is called but that is trickier and more invasive.
8623 + * Force other mappings to Oops if they'll try to access this pte
8624 + * without first remap it. Keeping stale mappings around is a bad idea
8625 + * also, in case the page changes cacheability attributes or becomes
8626 + * a protected page in a hypervisor.
8627 */
8628 - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8629 -#endif
8630 + kpte_clear_flush(kmap_pte-idx, vaddr);
8631
8632 dec_preempt_count();
8633 preempt_check_resched();
8634 @@ -110,7 +98,6 @@
8635 idx = type + KM_TYPE_NR*smp_processor_id();
8636 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8637 set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
8638 - __flush_tlb_one(vaddr);
8639
8640 return (void*) vaddr;
8641 }
8642 diff -Naur linux-2.6.25/arch/x86/mm/hypervisor.c linux-2.6.25-xen/arch/x86/mm/hypervisor.c
8643 --- linux-2.6.25/arch/x86/mm/hypervisor.c 2008-05-23 20:51:11.000000000 +0200
8644 +++ linux-2.6.25-xen/arch/x86/mm/hypervisor.c 2008-05-23 20:39:03.000000000 +0200
8645 @@ -569,7 +569,8 @@
8646 #define MAX_BATCHED_FULL_PTES 32
8647
8648 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
8649 - unsigned long addr, unsigned long end, pgprot_t newprot)
8650 + unsigned long addr, unsigned long end, pgprot_t newprot,
8651 + int dirty_accountable)
8652 {
8653 int rc = 0, i = 0;
8654 mmu_update_t u[MAX_BATCHED_FULL_PTES];
8655 @@ -582,10 +583,14 @@
8656 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
8657 do {
8658 if (pte_present(*pte)) {
8659 + pte_t ptent = pte_modify(*pte, newprot);
8660 +
8661 + if (dirty_accountable && pte_dirty(ptent))
8662 + ptent = pte_mkwrite(ptent);
8663 u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
8664 | ((unsigned long)pte & ~PAGE_MASK)
8665 | MMU_PT_UPDATE_PRESERVE_AD;
8666 - u[i].val = __pte_val(pte_modify(*pte, newprot));
8667 + u[i].val = __pte_val(ptent);
8668 if (++i == MAX_BATCHED_FULL_PTES) {
8669 if ((rc = HYPERVISOR_mmu_update(
8670 &u[0], i, NULL, DOMID_SELF)) != 0)
8671 diff -Naur linux-2.6.25/arch/x86/mm/init_32-xen.c linux-2.6.25-xen/arch/x86/mm/init_32-xen.c
8672 --- linux-2.6.25/arch/x86/mm/init_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8673 +++ linux-2.6.25-xen/arch/x86/mm/init_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8674 @@ -464,16 +464,22 @@
8675 * on Enable
8676 * off Disable
8677 */
8678 -void __init noexec_setup(const char *str)
8679 +static int __init noexec_setup(char *str)
8680 {
8681 - if (!strncmp(str, "on",2) && cpu_has_nx) {
8682 - __supported_pte_mask |= _PAGE_NX;
8683 - disable_nx = 0;
8684 - } else if (!strncmp(str,"off",3)) {
8685 + if (!str || !strcmp(str, "on")) {
8686 + if (cpu_has_nx) {
8687 + __supported_pte_mask |= _PAGE_NX;
8688 + disable_nx = 0;
8689 + }
8690 + } else if (!strcmp(str,"off")) {
8691 disable_nx = 1;
8692 __supported_pte_mask &= ~_PAGE_NX;
8693 - }
8694 + } else
8695 + return -EINVAL;
8696 +
8697 + return 0;
8698 }
8699 +early_param("noexec", noexec_setup);
8700
8701 int nx_enabled = 0;
8702 #ifdef CONFIG_X86_PAE
8703 @@ -516,6 +522,7 @@
8704 pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
8705 else
8706 pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
8707 + pte_update_defer(&init_mm, vaddr, pte);
8708 __flush_tlb_all();
8709 out:
8710 return ret;
8711 @@ -598,18 +605,6 @@
8712 }
8713 }
8714
8715 -static void __init set_max_mapnr_init(void)
8716 -{
8717 -#ifdef CONFIG_HIGHMEM
8718 - num_physpages = highend_pfn;
8719 -#else
8720 - num_physpages = max_low_pfn;
8721 -#endif
8722 -#ifdef CONFIG_FLATMEM
8723 - max_mapnr = num_physpages;
8724 -#endif
8725 -}
8726 -
8727 static struct kcore_list kcore_mem, kcore_vmalloc;
8728
8729 void __init mem_init(void)
8730 @@ -630,8 +625,7 @@
8731 #endif
8732
8733 #ifdef CONFIG_FLATMEM
8734 - if (!mem_map)
8735 - BUG();
8736 + BUG_ON(!mem_map);
8737 #endif
8738
8739 bad_ppro = ppro_with_ram_bug();
8740 @@ -646,17 +640,6 @@
8741 }
8742 #endif
8743
8744 - set_max_mapnr_init();
8745 -
8746 -#ifdef CONFIG_HIGHMEM
8747 - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
8748 -#else
8749 - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
8750 -#endif
8751 - printk("vmalloc area: %lx-%lx, maxmem %lx\n",
8752 - VMALLOC_START,VMALLOC_END,MAXMEM);
8753 - BUG_ON(VMALLOC_START > VMALLOC_END);
8754 -
8755 /* this will put all low memory onto the freelists */
8756 totalram_pages += free_all_bootmem();
8757 /* XEN: init and count low-mem pages outside initial allocation. */
8758 @@ -694,6 +677,48 @@
8759 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
8760 );
8761
8762 +#if 1 /* double-sanity-check paranoia */
8763 + printk("virtual kernel memory layout:\n"
8764 + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8765 +#ifdef CONFIG_HIGHMEM
8766 + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8767 +#endif
8768 + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
8769 + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
8770 + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
8771 + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
8772 + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
8773 + FIXADDR_START, FIXADDR_TOP,
8774 + (FIXADDR_TOP - FIXADDR_START) >> 10,
8775 +
8776 +#ifdef CONFIG_HIGHMEM
8777 + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
8778 + (LAST_PKMAP*PAGE_SIZE) >> 10,
8779 +#endif
8780 +
8781 + VMALLOC_START, VMALLOC_END,
8782 + (VMALLOC_END - VMALLOC_START) >> 20,
8783 +
8784 + (unsigned long)__va(0), (unsigned long)high_memory,
8785 + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
8786 +
8787 + (unsigned long)&__init_begin, (unsigned long)&__init_end,
8788 + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
8789 +
8790 + (unsigned long)&_etext, (unsigned long)&_edata,
8791 + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
8792 +
8793 + (unsigned long)&_text, (unsigned long)&_etext,
8794 + ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
8795 +
8796 +#ifdef CONFIG_HIGHMEM
8797 + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
8798 + BUG_ON(VMALLOC_END > PKMAP_BASE);
8799 +#endif
8800 + BUG_ON(VMALLOC_START > VMALLOC_END);
8801 + BUG_ON((unsigned long)high_memory > VMALLOC_START);
8802 +#endif /* double-sanity-check paranoia */
8803 +
8804 #ifdef CONFIG_X86_PAE
8805 if (!cpu_has_pae)
8806 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
8807 @@ -724,7 +749,7 @@
8808 int arch_add_memory(int nid, u64 start, u64 size)
8809 {
8810 struct pglist_data *pgdata = &contig_page_data;
8811 - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
8812 + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
8813 unsigned long start_pfn = start >> PAGE_SHIFT;
8814 unsigned long nr_pages = size >> PAGE_SHIFT;
8815
8816 diff -Naur linux-2.6.25/arch/x86/mm/init_64-xen.c linux-2.6.25-xen/arch/x86/mm/init_64-xen.c
8817 --- linux-2.6.25/arch/x86/mm/init_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8818 +++ linux-2.6.25-xen/arch/x86/mm/init_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8819 @@ -61,8 +61,6 @@
8820
8821 extern unsigned long *contiguous_bitmap;
8822
8823 -static unsigned long dma_reserve __initdata;
8824 -
8825 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
8826 extern unsigned long start_pfn;
8827
8828 @@ -416,7 +414,6 @@
8829
8830 /* actually usually some more */
8831 if (size >= LARGE_PAGE_SIZE) {
8832 - printk("SMBIOS area too long %lu\n", size);
8833 return NULL;
8834 }
8835 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
8836 @@ -438,13 +435,15 @@
8837 #endif
8838
8839 static void __meminit
8840 -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
8841 +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
8842 {
8843 - int i, k;
8844 + int i = pmd_index(address);
8845
8846 - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
8847 + for (; i < PTRS_PER_PMD; i++) {
8848 unsigned long pte_phys;
8849 + pmd_t *pmd = pmd_page + i;
8850 pte_t *pte, *pte_save;
8851 + int k;
8852
8853 if (address >= end) {
8854 if (!after_bootmem)
8855 @@ -452,6 +451,12 @@
8856 set_pmd(pmd, __pmd(0));
8857 break;
8858 }
8859 +
8860 + if (__pmd_val(*pmd)) {
8861 + address += PMD_SIZE;
8862 + continue;
8863 + }
8864 +
8865 pte = alloc_static_page(&pte_phys);
8866 pte_save = pte;
8867 for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
8868 @@ -474,40 +479,35 @@
8869 static void __meminit
8870 phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
8871 {
8872 - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
8873 -
8874 - if (pmd_none(*pmd)) {
8875 - spin_lock(&init_mm.page_table_lock);
8876 - phys_pmd_init(pmd, address, end);
8877 - spin_unlock(&init_mm.page_table_lock);
8878 - __flush_tlb_all();
8879 - }
8880 + pmd_t *pmd = pmd_offset(pud,0);
8881 + spin_lock(&init_mm.page_table_lock);
8882 + phys_pmd_init(pmd, address, end);
8883 + spin_unlock(&init_mm.page_table_lock);
8884 + __flush_tlb_all();
8885 }
8886
8887 -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
8888 +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
8889 {
8890 - long i = pud_index(address);
8891 -
8892 - pud = pud + i;
8893 -
8894 - if (after_bootmem && pud_val(*pud)) {
8895 - phys_pmd_update(pud, address, end);
8896 - return;
8897 - }
8898 + int i = pud_index(addr);
8899
8900 - for (; i < PTRS_PER_PUD; pud++, i++) {
8901 - unsigned long paddr, pmd_phys;
8902 + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
8903 + unsigned long pmd_phys;
8904 + pud_t *pud = pud_page + pud_index(addr);
8905 pmd_t *pmd;
8906
8907 - paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
8908 - if (paddr >= end)
8909 + if (addr >= end)
8910 break;
8911
8912 + if (__pud_val(*pud)) {
8913 + phys_pmd_update(pud, addr, end);
8914 + continue;
8915 + }
8916 +
8917 pmd = alloc_static_page(&pmd_phys);
8918 early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
8919 spin_lock(&init_mm.page_table_lock);
8920 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
8921 - phys_pmd_init(pmd, paddr, end);
8922 + phys_pmd_init(pmd, addr, end);
8923 spin_unlock(&init_mm.page_table_lock);
8924 }
8925 __flush_tlb();
8926 @@ -771,69 +771,18 @@
8927 #endif
8928 }
8929
8930 -/* Compute zone sizes for the DMA and DMA32 zones in a node. */
8931 -__init void
8932 -size_zones(unsigned long *z, unsigned long *h,
8933 - unsigned long start_pfn, unsigned long end_pfn)
8934 -{
8935 - int i;
8936 - unsigned long w;
8937 -
8938 - for (i = 0; i < MAX_NR_ZONES; i++)
8939 - z[i] = 0;
8940 -
8941 - if (start_pfn < MAX_DMA_PFN)
8942 - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
8943 - if (start_pfn < MAX_DMA32_PFN) {
8944 - unsigned long dma32_pfn = MAX_DMA32_PFN;
8945 - if (dma32_pfn > end_pfn)
8946 - dma32_pfn = end_pfn;
8947 - z[ZONE_DMA32] = dma32_pfn - start_pfn;
8948 - }
8949 - z[ZONE_NORMAL] = end_pfn - start_pfn;
8950 -
8951 - /* Remove lower zones from higher ones. */
8952 - w = 0;
8953 - for (i = 0; i < MAX_NR_ZONES; i++) {
8954 - if (z[i])
8955 - z[i] -= w;
8956 - w += z[i];
8957 - }
8958 -
8959 - /* Compute holes */
8960 - w = start_pfn;
8961 - for (i = 0; i < MAX_NR_ZONES; i++) {
8962 - unsigned long s = w;
8963 - w += z[i];
8964 - h[i] = e820_hole_size(s, w);
8965 - }
8966 -
8967 - /* Add the space pace needed for mem_map to the holes too. */
8968 - for (i = 0; i < MAX_NR_ZONES; i++)
8969 - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
8970 -
8971 - /* The 16MB DMA zone has the kernel and other misc mappings.
8972 - Account them too */
8973 - if (h[ZONE_DMA]) {
8974 - h[ZONE_DMA] += dma_reserve;
8975 - if (h[ZONE_DMA] >= z[ZONE_DMA]) {
8976 - printk(KERN_WARNING
8977 - "Kernel too large and filling up ZONE_DMA?\n");
8978 - h[ZONE_DMA] = z[ZONE_DMA];
8979 - }
8980 - }
8981 -}
8982 -
8983 #ifndef CONFIG_NUMA
8984 void __init paging_init(void)
8985 {
8986 - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
8987 + unsigned long max_zone_pfns[MAX_NR_ZONES];
8988 + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
8989 + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
8990 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
8991 + max_zone_pfns[ZONE_NORMAL] = end_pfn;
8992
8993 memory_present(0, 0, end_pfn);
8994 sparse_init();
8995 - size_zones(zones, holes, 0, end_pfn);
8996 - free_area_init_node(0, NODE_DATA(0), zones,
8997 - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
8998 + free_area_init_nodes(max_zone_pfns);
8999
9000 init_mm.context.pinned = 1;
9001 }
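
The hunk above drops the hand-rolled size_zones()/e820 hole arithmetic in favour of the generic free_area_init_nodes() interface: the architecture only reports the highest PFN each zone may reach, and the core derives zone sizes and holes from the registered active ranges. A minimal restatement of the convention as used above:

    unsigned long max_zone_pfns[MAX_NR_ZONES];

    memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
    max_zone_pfns[ZONE_DMA]    = MAX_DMA_PFN;   /* below 16 MB */
    max_zone_pfns[ZONE_DMA32]  = MAX_DMA32_PFN; /* below 4 GB  */
    max_zone_pfns[ZONE_NORMAL] = end_pfn;       /* the rest    */
    free_area_init_nodes(max_zone_pfns);
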
9002 @@ -887,36 +836,23 @@
9003
9004 #ifdef CONFIG_MEMORY_HOTPLUG
9005 /*
9006 - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9007 - * via probe interface of sysfs. If acpi notifies hot-add event, then it
9008 - * can tell node id by searching dsdt. But, probe interface doesn't have
9009 - * node id. So, return 0 as node id at this time.
9010 - */
9011 -#ifdef CONFIG_NUMA
9012 -int memory_add_physaddr_to_nid(u64 start)
9013 -{
9014 - return 0;
9015 -}
9016 -#endif
9017 -
9018 -/*
9019 * Memory is added always to NORMAL zone. This means you will never get
9020 * additional DMA/DMA32 memory.
9021 */
9022 int arch_add_memory(int nid, u64 start, u64 size)
9023 {
9024 struct pglist_data *pgdat = NODE_DATA(nid);
9025 - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9026 + struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9027 unsigned long start_pfn = start >> PAGE_SHIFT;
9028 unsigned long nr_pages = size >> PAGE_SHIFT;
9029 int ret;
9030
9031 + init_memory_mapping(start, (start + size -1));
9032 +
9033 ret = __add_pages(zone, start_pfn, nr_pages);
9034 if (ret)
9035 goto error;
9036
9037 - init_memory_mapping(start, (start + size -1));
9038 -
9039 return ret;
9040 error:
9041 printk("%s: Problem encountered in __add_pages!\n", __func__);
9042 @@ -930,7 +866,17 @@
9043 }
9044 EXPORT_SYMBOL_GPL(remove_memory);
9045
9046 -#else /* CONFIG_MEMORY_HOTPLUG */
9047 +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9048 +int memory_add_physaddr_to_nid(u64 start)
9049 +{
9050 + return 0;
9051 +}
9052 +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9053 +#endif
9054 +
9055 +#endif /* CONFIG_MEMORY_HOTPLUG */
9056 +
9057 +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9058 /*
9059 * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9060 * just online the pages.
9061 @@ -956,7 +902,7 @@
9062 }
9063 return err;
9064 }
9065 -#endif /* CONFIG_MEMORY_HOTPLUG */
9066 +#endif
9067
9068 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9069 kcore_vsyscall;
9070 @@ -973,12 +919,6 @@
9071
9072 pci_iommu_alloc();
9073
9074 - /* How many end-of-memory variables you have, grandma! */
9075 - max_low_pfn = end_pfn;
9076 - max_pfn = end_pfn;
9077 - num_physpages = end_pfn;
9078 - high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9079 -
9080 /* clear the zero-page */
9081 memset(empty_zero_page, 0, PAGE_SIZE);
9082
9083 @@ -996,7 +936,8 @@
9084 init_page_count(pfn_to_page(pfn));
9085 totalram_pages++;
9086 }
9087 - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9088 + reservedpages = end_pfn - totalram_pages -
9089 + absent_pages_in_range(0, end_pfn);
9090
9091 after_bootmem = 1;
9092
9093 @@ -1103,15 +1044,34 @@
9094
9095 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9096 {
9097 - /* Should check here against the e820 map to avoid double free */
9098 #ifdef CONFIG_NUMA
9099 int nid = phys_to_nid(phys);
9100 +#endif
9101 + unsigned long pfn = phys >> PAGE_SHIFT;
9102 + if (pfn >= end_pfn) {
9103 + /* This can happen with kdump kernels when accessing firmware
9104 + tables. */
9105 + if (pfn < end_pfn_map)
9106 + return;
9107 + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9108 + phys, len);
9109 + return;
9110 + }
9111 +
9112 + /* Should check here against the e820 map to avoid double free */
9113 +#ifdef CONFIG_NUMA
9114 reserve_bootmem_node(NODE_DATA(nid), phys, len);
9115 #else
9116 reserve_bootmem(phys, len);
9117 #endif
9118 - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9119 +#ifndef CONFIG_XEN
9120 + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9121 + static unsigned long dma_reserve __initdata;
9122 +
9123 dma_reserve += len / PAGE_SIZE;
9124 + set_dma_reserve(dma_reserve);
9125 + }
9126 +#endif
9127 }
9128
9129 int kern_addr_valid(unsigned long addr)
9130 diff -Naur linux-2.6.25/arch/x86/mm/ioremap_32-xen.c linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c
9131 --- linux-2.6.25/arch/x86/mm/ioremap_32-xen.c 2008-05-23 20:51:11.000000000 +0200
9132 +++ linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c 2008-05-23 20:39:03.000000000 +0200
9133 @@ -12,7 +12,7 @@
9134 #include <linux/init.h>
9135 #include <linux/slab.h>
9136 #include <linux/module.h>
9137 -#include <asm/io.h>
9138 +#include <linux/io.h>
9139 #include <asm/fixmap.h>
9140 #include <asm/cacheflush.h>
9141 #include <asm/tlbflush.h>
9142 @@ -118,7 +118,7 @@
9143 if (domid == DOMID_SELF)
9144 return -EINVAL;
9145
9146 - vma->vm_flags |= VM_IO | VM_RESERVED;
9147 + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
9148
9149 vma->vm_mm->context.has_foreign_mappings = 1;
9150
9151 @@ -203,6 +203,7 @@
9152 void __iomem * addr;
9153 struct vm_struct * area;
9154 unsigned long offset, last_addr;
9155 + pgprot_t prot;
9156 domid_t domid = DOMID_IO;
9157
9158 /* Don't allow wraparound or zero size */
9159 @@ -234,6 +235,8 @@
9160 domid = DOMID_SELF;
9161 }
9162
9163 + prot = __pgprot(_KERNPG_TABLE | flags);
9164 +
9165 /*
9166 * Mappings have to be page-aligned
9167 */
9168 @@ -249,10 +252,9 @@
9169 return NULL;
9170 area->phys_addr = phys_addr;
9171 addr = (void __iomem *) area->addr;
9172 - flags |= _KERNPG_TABLE;
9173 if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
9174 phys_addr>>PAGE_SHIFT,
9175 - size, __pgprot(flags), domid)) {
9176 + size, prot, domid)) {
9177 vunmap((void __force *) addr);
9178 return NULL;
9179 }
9180 diff -Naur linux-2.6.25/arch/x86/mm/pageattr_64-xen.c linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c
9181 --- linux-2.6.25/arch/x86/mm/pageattr_64-xen.c 2008-05-23 20:51:11.000000000 +0200
9182 +++ linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c 2008-05-23 20:39:03.000000000 +0200
9183 @@ -371,8 +371,8 @@
9184 BUG_ON(pud_none(*pud));
9185 pmd = pmd_offset(pud, address);
9186 BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9187 - pgprot_val(ref_prot) |= _PAGE_PSE;
9188 large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9189 + large_pte = pte_mkhuge(large_pte);
9190 set_pte((pte_t *)pmd, large_pte);
9191 }
9192
9193 @@ -382,32 +382,28 @@
9194 {
9195 pte_t *kpte;
9196 struct page *kpte_page;
9197 - unsigned kpte_flags;
9198 pgprot_t ref_prot2;
9199 kpte = lookup_address(address);
9200 if (!kpte) return 0;
9201 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9202 - kpte_flags = pte_val(*kpte);
9203 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9204 - if ((kpte_flags & _PAGE_PSE) == 0) {
9205 + if (!pte_huge(*kpte)) {
9206 set_pte(kpte, pfn_pte(pfn, prot));
9207 } else {
9208 /*
9209 * split_large_page will take the reference for this
9210 * change_page_attr on the split page.
9211 */
9212 -
9213 struct page *split;
9214 - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9215 -
9216 + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9217 split = split_large_page(address, prot, ref_prot2);
9218 if (!split)
9219 return -ENOMEM;
9220 - set_pte(kpte,mk_pte(split, ref_prot2));
9221 + set_pte(kpte, mk_pte(split, ref_prot2));
9222 kpte_page = split;
9223 - }
9224 + }
9225 page_private(kpte_page)++;
9226 - } else if ((kpte_flags & _PAGE_PSE) == 0) {
9227 + } else if (!pte_huge(*kpte)) {
9228 set_pte(kpte, pfn_pte(pfn, ref_prot));
9229 BUG_ON(page_private(kpte_page) == 0);
9230 page_private(kpte_page)--;
9231 @@ -464,10 +460,12 @@
9232 * lowmem */
9233 if (__pa(address) < KERNEL_TEXT_SIZE) {
9234 unsigned long addr2;
9235 - pgprot_t prot2 = prot;
9236 + pgprot_t prot2;
9237 addr2 = __START_KERNEL_map + __pa(address);
9238 - pgprot_val(prot2) &= ~_PAGE_NX;
9239 - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9240 + /* Make sure the kernel mappings stay executable */
9241 + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9242 + err = __change_page_attr(addr2, pfn, prot2,
9243 + PAGE_KERNEL_EXEC);
9244 }
9245 }
9246 up_write(&init_mm.mmap_sem);
9247 diff -Naur linux-2.6.25/arch/x86/mm/pgtable_32-xen.c linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c
9248 --- linux-2.6.25/arch/x86/mm/pgtable_32-xen.c 2008-05-23 20:51:11.000000000 +0200
9249 +++ linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c 2008-05-23 20:39:03.000000000 +0200
9250 @@ -68,7 +68,9 @@
9251 printk(KERN_INFO "%lu pages writeback\n",
9252 global_page_state(NR_WRITEBACK));
9253 printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
9254 - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
9255 + printk(KERN_INFO "%lu pages slab\n",
9256 + global_page_state(NR_SLAB_RECLAIMABLE) +
9257 + global_page_state(NR_SLAB_UNRECLAIMABLE));
9258 printk(KERN_INFO "%lu pages pagetables\n",
9259 global_page_state(NR_PAGETABLE));
9260 }
9261 @@ -108,18 +110,11 @@
9262 __flush_tlb_one(vaddr);
9263 }
9264
9265 -static int nr_fixmaps = 0;
9266 +static int fixmaps;
9267 unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
9268 -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
9269 +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
9270 EXPORT_SYMBOL(__FIXADDR_TOP);
9271
9272 -void __init set_fixaddr_top(unsigned long top)
9273 -{
9274 - BUG_ON(nr_fixmaps > 0);
9275 - hypervisor_virt_start = top;
9276 - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
9277 -}
9278 -
9279 void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
9280 {
9281 unsigned long address = __fix_to_virt(idx);
9282 @@ -141,7 +136,21 @@
9283 if (HYPERVISOR_update_va_mapping(address, pte,
9284 UVMF_INVLPG|UVMF_ALL))
9285 BUG();
9286 - nr_fixmaps++;
9287 + fixmaps++;
9288 +}
9289 +
9290 +/**
9291 + * reserve_top_address - reserves a hole in the top of kernel address space
9292 + * @reserve - size of hole to reserve
9293 + *
9294 + * Can be used to relocate the fixmap area and poke a hole in the top
9295 + * of kernel address space to make room for a hypervisor.
9296 + */
9297 +void __init reserve_top_address(unsigned long reserve)
9298 +{
9299 + BUG_ON(fixmaps > 0);
9300 + __FIXADDR_TOP = -reserve - PAGE_SIZE;
9301 + __VMALLOC_RESERVE += reserve;
9302 }
9303
9304 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
9305 diff -Naur linux-2.6.25/arch/x86/pci/irq-xen.c linux-2.6.25-xen/arch/x86/pci/irq-xen.c
9306 --- linux-2.6.25/arch/x86/pci/irq-xen.c 2008-05-23 20:51:11.000000000 +0200
9307 +++ linux-2.6.25-xen/arch/x86/pci/irq-xen.c 2008-05-23 20:39:03.000000000 +0200
9308 @@ -991,10 +991,6 @@
9309 pci_name(bridge), 'A' + pin, irq);
9310 }
9311 if (irq >= 0) {
9312 - if (use_pci_vector() &&
9313 - !platform_legacy_irq(irq))
9314 - irq = IO_APIC_VECTOR(irq);
9315 -
9316 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9317 pci_name(dev), 'A' + pin, irq);
9318 dev->irq = irq;
9319 @@ -1155,10 +1151,6 @@
9320 }
9321 dev = temp_dev;
9322 if (irq >= 0) {
9323 -#ifdef CONFIG_PCI_MSI
9324 - if (!platform_legacy_irq(irq))
9325 - irq = IO_APIC_VECTOR(irq);
9326 -#endif
9327 printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9328 pci_name(dev), 'A' + pin, irq);
9329 dev->irq = irq;
9330 @@ -1179,33 +1171,3 @@
9331 }
9332 return 0;
9333 }
9334 -
9335 -int pci_vector_resources(int last, int nr_released)
9336 -{
9337 - int count = nr_released;
9338 -
9339 - int next = last;
9340 - int offset = (last % 8);
9341 -
9342 - while (next < FIRST_SYSTEM_VECTOR) {
9343 - next += 8;
9344 -#ifdef CONFIG_X86_64
9345 - if (next == IA32_SYSCALL_VECTOR)
9346 - continue;
9347 -#else
9348 - if (next == SYSCALL_VECTOR)
9349 - continue;
9350 -#endif
9351 - count++;
9352 - if (next >= FIRST_SYSTEM_VECTOR) {
9353 - if (offset%8) {
9354 - next = FIRST_DEVICE_VECTOR + offset;
9355 - offset++;
9356 - continue;
9357 - }
9358 - count--;
9359 - }
9360 - }
9361 -
9362 - return count;
9363 -}
9364 diff -Naur linux-2.6.25/drivers/char/tpm/tpm_xen.c linux-2.6.25-xen/drivers/char/tpm/tpm_xen.c
9365 --- linux-2.6.25/drivers/char/tpm/tpm_xen.c 2008-05-23 20:51:15.000000000 +0200
9366 +++ linux-2.6.25-xen/drivers/char/tpm/tpm_xen.c 2008-05-23 20:39:03.000000000 +0200
9367 @@ -85,8 +85,7 @@
9368
9369 /* local function prototypes */
9370 static irqreturn_t tpmif_int(int irq,
9371 - void *tpm_priv,
9372 - struct pt_regs *ptregs);
9373 + void *tpm_priv);
9374 static void tpmif_rx_action(unsigned long unused);
9375 static int tpmif_connect(struct xenbus_device *dev,
9376 struct tpm_private *tp,
9377 @@ -559,7 +558,7 @@
9378 }
9379
9380
9381 -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9382 +static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9383 {
9384 struct tpm_private *tp = tpm_priv;
9385 unsigned long flags;
9386 diff -Naur linux-2.6.25/drivers/pci/Kconfig linux-2.6.25-xen/drivers/pci/Kconfig
9387 --- linux-2.6.25/drivers/pci/Kconfig 2008-04-17 04:49:44.000000000 +0200
9388 +++ linux-2.6.25-xen/drivers/pci/Kconfig 2008-05-23 20:39:03.000000000 +0200
9389 @@ -45,7 +45,7 @@
9390 config HT_IRQ
9391 bool "Interrupts on hypertransport devices"
9392 default y
9393 - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9394 + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9395 help
9396 This allows native hypertransport devices to use interrupts.
9397
9398 diff -Naur linux-2.6.25/drivers/xen/balloon/balloon.c linux-2.6.25-xen/drivers/xen/balloon/balloon.c
9399 --- linux-2.6.25/drivers/xen/balloon/balloon.c 2008-05-23 20:51:11.000000000 +0200
9400 +++ linux-2.6.25-xen/drivers/xen/balloon/balloon.c 2008-05-23 20:39:03.000000000 +0200
9401 @@ -84,7 +84,7 @@
9402 /* VM /proc information for memory */
9403 extern unsigned long totalram_pages;
9404
9405 -#ifndef MODULE
9406 +#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
9407 extern unsigned long totalhigh_pages;
9408 #define inc_totalhigh_pages() (totalhigh_pages++)
9409 #define dec_totalhigh_pages() (totalhigh_pages--)
9410 diff -Naur linux-2.6.25/drivers/xen/blkback/blkback.c linux-2.6.25-xen/drivers/xen/blkback/blkback.c
9411 --- linux-2.6.25/drivers/xen/blkback/blkback.c 2008-05-23 20:51:11.000000000 +0200
9412 +++ linux-2.6.25-xen/drivers/xen/blkback/blkback.c 2008-05-23 20:39:03.000000000 +0200
9413 @@ -288,7 +288,7 @@
9414 wake_up(&blkif->wq);
9415 }
9416
9417 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9418 +irqreturn_t blkif_be_int(int irq, void *dev_id)
9419 {
9420 blkif_notify_work(dev_id);
9421 return IRQ_HANDLED;
9422 diff -Naur linux-2.6.25/drivers/xen/blkback/common.h linux-2.6.25-xen/drivers/xen/blkback/common.h
9423 --- linux-2.6.25/drivers/xen/blkback/common.h 2008-05-23 20:51:11.000000000 +0200
9424 +++ linux-2.6.25-xen/drivers/xen/blkback/common.h 2008-05-23 20:39:03.000000000 +0200
9425 @@ -130,7 +130,7 @@
9426
9427 void blkif_xenbus_init(void);
9428
9429 -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9430 +irqreturn_t blkif_be_int(int irq, void *dev_id);
9431 int blkif_schedule(void *arg);
9432
9433 int blkback_barrier(struct xenbus_transaction xbt,
9434 diff -Naur linux-2.6.25/drivers/xen/blkfront/blkfront.c linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c
9435 --- linux-2.6.25/drivers/xen/blkfront/blkfront.c 2008-05-23 20:51:11.000000000 +0200
9436 +++ linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c 2008-05-23 20:39:03.000000000 +0200
9437 @@ -69,7 +69,7 @@
9438
9439 static void kick_pending_request_queues(struct blkfront_info *);
9440
9441 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9442 +static irqreturn_t blkif_int(int irq, void *dev_id);
9443 static void blkif_restart_queue(void *arg);
9444 static void blkif_recover(struct blkfront_info *);
9445 static void blkif_completion(struct blk_shadow *);
9446 @@ -698,7 +698,7 @@
9447 }
9448
9449
9450 -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9451 +static irqreturn_t blkif_int(int irq, void *dev_id)
9452 {
9453 struct request *req;
9454 blkif_response_t *bret;
9455 diff -Naur linux-2.6.25/drivers/xen/blktap/blktap.c linux-2.6.25-xen/drivers/xen/blktap/blktap.c
9456 --- linux-2.6.25/drivers/xen/blktap/blktap.c 2008-05-23 20:51:11.000000000 +0200
9457 +++ linux-2.6.25-xen/drivers/xen/blktap/blktap.c 2008-05-23 20:39:03.000000000 +0200
9458 @@ -1175,7 +1175,7 @@
9459 wake_up(&blkif->wq);
9460 }
9461
9462 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9463 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
9464 {
9465 blkif_notify_work(dev_id);
9466 return IRQ_HANDLED;
9467 diff -Naur linux-2.6.25/drivers/xen/blktap/common.h linux-2.6.25-xen/drivers/xen/blktap/common.h
9468 --- linux-2.6.25/drivers/xen/blktap/common.h 2008-05-23 20:51:11.000000000 +0200
9469 +++ linux-2.6.25-xen/drivers/xen/blktap/common.h 2008-05-23 20:39:03.000000000 +0200
9470 @@ -112,7 +112,7 @@
9471
9472 void tap_blkif_xenbus_init(void);
9473
9474 -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9475 +irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
9476 int tap_blkif_schedule(void *arg);
9477
9478 int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
9479 diff -Naur linux-2.6.25/drivers/xen/console/console.c linux-2.6.25-xen/drivers/xen/console/console.c
9480 --- linux-2.6.25/drivers/xen/console/console.c 2008-05-23 20:51:11.000000000 +0200
9481 +++ linux-2.6.25-xen/drivers/xen/console/console.c 2008-05-23 20:39:03.000000000 +0200
9482 @@ -345,7 +345,7 @@
9483 static int xencons_priv_irq;
9484 static char x_char;
9485
9486 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
9487 +void xencons_rx(char *buf, unsigned len)
9488 {
9489 int i;
9490 unsigned long flags;
9491 @@ -370,8 +370,7 @@
9492 if (time_before(jiffies, sysrq_timeout)) {
9493 spin_unlock_irqrestore(
9494 &xencons_lock, flags);
9495 - handle_sysrq(
9496 - buf[i], regs, xencons_tty);
9497 + handle_sysrq(buf[i], xencons_tty);
9498 spin_lock_irqsave(
9499 &xencons_lock, flags);
9500 continue;
9501 @@ -436,14 +435,13 @@
9502 }
9503
9504 /* Privileged receive callback and transmit kicker. */
9505 -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
9506 - struct pt_regs *regs)
9507 +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
9508 {
9509 static char rbuf[16];
9510 int l;
9511
9512 while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
9513 - xencons_rx(rbuf, l, regs);
9514 + xencons_rx(rbuf, l);
9515
9516 xencons_tx();
9517
9518 diff -Naur linux-2.6.25/drivers/xen/console/xencons_ring.c linux-2.6.25-xen/drivers/xen/console/xencons_ring.c
9519 --- linux-2.6.25/drivers/xen/console/xencons_ring.c 2008-05-23 20:51:11.000000000 +0200
9520 +++ linux-2.6.25-xen/drivers/xen/console/xencons_ring.c 2008-05-23 20:39:03.000000000 +0200
9521 @@ -83,7 +83,7 @@
9522 return sent;
9523 }
9524
9525 -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
9526 +static irqreturn_t handle_input(int irq, void *unused)
9527 {
9528 struct xencons_interface *intf = xencons_interface();
9529 XENCONS_RING_IDX cons, prod;
9530 @@ -94,7 +94,7 @@
9531 BUG_ON((prod - cons) > sizeof(intf->in));
9532
9533 while (cons != prod) {
9534 - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
9535 + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
9536 cons++;
9537 }
9538
9539 diff -Naur linux-2.6.25/drivers/xen/core/evtchn.c linux-2.6.25-xen/drivers/xen/core/evtchn.c
9540 --- linux-2.6.25/drivers/xen/core/evtchn.c 2008-05-23 20:51:11.000000000 +0200
9541 +++ linux-2.6.25-xen/drivers/xen/core/evtchn.c 2008-05-23 20:39:03.000000000 +0200
9542 @@ -507,7 +507,7 @@
9543
9544 int bind_caller_port_to_irqhandler(
9545 unsigned int caller_port,
9546 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9547 + irq_handler_t handler,
9548 unsigned long irqflags,
9549 const char *devname,
9550 void *dev_id)
9551 @@ -530,7 +530,7 @@
9552
9553 int bind_listening_port_to_irqhandler(
9554 unsigned int remote_domain,
9555 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9556 + irq_handler_t handler,
9557 unsigned long irqflags,
9558 const char *devname,
9559 void *dev_id)
9560 @@ -554,7 +554,7 @@
9561 int bind_interdomain_evtchn_to_irqhandler(
9562 unsigned int remote_domain,
9563 unsigned int remote_port,
9564 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9565 + irq_handler_t handler,
9566 unsigned long irqflags,
9567 const char *devname,
9568 void *dev_id)
9569 @@ -578,7 +578,7 @@
9570 int bind_virq_to_irqhandler(
9571 unsigned int virq,
9572 unsigned int cpu,
9573 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9574 + irq_handler_t handler,
9575 unsigned long irqflags,
9576 const char *devname,
9577 void *dev_id)
9578 @@ -602,7 +602,7 @@
9579 int bind_ipi_to_irqhandler(
9580 unsigned int ipi,
9581 unsigned int cpu,
9582 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9583 + irq_handler_t handler,
9584 unsigned long irqflags,
9585 const char *devname,
9586 void *dev_id)
9587 @@ -687,15 +687,7 @@
9588 return 0;
9589 }
9590
9591 -static void shutdown_dynirq(unsigned int irq)
9592 -{
9593 - int evtchn = evtchn_from_irq(irq);
9594 -
9595 - if (VALID_EVTCHN(evtchn))
9596 - mask_evtchn(evtchn);
9597 -}
9598 -
9599 -static void enable_dynirq(unsigned int irq)
9600 +static void unmask_dynirq(unsigned int irq)
9601 {
9602 int evtchn = evtchn_from_irq(irq);
9603
9604 @@ -703,7 +695,7 @@
9605 unmask_evtchn(evtchn);
9606 }
9607
9608 -static void disable_dynirq(unsigned int irq)
9609 +static void mask_dynirq(unsigned int irq)
9610 {
9611 int evtchn = evtchn_from_irq(irq);
9612
9613 @@ -731,12 +723,12 @@
9614 unmask_evtchn(evtchn);
9615 }
9616
9617 -static struct hw_interrupt_type dynirq_type = {
9618 - .typename = "Dynamic-irq",
9619 +static struct irq_chip dynirq_chip = {
9620 + .name = "Dynamic-irq",
9621 .startup = startup_dynirq,
9622 - .shutdown = shutdown_dynirq,
9623 - .enable = enable_dynirq,
9624 - .disable = disable_dynirq,
9625 + .mask = mask_dynirq,
9626 + .unmask = unmask_dynirq,
9627 + .mask_ack = ack_dynirq,
9628 .ack = ack_dynirq,
9629 .end = end_dynirq,
9630 #ifdef CONFIG_SMP
9631 @@ -820,12 +812,12 @@
9632 irq_info[irq] = IRQ_UNBOUND;
9633 }
9634
9635 -static void enable_pirq(unsigned int irq)
9636 +static void unmask_pirq(unsigned int irq)
9637 {
9638 startup_pirq(irq);
9639 }
9640
9641 -static void disable_pirq(unsigned int irq)
9642 +static void mask_pirq(unsigned int irq)
9643 {
9644 }
9645
9646 @@ -854,12 +846,14 @@
9647 }
9648 }
9649
9650 -static struct hw_interrupt_type pirq_type = {
9651 +static struct irq_chip pirq_chip = {
9652 + .name = "Phys-irq",
9653 .typename = "Phys-irq",
9654 .startup = startup_pirq,
9655 .shutdown = shutdown_pirq,
9656 - .enable = enable_pirq,
9657 - .disable = disable_pirq,
9658 + .mask = mask_pirq,
9659 + .unmask = unmask_pirq,
9660 + .mask_ack = ack_pirq,
9661 .ack = ack_pirq,
9662 .end = end_pirq,
9663 #ifdef CONFIG_SMP
9664 @@ -1043,7 +1037,8 @@
9665 irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED;
9666 irq_desc[dynirq_to_irq(i)].action = NULL;
9667 irq_desc[dynirq_to_irq(i)].depth = 1;
9668 - irq_desc[dynirq_to_irq(i)].chip = &dynirq_type;
9669 + set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip,
9670 + handle_level_irq, "level");
9671 }
9672
9673 /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
9674 @@ -1059,6 +1054,7 @@
9675 irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
9676 irq_desc[pirq_to_irq(i)].action = NULL;
9677 irq_desc[pirq_to_irq(i)].depth = 1;
9678 - irq_desc[pirq_to_irq(i)].chip = &pirq_type;
9679 + set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip,
9680 + handle_level_irq, "level");
9681 }
9682 }
9683 diff -Naur linux-2.6.25/drivers/xen/core/reboot.c linux-2.6.25-xen/drivers/xen/core/reboot.c
9684 --- linux-2.6.25/drivers/xen/core/reboot.c 2008-05-23 20:51:11.000000000 +0200
9685 +++ linux-2.6.25-xen/drivers/xen/core/reboot.c 2008-05-23 20:39:03.000000000 +0200
9686 @@ -13,6 +13,7 @@
9687
9688 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
9689 #include <xen/platform-compat.h>
9690 +#undef handle_sysrq
9691 #endif
9692
9693 MODULE_LICENSE("Dual BSD/GPL");
9694 @@ -203,7 +204,7 @@
9695
9696 #ifdef CONFIG_MAGIC_SYSRQ
9697 if (sysrq_key != '\0')
9698 - handle_sysrq(sysrq_key, NULL, NULL);
9699 + handle_sysrq(sysrq_key, NULL);
9700 #endif
9701 }
9702
9703 diff -Naur linux-2.6.25/drivers/xen/core/smpboot.c linux-2.6.25-xen/drivers/xen/core/smpboot.c
9704 --- linux-2.6.25/drivers/xen/core/smpboot.c 2008-05-23 20:51:11.000000000 +0200
9705 +++ linux-2.6.25-xen/drivers/xen/core/smpboot.c 2008-05-23 20:39:03.000000000 +0200
9706 @@ -25,8 +25,8 @@
9707 #include <xen/cpu_hotplug.h>
9708 #include <xen/xenbus.h>
9709
9710 -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
9711 -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
9712 +extern irqreturn_t smp_reschedule_interrupt(int, void *);
9713 +extern irqreturn_t smp_call_function_interrupt(int, void *);
9714
9715 extern int local_setup_timer(unsigned int cpu);
9716 extern void local_teardown_timer(unsigned int cpu);
9717 @@ -66,8 +66,6 @@
9718 #if defined(__i386__)
9719 u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
9720 EXPORT_SYMBOL(x86_cpu_to_apicid);
9721 -#elif !defined(CONFIG_X86_IO_APIC)
9722 -unsigned int maxcpus = NR_CPUS;
9723 #endif
9724
9725 void __init prefill_possible_map(void)
9726 diff -Naur linux-2.6.25/drivers/xen/fbfront/xenfb.c linux-2.6.25-xen/drivers/xen/fbfront/xenfb.c
9727 --- linux-2.6.25/drivers/xen/fbfront/xenfb.c 2008-05-23 20:51:11.000000000 +0200
9728 +++ linux-2.6.25-xen/drivers/xen/fbfront/xenfb.c 2008-05-23 20:39:03.000000000 +0200
9729 @@ -523,8 +523,7 @@
9730 .fb_set_par = xenfb_set_par,
9731 };
9732
9733 -static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
9734 - struct pt_regs *regs)
9735 +static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
9736 {
9737 /*
9738 * No in events recognized, simply ignore them all.
9739 diff -Naur linux-2.6.25/drivers/xen/fbfront/xenkbd.c linux-2.6.25-xen/drivers/xen/fbfront/xenkbd.c
9740 --- linux-2.6.25/drivers/xen/fbfront/xenkbd.c 2008-05-23 20:51:11.000000000 +0200
9741 +++ linux-2.6.25-xen/drivers/xen/fbfront/xenkbd.c 2008-05-23 20:39:03.000000000 +0200
9742 @@ -46,7 +46,7 @@
9743 * to do that.
9744 */
9745
9746 -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
9747 +static irqreturn_t input_handler(int rq, void *dev_id)
9748 {
9749 struct xenkbd_info *info = dev_id;
9750 struct xenkbd_page *page = info->page;
9751 diff -Naur linux-2.6.25/drivers/xen/gntdev/gntdev.c linux-2.6.25-xen/drivers/xen/gntdev/gntdev.c
9752 --- linux-2.6.25/drivers/xen/gntdev/gntdev.c 2008-05-23 20:51:11.000000000 +0200
9753 +++ linux-2.6.25-xen/drivers/xen/gntdev/gntdev.c 2008-05-23 20:39:03.000000000 +0200
9754 @@ -755,9 +755,6 @@
9755 BUG();
9756 }
9757
9758 - /* Copy the existing value of the PTE for returning. */
9759 - copy = *ptep;
9760 -
9761 /* Calculate the grant relating to this PTE. */
9762 slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
9763
9764 @@ -772,6 +769,10 @@
9765 GNTDEV_INVALID_HANDLE &&
9766 !xen_feature(XENFEAT_auto_translated_physmap)) {
9767 /* NOT USING SHADOW PAGE TABLES. */
9768 +
9769 + /* Copy the existing value of the PTE for returning. */
9770 + copy = *ptep;
9771 +
9772 gnttab_set_unmap_op(&op, virt_to_machine(ptep),
9773 GNTMAP_contains_pte,
9774 private_data->grants[slot_index]
9775 @@ -784,7 +785,7 @@
9776 op.status);
9777 } else {
9778 /* USING SHADOW PAGE TABLES. */
9779 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9780 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9781 }
9782
9783 /* Finally, we unmap the grant from kernel space. */
9784 @@ -812,7 +813,7 @@
9785 >> PAGE_SHIFT, INVALID_P2M_ENTRY);
9786
9787 } else {
9788 - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9789 + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9790 }
9791
9792 return copy;
9793 diff -Naur linux-2.6.25/drivers/xen/Kconfig linux-2.6.25-xen/drivers/xen/Kconfig
9794 --- linux-2.6.25/drivers/xen/Kconfig 2008-05-23 20:51:14.000000000 +0200
9795 +++ linux-2.6.25-xen/drivers/xen/Kconfig 2008-05-23 20:39:03.000000000 +0200
9796 @@ -278,6 +278,9 @@
9797 config HAVE_IRQ_IGNORE_UNHANDLED
9798 def_bool y
9799
9800 +config GENERIC_HARDIRQS_NO__DO_IRQ
9801 + def_bool y
9802 +
9803 config NO_IDLE_HZ
9804 def_bool y
9805
9806 diff -Naur linux-2.6.25/drivers/xen/netback/accel.c linux-2.6.25-xen/drivers/xen/netback/accel.c
9807 --- linux-2.6.25/drivers/xen/netback/accel.c 2008-05-23 20:51:11.000000000 +0200
9808 +++ linux-2.6.25-xen/drivers/xen/netback/accel.c 2008-05-23 20:39:03.000000000 +0200
9809 @@ -65,7 +65,7 @@
9810
9811 if (IS_ERR(eth_name)) {
9812 /* Probably means not present */
9813 - DPRINTK("%s: no match due to xenbus_read accel error %d\n",
9814 + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
9815 __FUNCTION__, PTR_ERR(eth_name));
9816 return 0;
9817 } else {
9818 diff -Naur linux-2.6.25/drivers/xen/netback/common.h linux-2.6.25-xen/drivers/xen/netback/common.h
9819 --- linux-2.6.25/drivers/xen/netback/common.h 2008-05-23 20:51:11.000000000 +0200
9820 +++ linux-2.6.25-xen/drivers/xen/netback/common.h 2008-05-23 20:39:03.000000000 +0200
9821 @@ -200,7 +200,7 @@
9822
9823 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
9824 struct net_device_stats *netif_be_get_stats(struct net_device *dev);
9825 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9826 +irqreturn_t netif_be_int(int irq, void *dev_id);
9827
9828 static inline int netbk_can_queue(struct net_device *dev)
9829 {
9830 diff -Naur linux-2.6.25/drivers/xen/netback/loopback.c linux-2.6.25-xen/drivers/xen/netback/loopback.c
9831 --- linux-2.6.25/drivers/xen/netback/loopback.c 2008-05-23 20:51:11.000000000 +0200
9832 +++ linux-2.6.25-xen/drivers/xen/netback/loopback.c 2008-05-23 20:39:03.000000000 +0200
9833 @@ -151,7 +151,7 @@
9834 np->stats.rx_bytes += skb->len;
9835 np->stats.rx_packets++;
9836
9837 - if (skb->ip_summed == CHECKSUM_HW) {
9838 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
9839 /* Defer checksum calculation. */
9840 skb->proto_csum_blank = 1;
9841 /* Must be a local packet: assert its integrity. */
9842 diff -Naur linux-2.6.25/drivers/xen/netback/netback.c linux-2.6.25-xen/drivers/xen/netback/netback.c
9843 --- linux-2.6.25/drivers/xen/netback/netback.c 2008-05-23 20:51:11.000000000 +0200
9844 +++ linux-2.6.25-xen/drivers/xen/netback/netback.c 2008-05-23 20:39:03.000000000 +0200
9845 @@ -677,7 +677,7 @@
9846 id = meta[npo.meta_cons].id;
9847 flags = nr_frags ? NETRXF_more_data : 0;
9848
9849 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9850 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9851 flags |= NETRXF_csum_blank | NETRXF_data_validated;
9852 else if (skb->proto_data_valid) /* remote but checksummed? */
9853 flags |= NETRXF_data_validated;
9854 @@ -1441,7 +1441,7 @@
9855 netif_idx_release(netif_page_index(page));
9856 }
9857
9858 -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9859 +irqreturn_t netif_be_int(int irq, void *dev_id)
9860 {
9861 netif_t *netif = dev_id;
9862
9863 @@ -1508,7 +1508,7 @@
9864 }
9865
9866 #ifdef NETBE_DEBUG_INTERRUPT
9867 -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
9868 +static irqreturn_t netif_be_dbg(int irq, void *dev_id)
9869 {
9870 struct list_head *ent;
9871 netif_t *netif;
9872 diff -Naur linux-2.6.25/drivers/xen/netfront/netfront.c linux-2.6.25-xen/drivers/xen/netfront/netfront.c
9873 --- linux-2.6.25/drivers/xen/netfront/netfront.c 2008-05-23 20:51:11.000000000 +0200
9874 +++ linux-2.6.25-xen/drivers/xen/netfront/netfront.c 2008-05-23 20:39:03.000000000 +0200
9875 @@ -136,7 +136,7 @@
9876 {
9877 return skb_is_gso(skb) &&
9878 (!skb_gso_ok(skb, dev->features) ||
9879 - unlikely(skb->ip_summed != CHECKSUM_HW));
9880 + unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
9881 }
9882 #else
9883 #define HAVE_GSO 0
9884 @@ -222,7 +222,7 @@
9885 static void network_alloc_rx_buffers(struct net_device *);
9886 static void send_fake_arp(struct net_device *);
9887
9888 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9889 +static irqreturn_t netif_int(int irq, void *dev_id);
9890
9891 #ifdef CONFIG_SYSFS
9892 static int xennet_sysfs_addif(struct net_device *netdev);
9893 @@ -992,7 +992,7 @@
9894 tx->flags = 0;
9895 extra = NULL;
9896
9897 - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9898 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9899 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
9900 #ifdef CONFIG_XEN
9901 if (skb->proto_data_valid) /* remote but checksummed? */
9902 @@ -1049,7 +1049,7 @@
9903 return 0;
9904 }
9905
9906 -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9907 +static irqreturn_t netif_int(int irq, void *dev_id)
9908 {
9909 struct net_device *dev = dev_id;
9910 struct netfront_info *np = netdev_priv(dev);
9911 diff -Naur linux-2.6.25/drivers/xen/pciback/pciback.h linux-2.6.25-xen/drivers/xen/pciback/pciback.h
9912 --- linux-2.6.25/drivers/xen/pciback/pciback.h 2008-05-23 20:51:11.000000000 +0200
9913 +++ linux-2.6.25-xen/drivers/xen/pciback/pciback.h 2008-05-23 20:39:03.000000000 +0200
9914 @@ -87,7 +87,7 @@
9915 void pciback_release_devices(struct pciback_device *pdev);
9916
9917 /* Handles events from front-end */
9918 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
9919 +irqreturn_t pciback_handle_event(int irq, void *dev_id);
9920 void pciback_do_op(void *data);
9921
9922 int pciback_xenbus_register(void);
9923 diff -Naur linux-2.6.25/drivers/xen/pciback/pciback_ops.c linux-2.6.25-xen/drivers/xen/pciback/pciback_ops.c
9924 --- linux-2.6.25/drivers/xen/pciback/pciback_ops.c 2008-05-23 20:51:11.000000000 +0200
9925 +++ linux-2.6.25-xen/drivers/xen/pciback/pciback_ops.c 2008-05-23 20:39:03.000000000 +0200
9926 @@ -85,7 +85,7 @@
9927 test_and_schedule_op(pdev);
9928 }
9929
9930 -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
9931 +irqreturn_t pciback_handle_event(int irq, void *dev_id)
9932 {
9933 struct pciback_device *pdev = dev_id;
9934
9935 diff -Naur linux-2.6.25/drivers/xen/pcifront/pci_op.c linux-2.6.25-xen/drivers/xen/pcifront/pci_op.c
9936 --- linux-2.6.25/drivers/xen/pcifront/pci_op.c 2008-05-23 20:51:11.000000000 +0200
9937 +++ linux-2.6.25-xen/drivers/xen/pcifront/pci_op.c 2008-05-23 20:39:03.000000000 +0200
9938 @@ -392,10 +392,16 @@
9939
9940 d = pci_scan_single_device(b, devfn);
9941 if (d) {
9942 + int err;
9943 +
9944 dev_info(&pdev->xdev->dev, "New device on "
9945 "%04x:%02x:%02x.%02x found.\n", domain, bus,
9946 PCI_SLOT(devfn), PCI_FUNC(devfn));
9947 - pci_bus_add_device(d);
9948 + err = pci_bus_add_device(d);
9949 + if (err)
9950 + dev_err(&pdev->xdev->dev,
9951 + "error %d adding device, continuing.\n",
9952 + err);
9953 }
9954 }
9955
9956 diff -Naur linux-2.6.25/drivers/xen/privcmd/compat_privcmd.c linux-2.6.25-xen/drivers/xen/privcmd/compat_privcmd.c
9957 --- linux-2.6.25/drivers/xen/privcmd/compat_privcmd.c 2008-05-23 20:51:11.000000000 +0200
9958 +++ linux-2.6.25-xen/drivers/xen/privcmd/compat_privcmd.c 2008-05-23 20:39:03.000000000 +0200
9959 @@ -18,7 +18,6 @@
9960 * Authors: Jimi Xenidis <jimix@watson.ibm.com>
9961 */
9962
9963 -#include <linux/config.h>
9964 #include <linux/compat.h>
9965 #include <linux/ioctl.h>
9966 #include <linux/syscalls.h>
9967 diff -Naur linux-2.6.25/drivers/xen/privcmd/privcmd.c linux-2.6.25-xen/drivers/xen/privcmd/privcmd.c
9968 --- linux-2.6.25/drivers/xen/privcmd/privcmd.c 2008-05-23 20:51:11.000000000 +0200
9969 +++ linux-2.6.25-xen/drivers/xen/privcmd/privcmd.c 2008-05-23 20:39:03.000000000 +0200
9970 @@ -236,7 +236,7 @@
9971 #endif
9972
9973 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
9974 - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
9975 + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
9976 vma->vm_ops = &privcmd_vm_ops;
9977 vma->vm_private_data = NULL;
9978
9979 diff -Naur linux-2.6.25/drivers/xen/sfc_netback/accel_xenbus.c linux-2.6.25-xen/drivers/xen/sfc_netback/accel_xenbus.c
9980 --- linux-2.6.25/drivers/xen/sfc_netback/accel_xenbus.c 2008-05-23 20:51:11.000000000 +0200
9981 +++ linux-2.6.25-xen/drivers/xen/sfc_netback/accel_xenbus.c 2008-05-23 20:39:03.000000000 +0200
9982 @@ -68,8 +68,7 @@
9983
9984
9985 /* Demultiplex a message IRQ from the frontend driver. */
9986 -static irqreturn_t msgirq_from_frontend(int irq, void *context,
9987 - struct pt_regs *unused)
9988 +static irqreturn_t msgirq_from_frontend(int irq, void *context)
9989 {
9990 struct xenbus_device *dev = context;
9991 struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
9992 @@ -84,8 +83,7 @@
9993 * functionally, but we need it to pass to the bind function, and may
9994 * get called spuriously
9995 */
9996 -static irqreturn_t netirq_from_frontend(int irq, void *context,
9997 - struct pt_regs *unused)
9998 +static irqreturn_t netirq_from_frontend(int irq, void *context)
9999 {
10000 VPRINTK("netirq %d from device %s\n", irq,
10001 ((struct xenbus_device *)context)->nodename);
10002 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel.h linux-2.6.25-xen/drivers/xen/sfc_netfront/accel.h
10003 --- linux-2.6.25/drivers/xen/sfc_netfront/accel.h 2008-05-23 20:51:11.000000000 +0200
10004 +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel.h 2008-05-23 20:39:03.000000000 +0200
10005 @@ -449,10 +449,8 @@
10006 u32 ip, u16 port, u8 protocol);
10007
10008 /* Process an IRQ received from back end driver */
10009 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10010 - struct pt_regs *unused);
10011 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10012 - struct pt_regs *unused);
10013 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10014 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10015
10016 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10017 extern void netfront_accel_msg_from_bend(struct work_struct *context);
10018 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_msg.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_msg.c
10019 --- linux-2.6.25/drivers/xen/sfc_netfront/accel_msg.c 2008-05-23 20:51:11.000000000 +0200
10020 +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_msg.c 2008-05-23 20:39:03.000000000 +0200
10021 @@ -490,8 +490,7 @@
10022 }
10023
10024
10025 -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10026 - struct pt_regs *unused)
10027 +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10028 {
10029 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10030 VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10031 @@ -502,8 +501,7 @@
10032 }
10033
10034 /* Process an interrupt received from the NIC via backend */
10035 -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10036 - struct pt_regs *unused)
10037 +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10038 {
10039 netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10040 struct net_device *net_dev = vnic->net_dev;
10041 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_tso.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_tso.c
10042 --- linux-2.6.25/drivers/xen/sfc_netfront/accel_tso.c 2008-05-23 20:51:11.000000000 +0200
10043 +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_tso.c 2008-05-23 20:39:03.000000000 +0200
10044 @@ -363,7 +363,7 @@
10045
10046 tso_check_safe(skb);
10047
10048 - if (skb->ip_summed != CHECKSUM_HW)
10049 + if (skb->ip_summed != CHECKSUM_PARTIAL)
10050 EPRINTK("Trying to TSO send a packet without HW checksum\n");
10051
10052 tso_start(&state, skb);
10053 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_vi.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_vi.c
10054 --- linux-2.6.25/drivers/xen/sfc_netfront/accel_vi.c 2008-05-23 20:51:11.000000000 +0200
10055 +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_vi.c 2008-05-23 20:39:03.000000000 +0200
10056 @@ -461,7 +461,7 @@
10057
10058 frag_i = -1;
10059
10060 - if (skb->ip_summed == CHECKSUM_HW) {
10061 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10062 /* Set to zero to encourage falcon to work it out for us */
10063 *(u16*)(skb->h.raw + skb->csum) = 0;
10064 }
10065 @@ -580,7 +580,7 @@
10066
10067 kva = buf->pkt_kva;
10068
10069 - if (skb->ip_summed == CHECKSUM_HW) {
10070 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10071 /* Set to zero to encourage falcon to work it out for us */
10072 *(u16*)(skb->h.raw + skb->csum) = 0;
10073 }
10074 diff -Naur linux-2.6.25/drivers/xen/tpmback/common.h linux-2.6.25-xen/drivers/xen/tpmback/common.h
10075 --- linux-2.6.25/drivers/xen/tpmback/common.h 2008-05-23 20:51:11.000000000 +0200
10076 +++ linux-2.6.25-xen/drivers/xen/tpmback/common.h 2008-05-23 20:39:03.000000000 +0200
10077 @@ -61,7 +61,7 @@
10078 void tpmif_xenbus_init(void);
10079 void tpmif_xenbus_exit(void);
10080 int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10081 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10082 +irqreturn_t tpmif_be_int(int irq, void *dev_id);
10083
10084 long int tpmback_get_instance(struct backend_info *bi);
10085
10086 diff -Naur linux-2.6.25/drivers/xen/tpmback/tpmback.c linux-2.6.25-xen/drivers/xen/tpmback/tpmback.c
10087 --- linux-2.6.25/drivers/xen/tpmback/tpmback.c 2008-05-23 20:51:11.000000000 +0200
10088 +++ linux-2.6.25-xen/drivers/xen/tpmback/tpmback.c 2008-05-23 20:39:03.000000000 +0200
10089 @@ -502,7 +502,7 @@
10090 list_del(&pak->next);
10091 write_unlock_irqrestore(&dataex.pak_lock, flags);
10092
10093 - DPRINTK("size given by app: %d, available: %d\n", size, left);
10094 + DPRINTK("size given by app: %zu, available: %u\n", size, left);
10095
10096 ret_size = min_t(size_t, size, left);
10097
10098 @@ -899,7 +899,7 @@
10099 }
10100 }
10101
10102 -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10103 +irqreturn_t tpmif_be_int(int irq, void *dev_id)
10104 {
10105 tpmif_t *tpmif = (tpmif_t *) dev_id;
10106
10107 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c
10108 --- linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 20:51:19.000000000 +0200
10109 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 20:39:03.000000000 +0200
10110 @@ -55,7 +55,7 @@
10111
10112 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10113
10114 -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10115 +static irqreturn_t wake_waiting(int irq, void *unused)
10116 {
10117 if (unlikely(xenstored_ready == 0)) {
10118 xenstored_ready = 1;
10119 diff -Naur linux-2.6.25/drivers/xen/xenoprof/xenoprofile.c linux-2.6.25-xen/drivers/xen/xenoprof/xenoprofile.c
10120 --- linux-2.6.25/drivers/xen/xenoprof/xenoprofile.c 2008-05-23 20:51:11.000000000 +0200
10121 +++ linux-2.6.25-xen/drivers/xen/xenoprof/xenoprofile.c 2008-05-23 20:39:03.000000000 +0200
10122 @@ -195,7 +195,7 @@
10123 }
10124
10125 static irqreturn_t
10126 -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10127 +xenoprof_ovf_interrupt(int irq, void * dev_id)
10128 {
10129 struct xenoprof_buf * buf;
10130 static unsigned long flag;
10131 diff -Naur linux-2.6.25/include/asm-generic/pgtable.h linux-2.6.25-xen/include/asm-generic/pgtable.h
10132 --- linux-2.6.25/include/asm-generic/pgtable.h 2008-05-23 20:51:15.000000000 +0200
10133 +++ linux-2.6.25-xen/include/asm-generic/pgtable.h 2008-05-23 20:39:03.000000000 +0200
10134 @@ -100,7 +100,7 @@
10135 #endif
10136
10137 #ifndef arch_change_pte_range
10138 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10139 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10140 #endif
10141
10142 #ifndef __HAVE_ARCH_PTE_SAME
10143 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/desc_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/desc_32.h
10144 --- linux-2.6.25/include/asm-x86/mach-xen/asm/desc_32.h 2008-05-23 20:51:11.000000000 +0200
10145 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/desc_32.h 2008-05-23 20:39:03.000000000 +0200
10146 @@ -32,52 +32,110 @@
10147 return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10148 }
10149
10150 +/*
10151 + * This is the ldt that every process will get unless we need
10152 + * something other than this.
10153 + */
10154 +extern struct desc_struct default_ldt[];
10155 +extern struct desc_struct idt_table[];
10156 +extern void set_intr_gate(unsigned int irq, void * addr);
10157 +
10158 +static inline void pack_descriptor(__u32 *a, __u32 *b,
10159 + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
10160 +{
10161 + *a = ((base & 0xffff) << 16) | (limit & 0xffff);
10162 + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
10163 + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
10164 +}
10165 +
10166 +static inline void pack_gate(__u32 *a, __u32 *b,
10167 + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
10168 +{
10169 + *a = (seg << 16) | (base & 0xffff);
10170 + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
10171 +}
10172 +
10173 +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
10174 +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
10175 +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
10176 +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
10177 +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
10178 +#define DESCTYPE_DPL3 0x60 /* DPL-3 */
10179 +#define DESCTYPE_S 0x10 /* !system */
10180 +
10181 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
10182 #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
10183
10184 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
10185 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
10186 -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
10187 -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
10188 +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
10189 +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
10190
10191 #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
10192 #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
10193 -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
10194 -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
10195 +#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
10196 +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
10197
10198 -/*
10199 - * This is the ldt that every process will get unless we need
10200 - * something other than this.
10201 - */
10202 -extern struct desc_struct default_ldt[];
10203 -extern void set_intr_gate(unsigned int irq, void * addr);
10204 +#if TLS_SIZE != 24
10205 +# error update this code.
10206 +#endif
10207 +
10208 +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10209 +{
10210 +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10211 + *(u64 *)&t->tls_array[i])) \
10212 + BUG();
10213 + C(0); C(1); C(2);
10214 +#undef C
10215 +}
10216
10217 -#define _set_tssldt_desc(n,addr,limit,type) \
10218 -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
10219 - "movw %w1,2(%2)\n\t" \
10220 - "rorl $16,%1\n\t" \
10221 - "movb %b1,4(%2)\n\t" \
10222 - "movb %4,5(%2)\n\t" \
10223 - "movb $0,6(%2)\n\t" \
10224 - "movb %h1,7(%2)\n\t" \
10225 - "rorl $16,%1" \
10226 - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
10227 +#ifndef CONFIG_XEN
10228 +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
10229 +{
10230 + __u32 *lp = (__u32 *)((char *)dt + entry*8);
10231 + *lp = entry_a;
10232 + *(lp+1) = entry_b;
10233 +}
10234
10235 -#ifndef CONFIG_X86_NO_TSS
10236 -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
10237 +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10238 +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10239 +#else
10240 +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10241 +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
10242 +#endif
10243 +#ifndef CONFIG_X86_NO_IDT
10244 +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10245 +
10246 +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
10247 {
10248 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
10249 - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
10250 + __u32 a, b;
10251 + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
10252 + write_idt_entry(idt_table, gate, a, b);
10253 }
10254 +#endif
10255
10256 -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10257 +#ifndef CONFIG_X86_NO_TSS
10258 +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
10259 +{
10260 + __u32 a, b;
10261 + pack_descriptor(&a, &b, (unsigned long)addr,
10262 + offsetof(struct tss_struct, __cacheline_filler) - 1,
10263 + DESCTYPE_TSS, 0);
10264 + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
10265 +}
10266 #endif
10267
10268 -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
10269 +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
10270 {
10271 - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
10272 + __u32 a, b;
10273 + pack_descriptor(&a, &b, (unsigned long)addr,
10274 + entries * sizeof(struct desc_struct) - 1,
10275 + DESCTYPE_LDT, 0);
10276 + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
10277 }
10278
10279 +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10280 +
10281 #define LDT_entry_a(info) \
10282 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
10283
10284 @@ -103,21 +161,6 @@
10285 (info)->seg_not_present == 1 && \
10286 (info)->useable == 0 )
10287
10288 -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10289 -
10290 -#if TLS_SIZE != 24
10291 -# error update this code.
10292 -#endif
10293 -
10294 -static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10295 -{
10296 -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10297 - *(u64 *)&t->tls_array[i])) \
10298 - BUG();
10299 - C(0); C(1); C(2);
10300 -#undef C
10301 -}
10302 -
10303 static inline void clear_LDT(void)
10304 {
10305 int cpu = get_cpu();
10306 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/dma-mapping_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/dma-mapping_64.h
10307 --- linux-2.6.25/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-05-23 20:51:11.000000000 +0200
10308 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-05-23 20:39:03.000000000 +0200
10309 @@ -55,13 +55,6 @@
10310 extern struct dma_mapping_ops* dma_ops;
10311 extern int iommu_merge;
10312
10313 -static inline int valid_dma_direction(int dma_direction)
10314 -{
10315 - return ((dma_direction == DMA_BIDIRECTIONAL) ||
10316 - (dma_direction == DMA_TO_DEVICE) ||
10317 - (dma_direction == DMA_FROM_DEVICE));
10318 -}
10319 -
10320 #if 0
10321 static inline int dma_mapping_error(dma_addr_t dma_addr)
10322 {
10323 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h
10324 --- linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 20:51:11.000000000 +0200
10325 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 20:39:03.000000000 +0200
10326 @@ -19,13 +19,9 @@
10327
10328 #define E820_RAM 1
10329 #define E820_RESERVED 2
10330 -#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
10331 +#define E820_ACPI 3
10332 #define E820_NVS 4
10333
10334 -#define HIGH_MEMORY (1024*1024)
10335 -
10336 -#define LOWMEMSIZE() (0x9f000)
10337 -
10338 #ifndef __ASSEMBLY__
10339 struct e820entry {
10340 u64 addr; /* start of memory segment */
10341 @@ -46,17 +42,16 @@
10342 extern void contig_e820_setup(void);
10343 extern unsigned long e820_end_of_ram(void);
10344 extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
10345 +extern void e820_mark_nosave_regions(void);
10346 extern void e820_print_map(char *who);
10347 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
10348 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
10349
10350 -extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
10351 extern void e820_setup_gap(struct e820entry *e820, int nr_map);
10352 -extern unsigned long e820_hole_size(unsigned long start_pfn,
10353 - unsigned long end_pfn);
10354 +extern void e820_register_active_regions(int nid,
10355 + unsigned long start_pfn, unsigned long end_pfn);
10356
10357 -extern void __init parse_memopt(char *p, char **end);
10358 -extern void __init parse_memmapopt(char *p, char **end);
10359 +extern void finish_e820_parsing(void);
10360
10361 extern struct e820map e820;
10362
10363 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h
10364 --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 20:51:11.000000000 +0200
10365 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 20:39:03.000000000 +0200
10366 @@ -55,7 +55,7 @@
10367 #ifdef CONFIG_X86_LOCAL_APIC
10368 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10369 #endif
10370 -#ifdef CONFIG_X86_IO_APIC
10371 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
10372 FIX_IO_APIC_BASE_0,
10373 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10374 #endif
10375 @@ -95,10 +95,9 @@
10376 __end_of_fixed_addresses
10377 };
10378
10379 -extern void set_fixaddr_top(unsigned long top);
10380 -
10381 extern void __set_fixmap(enum fixed_addresses idx,
10382 maddr_t phys, pgprot_t flags);
10383 +extern void reserve_top_address(unsigned long reserve);
10384
10385 #define set_fixmap(idx, phys) \
10386 __set_fixmap(idx, phys, PAGE_KERNEL)
10387 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h
10388 --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 20:51:11.000000000 +0200
10389 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 20:39:03.000000000 +0200
10390 @@ -41,7 +41,7 @@
10391 #ifdef CONFIG_X86_LOCAL_APIC
10392 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10393 #endif
10394 -#ifdef CONFIG_X86_IO_APIC
10395 +#ifndef CONFIG_XEN
10396 FIX_IO_APIC_BASE_0,
10397 FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10398 #endif
10399 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_32.h
10400 --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_32.h 2008-05-23 20:51:11.000000000 +0200
10401 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_32.h 2008-05-23 20:39:03.000000000 +0200
10402 @@ -17,8 +17,6 @@
10403 #include <asm/irq.h>
10404 #include <asm/sections.h>
10405
10406 -struct hw_interrupt_type;
10407 -
10408 #define NMI_VECTOR 0x02
10409
10410 /*
10411 @@ -28,10 +26,6 @@
10412 * Interrupt entry/exit code at both C and assembly level
10413 */
10414
10415 -extern u8 irq_vector[NR_IRQ_VECTORS];
10416 -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10417 -#define AUTO_ASSIGN -1
10418 -
10419 extern void (*interrupt[NR_IRQS])(void);
10420
10421 #ifdef CONFIG_SMP
10422 @@ -44,7 +38,7 @@
10423 fastcall void apic_timer_interrupt(void);
10424 fastcall void error_interrupt(void);
10425 fastcall void spurious_interrupt(void);
10426 -fastcall void thermal_interrupt(struct pt_regs *);
10427 +fastcall void thermal_interrupt(void);
10428 #define platform_legacy_irq(irq) ((irq) < 16)
10429 #endif
10430
10431 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h
10432 --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 20:51:11.000000000 +0200
10433 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 20:39:03.000000000 +0200
10434 @@ -19,8 +19,7 @@
10435 #include <asm/irq.h>
10436 #include <linux/profile.h>
10437 #include <linux/smp.h>
10438 -
10439 -struct hw_interrupt_type;
10440 +#include <linux/percpu.h>
10441 #endif
10442
10443 #define NMI_VECTOR 0x02
10444 @@ -77,9 +76,10 @@
10445
10446
10447 #ifndef __ASSEMBLY__
10448 -extern u8 irq_vector[NR_IRQ_VECTORS];
10449 -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10450 -#define AUTO_ASSIGN -1
10451 +typedef int vector_irq_t[NR_VECTORS];
10452 +DECLARE_PER_CPU(vector_irq_t, vector_irq);
10453 +extern void __setup_vector_irq(int cpu);
10454 +extern spinlock_t vector_lock;
10455
10456 /*
10457 * Various low-level irq details needed by irq.c, process.c,
10458 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h
10459 --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 20:51:11.000000000 +0200
10460 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 20:39:03.000000000 +0200
10461 @@ -237,33 +237,6 @@
10462
10463 #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
10464
10465 -/**
10466 - * check_signature - find BIOS signatures
10467 - * @io_addr: mmio address to check
10468 - * @signature: signature block
10469 - * @length: length of signature
10470 - *
10471 - * Perform a signature comparison with the mmio address io_addr. This
10472 - * address should have been obtained by ioremap.
10473 - * Returns 1 on a match.
10474 - */
10475 -
10476 -static inline int check_signature(volatile void __iomem * io_addr,
10477 - const unsigned char *signature, int length)
10478 -{
10479 - int retval = 0;
10480 - do {
10481 - if (readb(io_addr) != *signature)
10482 - goto out;
10483 - io_addr++;
10484 - signature++;
10485 - length--;
10486 - } while (length);
10487 - retval = 1;
10488 -out:
10489 - return retval;
10490 -}
10491 -
10492 /*
10493 * Cache management
10494 *
10495 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h
10496 --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 20:51:11.000000000 +0200
10497 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 20:39:03.000000000 +0200
10498 @@ -273,33 +273,6 @@
10499
10500 #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
10501
10502 -/**
10503 - * check_signature - find BIOS signatures
10504 - * @io_addr: mmio address to check
10505 - * @signature: signature block
10506 - * @length: length of signature
10507 - *
10508 - * Perform a signature comparison with the mmio address io_addr. This
10509 - * address should have been obtained by ioremap.
10510 - * Returns 1 on a match.
10511 - */
10512 -
10513 -static inline int check_signature(void __iomem *io_addr,
10514 - const unsigned char *signature, int length)
10515 -{
10516 - int retval = 0;
10517 - do {
10518 - if (readb(io_addr) != *signature)
10519 - goto out;
10520 - io_addr++;
10521 - signature++;
10522 - length--;
10523 - } while (length);
10524 - retval = 1;
10525 -out:
10526 - return retval;
10527 -}
10528 -
10529 /* Nothing to do */
10530
10531 #define dma_cache_inv(_start,_size) do { } while (0)
10532 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h
10533 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 20:51:11.000000000 +0200
10534 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 20:39:03.000000000 +0200
10535 @@ -23,14 +23,6 @@
10536 set_pte((ptep), (pteval)); \
10537 } while (0)
10538
10539 -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10540 - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10541 - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10542 - set_pte((ptep), (pteval)); \
10543 - xen_invlpg((addr)); \
10544 - } \
10545 -} while (0)
10546 -
10547 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
10548
10549 #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
10550 @@ -40,6 +32,7 @@
10551
10552 #define pte_none(x) (!(x).pte_low)
10553
10554 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10555 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10556 {
10557 pte_t pte = *ptep;
10558 @@ -51,6 +44,7 @@
10559 return pte;
10560 }
10561
10562 +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10563 #define ptep_clear_flush(vma, addr, ptep) \
10564 ({ \
10565 pte_t *__ptep = (ptep); \
10566 @@ -66,8 +60,6 @@
10567 __res; \
10568 })
10569
10570 -#define pte_same(a, b) ((a).pte_low == (b).pte_low)
10571 -
10572 #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
10573 #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
10574 __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
10575 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h
10576 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 20:51:11.000000000 +0200
10577 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 20:39:03.000000000 +0200
10578 @@ -260,31 +260,89 @@
10579 # include <asm/pgtable-2level.h>
10580 #endif
10581
10582 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10583 +/*
10584 + * Rules for using pte_update - it must be called after any PTE update which
10585 + * has not been done using the set_pte / clear_pte interfaces. It is used by
10586 + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
10587 + * updates should either be sets, clears, or set_pte_atomic for P->P
10588 + * transitions, which means this hook should only be called for user PTEs.
10589 + * This hook implies a P->P protection or access change has taken place, which
10590 + * requires a subsequent TLB flush. The notification can optionally be delayed
10591 + * until the TLB flush event by using the pte_update_defer form of the
10592 + * interface, but care must be taken to assure that the flush happens while
10593 + * still holding the same page table lock so that the shadow and primary pages
10594 + * do not become out of sync on SMP.
10595 + */
10596 +#define pte_update(mm, addr, ptep) do { } while (0)
10597 +#define pte_update_defer(mm, addr, ptep) do { } while (0)
10598 +
10599 +
10600 +/*
10601 + * We only update the dirty/accessed state if we set
10602 + * the dirty bit by hand in the kernel, since the hardware
10603 + * will do the accessed bit for us, and we don't want to
10604 + * race with other CPUs that might be updating the dirty
10605 + * bit at the same time.
10606 + */
10607 +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10608 +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10609 +do { \
10610 + if (dirty) \
10611 + ptep_establish(vma, address, ptep, entry); \
10612 +} while (0)
10613 +
10614 +/*
10615 + * We don't actually have these, but we want to advertise them so that
10616 + * we can encompass the flush here.
10617 + */
10618 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10619 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10620 +
10621 +/*
10622 + * Rules for using ptep_establish: the pte MUST be a user pte, and
10623 + * must be a present->present transition.
10624 + */
10625 +#define __HAVE_ARCH_PTEP_ESTABLISH
10626 +#define ptep_establish(vma, address, ptep, pteval) \
10627 +do { \
10628 + if ( likely((vma)->vm_mm == current->mm) ) { \
10629 + BUG_ON(HYPERVISOR_update_va_mapping(address, \
10630 + pteval, \
10631 + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10632 + UVMF_INVLPG|UVMF_MULTI)); \
10633 + } else { \
10634 + xen_l1_entry_update(ptep, pteval); \
10635 + flush_tlb_page(vma, address); \
10636 + } \
10637 +} while (0)
10638 +
10639 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
10640 +#define ptep_clear_flush_dirty(vma, address, ptep) \
10641 ({ \
10642 pte_t __pte = *(ptep); \
10643 - int __ret = pte_dirty(__pte); \
10644 - if (__ret) { \
10645 - __pte = pte_mkclean(__pte); \
10646 - if ((vma)->vm_mm != current->mm || \
10647 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10648 - (ptep)->pte_low = __pte.pte_low; \
10649 - } \
10650 - __ret; \
10651 + int __dirty = pte_dirty(__pte); \
10652 + __pte = pte_mkclean(__pte); \
10653 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10654 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
10655 + else if (__dirty) \
10656 + (ptep)->pte_low = __pte.pte_low; \
10657 + __dirty; \
10658 })
10659
10660 -#define ptep_test_and_clear_young(vma, addr, ptep) \
10661 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
10662 +#define ptep_clear_flush_young(vma, address, ptep) \
10663 ({ \
10664 pte_t __pte = *(ptep); \
10665 - int __ret = pte_young(__pte); \
10666 - if (__ret) \
10667 - __pte = pte_mkold(__pte); \
10668 - if ((vma)->vm_mm != current->mm || \
10669 - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10670 - (ptep)->pte_low = __pte.pte_low; \
10671 - __ret; \
10672 + int __young = pte_young(__pte); \
10673 + __pte = pte_mkold(__pte); \
10674 + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10675 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
10676 + else if (__young) \
10677 + (ptep)->pte_low = __pte.pte_low; \
10678 + __young; \
10679 })
10680
10681 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10682 #define ptep_get_and_clear_full(mm, addr, ptep, full) \
10683 ((full) ? ({ \
10684 pte_t __res = *(ptep); \
10685 @@ -296,6 +354,7 @@
10686 }) : \
10687 ptep_get_and_clear(mm, addr, ptep))
10688
10689 +#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10690 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10691 {
10692 pte_t pte = *ptep;
10693 @@ -391,11 +450,11 @@
10694 #define pte_index(address) \
10695 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10696 #define pte_offset_kernel(dir, address) \
10697 - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
10698 + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
10699
10700 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10701
10702 -#define pmd_page_kernel(pmd) \
10703 +#define pmd_page_vaddr(pmd) \
10704 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
10705
10706 /*
10707 @@ -418,8 +477,6 @@
10708 static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
10709 #endif
10710
10711 -extern void noexec_setup(const char *str);
10712 -
10713 #if defined(CONFIG_HIGHPTE)
10714 #define pte_offset_map(dir, address) \
10715 ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
10716 @@ -437,37 +494,17 @@
10717 #define pte_unmap_nested(pte) do { } while (0)
10718 #endif
10719
10720 -#define __HAVE_ARCH_PTEP_ESTABLISH
10721 -#define ptep_establish(vma, address, ptep, pteval) \
10722 - do { \
10723 - if ( likely((vma)->vm_mm == current->mm) ) { \
10724 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
10725 - pteval, \
10726 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10727 - UVMF_INVLPG|UVMF_MULTI)); \
10728 - } else { \
10729 - xen_l1_entry_update(ptep, pteval); \
10730 - flush_tlb_page(vma, address); \
10731 - } \
10732 - } while (0)
10733 +/* Clear a kernel PTE and flush it from the TLB */
10734 +#define kpte_clear_flush(ptep, vaddr) do { \
10735 + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
10736 + BUG(); \
10737 +} while (0)
10738
10739 /*
10740 * The i386 doesn't have any external MMU info: the kernel page
10741 * tables contain all the necessary information.
10742 - *
10743 - * Also, we only update the dirty/accessed state if we set
10744 - * the dirty bit by hand in the kernel, since the hardware
10745 - * will do the accessed bit for us, and we don't want to
10746 - * race with other CPU's that might be updating the dirty
10747 - * bit at the same time.
10748 */
10749 #define update_mmu_cache(vma,address,pte) do { } while (0)
10750 -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10751 -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10752 - do { \
10753 - if (dirty) \
10754 - ptep_establish(vma, address, ptep, entry); \
10755 - } while (0)
10756
10757 #include <xen/features.h>
10758 void make_lowmem_page_readonly(void *va, unsigned int feature);
10759 @@ -516,10 +553,11 @@
10760 unsigned long size);
10761
10762 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
10763 - unsigned long addr, unsigned long end, pgprot_t newprot);
10764 + unsigned long addr, unsigned long end, pgprot_t newprot,
10765 + int dirty_accountable);
10766
10767 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
10768 - xen_change_pte_range(mm, pmd, addr, end, newprot)
10769 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
10770 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
10771
10772 #define io_remap_pfn_range(vma,from,pfn,size,prot) \
10773 direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
10774 @@ -528,13 +566,6 @@
10775 #define GET_IOSPACE(pfn) 0
10776 #define GET_PFN(pfn) (pfn)
10777
10778 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10779 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10780 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10781 -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10782 -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10783 -#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10784 -#define __HAVE_ARCH_PTE_SAME
10785 #include <asm-generic/pgtable.h>
10786
10787 #endif /* _I386_PGTABLE_H */
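
The reworked ptep_clear_flush_dirty()/ptep_clear_flush_young() above choose between a hypervisor-synchronized update (when the mm's pgd page carries PG_pinned, i.e. Xen is validating that page table) and a direct pte_low write. A hypothetical caller sketch, with illustrative names, showing how reclaim-style code would consume the young variant:

    /* Hypothetical caller (names are illustrative, not from the patch). */
    static int page_referenced_one_sketch(struct vm_area_struct *vma,
                                          unsigned long address, pte_t *ptep)
    {
            /* Clears _PAGE_ACCESSED; flushes through the hypervisor when
             * the page-table root is pinned, else pokes pte_low directly. */
            return ptep_clear_flush_young(vma, address, ptep);
    }
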
10788 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h
10789 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 20:51:11.000000000 +0200
10790 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 20:39:03.000000000 +0200
10791 @@ -53,7 +53,6 @@
10792 * not possible, use pte_get_and_clear to obtain the old pte
10793 * value and then use set_pte to update it. -ben
10794 */
10795 -#define __HAVE_ARCH_SET_PTE_ATOMIC
10796
10797 static inline void set_pte(pte_t *ptep, pte_t pte)
10798 {
10799 @@ -70,14 +69,6 @@
10800 set_pte((ptep), (pteval)); \
10801 } while (0)
10802
10803 -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10804 - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10805 - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10806 - set_pte((ptep), (pteval)); \
10807 - xen_invlpg((addr)); \
10808 - } \
10809 -} while (0)
10810 -
10811 #define set_pmd(pmdptr,pmdval) \
10812 xen_l2_entry_update((pmdptr), (pmdval))
10813 #define set_pud(pudptr,pudval) \
10814 @@ -94,7 +85,7 @@
10815 #define pud_page(pud) \
10816 ((struct page *) __va(pud_val(pud) & PAGE_MASK))
10817
10818 -#define pud_page_kernel(pud) \
10819 +#define pud_page_vaddr(pud) \
10820 ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
10821
10822
10823 @@ -124,6 +115,7 @@
10824
10825 #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
10826
10827 +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10828 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10829 {
10830 pte_t pte = *ptep;
10831 @@ -142,6 +134,7 @@
10832 return pte;
10833 }
10834
10835 +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10836 #define ptep_clear_flush(vma, addr, ptep) \
10837 ({ \
10838 pte_t *__ptep = (ptep); \
10839 @@ -159,6 +152,7 @@
10840 __res; \
10841 })
10842
10843 +#define __HAVE_ARCH_PTE_SAME
10844 static inline int pte_same(pte_t a, pte_t b)
10845 {
10846 return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
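
Under PAE a PTE spans two 32-bit words, so pte_same() must compare both halves: pte_high carries the upper physical-address bits and the NX bit, so two entries can agree in pte_low yet map different pages. A one-line illustration mirroring the function above:

    /* Sketch: both words participate in PAE equality; comparing only
     * pte_low would conflate pages that lie 4 GiB apart. */
    static int pae_pte_same_sketch(pte_t a, pte_t b)
    {
            return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
    }
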
10847 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h
10848 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 20:51:11.000000000 +0200
10849 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 20:39:03.000000000 +0200
10850 @@ -43,12 +43,9 @@
10851
10852 #define swapper_pg_dir init_level4_pgt
10853
10854 -extern int nonx_setup(char *str);
10855 extern void paging_init(void);
10856 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
10857
10858 -extern unsigned long pgkern_mask;
10859 -
10860 /*
10861 * ZERO_PAGE is a global shared page that is always zero: used
10862 * for zero-mapped memory areas etc..
10863 @@ -118,9 +115,6 @@
10864 set_pgd(__user_pgd(pgd), __pgd(0));
10865 }
10866
10867 -#define pud_page(pud) \
10868 - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10869 -
10870 #define pte_same(a, b) ((a).pte == (b).pte)
10871
10872 #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
10873 @@ -332,7 +326,7 @@
10874 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
10875 static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10876 static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10877 -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10878 +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
10879 static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
10880 static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
10881 static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
10882 @@ -345,29 +339,12 @@
10883 static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
10884 static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
10885 static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10886 -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10887 +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
10888 static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
10889 static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
10890 static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
10891 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
10892 -
10893 -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10894 -({ \
10895 - pte_t __pte = *(ptep); \
10896 - int __ret = pte_dirty(__pte); \
10897 - if (__ret) \
10898 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
10899 - __ret; \
10900 -})
10901 -
10902 -#define ptep_test_and_clear_young(vma, addr, ptep) \
10903 -({ \
10904 - pte_t __pte = *(ptep); \
10905 - int __ret = pte_young(__pte); \
10906 - if (__ret) \
10907 - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
10908 - __ret; \
10909 -})
10910 +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
10911
10912 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10913 {
10914 @@ -395,7 +372,8 @@
10915 * Level 4 access.
10916 * Never use these in the common code.
10917 */
10918 -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10919 +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10920 +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
10921 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
10922 #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
10923 #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
10924 @@ -404,16 +382,18 @@
10925
10926 /* PUD - Level3 access */
10927 /* to find an entry in a page-table-directory. */
10928 +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10929 +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
10930 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
10931 -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
10932 +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
10933 #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
10934
10935 /* PMD - Level 2 access */
10936 -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10937 +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10938 #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10939
10940 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
10941 -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
10942 +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
10943 pmd_index(address))
10944 #define pmd_none(x) (!__pmd_val(x))
10945 #if CONFIG_XEN_COMPAT <= 0x030002
10946 @@ -444,6 +424,7 @@
10947 {
10948 unsigned long pteval;
10949 pteval = physpage | pgprot_val(pgprot);
10950 + pteval &= __supported_pte_mask;
10951 return __pte(pteval);
10952 }
10953
10954 @@ -465,7 +446,7 @@
10955
10956 #define pte_index(address) \
10957 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10958 -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
10959 +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
10960 pte_index(address))
10961
10962 /* x86-64 always has all page tables mapped. */
10963 @@ -506,6 +487,40 @@
10964 ptep_establish(vma, address, ptep, entry); \
10965 } while (0)
10966
10967 +
10968 +/*
10969 + * i386 says: We don't actually have these, but we want to advertise
10970 + * them so that we can encompass the flush here.
10971 + */
10972 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10973 +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10974 +
10975 +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
10976 +#define ptep_clear_flush_dirty(vma, address, ptep) \
10977 +({ \
10978 + pte_t __pte = *(ptep); \
10979 + int __dirty = pte_dirty(__pte); \
10980 + __pte = pte_mkclean(__pte); \
10981 + if ((vma)->vm_mm->context.pinned) \
10982 + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
10983 + else if (__dirty) \
10984 + set_pte(ptep, __pte); \
10985 + __dirty; \
10986 +})
10987 +
10988 +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
10989 +#define ptep_clear_flush_young(vma, address, ptep) \
10990 +({ \
10991 + pte_t __pte = *(ptep); \
10992 + int __young = pte_young(__pte); \
10993 + __pte = pte_mkold(__pte); \
10994 + if ((vma)->vm_mm->context.pinned) \
10995 + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
10996 + else if (__young) \
10997 + set_pte(ptep, __pte); \
10998 + __young; \
10999 +})
11000 +
11001 /* Encode and de-code a swap entry */
11002 #define __swp_type(x) (((x).val >> 1) & 0x3f)
11003 #define __swp_offset(x) ((x).val >> 8)
11004 @@ -547,10 +562,11 @@
11005 unsigned long size);
11006
11007 int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11008 - unsigned long addr, unsigned long end, pgprot_t newprot);
11009 + unsigned long addr, unsigned long end, pgprot_t newprot,
11010 + int dirty_accountable);
11011
11012 -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11013 - xen_change_pte_range(mm, pmd, addr, end, newprot)
11014 +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11015 + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11016
11017 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
11018 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
11019 @@ -572,8 +588,6 @@
11020 #define kc_offset_to_vaddr(o) \
11021 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
11022
11023 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11024 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11025 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11026 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11027 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
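
Two substantive 64-bit fixes in the file above: pte_exec()/pte_mkexec() now key off _PAGE_NX rather than _PAGE_USER, and the pte constructor masks with __supported_pte_mask so _PAGE_NX is dropped on CPUs without NX support (which would otherwise fault on a reserved bit). A minimal sketch combining the two, assuming the masking hunk shown belongs to a mk_pte-style constructor:

    /* Sketch, not patch code: build an executable PTE safely. */
    static pte_t make_exec_pte_sketch(unsigned long physpage, pgprot_t pgprot)
    {
            unsigned long pteval = physpage | pgprot_val(pgprot);

            pteval &= __supported_pte_mask;   /* strip NX on non-NX hardware */
            return pte_mkexec(__pte(pteval)); /* executable: NX bit clear */
    }
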
11028 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h
11029 --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 20:51:22.000000000 +0200
11030 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 20:39:03.000000000 +0200
11031 @@ -146,6 +146,18 @@
11032 #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11033 #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11034
11035 +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11036 + unsigned int *ecx, unsigned int *edx)
11037 +{
11038 + /* ecx is often an input as well as an output. */
11039 + __asm__(XEN_CPUID
11040 + : "=a" (*eax),
11041 + "=b" (*ebx),
11042 + "=c" (*ecx),
11043 + "=d" (*edx)
11044 + : "0" (*eax), "2" (*ecx));
11045 +}
11046 +
11047 /*
11048 * Generic CPUID function
11049 * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11050 @@ -153,24 +165,18 @@
11051 */
11052 static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11053 {
11054 - __asm__(XEN_CPUID
11055 - : "=a" (*eax),
11056 - "=b" (*ebx),
11057 - "=c" (*ecx),
11058 - "=d" (*edx)
11059 - : "0" (op), "c"(0));
11060 + *eax = op;
11061 + *ecx = 0;
11062 + __cpuid(eax, ebx, ecx, edx);
11063 }
11064
11065 /* Some CPUID calls want 'count' to be placed in ecx */
11066 static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11067 - int *edx)
11068 + int *edx)
11069 {
11070 - __asm__(XEN_CPUID
11071 - : "=a" (*eax),
11072 - "=b" (*ebx),
11073 - "=c" (*ecx),
11074 - "=d" (*edx)
11075 - : "0" (op), "c" (count));
11076 + *eax = op;
11077 + *ecx = count;
11078 + __cpuid(eax, ebx, ecx, edx);
11079 }
11080
11081 /*
11082 @@ -178,42 +184,30 @@
11083 */
11084 static inline unsigned int cpuid_eax(unsigned int op)
11085 {
11086 - unsigned int eax;
11087 + unsigned int eax, ebx, ecx, edx;
11088
11089 - __asm__(XEN_CPUID
11090 - : "=a" (eax)
11091 - : "0" (op)
11092 - : "bx", "cx", "dx");
11093 + cpuid(op, &eax, &ebx, &ecx, &edx);
11094 return eax;
11095 }
11096 static inline unsigned int cpuid_ebx(unsigned int op)
11097 {
11098 - unsigned int eax, ebx;
11099 + unsigned int eax, ebx, ecx, edx;
11100
11101 - __asm__(XEN_CPUID
11102 - : "=a" (eax), "=b" (ebx)
11103 - : "0" (op)
11104 - : "cx", "dx" );
11105 + cpuid(op, &eax, &ebx, &ecx, &edx);
11106 return ebx;
11107 }
11108 static inline unsigned int cpuid_ecx(unsigned int op)
11109 {
11110 - unsigned int eax, ecx;
11111 + unsigned int eax, ebx, ecx, edx;
11112
11113 - __asm__(XEN_CPUID
11114 - : "=a" (eax), "=c" (ecx)
11115 - : "0" (op)
11116 - : "bx", "dx" );
11117 + cpuid(op, &eax, &ebx, &ecx, &edx);
11118 return ecx;
11119 }
11120 static inline unsigned int cpuid_edx(unsigned int op)
11121 {
11122 - unsigned int eax, edx;
11123 + unsigned int eax, ebx, ecx, edx;
11124
11125 - __asm__(XEN_CPUID
11126 - : "=a" (eax), "=d" (edx)
11127 - : "0" (op)
11128 - : "bx", "cx");
11129 + cpuid(op, &eax, &ebx, &ecx, &edx);
11130 return edx;
11131 }
11132
11133 @@ -315,6 +309,8 @@
11134 : :"a" (eax), "c" (ecx));
11135 }
11136
11137 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11138 +
11139 /* from system description table in BIOS. Mostly for MCA use, but
11140 others may find it useful. */
11141 extern unsigned int machine_id;
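
With all cpuid variants funnelled through __cpuid() above, the common usage pattern is one call that seeds eax/ecx and reads back all four registers. A small usage sketch (the feature bit is chosen only as an example: leaf 1, ECX bit 0 is SSE3):

    /* Usage sketch for the reworked helpers (illustrative). */
    static int cpu_has_sse3_sketch(void)
    {
            unsigned int eax, ebx, ecx, edx;

            cpuid(1, &eax, &ebx, &ecx, &edx);   /* seeds eax=1, ecx=0 */
            return ecx & 1;                     /* or: cpuid_ecx(1) & 1 */
    }
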
11142 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h
11143 --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 20:51:11.000000000 +0200
11144 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 20:39:03.000000000 +0200
11145 @@ -484,6 +484,8 @@
11146 : :"a" (eax), "c" (ecx));
11147 }
11148
11149 +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11150 +
11151 #define stack_current() \
11152 ({ \
11153 struct thread_info *ti; \
11154 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/segment_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/segment_32.h
11155 --- linux-2.6.25/include/asm-x86/mach-xen/asm/segment_32.h 2008-05-23 20:51:11.000000000 +0200
11156 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/segment_32.h 2008-05-23 20:39:03.000000000 +0200
11157 @@ -61,11 +61,9 @@
11158
11159 #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11160 #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11161 -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11162
11163 #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11164 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11165 -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11166
11167 #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11168 #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11169 @@ -85,6 +83,11 @@
11170
11171 #define GDT_SIZE (GDT_ENTRIES * 8)
11172
11173 +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11174 +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11175 +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11176 +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11177 +
11178 /* Simple and small GDT entries for booting only */
11179
11180 #define GDT_ENTRY_BOOT_CS 2
11181 @@ -114,4 +117,16 @@
11182 */
11183 #define IDT_ENTRIES 256
11184
11185 +/* Bottom two bits of selector give the ring privilege level */
11186 +#define SEGMENT_RPL_MASK 0x3
11187 +/* Bit 2 is table indicator (LDT/GDT) */
11188 +#define SEGMENT_TI_MASK 0x4
11189 +
11190 +/* User mode is privilege level 3 */
11191 +#define USER_RPL 0x3
11192 +/* LDT segment has TI set, GDT has it cleared */
11193 +#define SEGMENT_LDT 0x4
11194 +#define SEGMENT_GDT 0x0
11195 +
11196 +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11197 #endif
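
The selector constants added above encode the x86 layout: bits 0-1 are the requested privilege level (RPL), bit 2 is the table indicator (GDT vs LDT), and get_kernel_rpl() reflects that a Xen guest kernel normally runs in ring 1 rather than ring 0. A decoding sketch using the new names:

    /* Sketch: pick apart a segment selector with the constants above. */
    static int selector_is_user_gdt_sketch(unsigned short sel)
    {
            unsigned int rpl   = sel & SEGMENT_RPL_MASK;    /* 0..3 */
            unsigned int table = sel & SEGMENT_TI_MASK;     /* LDT or GDT */

            return rpl == USER_RPL && table == SEGMENT_GDT;
    }
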
11198 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/smp_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_32.h
11199 --- linux-2.6.25/include/asm-x86/mach-xen/asm/smp_32.h 2008-05-23 20:51:11.000000000 +0200
11200 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_32.h 2008-05-23 20:39:03.000000000 +0200
11201 @@ -79,25 +79,36 @@
11202 return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11203 }
11204 #endif
11205 -
11206 -static __inline int logical_smp_processor_id(void)
11207 -{
11208 - /* we don't want to mark this access volatile - bad code generation */
11209 - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11210 -}
11211 -
11212 #endif
11213
11214 +#define safe_smp_processor_id() smp_processor_id()
11215 extern int __cpu_disable(void);
11216 extern void __cpu_die(unsigned int cpu);
11217 extern void prefill_possible_map(void);
11218 +extern unsigned int num_processors;
11219 +
11220 #endif /* !__ASSEMBLY__ */
11221
11222 #else /* CONFIG_SMP */
11223
11224 +#define safe_smp_processor_id() 0
11225 #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11226
11227 #define NO_PROC_ID 0xFF /* No processor magic marker */
11228
11229 #endif
11230 +
11231 +#ifndef __ASSEMBLY__
11232 +
11233 +extern u8 apicid_2_node[];
11234 +
11235 +#ifdef CONFIG_X86_LOCAL_APIC
11236 +static __inline int logical_smp_processor_id(void)
11237 +{
11238 + /* we don't want to mark this access volatile - bad code generation */
11239 + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11240 +}
11241 +#endif
11242 +#endif
11243 +
11244 #endif
11245 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/smp_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_64.h
11246 --- linux-2.6.25/include/asm-x86/mach-xen/asm/smp_64.h 2008-05-23 20:51:11.000000000 +0200
11247 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_64.h 2008-05-23 20:39:03.000000000 +0200
11248 @@ -4,15 +4,12 @@
11249 /*
11250 * We need the APIC definitions automatically as part of 'smp.h'
11251 */
11252 -#ifndef __ASSEMBLY__
11253 #include <linux/threads.h>
11254 #include <linux/cpumask.h>
11255 #include <linux/bitops.h>
11256 extern int disable_apic;
11257 -#endif
11258
11259 #ifdef CONFIG_X86_LOCAL_APIC
11260 -#ifndef __ASSEMBLY__
11261 #include <asm/fixmap.h>
11262 #include <asm/mpspec.h>
11263 #ifdef CONFIG_X86_IO_APIC
11264 @@ -21,10 +18,8 @@
11265 #include <asm/apic.h>
11266 #include <asm/thread_info.h>
11267 #endif
11268 -#endif
11269
11270 #ifdef CONFIG_SMP
11271 -#ifndef ASSEMBLY
11272
11273 #include <asm/pda.h>
11274
11275 @@ -41,14 +36,11 @@
11276
11277 extern void smp_alloc_memory(void);
11278 extern volatile unsigned long smp_invalidate_needed;
11279 -extern int pic_mode;
11280 extern void lock_ipi_call_lock(void);
11281 extern void unlock_ipi_call_lock(void);
11282 extern int smp_num_siblings;
11283 extern void smp_send_reschedule(int cpu);
11284 void smp_stop_cpu(void);
11285 -extern int smp_call_function_single(int cpuid, void (*func) (void *info),
11286 - void *info, int retry, int wait);
11287
11288 extern cpumask_t cpu_sibling_map[NR_CPUS];
11289 extern cpumask_t cpu_core_map[NR_CPUS];
11290 @@ -77,20 +69,16 @@
11291 }
11292 #endif
11293
11294 -extern int safe_smp_processor_id(void);
11295 extern int __cpu_disable(void);
11296 extern void __cpu_die(unsigned int cpu);
11297 extern void prefill_possible_map(void);
11298 extern unsigned num_processors;
11299 extern unsigned disabled_cpus;
11300
11301 -#endif /* !ASSEMBLY */
11302 -
11303 #define NO_PROC_ID 0xFF /* No processor magic marker */
11304
11305 #endif
11306
11307 -#ifndef ASSEMBLY
11308 /*
11309 * Some lowlevel functions might want to know about
11310 * the real APIC ID <-> CPU # mapping.
11311 @@ -114,11 +102,8 @@
11312 }
11313 #endif
11314
11315 -#endif /* !ASSEMBLY */
11316 -
11317 #ifndef CONFIG_SMP
11318 #define stack_smp_processor_id() 0
11319 -#define safe_smp_processor_id() 0
11320 #define cpu_logical_map(x) (x)
11321 #else
11322 #include <asm/thread_info.h>
11323 @@ -130,7 +115,6 @@
11324 })
11325 #endif
11326
11327 -#ifndef __ASSEMBLY__
11328 #ifdef CONFIG_X86_LOCAL_APIC
11329 static __inline int logical_smp_processor_id(void)
11330 {
11331 @@ -138,13 +122,18 @@
11332 return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11333 }
11334 #endif
11335 -#endif
11336
11337 #ifdef CONFIG_SMP
11338 #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
11339 #else
11340 #define cpu_physical_id(cpu) boot_cpu_id
11341 -#endif
11342 -
11343 +static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
11344 + void *info, int retry, int wait)
11345 +{
11346 + /* Disable interrupts here? */
11347 + func(info);
11348 + return 0;
11349 +}
11350 +#endif /* !CONFIG_SMP */
11351 #endif
11352
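
The UP branch above gains an inline smp_call_function_single() stub that simply runs the function locally, so callers no longer need CONFIG_SMP guards around it. A hypothetical caller sketch (names illustrative):

    /* Hypothetical caller: identical source for SMP and UP builds. */
    static void poke_cpu_sketch(void *info)
    {
            /* per-CPU work would go here */
    }

    static int kick_cpu0_sketch(void)
    {
            /* retry=0, wait=1, matching the prototype shown above */
            return smp_call_function_single(0, poke_cpu_sketch, NULL, 0, 1);
    }
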
11353 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h
11354 --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 20:51:11.000000000 +0200
11355 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 20:39:03.000000000 +0200
11356 @@ -267,6 +267,9 @@
11357 #define cmpxchg(ptr,o,n)\
11358 ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
11359 (unsigned long)(n),sizeof(*(ptr))))
11360 +#define sync_cmpxchg(ptr,o,n)\
11361 + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
11362 + (unsigned long)(n),sizeof(*(ptr))))
11363 #endif
11364
11365 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
11366 @@ -296,6 +299,39 @@
11367 return old;
11368 }
11369
11370 +/*
11371 + * Always use locked operations when touching memory shared with a
11372 + * hypervisor, since the system may be SMP even if the guest kernel
11373 + * isn't.
11374 + */
11375 +static inline unsigned long __sync_cmpxchg(volatile void *ptr,
11376 + unsigned long old,
11377 + unsigned long new, int size)
11378 +{
11379 + unsigned long prev;
11380 + switch (size) {
11381 + case 1:
11382 + __asm__ __volatile__("lock; cmpxchgb %b1,%2"
11383 + : "=a"(prev)
11384 + : "q"(new), "m"(*__xg(ptr)), "0"(old)
11385 + : "memory");
11386 + return prev;
11387 + case 2:
11388 + __asm__ __volatile__("lock; cmpxchgw %w1,%2"
11389 + : "=a"(prev)
11390 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11391 + : "memory");
11392 + return prev;
11393 + case 4:
11394 + __asm__ __volatile__("lock; cmpxchgl %1,%2"
11395 + : "=a"(prev)
11396 + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11397 + : "memory");
11398 + return prev;
11399 + }
11400 + return old;
11401 +}
11402 +
11403 #ifndef CONFIG_X86_CMPXCHG
11404 /*
11405 * Building a kernel capable running on 80386. It may be necessary to
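
The __sync_cmpxchg() comment above states the rule: memory shared with the hypervisor needs lock-prefixed operations even in a UP guest, because other physical CPUs, or Xen itself, may touch it concurrently. A usage sketch against a hypothetical shared flags word:

    /* Sketch (hypothetical field): atomically clear a flag in a word
     * the hypervisor may update concurrently, even on a UP kernel. */
    static u16 clear_shared_flag_sketch(volatile u16 *shared_flags, u16 flag)
    {
            u16 old, new;

            do {
                    old = *shared_flags;
                    new = old & ~flag;
            } while (sync_cmpxchg(shared_flags, old, new) != old);

            return old;
    }
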
11406 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h
11407 --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 20:51:11.000000000 +0200
11408 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 20:39:03.000000000 +0200
11409 @@ -24,6 +24,7 @@
11410 #define __EXTRA_CLOBBER \
11411 ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
11412
11413 +/* Save and restore flags so a set NT flag does not leak across the context switch */
11414 #define switch_to(prev,next,last) \
11415 asm volatile(SAVE_CONTEXT \
11416 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
11417 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h
11418 --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 20:51:11.000000000 +0200
11419 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 20:39:03.000000000 +0200
11420 @@ -8,8 +8,6 @@
11421 #define __flush_tlb_global() xen_tlb_flush()
11422 #define __flush_tlb_all() xen_tlb_flush()
11423
11424 -extern unsigned long pgkern_mask;
11425 -
11426 #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
11427
11428 #define __flush_tlb_single(addr) xen_invlpg(addr)
11429 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h
11430 --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 20:51:11.000000000 +0200
11431 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 20:39:03.000000000 +0200
11432 @@ -12,9 +12,6 @@
11433 */
11434 #define __flush_tlb_global() xen_tlb_flush()
11435
11436 -
11437 -extern unsigned long pgkern_mask;
11438 -
11439 #define __flush_tlb_all() __flush_tlb_global()
11440
11441 #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
11442 diff -Naur linux-2.6.25/include/asm-x86/thread_info_64.h linux-2.6.25-xen/include/asm-x86/thread_info_64.h
11443 --- linux-2.6.25/include/asm-x86/thread_info_64.h 2008-04-17 04:49:44.000000000 +0200
11444 +++ linux-2.6.25-xen/include/asm-x86/thread_info_64.h 2008-05-23 20:39:03.000000000 +0200
11445 @@ -157,10 +157,14 @@
11446 (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
11447
11448 /* flags to check in __switch_to() */
11449 +#ifndef CONFIG_XEN
11450 #define _TIF_WORK_CTXSW \
11451 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
11452 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
11453 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
11454 +#else
11455 +#define _TIF_WORK_CTXSW _TIF_DEBUG
11456 +#endif
11457
11458 #define PREEMPT_ACTIVE 0x10000000
11459
11460 diff -Naur linux-2.6.25/include/linux/skbuff.h linux-2.6.25-xen/include/linux/skbuff.h
11461 --- linux-2.6.25/include/linux/skbuff.h 2008-05-23 20:51:15.000000000 +0200
11462 +++ linux-2.6.25-xen/include/linux/skbuff.h 2008-05-23 20:39:03.000000000 +0200
11463 @@ -1821,5 +1821,12 @@
11464 }
11465
11466 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
11467 +
11468 +#ifdef CONFIG_XEN
11469 +int skb_checksum_setup(struct sk_buff *skb);
11470 +#else
11471 +static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
11472 +#endif
11473 +
11474 #endif /* __KERNEL__ */
11475 #endif /* _LINUX_SKBUFF_H */
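
The skbuff.h hunk pairs a CONFIG_XEN declaration of skb_checksum_setup() with an inline no-op for non-Xen builds (and the net/core/dev.c hunk further below moves the EXPORT_SYMBOL next to the real definition). Net code can therefore call it unconditionally; a hypothetical receive-path sketch:

    /* Hypothetical caller: compiles identically with and without Xen. */
    static int rx_checksum_fixup_sketch(struct sk_buff *skb)
    {
            int err = skb_checksum_setup(skb);  /* no-op stub on !CONFIG_XEN */

            if (err)
                    kfree_skb(skb);             /* bad pseudo-header: drop */
            return err;
    }
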
11476 diff -Naur linux-2.6.25/include/xen/evtchn.h linux-2.6.25-xen/include/xen/evtchn.h
11477 --- linux-2.6.25/include/xen/evtchn.h 2008-05-23 20:51:11.000000000 +0200
11478 +++ linux-2.6.25-xen/include/xen/evtchn.h 2008-05-23 20:39:03.000000000 +0200
11479 @@ -54,34 +54,34 @@
11480 */
11481 int bind_caller_port_to_irqhandler(
11482 unsigned int caller_port,
11483 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11484 + irq_handler_t handler,
11485 unsigned long irqflags,
11486 const char *devname,
11487 void *dev_id);
11488 int bind_listening_port_to_irqhandler(
11489 unsigned int remote_domain,
11490 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11491 + irq_handler_t handler,
11492 unsigned long irqflags,
11493 const char *devname,
11494 void *dev_id);
11495 int bind_interdomain_evtchn_to_irqhandler(
11496 unsigned int remote_domain,
11497 unsigned int remote_port,
11498 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11499 + irq_handler_t handler,
11500 unsigned long irqflags,
11501 const char *devname,
11502 void *dev_id);
11503 int bind_virq_to_irqhandler(
11504 unsigned int virq,
11505 unsigned int cpu,
11506 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11507 + irq_handler_t handler,
11508 unsigned long irqflags,
11509 const char *devname,
11510 void *dev_id);
11511 int bind_ipi_to_irqhandler(
11512 unsigned int ipi,
11513 unsigned int cpu,
11514 - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11515 + irq_handler_t handler,
11516 unsigned long irqflags,
11517 const char *devname,
11518 void *dev_id);
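
All five evtchn binding helpers switch their handler parameter to irq_handler_t, tracking the 2.6.19 genirq change that removed struct pt_regs from interrupt handlers; as of that release irq_handler_t is irqreturn_t (*)(int, void *). A handler under the new prototype, with hypothetical names:

    /* Sketch of a handler matching the new irq_handler_t prototype. */
    static irqreturn_t evtchn_handler_sketch(int irq, void *dev_id)
    {
            /* acknowledge / process the event channel here */
            return IRQ_HANDLED;
    }

    /* hypothetical binding:
     * bind_virq_to_irqhandler(VIRQ_TIMER, 0, evtchn_handler_sketch,
     *                         0, "sketch", NULL);
     */
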
11519 diff -Naur linux-2.6.25/include/xen/xencons.h linux-2.6.25-xen/include/xen/xencons.h
11520 --- linux-2.6.25/include/xen/xencons.h 2008-05-23 20:51:11.000000000 +0200
11521 +++ linux-2.6.25-xen/include/xen/xencons.h 2008-05-23 20:39:03.000000000 +0200
11522 @@ -8,7 +8,7 @@
11523 void xencons_resume(void);
11524
11525 /* Interrupt work hooks. Receive data, or kick data out. */
11526 -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
11527 +void xencons_rx(char *buf, unsigned len);
11528 void xencons_tx(void);
11529
11530 int xencons_ring_init(void);
11531 diff -Naur linux-2.6.25/mm/mprotect.c linux-2.6.25-xen/mm/mprotect.c
11532 --- linux-2.6.25/mm/mprotect.c 2008-05-23 20:51:15.000000000 +0200
11533 +++ linux-2.6.25-xen/mm/mprotect.c 2008-05-23 20:39:03.000000000 +0200
11534 @@ -86,7 +86,7 @@
11535 next = pmd_addr_end(addr, end);
11536 if (pmd_none_or_clear_bad(pmd))
11537 continue;
11538 - if (arch_change_pte_range(mm, pmd, addr, next, newprot))
11539 + if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
11540 continue;
11541 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
11542 } while (pmd++, addr = next, addr != end);
11543 diff -Naur linux-2.6.25/net/core/dev.c linux-2.6.25-xen/net/core/dev.c
11544 --- linux-2.6.25/net/core/dev.c 2008-05-23 20:51:15.000000000 +0200
11545 +++ linux-2.6.25-xen/net/core/dev.c 2008-05-23 20:39:03.000000000 +0200
11546 @@ -1607,15 +1607,14 @@
11547 }
11548 if ((skb->h.raw + skb->csum + 2) > skb->tail)
11549 goto out;
11550 - skb->ip_summed = CHECKSUM_HW;
11551 + skb->ip_summed = CHECKSUM_PARTIAL;
11552 skb->proto_csum_blank = 0;
11553 }
11554 return 0;
11555 out:
11556 return -EPROTO;
11557 }
11558 -#else
11559 -inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
11560 +EXPORT_SYMBOL(skb_checksum_setup);
11561 #endif
11562
11563 /**
11564 @@ -2111,7 +2110,7 @@
11565 case CHECKSUM_UNNECESSARY:
11566 skb->proto_data_valid = 1;
11567 break;
11568 - case CHECKSUM_HW:
11569 + case CHECKSUM_PARTIAL:
11570 /* XXX Implement me. */
11571 default:
11572 skb->proto_data_valid = 0;
11573 @@ -4644,7 +4643,6 @@
11574 EXPORT_SYMBOL(net_enable_timestamp);
11575 EXPORT_SYMBOL(net_disable_timestamp);
11576 EXPORT_SYMBOL(dev_get_flags);
11577 -EXPORT_SYMBOL(skb_checksum_setup);
11578
11579 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
11580 EXPORT_SYMBOL(br_handle_frame_hook);