Magellan Linux

Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1020-2.6.25-xen-patch-2.6.19.patch



Revision 611
Fri May 23 18:49:01 2008 UTC by niro
File size: 335709 byte(s)
-fixed patch

1 niro 611 diff -Naur linux-2.6.25/arch/x86/ia32/ia32entry-xen.S linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S
2     --- linux-2.6.25/arch/x86/ia32/ia32entry-xen.S 2008-05-23 20:51:11.000000000 +0200
3     +++ linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S 2008-05-23 20:39:03.000000000 +0200
4 niro 609 @@ -83,6 +83,7 @@
5     */
6     ENTRY(ia32_sysenter_target)
7     CFI_STARTPROC32 simple
8     + CFI_SIGNAL_FRAME
9     CFI_DEF_CFA rsp,SS+8-RIP+16
10     /*CFI_REL_OFFSET ss,SS-RIP+16*/
11     CFI_REL_OFFSET rsp,RSP-RIP+16
12     @@ -164,6 +165,7 @@
13     */
14     ENTRY(ia32_cstar_target)
15     CFI_STARTPROC32 simple
16     + CFI_SIGNAL_FRAME
17     CFI_DEF_CFA rsp,SS+8-RIP+16
18     /*CFI_REL_OFFSET ss,SS-RIP+16*/
19     CFI_REL_OFFSET rsp,RSP-RIP+16
20     @@ -243,6 +245,7 @@
21    
22     ENTRY(ia32_syscall)
23     CFI_STARTPROC simple
24     + CFI_SIGNAL_FRAME
25     CFI_DEF_CFA rsp,SS+8-RIP+16
26     /*CFI_REL_OFFSET ss,SS-RIP+16*/
27     CFI_REL_OFFSET rsp,RSP-RIP+16
28     @@ -320,6 +323,7 @@
29     popq %r11
30     CFI_ENDPROC
31     CFI_STARTPROC32 simple
32     + CFI_SIGNAL_FRAME
33     CFI_DEF_CFA rsp,SS+8-ARGOFFSET
34     CFI_REL_OFFSET rax,RAX-ARGOFFSET
35     CFI_REL_OFFSET rcx,RCX-ARGOFFSET
36     @@ -653,8 +657,8 @@
37     .quad sys_readlinkat /* 305 */
38     .quad sys_fchmodat
39     .quad sys_faccessat
40     - .quad quiet_ni_syscall /* pselect6 for now */
41     - .quad quiet_ni_syscall /* ppoll for now */
42     + .quad compat_sys_pselect6
43     + .quad compat_sys_ppoll
44     .quad sys_unshare /* 310 */
45     .quad compat_sys_set_robust_list
46     .quad compat_sys_get_robust_list
47     @@ -663,4 +667,5 @@
48     .quad sys_tee
49     .quad compat_sys_vmsplice
50     .quad compat_sys_move_pages
51     + .quad sys_getcpu
52     ia32_syscall_end:
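
Note: the two hunks above bring this ia32 compat table in line with mainline 2.6.19 -- pselect6 (308) and ppoll (309) lose their quiet_ni_syscall stubs in favour of the real compat entry points, and getcpu is appended as entry 318. A user-space probe of the result, illustrative only (assumes glibc's syscall(2) wrapper; the number 318 is hard-coded in case the toolchain headers predate SYS_getcpu):

    /* ia32_getcpu_probe.c -- build with -m32 and run on the patched
     * kernel; before this change the call returned -ENOSYS via
     * quiet_ni_syscall. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned cpu = 0, node = 0;
        /* 318 = getcpu in the table above; the third argument is the
         * optional getcpu_cache pointer, NULL is accepted */
        long ret = syscall(318, &cpu, &node, NULL);
        if (ret)
            perror("getcpu");
        else
            printf("cpu %u node %u\n", cpu, node);
        return 0;
    }
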
53 niro 611 diff -Naur linux-2.6.25/arch/x86/Kconfig linux-2.6.25-xen/arch/x86/Kconfig
54     --- linux-2.6.25/arch/x86/Kconfig 2008-05-23 20:51:22.000000000 +0200
55     +++ linux-2.6.25-xen/arch/x86/Kconfig 2008-05-23 20:39:03.000000000 +0200
56     @@ -390,6 +390,7 @@
57 niro 609
58 niro 611 menuconfig PARAVIRT_GUEST
59     bool "Paravirtualized guest support"
60     + depends on !X86_XEN && !X86_64_XEN
61     help
62     Say Y here to get to see options related to running Linux under
63     various hypervisors. This option alone does not add any kernel code.
64     diff -Naur linux-2.6.25/arch/x86/kernel/apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_32-xen.c
65     --- linux-2.6.25/arch/x86/kernel/apic_32-xen.c 2008-05-23 20:51:11.000000000 +0200
66     +++ linux-2.6.25-xen/arch/x86/kernel/apic_32-xen.c 2008-05-23 20:39:03.000000000 +0200
67 niro 609 @@ -54,7 +54,6 @@
68     /*
69     * Knob to control our willingness to enable the local APIC.
70     */
71     -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
72    
73     /*
74     * Debug level
75     @@ -102,7 +101,7 @@
76    
77     #ifndef CONFIG_XEN
78     #ifndef CONFIG_SMP
79     -static void up_apic_timer_interrupt_call(struct pt_regs *regs)
80     +static void up_apic_timer_interrupt_call(void)
81     {
82     int cpu = smp_processor_id();
83    
84     @@ -111,11 +110,11 @@
85     */
86     per_cpu(irq_stat, cpu).apic_timer_irqs++;
87    
88     - smp_local_timer_interrupt(regs);
89     + smp_local_timer_interrupt();
90     }
91     #endif
92    
93     -void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
94     +void smp_send_timer_broadcast_ipi(void)
95     {
96     cpumask_t mask;
97    
98     @@ -128,7 +127,7 @@
99     * We can directly call the apic timer interrupt handler
100     * in UP case. Minus all irq related functions
101     */
102     - up_apic_timer_interrupt_call(regs);
103     + up_apic_timer_interrupt_call();
104     #endif
105     }
106     }
107 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c
108     --- linux-2.6.25/arch/x86/kernel/apic_64-xen.c 2008-05-23 20:51:11.000000000 +0200
109     +++ linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c 2008-05-23 20:39:03.000000000 +0200
110 niro 609 @@ -43,7 +43,7 @@
111     */
112     void ack_bad_irq(unsigned int irq)
113     {
114     - printk("unexpected IRQ trap at vector %02x\n", irq);
115     + printk("unexpected IRQ trap at irq %02x\n", irq);
116     /*
117     * Currently unexpected vectors happen only on SMP and APIC.
118     * We _must_ ack these because every local APIC has only N
119     @@ -62,19 +62,19 @@
120     return -EINVAL;
121     }
122    
123     -void smp_local_timer_interrupt(struct pt_regs *regs)
124     +void smp_local_timer_interrupt(void)
125     {
126     - profile_tick(CPU_PROFILING, regs);
127     + profile_tick(CPU_PROFILING);
128     #ifndef CONFIG_XEN
129     #ifdef CONFIG_SMP
130     - update_process_times(user_mode(regs));
131     + update_process_times(user_mode(get_irq_regs()));
132     #endif
133     #endif
134     /*
135     * We take the 'long' return path, and there every subsystem
136     * grabs the appropriate locks (kernel lock/ irq lock).
137     *
138     - * we might want to decouple profiling from the 'long path',
139     + * We might want to decouple profiling from the 'long path',
140     * and do the profiling totally in assembly.
141     *
142     * Currently this isn't too much of an issue (performance wise),
143     @@ -92,6 +92,8 @@
144     */
145     void smp_apic_timer_interrupt(struct pt_regs *regs)
146     {
147     + struct pt_regs *old_regs = set_irq_regs(regs);
148     +
149     /*
150     * the NMI deadlock-detector uses this.
151     */
152     @@ -109,8 +111,9 @@
153     */
154     exit_idle();
155     irq_enter();
156     - smp_local_timer_interrupt(regs);
157     + smp_local_timer_interrupt();
158     irq_exit();
159     + set_irq_regs(old_regs);
160     }
161    
162     /*
163     @@ -188,9 +191,8 @@
164     int __init APIC_init_uniprocessor (void)
165     {
166     #ifdef CONFIG_X86_IO_APIC
167     - if (smp_found_config)
168     - if (!skip_ioapic_setup && nr_ioapics)
169     - setup_IO_APIC();
170     + if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
171     + setup_IO_APIC();
172     #endif
173    
174     return 1;
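
Note: the apic_32 and apic_64 hunks above drop the pt_regs argument from the local-timer call chain, adopting the 2.6.19 irq_regs convention: the outermost handler parks the interrupted context in a per-CPU slot and callees fetch it on demand. A minimal sketch of the pattern (hypothetical handler names, kernel context assumed):

    #include <linux/hardirq.h>
    #include <linux/sched.h>
    #include <asm/irq_regs.h>

    static void demo_timer_work(void)
    {
        /* callees recover the interrupted context themselves instead
         * of having pt_regs threaded through every argument list */
        update_process_times(user_mode(get_irq_regs()));
    }

    void demo_timer_interrupt(struct pt_regs *regs)
    {
        struct pt_regs *old_regs = set_irq_regs(regs);

        irq_enter();
        demo_timer_work();
        irq_exit();
        set_irq_regs(old_regs);    /* restore for nested interrupts */
    }
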
175 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/cpu/common-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c
176     --- linux-2.6.25/arch/x86/kernel/cpu/common-xen.c 2008-05-23 20:51:11.000000000 +0200
177     +++ linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c 2008-05-23 20:39:03.000000000 +0200
178 niro 609 @@ -43,7 +43,7 @@
179    
180     extern int disable_pse;
181    
182     -static void default_init(struct cpuinfo_x86 * c)
183     +static void __cpuinit default_init(struct cpuinfo_x86 * c)
184     {
185     /* Not much we can do here... */
186     /* Check if at least it has cpuid */
187     @@ -56,7 +56,7 @@
188     }
189     }
190    
191     -static struct cpu_dev default_cpu = {
192     +static struct cpu_dev __cpuinitdata default_cpu = {
193     .c_init = default_init,
194     .c_vendor = "Unknown",
195     };
196     @@ -191,7 +191,16 @@
197    
198     static int __init x86_fxsr_setup(char * s)
199     {
200     + /* Tell all the other CPU's to not use it... */
201     disable_x86_fxsr = 1;
202     +
203     + /*
204     + * ... and clear the bits early in the boot_cpu_data
205     + * so that the bootup process doesn't try to do this
206     + * either.
207     + */
208     + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
209     + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
210     return 1;
211     }
212     __setup("nofxsr", x86_fxsr_setup);
213     @@ -272,7 +281,7 @@
214     }
215     }
216    
217     -void __cpuinit generic_identify(struct cpuinfo_x86 * c)
218     +static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
219     {
220     u32 tfms, xlvl;
221     int ebx;
222     @@ -698,8 +707,7 @@
223     */
224     atomic_inc(&init_mm.mm_count);
225     current->active_mm = &init_mm;
226     - if (current->mm)
227     - BUG();
228     + BUG_ON(current->mm);
229     enter_lazy_tlb(&init_mm, current);
230    
231     load_esp0(t, thread);
232     @@ -712,7 +720,7 @@
233     #endif
234    
235     /* Clear %fs and %gs. */
236     - asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
237     + asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
238    
239     /* Clear all 6 debug registers: */
240     set_debugreg(0, 0);
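
Note on two hunks above: the "nofxsr" handler now clears X86_FEATURE_FXSR and X86_FEATURE_XMM in boot_cpu_data at parse time, so every capability test later in bring-up already sees the override; and the %fs/%gs clearing is rewritten because the old asm zeroed %eax behind the compiler's back. Side by side (both lines quoted from the hunk):

    /* old: clobbers %eax without declaring it, so gcc may assume a
     * live value survives the statement */
    asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");

    /* new: the zero is a proper input operand, letting the compiler
     * allocate the register and track its use */
    asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
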
241 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/e820_64-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c
242     --- linux-2.6.25/arch/x86/kernel/e820_64-xen.c 2008-05-23 20:51:11.000000000 +0200
243     +++ linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c 2008-05-23 20:39:03.000000000 +0200
244 niro 609 @@ -16,6 +16,7 @@
245     #include <linux/string.h>
246     #include <linux/kexec.h>
247     #include <linux/module.h>
248     +#include <linux/mm.h>
249    
250     #include <asm/pgtable.h>
251     #include <asm/page.h>
252     @@ -25,6 +26,11 @@
253     #include <asm/sections.h>
254     #include <xen/interface/memory.h>
255    
256     +struct e820map e820 __initdata;
257     +#ifdef CONFIG_XEN
258     +struct e820map machine_e820 __initdata;
259     +#endif
260     +
261     /*
262     * PFN of last memory page.
263     */
264     @@ -41,7 +47,7 @@
265     /*
266     * Last pfn which the user wants to use.
267     */
268     -unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
269     +static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
270    
271     extern struct resource code_resource, data_resource;
272    
273     @@ -53,13 +59,13 @@
274     #ifndef CONFIG_XEN
275     /* various gunk below that needed for SMP startup */
276     if (addr < 0x8000) {
277     - *addrp = 0x8000;
278     + *addrp = PAGE_ALIGN(0x8000);
279     return 1;
280     }
281    
282     /* direct mapping tables of the kernel */
283     if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
284     - *addrp = table_end << PAGE_SHIFT;
285     + *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
286     return 1;
287     }
288    
289     @@ -67,23 +73,18 @@
290     #ifdef CONFIG_BLK_DEV_INITRD
291     if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
292     addr < INITRD_START+INITRD_SIZE) {
293     - *addrp = INITRD_START + INITRD_SIZE;
294     + *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
295     return 1;
296     }
297     #endif
298     - /* kernel code + 640k memory hole (later should not be needed, but
299     - be paranoid for now) */
300     - if (last >= 640*1024 && addr < 1024*1024) {
301     - *addrp = 1024*1024;
302     - return 1;
303     - }
304     - if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
305     - *addrp = __pa_symbol(&_end);
306     + /* kernel code */
307     + if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
308     + *addrp = PAGE_ALIGN(__pa_symbol(&_end));
309     return 1;
310     }
311    
312     if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
313     - *addrp = ebda_addr + ebda_size;
314     + *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
315     return 1;
316     }
317    
318     @@ -141,8 +142,6 @@
319     for (i = 0; i < e820.nr_map; i++) {
320     struct e820entry *ei = &e820.map[i];
321     #else
322     - extern struct e820map machine_e820;
323     -
324     if (!is_initial_xendomain())
325     return 0;
326     for (i = 0; i < machine_e820.nr_map; i++) {
327     @@ -184,7 +183,7 @@
328     continue;
329     while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
330     ;
331     - last = addr + size;
332     + last = PAGE_ALIGN(addr) + size;
333     if (last > ei->addr + ei->size)
334     continue;
335     if (last > end)
336     @@ -194,59 +193,14 @@
337     return -1UL;
338     }
339    
340     -/*
341     - * Free bootmem based on the e820 table for a node.
342     - */
343     -void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
344     -{
345     - int i;
346     - for (i = 0; i < e820.nr_map; i++) {
347     - struct e820entry *ei = &e820.map[i];
348     - unsigned long last, addr;
349     -
350     - if (ei->type != E820_RAM ||
351     - ei->addr+ei->size <= start ||
352     - ei->addr >= end)
353     - continue;
354     -
355     - addr = round_up(ei->addr, PAGE_SIZE);
356     - if (addr < start)
357     - addr = start;
358     -
359     - last = round_down(ei->addr + ei->size, PAGE_SIZE);
360     - if (last >= end)
361     - last = end;
362     -
363     - if (last > addr && last-addr >= PAGE_SIZE)
364     - free_bootmem_node(pgdat, addr, last-addr);
365     - }
366     -}
367     -
368     /*
369     * Find the highest page frame number we have available
370     */
371     unsigned long __init e820_end_of_ram(void)
372     {
373     - int i;
374     unsigned long end_pfn = 0;
375     + end_pfn = find_max_pfn_with_active_regions();
376    
377     - for (i = 0; i < e820.nr_map; i++) {
378     - struct e820entry *ei = &e820.map[i];
379     - unsigned long start, end;
380     -
381     - start = round_up(ei->addr, PAGE_SIZE);
382     - end = round_down(ei->addr + ei->size, PAGE_SIZE);
383     - if (start >= end)
384     - continue;
385     - if (ei->type == E820_RAM) {
386     - if (end > end_pfn<<PAGE_SHIFT)
387     - end_pfn = end>>PAGE_SHIFT;
388     - } else {
389     - if (end > end_pfn_map<<PAGE_SHIFT)
390     - end_pfn_map = end>>PAGE_SHIFT;
391     - }
392     - }
393     -
394     if (end_pfn > end_pfn_map)
395     end_pfn_map = end_pfn;
396     if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
397     @@ -256,43 +210,10 @@
398     if (end_pfn > end_pfn_map)
399     end_pfn = end_pfn_map;
400    
401     + printk("end_pfn_map = %lu\n", end_pfn_map);
402     return end_pfn;
403     }
404    
405     -/*
406     - * Compute how much memory is missing in a range.
407     - * Unlike the other functions in this file the arguments are in page numbers.
408     - */
409     -unsigned long __init
410     -e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
411     -{
412     - unsigned long ram = 0;
413     - unsigned long start = start_pfn << PAGE_SHIFT;
414     - unsigned long end = end_pfn << PAGE_SHIFT;
415     - int i;
416     - for (i = 0; i < e820.nr_map; i++) {
417     - struct e820entry *ei = &e820.map[i];
418     - unsigned long last, addr;
419     -
420     - if (ei->type != E820_RAM ||
421     - ei->addr+ei->size <= start ||
422     - ei->addr >= end)
423     - continue;
424     -
425     - addr = round_up(ei->addr, PAGE_SIZE);
426     - if (addr < start)
427     - addr = start;
428     -
429     - last = round_down(ei->addr + ei->size, PAGE_SIZE);
430     - if (last >= end)
431     - last = end;
432     -
433     - if (last > addr)
434     - ram += last - addr;
435     - }
436     - return ((end - start) - ram) >> PAGE_SHIFT;
437     -}
438     -
439     /*
440     * Mark e820 reserved areas as busy for the resource manager.
441     */
442     @@ -333,6 +254,98 @@
443     }
444     }
445    
446     +#ifndef CONFIG_XEN
447     +/* Mark pages corresponding to given address range as nosave */
448     +static void __init
449     +e820_mark_nosave_range(unsigned long start, unsigned long end)
450     +{
451     + unsigned long pfn, max_pfn;
452     +
453     + if (start >= end)
454     + return;
455     +
456     + printk("Nosave address range: %016lx - %016lx\n", start, end);
457     + max_pfn = end >> PAGE_SHIFT;
458     + for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
459     + if (pfn_valid(pfn))
460     + SetPageNosave(pfn_to_page(pfn));
461     +}
462     +
463     +/*
464     + * Find the ranges of physical addresses that do not correspond to
465     + * e820 RAM areas and mark the corresponding pages as nosave for software
466     + * suspend and suspend to RAM.
467     + *
468     + * This function requires the e820 map to be sorted and without any
469     + * overlapping entries and assumes the first e820 area to be RAM.
470     + */
471     +void __init e820_mark_nosave_regions(void)
472     +{
473     + int i;
474     + unsigned long paddr;
475     +
476     + paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
477     + for (i = 1; i < e820.nr_map; i++) {
478     + struct e820entry *ei = &e820.map[i];
479     +
480     + if (paddr < ei->addr)
481     + e820_mark_nosave_range(paddr,
482     + round_up(ei->addr, PAGE_SIZE));
483     +
484     + paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
485     + if (ei->type != E820_RAM)
486     + e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
487     + paddr);
488     +
489     + if (paddr >= (end_pfn << PAGE_SHIFT))
490     + break;
491     + }
492     +}
493     +#endif
494     +
495     +/* Walk the e820 map and register active regions within a node */
496     +void __init
497     +e820_register_active_regions(int nid, unsigned long start_pfn,
498     + unsigned long end_pfn)
499     +{
500     + int i;
501     + unsigned long ei_startpfn, ei_endpfn;
502     + for (i = 0; i < e820.nr_map; i++) {
503     + struct e820entry *ei = &e820.map[i];
504     + ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
505     + ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
506     + >> PAGE_SHIFT;
507     +
508     + /* Skip map entries smaller than a page */
509     + if (ei_startpfn >= ei_endpfn)
510     + continue;
511     +
512     + /* Check if end_pfn_map should be updated */
513     + if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
514     + end_pfn_map = ei_endpfn;
515     +
516     + /* Skip if map is outside the node */
517     + if (ei->type != E820_RAM ||
518     + ei_endpfn <= start_pfn ||
519     + ei_startpfn >= end_pfn)
520     + continue;
521     +
522     + /* Check for overlaps */
523     + if (ei_startpfn < start_pfn)
524     + ei_startpfn = start_pfn;
525     + if (ei_endpfn > end_pfn)
526     + ei_endpfn = end_pfn;
527     +
528     + /* Obey end_user_pfn to save on memmap */
529     + if (ei_startpfn >= end_user_pfn)
530     + continue;
531     + if (ei_endpfn > end_user_pfn)
532     + ei_endpfn = end_user_pfn;
533     +
534     + add_active_range(nid, ei_startpfn, ei_endpfn);
535     + }
536     +}
537     +
538     /*
539     * Add a memory region to the kernel e820 map.
540     */
541     @@ -553,13 +566,6 @@
542     * If we're lucky and live on a modern system, the setup code
543     * will have given us a memory map that we can use to properly
544     * set up memory. If we aren't, we'll fake a memory map.
545     - *
546     - * We check to see that the memory map contains at least 2 elements
547     - * before we'll use it, because the detection code in setup.S may
548     - * not be perfect and most every PC known to man has two memory
549     - * regions: one from 0 to 640k, and one from 1mb up. (The IBM
550     - * thinkpad 560x, for example, does not cooperate with the memory
551     - * detection code.)
552     */
553     static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
554     {
555     @@ -581,37 +587,20 @@
556     if (start > end)
557     return -1;
558    
559     -#ifndef CONFIG_XEN
560     - /*
561     - * Some BIOSes claim RAM in the 640k - 1M region.
562     - * Not right. Fix it up.
563     - *
564     - * This should be removed on Hammer which is supposed to not
565     - * have non e820 covered ISA mappings there, but I had some strange
566     - * problems so it stays for now. -AK
567     - */
568     - if (type == E820_RAM) {
569     - if (start < 0x100000ULL && end > 0xA0000ULL) {
570     - if (start < 0xA0000ULL)
571     - add_memory_region(start, 0xA0000ULL-start, type);
572     - if (end <= 0x100000ULL)
573     - continue;
574     - start = 0x100000ULL;
575     - size = end - start;
576     - }
577     - }
578     -#endif
579     -
580     add_memory_region(start, size, type);
581     } while (biosmap++,--nr_map);
582     return 0;
583     }
584    
585     +void early_panic(char *msg)
586     +{
587     + early_printk(msg);
588     + panic(msg);
589     +}
590     +
591     #ifndef CONFIG_XEN
592     void __init setup_memory_region(void)
593     {
594     - char *who = "BIOS-e820";
595     -
596     /*
597     * Try to copy the BIOS-supplied E820-map.
598     *
599     @@ -619,24 +608,10 @@
600     * the next section from 1mb->appropriate_mem_k
601     */
602     sanitize_e820_map(E820_MAP, &E820_MAP_NR);
603     - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
604     - unsigned long mem_size;
605     -
606     - /* compare results from other methods and take the greater */
607     - if (ALT_MEM_K < EXT_MEM_K) {
608     - mem_size = EXT_MEM_K;
609     - who = "BIOS-88";
610     - } else {
611     - mem_size = ALT_MEM_K;
612     - who = "BIOS-e801";
613     - }
614     -
615     - e820.nr_map = 0;
616     - add_memory_region(0, LOWMEMSIZE(), E820_RAM);
617     - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
618     - }
619     + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
620     + early_panic("Cannot find a valid memory map");
621     printk(KERN_INFO "BIOS-provided physical RAM map:\n");
622     - e820_print_map(who);
623     + e820_print_map("BIOS-e820");
624     }
625    
626     #else /* CONFIG_XEN */
627     @@ -668,20 +643,23 @@
628    
629     sanitize_e820_map(map, (char *)&memmap.nr_entries);
630    
631     - BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
632     + if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
633     + early_panic("Cannot find a valid memory map");
634    
635     printk(KERN_INFO "BIOS-provided physical RAM map:\n");
636     e820_print_map("Xen");
637     }
638     #endif
639    
640     -void __init parse_memopt(char *p, char **from)
641     -{
642     +static int __init parse_memopt(char *p)
643     +{
644     int i;
645     unsigned long current_end;
646     unsigned long end;
647    
648     - end_user_pfn = memparse(p, from);
649     + if (!p)
650     + return -EINVAL;
651     + end_user_pfn = memparse(p, &p);
652     end_user_pfn >>= PAGE_SHIFT;
653    
654     end = end_user_pfn<<PAGE_SHIFT;
655     @@ -698,27 +676,61 @@
656     else
657     add_memory_region(current_end, end - current_end, E820_RAM);
658     }
659     +
660     + return 0;
661     }
662     +early_param("mem", parse_memopt);
663     +
664     +static int userdef __initdata;
665    
666     -void __init parse_memmapopt(char *p, char **from)
667     +static int __init parse_memmap_opt(char *p)
668     {
669     + char *oldp;
670     unsigned long long start_at, mem_size;
671    
672     - mem_size = memparse(p, from);
673     - p = *from;
674     + if (!strcmp(p, "exactmap")) {
675     +#ifdef CONFIG_CRASH_DUMP
676     + /* If we are doing a crash dump, we
677     + * still need to know the real mem
678     + * size before original memory map is
679     + * reset.
680     + */
681     + e820_register_active_regions(0, 0, -1UL);
682     + saved_max_pfn = e820_end_of_ram();
683     + remove_all_active_ranges();
684     +#endif
685     + end_pfn_map = 0;
686     + e820.nr_map = 0;
687     + userdef = 1;
688     + return 0;
689     + }
690     +
691     + oldp = p;
692     + mem_size = memparse(p, &p);
693     + if (p == oldp)
694     + return -EINVAL;
695     if (*p == '@') {
696     - start_at = memparse(p+1, from);
697     + start_at = memparse(p+1, &p);
698     add_memory_region(start_at, mem_size, E820_RAM);
699     } else if (*p == '#') {
700     - start_at = memparse(p+1, from);
701     + start_at = memparse(p+1, &p);
702     add_memory_region(start_at, mem_size, E820_ACPI);
703     } else if (*p == '$') {
704     - start_at = memparse(p+1, from);
705     + start_at = memparse(p+1, &p);
706     add_memory_region(start_at, mem_size, E820_RESERVED);
707     } else {
708     end_user_pfn = (mem_size >> PAGE_SHIFT);
709     }
710     - p = *from;
711     + return *p == '\0' ? 0 : -EINVAL;
712     +}
713     +early_param("memmap", parse_memmap_opt);
714     +
715     +void finish_e820_parsing(void)
716     +{
717     + if (userdef) {
718     + printk(KERN_INFO "user-defined physical RAM map:\n");
719     + e820_print_map("user");
720     + }
721     }
722    
723     unsigned long pci_mem_start = 0xaeedbabe;
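
Note: the parse_memopt/parse_memmap_opt rewrites above (and the earlyprintk conversion that follows) move these boot options from ad-hoc `char **from` parsing onto the generic early_param machinery, which runs from parse_early_param() well before ordinary __setup handlers. A minimal sketch of the convention, with a hypothetical option name:

    #include <linux/init.h>
    #include <linux/kernel.h>

    static unsigned long long demo_limit __initdata;

    /* handler receives only the value string; return 0 on success,
     * -EINVAL to make the core complain about a malformed option */
    static int __init parse_demo_limit(char *p)
    {
        if (!p)
            return -EINVAL;
        demo_limit = memparse(p, &p);    /* accepts K/M/G suffixes */
        return *p == '\0' ? 0 : -EINVAL;
    }
    early_param("demo_limit", parse_demo_limit);
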
724 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/early_printk-xen.c linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c
725     --- linux-2.6.25/arch/x86/kernel/early_printk-xen.c 2008-05-23 20:51:11.000000000 +0200
726     +++ linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c 2008-05-23 20:39:03.000000000 +0200
727 niro 609 @@ -244,20 +244,16 @@
728    
729     static int __initdata keep_early;
730    
731     -int __init setup_early_printk(char *opt)
732     +static int __init setup_early_printk(char *buf)
733     {
734     - char *space;
735     - char buf[256];
736     + if (!buf)
737     + return 0;
738    
739     if (early_console_initialized)
740     - return 1;
741     -
742     - strlcpy(buf,opt,sizeof(buf));
743     - space = strchr(buf, ' ');
744     - if (space)
745     - *space = 0;
746     + return 0;
747     + early_console_initialized = 1;
748    
749     - if (strstr(buf,"keep"))
750     + if (strstr(buf, "keep"))
751     keep_early = 1;
752    
753     if (!strncmp(buf, "serial", 6)) {
754     @@ -281,11 +277,12 @@
755     early_console = &simnow_console;
756     keep_early = 1;
757     }
758     - early_console_initialized = 1;
759     register_console(early_console);
760     return 0;
761     }
762    
763     +early_param("earlyprintk", setup_early_printk);
764     +
765     void __init disable_early_printk(void)
766     {
767     if (!early_console_initialized || !early_console)
768     @@ -299,4 +296,3 @@
769     }
770     }
771    
772     -__setup("earlyprintk=", setup_early_printk);
773 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/entry_32-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S
774     --- linux-2.6.25/arch/x86/kernel/entry_32-xen.S 2008-05-23 20:51:22.000000000 +0200
775     +++ linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S 2008-05-23 20:39:03.000000000 +0200
776 niro 609 @@ -80,8 +80,12 @@
777     NMI_MASK = 0x80000000
778    
779     #ifndef CONFIG_XEN
780     -#define DISABLE_INTERRUPTS cli
781     -#define ENABLE_INTERRUPTS sti
782     +/* These are replaces for paravirtualization */
783     +#define DISABLE_INTERRUPTS cli
784     +#define ENABLE_INTERRUPTS sti
785     +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
786     +#define INTERRUPT_RETURN iret
787     +#define GET_CR0_INTO_EAX movl %cr0, %eax
788     #else
789     /* Offsets into shared_info_t. */
790     #define evtchn_upcall_pending /* 0 */
791     @@ -99,15 +103,29 @@
792    
793     #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
794     #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
795     +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
796     #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
797     __DISABLE_INTERRUPTS
798     #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
799     __ENABLE_INTERRUPTS
800     -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
801     +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
802     +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
803     + __TEST_PENDING ; \
804     + jnz 14f # process more events if necessary... ; \
805     + movl ESI(%esp), %esi ; \
806     + sysexit ; \
807     +14: __DISABLE_INTERRUPTS ; \
808     + TRACE_IRQS_OFF ; \
809     +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
810     + push %esp ; \
811     + call evtchn_do_upcall ; \
812     + add $4,%esp ; \
813     + jmp ret_from_intr
814     +#define INTERRUPT_RETURN iret
815     #endif
816    
817     #ifdef CONFIG_PREEMPT
818     -#define preempt_stop cli; TRACE_IRQS_OFF
819     +#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
820     #else
821     #define preempt_stop
822     #define resume_kernel restore_nocheck
823     @@ -206,18 +224,21 @@
824    
825     #define RING0_INT_FRAME \
826     CFI_STARTPROC simple;\
827     + CFI_SIGNAL_FRAME;\
828     CFI_DEF_CFA esp, 3*4;\
829     /*CFI_OFFSET cs, -2*4;*/\
830     CFI_OFFSET eip, -3*4
831    
832     #define RING0_EC_FRAME \
833     CFI_STARTPROC simple;\
834     + CFI_SIGNAL_FRAME;\
835     CFI_DEF_CFA esp, 4*4;\
836     /*CFI_OFFSET cs, -2*4;*/\
837     CFI_OFFSET eip, -3*4
838    
839     #define RING0_PTREGS_FRAME \
840     CFI_STARTPROC simple;\
841     + CFI_SIGNAL_FRAME;\
842     CFI_DEF_CFA esp, OLDESP-EBX;\
843     /*CFI_OFFSET cs, CS-OLDESP;*/\
844     CFI_OFFSET eip, EIP-OLDESP;\
845     @@ -263,8 +284,9 @@
846     check_userspace:
847     movl EFLAGS(%esp), %eax # mix EFLAGS and CS
848     movb CS(%esp), %al
849     - testl $(VM_MASK | 2), %eax
850     - jz resume_kernel
851     + andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
852     + cmpl $USER_RPL, %eax
853     + jb resume_kernel # not returning to v8086 or userspace
854     ENTRY(resume_userspace)
855     DISABLE_INTERRUPTS # make sure we don't miss an interrupt
856     # setting need_resched or sigpending
857     @@ -277,7 +299,7 @@
858    
859     #ifdef CONFIG_PREEMPT
860     ENTRY(resume_kernel)
861     - cli
862     + DISABLE_INTERRUPTS
863     cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
864     jnz restore_nocheck
865     need_resched:
866     @@ -297,6 +319,7 @@
867     # sysenter call handler stub
868     ENTRY(sysenter_entry)
869     CFI_STARTPROC simple
870     + CFI_SIGNAL_FRAME
871     CFI_DEF_CFA esp, 0
872     CFI_REGISTER esp, ebp
873     movl SYSENTER_stack_esp0(%esp),%esp
874     @@ -305,7 +328,7 @@
875     * No need to follow this irqs on/off section: the syscall
876     * disabled irqs and here we enable it straight after entry:
877     */
878     - sti
879     + ENABLE_INTERRUPTS
880     pushl $(__USER_DS)
881     CFI_ADJUST_CFA_OFFSET 4
882     /*CFI_REL_OFFSET ss, 0*/
883     @@ -359,26 +382,8 @@
884     movl EIP(%esp), %edx
885     movl OLDESP(%esp), %ecx
886     xorl %ebp,%ebp
887     -#ifdef CONFIG_XEN
888     TRACE_IRQS_ON
889     - __ENABLE_INTERRUPTS
890     -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
891     - __TEST_PENDING
892     - jnz 14f # process more events if necessary...
893     - movl ESI(%esp), %esi
894     - sysexit
895     -14: __DISABLE_INTERRUPTS
896     - TRACE_IRQS_OFF
897     -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
898     - push %esp
899     - call evtchn_do_upcall
900     - add $4,%esp
901     - jmp ret_from_intr
902     -#else
903     - TRACE_IRQS_ON
904     - sti
905     - sysexit
906     -#endif /* !CONFIG_XEN */
907     + ENABLE_INTERRUPTS_SYSEXIT
908     CFI_ENDPROC
909    
910     # pv sysenter call handler stub
911     @@ -444,8 +449,8 @@
912     # See comments in process.c:copy_thread() for details.
913     movb OLDSS(%esp), %ah
914     movb CS(%esp), %al
915     - andl $(VM_MASK | (4 << 8) | 3), %eax
916     - cmpl $((4 << 8) | 3), %eax
917     + andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
918     + cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
919     CFI_REMEMBER_STATE
920     je ldt_ss # returning to user-space with LDT SS
921     restore_nocheck:
922     @@ -467,12 +472,11 @@
923     RESTORE_REGS
924     addl $4, %esp
925     CFI_ADJUST_CFA_OFFSET -4
926     -1: iret
927     +1: INTERRUPT_RETURN
928     .section .fixup,"ax"
929     iret_exc:
930     #ifndef CONFIG_XEN
931     - TRACE_IRQS_ON
932     - sti
933     + ENABLE_INTERRUPTS
934     #endif
935     pushl $0 # no error code
936     pushl $do_iret_error
937     @@ -498,7 +502,7 @@
938     * dosemu and wine happy. */
939     subl $8, %esp # reserve space for switch16 pointer
940     CFI_ADJUST_CFA_OFFSET 8
941     - cli
942     + DISABLE_INTERRUPTS
943     TRACE_IRQS_OFF
944     movl %esp, %eax
945     /* Set up the 16bit stack frame with switch32 pointer on top,
946     @@ -508,7 +512,7 @@
947     TRACE_IRQS_IRET
948     RESTORE_REGS
949     lss 20+4(%esp), %esp # switch to 16bit stack
950     -1: iret
951     +1: INTERRUPT_RETURN
952     .section __ex_table,"a"
953     .align 4
954     .long 1b,iret_exc
955     @@ -524,7 +528,7 @@
956     RESTORE_REGS
957     addl $4, %esp
958     CFI_ADJUST_CFA_OFFSET -4
959     -1: iret
960     +1: INTERRUPT_RETURN
961     .section __ex_table,"a"
962     .align 4
963     .long 1b,iret_exc
964     @@ -713,11 +717,9 @@
965     #define UNWIND_ESPFIX_STACK
966     #endif
967    
968     -ENTRY(divide_error)
969     - RING0_INT_FRAME
970     - pushl $0 # no error code
971     - CFI_ADJUST_CFA_OFFSET 4
972     - pushl $do_divide_error
973     +KPROBE_ENTRY(page_fault)
974     + RING0_EC_FRAME
975     + pushl $do_page_fault
976     CFI_ADJUST_CFA_OFFSET 4
977     ALIGN
978     error_code:
979     @@ -767,6 +769,7 @@
980     call *%edi
981     jmp ret_from_exception
982     CFI_ENDPROC
983     +KPROBE_END(page_fault)
984    
985     #ifdef CONFIG_XEN
986     # A note on the "critical region" in our callback handler.
987     @@ -926,7 +929,7 @@
988     CFI_ADJUST_CFA_OFFSET 4
989     SAVE_ALL
990     #ifndef CONFIG_XEN
991     - movl %cr0, %eax
992     + GET_CR0_INTO_EAX
993     testl $0x4, %eax # EM (math emulation bit)
994     je device_available_emulate
995     pushl $0 # temporary storage for ORIG_EIP
996     @@ -961,9 +964,15 @@
997     jne ok; \
998     label: \
999     movl SYSENTER_stack_esp0+offset(%esp),%esp; \
1000     + CFI_DEF_CFA esp, 0; \
1001     + CFI_UNDEFINED eip; \
1002     pushfl; \
1003     + CFI_ADJUST_CFA_OFFSET 4; \
1004     pushl $__KERNEL_CS; \
1005     - pushl $sysenter_past_esp
1006     + CFI_ADJUST_CFA_OFFSET 4; \
1007     + pushl $sysenter_past_esp; \
1008     + CFI_ADJUST_CFA_OFFSET 4; \
1009     + CFI_REL_OFFSET eip, 0
1010     #endif /* CONFIG_XEN */
1011    
1012     KPROBE_ENTRY(debug)
1013     @@ -982,7 +991,8 @@
1014     call do_debug
1015     jmp ret_from_exception
1016     CFI_ENDPROC
1017     - .previous .text
1018     +KPROBE_END(debug)
1019     +
1020     #ifndef CONFIG_XEN
1021     /*
1022     * NMI is doubly nasty. It can happen _while_ we're handling
1023     @@ -992,7 +1002,7 @@
1024     * check whether we got an NMI on the debug path where the debug
1025     * fault happened on the sysenter path.
1026     */
1027     -ENTRY(nmi)
1028     +KPROBE_ENTRY(nmi)
1029     RING0_INT_FRAME
1030     pushl %eax
1031     CFI_ADJUST_CFA_OFFSET 4
1032     @@ -1017,6 +1027,7 @@
1033     cmpl $sysenter_entry,12(%esp)
1034     je nmi_debug_stack_check
1035     nmi_stack_correct:
1036     + /* We have a RING0_INT_FRAME here */
1037     pushl %eax
1038     CFI_ADJUST_CFA_OFFSET 4
1039     SAVE_ALL
1040     @@ -1027,9 +1038,12 @@
1041     CFI_ENDPROC
1042    
1043     nmi_stack_fixup:
1044     + RING0_INT_FRAME
1045     FIX_STACK(12,nmi_stack_correct, 1)
1046     jmp nmi_stack_correct
1047     +
1048     nmi_debug_stack_check:
1049     + /* We have a RING0_INT_FRAME here */
1050     cmpw $__KERNEL_CS,16(%esp)
1051     jne nmi_stack_correct
1052     cmpl $debug,(%esp)
1053     @@ -1040,8 +1054,10 @@
1054     jmp nmi_stack_correct
1055    
1056     nmi_16bit_stack:
1057     - RING0_INT_FRAME
1058     - /* create the pointer to lss back */
1059     + /* We have a RING0_INT_FRAME here.
1060     + *
1061     + * create the pointer to lss back
1062     + */
1063     pushl %ss
1064     CFI_ADJUST_CFA_OFFSET 4
1065     pushl %esp
1066     @@ -1062,14 +1078,14 @@
1067     call do_nmi
1068     RESTORE_REGS
1069     lss 12+4(%esp), %esp # back to 16bit stack
1070     -1: iret
1071     +1: INTERRUPT_RETURN
1072     CFI_ENDPROC
1073     .section __ex_table,"a"
1074     .align 4
1075     .long 1b,iret_exc
1076     .previous
1077     #else
1078     -ENTRY(nmi)
1079     +KPROBE_ENTRY(nmi)
1080     RING0_INT_FRAME
1081     pushl %eax
1082     CFI_ADJUST_CFA_OFFSET 4
1083     @@ -1081,6 +1097,7 @@
1084     jmp restore_all
1085     CFI_ENDPROC
1086     #endif
1087     +KPROBE_END(nmi)
1088    
1089     KPROBE_ENTRY(int3)
1090     RING0_INT_FRAME
1091     @@ -1092,7 +1109,7 @@
1092     call do_int3
1093     jmp ret_from_exception
1094     CFI_ENDPROC
1095     - .previous .text
1096     +KPROBE_END(int3)
1097    
1098     ENTRY(overflow)
1099     RING0_INT_FRAME
1100     @@ -1157,7 +1174,7 @@
1101     CFI_ADJUST_CFA_OFFSET 4
1102     jmp error_code
1103     CFI_ENDPROC
1104     - .previous .text
1105     +KPROBE_END(general_protection)
1106    
1107     ENTRY(alignment_check)
1108     RING0_EC_FRAME
1109     @@ -1166,13 +1183,14 @@
1110     jmp error_code
1111     CFI_ENDPROC
1112    
1113     -KPROBE_ENTRY(page_fault)
1114     - RING0_EC_FRAME
1115     - pushl $do_page_fault
1116     +ENTRY(divide_error)
1117     + RING0_INT_FRAME
1118     + pushl $0 # no error code
1119     + CFI_ADJUST_CFA_OFFSET 4
1120     + pushl $do_divide_error
1121     CFI_ADJUST_CFA_OFFSET 4
1122     jmp error_code
1123     CFI_ENDPROC
1124     - .previous .text
1125    
1126     #ifdef CONFIG_X86_MCE
1127     ENTRY(machine_check)
1128     @@ -1234,6 +1252,19 @@
1129     jmp error_code
1130     CFI_ENDPROC
1131    
1132     +ENTRY(kernel_thread_helper)
1133     + pushl $0 # fake return address for unwinder
1134     + CFI_STARTPROC
1135     + movl %edx,%eax
1136     + push %edx
1137     + CFI_ADJUST_CFA_OFFSET 4
1138     + call *%ebx
1139     + push %eax
1140     + CFI_ADJUST_CFA_OFFSET 4
1141     + call do_exit
1142     + CFI_ENDPROC
1143     +ENDPROC(kernel_thread_helper)
1144     +
1145     .section .rodata,"a"
1146     #include "syscall_table.S"
1147    
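
Note: the DISABLE_INTERRUPTS/ENABLE_INTERRUPTS_SYSEXIT/INTERRUPT_RETURN indirection introduced above mirrors mainline's paravirt hooks, and under Xen "interrupts" means the event-channel mask byte in the shared vcpu_info rather than the hardware IF flag. A rough C rendering of what the Xen ENABLE_INTERRUPTS_SYSEXIT macro does (illustrative only; force_evtchn_callback stands in for the 14f upcall path):

    #include <linux/compiler.h>
    #include <xen/interface/xen.h>

    static inline void demo_enable_events(struct vcpu_info *v)
    {
        v->evtchn_upcall_mask = 0;        /* __ENABLE_INTERRUPTS */
        barrier();                        /* hypervisor may set pending now */
        if (v->evtchn_upcall_pending)     /* __TEST_PENDING */
            force_evtchn_callback();      /* slow path, as at label 14 */
    }
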
1148 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/entry_64-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S
1149     --- linux-2.6.25/arch/x86/kernel/entry_64-xen.S 2008-05-23 20:51:11.000000000 +0200
1150     +++ linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S 2008-05-23 20:47:55.000000000 +0200
1151 niro 609 @@ -4,9 +4,6 @@
1152     * Copyright (C) 1991, 1992 Linus Torvalds
1153     * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
1154     * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
1155     - *
1156 niro 611 - * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.2 2008-05-23 18:49:01 niro Exp $
1157 niro 609 - *
1158     * Jun Nakajima <jun.nakajima@intel.com>
1159     * Asit Mallick <asit.k.mallick@intel.com>
1160     * Modified for Xen
1161     @@ -26,15 +23,25 @@
1162     * at the top of the kernel process stack.
1163     * - partial stack frame: partially saved registers upto R11.
1164     * - full stack frame: Like partial stack frame, but all register saved.
1165     - *
1166     - * TODO:
1167     - * - schedule it carefully for the final hardware.
1168     + *
1169     + * Some macro usage:
1170     + * - CFI macros are used to generate dwarf2 unwind information for better
1171     + * backtraces. They don't change any code.
1172     + * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
1173     + * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
1174     + * There are unfortunately lots of special cases where some registers
1175     + * not touched. The macro is a big mess that should be cleaned up.
1176     + * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
1177     + * Gives a full stack frame.
1178     + * - ENTRY/END Define functions in the symbol table.
1179     + * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
1180     + * frame that is otherwise undefined after a SYSCALL
1181     + * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
1182     + * - errorentry/paranoidentry/zeroentry - Define exception entry points.
1183     */
1184    
1185     -#define ASSEMBLY 1
1186     #include <linux/linkage.h>
1187     #include <asm/segment.h>
1188     -#include <asm/smp.h>
1189     #include <asm/cache.h>
1190     #include <asm/errno.h>
1191     #include <asm/dwarf2.h>
1192     @@ -117,6 +124,7 @@
1193     .macro CFI_DEFAULT_STACK start=1,adj=0
1194     .if \start
1195     CFI_STARTPROC simple
1196     + CFI_SIGNAL_FRAME
1197     CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
1198     .else
1199     CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
1200     @@ -207,6 +215,7 @@
1201     */
1202     .macro _frame ref
1203     CFI_STARTPROC simple
1204     + CFI_SIGNAL_FRAME
1205     CFI_DEF_CFA rsp,SS+8-\ref
1206     /*CFI_REL_OFFSET ss,SS-\ref*/
1207     CFI_REL_OFFSET rsp,RSP-\ref
1208 niro 611 @@ -255,6 +264,8 @@
1209     CFI_REMEMBER_STATE
1210     jnz tracesys
1211 niro 609 cmpq $__NR_syscall_max,%rax
1212     + movq $-ENOSYS,%rcx
1213     + cmova %rcx,%rax
1214 niro 611 ja badsys
1215     movq %r10,%rcx
1216     call *sys_call_table(,%rax,8) # XXX: rip relative
1217 niro 609 @@ -349,6 +360,7 @@
1218     */
1219     ENTRY(int_ret_from_sys_call)
1220     CFI_STARTPROC simple
1221     + CFI_SIGNAL_FRAME
1222     CFI_DEF_CFA rsp,SS+8-ARGOFFSET
1223     /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
1224     CFI_REL_OFFSET rsp,RSP-ARGOFFSET
1225     @@ -583,8 +595,7 @@
1226     #ifdef CONFIG_PREEMPT
1227     /* Returning to kernel space. Check if we need preemption */
1228     /* rcx: threadinfo. interrupts off. */
1229     - .p2align
1230     -retint_kernel:
1231     +ENTRY(retint_kernel)
1232     cmpl $0,threadinfo_preempt_count(%rcx)
1233     jnz retint_restore_args
1234     bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
1235     @@ -644,7 +655,6 @@
1236     END(call_function_interrupt)
1237     #endif
1238    
1239     -#ifdef CONFIG_X86_LOCAL_APIC
1240     ENTRY(apic_timer_interrupt)
1241     apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
1242     END(apic_timer_interrupt)
1243     @@ -656,7 +666,6 @@
1244     ENTRY(spurious_interrupt)
1245     apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
1246     END(spurious_interrupt)
1247     -#endif
1248     #endif /* !CONFIG_XEN */
1249    
1250     /*
1251     @@ -755,7 +764,9 @@
1252     testl $3,CS(%rsp)
1253     jnz paranoid_userspace\trace
1254     paranoid_swapgs\trace:
1255     + .if \trace
1256     TRACE_IRQS_IRETQ 0
1257     + .endif
1258     swapgs
1259     paranoid_restore\trace:
1260     RESTORE_ALL 8
1261     @@ -802,7 +813,7 @@
1262     * Exception entry point. This expects an error code/orig_rax on the stack
1263     * and the exception handler in %rax.
1264     */
1265     -ENTRY(error_entry)
1266     +KPROBE_ENTRY(error_entry)
1267     _frame RDI
1268     CFI_REL_OFFSET rax,0
1269     /* rdi slot contains rax, oldrax contains error code */
1270     @@ -896,7 +907,7 @@
1271     jmp error_sti
1272     #endif
1273     CFI_ENDPROC
1274     -END(error_entry)
1275     +KPROBE_END(error_entry)
1276    
1277     ENTRY(hypervisor_callback)
1278     zeroentry do_hypervisor_callback
1279     @@ -936,26 +947,6 @@
1280     CFI_ENDPROC
1281     END(do_hypervisor_callback)
1282    
1283     -#ifdef CONFIG_X86_LOCAL_APIC
1284     -KPROBE_ENTRY(nmi)
1285     - zeroentry do_nmi_callback
1286     -ENTRY(do_nmi_callback)
1287     - CFI_STARTPROC
1288     - addq $8, %rsp
1289     - CFI_ENDPROC
1290     - CFI_DEFAULT_STACK
1291     - call do_nmi
1292     - orl $NMI_MASK,EFLAGS(%rsp)
1293     - RESTORE_REST
1294     - XEN_BLOCK_EVENTS(%rsi)
1295     - TRACE_IRQS_OFF
1296     - GET_THREAD_INFO(%rcx)
1297     - jmp retint_restore_args
1298     - CFI_ENDPROC
1299     - .previous .text
1300     -END(nmi)
1301     -#endif
1302     -
1303     ALIGN
1304     restore_all_enable_events:
1305     CFI_DEFAULT_STACK adj=1
1306     @@ -1121,7 +1112,7 @@
1307     * do_sys_execve asm fallback arguments:
1308     * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1309     */
1310     -ENTRY(execve)
1311     +ENTRY(kernel_execve)
1312     CFI_STARTPROC
1313     FAKE_STACK_FRAME $0
1314     SAVE_ALL
1315     @@ -1135,12 +1126,11 @@
1316     UNFAKE_STACK_FRAME
1317     ret
1318     CFI_ENDPROC
1319     -ENDPROC(execve)
1320     +ENDPROC(kernel_execve)
1321    
1322     KPROBE_ENTRY(page_fault)
1323     errorentry do_page_fault
1324     -END(page_fault)
1325     - .previous .text
1326     +KPROBE_END(page_fault)
1327    
1328     ENTRY(coprocessor_error)
1329     zeroentry do_coprocessor_error
1330     @@ -1162,25 +1152,25 @@
1331     zeroentry do_debug
1332     /* paranoidexit
1333     CFI_ENDPROC */
1334     -END(debug)
1335     - .previous .text
1336     +KPROBE_END(debug)
1337    
1338     -#if 0
1339     - /* runs on exception stack */
1340     KPROBE_ENTRY(nmi)
1341     - INTR_FRAME
1342     - pushq $-1
1343     - CFI_ADJUST_CFA_OFFSET 8
1344     - paranoidentry do_nmi, 0, 0
1345     -#ifdef CONFIG_TRACE_IRQFLAGS
1346     - paranoidexit 0
1347     -#else
1348     - jmp paranoid_exit1
1349     - CFI_ENDPROC
1350     -#endif
1351     -END(nmi)
1352     - .previous .text
1353     -#endif
1354     + zeroentry do_nmi_callback
1355     +KPROBE_END(nmi)
1356     +do_nmi_callback:
1357     + CFI_STARTPROC
1358     + addq $8, %rsp
1359     + CFI_ENDPROC
1360     + CFI_DEFAULT_STACK
1361     + call do_nmi
1362     + orl $NMI_MASK,EFLAGS(%rsp)
1363     + RESTORE_REST
1364     + XEN_BLOCK_EVENTS(%rsi)
1365     + TRACE_IRQS_OFF
1366     + GET_THREAD_INFO(%rcx)
1367     + jmp retint_restore_args
1368     + CFI_ENDPROC
1369     +END(do_nmi_callback)
1370    
1371     KPROBE_ENTRY(int3)
1372     /* INTR_FRAME
1373     @@ -1189,8 +1179,7 @@
1374     zeroentry do_int3
1375     /* jmp paranoid_exit1
1376     CFI_ENDPROC */
1377     -END(int3)
1378     - .previous .text
1379     +KPROBE_END(int3)
1380    
1381     ENTRY(overflow)
1382     zeroentry do_overflow
1383     @@ -1241,8 +1230,7 @@
1384    
1385     KPROBE_ENTRY(general_protection)
1386     errorentry do_general_protection
1387     -END(general_protection)
1388     - .previous .text
1389     +KPROBE_END(general_protection)
1390    
1391     ENTRY(alignment_check)
1392     errorentry do_alignment_check
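
Note: throughout both entry files the old `END(name)` plus `.previous .text` trailer is replaced by KPROBE_END, pairing each KPROBE_ENTRY. The 2.6.19 <linux/linkage.h> definitions are approximately (quoted from memory, indicative only):

    /* code between the pair lands in .kprobes.text, which the kprobes
     * core refuses to instrument -- so the entry paths that run probe
     * handlers can never recurse into themselves */
    #define KPROBE_ENTRY(name) \
        .section .kprobes.text, "ax"; \
        ENTRY(name)

    #define KPROBE_END(name) \
        END(name); \
        .previous
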
1393 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/genapic_xen_64.c linux-2.6.25-xen/arch/x86/kernel/genapic_xen_64.c
1394     --- linux-2.6.25/arch/x86/kernel/genapic_xen_64.c 2008-05-23 20:51:11.000000000 +0200
1395     +++ linux-2.6.25-xen/arch/x86/kernel/genapic_xen_64.c 2008-05-23 20:39:03.000000000 +0200
1396 niro 609 @@ -71,6 +71,13 @@
1397     return cpu_online_map;
1398     }
1399    
1400     +static cpumask_t xen_vector_allocation_domain(int cpu)
1401     +{
1402     + cpumask_t domain = CPU_MASK_NONE;
1403     + cpu_set(cpu, domain);
1404     + return domain;
1405     +}
1406     +
1407     /*
1408     * Set up the logical destination ID.
1409     * Do nothing, not called now.
1410     @@ -147,8 +154,8 @@
1411     .int_delivery_mode = dest_LowestPrio,
1412     #endif
1413     .int_dest_mode = (APIC_DEST_LOGICAL != 0),
1414     - .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
1415     .target_cpus = xen_target_cpus,
1416     + .vector_allocation_domain = xen_vector_allocation_domain,
1417     #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1418     .apic_id_registered = xen_apic_id_registered,
1419     #endif
1420 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/head_32-xen.S linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S
1421     --- linux-2.6.25/arch/x86/kernel/head_32-xen.S 2008-05-23 20:51:11.000000000 +0200
1422     +++ linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S 2008-05-23 20:39:03.000000000 +0200
1423     @@ -62,7 +62,7 @@
1424     movl %eax,%gs
1425     cld # gcc2 wants the direction flag cleared at all times
1426    
1427     - pushl %eax # fake return address
1428     + pushl $0 # fake return address for unwinder
1429     jmp start_kernel
1430    
1431     #define HYPERCALL_PAGE_OFFSET 0x1000
1432     diff -Naur linux-2.6.25/arch/x86/kernel/head64-xen.c linux-2.6.25-xen/arch/x86/kernel/head64-xen.c
1433     --- linux-2.6.25/arch/x86/kernel/head64-xen.c 2008-05-23 20:51:11.000000000 +0200
1434     +++ linux-2.6.25-xen/arch/x86/kernel/head64-xen.c 2008-05-23 20:39:03.000000000 +0200
1435 niro 609 @@ -54,11 +54,9 @@
1436     new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
1437     if (!new_data) {
1438     if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
1439     - printk("so old bootloader that it does not support commandline?!\n");
1440     return;
1441     }
1442     new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
1443     - printk("old bootloader convention, maybe loadlin?\n");
1444     }
1445     command_line = (char *) ((u64)(new_data));
1446     memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
1447     @@ -70,25 +68,6 @@
1448     memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
1449     saved_command_line[max_cmdline-1] = '\0';
1450     #endif
1451     - printk("Bootdata ok (command line is %s)\n", saved_command_line);
1452     -}
1453     -
1454     -static void __init setup_boot_cpu_data(void)
1455     -{
1456     - unsigned int dummy, eax;
1457     -
1458     - /* get vendor info */
1459     - cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
1460     - (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
1461     - (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
1462     - (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
1463     -
1464     - /* get cpu type */
1465     - cpuid(1, &eax, &dummy, &dummy,
1466     - (unsigned int *) &boot_cpu_data.x86_capability);
1467     - boot_cpu_data.x86 = (eax >> 8) & 0xf;
1468     - boot_cpu_data.x86_model = (eax >> 4) & 0xf;
1469     - boot_cpu_data.x86_mask = eax & 0xf;
1470     }
1471    
1472     #include <xen/interface/memory.h>
1473     @@ -101,7 +80,6 @@
1474     {
1475     struct xen_machphys_mapping mapping;
1476     unsigned long machine_to_phys_nr_ents;
1477     - char *s;
1478     int i;
1479    
1480     setup_xen_features();
1481     @@ -128,10 +106,7 @@
1482     asm volatile("lidt %0" :: "m" (idt_descr));
1483     #endif
1484    
1485     - /*
1486     - * This must be called really, really early:
1487     - */
1488     - lockdep_init();
1489     + early_printk("Kernel alive\n");
1490    
1491     for (i = 0; i < NR_CPUS; i++)
1492     cpu_pda(i) = &boot_cpu_pda[i];
1493     @@ -141,22 +116,5 @@
1494     #ifdef CONFIG_SMP
1495     cpu_set(0, cpu_online_map);
1496     #endif
1497     - s = strstr(saved_command_line, "earlyprintk=");
1498     - if (s != NULL)
1499     - setup_early_printk(strchr(s, '=') + 1);
1500     -#ifdef CONFIG_NUMA
1501     - s = strstr(saved_command_line, "numa=");
1502     - if (s != NULL)
1503     - numa_setup(s+5);
1504     -#endif
1505     -#ifdef CONFIG_X86_IO_APIC
1506     - if (strstr(saved_command_line, "disableapic"))
1507     - disable_apic = 1;
1508     -#endif
1509     - /* You need early console to see that */
1510     - if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
1511     - panic("Kernel too big for kernel mapping\n");
1512     -
1513     - setup_boot_cpu_data();
1514     start_kernel();
1515     }
1516 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/head_64-xen.S linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S
1517     --- linux-2.6.25/arch/x86/kernel/head_64-xen.S 2008-05-23 20:51:11.000000000 +0200
1518     +++ linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S 2008-05-23 20:49:19.000000000 +0200
1519 niro 609 @@ -5,9 +5,6 @@
1520     * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
1521     * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
1522     * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
1523     - *
1524 niro 611 - * $Id: 1020-2.6.25-xen-patch-2.6.19.patch,v 1.2 2008-05-23 18:49:01 niro Exp $
1525 niro 609 - *
1526     * Jun Nakajima <jun.nakajima@intel.com>
1527     * Modified for Xen
1528     */
1529     @@ -149,7 +146,7 @@
1530     .quad 0,0 /* TSS */
1531     .quad 0,0 /* LDT */
1532     .quad 0,0,0 /* three TLS descriptors */
1533     - .quad 0 /* unused */
1534     + .quad 0x0000f40000000000 /* node/CPU stored in limit */
1535     gdt_end:
1536     /* asm/segment.h:GDT_ENTRIES must match this */
1537     /* This should be a multiple of the cache line size */
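
Note: the GDT hunk above repurposes the previously unused slot. 2.6.19 stores each CPU's number and NUMA node in that descriptor's limit field so the vgetcpu vsyscall can recover them with a single unprivileged lsl; 0x0000f40000000000 is just the valid, present template before the per-CPU values are stamped in. Sketch of the decode side (illustrative; 0x7b is GDT_ENTRY_PER_CPU*8 + 3):

    static inline void demo_vgetcpu(unsigned *cpu, unsigned *node)
    {
        unsigned int p;

        asm("lsl %1, %0" : "=r" (p) : "r" (0x7bu));
        *cpu  = p & 0xfff;    /* low 12 bits: CPU number */
        *node = p >> 12;      /* remaining bits: node */
    }
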
1538 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c
1539     --- linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 20:51:11.000000000 +0200
1540     +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 20:39:03.000000000 +0200
1541 niro 609 @@ -31,6 +31,9 @@
1542     #include <linux/acpi.h>
1543     #include <linux/module.h>
1544     #include <linux/sysdev.h>
1545     +#include <linux/pci.h>
1546     +#include <linux/msi.h>
1547     +#include <linux/htirq.h>
1548    
1549     #include <asm/io.h>
1550     #include <asm/smp.h>
1551     @@ -38,13 +41,15 @@
1552     #include <asm/timer.h>
1553     #include <asm/i8259.h>
1554     #include <asm/nmi.h>
1555     +#include <asm/msidef.h>
1556     +#include <asm/hypertransport.h>
1557    
1558     #include <mach_apic.h>
1559     +#include <mach_apicdef.h>
1560    
1561     #include "io_ports.h"
1562    
1563     #ifdef CONFIG_XEN
1564     -
1565     #include <xen/interface/xen.h>
1566     #include <xen/interface/physdev.h>
1567    
1568     @@ -55,32 +60,7 @@
1569    
1570     unsigned long io_apic_irqs;
1571    
1572     -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
1573     -{
1574     - struct physdev_apic apic_op;
1575     - int ret;
1576     -
1577     - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1578     - apic_op.reg = reg;
1579     - ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
1580     - if (ret)
1581     - return ret;
1582     - return apic_op.value;
1583     -}
1584     -
1585     -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
1586     -{
1587     - struct physdev_apic apic_op;
1588     -
1589     - apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1590     - apic_op.reg = reg;
1591     - apic_op.value = value;
1592     - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
1593     -}
1594     -
1595     -#define io_apic_read(a,r) xen_io_apic_read(a,r)
1596     -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
1597     -
1598     +#define clear_IO_APIC() ((void)0)
1599     #endif /* CONFIG_XEN */
1600    
1601     int (*ioapic_renumber_irq)(int ioapic, int irq);
1602     @@ -105,7 +85,7 @@
1603     */
1604     int nr_ioapic_registers[MAX_IO_APICS];
1605    
1606     -int disable_timer_pin_1 __initdata;
1607     +static int disable_timer_pin_1 __initdata;
1608    
1609     /*
1610     * Rough estimation of how many shared IRQs there are, can
1611     @@ -125,12 +105,122 @@
1612     int apic, pin, next;
1613     } irq_2_pin[PIN_MAP_SIZE];
1614    
1615     -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
1616     -#ifdef CONFIG_PCI_MSI
1617     -#define vector_to_irq(vector) \
1618     - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
1619     +#ifndef CONFIG_XEN
1620     +struct io_apic {
1621     + unsigned int index;
1622     + unsigned int unused[3];
1623     + unsigned int data;
1624     +};
1625     +
1626     +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
1627     +{
1628     + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
1629     + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
1630     +}
1631     +#endif
1632     +
1633     +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
1634     +{
1635     +#ifndef CONFIG_XEN
1636     + struct io_apic __iomem *io_apic = io_apic_base(apic);
1637     + writel(reg, &io_apic->index);
1638     + return readl(&io_apic->data);
1639     +#else
1640     + struct physdev_apic apic_op;
1641     + int ret;
1642     +
1643     + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1644     + apic_op.reg = reg;
1645     + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
1646     + if (ret)
1647     + return ret;
1648     + return apic_op.value;
1649     +#endif
1650     +}
1651     +
1652     +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
1653     +{
1654     +#ifndef CONFIG_XEN
1655     + struct io_apic __iomem *io_apic = io_apic_base(apic);
1656     + writel(reg, &io_apic->index);
1657     + writel(value, &io_apic->data);
1658     +#else
1659     + struct physdev_apic apic_op;
1660     +
1661     + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
1662     + apic_op.reg = reg;
1663     + apic_op.value = value;
1664     + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
1665     +#endif
1666     +}
1667     +
1668     +#ifndef CONFIG_XEN
1669     +/*
1670     + * Re-write a value: to be used for read-modify-write
1671     + * cycles where the read already set up the index register.
1672     + *
1673     + * Older SiS APIC requires we rewrite the index register
1674     + */
1675     +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
1676     +{
1677     + volatile struct io_apic *io_apic = io_apic_base(apic);
1678     + if (sis_apic_bug)
1679     + writel(reg, &io_apic->index);
1680     + writel(value, &io_apic->data);
1681     +}
1682     #else
1683     -#define vector_to_irq(vector) (vector)
1684     +#define io_apic_modify io_apic_write
1685     +#endif
1686     +
1687     +union entry_union {
1688     + struct { u32 w1, w2; };
1689     + struct IO_APIC_route_entry entry;
1690     +};
1691     +
1692     +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
1693     +{
1694     + union entry_union eu;
1695     + unsigned long flags;
1696     + spin_lock_irqsave(&ioapic_lock, flags);
1697     + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
1698     + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
1699     + spin_unlock_irqrestore(&ioapic_lock, flags);
1700     + return eu.entry;
1701     +}
1702     +
1703     +/*
1704     + * When we write a new IO APIC routing entry, we need to write the high
1705     + * word first! If the mask bit in the low word is clear, we will enable
1706     + * the interrupt, and we need to make sure the entry is fully populated
1707     + * before that happens.
1708     + */
1709     +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
1710     +{
1711     + unsigned long flags;
1712     + union entry_union eu;
1713     + eu.entry = e;
1714     + spin_lock_irqsave(&ioapic_lock, flags);
1715     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1716     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1717     + spin_unlock_irqrestore(&ioapic_lock, flags);
1718     +}
1719     +
1720     +#ifndef CONFIG_XEN
1721     +/*
1722     + * When we mask an IO APIC routing entry, we need to write the low
1723     + * word first, in order to set the mask bit before we change the
1724     + * high bits!
1725     + */
1726     +static void ioapic_mask_entry(int apic, int pin)
1727     +{
1728     + unsigned long flags;
1729     + union entry_union eu = { .entry.mask = 1 };
1730     +
1731     + spin_lock_irqsave(&ioapic_lock, flags);
1732     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
1733     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
1734     + spin_unlock_irqrestore(&ioapic_lock, flags);
1735     +}
1736     #endif
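
The two helpers above encode an ordering rule, not just convenience: when arming an entry, the high word must land before the low word so the destination is in place before a cleared mask bit can enable delivery; when masking, the low word goes first so the mask bit is set before the high bits change. A minimal userspace sketch of the same discipline (the register layout and mask-bit position are made up for illustration, not the real RTE format):

    #include <stdio.h>

    /* Stand-in for one 64-bit routing entry split across two 32-bit
     * registers; the real RTE format is richer than this. */
    static unsigned rte[2];            /* rte[0] = low word, rte[1] = high */
    #define RTE_MASK_BIT (1u << 16)    /* illustrative mask-bit position */

    static void write_entry(unsigned lo, unsigned hi)
    {
        rte[1] = hi;    /* high word first: destination fully populated... */
        rte[0] = lo;    /* ...before a cleared mask bit enables delivery */
    }

    static void mask_entry(void)
    {
        rte[0] |= RTE_MASK_BIT;  /* low word first: mask is set... */
        rte[1] = 0;              /* ...before the high bits change */
    }

    int main(void)
    {
        write_entry(0x0000a030, 0x01000000);
        printf("armed:  lo=%08x hi=%08x\n", rte[0], rte[1]);
        mask_entry();
        printf("masked: lo=%08x hi=%08x\n", rte[0], rte[1]);
        return 0;
    }
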
1737    
1738     /*
1739     @@ -156,9 +246,7 @@
1740     entry->pin = pin;
1741     }
1742    
1743     -#ifdef CONFIG_XEN
1744     -#define clear_IO_APIC() ((void)0)
1745     -#else
1746     +#ifndef CONFIG_XEN
1747     /*
1748     * Reroute an IRQ to a different pin.
1749     */
1750     @@ -243,25 +331,16 @@
1751     static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
1752     {
1753     struct IO_APIC_route_entry entry;
1754     - unsigned long flags;
1755    
1756     /* Check delivery_mode to be sure we're not clearing an SMI pin */
1757     - spin_lock_irqsave(&ioapic_lock, flags);
1758     - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1759     - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1760     - spin_unlock_irqrestore(&ioapic_lock, flags);
1761     + entry = ioapic_read_entry(apic, pin);
1762     if (entry.delivery_mode == dest_SMI)
1763     return;
1764    
1765     /*
1766     * Disable it in the IO-APIC irq-routing table:
1767     */
1768     - memset(&entry, 0, sizeof(entry));
1769     - entry.mask = 1;
1770     - spin_lock_irqsave(&ioapic_lock, flags);
1771     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
1772     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
1773     - spin_unlock_irqrestore(&ioapic_lock, flags);
1774     + ioapic_mask_entry(apic, pin);
1775     }
1776    
1777     static void clear_IO_APIC (void)
1778     @@ -301,7 +380,7 @@
1779     break;
1780     entry = irq_2_pin + entry->next;
1781     }
1782     - set_irq_info(irq, cpumask);
1783     + set_native_irq_info(irq, cpumask);
1784     spin_unlock_irqrestore(&ioapic_lock, flags);
1785     }
1786    
1787     @@ -1207,40 +1286,40 @@
1788     /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1789     u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
1790    
1791     -int assign_irq_vector(int irq)
1792     +static int __assign_irq_vector(int irq)
1793     {
1794     - unsigned long flags;
1795     int vector;
1796     struct physdev_irq irq_op;
1797    
1798     - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
1799     -
1800     - spin_lock_irqsave(&vector_lock, flags);
1801     + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1802    
1803     - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
1804     - spin_unlock_irqrestore(&vector_lock, flags);
1805     - return IO_APIC_VECTOR(irq);
1806     - }
1807     + if (irq_vector[irq] > 0)
1808     + return irq_vector[irq];
1809    
1810     irq_op.irq = irq;
1811     - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1812     - spin_unlock_irqrestore(&vector_lock, flags);
1813     + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
1814     return -ENOSPC;
1815     - }
1816    
1817     vector = irq_op.vector;
1818     - vector_irq[vector] = irq;
1819     - if (irq != AUTO_ASSIGN)
1820     - IO_APIC_VECTOR(irq) = vector;
1821     + irq_vector[irq] = vector;
1822     +
1823     + return vector;
1824     +}
1825    
1826     +static int assign_irq_vector(int irq)
1827     +{
1828     + unsigned long flags;
1829     + int vector;
1830     +
1831     + spin_lock_irqsave(&vector_lock, flags);
1832     + vector = __assign_irq_vector(irq);
1833     spin_unlock_irqrestore(&vector_lock, flags);
1834    
1835     return vector;
1836     }
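
The split into __assign_irq_vector()/assign_irq_vector() follows the usual kernel convention: the double-underscore variant assumes vector_lock is already held, and the plain wrapper only takes the lock. That lets later code (create_irq() below) call the inner helper from inside its own vector_lock critical section without deadlocking. A pthread sketch of the pattern, with invented names (allocate_id is not a kernel API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
    static int id_vector[16];
    static int next_vector = 32;

    /* Caller must hold vector_lock (the "__" convention). */
    static int __allocate_id(int id)
    {
        if (id_vector[id] > 0)        /* already assigned: idempotent */
            return id_vector[id];
        id_vector[id] = next_vector++;
        return id_vector[id];
    }

    /* Locking wrapper; lock holders call __allocate_id() directly. */
    static int allocate_id(int id)
    {
        pthread_mutex_lock(&vector_lock);
        int v = __allocate_id(id);
        pthread_mutex_unlock(&vector_lock);
        return v;
    }

    int main(void)
    {
        printf("%d %d\n", allocate_id(3), allocate_id(3));  /* same twice */
        return 0;
    }
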
1837    
1838     #ifndef CONFIG_XEN
1839     -static struct hw_interrupt_type ioapic_level_type;
1840     -static struct hw_interrupt_type ioapic_edge_type;
1841     +static struct irq_chip ioapic_chip;
1842    
1843     #define IOAPIC_AUTO -1
1844     #define IOAPIC_EDGE 0
1845     @@ -1248,16 +1327,16 @@
1846    
1847     static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1848     {
1849     - unsigned idx;
1850     -
1851     - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
1852     -
1853     if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1854     trigger == IOAPIC_LEVEL)
1855     - irq_desc[idx].chip = &ioapic_level_type;
1856     - else
1857     - irq_desc[idx].chip = &ioapic_edge_type;
1858     - set_intr_gate(vector, interrupt[idx]);
1859     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1860     + handle_fasteoi_irq, "fasteoi");
1861     + else {
1862     + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1863     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
1864     + handle_edge_irq, "edge");
1865     + }
1866     + set_intr_gate(vector, interrupt[irq]);
1867     }
1868     #else
1869     #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
1870     @@ -1328,9 +1407,8 @@
1871     if (!apic && (irq < 16))
1872     disable_8259A_irq(irq);
1873     }
1874     + ioapic_write_entry(apic, pin, entry);
1875     spin_lock_irqsave(&ioapic_lock, flags);
1876     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1877     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1878     set_native_irq_info(irq, TARGET_CPUS);
1879     spin_unlock_irqrestore(&ioapic_lock, flags);
1880     }
1881     @@ -1347,7 +1425,6 @@
1882     static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
1883     {
1884     struct IO_APIC_route_entry entry;
1885     - unsigned long flags;
1886    
1887     memset(&entry,0,sizeof(entry));
1888    
1889     @@ -1372,15 +1449,13 @@
1890     * scene we have an 8259A-master in AEOI mode ...
1891     * scene we have a 8259A-master in AEOI mode ...
1892     */
1893     - irq_desc[0].chip = &ioapic_edge_type;
1894     + irq_desc[0].chip = &ioapic_chip;
1895     + set_irq_handler(0, handle_edge_irq);
1896    
1897     /*
1898     * Add it to the IO-APIC irq-routing table:
1899     */
1900     - spin_lock_irqsave(&ioapic_lock, flags);
1901     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1902     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1903     - spin_unlock_irqrestore(&ioapic_lock, flags);
1904     + ioapic_write_entry(apic, pin, entry);
1905    
1906     enable_8259A_irq(0);
1907     }
1908     @@ -1490,10 +1565,7 @@
1909     for (i = 0; i <= reg_01.bits.entries; i++) {
1910     struct IO_APIC_route_entry entry;
1911    
1912     - spin_lock_irqsave(&ioapic_lock, flags);
1913     - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1914     - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1915     - spin_unlock_irqrestore(&ioapic_lock, flags);
1916     + entry = ioapic_read_entry(apic, i);
1917    
1918     printk(KERN_DEBUG " %02x %03X %02X ",
1919     i,
1920     @@ -1513,17 +1585,12 @@
1921     );
1922     }
1923     }
1924     - if (use_pci_vector())
1925     - printk(KERN_INFO "Using vector-based indexing\n");
1926     printk(KERN_DEBUG "IRQ to pin mappings:\n");
1927     for (i = 0; i < NR_IRQS; i++) {
1928     struct irq_pin_list *entry = irq_2_pin + i;
1929     if (entry->pin < 0)
1930     continue;
1931     - if (use_pci_vector() && !platform_legacy_irq(i))
1932     - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
1933     - else
1934     - printk(KERN_DEBUG "IRQ%d ", i);
1935     + printk(KERN_DEBUG "IRQ%d ", i);
1936     for (;;) {
1937     printk("-> %d:%d", entry->apic, entry->pin);
1938     if (!entry->next)
1939     @@ -1709,10 +1776,7 @@
1940     /* See if any of the pins is in ExtINT mode */
1941     for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1942     struct IO_APIC_route_entry entry;
1943     - spin_lock_irqsave(&ioapic_lock, flags);
1944     - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1945     - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1946     - spin_unlock_irqrestore(&ioapic_lock, flags);
1947     + entry = ioapic_read_entry(apic, pin);
1948    
1949    
1950     /* If the interrupt line is enabled and in ExtInt mode
1951     @@ -1770,7 +1834,6 @@
1952     */
1953     if (ioapic_i8259.pin != -1) {
1954     struct IO_APIC_route_entry entry;
1955     - unsigned long flags;
1956    
1957     memset(&entry, 0, sizeof(entry));
1958     entry.mask = 0; /* Enabled */
1959     @@ -1787,12 +1850,7 @@
1960     /*
1961     * Add it to the IO-APIC irq-routing table:
1962     */
1963     - spin_lock_irqsave(&ioapic_lock, flags);
1964     - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1965     - *(((int *)&entry)+1));
1966     - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1967     - *(((int *)&entry)+0));
1968     - spin_unlock_irqrestore(&ioapic_lock, flags);
1969     + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1970     }
1971     disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1972     #endif
1973     @@ -1959,6 +2017,8 @@
1974     */
1975    
1976     /*
1977     + * Startup quirk:
1978     + *
1979     * Starting up an edge-triggered IO-APIC interrupt is
1980     * nasty - we need to make sure that we get the edge.
1981     * If it is already asserted for some reason, we need
1982     @@ -1966,8 +2026,10 @@
1983     *
1984     * This is not complete - we should be able to fake
1985     * an edge even if it isn't on the 8259A...
1986     + *
1987     + * (We do this for level-triggered IRQs too - it cannot hurt.)
1988     */
1989     -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1990     +static unsigned int startup_ioapic_irq(unsigned int irq)
1991     {
1992     int was_pending = 0;
1993     unsigned long flags;
1994     @@ -1984,47 +2046,18 @@
1995     return was_pending;
1996     }
1997    
1998     -/*
1999     - * Once we have recorded IRQ_PENDING already, we can mask the
2000     - * interrupt for real. This prevents IRQ storms from unhandled
2001     - * devices.
2002     - */
2003     -static void ack_edge_ioapic_irq(unsigned int irq)
2004     -{
2005     - move_irq(irq);
2006     - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
2007     - == (IRQ_PENDING | IRQ_DISABLED))
2008     - mask_IO_APIC_irq(irq);
2009     - ack_APIC_irq();
2010     -}
2011     -
2012     -/*
2013     - * Level triggered interrupts can just be masked,
2014     - * and shutting down and starting up the interrupt
2015     - * is the same as enabling and disabling them -- except
2016     - * with a startup need to return a "was pending" value.
2017     - *
2018     - * Level triggered interrupts are special because we
2019     - * do not touch any IO-APIC register while handling
2020     - * them. We ack the APIC in the end-IRQ handler, not
2021     - * in the start-IRQ-handler. Protection against reentrance
2022     - * from the same interrupt is still provided, both by the
2023     - * generic IRQ layer and by the fact that an unacked local
2024     - * APIC does not accept IRQs.
2025     - */
2026     -static unsigned int startup_level_ioapic_irq (unsigned int irq)
2027     +static void ack_ioapic_irq(unsigned int irq)
2028     {
2029     - unmask_IO_APIC_irq(irq);
2030     -
2031     - return 0; /* don't check for pending */
2032     + move_native_irq(irq);
2033     + ack_APIC_irq();
2034     }
2035    
2036     -static void end_level_ioapic_irq (unsigned int irq)
2037     +static void ack_ioapic_quirk_irq(unsigned int irq)
2038     {
2039     unsigned long v;
2040     int i;
2041    
2042     - move_irq(irq);
2043     + move_native_irq(irq);
2044     /*
2045     * It appears there is an erratum which affects at least version 0x11
2046     * of I/O APIC (that's the 82093AA and cores integrated into various
2047     @@ -2044,7 +2077,7 @@
2048     * operation to prevent an edge-triggered interrupt escaping meanwhile.
2049     * The idea is from Manfred Spraul. --macro
2050     */
2051     - i = IO_APIC_VECTOR(irq);
2052     + i = irq_vector[irq];
2053    
2054     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2055    
2056     @@ -2059,104 +2092,24 @@
2057     }
2058     }
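
The TMR lookup in the quirk handler above packs a 256-bit bitmap into eight 32-bit registers spaced 16 bytes apart, so vector i lands at byte offset (i & ~0x1f) >> 1 (i.e. (i / 32) * 16) and bit (i & 0x1f). The same arithmetic checked in plain C (tmr_slot is an invented helper for the sketch):

    #include <assert.h>
    #include <stdio.h>

    /* Vector -> (register byte offset, bit index) for an APIC bitmap
     * register laid out as eight 32-bit words at 16-byte strides. */
    static void tmr_slot(unsigned vec, unsigned *off, unsigned *bit)
    {
        *off = (vec & ~0x1fu) >> 1;   /* (vec / 32) * 16 */
        *bit = vec & 0x1fu;
    }

    int main(void)
    {
        unsigned off, bit;
        tmr_slot(0x31, &off, &bit);
        assert(off == 0x10 && bit == 0x11);  /* vector 49: word 1, bit 17 */
        printf("vector 0x31 -> TMR+0x%x, bit %u\n", off, bit);
        return 0;
    }
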
2059    
2060     -#ifdef CONFIG_PCI_MSI
2061     -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
2062     -{
2063     - int irq = vector_to_irq(vector);
2064     -
2065     - return startup_edge_ioapic_irq(irq);
2066     -}
2067     -
2068     -static void ack_edge_ioapic_vector(unsigned int vector)
2069     -{
2070     - int irq = vector_to_irq(vector);
2071     -
2072     - move_native_irq(vector);
2073     - ack_edge_ioapic_irq(irq);
2074     -}
2075     -
2076     -static unsigned int startup_level_ioapic_vector (unsigned int vector)
2077     -{
2078     - int irq = vector_to_irq(vector);
2079     -
2080     - return startup_level_ioapic_irq (irq);
2081     -}
2082     -
2083     -static void end_level_ioapic_vector (unsigned int vector)
2084     -{
2085     - int irq = vector_to_irq(vector);
2086     -
2087     - move_native_irq(vector);
2088     - end_level_ioapic_irq(irq);
2089     -}
2090     -
2091     -static void mask_IO_APIC_vector (unsigned int vector)
2092     -{
2093     - int irq = vector_to_irq(vector);
2094     -
2095     - mask_IO_APIC_irq(irq);
2096     -}
2097     -
2098     -static void unmask_IO_APIC_vector (unsigned int vector)
2099     -{
2100     - int irq = vector_to_irq(vector);
2101     -
2102     - unmask_IO_APIC_irq(irq);
2103     -}
2104     -
2105     -#ifdef CONFIG_SMP
2106     -static void set_ioapic_affinity_vector (unsigned int vector,
2107     - cpumask_t cpu_mask)
2108     -{
2109     - int irq = vector_to_irq(vector);
2110     -
2111     - set_native_irq_info(vector, cpu_mask);
2112     - set_ioapic_affinity_irq(irq, cpu_mask);
2113     -}
2114     -#endif
2115     -#endif
2116     -
2117     -static int ioapic_retrigger(unsigned int irq)
2118     +static int ioapic_retrigger_irq(unsigned int irq)
2119     {
2120     - send_IPI_self(IO_APIC_VECTOR(irq));
2121     + send_IPI_self(irq_vector[irq]);
2122    
2123     return 1;
2124     }
2125    
2126     -/*
2127     - * Level and edge triggered IO-APIC interrupts need different handling,
2128     - * so we use two separate IRQ descriptors. Edge triggered IRQs can be
2129     - * handled with the level-triggered descriptor, but that one has slightly
2130     - * more overhead. Level-triggered interrupts cannot be handled with the
2131     - * edge-triggered handler, without risking IRQ storms and other ugly
2132     - * races.
2133     - */
2134     -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
2135     - .typename = "IO-APIC-edge",
2136     - .startup = startup_edge_ioapic,
2137     - .shutdown = shutdown_edge_ioapic,
2138     - .enable = enable_edge_ioapic,
2139     - .disable = disable_edge_ioapic,
2140     - .ack = ack_edge_ioapic,
2141     - .end = end_edge_ioapic,
2142     -#ifdef CONFIG_SMP
2143     - .set_affinity = set_ioapic_affinity,
2144     -#endif
2145     - .retrigger = ioapic_retrigger,
2146     -};
2147     -
2148     -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
2149     - .typename = "IO-APIC-level",
2150     - .startup = startup_level_ioapic,
2151     - .shutdown = shutdown_level_ioapic,
2152     - .enable = enable_level_ioapic,
2153     - .disable = disable_level_ioapic,
2154     - .ack = mask_and_ack_level_ioapic,
2155     - .end = end_level_ioapic,
2156     +static struct irq_chip ioapic_chip __read_mostly = {
2157     + .name = "IO-APIC",
2158     + .startup = startup_ioapic_irq,
2159     + .mask = mask_IO_APIC_irq,
2160     + .unmask = unmask_IO_APIC_irq,
2161     + .ack = ack_ioapic_irq,
2162     + .eoi = ack_ioapic_quirk_irq,
2163     #ifdef CONFIG_SMP
2164     - .set_affinity = set_ioapic_affinity,
2165     + .set_affinity = set_ioapic_affinity_irq,
2166     #endif
2167     - .retrigger = ioapic_retrigger,
2168     + .retrigger = ioapic_retrigger_irq,
2169     };
2170     #endif /* !CONFIG_XEN */
2171    
2172     @@ -2177,12 +2130,7 @@
2173     */
2174     for (irq = 0; irq < NR_IRQS ; irq++) {
2175     int tmp = irq;
2176     - if (use_pci_vector()) {
2177     - if (!platform_legacy_irq(tmp))
2178     - if ((tmp = vector_to_irq(tmp)) == -1)
2179     - continue;
2180     - }
2181     - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
2182     + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
2183     /*
2184     * Hmm.. We don't have an entry for this,
2185     * so default to an old-fashioned 8259
2186     @@ -2193,22 +2141,23 @@
2187     #ifndef CONFIG_XEN
2188     else
2189     /* Strange. Oh, well.. */
2190     - irq_desc[irq].chip = &no_irq_type;
2191     + irq_desc[irq].chip = &no_irq_chip;
2192     #endif
2193     }
2194     }
2195     }
2196    
2197     #ifndef CONFIG_XEN
2198     -static void enable_lapic_irq (unsigned int irq)
2199     -{
2200     - unsigned long v;
2201     +/*
2202     + * The local APIC irq-chip implementation:
2203     + */
2204    
2205     - v = apic_read(APIC_LVT0);
2206     - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2207     +static void ack_apic(unsigned int irq)
2208     +{
2209     + ack_APIC_irq();
2210     }
2211    
2212     -static void disable_lapic_irq (unsigned int irq)
2213     +static void mask_lapic_irq (unsigned int irq)
2214     {
2215     unsigned long v;
2216    
2217     @@ -2216,21 +2165,19 @@
2218     apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
2219     }
2220    
2221     -static void ack_lapic_irq (unsigned int irq)
2222     +static void unmask_lapic_irq (unsigned int irq)
2223     {
2224     - ack_APIC_irq();
2225     -}
2226     + unsigned long v;
2227    
2228     -static void end_lapic_irq (unsigned int i) { /* nothing */ }
2229     + v = apic_read(APIC_LVT0);
2230     + apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
2231     +}
2232    
2233     -static struct hw_interrupt_type lapic_irq_type __read_mostly = {
2234     - .typename = "local-APIC-edge",
2235     - .startup = NULL, /* startup_irq() not used for IRQ0 */
2236     - .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
2237     - .enable = enable_lapic_irq,
2238     - .disable = disable_lapic_irq,
2239     - .ack = ack_lapic_irq,
2240     - .end = end_lapic_irq
2241     +static struct irq_chip lapic_chip __read_mostly = {
2242     + .name = "local-APIC-edge",
2243     + .mask = mask_lapic_irq,
2244     + .unmask = unmask_lapic_irq,
2245     + .eoi = ack_apic,
2246     };
2247    
2248     static void setup_nmi (void)
2249     @@ -2263,17 +2210,13 @@
2250     int apic, pin, i;
2251     struct IO_APIC_route_entry entry0, entry1;
2252     unsigned char save_control, save_freq_select;
2253     - unsigned long flags;
2254    
2255     pin = find_isa_irq_pin(8, mp_INT);
2256     apic = find_isa_irq_apic(8, mp_INT);
2257     if (pin == -1)
2258     return;
2259    
2260     - spin_lock_irqsave(&ioapic_lock, flags);
2261     - *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2262     - *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2263     - spin_unlock_irqrestore(&ioapic_lock, flags);
2264     + entry0 = ioapic_read_entry(apic, pin);
2265     clear_IO_APIC_pin(apic, pin);
2266    
2267     memset(&entry1, 0, sizeof(entry1));
2268     @@ -2286,10 +2229,7 @@
2269     entry1.trigger = 0;
2270     entry1.vector = 0;
2271    
2272     - spin_lock_irqsave(&ioapic_lock, flags);
2273     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2274     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2275     - spin_unlock_irqrestore(&ioapic_lock, flags);
2276     + ioapic_write_entry(apic, pin, entry1);
2277    
2278     save_control = CMOS_READ(RTC_CONTROL);
2279     save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2280     @@ -2308,10 +2248,7 @@
2281     CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2282     clear_IO_APIC_pin(apic, pin);
2283    
2284     - spin_lock_irqsave(&ioapic_lock, flags);
2285     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2286     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2287     - spin_unlock_irqrestore(&ioapic_lock, flags);
2288     + ioapic_write_entry(apic, pin, entry0);
2289     }
2290    
2291     int timer_uses_ioapic_pin_0;
2292     @@ -2411,7 +2348,8 @@
2293     printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2294    
2295     disable_8259A_irq(0);
2296     - irq_desc[0].chip = &lapic_irq_type;
2297     + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
2298     + "fasteoi");
2299     apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2300     enable_8259A_irq(0);
2301    
2302     @@ -2523,17 +2461,12 @@
2303     {
2304     struct IO_APIC_route_entry *entry;
2305     struct sysfs_ioapic_data *data;
2306     - unsigned long flags;
2307     int i;
2308    
2309     data = container_of(dev, struct sysfs_ioapic_data, dev);
2310     entry = data->entry;
2311     - spin_lock_irqsave(&ioapic_lock, flags);
2312     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2313     - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2314     - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2315     - }
2316     - spin_unlock_irqrestore(&ioapic_lock, flags);
2317     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2318     + entry[i] = ioapic_read_entry(dev->id, i);
2319    
2320     return 0;
2321     }
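
ioapic_suspend() now snapshots every RTE through ioapic_read_entry() into the per-device buffer, and the resume path below writes them back with ioapic_write_entry(). The save/restore loop in miniature (a toy model, not the sysdev API):

    #include <stdio.h>
    #include <string.h>

    struct entry { unsigned lo, hi; };

    static struct entry live[24];    /* stand-in for the routing table */
    static struct entry saved[24];   /* per-device snapshot buffer */

    static void suspend(int nr) { for (int i = 0; i < nr; i++) saved[i] = live[i]; }
    static void resume(int nr)  { for (int i = 0; i < nr; i++) live[i] = saved[i]; }

    int main(void)
    {
        live[3] = (struct entry){ .lo = 0xa030, .hi = 0x01000000 };
        suspend(24);
        memset(live, 0, sizeof(live));   /* power-off wipes the table */
        resume(24);
        printf("entry 3 restored: lo=%x hi=%x\n", live[3].lo, live[3].hi);
        return 0;
    }
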
2322     @@ -2555,11 +2488,9 @@
2323     reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2324     io_apic_write(dev->id, 0, reg_00.raw);
2325     }
2326     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2327     - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2328     - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2329     - }
2330     spin_unlock_irqrestore(&ioapic_lock, flags);
2331     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2332     + ioapic_write_entry(dev->id, i, entry[i]);
2333    
2334     return 0;
2335     }
2336     @@ -2605,6 +2536,240 @@
2337    
2338     device_initcall(ioapic_init_sysfs);
2339    
2340     +#ifndef CONFIG_XEN
2341     +/*
2342     + * Dynamic irq allocation and deallocation
2343     + */
2344     +int create_irq(void)
2345     +{
2346     + /* Allocate an unused irq */
2347     + int irq, new, vector;
2348     + unsigned long flags;
2349     +
2350     + irq = -ENOSPC;
2351     + spin_lock_irqsave(&vector_lock, flags);
2352     + for (new = (NR_IRQS - 1); new >= 0; new--) {
2353     + if (platform_legacy_irq(new))
2354     + continue;
2355     + if (irq_vector[new] != 0)
2356     + continue;
2357     + vector = __assign_irq_vector(new);
2358     + if (likely(vector > 0))
2359     + irq = new;
2360     + break;
2361     + }
2362     + spin_unlock_irqrestore(&vector_lock, flags);
2363     +
2364     + if (irq >= 0) {
2365     + set_intr_gate(vector, interrupt[irq]);
2366     + dynamic_irq_init(irq);
2367     + }
2368     + return irq;
2369     +}
2370     +
2371     +void destroy_irq(unsigned int irq)
2372     +{
2373     + unsigned long flags;
2374     +
2375     + dynamic_irq_cleanup(irq);
2376     +
2377     + spin_lock_irqsave(&vector_lock, flags);
2378     + irq_vector[irq] = 0;
2379     + spin_unlock_irqrestore(&vector_lock, flags);
2380     +}
2381     +#endif
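
create_irq() scans downward from NR_IRQS - 1 for a descriptor that is neither a legacy IRQ nor already owns a vector, claims a vector under vector_lock via the unlocked helper, and only then initializes the descriptor; destroy_irq() releases in the opposite order. A userspace analogue of the scan-and-claim loop (create_id/destroy_id are invented names):

    #include <pthread.h>
    #include <stdio.h>

    #define NR_IDS 16
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int vec[NR_IDS];        /* 0 = free, >0 = assigned vector */
    static int next_vec = 32;

    static int create_id(void)     /* scan from the top, claim first free */
    {
        int id = -1;
        pthread_mutex_lock(&lock);
        for (int i = NR_IDS - 1; i >= 0; i--) {
            if (i < 4)             /* skip "legacy" low ids */
                continue;
            if (vec[i] != 0)
                continue;
            vec[i] = next_vec++;
            id = i;
            break;
        }
        pthread_mutex_unlock(&lock);
        return id;                 /* -1 on exhaustion */
    }

    static void destroy_id(int id)
    {
        pthread_mutex_lock(&lock);
        vec[id] = 0;
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        int a = create_id(), b = create_id();
        printf("%d %d\n", a, b);       /* 15 14 */
        destroy_id(a);
        printf("%d\n", create_id());   /* 15 again */
        return 0;
    }
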
2382     +
2383     +/*
2384     + * MSI message composition
2385     + */
2386     +#ifdef CONFIG_PCI_MSI
2387     +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2388     +{
2389     + int vector;
2390     + unsigned dest;
2391     +
2392     + vector = assign_irq_vector(irq);
2393     + if (vector >= 0) {
2394     + dest = cpu_mask_to_apicid(TARGET_CPUS);
2395     +
2396     + msg->address_hi = MSI_ADDR_BASE_HI;
2397     + msg->address_lo =
2398     + MSI_ADDR_BASE_LO |
2399     + ((INT_DEST_MODE == 0) ?
2400     + MSI_ADDR_DEST_MODE_PHYSICAL:
2401     + MSI_ADDR_DEST_MODE_LOGICAL) |
2402     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2403     + MSI_ADDR_REDIRECTION_CPU:
2404     + MSI_ADDR_REDIRECTION_LOWPRI) |
2405     + MSI_ADDR_DEST_ID(dest);
2406     +
2407     + msg->data =
2408     + MSI_DATA_TRIGGER_EDGE |
2409     + MSI_DATA_LEVEL_ASSERT |
2410     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2411     + MSI_DATA_DELIVERY_FIXED:
2412     + MSI_DATA_DELIVERY_LOWPRI) |
2413     + MSI_DATA_VECTOR(vector);
2414     + }
2415     + return vector;
2416     +}
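
msi_compose_msg() packs the target APIC ID and the allocated vector into the MSI address/data pair the device will write onto the bus. A sketch of the packing with illustrative macros (the field positions follow the usual x86 MSI layout but are written out here for the sketch rather than taken from msidef.h):

    #include <stdio.h>

    #define ADDR_BASE      0xfee00000u                      /* MSI window */
    #define ADDR_DEST(d)   (((unsigned)(d) & 0xff) << 12)   /* APIC ID */
    #define DATA_VECTOR(v) ((unsigned)(v) & 0xff)
    #define DATA_EDGE      (0u << 15)
    #define DATA_ASSERT    (1u << 14)

    struct msi_msg { unsigned address_hi, address_lo, data; };

    static void compose(struct msi_msg *msg, unsigned dest, unsigned vector)
    {
        msg->address_hi = 0;
        msg->address_lo = ADDR_BASE | ADDR_DEST(dest);
        msg->data       = DATA_EDGE | DATA_ASSERT | DATA_VECTOR(vector);
    }

    int main(void)
    {
        struct msi_msg m;
        compose(&m, 1, 0x31);
        printf("addr=%08x data=%08x\n", m.address_lo, m.data);
        return 0;
    }
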
2417     +
2418     +#ifdef CONFIG_SMP
2419     +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2420     +{
2421     + struct msi_msg msg;
2422     + unsigned int dest;
2423     + cpumask_t tmp;
2424     + int vector;
2425     +
2426     + cpus_and(tmp, mask, cpu_online_map);
2427     + if (cpus_empty(tmp))
2428     + tmp = TARGET_CPUS;
2429     +
2430     + vector = assign_irq_vector(irq);
2431     + if (vector < 0)
2432     + return;
2433     +
2434     + dest = cpu_mask_to_apicid(mask);
2435     +
2436     + read_msi_msg(irq, &msg);
2437     +
2438     + msg.data &= ~MSI_DATA_VECTOR_MASK;
2439     + msg.data |= MSI_DATA_VECTOR(vector);
2440     + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2441     + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2442     +
2443     + write_msi_msg(irq, &msg);
2444     + set_native_irq_info(irq, mask);
2445     +}
2446     +#endif /* CONFIG_SMP */
2447     +
2448     +/*
2449     + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2450     + * which implement the MSI or MSI-X Capability Structure.
2451     + */
2452     +static struct irq_chip msi_chip = {
2453     + .name = "PCI-MSI",
2454     + .unmask = unmask_msi_irq,
2455     + .mask = mask_msi_irq,
2456     + .ack = ack_ioapic_irq,
2457     +#ifdef CONFIG_SMP
2458     + .set_affinity = set_msi_irq_affinity,
2459     +#endif
2460     + .retrigger = ioapic_retrigger_irq,
2461     +};
2462     +
2463     +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
2464     +{
2465     + struct msi_msg msg;
2466     + int ret;
2467     + ret = msi_compose_msg(dev, irq, &msg);
2468     + if (ret < 0)
2469     + return ret;
2470     +
2471     + write_msi_msg(irq, &msg);
2472     +
2473     + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2474     + "edge");
2475     +
2476     + return 0;
2477     +}
2478     +
2479     +void arch_teardown_msi_irq(unsigned int irq)
2480     +{
2481     + return;
2482     +}
2483     +
2484     +#endif /* CONFIG_PCI_MSI */
2485     +
2486     +/*
2487     + * Hypertransport interrupt support
2488     + */
2489     +#ifdef CONFIG_HT_IRQ
2490     +
2491     +#ifdef CONFIG_SMP
2492     +
2493     +static void target_ht_irq(unsigned int irq, unsigned int dest)
2494     +{
2495     + struct ht_irq_msg msg;
2496     + fetch_ht_irq_msg(irq, &msg);
2497     +
2498     + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2499     + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2500     +
2501     + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2502     + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2503     +
2504     + write_ht_irq_msg(irq, &msg);
2505     +}
2506     +
2507     +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2508     +{
2509     + unsigned int dest;
2510     + cpumask_t tmp;
2511     +
2512     + cpus_and(tmp, mask, cpu_online_map);
2513     + if (cpus_empty(tmp))
2514     + tmp = TARGET_CPUS;
2515     +
2516     + cpus_and(mask, tmp, CPU_MASK_ALL);
2517     +
2518     + dest = cpu_mask_to_apicid(mask);
2519     +
2520     + target_ht_irq(irq, dest);
2521     + set_native_irq_info(irq, mask);
2522     +}
2523     +#endif
2524     +
2525     +static struct irq_chip ht_irq_chip = {
2526     + .name = "PCI-HT",
2527     + .mask = mask_ht_irq,
2528     + .unmask = unmask_ht_irq,
2529     + .ack = ack_ioapic_irq,
2530     +#ifdef CONFIG_SMP
2531     + .set_affinity = set_ht_irq_affinity,
2532     +#endif
2533     + .retrigger = ioapic_retrigger_irq,
2534     +};
2535     +
2536     +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2537     +{
2538     + int vector;
2539     +
2540     + vector = assign_irq_vector(irq);
2541     + if (vector >= 0) {
2542     + struct ht_irq_msg msg;
2543     + unsigned dest;
2544     + cpumask_t tmp;
2545     +
2546     + cpus_clear(tmp);
2547     + cpu_set(vector >> 8, tmp);
2548     + dest = cpu_mask_to_apicid(tmp);
2549     +
2550     + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2551     +
2552     + msg.address_lo =
2553     + HT_IRQ_LOW_BASE |
2554     + HT_IRQ_LOW_DEST_ID(dest) |
2555     + HT_IRQ_LOW_VECTOR(vector) |
2556     + ((INT_DEST_MODE == 0) ?
2557     + HT_IRQ_LOW_DM_PHYSICAL :
2558     + HT_IRQ_LOW_DM_LOGICAL) |
2559     + HT_IRQ_LOW_RQEOI_EDGE |
2560     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2561     + HT_IRQ_LOW_MT_FIXED :
2562     + HT_IRQ_LOW_MT_ARBITRATED) |
2563     + HT_IRQ_LOW_IRQ_MASKED;
2564     +
2565     + write_ht_irq_msg(irq, &msg);
2566     +
2567     + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2568     + handle_edge_irq, "edge");
2569     + }
2570     + return vector;
2571     +}
2572     +#endif /* CONFIG_HT_IRQ */
2573     +
2574     /* --------------------------------------------------------------------------
2575     ACPI-based IOAPIC Configuration
2576     -------------------------------------------------------------------------- */
2577     @@ -2758,13 +2923,34 @@
2578     if (!ioapic && (irq < 16))
2579     disable_8259A_irq(irq);
2580    
2581     + ioapic_write_entry(ioapic, pin, entry);
2582     spin_lock_irqsave(&ioapic_lock, flags);
2583     - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2584     - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2585     - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2586     + set_native_irq_info(irq, TARGET_CPUS);
2587     spin_unlock_irqrestore(&ioapic_lock, flags);
2588    
2589     return 0;
2590     }
2591    
2592     #endif /* CONFIG_ACPI */
2593     +
2594     +static int __init parse_disable_timer_pin_1(char *arg)
2595     +{
2596     + disable_timer_pin_1 = 1;
2597     + return 0;
2598     +}
2599     +early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2600     +
2601     +static int __init parse_enable_timer_pin_1(char *arg)
2602     +{
2603     + disable_timer_pin_1 = -1;
2604     + return 0;
2605     +}
2606     +early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2607     +
2608     +static int __init parse_noapic(char *arg)
2609     +{
2610     + /* disable IO-APIC */
2611     + disable_ioapic_setup();
2612     + return 0;
2613     +}
2614     +early_param("noapic", parse_noapic);
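
The three handlers above use early_param() instead of __setup() because these options must take effect before the normal command-line machinery runs (the early table is walked by parse_early_param() during setup_arch()). A userspace sketch of the table-driven idea; param_table and parse_early are invented for the sketch:

    #include <stdio.h>
    #include <string.h>

    static int disable_timer_pin_1;
    static int skip_ioapic_setup;

    static int parse_disable_pin(char *arg) { disable_timer_pin_1 = 1; return 0; }
    static int parse_noapic(char *arg)      { skip_ioapic_setup = 1;   return 0; }

    static struct { const char *name; int (*fn)(char *); } param_table[] = {
        { "disable_timer_pin_1", parse_disable_pin },
        { "noapic",              parse_noapic },
    };

    static void parse_early(char *cmdline)
    {
        for (char *tok = strtok(cmdline, " "); tok; tok = strtok(NULL, " "))
            for (unsigned i = 0; i < sizeof(param_table)/sizeof(*param_table); i++)
                if (!strcmp(tok, param_table[i].name))
                    param_table[i].fn(NULL);
    }

    int main(void)
    {
        char cmdline[] = "root=/dev/sda1 noapic disable_timer_pin_1";
        parse_early(cmdline);
        printf("noapic=%d pin1=%d\n", skip_ioapic_setup, disable_timer_pin_1);
        return 0;
    }
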
2615 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c
2616     --- linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 20:51:11.000000000 +0200
2617     +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 20:39:03.000000000 +0200
2618 niro 609 @@ -26,9 +26,12 @@
2619     #include <linux/delay.h>
2620     #include <linux/sched.h>
2621     #include <linux/smp_lock.h>
2622     +#include <linux/pci.h>
2623     #include <linux/mc146818rtc.h>
2624     #include <linux/acpi.h>
2625     #include <linux/sysdev.h>
2626     +#include <linux/msi.h>
2627     +#include <linux/htirq.h>
2628     #ifdef CONFIG_ACPI
2629     #include <acpi/acpi_bus.h>
2630     #endif
2631     @@ -41,6 +44,10 @@
2632     #include <asm/acpi.h>
2633     #include <asm/dma.h>
2634     #include <asm/nmi.h>
2635     +#include <asm/msidef.h>
2636     +#include <asm/hypertransport.h>
2637     +
2638     +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
2639    
2640     #define __apicdebuginit __init
2641    
2642     @@ -48,17 +55,29 @@
2643    
2644     static int no_timer_check;
2645    
2646     -int disable_timer_pin_1 __initdata;
2647     +static int disable_timer_pin_1 __initdata;
2648    
2649     -#ifndef CONFIG_XEN
2650     -int timer_over_8254 __initdata = 0;
2651     +#ifdef CONFIG_XEN
2652     +#include <xen/interface/xen.h>
2653     +#include <xen/interface/physdev.h>
2654     +
2655     +/* Fake i8259 */
2656     +#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2657     +#define disable_8259A_irq(_irq) ((void)0)
2658     +#define i8259A_irq_pending(_irq) (0)
2659     +
2660     +unsigned long io_apic_irqs;
2661     +
2662     +#define clear_IO_APIC() ((void)0)
2663     +#else
2664     +int timer_over_8254 __initdata = 1;
2665    
2666     /* Where if anywhere is the i8259 connect in external int mode */
2667     static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
2668     #endif
2669    
2670     static DEFINE_SPINLOCK(ioapic_lock);
2671     -static DEFINE_SPINLOCK(vector_lock);
2672     +DEFINE_SPINLOCK(vector_lock);
2673    
2674     /*
2675     * # of IRQ routing registers
2676     @@ -83,28 +102,27 @@
2677     short apic, pin, next;
2678     } irq_2_pin[PIN_MAP_SIZE];
2679    
2680     -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
2681     -#ifdef CONFIG_PCI_MSI
2682     -#define vector_to_irq(vector) \
2683     - (platform_legacy_irq(vector) ? vector : vector_irq[vector])
2684     -#else
2685     -#define vector_to_irq(vector) (vector)
2686     -#endif
2687     -
2688     -#ifdef CONFIG_XEN
2689     -
2690     -#include <xen/interface/xen.h>
2691     -#include <xen/interface/physdev.h>
2692     -
2693     -/* Fake i8259 */
2694     -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
2695     -#define disable_8259A_irq(_irq) ((void)0)
2696     -#define i8259A_irq_pending(_irq) (0)
2697     +#ifndef CONFIG_XEN
2698     +struct io_apic {
2699     + unsigned int index;
2700     + unsigned int unused[3];
2701     + unsigned int data;
2702     +};
2703    
2704     -unsigned long io_apic_irqs;
2705     +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
2706     +{
2707     + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
2708     + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
2709     +}
2710     +#endif
2711    
2712     -static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
2713     +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
2714     {
2715     +#ifndef CONFIG_XEN
2716     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2717     + writel(reg, &io_apic->index);
2718     + return readl(&io_apic->data);
2719     +#else
2720     struct physdev_apic apic_op;
2721     int ret;
2722    
2723     @@ -114,31 +132,131 @@
2724     if (ret)
2725     return ret;
2726     return apic_op.value;
2727     +#endif
2728     }
2729    
2730     -static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2731     +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
2732     {
2733     +#ifndef CONFIG_XEN
2734     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2735     + writel(reg, &io_apic->index);
2736     + writel(value, &io_apic->data);
2737     +#else
2738     struct physdev_apic apic_op;
2739    
2740     apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
2741     apic_op.reg = reg;
2742     apic_op.value = value;
2743     WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
2744     +#endif
2745 niro 611 +}
2746     +
2747 niro 609 +#ifndef CONFIG_XEN
2748     +/*
2749     + * Re-write a value: to be used for read-modify-write
2750     + * cycles where the read already set up the index register.
2751     + */
2752     +static inline void io_apic_modify(unsigned int apic, unsigned int value)
2753     +{
2754     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2755     + writel(value, &io_apic->data);
2756 niro 611 }
2757 niro 609 +#else
2758     +#define io_apic_modify io_apic_write
2759     +#endif
2760    
2761 niro 611 -#define io_apic_read(a,r) xen_io_apic_read(a,r)
2762     -#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
2763 niro 609 +/*
2764     + * Synchronize the IO-APIC and the CPU by doing
2765     + * a dummy read from the IO-APIC
2766     + */
2767     +static inline void io_apic_sync(unsigned int apic)
2768     +{
2769     +#ifndef CONFIG_XEN
2770     + struct io_apic __iomem *io_apic = io_apic_base(apic);
2771     + readl(&io_apic->data);
2772     +#endif
2773     +}
2774    
2775 niro 611 -#define clear_IO_APIC() ((void)0)
2776 niro 609 +union entry_union {
2777     + struct { u32 w1, w2; };
2778     + struct IO_APIC_route_entry entry;
2779     +};
2780 niro 611
2781     -#else
2782 niro 609 +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
2783     +{
2784     + union entry_union eu;
2785     + unsigned long flags;
2786     + spin_lock_irqsave(&ioapic_lock, flags);
2787     + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
2788     + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
2789     + spin_unlock_irqrestore(&ioapic_lock, flags);
2790     + return eu.entry;
2791     +}
2792     +
2793     +/*
2794     + * When we write a new IO APIC routing entry, we need to write the high
2795     + * word first! If the mask bit in the low word is clear, we will enable
2796     + * the interrupt, and we need to make sure the entry is fully populated
2797     + * before that happens.
2798     + */
2799     +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
2800     +{
2801     + unsigned long flags;
2802     + union entry_union eu;
2803     + eu.entry = e;
2804     + spin_lock_irqsave(&ioapic_lock, flags);
2805     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2806     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2807     + spin_unlock_irqrestore(&ioapic_lock, flags);
2808     +}
2809     +
2810     +#ifndef CONFIG_XEN
2811     +/*
2812     + * When we mask an IO APIC routing entry, we need to write the low
2813     + * word first, in order to set the mask bit before we change the
2814     + * high bits!
2815     + */
2816     +static void ioapic_mask_entry(int apic, int pin)
2817     +{
2818     + unsigned long flags;
2819     + union entry_union eu = { .entry.mask = 1 };
2820     +
2821     + spin_lock_irqsave(&ioapic_lock, flags);
2822     + io_apic_write(apic, 0x10 + 2*pin, eu.w1);
2823     + io_apic_write(apic, 0x11 + 2*pin, eu.w2);
2824     + spin_unlock_irqrestore(&ioapic_lock, flags);
2825     +}
2826    
2827     #ifdef CONFIG_SMP
2828     +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
2829     +{
2830     + int apic, pin;
2831     + struct irq_pin_list *entry = irq_2_pin + irq;
2832     +
2833     + BUG_ON(irq >= NR_IRQS);
2834     + for (;;) {
2835     + unsigned int reg;
2836     + apic = entry->apic;
2837     + pin = entry->pin;
2838     + if (pin == -1)
2839     + break;
2840     + io_apic_write(apic, 0x11 + pin*2, dest);
2841     + reg = io_apic_read(apic, 0x10 + pin*2);
2842     + reg &= ~0x000000ff;
2843     + reg |= vector;
2844     + io_apic_modify(apic, reg);
2845     + if (!entry->next)
2846     + break;
2847     + entry = irq_2_pin + entry->next;
2848     + }
2849     +}
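
__target_IO_APIC_irq() leans on io_apic_modify(): the io_apic_read() just before it leaves the hardware index register pointing at the right RTE word, so the modify step can skip re-writing the index (except on the SiS parts handled in the 32-bit file). The index/data idiom in a userspace sketch (the rf_* names are invented):

    #include <stdio.h>

    /* Stand-in for an index/data register pair fronting a register file. */
    static unsigned regs[64];
    static unsigned index_reg;

    static unsigned rf_read(unsigned reg)
    {
        index_reg = reg;            /* select, then read */
        return regs[index_reg];
    }

    static void rf_write(unsigned reg, unsigned v)
    {
        index_reg = reg;            /* select, then write */
        regs[index_reg] = v;
    }

    /* Reuse the index the preceding read left behind: one bus write
     * saved per read-modify-write cycle. */
    static void rf_modify(unsigned v) { regs[index_reg] = v; }

    int main(void)
    {
        rf_write(0x10, 0x0000a0ff);
        unsigned r = rf_read(0x10);       /* read sets the index... */
        rf_modify((r & ~0xffu) | 0x31);   /* ...so modify can skip it */
        printf("reg 0x10 = %08x\n", regs[0x10]);
        return 0;
    }
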
2850     +
2851     static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
2852     {
2853     unsigned long flags;
2854     unsigned int dest;
2855     cpumask_t tmp;
2856     + int vector;
2857    
2858     cpus_and(tmp, mask, cpu_online_map);
2859     if (cpus_empty(tmp))
2860     @@ -146,7 +264,11 @@
2861    
2862     cpus_and(mask, tmp, CPU_MASK_ALL);
2863    
2864     - dest = cpu_mask_to_apicid(mask);
2865     + vector = assign_irq_vector(irq, mask, &tmp);
2866     + if (vector < 0)
2867     + return;
2868     +
2869     + dest = cpu_mask_to_apicid(tmp);
2870    
2871     /*
2872     * Only the high 8 bits are valid.
2873     @@ -154,13 +276,12 @@
2874     dest = SET_APIC_LOGICAL_ID(dest);
2875    
2876     spin_lock_irqsave(&ioapic_lock, flags);
2877     - __DO_ACTION(1, = dest, )
2878     - set_irq_info(irq, mask);
2879     + __target_IO_APIC_irq(irq, dest, vector);
2880     + set_native_irq_info(irq, mask);
2881     spin_unlock_irqrestore(&ioapic_lock, flags);
2882     }
2883     #endif
2884     -
2885     -#endif /* !CONFIG_XEN */
2886     +#endif
2887    
2888     /*
2889     * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
2890     @@ -240,24 +361,15 @@
2891     static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
2892     {
2893     struct IO_APIC_route_entry entry;
2894     - unsigned long flags;
2895    
2896     /* Check delivery_mode to be sure we're not clearing an SMI pin */
2897     - spin_lock_irqsave(&ioapic_lock, flags);
2898     - *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2899     - *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2900     - spin_unlock_irqrestore(&ioapic_lock, flags);
2901     + entry = ioapic_read_entry(apic, pin);
2902     if (entry.delivery_mode == dest_SMI)
2903     return;
2904     /*
2905     * Disable it in the IO-APIC irq-routing table:
2906     */
2907     - memset(&entry, 0, sizeof(entry));
2908     - entry.mask = 1;
2909     - spin_lock_irqsave(&ioapic_lock, flags);
2910     - io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
2911     - io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
2912     - spin_unlock_irqrestore(&ioapic_lock, flags);
2913     + ioapic_mask_entry(apic, pin);
2914     }
2915    
2916     static void clear_IO_APIC (void)
2917     @@ -271,16 +383,6 @@
2918    
2919     #endif /* !CONFIG_XEN */
2920    
2921     -static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
2922     -
2923     -/*
2924     - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
2925     - * specific CPU-side IRQs.
2926     - */
2927     -
2928     -#define MAX_PIRQS 8
2929     -static int pirq_entries [MAX_PIRQS];
2930     -static int pirqs_enabled;
2931     int skip_ioapic_setup;
2932     int ioapic_force;
2933    
2934     @@ -289,18 +391,17 @@
2935     static int __init disable_ioapic_setup(char *str)
2936     {
2937     skip_ioapic_setup = 1;
2938     - return 1;
2939     + return 0;
2940     }
2941     +early_param("noapic", disable_ioapic_setup);
2942    
2943     -static int __init enable_ioapic_setup(char *str)
2944     +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2945     +static int __init disable_timer_pin_setup(char *arg)
2946     {
2947     - ioapic_force = 1;
2948     - skip_ioapic_setup = 0;
2949     + disable_timer_pin_1 = 1;
2950     return 1;
2951     }
2952     -
2953     -__setup("noapic", disable_ioapic_setup);
2954     -__setup("apic", enable_ioapic_setup);
2955     +__setup("disable_timer_pin_1", disable_timer_pin_setup);
2956    
2957     #ifndef CONFIG_XEN
2958     static int __init setup_disable_8254_timer(char *s)
2959     @@ -318,137 +419,6 @@
2960     __setup("enable_8254_timer", setup_enable_8254_timer);
2961     #endif /* !CONFIG_XEN */
2962    
2963     -#include <asm/pci-direct.h>
2964     -#include <linux/pci_ids.h>
2965     -#include <linux/pci.h>
2966     -
2967     -
2968     -#ifdef CONFIG_ACPI
2969     -
2970     -static int nvidia_hpet_detected __initdata;
2971     -
2972     -static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
2973     -{
2974     - nvidia_hpet_detected = 1;
2975     - return 0;
2976     -}
2977     -#endif
2978     -
2979     -/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
2980     - off. Check for an Nvidia or VIA PCI bridge and turn it off.
2981     - Use pci direct infrastructure because this runs before the PCI subsystem.
2982     -
2983     - Can be overwritten with "apic"
2984     -
2985     - And another hack to disable the IOMMU on VIA chipsets.
2986     -
2987     - ... and others. Really should move this somewhere else.
2988     -
2989     - Kludge-O-Rama. */
2990     -void __init check_ioapic(void)
2991     -{
2992     - int num,slot,func;
2993     - /* Poor man's PCI discovery */
2994     - for (num = 0; num < 32; num++) {
2995     - for (slot = 0; slot < 32; slot++) {
2996     - for (func = 0; func < 8; func++) {
2997     - u32 class;
2998     - u32 vendor;
2999     - u8 type;
3000     - class = read_pci_config(num,slot,func,
3001     - PCI_CLASS_REVISION);
3002     - if (class == 0xffffffff)
3003     - break;
3004     -
3005     - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
3006     - continue;
3007     -
3008     - vendor = read_pci_config(num, slot, func,
3009     - PCI_VENDOR_ID);
3010     - vendor &= 0xffff;
3011     - switch (vendor) {
3012     - case PCI_VENDOR_ID_VIA:
3013     -#ifdef CONFIG_IOMMU
3014     - if ((end_pfn > MAX_DMA32_PFN ||
3015     - force_iommu) &&
3016     - !iommu_aperture_allowed) {
3017     - printk(KERN_INFO
3018     - "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
3019     - iommu_aperture_disabled = 1;
3020     - }
3021     -#endif
3022     - return;
3023     - case PCI_VENDOR_ID_NVIDIA:
3024     -#ifdef CONFIG_ACPI
3025     - /*
3026     - * All timer overrides on Nvidia are
3027     - * wrong unless HPET is enabled.
3028     - */
3029     - nvidia_hpet_detected = 0;
3030     - acpi_table_parse(ACPI_HPET,
3031     - nvidia_hpet_check);
3032     - if (nvidia_hpet_detected == 0) {
3033     - acpi_skip_timer_override = 1;
3034     - printk(KERN_INFO "Nvidia board "
3035     - "detected. Ignoring ACPI "
3036     - "timer override.\n");
3037     - }
3038     -#endif
3039     - /* RED-PEN skip them on mptables too? */
3040     - return;
3041     - case PCI_VENDOR_ID_ATI:
3042     -
3043     - /* This should be actually default, but
3044     - for 2.6.16 let's do it for ATI only where
3045     - it's really needed. */
3046     -#ifndef CONFIG_XEN
3047     - if (timer_over_8254 == 1) {
3048     - timer_over_8254 = 0;
3049     - printk(KERN_INFO
3050     - "ATI board detected. Disabling timer routing over 8254.\n");
3051     - }
3052     -#endif
3053     - return;
3054     - }
3055     -
3056     -
3057     - /* No multi-function device? */
3058     - type = read_pci_config_byte(num,slot,func,
3059     - PCI_HEADER_TYPE);
3060     - if (!(type & 0x80))
3061     - break;
3062     - }
3063     - }
3064     - }
3065     -}
3066     -
3067     -static int __init ioapic_pirq_setup(char *str)
3068     -{
3069     - int i, max;
3070     - int ints[MAX_PIRQS+1];
3071     -
3072     - get_options(str, ARRAY_SIZE(ints), ints);
3073     -
3074     - for (i = 0; i < MAX_PIRQS; i++)
3075     - pirq_entries[i] = -1;
3076     -
3077     - pirqs_enabled = 1;
3078     - apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
3079     - max = MAX_PIRQS;
3080     - if (ints[0] < MAX_PIRQS)
3081     - max = ints[0];
3082     -
3083     - for (i = 0; i < max; i++) {
3084     - apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
3085     - /*
3086     - * PIRQs are mapped upside down, usually.
3087     - */
3088     - pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
3089     - }
3090     - return 1;
3091     -}
3092     -
3093     -__setup("pirq=", ioapic_pirq_setup);
3094    
3095     /*
3096     * Find the IRQ entry number of a certain pin.
3097     @@ -478,9 +448,7 @@
3098     for (i = 0; i < mp_irq_entries; i++) {
3099     int lbus = mp_irqs[i].mpc_srcbus;
3100    
3101     - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3102     - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3103     - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3104     + if (test_bit(lbus, mp_bus_not_pci) &&
3105     (mp_irqs[i].mpc_irqtype == type) &&
3106     (mp_irqs[i].mpc_srcbusirq == irq))
3107    
3108     @@ -496,9 +464,7 @@
3109     for (i = 0; i < mp_irq_entries; i++) {
3110     int lbus = mp_irqs[i].mpc_srcbus;
3111    
3112     - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
3113     - mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
3114     - mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
3115     + if (test_bit(lbus, mp_bus_not_pci) &&
3116     (mp_irqs[i].mpc_irqtype == type) &&
3117     (mp_irqs[i].mpc_srcbusirq == irq))
3118     break;
3119     @@ -539,7 +505,7 @@
3120     mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
3121     break;
3122    
3123     - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
3124     + if (!test_bit(lbus, mp_bus_not_pci) &&
3125     !mp_irqs[i].mpc_irqtype &&
3126     (bus == lbus) &&
3127     (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
3128     @@ -562,27 +528,6 @@
3129     return best_guess;
3130     }
3131    
3132     -/*
3133     - * EISA Edge/Level control register, ELCR
3134     - */
3135     -static int EISA_ELCR(unsigned int irq)
3136     -{
3137     - if (irq < 16) {
3138     - unsigned int port = 0x4d0 + (irq >> 3);
3139     - return (inb(port) >> (irq & 7)) & 1;
3140     - }
3141     - apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
3142     - return 0;
3143     -}
3144     -
3145     -/* EISA interrupts are always polarity zero and can be edge or level
3146     - * trigger depending on the ELCR value. If an interrupt is listed as
3147     - * EISA conforming in the MP table, that means its trigger type must
3148     - * be read in from the ELCR */
3149     -
3150     -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
3151     -#define default_EISA_polarity(idx) (0)
3152     -
3153     /* ISA interrupts are always polarity zero edge triggered,
3154     * when listed as conforming in the MP table. */
3155    
3156     @@ -595,12 +540,6 @@
3157     #define default_PCI_trigger(idx) (1)
3158     #define default_PCI_polarity(idx) (1)
3159    
3160     -/* MCA interrupts are always polarity zero level triggered,
3161     - * when listed as conforming in the MP table. */
3162     -
3163     -#define default_MCA_trigger(idx) (1)
3164     -#define default_MCA_polarity(idx) (0)
3165     -
3166     static int __init MPBIOS_polarity(int idx)
3167     {
3168     int bus = mp_irqs[idx].mpc_srcbus;
3169     @@ -612,38 +551,11 @@
3170     switch (mp_irqs[idx].mpc_irqflag & 3)
3171     {
3172     case 0: /* conforms, ie. bus-type dependent polarity */
3173     - {
3174     - switch (mp_bus_id_to_type[bus])
3175     - {
3176     - case MP_BUS_ISA: /* ISA pin */
3177     - {
3178     - polarity = default_ISA_polarity(idx);
3179     - break;
3180     - }
3181     - case MP_BUS_EISA: /* EISA pin */
3182     - {
3183     - polarity = default_EISA_polarity(idx);
3184     - break;
3185     - }
3186     - case MP_BUS_PCI: /* PCI pin */
3187     - {
3188     - polarity = default_PCI_polarity(idx);
3189     - break;
3190     - }
3191     - case MP_BUS_MCA: /* MCA pin */
3192     - {
3193     - polarity = default_MCA_polarity(idx);
3194     - break;
3195     - }
3196     - default:
3197     - {
3198     - printk(KERN_WARNING "broken BIOS!!\n");
3199     - polarity = 1;
3200     - break;
3201     - }
3202     - }
3203     + if (test_bit(bus, mp_bus_not_pci))
3204     + polarity = default_ISA_polarity(idx);
3205     + else
3206     + polarity = default_PCI_polarity(idx);
3207     break;
3208     - }
3209     case 1: /* high active */
3210     {
3211     polarity = 0;
3212     @@ -681,38 +593,11 @@
3213     switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
3214     {
3215     case 0: /* conforms, ie. bus-type dependent */
3216     - {
3217     - switch (mp_bus_id_to_type[bus])
3218     - {
3219     - case MP_BUS_ISA: /* ISA pin */
3220     - {
3221     - trigger = default_ISA_trigger(idx);
3222     - break;
3223     - }
3224     - case MP_BUS_EISA: /* EISA pin */
3225     - {
3226     - trigger = default_EISA_trigger(idx);
3227     - break;
3228     - }
3229     - case MP_BUS_PCI: /* PCI pin */
3230     - {
3231     - trigger = default_PCI_trigger(idx);
3232     - break;
3233     - }
3234     - case MP_BUS_MCA: /* MCA pin */
3235     - {
3236     - trigger = default_MCA_trigger(idx);
3237     - break;
3238     - }
3239     - default:
3240     - {
3241     - printk(KERN_WARNING "broken BIOS!!\n");
3242     - trigger = 1;
3243     - break;
3244     - }
3245     - }
3246     + if (test_bit(bus, mp_bus_not_pci))
3247     + trigger = default_ISA_trigger(idx);
3248     + else
3249     + trigger = default_PCI_trigger(idx);
3250     break;
3251     - }
3252     case 1: /* edge */
3253     {
3254     trigger = 0;
3255     @@ -749,64 +634,6 @@
3256     return MPBIOS_trigger(idx);
3257     }
3258    
3259     -static int next_irq = 16;
3260     -
3261     -/*
3262     - * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
3263     - * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
3264     - * from ACPI, which can reach 800 in large boxen.
3265     - *
3266     - * Compact the sparse GSI space into a sequential IRQ series and reuse
3267     - * vectors if possible.
3268     - */
3269     -int gsi_irq_sharing(int gsi)
3270     -{
3271     - int i, tries, vector;
3272     -
3273     - BUG_ON(gsi >= NR_IRQ_VECTORS);
3274     -
3275     - if (platform_legacy_irq(gsi))
3276     - return gsi;
3277     -
3278     - if (gsi_2_irq[gsi] != 0xFF)
3279     - return (int)gsi_2_irq[gsi];
3280     -
3281     - tries = NR_IRQS;
3282     - try_again:
3283     - vector = assign_irq_vector(gsi);
3284     -
3285     - /*
3286     - * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
3287     - * use of vector and if found, return that IRQ. However, we never want
3288     - * to share legacy IRQs, which usually have a different trigger mode
3289     - * than PCI.
3290     - */
3291     - for (i = 0; i < NR_IRQS; i++)
3292     - if (IO_APIC_VECTOR(i) == vector)
3293     - break;
3294     - if (platform_legacy_irq(i)) {
3295     - if (--tries >= 0) {
3296     - IO_APIC_VECTOR(i) = 0;
3297     - goto try_again;
3298     - }
3299     - panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
3300     - }
3301     - if (i < NR_IRQS) {
3302     - gsi_2_irq[gsi] = i;
3303     - printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
3304     - gsi, vector, i);
3305     - return i;
3306     - }
3307     -
3308     - i = next_irq++;
3309     - BUG_ON(i >= NR_IRQS);
3310     - gsi_2_irq[gsi] = i;
3311     - IO_APIC_VECTOR(i) = vector;
3312     - printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
3313     - gsi, vector, i);
3314     - return i;
3315     -}
3316     -
3317     static int pin_2_irq(int idx, int apic, int pin)
3318     {
3319     int irq, i;
3320     @@ -818,49 +645,16 @@
3321     if (mp_irqs[idx].mpc_dstirq != pin)
3322     printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
3323    
3324     - switch (mp_bus_id_to_type[bus])
3325     - {
3326     - case MP_BUS_ISA: /* ISA pin */
3327     - case MP_BUS_EISA:
3328     - case MP_BUS_MCA:
3329     - {
3330     - irq = mp_irqs[idx].mpc_srcbusirq;
3331     - break;
3332     - }
3333     - case MP_BUS_PCI: /* PCI pin */
3334     - {
3335     - /*
3336     - * PCI IRQs are mapped in order
3337     - */
3338     - i = irq = 0;
3339     - while (i < apic)
3340     - irq += nr_ioapic_registers[i++];
3341     - irq += pin;
3342     - irq = gsi_irq_sharing(irq);
3343     - break;
3344     - }
3345     - default:
3346     - {
3347     - printk(KERN_ERR "unknown bus type %d.\n",bus);
3348     - irq = 0;
3349     - break;
3350     - }
3351     - }
3352     - BUG_ON(irq >= NR_IRQS);
3353     -
3354     - /*
3355     - * PCI IRQ command line redirection. Yes, limits are hardcoded.
3356     - */
3357     - if ((pin >= 16) && (pin <= 23)) {
3358     - if (pirq_entries[pin-16] != -1) {
3359     - if (!pirq_entries[pin-16]) {
3360     - apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
3361     - } else {
3362     - irq = pirq_entries[pin-16];
3363     - apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
3364     - pin-16, irq);
3365     - }
3366     - }
3367     + if (test_bit(bus, mp_bus_not_pci)) {
3368     + irq = mp_irqs[idx].mpc_srcbusirq;
3369     + } else {
3370     + /*
3371     + * PCI IRQs are mapped in order
3372     + */
3373     + i = irq = 0;
3374     + while (i < apic)
3375     + irq += nr_ioapic_registers[i++];
3376     + irq += pin;
3377     }
3378     BUG_ON(irq >= NR_IRQS);
3379     return irq;
3380     @@ -884,43 +678,68 @@
3381     }
3382    
3383     /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
3384     -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3385     +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
3386    
3387     -int assign_irq_vector(int irq)
3388     +static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3389     {
3390     - unsigned long flags;
3391     int vector;
3392     struct physdev_irq irq_op;
3393    
3394     - BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
3395     + BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
3396    
3397     - spin_lock_irqsave(&vector_lock, flags);
3398     + cpus_and(*result, mask, cpu_online_map);
3399    
3400     - if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
3401     - spin_unlock_irqrestore(&vector_lock, flags);
3402     - return IO_APIC_VECTOR(irq);
3403     - }
3404     + if (irq_vector[irq] > 0)
3405     + return irq_vector[irq];
3406    
3407     irq_op.irq = irq;
3408     - if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
3409     - spin_unlock_irqrestore(&vector_lock, flags);
3410     + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
3411     return -ENOSPC;
3412     - }
3413    
3414     vector = irq_op.vector;
3415     - vector_irq[vector] = irq;
3416     - if (irq != AUTO_ASSIGN)
3417     - IO_APIC_VECTOR(irq) = vector;
3418     + irq_vector[irq] = vector;
3419    
3420     - spin_unlock_irqrestore(&vector_lock, flags);
3421     + return vector;
3422     +}
3423    
3424     +static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
3425     +{
3426     + int vector;
3427     + unsigned long flags;
3428     +
3429     + spin_lock_irqsave(&vector_lock, flags);
3430     + vector = __assign_irq_vector(irq, mask, result);
3431     + spin_unlock_irqrestore(&vector_lock, flags);
3432     return vector;
3433     }
3434    
3435     -extern void (*interrupt[NR_IRQS])(void);
3436     #ifndef CONFIG_XEN
3437     -static struct hw_interrupt_type ioapic_level_type;
3438     -static struct hw_interrupt_type ioapic_edge_type;
3439     +void __setup_vector_irq(int cpu)
3440     +{
3441     + /* Initialize vector_irq on a new cpu */
3442     + /* This function must be called with vector_lock held */
3443     + int irq, vector;
3444     +
3445     + /* Mark the inuse vectors */
3446     + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
3447     + if (!cpu_isset(cpu, irq_domain[irq]))
3448     + continue;
3449     + vector = irq_vector[irq];
3450     + per_cpu(vector_irq, cpu)[vector] = irq;
3451     + }
3452     + /* Mark the free vectors */
3453     + for (vector = 0; vector < NR_VECTORS; ++vector) {
3454     + irq = per_cpu(vector_irq, cpu)[vector];
3455     + if (irq < 0)
3456     + continue;
3457     + if (!cpu_isset(cpu, irq_domain[irq]))
3458     + per_cpu(vector_irq, cpu)[vector] = -1;
3459     + }
3460     +}
3461     +
3462     +extern void (*interrupt[NR_IRQS])(void);
3463     +
3464     +static struct irq_chip ioapic_chip;
3465    
3466     #define IOAPIC_AUTO -1
3467     #define IOAPIC_EDGE 0
3468     @@ -928,16 +747,15 @@
3469    
3470     static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
3471     {
3472     - unsigned idx;
3473     -
3474     - idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
3475     -
3476     if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
3477     trigger == IOAPIC_LEVEL)
3478     - irq_desc[idx].chip = &ioapic_level_type;
3479     - else
3480     - irq_desc[idx].chip = &ioapic_edge_type;
3481     - set_intr_gate(vector, interrupt[idx]);
3482     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3483     + handle_fasteoi_irq, "fasteoi");
3484     + else {
3485     + irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
3486     + set_irq_chip_and_handler_name(irq, &ioapic_chip,
3487     + handle_edge_irq, "edge");
3488     + }
3489     }
3490     #else
3491     #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
3492     @@ -990,16 +808,21 @@
3493     continue;
3494    
3495     if (IO_APIC_IRQ(irq)) {
3496     - vector = assign_irq_vector(irq);
3497     + cpumask_t mask;
3498     + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
3499     + if (vector < 0)
3500     + continue;
3501     +
3502     + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
3503     entry.vector = vector;
3504    
3505     ioapic_register_intr(irq, vector, IOAPIC_AUTO);
3506     if (!apic && (irq < 16))
3507     disable_8259A_irq(irq);
3508     }
3509     + ioapic_write_entry(apic, pin, entry);
3510     +
3511     spin_lock_irqsave(&ioapic_lock, flags);
3512     - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
3513     - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
3514     set_native_irq_info(irq, TARGET_CPUS);
3515     spin_unlock_irqrestore(&ioapic_lock, flags);
3516     }
3517     @@ -1042,7 +865,7 @@
3518     * The timer IRQ doesn't have to know that behind the
3519     * scene we have a 8259A-master in AEOI mode ...
3520     */
3521     - irq_desc[0].chip = &ioapic_edge_type;
3522     + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
3523    
3524     /*
3525     * Add it to the IO-APIC irq-routing table:
3526     @@ -1138,10 +961,7 @@
3527     for (i = 0; i <= reg_01.bits.entries; i++) {
3528     struct IO_APIC_route_entry entry;
3529    
3530     - spin_lock_irqsave(&ioapic_lock, flags);
3531     - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
3532     - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
3533     - spin_unlock_irqrestore(&ioapic_lock, flags);
3534     + entry = ioapic_read_entry(apic, i);
3535    
3536     printk(KERN_DEBUG " %02x %03X %02X ",
3537     i,
3538     @@ -1161,17 +981,12 @@
3539     );
3540     }
3541     }
3542     - if (use_pci_vector())
3543     - printk(KERN_INFO "Using vector-based indexing\n");
3544     printk(KERN_DEBUG "IRQ to pin mappings:\n");
3545     for (i = 0; i < NR_IRQS; i++) {
3546     struct irq_pin_list *entry = irq_2_pin + i;
3547     if (entry->pin < 0)
3548     continue;
3549     - if (use_pci_vector() && !platform_legacy_irq(i))
3550     - printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
3551     - else
3552     - printk(KERN_DEBUG "IRQ%d ", i);
3553     + printk(KERN_DEBUG "IRQ%d ", i);
3554     for (;;) {
3555     printk("-> %d:%d", entry->apic, entry->pin);
3556     if (!entry->next)
3557     @@ -1335,9 +1150,6 @@
3558     irq_2_pin[i].pin = -1;
3559     irq_2_pin[i].next = 0;
3560     }
3561     - if (!pirqs_enabled)
3562     - for (i = 0; i < MAX_PIRQS; i++)
3563     - pirq_entries[i] = -1;
3564    
3565     /*
3566     * The number of IO-APIC IRQ registers (== #pins):
3567     @@ -1354,11 +1166,7 @@
3568     /* See if any of the pins is in ExtINT mode */
3569     for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
3570     struct IO_APIC_route_entry entry;
3571     - spin_lock_irqsave(&ioapic_lock, flags);
3572     - *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
3573     - *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
3574     - spin_unlock_irqrestore(&ioapic_lock, flags);
3575     -
3576     + entry = ioapic_read_entry(apic, pin);
3577    
3578     /* If the interrupt line is enabled and in ExtInt mode
3579     * I have found the pin where the i8259 is connected.
3580     @@ -1412,7 +1220,6 @@
3581     */
3582     if (ioapic_i8259.pin != -1) {
3583     struct IO_APIC_route_entry entry;
3584     - unsigned long flags;
3585    
3586     memset(&entry, 0, sizeof(entry));
3587     entry.mask = 0; /* Enabled */
3588     @@ -1429,12 +1236,7 @@
3589     /*
3590     * Add it to the IO-APIC irq-routing table:
3591     */
3592     - spin_lock_irqsave(&ioapic_lock, flags);
3593     - io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
3594     - *(((int *)&entry)+1));
3595     - io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
3596     - *(((int *)&entry)+0));
3597     - spin_unlock_irqrestore(&ioapic_lock, flags);
3598     + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
3599     }
3600    
3601     disconnect_bsp_APIC(ioapic_i8259.pin != -1);
3602     @@ -1442,76 +1244,6 @@
3603     }
3604    
3605     /*
3606     - * function to set the IO-APIC physical IDs based on the
3607     - * values stored in the MPC table.
3608     - *
3609     - * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
3610     - */
3611     -
3612     -#ifndef CONFIG_XEN
3613     -static void __init setup_ioapic_ids_from_mpc (void)
3614     -{
3615     - union IO_APIC_reg_00 reg_00;
3616     - int apic;
3617     - int i;
3618     - unsigned char old_id;
3619     - unsigned long flags;
3620     -
3621     - /*
3622     - * Set the IOAPIC ID to the value stored in the MPC table.
3623     - */
3624     - for (apic = 0; apic < nr_ioapics; apic++) {
3625     -
3626     - /* Read the register 0 value */
3627     - spin_lock_irqsave(&ioapic_lock, flags);
3628     - reg_00.raw = io_apic_read(apic, 0);
3629     - spin_unlock_irqrestore(&ioapic_lock, flags);
3630     -
3631     - old_id = mp_ioapics[apic].mpc_apicid;
3632     -
3633     -
3634     - printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
3635     -
3636     -
3637     - /*
3638     - * We need to adjust the IRQ routing table
3639     - * if the ID changed.
3640     - */
3641     - if (old_id != mp_ioapics[apic].mpc_apicid)
3642     - for (i = 0; i < mp_irq_entries; i++)
3643     - if (mp_irqs[i].mpc_dstapic == old_id)
3644     - mp_irqs[i].mpc_dstapic
3645     - = mp_ioapics[apic].mpc_apicid;
3646     -
3647     - /*
3648     - * Read the right value from the MPC table and
3649     - * write it into the ID register.
3650     - */
3651     - apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
3652     - mp_ioapics[apic].mpc_apicid);
3653     -
3654     - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
3655     - spin_lock_irqsave(&ioapic_lock, flags);
3656     - io_apic_write(apic, 0, reg_00.raw);
3657     - spin_unlock_irqrestore(&ioapic_lock, flags);
3658     -
3659     - /*
3660     - * Sanity check
3661     - */
3662     - spin_lock_irqsave(&ioapic_lock, flags);
3663     - reg_00.raw = io_apic_read(apic, 0);
3664     - spin_unlock_irqrestore(&ioapic_lock, flags);
3665     - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
3666     - printk("could not set ID!\n");
3667     - else
3668     - apic_printk(APIC_VERBOSE," ok.\n");
3669     - }
3670     -}
3671     -#else
3672     -static void __init setup_ioapic_ids_from_mpc(void) { }
3673     -#endif
3674     -
3675     -/*
3676     * There is a nasty bug in some older SMP boards, their mptable lies
3677     * about the timer IRQ. We do the following to work around the situation:
3678     *
3679     @@ -1565,7 +1297,7 @@
3680     * an edge even if it isn't on the 8259A...
3681     */
3682    
3683     -static unsigned int startup_edge_ioapic_irq(unsigned int irq)
3684     +static unsigned int startup_ioapic_irq(unsigned int irq)
3685     {
3686     int was_pending = 0;
3687     unsigned long flags;
3688     @@ -1582,107 +1314,19 @@
3689     return was_pending;
3690     }
3691    
3692     -/*
3693     - * Once we have recorded IRQ_PENDING already, we can mask the
3694     - * interrupt for real. This prevents IRQ storms from unhandled
3695     - * devices.
3696     - */
3697     -static void ack_edge_ioapic_irq(unsigned int irq)
3698     -{
3699     - move_irq(irq);
3700     - if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
3701     - == (IRQ_PENDING | IRQ_DISABLED))
3702     - mask_IO_APIC_irq(irq);
3703     - ack_APIC_irq();
3704     -}
3705     -
3706     -/*
3707     - * Level triggered interrupts can just be masked,
3708     - * and shutting down and starting up the interrupt
3709     - * is the same as enabling and disabling them -- except
3710     - * with a startup need to return a "was pending" value.
3711     - *
3712     - * Level triggered interrupts are special because we
3713     - * do not touch any IO-APIC register while handling
3714     - * them. We ack the APIC in the end-IRQ handler, not
3715     - * in the start-IRQ-handler. Protection against reentrance
3716     - * from the same interrupt is still provided, both by the
3717     - * generic IRQ layer and by the fact that an unacked local
3718     - * APIC does not accept IRQs.
3719     - */
3720     -static unsigned int startup_level_ioapic_irq (unsigned int irq)
3721     -{
3722     - unmask_IO_APIC_irq(irq);
3723     -
3724     - return 0; /* don't check for pending */
3725     -}
3726     -
3727     -static void end_level_ioapic_irq (unsigned int irq)
3728     -{
3729     - move_irq(irq);
3730     - ack_APIC_irq();
3731     -}
3732     -
3733     -#ifdef CONFIG_PCI_MSI
3734     -static unsigned int startup_edge_ioapic_vector(unsigned int vector)
3735     -{
3736     - int irq = vector_to_irq(vector);
3737     -
3738     - return startup_edge_ioapic_irq(irq);
3739     -}
3740     -
3741     -static void ack_edge_ioapic_vector(unsigned int vector)
3742     -{
3743     - int irq = vector_to_irq(vector);
3744     -
3745     - move_native_irq(vector);
3746     - ack_edge_ioapic_irq(irq);
3747     -}
3748     -
3749     -static unsigned int startup_level_ioapic_vector (unsigned int vector)
3750     -{
3751     - int irq = vector_to_irq(vector);
3752     -
3753     - return startup_level_ioapic_irq (irq);
3754     -}
3755     -
3756     -static void end_level_ioapic_vector (unsigned int vector)
3757     -{
3758     - int irq = vector_to_irq(vector);
3759     -
3760     - move_native_irq(vector);
3761     - end_level_ioapic_irq(irq);
3762     -}
3763     -
3764     -static void mask_IO_APIC_vector (unsigned int vector)
3765     -{
3766     - int irq = vector_to_irq(vector);
3767     -
3768     - mask_IO_APIC_irq(irq);
3769     -}
3770     -
3771     -static void unmask_IO_APIC_vector (unsigned int vector)
3772     -{
3773     - int irq = vector_to_irq(vector);
3774     -
3775     - unmask_IO_APIC_irq(irq);
3776     -}
3777     -
3778     -#ifdef CONFIG_SMP
3779     -static void set_ioapic_affinity_vector (unsigned int vector,
3780     - cpumask_t cpu_mask)
3781     +static int ioapic_retrigger_irq(unsigned int irq)
3782     {
3783     - int irq = vector_to_irq(vector);
3784     + cpumask_t mask;
3785     + unsigned vector;
3786     + unsigned long flags;
3787    
3788     - set_native_irq_info(vector, cpu_mask);
3789     - set_ioapic_affinity_irq(irq, cpu_mask);
3790     -}
3791     -#endif // CONFIG_SMP
3792     -#endif // CONFIG_PCI_MSI
3793     + spin_lock_irqsave(&vector_lock, flags);
3794     + vector = irq_vector[irq];
3795     + cpus_clear(mask);
3796     + cpu_set(first_cpu(irq_domain[irq]), mask);
3797    
3798     -static int ioapic_retrigger(unsigned int irq)
3799     -{
3800     - send_IPI_self(IO_APIC_VECTOR(irq));
3801     + send_IPI_mask(mask, vector);
3802     + spin_unlock_irqrestore(&vector_lock, flags);
3803    
3804     return 1;
3805     }
3806     @@ -1696,32 +1340,47 @@
3807     * races.
3808     */
3809    
3810     -static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
3811     - .typename = "IO-APIC-edge",
3812     - .startup = startup_edge_ioapic,
3813     - .shutdown = shutdown_edge_ioapic,
3814     - .enable = enable_edge_ioapic,
3815     - .disable = disable_edge_ioapic,
3816     - .ack = ack_edge_ioapic,
3817     - .end = end_edge_ioapic,
3818     -#ifdef CONFIG_SMP
3819     - .set_affinity = set_ioapic_affinity,
3820     +static void ack_apic_edge(unsigned int irq)
3821     +{
3822     + move_native_irq(irq);
3823     + ack_APIC_irq();
3824     +}
3825     +
3826     +static void ack_apic_level(unsigned int irq)
3827     +{
3828     + int do_unmask_irq = 0;
3829     +
3830     +#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
3831     + /* If we are moving the irq we need to mask it */
3832     + if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
3833     + do_unmask_irq = 1;
3834     + mask_IO_APIC_irq(irq);
3835     + }
3836     #endif
3837     - .retrigger = ioapic_retrigger,
3838     -};
3839    
3840     -static struct hw_interrupt_type ioapic_level_type __read_mostly = {
3841     - .typename = "IO-APIC-level",
3842     - .startup = startup_level_ioapic,
3843     - .shutdown = shutdown_level_ioapic,
3844     - .enable = enable_level_ioapic,
3845     - .disable = disable_level_ioapic,
3846     - .ack = mask_and_ack_level_ioapic,
3847     - .end = end_level_ioapic,
3848     + /*
3849     + * We must acknowledge the irq before we move it or the acknowledge will
3850     + * not propagate properly.
3851     + */
3852     + ack_APIC_irq();
3853     +
3854     + /* Now we can move and re-enable the irq */
3855     + move_masked_irq(irq);
3856     + if (unlikely(do_unmask_irq))
3857     + unmask_IO_APIC_irq(irq);
3858     +}
3859     +
3860     +static struct irq_chip ioapic_chip __read_mostly = {
3861     + .name = "IO-APIC",
3862     + .startup = startup_ioapic_irq,
3863     + .mask = mask_IO_APIC_irq,
3864     + .unmask = unmask_IO_APIC_irq,
3865     + .ack = ack_apic_edge,
3866     + .eoi = ack_apic_level,
3867     #ifdef CONFIG_SMP
3868     - .set_affinity = set_ioapic_affinity,
3869     + .set_affinity = set_ioapic_affinity_irq,
3870     #endif
3871     - .retrigger = ioapic_retrigger,
3872     + .retrigger = ioapic_retrigger_irq,
3873     };
3874     #endif /* !CONFIG_XEN */
3875    
3876     @@ -1742,12 +1401,7 @@
3877     */
3878     for (irq = 0; irq < NR_IRQS ; irq++) {
3879     int tmp = irq;
3880     - if (use_pci_vector()) {
3881     - if (!platform_legacy_irq(tmp))
3882     - if ((tmp = vector_to_irq(tmp)) == -1)
3883     - continue;
3884     - }
3885     - if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
3886     + if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
3887     /*
3888     * Hmm.. We don't have an entry for this,
3889     * so default to an old-fashioned 8259
3890     @@ -1758,7 +1412,7 @@
3891     #ifndef CONFIG_XEN
3892     else
3893     /* Strange. Oh, well.. */
3894     - irq_desc[irq].chip = &no_irq_type;
3895     + irq_desc[irq].chip = &no_irq_chip;
3896     #endif
3897     }
3898     }
3899     @@ -1879,8 +1533,6 @@
3900     spin_unlock_irqrestore(&ioapic_lock, flags);
3901     }
3902    
3903     -int timer_uses_ioapic_pin_0;
3904     -
3905     /*
3906     * This code may look a bit paranoid, but it's supposed to cooperate with
3907     * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
3908     @@ -1893,13 +1545,13 @@
3909     {
3910     int apic1, pin1, apic2, pin2;
3911     int vector;
3912     + cpumask_t mask;
3913    
3914     /*
3915     * get/set the timer IRQ vector:
3916     */
3917     disable_8259A_irq(0);
3918     - vector = assign_irq_vector(0);
3919     - set_intr_gate(vector, interrupt[0]);
3920     + vector = assign_irq_vector(0, TARGET_CPUS, &mask);
3921    
3922     /*
3923     * Subtle, code in do_timer_interrupt() expects an AEOI
3924     @@ -1918,9 +1570,6 @@
3925     pin2 = ioapic_i8259.pin;
3926     apic2 = ioapic_i8259.apic;
3927    
3928     - if (pin1 == 0)
3929     - timer_uses_ioapic_pin_0 = 1;
3930     -
3931     apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
3932     vector, apic1, pin1, apic2, pin2);
3933    
3934     @@ -2035,11 +1684,6 @@
3935    
3936     apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
3937    
3938     - /*
3939     - * Set up the IO-APIC IRQ routing table.
3940     - */
3941     - if (!acpi_ioapic)
3942     - setup_ioapic_ids_from_mpc();
3943     #ifndef CONFIG_XEN
3944     sync_Arb_IDs();
3945     #endif /* !CONFIG_XEN */
3946     @@ -2060,17 +1704,12 @@
3947     {
3948     struct IO_APIC_route_entry *entry;
3949     struct sysfs_ioapic_data *data;
3950     - unsigned long flags;
3951     int i;
3952    
3953     data = container_of(dev, struct sysfs_ioapic_data, dev);
3954     entry = data->entry;
3955     - spin_lock_irqsave(&ioapic_lock, flags);
3956     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
3957     - *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
3958     - *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
3959     - }
3960     - spin_unlock_irqrestore(&ioapic_lock, flags);
3961     + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
3962     + *entry = ioapic_read_entry(dev->id, i);
3963    
3964     return 0;
3965     }
3966     @@ -2092,11 +1731,9 @@
3967     reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
3968     io_apic_write(dev->id, 0, reg_00.raw);
3969     }
3970     - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
3971     - io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
3972     - io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
3973     - }
3974     spin_unlock_irqrestore(&ioapic_lock, flags);
3975     + for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
3976     + ioapic_write_entry(dev->id, i, entry[i]);
3977    
3978     return 0;
3979     }
3980     @@ -2142,26 +1779,254 @@
3981    
3982     device_initcall(ioapic_init_sysfs);
3983    
3984     -/* --------------------------------------------------------------------------
3985     - ACPI-based IOAPIC Configuration
3986     - -------------------------------------------------------------------------- */
3987     +#ifndef CONFIG_XEN
3988     +/*
3989     + * Dynamic irq allocate and deallocation
3990     + */
3991     +int create_irq(void)
3992     +{
3993     + /* Allocate an unused irq */
3994     + int irq;
3995     + int new;
3996     + int vector = 0;
3997     + unsigned long flags;
3998     + cpumask_t mask;
3999    
4000     -#ifdef CONFIG_ACPI
4001     + irq = -ENOSPC;
4002     + spin_lock_irqsave(&vector_lock, flags);
4003     + for (new = (NR_IRQS - 1); new >= 0; new--) {
4004     + if (platform_legacy_irq(new))
4005     + continue;
4006     + if (irq_vector[new] != 0)
4007     + continue;
4008     + vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
4009     + if (likely(vector > 0))
4010     + irq = new;
4011     + break;
4012     + }
4013     + spin_unlock_irqrestore(&vector_lock, flags);
4014    
4015     -#define IO_APIC_MAX_ID 0xFE
4016     + if (irq >= 0) {
4017     + dynamic_irq_init(irq);
4018     + }
4019     + return irq;
4020     +}
4021    
4022     -int __init io_apic_get_version (int ioapic)
4023     +void destroy_irq(unsigned int irq)
4024     {
4025     - union IO_APIC_reg_01 reg_01;
4026     unsigned long flags;
4027    
4028     - spin_lock_irqsave(&ioapic_lock, flags);
4029     - reg_01.raw = io_apic_read(ioapic, 1);
4030     - spin_unlock_irqrestore(&ioapic_lock, flags);
4031     + dynamic_irq_cleanup(irq);
4032     +
4033     + spin_lock_irqsave(&vector_lock, flags);
4034     + irq_vector[irq] = 0;
4035     + spin_unlock_irqrestore(&vector_lock, flags);
4036     +}
4037     +#endif
4038     +
4039     +/*
4040     + * MSI message composition
4041     + */
4042     +#ifdef CONFIG_PCI_MSI
4043     +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
4044     +{
4045     + int vector;
4046     + unsigned dest;
4047     + cpumask_t tmp;
4048     +
4049     + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4050     + if (vector >= 0) {
4051     + dest = cpu_mask_to_apicid(tmp);
4052     +
4053     + msg->address_hi = MSI_ADDR_BASE_HI;
4054     + msg->address_lo =
4055     + MSI_ADDR_BASE_LO |
4056     + ((INT_DEST_MODE == 0) ?
4057     + MSI_ADDR_DEST_MODE_PHYSICAL:
4058     + MSI_ADDR_DEST_MODE_LOGICAL) |
4059     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4060     + MSI_ADDR_REDIRECTION_CPU:
4061     + MSI_ADDR_REDIRECTION_LOWPRI) |
4062     + MSI_ADDR_DEST_ID(dest);
4063     +
4064     + msg->data =
4065     + MSI_DATA_TRIGGER_EDGE |
4066     + MSI_DATA_LEVEL_ASSERT |
4067     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4068     + MSI_DATA_DELIVERY_FIXED:
4069     + MSI_DATA_DELIVERY_LOWPRI) |
4070     + MSI_DATA_VECTOR(vector);
4071     + }
4072     + return vector;
4073     +}
4074     +
4075     +#ifdef CONFIG_SMP
4076     +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
4077     +{
4078     + struct msi_msg msg;
4079     + unsigned int dest;
4080     + cpumask_t tmp;
4081     + int vector;
4082     +
4083     + cpus_and(tmp, mask, cpu_online_map);
4084     + if (cpus_empty(tmp))
4085     + tmp = TARGET_CPUS;
4086     +
4087     + cpus_and(mask, tmp, CPU_MASK_ALL);
4088     +
4089     + vector = assign_irq_vector(irq, mask, &tmp);
4090     + if (vector < 0)
4091     + return;
4092     +
4093     + dest = cpu_mask_to_apicid(tmp);
4094     +
4095     + read_msi_msg(irq, &msg);
4096     +
4097     + msg.data &= ~MSI_DATA_VECTOR_MASK;
4098     + msg.data |= MSI_DATA_VECTOR(vector);
4099     + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
4100     + msg.address_lo |= MSI_ADDR_DEST_ID(dest);
4101     +
4102     + write_msi_msg(irq, &msg);
4103     + set_native_irq_info(irq, mask);
4104     +}
4105     +#endif /* CONFIG_SMP */
4106     +
4107     +/*
4108     + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
4109     + * which implement the MSI or MSI-X Capability Structure.
4110     + */
4111     +static struct irq_chip msi_chip = {
4112     + .name = "PCI-MSI",
4113     + .unmask = unmask_msi_irq,
4114     + .mask = mask_msi_irq,
4115     + .ack = ack_apic_edge,
4116     +#ifdef CONFIG_SMP
4117     + .set_affinity = set_msi_irq_affinity,
4118     +#endif
4119     + .retrigger = ioapic_retrigger_irq,
4120     +};
4121     +
4122     +int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
4123     +{
4124     + struct msi_msg msg;
4125     + int ret;
4126     + ret = msi_compose_msg(dev, irq, &msg);
4127     + if (ret < 0)
4128     + return ret;
4129     +
4130     + write_msi_msg(irq, &msg);
4131     +
4132     + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
4133     +
4134     + return 0;
4135     +}
4136     +
4137     +void arch_teardown_msi_irq(unsigned int irq)
4138     +{
4139     + return;
4140     +}
4141     +
4142     +#endif /* CONFIG_PCI_MSI */
4143     +
4144     +/*
4145     + * Hypertransport interrupt support
4146     + */
4147     +#ifdef CONFIG_HT_IRQ
4148     +
4149     +#ifdef CONFIG_SMP
4150     +
4151     +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
4152     +{
4153     + struct ht_irq_msg msg;
4154     + fetch_ht_irq_msg(irq, &msg);
4155     +
4156     + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
4157     + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
4158    
4159     - return reg_01.bits.version;
4160     + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
4161     + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
4162     +
4163     + write_ht_irq_msg(irq, &msg);
4164     }
4165    
4166     +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
4167     +{
4168     + unsigned int dest;
4169     + cpumask_t tmp;
4170     + int vector;
4171     +
4172     + cpus_and(tmp, mask, cpu_online_map);
4173     + if (cpus_empty(tmp))
4174     + tmp = TARGET_CPUS;
4175     +
4176     + cpus_and(mask, tmp, CPU_MASK_ALL);
4177     +
4178     + vector = assign_irq_vector(irq, mask, &tmp);
4179     + if (vector < 0)
4180     + return;
4181     +
4182     + dest = cpu_mask_to_apicid(tmp);
4183     +
4184     + target_ht_irq(irq, dest, vector);
4185     + set_native_irq_info(irq, mask);
4186     +}
4187     +#endif
4188     +
4189     +static struct irq_chip ht_irq_chip = {
4190     + .name = "PCI-HT",
4191     + .mask = mask_ht_irq,
4192     + .unmask = unmask_ht_irq,
4193     + .ack = ack_apic_edge,
4194     +#ifdef CONFIG_SMP
4195     + .set_affinity = set_ht_irq_affinity,
4196     +#endif
4197     + .retrigger = ioapic_retrigger_irq,
4198     +};
4199     +
4200     +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
4201     +{
4202     + int vector;
4203     + cpumask_t tmp;
4204     +
4205     + vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
4206     + if (vector >= 0) {
4207     + struct ht_irq_msg msg;
4208     + unsigned dest;
4209     +
4210     + dest = cpu_mask_to_apicid(tmp);
4211     +
4212     + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
4213     +
4214     + msg.address_lo =
4215     + HT_IRQ_LOW_BASE |
4216     + HT_IRQ_LOW_DEST_ID(dest) |
4217     + HT_IRQ_LOW_VECTOR(vector) |
4218     + ((INT_DEST_MODE == 0) ?
4219     + HT_IRQ_LOW_DM_PHYSICAL :
4220     + HT_IRQ_LOW_DM_LOGICAL) |
4221     + HT_IRQ_LOW_RQEOI_EDGE |
4222     + ((INT_DELIVERY_MODE != dest_LowestPrio) ?
4223     + HT_IRQ_LOW_MT_FIXED :
4224     + HT_IRQ_LOW_MT_ARBITRATED) |
4225     + HT_IRQ_LOW_IRQ_MASKED;
4226     +
4227     + write_ht_irq_msg(irq, &msg);
4228     +
4229     + set_irq_chip_and_handler_name(irq, &ht_irq_chip,
4230     + handle_edge_irq, "edge");
4231     + }
4232     + return vector;
4233     +}
4234     +#endif /* CONFIG_HT_IRQ */
4235     +
4236     +/* --------------------------------------------------------------------------
4237     + ACPI-based IOAPIC Configuration
4238     + -------------------------------------------------------------------------- */
4239     +
4240     +#ifdef CONFIG_ACPI
4241     +
4242     +#define IO_APIC_MAX_ID 0xFE
4243    
4244     int __init io_apic_get_redir_entries (int ioapic)
4245     {
4246     @@ -2180,6 +2045,8 @@
4247     {
4248     struct IO_APIC_route_entry entry;
4249     unsigned long flags;
4250     + int vector;
4251     + cpumask_t mask;
4252    
4253     if (!IO_APIC_IRQ(irq)) {
4254     apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
4255     @@ -2188,6 +2055,17 @@
4256     }
4257    
4258     /*
4259     + * IRQs < 16 are already in the irq_2_pin[] map
4260     + */
4261     + if (irq >= 16)
4262     + add_pin_to_irq(irq, ioapic, pin);
4263     +
4264     +
4265     + vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
4266     + if (vector < 0)
4267     + return vector;
4268     +
4269     + /*
4270     * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
4271     * Note that we mask (disable) IRQs now -- these get enabled when the
4272     * corresponding device driver registers for this IRQ.
4273     @@ -2197,19 +2075,11 @@
4274    
4275     entry.delivery_mode = INT_DELIVERY_MODE;
4276     entry.dest_mode = INT_DEST_MODE;
4277     - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
4278     + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
4279     entry.trigger = edge_level;
4280     entry.polarity = active_high_low;
4281     entry.mask = 1; /* Disabled (masked) */
4282     -
4283     - irq = gsi_irq_sharing(irq);
4284     - /*
4285     - * IRQs < 16 are already in the irq_2_pin[] map
4286     - */
4287     - if (irq >= 16)
4288     - add_pin_to_irq(irq, ioapic, pin);
4289     -
4290     - entry.vector = assign_irq_vector(irq);
4291     + entry.vector = vector & 0xff;
4292    
4293     apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
4294     "IRQ %d Mode:%i Active:%i)\n", ioapic,
4295     @@ -2221,10 +2091,10 @@
4296     if (!ioapic && (irq < 16))
4297     disable_8259A_irq(irq);
4298    
4299     + ioapic_write_entry(ioapic, pin, entry);
4300     +
4301     spin_lock_irqsave(&ioapic_lock, flags);
4302     - io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
4303     - io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
4304     - set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
4305     + set_native_irq_info(irq, TARGET_CPUS);
4306     spin_unlock_irqrestore(&ioapic_lock, flags);
4307    
4308     return 0;
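
A note on the io_apic hunks above: two structural changes recur throughout. First, the per-bus switch statements over mp_bus_id_to_type (the ISA/EISA/PCI/MCA cases) collapse into a single mp_bus_not_pci bitmap, since the only distinction MPBIOS_trigger() and pin_2_irq() still need is "PCI or not". Second, the separate edge/level hw_interrupt_type structures give way to one genirq irq_chip whose behaviour is selected per IRQ via handle_edge_irq/handle_fasteoi_irq. A minimal userspace sketch of the bitmap idiom follows; the helpers are standalone stand-ins for the kernel's set_bit()/test_bit(), not the real implementations:

    #include <limits.h>
    #include <stdio.h>

    #define MAX_MP_BUSSES 256
    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* one bit per bus id; set = "not a PCI bus" */
    static unsigned long mp_bus_not_pci[MAX_MP_BUSSES / BITS_PER_LONG + 1];

    static void set_bus_not_pci(int bus)
    {
        mp_bus_not_pci[bus / BITS_PER_LONG] |= 1UL << (bus % BITS_PER_LONG);
    }

    static int bus_is_not_pci(int bus)
    {
        return (mp_bus_not_pci[bus / BITS_PER_LONG] >> (bus % BITS_PER_LONG)) & 1;
    }

    int main(void)
    {
        set_bus_not_pci(0);     /* bus 0: e.g. ISA; bus 1 left clear: PCI */
        printf("bus 0 uses %s trigger defaults\n", bus_is_not_pci(0) ? "ISA" : "PCI");
        printf("bus 1 uses %s trigger defaults\n", bus_is_not_pci(1) ? "ISA" : "PCI");
        return 0;
    }
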
4309 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/ioport_64-xen.c linux-2.6.25-xen/arch/x86/kernel/ioport_64-xen.c
4310     --- linux-2.6.25/arch/x86/kernel/ioport_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4311     +++ linux-2.6.25-xen/arch/x86/kernel/ioport_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4312 niro 609 @@ -58,6 +58,7 @@
4313    
4314     memset(bitmap, 0xff, IO_BITMAP_BYTES);
4315     t->io_bitmap_ptr = bitmap;
4316     + set_thread_flag(TIF_IO_BITMAP);
4317    
4318     set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
4319     set_iobitmap.nr_ports = IO_BITMAP_BITS;
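
The one-line ioport_64 hunk above tags the task with TIF_IO_BITMAP as soon as it allocates an I/O permission bitmap, so the context-switch path can test a cheap thread flag instead of chasing io_bitmap_ptr. A toy model of that pattern; the struct and the bit number are illustrative, not the kernel's:

    #include <stdio.h>

    #define TIF_IO_BITMAP 22        /* illustrative bit number only */

    struct task {
        unsigned long flags;
        unsigned char *io_bitmap_ptr;
    };

    int main(void)
    {
        static unsigned char bitmap[8192];
        struct task t = { 0, NULL };

        t.io_bitmap_ptr = bitmap;               /* what the ioperm() path sets up */
        t.flags |= 1UL << TIF_IO_BITMAP;        /* the flag this hunk adds */

        if ((t.flags >> TIF_IO_BITMAP) & 1)
            printf("switch path would (re)load the I/O bitmap\n");
        return 0;
    }
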
4320 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/irq_32-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c
4321     --- linux-2.6.25/arch/x86/kernel/irq_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4322     +++ linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4323 niro 609 @@ -53,8 +53,10 @@
4324     */
4325     fastcall unsigned int do_IRQ(struct pt_regs *regs)
4326     {
4327     + struct pt_regs *old_regs;
4328     /* high bit used in ret_from_ code */
4329     int irq = ~regs->orig_eax;
4330     + struct irq_desc *desc = irq_desc + irq;
4331     #ifdef CONFIG_4KSTACKS
4332     union irq_ctx *curctx, *irqctx;
4333     u32 *isp;
4334     @@ -66,6 +68,7 @@
4335     BUG();
4336     }
4337    
4338     + old_regs = set_irq_regs(regs);
4339     irq_enter();
4340     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4341     /* Debugging check for stack overflow: is there less than 1KB free? */
4342     @@ -110,19 +113,20 @@
4343     (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
4344    
4345     asm volatile(
4346     - " xchgl %%ebx,%%esp \n"
4347     - " call __do_IRQ \n"
4348     + " xchgl %%ebx,%%esp \n"
4349     + " call *%%edi \n"
4350     " movl %%ebx,%%esp \n"
4351     : "=a" (arg1), "=d" (arg2), "=b" (ebx)
4352     - : "0" (irq), "1" (regs), "2" (isp)
4353     - : "memory", "cc", "ecx"
4354     + : "0" (irq), "1" (desc), "2" (isp),
4355     + "D" (desc->handle_irq)
4356     + : "memory", "cc"
4357     );
4358     } else
4359     #endif
4360     - __do_IRQ(irq, regs);
4361     + desc->handle_irq(irq, desc);
4362    
4363     irq_exit();
4364     -
4365     + set_irq_regs(old_regs);
4366     return 1;
4367     }
4368    
4369     @@ -253,7 +257,8 @@
4370     for_each_online_cpu(j)
4371     seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4372     #endif
4373     - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4374     + seq_printf(p, " %8s", irq_desc[i].chip->name);
4375     + seq_printf(p, "-%-8s", irq_desc[i].name);
4376     seq_printf(p, " %s", action->name);
4377    
4378     for (action=action->next; action; action = action->next)
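
The 32-bit do_IRQ rewrite above is the genirq migration in miniature: the interrupted register snapshot is parked with set_irq_regs() instead of being passed to every handler, and dispatch goes through the flow handler installed on each irq_desc rather than the one-size-fits-all __do_IRQ(). A self-contained sketch of that dispatch, with simplified types standing in for the kernel's:

    #include <stdio.h>

    #define NR_IRQS 16

    struct irq_desc;
    typedef void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc);

    struct irq_desc {
        irq_flow_handler_t handle_irq;
        const char *name;
    };

    static void handle_edge(unsigned int irq, struct irq_desc *desc)
    {
        printf("IRQ %u: %s flow\n", irq, desc->name);
    }

    static void handle_fasteoi(unsigned int irq, struct irq_desc *desc)
    {
        printf("IRQ %u: %s flow\n", irq, desc->name);
    }

    static struct irq_desc irq_desc[NR_IRQS];

    /* models the desc->handle_irq(irq, desc) call in do_IRQ() */
    static void generic_dispatch(unsigned int irq)
    {
        struct irq_desc *desc = &irq_desc[irq];
        desc->handle_irq(irq, desc);
    }

    int main(void)
    {
        irq_desc[1] = (struct irq_desc){ handle_edge, "edge" };
        irq_desc[9] = (struct irq_desc){ handle_fasteoi, "fasteoi" };
        generic_dispatch(1);
        generic_dispatch(9);
        return 0;
    }
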
4379 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/irq_64-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_64-xen.c
4380     --- linux-2.6.25/arch/x86/kernel/irq_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4381     +++ linux-2.6.25-xen/arch/x86/kernel/irq_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4382 niro 609 @@ -20,11 +20,6 @@
4383     #include <asm/idle.h>
4384    
4385     atomic_t irq_err_count;
4386     -#ifdef CONFIG_X86_IO_APIC
4387     -#ifdef APIC_MISMATCH_DEBUG
4388     -atomic_t irq_mis_count;
4389     -#endif
4390     -#endif
4391    
4392     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4393     /*
4394     @@ -79,7 +74,8 @@
4395     for_each_online_cpu(j)
4396     seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
4397     #endif
4398     - seq_printf(p, " %14s", irq_desc[i].chip->typename);
4399     + seq_printf(p, " %8s", irq_desc[i].chip->name);
4400     + seq_printf(p, "-%-8s", irq_desc[i].name);
4401    
4402     seq_printf(p, " %s", action->name);
4403     for (action=action->next; action; action = action->next)
4404     @@ -99,11 +95,6 @@
4405     seq_putc(p, '\n');
4406     #endif
4407     seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
4408     -#ifdef CONFIG_X86_IO_APIC
4409     -#ifdef APIC_MISMATCH_DEBUG
4410     - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
4411     -#endif
4412     -#endif
4413     }
4414     return 0;
4415     }
4416     @@ -114,24 +105,28 @@
4417     * handlers).
4418     */
4419     asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
4420     -{
4421     +{
4422     + struct pt_regs *old_regs = set_irq_regs(regs);
4423     +
4424     /* high bit used in ret_from_ code */
4425     unsigned irq = ~regs->orig_rax;
4426    
4427     - if (unlikely(irq >= NR_IRQS)) {
4428     - printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
4429     - __FUNCTION__, irq);
4430     - BUG();
4431     - }
4432     -
4433     exit_idle();
4434     irq_enter();
4435     +
4436     #ifdef CONFIG_DEBUG_STACKOVERFLOW
4437     stack_overflow_check(regs);
4438     #endif
4439     - __do_IRQ(irq, regs);
4440     +
4441     + if (likely(irq < NR_IRQS))
4442     + generic_handle_irq(irq);
4443     + else
4444     + printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
4445     + __func__, smp_processor_id(), irq);
4446     +
4447     irq_exit();
4448    
4449     + set_irq_regs(old_regs);
4450     return 1;
4451     }
4452    
4453     @@ -192,6 +187,6 @@
4454     */
4455     void ack_bad_irq(unsigned int irq)
4456     {
4457     - printk("unexpected IRQ trap at vector %02x\n", irq);
4458     + printk("unexpected IRQ trap at irq %02x\n", irq);
4459     }
4460     #endif
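
Both the 32-bit and 64-bit irq hunks also reformat /proc/interrupts: the old single chip->typename column becomes chip->name joined to the per-IRQ flow-handler name, which is how labels such as IO-APIC-edge are now assembled. A trivial demonstration of the two format strings used in the seq_printf() calls above:

    #include <stdio.h>

    int main(void)
    {
        /* " %8s" then "-%-8s", as in the patched show_interrupts() */
        printf(" %8s-%-8s %s\n", "IO-APIC", "edge", "timer");
        printf(" %8s-%-8s %s\n", "IO-APIC", "fasteoi", "eth0");
        return 0;
    }
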
4461 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/ldt_32-xen.c linux-2.6.25-xen/arch/x86/kernel/ldt_32-xen.c
4462     --- linux-2.6.25/arch/x86/kernel/ldt_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4463     +++ linux-2.6.25-xen/arch/x86/kernel/ldt_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4464 niro 609 @@ -1,5 +1,5 @@
4465     /*
4466     - * linux/kernel/ldt.c
4467     + * linux/arch/i386/kernel/ldt.c
4468     *
4469     * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
4470     * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
4471 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/Makefile linux-2.6.25-xen/arch/x86/kernel/Makefile
4472     --- linux-2.6.25/arch/x86/kernel/Makefile 2008-05-23 20:51:22.000000000 +0200
4473     +++ linux-2.6.25-xen/arch/x86/kernel/Makefile 2008-05-23 20:39:03.000000000 +0200
4474     @@ -91,7 +91,7 @@
4475     ###
4476     # 64 bit specific files
4477     ifeq ($(CONFIG_X86_64),y)
4478     - obj-y += genapic_64.o genapic_flat_64.o
4479     + obj-$(CONFIG_X86_LOCAL_APIC) += genapic_64.o genapic_flat_64.o
4480     obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
4481     obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
4482     obj-$(CONFIG_AUDIT) += audit_64.o
4483     @@ -104,5 +104,6 @@
4484     pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
4485     endif
4486    
4487     -disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
4488     +disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
4489     + smpboot_$(BITS).o tsc_$(BITS).o
4490     %/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
4491     diff -Naur linux-2.6.25/arch/x86/kernel/microcode-xen.c linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c
4492     --- linux-2.6.25/arch/x86/kernel/microcode-xen.c 2008-05-23 20:51:11.000000000 +0200
4493     +++ linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c 2008-05-23 20:39:03.000000000 +0200
4494 niro 609 @@ -2,6 +2,7 @@
4495     * Intel CPU Microcode Update Driver for Linux
4496     *
4497     * Copyright (C) 2000-2004 Tigran Aivazian
4498     + * 2006 Shaohua Li <shaohua.li@intel.com>
4499     *
4500     * This driver allows to upgrade microcode on Intel processors
4501     * belonging to IA-32 family - PentiumPro, Pentium II,
4502     @@ -33,7 +34,9 @@
4503     #include <linux/spinlock.h>
4504     #include <linux/mm.h>
4505     #include <linux/mutex.h>
4506     -#include <linux/syscalls.h>
4507     +#include <linux/cpu.h>
4508     +#include <linux/firmware.h>
4509     +#include <linux/platform_device.h>
4510    
4511     #include <asm/msr.h>
4512     #include <asm/uaccess.h>
4513     @@ -55,12 +58,7 @@
4514     /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
4515     static DEFINE_MUTEX(microcode_mutex);
4516    
4517     -static int microcode_open (struct inode *unused1, struct file *unused2)
4518     -{
4519     - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4520     -}
4521     -
4522     -
4523     +#ifdef CONFIG_MICROCODE_OLD_INTERFACE
4524     static int do_microcode_update (const void __user *ubuf, size_t len)
4525     {
4526     int err;
4527     @@ -85,6 +83,11 @@
4528     return err;
4529     }
4530    
4531     +static int microcode_open (struct inode *unused1, struct file *unused2)
4532     +{
4533     + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
4534     +}
4535     +
4536     static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
4537     {
4538     ssize_t ret;
4539     @@ -117,7 +120,7 @@
4540     .fops = &microcode_fops,
4541     };
4542    
4543     -static int __init microcode_init (void)
4544     +static int __init microcode_dev_init (void)
4545     {
4546     int error;
4547    
4548     @@ -129,6 +132,68 @@
4549     return error;
4550     }
4551    
4552     + return 0;
4553     +}
4554     +
4555     +static void __exit microcode_dev_exit (void)
4556     +{
4557     + misc_deregister(&microcode_dev);
4558     +}
4559     +
4560     +MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
4561     +#else
4562     +#define microcode_dev_init() 0
4563     +#define microcode_dev_exit() do { } while(0)
4564     +#endif
4565     +
4566     +/* fake device for request_firmware */
4567     +static struct platform_device *microcode_pdev;
4568     +
4569     +static int request_microcode(void)
4570     +{
4571     + char name[30];
4572     + const struct cpuinfo_x86 *c = &boot_cpu_data;
4573     + const struct firmware *firmware;
4574     + int error;
4575     + struct xen_platform_op op;
4576     +
4577     + sprintf(name,"intel-ucode/%02x-%02x-%02x",
4578     + c->x86, c->x86_model, c->x86_mask);
4579     + error = request_firmware(&firmware, name, &microcode_pdev->dev);
4580     + if (error) {
4581     + pr_debug("ucode data file %s load failed\n", name);
4582     + return error;
4583     + }
4584     +
4585     + op.cmd = XENPF_microcode_update;
4586     + set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
4587     + op.u.microcode.length = firmware->size;
4588     + error = HYPERVISOR_platform_op(&op);
4589     +
4590     + release_firmware(firmware);
4591     +
4592     + if (error)
4593     + pr_debug("ucode load failed\n");
4594     +
4595     + return error;
4596     +}
4597     +
4598     +static int __init microcode_init (void)
4599     +{
4600     + int error;
4601     +
4602     + error = microcode_dev_init();
4603     + if (error)
4604     + return error;
4605     + microcode_pdev = platform_device_register_simple("microcode", -1,
4606     + NULL, 0);
4607     + if (IS_ERR(microcode_pdev)) {
4608     + microcode_dev_exit();
4609     + return PTR_ERR(microcode_pdev);
4610     + }
4611     +
4612     + request_microcode();
4613     +
4614     printk(KERN_INFO
4615     "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
4616     return 0;
4617     @@ -136,9 +201,9 @@
4618    
4619     static void __exit microcode_exit (void)
4620     {
4621     - misc_deregister(&microcode_dev);
4622     + microcode_dev_exit();
4623     + platform_device_unregister(microcode_pdev);
4624     }
4625    
4626     module_init(microcode_init)
4627     module_exit(microcode_exit)
4628     -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
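
The microcode rework above moves the legacy /dev/cpu/microcode interface behind CONFIG_MICROCODE_OLD_INTERFACE and adds a request_firmware() path: a firmware name is derived from the CPU's family/model/stepping, and the blob is handed to the hypervisor via XENPF_microcode_update instead of being parsed in the guest. A sketch of just the name construction, using made-up example signature values:

    #include <stdio.h>

    int main(void)
    {
        /* illustrative signature: family 6, model 15, stepping 6 */
        unsigned x86 = 6, x86_model = 15, x86_mask = 6;
        char name[30];

        snprintf(name, sizeof(name), "intel-ucode/%02x-%02x-%02x",
                 x86, x86_model, x86_mask);
        printf("request_firmware() name: %s\n", name);  /* intel-ucode/06-0f-06 */
        return 0;
    }
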
4629 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_32-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_32-xen.c
4630     --- linux-2.6.25/arch/x86/kernel/mpparse_32-xen.c 2008-05-23 20:51:11.000000000 +0200
4631     +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_32-xen.c 2008-05-23 20:39:03.000000000 +0200
4632 niro 609 @@ -30,6 +30,7 @@
4633     #include <asm/io_apic.h>
4634    
4635     #include <mach_apic.h>
4636     +#include <mach_apicdef.h>
4637     #include <mach_mpparse.h>
4638     #include <bios_ebda.h>
4639    
4640     @@ -68,7 +69,7 @@
4641     /* Processor that is doing the boot up */
4642     unsigned int boot_cpu_physical_apicid = -1U;
4643     /* Internal processor count */
4644     -static unsigned int __devinitdata num_processors;
4645     +unsigned int __cpuinitdata num_processors;
4646    
4647     /* Bitmask of physically existing CPUs */
4648     physid_mask_t phys_cpu_present_map;
4649     @@ -235,12 +236,14 @@
4650    
4651     mpc_oem_bus_info(m, str, translation_table[mpc_record]);
4652    
4653     +#if MAX_MP_BUSSES < 256
4654     if (m->mpc_busid >= MAX_MP_BUSSES) {
4655     printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
4656     " is too large, max. supported is %d\n",
4657     m->mpc_busid, str, MAX_MP_BUSSES - 1);
4658     return;
4659     }
4660     +#endif
4661    
4662     if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
4663     mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4664     @@ -300,19 +303,6 @@
4665     m->mpc_irqtype, m->mpc_irqflag & 3,
4666     (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4667     m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4668     - /*
4669     - * Well it seems all SMP boards in existence
4670     - * use ExtINT/LVT1 == LINT0 and
4671     - * NMI/LVT2 == LINT1 - the following check
4672     - * will show us if this assumptions is false.
4673     - * Until then we do not have to add baggage.
4674     - */
4675     - if ((m->mpc_irqtype == mp_ExtINT) &&
4676     - (m->mpc_destapiclint != 0))
4677     - BUG();
4678     - if ((m->mpc_irqtype == mp_NMI) &&
4679     - (m->mpc_destapiclint != 1))
4680     - BUG();
4681     }
4682    
4683     #ifdef CONFIG_X86_NUMAQ
4684     @@ -838,8 +828,7 @@
4685    
4686     #ifdef CONFIG_ACPI
4687    
4688     -void __init mp_register_lapic_address (
4689     - u64 address)
4690     +void __init mp_register_lapic_address(u64 address)
4691     {
4692     #ifndef CONFIG_XEN
4693     mp_lapic_addr = (unsigned long) address;
4694     @@ -853,13 +842,10 @@
4695     #endif
4696     }
4697    
4698     -
4699     -void __devinit mp_register_lapic (
4700     - u8 id,
4701     - u8 enabled)
4702     +void __devinit mp_register_lapic (u8 id, u8 enabled)
4703     {
4704     struct mpc_config_processor processor;
4705     - int boot_cpu = 0;
4706     + int boot_cpu = 0;
4707    
4708     if (MAX_APICS - id <= 0) {
4709     printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
4710     @@ -898,11 +884,9 @@
4711     u32 pin_programmed[4];
4712     } mp_ioapic_routing[MAX_IO_APICS];
4713    
4714     -
4715     -static int mp_find_ioapic (
4716     - int gsi)
4717     +static int mp_find_ioapic (int gsi)
4718     {
4719     - int i = 0;
4720     + int i = 0;
4721    
4722     /* Find the IOAPIC that manages this GSI. */
4723     for (i = 0; i < nr_ioapics; i++) {
4724     @@ -915,15 +899,11 @@
4725    
4726     return -1;
4727     }
4728     -
4729    
4730     -void __init mp_register_ioapic (
4731     - u8 id,
4732     - u32 address,
4733     - u32 gsi_base)
4734     +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
4735     {
4736     - int idx = 0;
4737     - int tmpid;
4738     + int idx = 0;
4739     + int tmpid;
4740    
4741     if (nr_ioapics >= MAX_IO_APICS) {
4742     printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4743     @@ -971,16 +951,10 @@
4744     mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
4745     mp_ioapic_routing[idx].gsi_base,
4746     mp_ioapic_routing[idx].gsi_end);
4747     -
4748     - return;
4749     }
4750    
4751     -
4752     -void __init mp_override_legacy_irq (
4753     - u8 bus_irq,
4754     - u8 polarity,
4755     - u8 trigger,
4756     - u32 gsi)
4757     +void __init
4758     +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
4759     {
4760     struct mpc_config_intsrc intsrc;
4761     int ioapic = -1;
4762     @@ -1018,15 +992,13 @@
4763     mp_irqs[mp_irq_entries] = intsrc;
4764     if (++mp_irq_entries == MAX_IRQ_SOURCES)
4765     panic("Max # of irq sources exceeded!\n");
4766     -
4767     - return;
4768     }
4769    
4770     void __init mp_config_acpi_legacy_irqs (void)
4771     {
4772     struct mpc_config_intsrc intsrc;
4773     - int i = 0;
4774     - int ioapic = -1;
4775     + int i = 0;
4776     + int ioapic = -1;
4777    
4778     /*
4779     * Fabricate the legacy ISA bus (bus #31).
4780     @@ -1095,12 +1067,12 @@
4781    
4782     #define MAX_GSI_NUM 4096
4783    
4784     -int mp_register_gsi (u32 gsi, int triggering, int polarity)
4785     +int mp_register_gsi(u32 gsi, int triggering, int polarity)
4786     {
4787     - int ioapic = -1;
4788     - int ioapic_pin = 0;
4789     - int idx, bit = 0;
4790     - static int pci_irq = 16;
4791     + int ioapic = -1;
4792     + int ioapic_pin = 0;
4793     + int idx, bit = 0;
4794     + static int pci_irq = 16;
4795     /*
4796     * Mapping between Global System Interrups, which
4797     * represent all possible interrupts, and IRQs
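
One detail in the mpparse_32 hunks above: the new #if around the busid range check exists because mpc_busid is an 8-bit field, so once MAX_MP_BUSSES is configured to 256 or more the comparison busid >= MAX_MP_BUSSES can never be true and compilers warn about it. A compact illustration, with a deliberately large limit assumed for demonstration:

    #include <stdio.h>

    #define MAX_MP_BUSSES 260       /* assumed large configuration */

    struct mpc_config_bus { unsigned char mpc_busid; };

    int main(void)
    {
        struct mpc_config_bus m = { 255 };

    #if MAX_MP_BUSSES < 256
        /* only compiled when an 8-bit busid can actually exceed the limit */
        if (m.mpc_busid >= MAX_MP_BUSSES)
            printf("busid %u out of range\n", m.mpc_busid);
    #endif
        printf("busid %u accepted (limit %d)\n", m.mpc_busid, MAX_MP_BUSSES);
        return 0;
    }
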
4798 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c
4799     --- linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 20:51:11.000000000 +0200
4800     +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 20:39:03.000000000 +0200
4801 niro 609 @@ -41,8 +41,7 @@
4802     * Various Linux-internal data structures created from the
4803     * MP-table.
4804     */
4805     -unsigned char apic_version [MAX_APICS];
4806     -unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4807     +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
4808     int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
4809    
4810     static int mp_current_pci_id = 0;
4811     @@ -56,7 +55,6 @@
4812     int mp_irq_entries;
4813    
4814     int nr_ioapics;
4815     -int pic_mode;
4816     unsigned long mp_lapic_addr = 0;
4817    
4818    
4819     @@ -71,19 +69,6 @@
4820     /* Bitmask of physically existing CPUs */
4821     physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
4822    
4823     -/* ACPI MADT entry parsing functions */
4824     -#ifdef CONFIG_ACPI
4825     -extern struct acpi_boot_flags acpi_boot;
4826     -#ifdef CONFIG_X86_LOCAL_APIC
4827     -extern int acpi_parse_lapic (acpi_table_entry_header *header);
4828     -extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
4829     -extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
4830     -#endif /*CONFIG_X86_LOCAL_APIC*/
4831     -#ifdef CONFIG_X86_IO_APIC
4832     -extern int acpi_parse_ioapic (acpi_table_entry_header *header);
4833     -#endif /*CONFIG_X86_IO_APIC*/
4834     -#endif /*CONFIG_ACPI*/
4835     -
4836     u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
4837    
4838    
4839     @@ -109,24 +94,20 @@
4840     static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
4841     {
4842     int cpu;
4843     - unsigned char ver;
4844     cpumask_t tmp_map;
4845     + char *bootup_cpu = "";
4846    
4847     if (!(m->mpc_cpuflag & CPU_ENABLED)) {
4848     disabled_cpus++;
4849     return;
4850     }
4851     -
4852     - printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
4853     - m->mpc_apicid,
4854     - (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
4855     - (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
4856     - m->mpc_apicver);
4857     -
4858     if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4859     - Dprintk(" Bootup CPU\n");
4860     + bootup_cpu = " (Bootup-CPU)";
4861     boot_cpu_id = m->mpc_apicid;
4862     }
4863     +
4864     + printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
4865     +
4866     if (num_processors >= NR_CPUS) {
4867     printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
4868     " Processor ignored.\n", NR_CPUS);
4869     @@ -137,24 +118,7 @@
4870     cpus_complement(tmp_map, cpu_present_map);
4871     cpu = first_cpu(tmp_map);
4872    
4873     -#if MAX_APICS < 255
4874     - if ((int)m->mpc_apicid > MAX_APICS) {
4875     - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
4876     - m->mpc_apicid, MAX_APICS);
4877     - return;
4878     - }
4879     -#endif
4880     - ver = m->mpc_apicver;
4881     -
4882     physid_set(m->mpc_apicid, phys_cpu_present_map);
4883     - /*
4884     - * Validate version
4885     - */
4886     - if (ver == 0x0) {
4887     - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
4888     - ver = 0x10;
4889     - }
4890     - apic_version[m->mpc_apicid] = ver;
4891     if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
4892     /*
4893     * bios_cpu_apicid is required to have processors listed
4894     @@ -185,37 +149,42 @@
4895     Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
4896    
4897     if (strncmp(str, "ISA", 3) == 0) {
4898     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
4899     - } else if (strncmp(str, "EISA", 4) == 0) {
4900     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
4901     + set_bit(m->mpc_busid, mp_bus_not_pci);
4902     } else if (strncmp(str, "PCI", 3) == 0) {
4903     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
4904     + clear_bit(m->mpc_busid, mp_bus_not_pci);
4905     mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
4906     mp_current_pci_id++;
4907     - } else if (strncmp(str, "MCA", 3) == 0) {
4908     - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
4909     } else {
4910     printk(KERN_ERR "Unknown bustype %s\n", str);
4911     }
4912     }
4913    
4914     +static int bad_ioapic(unsigned long address)
4915     +{
4916     + if (nr_ioapics >= MAX_IO_APICS) {
4917     + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
4918     + "(found %d)\n", MAX_IO_APICS, nr_ioapics);
4919     + panic("Recompile kernel with bigger MAX_IO_APICS!\n");
4920     + }
4921     + if (!address) {
4922     + printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
4923     + " found in table, skipping!\n");
4924     + return 1;
4925     + }
4926     + return 0;
4927     +}
4928     +
4929     static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
4930     {
4931     if (!(m->mpc_flags & MPC_APIC_USABLE))
4932     return;
4933    
4934     - printk("I/O APIC #%d Version %d at 0x%X.\n",
4935     - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
4936     - if (nr_ioapics >= MAX_IO_APICS) {
4937     - printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
4938     - MAX_IO_APICS, nr_ioapics);
4939     - panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
4940     - }
4941     - if (!m->mpc_apicaddr) {
4942     - printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
4943     - " found in MP table, skipping!\n");
4944     + printk("I/O APIC #%d at 0x%X.\n",
4945     + m->mpc_apicid, m->mpc_apicaddr);
4946     +
4947     + if (bad_ioapic(m->mpc_apicaddr))
4948     return;
4949     - }
4950     +
4951     mp_ioapics[nr_ioapics] = *m;
4952     nr_ioapics++;
4953     }
4954     @@ -239,19 +208,6 @@
4955     m->mpc_irqtype, m->mpc_irqflag & 3,
4956     (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
4957     m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
4958     - /*
4959     - * Well it seems all SMP boards in existence
4960     - * use ExtINT/LVT1 == LINT0 and
4961     - * NMI/LVT2 == LINT1 - the following check
4962     - * will show us if this assumptions is false.
4963     - * Until then we do not have to add baggage.
4964     - */
4965     - if ((m->mpc_irqtype == mp_ExtINT) &&
4966     - (m->mpc_destapiclint != 0))
4967     - BUG();
4968     - if ((m->mpc_irqtype == mp_NMI) &&
4969     - (m->mpc_destapiclint != 1))
4970     - BUG();
4971     }
4972    
4973     /*
4974     @@ -265,7 +221,7 @@
4975     unsigned char *mpt=((unsigned char *)mpc)+count;
4976    
4977     if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
4978     - printk("SMP mptable: bad signature [%c%c%c%c]!\n",
4979     + printk("MPTABLE: bad signature [%c%c%c%c]!\n",
4980     mpc->mpc_signature[0],
4981     mpc->mpc_signature[1],
4982     mpc->mpc_signature[2],
4983     @@ -273,31 +229,31 @@
4984     return 0;
4985     }
4986     if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
4987     - printk("SMP mptable: checksum error!\n");
4988     + printk("MPTABLE: checksum error!\n");
4989     return 0;
4990     }
4991     if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
4992     - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
4993     + printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
4994     mpc->mpc_spec);
4995     return 0;
4996     }
4997     if (!mpc->mpc_lapic) {
4998     - printk(KERN_ERR "SMP mptable: null local APIC address!\n");
4999     + printk(KERN_ERR "MPTABLE: null local APIC address!\n");
5000     return 0;
5001     }
5002     memcpy(str,mpc->mpc_oem,8);
5003     - str[8]=0;
5004     - printk(KERN_INFO "OEM ID: %s ",str);
5005     + str[8] = 0;
5006     + printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
5007    
5008     memcpy(str,mpc->mpc_productid,12);
5009     - str[12]=0;
5010     - printk("Product ID: %s ",str);
5011     + str[12] = 0;
5012     + printk("MPTABLE: Product ID: %s ",str);
5013    
5014     - printk("APIC at: 0x%X\n",mpc->mpc_lapic);
5015     + printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
5016    
5017     /* save the local APIC address, it might be non-default */
5018     if (!acpi_lapic)
5019     - mp_lapic_addr = mpc->mpc_lapic;
5020     + mp_lapic_addr = mpc->mpc_lapic;
5021    
5022     /*
5023     * Now process the configuration blocks.
5024     @@ -309,7 +265,7 @@
5025     struct mpc_config_processor *m=
5026     (struct mpc_config_processor *)mpt;
5027     if (!acpi_lapic)
5028     - MP_processor_info(m);
5029     + MP_processor_info(m);
5030     mpt += sizeof(*m);
5031     count += sizeof(*m);
5032     break;
5033     @@ -328,8 +284,8 @@
5034     struct mpc_config_ioapic *m=
5035     (struct mpc_config_ioapic *)mpt;
5036     MP_ioapic_info(m);
5037     - mpt+=sizeof(*m);
5038     - count+=sizeof(*m);
5039     + mpt += sizeof(*m);
5040     + count += sizeof(*m);
5041     break;
5042     }
5043     case MP_INTSRC:
5044     @@ -338,8 +294,8 @@
5045     (struct mpc_config_intsrc *)mpt;
5046    
5047     MP_intsrc_info(m);
5048     - mpt+=sizeof(*m);
5049     - count+=sizeof(*m);
5050     + mpt += sizeof(*m);
5051     + count += sizeof(*m);
5052     break;
5053     }
5054     case MP_LINTSRC:
5055     @@ -347,15 +303,15 @@
5056     struct mpc_config_lintsrc *m=
5057     (struct mpc_config_lintsrc *)mpt;
5058     MP_lintsrc_info(m);
5059     - mpt+=sizeof(*m);
5060     - count+=sizeof(*m);
5061     + mpt += sizeof(*m);
5062     + count += sizeof(*m);
5063     break;
5064     }
5065     }
5066     }
5067     clustered_apic_check();
5068     if (!num_processors)
5069     - printk(KERN_ERR "SMP mptable: no processors registered!\n");
5070     + printk(KERN_ERR "MPTABLE: no processors registered!\n");
5071     return num_processors;
5072     }
5073    
5074     @@ -451,13 +407,10 @@
5075     * 2 CPUs, numbered 0 & 1.
5076     */
5077     processor.mpc_type = MP_PROCESSOR;
5078     - /* Either an integrated APIC or a discrete 82489DX. */
5079     - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5080     + processor.mpc_apicver = 0;
5081     processor.mpc_cpuflag = CPU_ENABLED;
5082     - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5083     - (boot_cpu_data.x86_model << 4) |
5084     - boot_cpu_data.x86_mask;
5085     - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5086     + processor.mpc_cpufeature = 0;
5087     + processor.mpc_featureflag = 0;
5088     processor.mpc_reserved[0] = 0;
5089     processor.mpc_reserved[1] = 0;
5090     for (i = 0; i < 2; i++) {
5091     @@ -476,14 +429,6 @@
5092     case 5:
5093     memcpy(bus.mpc_bustype, "ISA ", 6);
5094     break;
5095     - case 2:
5096     - case 6:
5097     - case 3:
5098     - memcpy(bus.mpc_bustype, "EISA ", 6);
5099     - break;
5100     - case 4:
5101     - case 7:
5102     - memcpy(bus.mpc_bustype, "MCA ", 6);
5103     }
5104     MP_bus_info(&bus);
5105     if (mpc_default_type > 4) {
5106     @@ -494,7 +439,7 @@
5107    
5108     ioapic.mpc_type = MP_IOAPIC;
5109     ioapic.mpc_apicid = 2;
5110     - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
5111     + ioapic.mpc_apicver = 0;
5112     ioapic.mpc_flags = MPC_APIC_USABLE;
5113     ioapic.mpc_apicaddr = 0xFEC00000;
5114     MP_ioapic_info(&ioapic);
5115     @@ -537,13 +482,6 @@
5116     printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
5117    
5118     printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
5119     - if (mpf->mpf_feature2 & (1<<7)) {
5120     - printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
5121     - pic_mode = 1;
5122     - } else {
5123     - printk(KERN_INFO " Virtual Wire compatibility mode.\n");
5124     - pic_mode = 0;
5125     - }
5126    
5127     /*
5128     * Now see if we need to read further.
5129     @@ -620,7 +558,7 @@
5130     return 0;
5131     }
5132    
5133     -void __init find_intel_smp (void)
5134     +void __init find_smp_config(void)
5135     {
5136     unsigned int address;
5137    
5138     @@ -637,9 +575,7 @@
5139     smp_scan_config(0xF0000,0x10000))
5140     return;
5141     /*
5142     - * If it is an SMP machine we should know now, unless the
5143     - * configuration is in an EISA/MCA bus machine with an
5144     - * extended bios data area.
5145     + * If it is an SMP machine we should know now.
5146     *
5147     * there is a real-mode segmented pointer pointing to the
5148     * 4K EBDA area at 0x40E, calculate and scan it here.
5149     @@ -660,64 +596,38 @@
5150     printk(KERN_INFO "No mptable found.\n");
5151     }
5152    
5153     -/*
5154     - * - Intel MP Configuration Table
5155     - */
5156     -void __init find_smp_config (void)
5157     -{
5158     -#ifdef CONFIG_X86_LOCAL_APIC
5159     - find_intel_smp();
5160     -#endif
5161     -}
5162     -
5163     -
5164     /* --------------------------------------------------------------------------
5165     ACPI-based MP Configuration
5166     -------------------------------------------------------------------------- */
5167    
5168     #ifdef CONFIG_ACPI
5169    
5170     -void __init mp_register_lapic_address (
5171     - u64 address)
5172     +void __init mp_register_lapic_address(u64 address)
5173     {
5174     #ifndef CONFIG_XEN
5175     mp_lapic_addr = (unsigned long) address;
5176     -
5177     set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
5178     -
5179     if (boot_cpu_id == -1U)
5180     boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
5181     -
5182     - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
5183     #endif
5184     }
5185    
5186     -
5187     -void __cpuinit mp_register_lapic (
5188     - u8 id,
5189     - u8 enabled)
5190     +void __cpuinit mp_register_lapic (u8 id, u8 enabled)
5191     {
5192     struct mpc_config_processor processor;
5193     int boot_cpu = 0;
5194    
5195     - if (id >= MAX_APICS) {
5196     - printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
5197     - id, MAX_APICS);
5198     - return;
5199     - }
5200     -
5201     - if (id == boot_cpu_physical_apicid)
5202     + if (id == boot_cpu_id)
5203     boot_cpu = 1;
5204    
5205     #ifndef CONFIG_XEN
5206     processor.mpc_type = MP_PROCESSOR;
5207     processor.mpc_apicid = id;
5208     - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
5209     + processor.mpc_apicver = 0;
5210     processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
5211     processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
5212     - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
5213     - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
5214     - processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
5215     + processor.mpc_cpufeature = 0;
5216     + processor.mpc_featureflag = 0;
5217     processor.mpc_reserved[0] = 0;
5218     processor.mpc_reserved[1] = 0;
5219     #endif
5220     @@ -725,8 +635,6 @@
5221     MP_processor_info(&processor);
5222     }
5223    
5224     -#ifdef CONFIG_X86_IO_APIC
5225     -
5226     #define MP_ISA_BUS 0
5227     #define MP_MAX_IOAPIC_PIN 127
5228    
5229     @@ -737,11 +645,9 @@
5230     u32 pin_programmed[4];
5231     } mp_ioapic_routing[MAX_IO_APICS];
5232    
5233     -
5234     -static int mp_find_ioapic (
5235     - int gsi)
5236     +static int mp_find_ioapic(int gsi)
5237     {
5238     - int i = 0;
5239     + int i = 0;
5240    
5241     /* Find the IOAPIC that manages this GSI. */
5242     for (i = 0; i < nr_ioapics; i++) {
5243     @@ -751,28 +657,15 @@
5244     }
5245    
5246     printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
5247     -
5248     return -1;
5249     }
5250     -
5251    
5252     -void __init mp_register_ioapic (
5253     - u8 id,
5254     - u32 address,
5255     - u32 gsi_base)
5256     +void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
5257     {
5258     - int idx = 0;
5259     + int idx = 0;
5260    
5261     - if (nr_ioapics >= MAX_IO_APICS) {
5262     - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
5263     - "(found %d)\n", MAX_IO_APICS, nr_ioapics);
5264     - panic("Recompile kernel with bigger MAX_IO_APICS!\n");
5265     - }
5266     - if (!address) {
5267     - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
5268     - " found in MADT table, skipping!\n");
5269     + if (bad_ioapic(address))
5270     return;
5271     - }
5272    
5273     idx = nr_ioapics++;
5274    
5275     @@ -784,7 +677,7 @@
5276     set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
5277     #endif
5278     mp_ioapics[idx].mpc_apicid = id;
5279     - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
5280     + mp_ioapics[idx].mpc_apicver = 0;
5281    
5282     /*
5283     * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
5284     @@ -795,21 +688,15 @@
5285     mp_ioapic_routing[idx].gsi_end = gsi_base +
5286     io_apic_get_redir_entries(idx);
5287    
5288     - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
5289     + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
5290     "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
5291     - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
5292     + mp_ioapics[idx].mpc_apicaddr,
5293     mp_ioapic_routing[idx].gsi_start,
5294     mp_ioapic_routing[idx].gsi_end);
5295     -
5296     - return;
5297     }
5298    
5299     -
5300     -void __init mp_override_legacy_irq (
5301     - u8 bus_irq,
5302     - u8 polarity,
5303     - u8 trigger,
5304     - u32 gsi)
5305     +void __init
5306     +mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
5307     {
5308     struct mpc_config_intsrc intsrc;
5309     int ioapic = -1;
5310     @@ -847,22 +734,18 @@
5311     mp_irqs[mp_irq_entries] = intsrc;
5312     if (++mp_irq_entries == MAX_IRQ_SOURCES)
5313     panic("Max # of irq sources exceeded!\n");
5314     -
5315     - return;
5316     }
5317    
5318     -
5319     -void __init mp_config_acpi_legacy_irqs (void)
5320     +void __init mp_config_acpi_legacy_irqs(void)
5321     {
5322     struct mpc_config_intsrc intsrc;
5323     - int i = 0;
5324     - int ioapic = -1;
5325     + int i = 0;
5326     + int ioapic = -1;
5327    
5328     /*
5329     * Fabricate the legacy ISA bus (bus #31).
5330     */
5331     - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
5332     - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
5333     + set_bit(MP_ISA_BUS, mp_bus_not_pci);
5334    
5335     /*
5336     * Locate the IOAPIC that manages the ISA IRQs (0-15).
5337     @@ -915,24 +798,13 @@
5338     if (++mp_irq_entries == MAX_IRQ_SOURCES)
5339     panic("Max # of irq sources exceeded!\n");
5340     }
5341     -
5342     - return;
5343     }
5344    
5345     -#define MAX_GSI_NUM 4096
5346     -
5347     int mp_register_gsi(u32 gsi, int triggering, int polarity)
5348     {
5349     - int ioapic = -1;
5350     - int ioapic_pin = 0;
5351     - int idx, bit = 0;
5352     - static int pci_irq = 16;
5353     - /*
5354     - * Mapping between Global System Interrupts, which
5355     - * represent all possible interrupts, to the IRQs
5356     - * assigned to actual devices.
5357     - */
5358     - static int gsi_to_irq[MAX_GSI_NUM];
5359     + int ioapic = -1;
5360     + int ioapic_pin = 0;
5361     + int idx, bit = 0;
5362    
5363     if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
5364     return gsi;
5365     @@ -965,47 +837,14 @@
5366     if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
5367     Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
5368     mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
5369     - return gsi_to_irq[gsi];
5370     + return gsi;
5371     }
5372    
5373     mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
5374    
5375     - if (triggering == ACPI_LEVEL_SENSITIVE) {
5376     - /*
5377     - * For PCI devices assign IRQs in order, avoiding gaps
5378     - * due to unused I/O APIC pins.
5379     - */
5380     - int irq = gsi;
5381     - if (gsi < MAX_GSI_NUM) {
5382     - /*
5383     - * Retain the VIA chipset work-around (gsi > 15), but
5384     - * avoid a problem where the 8254 timer (IRQ0) is setup
5385     - * via an override (so it's not on pin 0 of the ioapic),
5386     - * and at the same time, the pin 0 interrupt is a PCI
5387     - * type. The gsi > 15 test could cause these two pins
5388     - * to be shared as IRQ0, and they are not shareable.
5389     - * So test for this condition, and if necessary, avoid
5390     - * the pin collision.
5391     - */
5392     - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
5393     - gsi = pci_irq++;
5394     - /*
5395     - * Don't assign IRQ used by ACPI SCI
5396     - */
5397     - if (gsi == acpi_fadt.sci_int)
5398     - gsi = pci_irq++;
5399     - gsi_to_irq[irq] = gsi;
5400     - } else {
5401     - printk(KERN_ERR "GSI %u is too high\n", gsi);
5402     - return gsi;
5403     - }
5404     - }
5405     -
5406     io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
5407     triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
5408     polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
5409     return gsi;
5410     }
5411     -
5412     -#endif /*CONFIG_X86_IO_APIC*/
5413     #endif /*CONFIG_ACPI*/
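
    [Note on this file] mp_register_gsi() loses its static gsi_to_irq[] remapping table and the pci_irq allocator: after this rework a GSI is used directly as the IRQ number, so the VIA workaround and the SCI-collision avoidance go away with it. What survives is the per-pin "already programmed" bitmap; its index arithmetic, reconstructed here purely as an illustration (names as in the code above):

	idx = ioapic_pin / 32;			/* which u32 of pin_programmed[4] */
	bit = ioapic_pin % 32;			/* which bit within it */
	if (mp_ioapic_routing[ioapic].pin_programmed[idx] & (1u << bit))
		return gsi;			/* pin already routed; GSI == IRQ now */
	mp_ioapic_routing[ioapic].pin_programmed[idx] |= 1u << bit;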
5414 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c
5415     --- linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5416     +++ linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5417 niro 609 @@ -116,8 +116,7 @@
5418     {
5419     int i, rc;
5420    
5421     - if (direction == DMA_NONE)
5422     - BUG();
5423     + BUG_ON(!valid_dma_direction(direction));
5424     WARN_ON(nents == 0 || sg[0].length == 0);
5425    
5426     if (swiotlb) {
5427     @@ -148,7 +147,7 @@
5428     {
5429     int i;
5430    
5431     - BUG_ON(direction == DMA_NONE);
5432     + BUG_ON(!valid_dma_direction(direction));
5433     if (swiotlb)
5434     swiotlb_unmap_sg(hwdev, sg, nents, direction);
5435     else {
5436     @@ -165,8 +164,7 @@
5437     {
5438     dma_addr_t dma_addr;
5439    
5440     - BUG_ON(direction == DMA_NONE);
5441     -
5442     + BUG_ON(!valid_dma_direction(direction));
5443     if (swiotlb) {
5444     dma_addr = swiotlb_map_page(
5445     dev, page, offset, size, direction);
5446     @@ -183,7 +181,7 @@
5447     dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
5448     enum dma_data_direction direction)
5449     {
5450     - BUG_ON(direction == DMA_NONE);
5451     + BUG_ON(!valid_dma_direction(direction));
5452     if (swiotlb)
5453     swiotlb_unmap_page(dev, dma_address, size, direction);
5454     else
5455     @@ -365,8 +363,7 @@
5456     {
5457     dma_addr_t dma;
5458    
5459     - if (direction == DMA_NONE)
5460     - BUG();
5461     + BUG_ON(!valid_dma_direction(direction));
5462     WARN_ON(size == 0);
5463    
5464     if (swiotlb) {
5465     @@ -387,8 +384,7 @@
5466     dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
5467     enum dma_data_direction direction)
5468     {
5469     - if (direction == DMA_NONE)
5470     - BUG();
5471     + BUG_ON(!valid_dma_direction(direction));
5472     if (swiotlb)
5473     swiotlb_unmap_single(dev, dma_addr, size, direction);
5474     else
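
    [Note on this file] Every open-coded `direction == DMA_NONE` check collapses into BUG_ON(!valid_dma_direction(direction)), which also rejects values outside the enum rather than only DMA_NONE. For reference, the 2.6.19 helper in <linux/dma-mapping.h> amounts to the following (quoted from memory; verify against the target tree):

	static inline int valid_dma_direction(int dma_direction)
	{
		return ((dma_direction == DMA_BIDIRECTIONAL) ||
			(dma_direction == DMA_TO_DEVICE) ||
			(dma_direction == DMA_FROM_DEVICE));
	}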
5475 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c
5476     --- linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 20:51:21.000000000 +0200
5477     +++ linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 20:39:03.000000000 +0200
5478 niro 609 @@ -3,7 +3,8 @@
5479     #include <linux/pci.h>
5480     #include <linux/cache.h>
5481     #include <linux/module.h>
5482     -#include <asm/dma-mapping.h>
5483     +#include <linux/dma-mapping.h>
5484     +
5485     #include <asm/proto.h>
5486     #include <asm/swiotlb.h>
5487     #include <asm/dma.h>
5488 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/process_32-xen.c linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c
5489     --- linux-2.6.25/arch/x86/kernel/process_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5490     +++ linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5491 niro 609 @@ -37,6 +37,7 @@
5492     #include <linux/kallsyms.h>
5493     #include <linux/ptrace.h>
5494     #include <linux/random.h>
5495     +#include <linux/personality.h>
5496    
5497     #include <asm/uaccess.h>
5498     #include <asm/pgtable.h>
5499     @@ -186,7 +187,7 @@
5500     void cpu_idle_wait(void)
5501     {
5502     unsigned int cpu, this_cpu = get_cpu();
5503     - cpumask_t map;
5504     + cpumask_t map, tmp = current->cpus_allowed;
5505    
5506     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5507     put_cpu();
5508     @@ -208,6 +209,8 @@
5509     }
5510     cpus_and(map, map, cpu_online_map);
5511     } while (!cpus_empty(map));
5512     +
5513     + set_cpus_allowed(current, tmp);
5514     }
5515     EXPORT_SYMBOL_GPL(cpu_idle_wait);
5516    
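    [Note on this hunk] The cpu_idle_wait() change fixes an affinity leak: the function pins the calling task to one CPU while it round-robins the others through idle, but previously returned without undoing the pinning. Reduced to its skeleton, the pattern is:

	cpumask_t saved = current->cpus_allowed;		/* remember caller's mask */

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));	/* pin for the scan */
	/* ... poke each online CPU and wait for it to pass through idle ... */
	set_cpus_allowed(current, saved);			/* restore on exit */

    The identical fix is applied to the x86-64 copy of cpu_idle_wait() further down.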
5517     @@ -240,9 +243,9 @@
5518     if (user_mode_vm(regs))
5519     printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
5520     printk(" EFLAGS: %08lx %s (%s %.*s)\n",
5521     - regs->eflags, print_tainted(), system_utsname.release,
5522     - (int)strcspn(system_utsname.version, " "),
5523     - system_utsname.version);
5524     + regs->eflags, print_tainted(), init_utsname()->release,
5525     + (int)strcspn(init_utsname()->version, " "),
5526     + init_utsname()->version);
5527     printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
5528     regs->eax,regs->ebx,regs->ecx,regs->edx);
5529     printk("ESI: %08lx EDI: %08lx EBP: %08lx",
5530     @@ -264,15 +267,6 @@
5531     * the "args".
5532     */
5533     extern void kernel_thread_helper(void);
5534     -__asm__(".section .text\n"
5535     - ".align 4\n"
5536     - "kernel_thread_helper:\n\t"
5537     - "movl %edx,%eax\n\t"
5538     - "pushl %edx\n\t"
5539     - "call *%ebx\n\t"
5540     - "pushl %eax\n\t"
5541     - "call do_exit\n"
5542     - ".previous");
5543    
5544     /*
5545     * Create a kernel thread
5546     @@ -290,7 +284,7 @@
5547     regs.xes = __USER_DS;
5548     regs.orig_eax = -1;
5549     regs.eip = (unsigned long) kernel_thread_helper;
5550     - regs.xcs = GET_KERNEL_CS();
5551     + regs.xcs = __KERNEL_CS | get_kernel_rpl();
5552     regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
5553    
5554     /* Ok, create the new process.. */
5555     @@ -369,13 +363,12 @@
5556    
5557     tsk = current;
5558     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
5559     - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5560     + p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
5561     + IO_BITMAP_BYTES, GFP_KERNEL);
5562     if (!p->thread.io_bitmap_ptr) {
5563     p->thread.io_bitmap_max = 0;
5564     return -ENOMEM;
5565     }
5566     - memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
5567     - IO_BITMAP_BYTES);
5568     set_tsk_thread_flag(p, TIF_IO_BITMAP);
5569     }
5570    
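    [Note on this hunk] kmemdup(), new in 2.6.19, replaces the kmalloc()+memcpy() pair used when a forking task inherits an I/O-port bitmap. Its behaviour is essentially (sketch of the mm/util.c helper):

	void *kmemdup(const void *src, size_t len, gfp_t gfp)
	{
		void *p = kmalloc(len, gfp);

		if (p)
			memcpy(p, src, len);
		return p;
	}

    so the error path (p == NULL) is unchanged and the copy can no longer be forgotten.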
5571     @@ -850,7 +843,7 @@
5572    
5573     unsigned long arch_align_stack(unsigned long sp)
5574     {
5575     - if (randomize_va_space)
5576     + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5577     sp -= get_random_int() % 8192;
5578     return sp & ~0xf;
5579     }
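    [Note on this hunk] With the added personality test, stack-top randomization can be disabled per process even while randomize_va_space stays on system-wide. From userspace the usual way to set the flag is (illustrative; this is the idiom setarch -R relies on):

	#include <sys/personality.h>

	/* OR ADDR_NO_RANDOMIZE into the current personality, then exec */
	personality(ADDR_NO_RANDOMIZE | personality(0xffffffff));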
5580 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/process_64-xen.c linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c
5581     --- linux-2.6.25/arch/x86/kernel/process_64-xen.c 2008-05-23 20:51:11.000000000 +0200
5582     +++ linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c 2008-05-23 20:39:03.000000000 +0200
5583 niro 609 @@ -89,25 +89,24 @@
5584     }
5585     EXPORT_SYMBOL(idle_notifier_unregister);
5586    
5587     -enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
5588     -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
5589     -
5590     void enter_idle(void)
5591     {
5592     - __get_cpu_var(idle_state) = CPU_IDLE;
5593     + write_pda(isidle, 1);
5594     atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
5595     }
5596    
5597     static void __exit_idle(void)
5598     {
5599     - __get_cpu_var(idle_state) = CPU_NOT_IDLE;
5600     + if (test_and_clear_bit_pda(0, isidle) == 0)
5601     + return;
5602     atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
5603     }
5604    
5605     /* Called from interrupts to signify idle end */
5606     void exit_idle(void)
5607     {
5608     - if (current->pid | read_pda(irqcount))
5609     + /* idle loop has pid 0 */
5610     + if (current->pid)
5611     return;
5612     __exit_idle();
5613     }
5614     @@ -184,6 +183,9 @@
5615     play_dead();
5616     enter_idle();
5617     idle();
5618     + /* In many cases the interrupt that ended idle
5619     + has already called exit_idle. But some idle
5620     + loops can be woken up without an interrupt. */
5621     __exit_idle();
5622     }
5623    
5624     @@ -196,7 +198,7 @@
5625     void cpu_idle_wait(void)
5626     {
5627     unsigned int cpu, this_cpu = get_cpu();
5628     - cpumask_t map;
5629     + cpumask_t map, tmp = current->cpus_allowed;
5630    
5631     set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
5632     put_cpu();
5633     @@ -219,6 +221,8 @@
5634     }
5635     cpus_and(map, map, cpu_online_map);
5636     } while (!cpus_empty(map));
5637     +
5638     + set_cpus_allowed(current, tmp);
5639     }
5640     EXPORT_SYMBOL_GPL(cpu_idle_wait);
5641    
5642     @@ -250,9 +254,9 @@
5643     print_modules();
5644     printk("Pid: %d, comm: %.20s %s %s %.*s\n",
5645     current->pid, current->comm, print_tainted(),
5646     - system_utsname.release,
5647     - (int)strcspn(system_utsname.version, " "),
5648     - system_utsname.version);
5649     + init_utsname()->release,
5650     + (int)strcspn(init_utsname()->version, " "),
5651     + init_utsname()->version);
5652     printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
5653     printk_address(regs->rip);
5654     printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
5655     @@ -310,6 +314,7 @@
5656    
5657     kfree(t->io_bitmap_ptr);
5658     t->io_bitmap_ptr = NULL;
5659     + clear_thread_flag(TIF_IO_BITMAP);
5660     /*
5661     * Careful, clear this in the TSS too:
5662     */
5663     @@ -340,6 +345,7 @@
5664     if (t->flags & _TIF_IA32)
5665     current_thread_info()->status |= TS_COMPAT;
5666     }
5667     + t->flags &= ~_TIF_DEBUG;
5668    
5669     tsk->thread.debugreg0 = 0;
5670     tsk->thread.debugreg1 = 0;
5671     @@ -432,7 +438,7 @@
5672     asm("mov %%es,%0" : "=m" (p->thread.es));
5673     asm("mov %%ds,%0" : "=m" (p->thread.ds));
5674    
5675     - if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
5676     + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
5677     p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
5678     if (!p->thread.io_bitmap_ptr) {
5679     p->thread.io_bitmap_max = 0;
5680     @@ -440,6 +446,7 @@
5681     }
5682     memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
5683     IO_BITMAP_BYTES);
5684     + set_tsk_thread_flag(p, TIF_IO_BITMAP);
5685     }
5686    
5687     /*
5688     @@ -474,6 +481,30 @@
5689     }
5690    
5691     /*
5692     + * This special macro can be used to load a debugging register
5693     + */
5694     +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
5695     +
5696     +static inline void __switch_to_xtra(struct task_struct *prev_p,
5697     + struct task_struct *next_p)
5698     +{
5699     + struct thread_struct *prev, *next;
5700     +
5701     + prev = &prev_p->thread;
5702     + next = &next_p->thread;
5703     +
5704     + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
5705     + loaddebug(next, 0);
5706     + loaddebug(next, 1);
5707     + loaddebug(next, 2);
5708     + loaddebug(next, 3);
5709     + /* no 4 and 5 */
5710     + loaddebug(next, 6);
5711     + loaddebug(next, 7);
5712     + }
5713     +}
5714     +
5715     +/*
5716     * switch_to(x,y) should switch tasks from x to y.
5717     *
5718     * This could still be optimized:
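    [Note on this hunk] Debug-register reloads move out of line: __switch_to() (patched further down) now tests the incoming task's thread flags once against _TIF_WORK_CTXSW and only then calls __switch_to_xtra(), so the common switch path no longer pays for seven set_debugreg() calls. The gating mask is presumably the 2.6.19 definition below (check asm-x86_64/thread_info.h in the target tree):

	/* flags to check in __switch_to() */
	#define _TIF_WORK_CTXSW	_TIF_DEBUG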
5719     @@ -495,6 +526,10 @@
5720     struct physdev_set_iobitmap iobmp_op;
5721     multicall_entry_t _mcl[8], *mcl = _mcl;
5722    
5723     + /* we're going to use this soon, after a few expensive things */
5724     + if (next_p->fpu_counter > 5)
5725     + prefetch(&next->i387.fxsave);
5726     +
5727     /*
5728     * This is basically '__unlazy_fpu', except that we queue a
5729     * multicall to indicate FPU task switch, rather than
5730     @@ -507,7 +542,8 @@
5731     mcl->op = __HYPERVISOR_fpu_taskswitch;
5732     mcl->args[0] = 1;
5733     mcl++;
5734     - }
5735     + } else
5736     + prev_p->fpu_counter = 0;
5737    
5738     /*
5739     * Reload esp0, LDT and the page table pointer:
5740     @@ -587,21 +623,29 @@
5741     write_pda(oldrsp, next->userrsp);
5742     write_pda(pcurrent, next_p);
5743     write_pda(kernelstack,
5744     - task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5745     + (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
5746     +#ifdef CONFIG_CC_STACKPROTECTOR
5747     + write_pda(stack_canary, next_p->stack_canary);
5748     +
5749     + /*
5750     + * Build time only check to make sure the stack_canary is at
5751     + * offset 40 in the pda; this is a gcc ABI requirement
5752     + */
5753     + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
5754     +#endif
5755    
5756     /*
5757     * Now maybe reload the debug registers
5758     */
5759     - if (unlikely(next->debugreg7)) {
5760     - set_debugreg(next->debugreg0, 0);
5761     - set_debugreg(next->debugreg1, 1);
5762     - set_debugreg(next->debugreg2, 2);
5763     - set_debugreg(next->debugreg3, 3);
5764     - /* no 4 and 5 */
5765     - set_debugreg(next->debugreg6, 6);
5766     - set_debugreg(next->debugreg7, 7);
5767     - }
5768     + if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
5769     + __switch_to_xtra(prev_p, next_p);
5770    
5771     + /* If the task has used fpu the last 5 timeslices, just do a full
5772     + * restore of the math state immediately to avoid the trap; the
5773     + * chances of needing FPU soon are obviously high now
5774     + */
5775     + if (next_p->fpu_counter > 5)
5776     + math_state_restore();
5777     return prev_p;
5778     }
5779    
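    [Note on this hunk] Two independent features share this hunk. First, CONFIG_CC_STACKPROTECTOR support copies the incoming task's canary into the PDA on every switch; the BUILD_BUG_ON pins the member to offset 40 because gcc's -fstack-protector code on x86-64 addresses the canary as %gs:40 directly, an ABI constant the kernel cannot move. Second, the fpu_counter heuristic: a task that used the FPU in more than five consecutive timeslices gets math_state_restore() called eagerly, trading one predictable restore for a near-certain device-not-available trap. A rough sketch of the counter's life cycle (the maintenance side lives in the trap path, not in this hunk):

	/* in the #NM trap handler (math_state_restore), roughly: */
	tsk->fpu_counter++;			/* another timeslice that touched the FPU */

	/* in __switch_to, as added above: */
	if (next_p->fpu_counter > 5)
		math_state_restore();		/* restore now, skip the trap */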
5780     @@ -821,7 +865,7 @@
5781    
5782     unsigned long arch_align_stack(unsigned long sp)
5783     {
5784     - if (randomize_va_space)
5785     + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
5786     sp -= get_random_int() % 8192;
5787     return sp & ~0xf;
5788     }
5789 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/setup_32-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c
5790     --- linux-2.6.25/arch/x86/kernel/setup_32-xen.c 2008-05-23 20:51:11.000000000 +0200
5791     +++ linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c 2008-05-23 20:39:03.000000000 +0200
5792 niro 609 @@ -56,6 +56,7 @@
5793     #include <asm/apic.h>
5794     #include <asm/e820.h>
5795     #include <asm/mpspec.h>
5796     +#include <asm/mmzone.h>
5797     #include <asm/setup.h>
5798     #include <asm/arch_hooks.h>
5799     #include <asm/sections.h>
5800     @@ -105,18 +106,6 @@
5801    
5802     unsigned long mmu_cr4_features;
5803    
5804     -#ifdef CONFIG_ACPI
5805     - int acpi_disabled = 0;
5806     -#else
5807     - int acpi_disabled = 1;
5808     -#endif
5809     -EXPORT_SYMBOL(acpi_disabled);
5810     -
5811     -#ifdef CONFIG_ACPI
5812     -int __initdata acpi_force = 0;
5813     -extern acpi_interrupt_flags acpi_sci_flags;
5814     -#endif
5815     -
5816     /* for MCA, but anyone else can use it if they want */
5817     unsigned int machine_id;
5818     #ifdef CONFIG_MCA
5819     @@ -170,7 +159,6 @@
5820     #endif
5821    
5822     extern void early_cpu_init(void);
5823     -extern void generic_apic_probe(char *);
5824     extern int root_mountflags;
5825    
5826     unsigned long saved_videomode;
5827     @@ -243,9 +231,6 @@
5828     .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
5829     } };
5830    
5831     -#define ADAPTER_ROM_RESOURCES \
5832     - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
5833     -
5834     static struct resource video_rom_resource = {
5835     .name = "Video ROM",
5836     .start = 0xc0000,
5837     @@ -307,9 +292,6 @@
5838     .flags = IORESOURCE_BUSY | IORESOURCE_IO
5839     } };
5840    
5841     -#define STANDARD_IO_RESOURCES \
5842     - (sizeof standard_io_resources / sizeof standard_io_resources[0])
5843     -
5844     #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
5845    
5846     static int __init romchecksum(unsigned char *rom, unsigned long length)
5847     @@ -372,7 +354,7 @@
5848     }
5849    
5850     /* check for adapter roms on 2k boundaries */
5851     - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
5852     + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
5853     rom = isa_bus_to_virt(start);
5854     if (!romsignature(rom))
5855     continue;
5856     @@ -764,246 +746,152 @@
5857     }
5858     #endif
5859    
5860     -static void __init parse_cmdline_early (char ** cmdline_p)
5861     +static int __initdata user_defined_memmap = 0;
5862     +
5863     +/*
5864     + * "mem=nopentium" disables the 4MB page tables.
5865     + * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
5866     + * to <mem>, overriding the bios size.
5867     + * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
5868     + * <start> to <start>+<mem>, overriding the bios size.
5869     + *
5870     + * HPA tells me bootloaders need to parse mem=, so no new
5871     + * option should be mem= [also see Documentation/i386/boot.txt]
5872     + */
5873     +static int __init parse_mem(char *arg)
5874     {
5875     - char c = ' ', *to = command_line, *from = saved_command_line;
5876     - int len = 0, max_cmdline;
5877     - int userdef = 0;
5878     -
5879     - if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
5880     - max_cmdline = COMMAND_LINE_SIZE;
5881     - memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
5882     - /* Save unparsed command line copy for /proc/cmdline */
5883     - saved_command_line[max_cmdline-1] = '\0';
5884     -
5885     - for (;;) {
5886     - if (c != ' ')
5887     - goto next_char;
5888     - /*
5889     - * "mem=nopentium" disables the 4MB page tables.
5890     - * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
5891     - * to <mem>, overriding the bios size.
5892     - * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
5893     - * <start> to <start>+<mem>, overriding the bios size.
5894     - *
5895     - * HPA tells me bootloaders need to parse mem=, so no new
5896     - * option should be mem= [also see Documentation/i386/boot.txt]
5897     - */
5898     - if (!memcmp(from, "mem=", 4)) {
5899     - if (to != command_line)
5900     - to--;
5901     - if (!memcmp(from+4, "nopentium", 9)) {
5902     - from += 9+4;
5903     - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
5904     - disable_pse = 1;
5905     - } else {
5906     - /* If the user specifies memory size, we
5907     - * limit the BIOS-provided memory map to
5908     - * that size. exactmap can be used to specify
5909     - * the exact map. mem=number can be used to
5910     - * trim the existing memory map.
5911     - */
5912     - unsigned long long mem_size;
5913     -
5914     - mem_size = memparse(from+4, &from);
5915     - limit_regions(mem_size);
5916     - userdef=1;
5917     - }
5918     - }
5919     + if (!arg)
5920     + return -EINVAL;
5921    
5922     - else if (!memcmp(from, "memmap=", 7)) {
5923     - if (to != command_line)
5924     - to--;
5925     - if (!memcmp(from+7, "exactmap", 8)) {
5926     -#ifdef CONFIG_CRASH_DUMP
5927     - /* If we are doing a crash dump, we
5928     - * still need to know the real mem
5929     - * size before original memory map is
5930     - * reset.
5931     - */
5932     - find_max_pfn();
5933     - saved_max_pfn = max_pfn;
5934     -#endif
5935     - from += 8+7;
5936     - e820.nr_map = 0;
5937     - userdef = 1;
5938     - } else {
5939     - /* If the user specifies memory size, we
5940     - * limit the BIOS-provided memory map to
5941     - * that size. exactmap can be used to specify
5942     - * the exact map. mem=number can be used to
5943     - * trim the existing memory map.
5944     - */
5945     - unsigned long long start_at, mem_size;
5946     + if (strcmp(arg, "nopentium") == 0) {
5947     + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
5948     + disable_pse = 1;
5949     + } else {
5950     + /* If the user specifies memory size, we
5951     + * limit the BIOS-provided memory map to
5952     + * that size. exactmap can be used to specify
5953     + * the exact map. mem=number can be used to
5954     + * trim the existing memory map.
5955     + */
5956     + unsigned long long mem_size;
5957    
5958     - mem_size = memparse(from+7, &from);
5959     - if (*from == '@') {
5960     - start_at = memparse(from+1, &from);
5961     - add_memory_region(start_at, mem_size, E820_RAM);
5962     - } else if (*from == '#') {
5963     - start_at = memparse(from+1, &from);
5964     - add_memory_region(start_at, mem_size, E820_ACPI);
5965     - } else if (*from == '$') {
5966     - start_at = memparse(from+1, &from);
5967     - add_memory_region(start_at, mem_size, E820_RESERVED);
5968     - } else {
5969     - limit_regions(mem_size);
5970     - userdef=1;
5971     - }
5972     - }
5973     - }
5974     -
5975     - else if (!memcmp(from, "noexec=", 7))
5976     - noexec_setup(from + 7);
5977     + mem_size = memparse(arg, &arg);
5978     + limit_regions(mem_size);
5979     + user_defined_memmap = 1;
5980     + }
5981     + return 0;
5982     +}
5983     +early_param("mem", parse_mem);
5984    
5985     +static int __init parse_memmap(char *arg)
5986     +{
5987     + if (!arg)
5988     + return -EINVAL;
5989    
5990     -#ifdef CONFIG_X86_MPPARSE
5991     - /*
5992     - * If the BIOS enumerates physical processors before logical,
5993     - * maxcpus=N at enumeration-time can be used to disable HT.
5994     + if (strcmp(arg, "exactmap") == 0) {
5995     +#ifdef CONFIG_CRASH_DUMP
5996     + /* If we are doing a crash dump, we
5997     + * still need to know the real mem
5998     + * size before original memory map is
5999     + * reset.
6000     */
6001     - else if (!memcmp(from, "maxcpus=", 8)) {
6002     - extern unsigned int maxcpus;
6003     -
6004     - maxcpus = simple_strtoul(from + 8, NULL, 0);
6005     - }
6006     + find_max_pfn();
6007     + saved_max_pfn = max_pfn;
6008     #endif
6009     + e820.nr_map = 0;
6010     + user_defined_memmap = 1;
6011     + } else {
6012     + /* If the user specifies memory size, we
6013     + * limit the BIOS-provided memory map to
6014     + * that size. exactmap can be used to specify
6015     + * the exact map. mem=number can be used to
6016     + * trim the existing memory map.
6017     + */
6018     + unsigned long long start_at, mem_size;
6019    
6020     -#ifdef CONFIG_ACPI
6021     - /* "acpi=off" disables both ACPI table parsing and interpreter */
6022     - else if (!memcmp(from, "acpi=off", 8)) {
6023     - disable_acpi();
6024     - }
6025     -
6026     - /* acpi=force to over-ride black-list */
6027     - else if (!memcmp(from, "acpi=force", 10)) {
6028     - acpi_force = 1;
6029     - acpi_ht = 1;
6030     - acpi_disabled = 0;
6031     - }
6032     -
6033     - /* acpi=strict disables out-of-spec workarounds */
6034     - else if (!memcmp(from, "acpi=strict", 11)) {
6035     - acpi_strict = 1;
6036     - }
6037     -
6038     - /* Limit ACPI just to boot-time to enable HT */
6039     - else if (!memcmp(from, "acpi=ht", 7)) {
6040     - if (!acpi_force)
6041     - disable_acpi();
6042     - acpi_ht = 1;
6043     - }
6044     -
6045     - /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
6046     - else if (!memcmp(from, "pci=noacpi", 10)) {
6047     - acpi_disable_pci();
6048     - }
6049     - /* "acpi=noirq" disables ACPI interrupt routing */
6050     - else if (!memcmp(from, "acpi=noirq", 10)) {
6051     - acpi_noirq_set();
6052     + mem_size = memparse(arg, &arg);
6053     + if (*arg == '@') {
6054     + start_at = memparse(arg+1, &arg);
6055     + add_memory_region(start_at, mem_size, E820_RAM);
6056     + } else if (*arg == '#') {
6057     + start_at = memparse(arg+1, &arg);
6058     + add_memory_region(start_at, mem_size, E820_ACPI);
6059     + } else if (*arg == '$') {
6060     + start_at = memparse(arg+1, &arg);
6061     + add_memory_region(start_at, mem_size, E820_RESERVED);
6062     + } else {
6063     + limit_regions(mem_size);
6064     + user_defined_memmap = 1;
6065     }
6066     + }
6067     + return 0;
6068     +}
6069     +early_param("memmap", parse_memmap);
6070    
6071     - else if (!memcmp(from, "acpi_sci=edge", 13))
6072     - acpi_sci_flags.trigger = 1;
6073 niro 611 -
6074     - else if (!memcmp(from, "acpi_sci=level", 14))
6075     - acpi_sci_flags.trigger = 3;
6076 niro 609 +#ifdef CONFIG_PROC_VMCORE
6077     +/* elfcorehdr= specifies the location of elf core header
6078     + * stored by the crashed kernel.
6079     + */
6080     +static int __init parse_elfcorehdr(char *arg)
6081     +{
6082     + if (!arg)
6083     + return -EINVAL;
6084    
6085 niro 611 - else if (!memcmp(from, "acpi_sci=high", 13))
6086     - acpi_sci_flags.polarity = 1;
6087 niro 609 + elfcorehdr_addr = memparse(arg, &arg);
6088     + return 0;
6089     +}
6090     +early_param("elfcorehdr", parse_elfcorehdr);
6091     +#endif /* CONFIG_PROC_VMCORE */
6092    
6093 niro 611 - else if (!memcmp(from, "acpi_sci=low", 12))
6094     - acpi_sci_flags.polarity = 3;
6095 niro 609 +/*
6096     + * highmem=size forces highmem to be exactly 'size' bytes.
6097     + * This works even on boxes that have no highmem otherwise.
6098     + * This also works to reduce highmem size on bigger boxes.
6099     + */
6100     +static int __init parse_highmem(char *arg)
6101     +{
6102     + if (!arg)
6103     + return -EINVAL;
6104    
6105 niro 611 -#ifdef CONFIG_X86_IO_APIC
6106     - else if (!memcmp(from, "acpi_skip_timer_override", 24))
6107     - acpi_skip_timer_override = 1;
6108 niro 609 + highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
6109     + return 0;
6110     +}
6111     +early_param("highmem", parse_highmem);
6112    
6113 niro 611 - if (!memcmp(from, "disable_timer_pin_1", 19))
6114     - disable_timer_pin_1 = 1;
6115     - if (!memcmp(from, "enable_timer_pin_1", 18))
6116     - disable_timer_pin_1 = -1;
6117     -
6118     - /* disable IO-APIC */
6119     - else if (!memcmp(from, "noapic", 6))
6120     - disable_ioapic_setup();
6121     -#endif /* CONFIG_X86_IO_APIC */
6122     -#endif /* CONFIG_ACPI */
6123 niro 609 +/*
6124     + * vmalloc=size forces the vmalloc area to be exactly 'size'
6125     + * bytes. This can be used to increase (or decrease) the
6126     + * vmalloc area - the default is 128m.
6127     + */
6128     +static int __init parse_vmalloc(char *arg)
6129     +{
6130     + if (!arg)
6131     + return -EINVAL;
6132    
6133     -#ifdef CONFIG_X86_LOCAL_APIC
6134     - /* enable local APIC */
6135     - else if (!memcmp(from, "lapic", 5))
6136     - lapic_enable();
6137     -
6138     - /* disable local APIC */
6139     - else if (!memcmp(from, "nolapic", 6))
6140     - lapic_disable();
6141     -#endif /* CONFIG_X86_LOCAL_APIC */
6142     + __VMALLOC_RESERVE = memparse(arg, &arg);
6143     + return 0;
6144     +}
6145     +early_param("vmalloc", parse_vmalloc);
6146    
6147     -#ifdef CONFIG_KEXEC
6148     - /* crashkernel=size@addr specifies the location to reserve for
6149     - * a crash kernel. By reserving this memory we guarantee
6150     - * that linux never set's it up as a DMA target.
6151     - * Useful for holding code to do something appropriate
6152     - * after a kernel panic.
6153     - */
6154     - else if (!memcmp(from, "crashkernel=", 12)) {
6155     #ifndef CONFIG_XEN
6156     - unsigned long size, base;
6157     - size = memparse(from+12, &from);
6158     - if (*from == '@') {
6159     - base = memparse(from+1, &from);
6160     - /* FIXME: Do I want a sanity check
6161     - * to validate the memory range?
6162     - */
6163     - crashk_res.start = base;
6164     - crashk_res.end = base + size - 1;
6165     - }
6166     -#else
6167     - printk("Ignoring crashkernel command line, "
6168     - "parameter will be supplied by xen\n");
6169     -#endif
6170     - }
6171     -#endif
6172     -#ifdef CONFIG_PROC_VMCORE
6173     - /* elfcorehdr= specifies the location of elf core header
6174     - * stored by the crashed kernel.
6175     - */
6176     - else if (!memcmp(from, "elfcorehdr=", 11))
6177     - elfcorehdr_addr = memparse(from+11, &from);
6178     -#endif
6179     +/*
6180     + * reservetop=size reserves a hole at the top of the kernel address space which
6181     + * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
6182     + * so relocating the fixmap can be done before paging initialization.
6183     + */
6184     +static int __init parse_reservetop(char *arg)
6185     +{
6186     + unsigned long address;
6187    
6188     - /*
6189     - * highmem=size forces highmem to be exactly 'size' bytes.
6190     - * This works even on boxes that have no highmem otherwise.
6191     - * This also works to reduce highmem size on bigger boxes.
6192     - */
6193     - else if (!memcmp(from, "highmem=", 8))
6194     - highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
6195     -
6196     - /*
6197     - * vmalloc=size forces the vmalloc area to be exactly 'size'
6198     - * bytes. This can be used to increase (or decrease) the
6199     - * vmalloc area - the default is 128m.
6200     - */
6201     - else if (!memcmp(from, "vmalloc=", 8))
6202     - __VMALLOC_RESERVE = memparse(from+8, &from);
6203     + if (!arg)
6204     + return -EINVAL;
6205    
6206     - next_char:
6207     - c = *(from++);
6208     - if (!c)
6209     - break;
6210     - if (COMMAND_LINE_SIZE <= ++len)
6211     - break;
6212     - *(to++) = c;
6213     - }
6214     - *to = '\0';
6215     - *cmdline_p = command_line;
6216     - if (userdef) {
6217     - printk(KERN_INFO "user-defined physical RAM map:\n");
6218     - print_memory_map("user");
6219     - }
6220     + address = memparse(arg, &arg);
6221     + reserve_top_address(address);
6222     + return 0;
6223     }
6224     +early_param("reservetop", parse_reservetop);
6225     +#endif
6226    
6227     /*
6228     * Callback for efi_memory_walk.
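
    [Note on this hunk] All of the hand-rolled scanning that parse_cmdline_early() used to do is re-expressed above as self-registering early_param() handlers; setup_arch() just calls parse_early_param() once after copying the Xen-provided command line. The mechanism, reduced to a minimal hypothetical example (parse_early_param() walks the registered table very early in boot):

	static unsigned long foo_bytes;			/* hypothetical option */

	static int __init parse_foo(char *arg)
	{
		if (!arg)				/* "foo" given without '=' */
			return -EINVAL;
		foo_bytes = memparse(arg, &arg);	/* accepts k/K/m/M suffixes */
		return 0;
	}
	early_param("foo", parse_foo);			/* matches foo=... on the command line */

    Options that must act before the memory map is final (mem=, memmap=, noexec=, reservetop=) are exactly the ones that need this early hook rather than a regular __setup().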
6229     @@ -1024,7 +912,7 @@
6230     static int __init
6231     efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
6232     {
6233     - memory_present(0, start, end);
6234     + memory_present(0, PFN_UP(start), PFN_DOWN(end));
6235     return 0;
6236     }
6237    
6238     @@ -1291,6 +1179,14 @@
6239     }
6240     printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
6241     pages_to_mb(highend_pfn - highstart_pfn));
6242     + num_physpages = highend_pfn;
6243     + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
6244     +#else
6245     + num_physpages = max_low_pfn;
6246     + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
6247     +#endif
6248     +#ifdef CONFIG_FLATMEM
6249     + max_mapnr = num_physpages;
6250     #endif
6251     printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
6252     pages_to_mb(max_low_pfn));
6253     @@ -1302,22 +1198,19 @@
6254    
6255     void __init zone_sizes_init(void)
6256     {
6257     - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
6258     - unsigned int max_dma, low;
6259     -
6260     - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6261     - low = max_low_pfn;
6262     -
6263     - if (low < max_dma)
6264     - zones_size[ZONE_DMA] = low;
6265     - else {
6266     - zones_size[ZONE_DMA] = max_dma;
6267     - zones_size[ZONE_NORMAL] = low - max_dma;
6268     + unsigned long max_zone_pfns[MAX_NR_ZONES];
6269     + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
6270     + max_zone_pfns[ZONE_DMA] =
6271     + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
6272     + max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
6273     #ifdef CONFIG_HIGHMEM
6274     - zones_size[ZONE_HIGHMEM] = highend_pfn - low;
6275     + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
6276     + add_active_range(0, 0, highend_pfn);
6277     +#else
6278     + add_active_range(0, 0, max_low_pfn);
6279     #endif
6280     - }
6281     - free_area_init(zones_size);
6282     +
6283     + free_area_init_nodes(max_zone_pfns);
6284     }
6285     #else
6286     extern unsigned long __init setup_memory(void);
6287     @@ -1374,6 +1267,7 @@
6288     */
6289     acpi_reserve_bootmem();
6290     #endif
6291     + numa_kva_reserve();
6292     #endif /* !CONFIG_XEN */
6293    
6294     #ifdef CONFIG_BLK_DEV_INITRD
6295     @@ -1559,7 +1453,7 @@
6296     request_resource(&iomem_resource, &video_ram_resource);
6297    
6298     /* request I/O space for devices used on all i[345]86 PCs */
6299     - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6300     + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6301     request_resource(&ioport_resource, &standard_io_resources[i]);
6302     return 0;
6303     }
6304     @@ -1700,17 +1594,19 @@
6305     data_resource.start = virt_to_phys(_etext);
6306     data_resource.end = virt_to_phys(_edata)-1;
6307    
6308     - parse_cmdline_early(cmdline_p);
6309     + if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
6310     + i = COMMAND_LINE_SIZE;
6311     + memcpy(saved_command_line, xen_start_info->cmd_line, i);
6312     + saved_command_line[i - 1] = '\0';
6313     + parse_early_param();
6314    
6315     -#ifdef CONFIG_EARLY_PRINTK
6316     - {
6317     - char *s = strstr(*cmdline_p, "earlyprintk=");
6318     - if (s) {
6319     - setup_early_printk(strchr(s, '=') + 1);
6320     - printk("early console enabled\n");
6321     - }
6322     + if (user_defined_memmap) {
6323     + printk(KERN_INFO "user-defined physical RAM map:\n");
6324     + print_memory_map("user");
6325     }
6326     -#endif
6327     +
6328     + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6329     + *cmdline_p = command_line;
6330    
6331     max_low_pfn = setup_memory();
6332    
6333     @@ -1817,7 +1713,7 @@
6334     dmi_scan_machine();
6335    
6336     #ifdef CONFIG_X86_GENERICARCH
6337     - generic_apic_probe(*cmdline_p);
6338     + generic_apic_probe();
6339     #endif
6340     if (efi_enabled)
6341     efi_map_memmap();
6342     @@ -1838,9 +1734,11 @@
6343     acpi_boot_table_init();
6344     #endif
6345    
6346     +#ifdef CONFIG_PCI
6347     #ifdef CONFIG_X86_IO_APIC
6348     check_acpi_pci(); /* Checks more than just ACPI actually */
6349     #endif
6350     +#endif
6351    
6352     #ifdef CONFIG_ACPI
6353     acpi_boot_init();
6354 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/setup64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c
6355     --- linux-2.6.25/arch/x86/kernel/setup64-xen.c 2008-05-23 20:51:11.000000000 +0200
6356     +++ linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c 2008-05-23 20:39:03.000000000 +0200
6357     @@ -31,7 +31,7 @@
6358     #include <asm/hypervisor.h>
6359     #endif
6360    
6361     -char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
6362     +char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
6363    
6364     cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
6365    
6366     @@ -55,8 +55,10 @@
6367     on Enable(default)
6368     off Disable
6369     */
6370     -int __init nonx_setup(char *str)
6371     +static int __init nonx_setup(char *str)
6372     {
6373     + if (!str)
6374     + return -EINVAL;
6375     if (!strncmp(str, "on", 2)) {
6376     __supported_pte_mask |= _PAGE_NX;
6377     do_not_nx = 0;
6378     @@ -64,9 +66,9 @@
6379     do_not_nx = 1;
6380     __supported_pte_mask &= ~_PAGE_NX;
6381     }
6382     - return 1;
6383     + return 0;
6384     }
6385     -__setup("noexec=", nonx_setup); /* parsed early actually */
6386     +early_param("noexec", nonx_setup);
6387    
6388     int force_personality32 = 0;
6389    
6390     @@ -102,12 +104,9 @@
6391     #endif
6392    
6393     /* Copy section for each CPU (we discard the original) */
6394     - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
6395     -#ifdef CONFIG_MODULES
6396     - if (size < PERCPU_ENOUGH_ROOM)
6397     - size = PERCPU_ENOUGH_ROOM;
6398     -#endif
6399     + size = PERCPU_ENOUGH_ROOM;
6400    
6401     + printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
6402     for_each_cpu_mask (i, cpu_possible_map) {
6403     char *ptr;
6404    
6405     @@ -169,7 +168,10 @@
6406     /* Setup up data that may be needed in __get_free_pages early */
6407     asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
6408     #ifndef CONFIG_XEN
6409     + /* Memory clobbers used to order PDA accesses */
6410     + mb();
6411     wrmsrl(MSR_GS_BASE, pda);
6412     + mb();
6413     #else
6414     if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
6415     (unsigned long)pda))
6416     @@ -302,28 +304,17 @@
6417     * set up and load the per-CPU TSS
6418     */
6419     for (v = 0; v < N_EXCEPTION_STACKS; v++) {
6420     + static const unsigned int order[N_EXCEPTION_STACKS] = {
6421     + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
6422     + [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
6423     + };
6424     if (cpu) {
6425     - static const unsigned int order[N_EXCEPTION_STACKS] = {
6426     - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
6427     - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
6428     - };
6429     -
6430     estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
6431     if (!estacks)
6432     panic("Cannot allocate exception stack %ld %d\n",
6433     v, cpu);
6434     }
6435     - switch (v + 1) {
6436     -#if DEBUG_STKSZ > EXCEPTION_STKSZ
6437     - case DEBUG_STACK:
6438     - cpu_pda(cpu)->debugstack = (unsigned long)estacks;
6439     - estacks += DEBUG_STKSZ;
6440     - break;
6441     -#endif
6442     - default:
6443     - estacks += EXCEPTION_STKSZ;
6444     - break;
6445     - }
6446     + estacks += PAGE_SIZE << order[v];
6447     orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
6448     }
6449    
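    [Note on this hunk] The exception-stack setup replaces a switch on the vector with a size table indexed by stack number, using GCC's range-designator extension; advancing estacks by PAGE_SIZE << order[v] then works uniformly for the normal and the oversized debug stacks. The designator syntax, shown on a toy array since it is easy to misread:

	/* GNU C range designators: default every slot, then override one */
	int tbl[8] = { [0 ... 7] = 1, [3] = 5 };	/* => {1,1,1,5,1,1,1,1} */
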
6450     diff -Naur linux-2.6.25/arch/x86/kernel/setup_64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c
6451     --- linux-2.6.25/arch/x86/kernel/setup_64-xen.c 2008-05-23 20:51:11.000000000 +0200
6452     +++ linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c 2008-05-23 20:39:03.000000000 +0200
6453 niro 609 @@ -118,16 +118,6 @@
6454    
6455     unsigned long mmu_cr4_features;
6456    
6457     -int acpi_disabled;
6458     -EXPORT_SYMBOL(acpi_disabled);
6459     -#ifdef CONFIG_ACPI
6460     -extern int __initdata acpi_ht;
6461     -extern acpi_interrupt_flags acpi_sci_flags;
6462     -int __initdata acpi_force = 0;
6463     -#endif
6464     -
6465     -int acpi_numa __initdata;
6466     -
6467     /* Boot loader ID as an integer, for the benefit of proc_dointvec */
6468     int bootloader_type;
6469    
6470     @@ -151,10 +141,6 @@
6471    
6472     struct edid_info edid_info;
6473     EXPORT_SYMBOL_GPL(edid_info);
6474     -struct e820map e820;
6475     -#ifdef CONFIG_XEN
6476     -struct e820map machine_e820;
6477     -#endif
6478    
6479     extern int root_mountflags;
6480    
6481     @@ -181,9 +167,6 @@
6482     .flags = IORESOURCE_BUSY | IORESOURCE_IO }
6483     };
6484    
6485     -#define STANDARD_IO_RESOURCES \
6486     - (sizeof standard_io_resources / sizeof standard_io_resources[0])
6487     -
6488     #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
6489    
6490     struct resource data_resource = {
6491     @@ -230,9 +213,6 @@
6492     .flags = IORESOURCE_ROM }
6493     };
6494    
6495     -#define ADAPTER_ROM_RESOURCES \
6496     - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
6497     -
6498     static struct resource video_rom_resource = {
6499     .name = "Video ROM",
6500     .start = 0xc0000,
6501     @@ -309,7 +289,8 @@
6502     }
6503    
6504     /* check for adapter roms on 2k boundaries */
6505     - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
6506     + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
6507     + start += 2048) {
6508     rom = isa_bus_to_virt(start);
6509     if (!romsignature(rom))
6510     continue;
6511     @@ -329,187 +310,22 @@
6512     }
6513     }
6514    
6515     -/* Check for full argument with no trailing characters */
6516     -static int fullarg(char *p, char *arg)
6517     +#ifdef CONFIG_PROC_VMCORE
6518     +/* elfcorehdr= specifies the location of elf core header
6519     + * stored by the crashed kernel. This option will be passed
6520     + * by kexec loader to the capture kernel.
6521     + */
6522     +static int __init setup_elfcorehdr(char *arg)
6523     {
6524     - int l = strlen(arg);
6525     - return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
6526     + char *end;
6527     + if (!arg)
6528     + return -EINVAL;
6529     + elfcorehdr_addr = memparse(arg, &end);
6530     + return end > arg ? 0 : -EINVAL;
6531     }
6532     -
6533     -static __init void parse_cmdline_early (char ** cmdline_p)
6534     -{
6535     - char c = ' ', *to = command_line, *from = COMMAND_LINE;
6536     - int len = 0;
6537     - int userdef = 0;
6538     -
6539     - for (;;) {
6540     - if (c != ' ')
6541     - goto next_char;
6542     -
6543     -#ifdef CONFIG_SMP
6544     - /*
6545     - * If the BIOS enumerates physical processors before logical,
6546     - * maxcpus=N at enumeration-time can be used to disable HT.
6547     - */
6548     - else if (!memcmp(from, "maxcpus=", 8)) {
6549     - extern unsigned int maxcpus;
6550     -
6551     - maxcpus = simple_strtoul(from + 8, NULL, 0);
6552     - }
6553     -#endif
6554     -#ifdef CONFIG_ACPI
6555     - /* "acpi=off" disables both ACPI table parsing and interpreter init */
6556     - if (fullarg(from,"acpi=off"))
6557     - disable_acpi();
6558     -
6559     - if (fullarg(from, "acpi=force")) {
6560     - /* add later when we do DMI horrors: */
6561     - acpi_force = 1;
6562     - acpi_disabled = 0;
6563     - }
6564     -
6565     - /* acpi=ht just means: do ACPI MADT parsing
6566     - at bootup, but don't enable the full ACPI interpreter */
6567     - if (fullarg(from, "acpi=ht")) {
6568     - if (!acpi_force)
6569     - disable_acpi();
6570     - acpi_ht = 1;
6571     - }
6572     - else if (fullarg(from, "pci=noacpi"))
6573     - acpi_disable_pci();
6574     - else if (fullarg(from, "acpi=noirq"))
6575     - acpi_noirq_set();
6576     -
6577     - else if (fullarg(from, "acpi_sci=edge"))
6578     - acpi_sci_flags.trigger = 1;
6579     - else if (fullarg(from, "acpi_sci=level"))
6580     - acpi_sci_flags.trigger = 3;
6581     - else if (fullarg(from, "acpi_sci=high"))
6582     - acpi_sci_flags.polarity = 1;
6583     - else if (fullarg(from, "acpi_sci=low"))
6584     - acpi_sci_flags.polarity = 3;
6585     -
6586     - /* acpi=strict disables out-of-spec workarounds */
6587     - else if (fullarg(from, "acpi=strict")) {
6588     - acpi_strict = 1;
6589     - }
6590     -#ifdef CONFIG_X86_IO_APIC
6591     - else if (fullarg(from, "acpi_skip_timer_override"))
6592     - acpi_skip_timer_override = 1;
6593     -#endif
6594     -#endif
6595     -
6596     -#ifndef CONFIG_XEN
6597     - if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
6598     - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
6599     - disable_apic = 1;
6600     - }
6601     -
6602     - if (fullarg(from, "noapic"))
6603     - skip_ioapic_setup = 1;
6604     -
6605     - if (fullarg(from,"apic")) {
6606     - skip_ioapic_setup = 0;
6607     - ioapic_force = 1;
6608     - }
6609     -#endif
6610     -
6611     - if (!memcmp(from, "mem=", 4))
6612     - parse_memopt(from+4, &from);
6613     -
6614     - if (!memcmp(from, "memmap=", 7)) {
6615     - /* exactmap option is for used defined memory */
6616     - if (!memcmp(from+7, "exactmap", 8)) {
6617     -#ifdef CONFIG_CRASH_DUMP
6618     - /* If we are doing a crash dump, we
6619     - * still need to know the real mem
6620     - * size before original memory map is
6621     - * reset.
6622     - */
6623     - saved_max_pfn = e820_end_of_ram();
6624     -#endif
6625     - from += 8+7;
6626     - end_pfn_map = 0;
6627     - e820.nr_map = 0;
6628     - userdef = 1;
6629     - }
6630     - else {
6631     - parse_memmapopt(from+7, &from);
6632     - userdef = 1;
6633     - }
6634     - }
6635     -
6636     -#ifdef CONFIG_NUMA
6637     - if (!memcmp(from, "numa=", 5))
6638     - numa_setup(from+5);
6639     +early_param("elfcorehdr", setup_elfcorehdr);
6640     #endif
6641    
6642     - if (!memcmp(from,"iommu=",6)) {
6643     - iommu_setup(from+6);
6644     - }
6645     -
6646     - if (fullarg(from,"oops=panic"))
6647     - panic_on_oops = 1;
6648     -
6649     - if (!memcmp(from, "noexec=", 7))
6650     - nonx_setup(from + 7);
6651     -
6652     -#ifdef CONFIG_KEXEC
6653     - /* crashkernel=size@addr specifies the location to reserve for
6654     - * a crash kernel. By reserving this memory we guarantee
6655     - * that linux never set's it up as a DMA target.
6656     - * Useful for holding code to do something appropriate
6657     - * after a kernel panic.
6658     - */
6659     - else if (!memcmp(from, "crashkernel=", 12)) {
6660     -#ifndef CONFIG_XEN
6661     - unsigned long size, base;
6662     - size = memparse(from+12, &from);
6663     - if (*from == '@') {
6664     - base = memparse(from+1, &from);
6665     - /* FIXME: Do I want a sanity check
6666     - * to validate the memory range?
6667     - */
6668     - crashk_res.start = base;
6669     - crashk_res.end = base + size - 1;
6670     - }
6671     -#else
6672     - printk("Ignoring crashkernel command line, "
6673     - "parameter will be supplied by xen\n");
6674     -#endif
6675     - }
6676     -#endif
6677     -
6678     -#ifdef CONFIG_PROC_VMCORE
6679     - /* elfcorehdr= specifies the location of elf core header
6680     - * stored by the crashed kernel. This option will be passed
6681     - * by kexec loader to the capture kernel.
6682     - */
6683     - else if(!memcmp(from, "elfcorehdr=", 11))
6684     - elfcorehdr_addr = memparse(from+11, &from);
6685     -#endif
6686     -
6687     -#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
6688     - else if (!memcmp(from, "additional_cpus=", 16))
6689     - setup_additional_cpus(from+16);
6690     -#endif
6691     -
6692     - next_char:
6693     - c = *(from++);
6694     - if (!c)
6695     - break;
6696     - if (COMMAND_LINE_SIZE <= ++len)
6697     - break;
6698     - *(to++) = c;
6699     - }
6700     - if (userdef) {
6701     - printk(KERN_INFO "user-defined physical RAM map:\n");
6702     - e820_print_map("user");
6703     - }
6704     - *to = '\0';
6705     - *cmdline_p = command_line;
6706     -}
6707     -
6708     #ifndef CONFIG_NUMA
6709     static void __init
6710     contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
6711     @@ -521,10 +337,11 @@
6712     if (bootmap == -1L)
6713     panic("Cannot find bootmem map of size %ld\n",bootmap_size);
6714     bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
6715     + e820_register_active_regions(0, start_pfn, end_pfn);
6716     #ifdef CONFIG_XEN
6717     - e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
6718     + free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
6719     #else
6720     - e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
6721     + free_bootmem_with_active_regions(0, end_pfn);
6722     #endif
6723     reserve_bootmem(bootmap, bootmap_size);
6724     }
6725     @@ -587,6 +404,10 @@
6726     void __init setup_arch(char **cmdline_p)
6727     {
6728     #ifdef CONFIG_XEN
6729     + extern struct e820map machine_e820;
6730     +
6731     + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6732     +
6733     /* Register a call for panic conditions. */
6734     atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
6735    
6736     @@ -612,6 +433,8 @@
6737    
6738     ARCH_SETUP
6739     #else
6740     + printk(KERN_INFO "Command line: %s\n", saved_command_line);
6741     +
6742     ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
6743     screen_info = SCREEN_INFO;
6744     edid_info = EDID_INFO;
6745     @@ -639,16 +462,22 @@
6746     data_resource.start = virt_to_phys(&_etext);
6747     data_resource.end = virt_to_phys(&_edata)-1;
6748    
6749     - parse_cmdline_early(cmdline_p);
6750     -
6751     early_identify_cpu(&boot_cpu_data);
6752    
6753     + strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
6754     + *cmdline_p = command_line;
6755     +
6756     + parse_early_param();
6757     +
6758     + finish_e820_parsing();
6759     +
6760     + e820_register_active_regions(0, 0, -1UL);
6761     /*
6762     * partially used pages are not usable - thus
6763     * we are rounding upwards:
6764     */
6765     end_pfn = e820_end_of_ram();
6766     - num_physpages = end_pfn; /* for pfn_valid */
6767     + num_physpages = end_pfn;
6768    
6769     check_efer();
6770    
6771     @@ -659,6 +488,14 @@
6772     if (is_initial_xendomain())
6773     dmi_scan_machine();
6774    
6775     + /* How many end-of-memory variables you have, grandma! */
6776     + max_low_pfn = end_pfn;
6777     + max_pfn = end_pfn;
6778     + high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
6779     +
6780     + /* Remove active ranges so rediscovery with NUMA-awareness happens */
6781     + remove_all_active_ranges();
6782     +
6783     #ifdef CONFIG_ACPI_NUMA
6784     /*
6785     * Parse SRAT to discover nodes.
6786     @@ -848,16 +685,16 @@
6787     BUG();
6788     }
6789    
6790     +#ifdef CONFIG_ACPI
6791     if (!is_initial_xendomain()) {
6792     acpi_disabled = 1;
6793     -#ifdef CONFIG_ACPI
6794     acpi_ht = 0;
6795     -#endif
6796     }
6797     #endif
6798     +#endif
6799    
6800     -#ifndef CONFIG_XEN
6801     - check_ioapic();
6802     +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
6803     + early_quirks();
6804     #endif
6805    
6806     zap_low_mappings(0);
6807     @@ -917,6 +754,7 @@
6808     }
6809     #else
6810     e820_reserve_resources(e820.map, e820.nr_map);
6811     + e820_mark_nosave_regions();
6812     #endif
6813    
6814     request_resource(&iomem_resource, &video_ram_resource);
6815     @@ -924,7 +762,7 @@
6816     {
6817     unsigned i;
6818     /* request I/O space for devices used on all i[345]86 PCs */
6819     - for (i = 0; i < STANDARD_IO_RESOURCES; i++)
6820     + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
6821     request_resource(&ioport_resource, &standard_io_resources[i]);
6822     }
6823    
6824     @@ -1108,7 +946,7 @@
6825     #endif
6826     }
6827    
6828     -static void __init init_amd(struct cpuinfo_x86 *c)
6829     +static void __cpuinit init_amd(struct cpuinfo_x86 *c)
6830     {
6831     unsigned level;
6832    
6833     @@ -1164,6 +1002,12 @@
6834    
6835     /* Fix cpuid4 emulation for more */
6836     num_cache_leaves = 3;
6837     +
6838     + /* When there is only one core no need to synchronize RDTSC */
6839     + if (num_possible_cpus() == 1)
6840     + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6841     + else
6842     + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6843     }
6844    
6845     static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
6846     @@ -1245,8 +1089,7 @@
6847     node = first_node(node_online_map);
6848     numa_set_node(cpu, node);
6849    
6850     - if (acpi_numa > 0)
6851     - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6852     + printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
6853     #endif
6854     }
6855    
6856     @@ -1280,6 +1123,8 @@
6857     if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
6858     (c->x86 == 0x6 && c->x86_model >= 0x0e))
6859     set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
6860     + if (c->x86 == 6)
6861     + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
6862     set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
6863     c->x86_max_cores = intel_num_cpu_cores(c);
6864    
6865     @@ -1498,8 +1343,8 @@
6866    
6867     /* Intel-defined (#2) */
6868     "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
6869     - "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
6870     - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6871     + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
6872     + NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
6873     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
6874    
6875     /* VIA/Cyrix/Centaur-defined */
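
The setup_64 hunks above drop the hand-rolled parse_cmdline_early() loop in favor of early_param() handlers; setup_elfcorehdr() shows the pattern: memparse() the argument and treat "no characters consumed" (end == arg) as -EINVAL. A minimal userspace sketch of that memparse()-style validation follows; memparse_demo is a hypothetical stand-in, not the kernel function:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical userspace re-implementation of memparse() semantics:
     * parse a number with an optional K/M/G suffix and report where
     * parsing stopped, mirroring how setup_elfcorehdr() validates its
     * argument with "end > arg". */
    static unsigned long long memparse_demo(const char *arg, char **end)
    {
        unsigned long long val = strtoull(arg, end, 0);

        switch (**end) {
        case 'G': case 'g': val <<= 10; /* fall through */
        case 'M': case 'm': val <<= 10; /* fall through */
        case 'K': case 'k': val <<= 10; (*end)++; break;
        }
        return val;
    }

    int main(void)
    {
        const char *arg = "128M";
        char *end;
        unsigned long long addr = memparse_demo(arg, &end);

        /* end > arg means at least one character was consumed */
        printf("elfcorehdr=%s -> %llu bytes, %s\n", arg, addr,
               end > arg ? "accepted" : "rejected (-EINVAL)");
        return 0;
    }
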
6876 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/smp_32-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c
6877     --- linux-2.6.25/arch/x86/kernel/smp_32-xen.c 2008-05-23 20:51:11.000000000 +0200
6878     +++ linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c 2008-05-23 20:39:03.000000000 +0200
6879 niro 609 @@ -279,8 +279,7 @@
6880     * 2) Leave the mm if we are in the lazy tlb mode.
6881     */
6882    
6883     -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
6884     - struct pt_regs *regs)
6885     +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
6886     {
6887     unsigned long cpu;
6888    
6889     @@ -567,16 +566,14 @@
6890     * all the work is done automatically when
6891     * we return from the interrupt.
6892     */
6893     -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
6894     - struct pt_regs *regs)
6895     +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
6896     {
6897    
6898     return IRQ_HANDLED;
6899     }
6900    
6901     #include <linux/kallsyms.h>
6902     -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
6903     - struct pt_regs *regs)
6904     +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
6905     {
6906     void (*func) (void *info) = call_data->func;
6907     void *info = call_data->info;
6908     @@ -603,3 +600,69 @@
6909     return IRQ_HANDLED;
6910     }
6911    
6912     +/*
6913     + * this function sends a 'generic call function' IPI to one other CPU
6914     + * in the system.
6915     + *
6916     + * cpu is a standard Linux logical CPU number.
6917     + */
6918     +static void
6919     +__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
6920     + int nonatomic, int wait)
6921     +{
6922     + struct call_data_struct data;
6923     + int cpus = 1;
6924     +
6925     + data.func = func;
6926     + data.info = info;
6927     + atomic_set(&data.started, 0);
6928     + data.wait = wait;
6929     + if (wait)
6930     + atomic_set(&data.finished, 0);
6931     +
6932     + call_data = &data;
6933     + wmb();
6934     + /* Send a message to all other CPUs and wait for them to respond */
6935     + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
6936     +
6937     + /* Wait for response */
6938     + while (atomic_read(&data.started) != cpus)
6939     + cpu_relax();
6940     +
6941     + if (!wait)
6942     + return;
6943     +
6944     + while (atomic_read(&data.finished) != cpus)
6945     + cpu_relax();
6946     +}
6947     +
6948     +/*
6949     + * smp_call_function_single - Run a function on another CPU
6950     + * @func: The function to run. This must be fast and non-blocking.
6951     + * @info: An arbitrary pointer to pass to the function.
6952     + * @nonatomic: Currently unused.
6953     + * @wait: If true, wait until function has completed on other CPUs.
6954     + *
6955     + * Returns 0 on success, else a negative status code.
6956     + *
6957     + * Does not return until the remote CPU is nearly ready to execute <func>,
6958     + * is executing it, or has finished executing it.
6959     + */
6960     +
6961     +int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
6962     + int nonatomic, int wait)
6963     +{
6964     + /* prevent preemption and reschedule on another processor */
6965     + int me = get_cpu();
6966     + if (cpu == me) {
6967     + WARN_ON(1);
6968     + put_cpu();
6969     + return -EBUSY;
6970     + }
6971     + spin_lock_bh(&call_lock);
6972     + __smp_call_function_single(cpu, func, info, nonatomic, wait);
6973     + spin_unlock_bh(&call_lock);
6974     + put_cpu();
6975     + return 0;
6976     +}
6977     +EXPORT_SYMBOL(smp_call_function_single);
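
The __smp_call_function_single() added above is a two-counter rendezvous: the caller publishes call_data, fires an IPI, and spins until the target bumps started (and, for wait=1, finished). Below is a minimal pthread/stdatomic sketch of the same handshake, with thread creation standing in for send_IPI_mask(); all names are illustrative, not kernel APIs:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct call_data {
        void (*func)(void *);
        void *info;
        atomic_int started;
        atomic_int finished;
    };

    static void *target_cpu(void *arg)
    {
        struct call_data *d = arg;

        atomic_fetch_add(&d->started, 1);  /* "request seen" */
        d->func(d->info);
        atomic_fetch_add(&d->finished, 1); /* "func completed" */
        return NULL;
    }

    static void say_hello(void *info)
    {
        printf("running remotely: %s\n", (char *)info);
    }

    int main(void)
    {
        struct call_data d = { .func = say_hello, .info = "hello" };
        pthread_t t;

        atomic_init(&d.started, 0);
        atomic_init(&d.finished, 0);

        pthread_create(&t, NULL, target_cpu, &d);  /* the "IPI" */
        while (atomic_load(&d.started) != 1)       /* wait for response */
            ;
        while (atomic_load(&d.finished) != 1)      /* the wait == 1 path */
            ;
        pthread_join(t, NULL);
        return 0;
    }
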
6978 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/smp_64-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c
6979     --- linux-2.6.25/arch/x86/kernel/smp_64-xen.c 2008-05-23 20:51:11.000000000 +0200
6980     +++ linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c 2008-05-23 20:39:03.000000000 +0200
6981 niro 609 @@ -381,9 +381,8 @@
6982     /* prevent preemption and reschedule on another processor */
6983     int me = get_cpu();
6984     if (cpu == me) {
6985     - WARN_ON(1);
6986     put_cpu();
6987     - return -EBUSY;
6988     + return 0;
6989     }
6990     spin_lock_bh(&call_lock);
6991     __smp_call_function_single(cpu, func, info, nonatomic, wait);
6992     @@ -501,7 +500,7 @@
6993     #ifndef CONFIG_XEN
6994     asmlinkage void smp_reschedule_interrupt(void)
6995     #else
6996     -asmlinkage irqreturn_t smp_reschedule_interrupt(void)
6997     +asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
6998     #endif
6999     {
7000     #ifndef CONFIG_XEN
7001     @@ -514,7 +513,7 @@
7002     #ifndef CONFIG_XEN
7003     asmlinkage void smp_call_function_interrupt(void)
7004     #else
7005     -asmlinkage irqreturn_t smp_call_function_interrupt(void)
7006     +asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
7007     #endif
7008     {
7009     void (*func) (void *info) = call_data->func;
7010     @@ -545,31 +544,3 @@
7011     return IRQ_HANDLED;
7012     #endif
7013     }
7014     -
7015     -int safe_smp_processor_id(void)
7016     -{
7017     -#ifdef CONFIG_XEN
7018     - return smp_processor_id();
7019     -#else
7020     - unsigned apicid, i;
7021     -
7022     - if (disable_apic)
7023     - return 0;
7024     -
7025     - apicid = hard_smp_processor_id();
7026     - if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
7027     - return apicid;
7028     -
7029     - for (i = 0; i < NR_CPUS; ++i) {
7030     - if (x86_cpu_to_apicid[i] == apicid)
7031     - return i;
7032     - }
7033     -
7034     - /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
7035     - * or called too early. Either way, we must be CPU 0. */
7036     - if (x86_cpu_to_apicid[0] == BAD_APICID)
7037     - return 0;
7038     -
7039     - return 0; /* Should not happen */
7040     -#endif
7041     -}
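
Note the semantic change in the hunk above: a self-call to smp_call_function_single() now returns 0 without the WARN_ON()/-EBUSY, yet it still does not invoke func on the local CPU. A toy sketch of that contract, with hypothetical stand-ins and the IPI machinery elided:

    #include <stdio.h>

    static int current_cpu = 0;

    static int call_single(int cpu, void (*func)(void *), void *info)
    {
        if (cpu == current_cpu)
            return 0;    /* silently "succeeds"; func is NOT run */
        func(info);      /* stand-in for the remote IPI path */
        return 0;
    }

    static void mark(void *info) { *(int *)info = 1; }

    int main(void)
    {
        int ran = 0;

        call_single(0, mark, &ran);  /* self-call */
        printf("self-call ran func? %s\n", ran ? "yes" : "no");  /* no */
        call_single(1, mark, &ran);
        printf("remote call ran func? %s\n", ran ? "yes" : "no"); /* yes */
        return 0;
    }
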
7042 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/time_32-xen.c linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c
7043     --- linux-2.6.25/arch/x86/kernel/time_32-xen.c 2008-05-23 20:51:22.000000000 +0200
7044     +++ linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c 2008-05-23 20:39:03.000000000 +0200
7045 niro 609 @@ -89,7 +89,6 @@
7046     unsigned long vxtime_hz = PIT_TICK_RATE;
7047     struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
7048     volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
7049     -unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
7050     struct timespec __xtime __section_xtime;
7051     struct timezone __sys_tz __section_sys_tz;
7052     #endif
7053     @@ -97,8 +96,6 @@
7054     unsigned int cpu_khz; /* Detected as we calibrate the TSC */
7055     EXPORT_SYMBOL(cpu_khz);
7056    
7057     -extern unsigned long wall_jiffies;
7058     -
7059     DEFINE_SPINLOCK(rtc_lock);
7060     EXPORT_SYMBOL(rtc_lock);
7061    
7062     @@ -265,11 +262,10 @@
7063     time_t wtm_sec, xtime_sec;
7064     u64 tmp, wc_nsec;
7065    
7066     - /* Adjust wall-clock time base based on wall_jiffies ticks. */
7067     + /* Adjust wall-clock time base. */
7068     wc_nsec = processed_system_time;
7069     wc_nsec += sec * (u64)NSEC_PER_SEC;
7070     wc_nsec += nsec;
7071     - wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
7072    
7073     /* Split wallclock base into seconds and nanoseconds. */
7074     tmp = wc_nsec;
7075     @@ -387,16 +383,10 @@
7076     shadow = &per_cpu(shadow_time, cpu);
7077    
7078     do {
7079     - unsigned long lost;
7080     -
7081     local_time_version = shadow->version;
7082     seq = read_seqbegin(&xtime_lock);
7083    
7084     usec = get_usec_offset(shadow);
7085     - lost = jiffies - wall_jiffies;
7086     -
7087     - if (unlikely(lost))
7088     - usec += lost * (USEC_PER_SEC / HZ);
7089    
7090     sec = xtime.tv_sec;
7091     usec += (xtime.tv_nsec / NSEC_PER_USEC);
7092     @@ -519,7 +509,7 @@
7093     write_seqlock_irq(&xtime_lock);
7094    
7095     sec = xtime.tv_sec;
7096     - nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
7097     + nsec = xtime.tv_nsec;
7098     __normalize_time(&sec, &nsec);
7099    
7100     op.cmd = XENPF_settime;
7101     @@ -593,42 +583,49 @@
7102     }
7103     #endif
7104    
7105     -#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
7106     unsigned long profile_pc(struct pt_regs *regs)
7107     {
7108     unsigned long pc = instruction_pointer(regs);
7109    
7110     -#ifdef __x86_64__
7111     - /* Assume the lock function has either no stack frame or only a single word.
7112     - This checks if the address on the stack looks like a kernel text address.
7113     - There is a small window for false hits, but in that case the tick
7114     - is just accounted to the spinlock function.
7115     - Better would be to write these functions in assembler again
7116     - and check exactly. */
7117     +#if defined(CONFIG_SMP) || defined(__x86_64__)
7118     if (!user_mode_vm(regs) && in_lock_functions(pc)) {
7119     - char *v = *(char **)regs->rsp;
7120     - if ((v >= _stext && v <= _etext) ||
7121     - (v >= _sinittext && v <= _einittext) ||
7122     - (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
7123     - return (unsigned long)v;
7124     - return ((unsigned long *)regs->rsp)[1];
7125     +# ifdef CONFIG_FRAME_POINTER
7126     +# ifdef __i386__
7127     + return ((unsigned long *)regs->ebp)[1];
7128     +# else
7129     + return ((unsigned long *)regs->rbp)[1];
7130     +# endif
7131     +# else
7132     +# ifdef __i386__
7133     + unsigned long *sp;
7134     + if ((regs->xcs & 2) == 0)
7135     + sp = (unsigned long *)&regs->esp;
7136     + else
7137     + sp = (unsigned long *)regs->esp;
7138     +# else
7139     + unsigned long *sp = (unsigned long *)regs->rsp;
7140     +# endif
7141     + /* Return address is either directly at stack pointer
7142     + or above a saved eflags. Eflags has bits 22-31 zero,
7143     + kernel addresses don't. */
7144     + if (sp[0] >> 22)
7145     + return sp[0];
7146     + if (sp[1] >> 22)
7147     + return sp[1];
7148     +# endif
7149     }
7150     -#else
7151     - if (!user_mode_vm(regs) && in_lock_functions(pc))
7152     - return *(unsigned long *)(regs->ebp + 4);
7153     #endif
7154    
7155     return pc;
7156     }
7157     EXPORT_SYMBOL(profile_pc);
7158     -#endif
7159    
7160     /*
7161     * This is the same as the above, except we _also_ save the current
7162     * Time Stamp Counter value at the time of the timer interrupt, so that
7163     * we later on can estimate the time of day more exactly.
7164     */
7165     -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
7166     +irqreturn_t timer_interrupt(int irq, void *dev_id)
7167     {
7168     s64 delta, delta_cpu, stolen, blocked;
7169     u64 sched_time;
7170     @@ -686,10 +683,14 @@
7171     }
7172    
7173     /* System-wide jiffy work. */
7174     - while (delta >= NS_PER_TICK) {
7175     - delta -= NS_PER_TICK;
7176     - processed_system_time += NS_PER_TICK;
7177     - do_timer(regs);
7178     + if (delta >= NS_PER_TICK) {
7179     + do_div(delta, NS_PER_TICK);
7180     + processed_system_time += delta * NS_PER_TICK;
7181     + while (delta > HZ) {
7182     + do_timer(HZ);
7183     + delta -= HZ;
7184     + }
7185     + do_timer(delta);
7186     }
7187    
7188     if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
7189     @@ -734,7 +735,7 @@
7190     if (delta_cpu > 0) {
7191     do_div(delta_cpu, NS_PER_TICK);
7192     per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
7193     - if (user_mode_vm(regs))
7194     + if (user_mode_vm(get_irq_regs()))
7195     account_user_time(current, (cputime_t)delta_cpu);
7196     else
7197     account_system_time(current, HARDIRQ_OFFSET,
7198     @@ -748,10 +749,10 @@
7199     /* Local timer processing (see update_process_times()). */
7200     run_local_timers();
7201     if (rcu_pending(cpu))
7202     - rcu_check_callbacks(cpu, user_mode_vm(regs));
7203     + rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
7204     scheduler_tick();
7205     run_posix_cpu_timers(current);
7206     - profile_tick(CPU_PROFILING, regs);
7207     + profile_tick(CPU_PROFILING);
7208    
7209     return IRQ_HANDLED;
7210     }
7211     @@ -959,10 +960,11 @@
7212     /* Duplicate of time_init() below, with hpet_enable part added */
7213     static void __init hpet_time_init(void)
7214     {
7215     - xtime.tv_sec = get_cmos_time();
7216     - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7217     - set_normalized_timespec(&wall_to_monotonic,
7218     - -xtime.tv_sec, -xtime.tv_nsec);
7219     + struct timespec ts;
7220     + ts.tv_sec = get_cmos_time();
7221     + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
7222     +
7223     + do_settimeofday(&ts);
7224    
7225     if ((hpet_enable() >= 0) && hpet_use_timer) {
7226     printk("Using HPET for base-timer\n");
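
The timer_interrupt() hunk above replaces per-tick iteration (one do_timer() call per lost tick) with a single division, then feeds do_timer() at most HZ ticks per call. A minimal sketch of the batching arithmetic, assuming a 1 kHz tick purely for the demo:

    #include <stdio.h>

    #define NS_PER_TICK 1000000ULL  /* assumed tick length for the demo */
    #define HZ 1000

    static unsigned long jiffies_demo;
    static void do_timer_demo(unsigned long ticks) { jiffies_demo += ticks; }

    int main(void)
    {
        /* 3500 lost ticks plus change, as after a long stolen period */
        unsigned long long delta = 3500 * NS_PER_TICK + 123;

        if (delta >= NS_PER_TICK) {
            unsigned long long ticks = delta / NS_PER_TICK; /* the do_div() */

            while (ticks > HZ) {        /* clamp each do_timer() call */
                do_timer_demo(HZ);
                ticks -= HZ;
            }
            do_timer_demo(ticks);
        }
        printf("jiffies advanced by %lu\n", jiffies_demo);  /* 3500 */
        return 0;
    }
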
7227 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/traps_32-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c
7228     --- linux-2.6.25/arch/x86/kernel/traps_32-xen.c 2008-05-23 20:51:11.000000000 +0200
7229     +++ linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c 2008-05-23 20:39:03.000000000 +0200
7230 niro 609 @@ -28,6 +28,7 @@
7231     #include <linux/kprobes.h>
7232     #include <linux/kexec.h>
7233     #include <linux/unwind.h>
7234     +#include <linux/uaccess.h>
7235    
7236     #ifdef CONFIG_EISA
7237     #include <linux/ioport.h>
7238     @@ -40,7 +41,6 @@
7239    
7240     #include <asm/processor.h>
7241     #include <asm/system.h>
7242     -#include <asm/uaccess.h>
7243     #include <asm/io.h>
7244     #include <asm/atomic.h>
7245     #include <asm/debugreg.h>
7246     @@ -51,11 +51,14 @@
7247     #include <asm/smp.h>
7248     #include <asm/arch_hooks.h>
7249     #include <asm/kdebug.h>
7250     +#include <asm/stacktrace.h>
7251    
7252     #include <linux/module.h>
7253    
7254     #include "mach_traps.h"
7255    
7256     +int panic_on_unrecovered_nmi;
7257     +
7258     asmlinkage int system_call(void);
7259    
7260     struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
7261     @@ -124,62 +127,63 @@
7262     p < (void *)tinfo + THREAD_SIZE - 3;
7263     }
7264    
7265     -/*
7266     - * Print one address/symbol entries per line.
7267     - */
7268     -static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
7269     -{
7270     - printk(" [<%08lx>] ", addr);
7271     -
7272     - print_symbol("%s\n", addr);
7273     -}
7274     -
7275     static inline unsigned long print_context_stack(struct thread_info *tinfo,
7276     unsigned long *stack, unsigned long ebp,
7277     - char *log_lvl)
7278     + struct stacktrace_ops *ops, void *data)
7279     {
7280     unsigned long addr;
7281    
7282     #ifdef CONFIG_FRAME_POINTER
7283     while (valid_stack_ptr(tinfo, (void *)ebp)) {
7284     + unsigned long new_ebp;
7285     addr = *(unsigned long *)(ebp + 4);
7286     - print_addr_and_symbol(addr, log_lvl);
7287     + ops->address(data, addr);
7288     /*
7289     * break out of recursive entries (such as
7290     - * end_of_stack_stop_unwind_function):
7291     + * end_of_stack_stop_unwind_function). Also,
7292     + * we can never allow a frame pointer to
7293     + * move downwards!
7294     */
7295     - if (ebp == *(unsigned long *)ebp)
7296     + new_ebp = *(unsigned long *)ebp;
7297     + if (new_ebp <= ebp)
7298     break;
7299     - ebp = *(unsigned long *)ebp;
7300     + ebp = new_ebp;
7301     }
7302     #else
7303     while (valid_stack_ptr(tinfo, stack)) {
7304     addr = *stack++;
7305     if (__kernel_text_address(addr))
7306     - print_addr_and_symbol(addr, log_lvl);
7307     + ops->address(data, addr);
7308     }
7309     #endif
7310     return ebp;
7311     }
7312    
7313     +struct ops_and_data {
7314     + struct stacktrace_ops *ops;
7315     + void *data;
7316     +};
7317     +
7318     static asmlinkage int
7319     -show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
7320     +dump_trace_unwind(struct unwind_frame_info *info, void *data)
7321     {
7322     + struct ops_and_data *oad = (struct ops_and_data *)data;
7323     int n = 0;
7324    
7325     while (unwind(info) == 0 && UNW_PC(info)) {
7326     n++;
7327     - print_addr_and_symbol(UNW_PC(info), log_lvl);
7328     + oad->ops->address(oad->data, UNW_PC(info));
7329     if (arch_unw_user_mode(info))
7330     break;
7331     }
7332     return n;
7333     }
7334    
7335     -static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7336     - unsigned long *stack, char *log_lvl)
7337     +void dump_trace(struct task_struct *task, struct pt_regs *regs,
7338     + unsigned long *stack,
7339     + struct stacktrace_ops *ops, void *data)
7340     {
7341     - unsigned long ebp;
7342     + unsigned long ebp = 0;
7343    
7344     if (!task)
7345     task = current;
7346     @@ -187,54 +191,116 @@
7347     if (call_trace >= 0) {
7348     int unw_ret = 0;
7349     struct unwind_frame_info info;
7350     + struct ops_and_data oad = { .ops = ops, .data = data };
7351    
7352     if (regs) {
7353     if (unwind_init_frame_info(&info, task, regs) == 0)
7354     - unw_ret = show_trace_unwind(&info, log_lvl);
7355     + unw_ret = dump_trace_unwind(&info, &oad);
7356     } else if (task == current)
7357     - unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
7358     + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7359     else {
7360     if (unwind_init_blocked(&info, task) == 0)
7361     - unw_ret = show_trace_unwind(&info, log_lvl);
7362     + unw_ret = dump_trace_unwind(&info, &oad);
7363     }
7364     if (unw_ret > 0) {
7365     if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7366     - print_symbol("DWARF2 unwinder stuck at %s\n",
7367     + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7368     UNW_PC(&info));
7369     if (UNW_SP(&info) >= PAGE_OFFSET) {
7370     - printk("Leftover inexact backtrace:\n");
7371     + ops->warning(data, "Leftover inexact backtrace:\n");
7372     stack = (void *)UNW_SP(&info);
7373     + if (!stack)
7374     + return;
7375     + ebp = UNW_FP(&info);
7376     } else
7377     - printk("Full inexact backtrace again:\n");
7378     + ops->warning(data, "Full inexact backtrace again:\n");
7379     } else if (call_trace >= 1)
7380     return;
7381     else
7382     - printk("Full inexact backtrace again:\n");
7383     + ops->warning(data, "Full inexact backtrace again:\n");
7384     } else
7385     - printk("Inexact backtrace:\n");
7386     + ops->warning(data, "Inexact backtrace:\n");
7387     }
7388     -
7389     - if (task == current) {
7390     - /* Grab ebp right from our regs */
7391     - asm ("movl %%ebp, %0" : "=r" (ebp) : );
7392     - } else {
7393     - /* ebp is the last reg pushed by switch_to */
7394     - ebp = *(unsigned long *) task->thread.esp;
7395     + if (!stack) {
7396     + unsigned long dummy;
7397     + stack = &dummy;
7398     + if (task && task != current)
7399     + stack = (unsigned long *)task->thread.esp;
7400     + }
7401     +
7402     +#ifdef CONFIG_FRAME_POINTER
7403     + if (!ebp) {
7404     + if (task == current) {
7405     + /* Grab ebp right from our regs */
7406     + asm ("movl %%ebp, %0" : "=r" (ebp) : );
7407     + } else {
7408     + /* ebp is the last reg pushed by switch_to */
7409     + ebp = *(unsigned long *) task->thread.esp;
7410     + }
7411     }
7412     +#endif
7413    
7414     while (1) {
7415     struct thread_info *context;
7416     context = (struct thread_info *)
7417     ((unsigned long)stack & (~(THREAD_SIZE - 1)));
7418     - ebp = print_context_stack(context, stack, ebp, log_lvl);
7419     + ebp = print_context_stack(context, stack, ebp, ops, data);
7420     + /* Should be after the line below, but somewhere
7421     + in early boot context comes out corrupted and we
7422     + can't reference it -AK */
7423     + if (ops->stack(data, "IRQ") < 0)
7424     + break;
7425     stack = (unsigned long*)context->previous_esp;
7426     if (!stack)
7427     break;
7428     - printk("%s =======================\n", log_lvl);
7429     }
7430     }
7431     +EXPORT_SYMBOL(dump_trace);
7432 niro 611 +
7433 niro 609 +static void
7434     +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
7435     +{
7436     + printk(data);
7437     + print_symbol(msg, symbol);
7438     + printk("\n");
7439     +}
7440     +
7441     +static void print_trace_warning(void *data, char *msg)
7442     +{
7443     + printk("%s%s\n", (char *)data, msg);
7444     +}
7445 niro 611
7446     -void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
7447 niro 609 +static int print_trace_stack(void *data, char *name)
7448     +{
7449     + return 0;
7450     +}
7451     +
7452     +/*
7453     + * Print one address/symbol entries per line.
7454     + */
7455     +static void print_trace_address(void *data, unsigned long addr)
7456     +{
7457     + printk("%s [<%08lx>] ", (char *)data, addr);
7458     + print_symbol("%s\n", addr);
7459     +}
7460     +
7461     +static struct stacktrace_ops print_trace_ops = {
7462     + .warning = print_trace_warning,
7463     + .warning_symbol = print_trace_warning_symbol,
7464     + .stack = print_trace_stack,
7465     + .address = print_trace_address,
7466     +};
7467     +
7468     +static void
7469     +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
7470     + unsigned long * stack, char *log_lvl)
7471     +{
7472     + dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
7473     + printk("%s =======================\n", log_lvl);
7474     +}
7475     +
7476     +void show_trace(struct task_struct *task, struct pt_regs *regs,
7477     + unsigned long * stack)
7478     {
7479     show_trace_log_lvl(task, regs, stack, "");
7480     }
7481     @@ -297,12 +363,13 @@
7482     ss = regs->xss & 0xffff;
7483     }
7484     print_modules();
7485     - printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
7486     - "EFLAGS: %08lx (%s %.*s) \n",
7487     + printk(KERN_EMERG "CPU: %d\n"
7488     + KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
7489     + KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
7490     smp_processor_id(), 0xffff & regs->xcs, regs->eip,
7491     - print_tainted(), regs->eflags, system_utsname.release,
7492     - (int)strcspn(system_utsname.version, " "),
7493     - system_utsname.version);
7494     + print_tainted(), regs->eflags, init_utsname()->release,
7495     + (int)strcspn(init_utsname()->version, " "),
7496     + init_utsname()->version);
7497     print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
7498     printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
7499     regs->eax, regs->ebx, regs->ecx, regs->edx);
7500     @@ -319,6 +386,8 @@
7501     */
7502     if (in_kernel) {
7503     u8 __user *eip;
7504     + int code_bytes = 64;
7505     + unsigned char c;
7506    
7507     printk("\n" KERN_EMERG "Stack: ");
7508     show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
7509     @@ -326,9 +395,12 @@
7510     printk(KERN_EMERG "Code: ");
7511    
7512     eip = (u8 __user *)regs->eip - 43;
7513     - for (i = 0; i < 64; i++, eip++) {
7514     - unsigned char c;
7515     -
7516     + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7517     + /* try starting at EIP */
7518     + eip = (u8 __user *)regs->eip;
7519     + code_bytes = 32;
7520     + }
7521     + for (i = 0; i < code_bytes; i++, eip++) {
7522     if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
7523     printk(" Bad EIP value.");
7524     break;
7525     @@ -349,7 +421,7 @@
7526    
7527     if (eip < PAGE_OFFSET)
7528     return;
7529     - if (__get_user(ud2, (unsigned short __user *)eip))
7530     + if (probe_kernel_address((unsigned short __user *)eip, ud2))
7531     return;
7532     if (ud2 != 0x0b0f)
7533     return;
7534     @@ -362,7 +434,8 @@
7535     char *file;
7536     char c;
7537    
7538     - if (__get_user(line, (unsigned short __user *)(eip + 2)))
7539     + if (probe_kernel_address((unsigned short __user *)(eip + 2),
7540     + line))
7541     break;
7542     if (__get_user(file, (char * __user *)(eip + 4)) ||
7543     (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
7544     @@ -604,18 +677,24 @@
7545     }
7546     }
7547    
7548     -static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
7549     +static __kprobes void
7550     +mem_parity_error(unsigned char reason, struct pt_regs * regs)
7551     {
7552     - printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
7553     - "to continue\n");
7554     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7555     + "CPU %d.\n", reason, smp_processor_id());
7556     printk(KERN_EMERG "You probably have a hardware problem with your RAM "
7557     "chips\n");
7558     + if (panic_on_unrecovered_nmi)
7559     + panic("NMI: Not continuing");
7560     +
7561     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7562    
7563     /* Clear and disable the memory parity error line. */
7564     clear_mem_error(reason);
7565     }
7566    
7567     -static void io_check_error(unsigned char reason, struct pt_regs * regs)
7568     +static __kprobes void
7569     +io_check_error(unsigned char reason, struct pt_regs * regs)
7570     {
7571     printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
7572     show_registers(regs);
7573     @@ -624,7 +703,8 @@
7574     clear_io_check_error(reason);
7575     }
7576    
7577     -static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7578     +static __kprobes void
7579     +unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
7580     {
7581     #ifdef CONFIG_MCA
7582     /* Might actually be able to figure out what the guilty party
7583     @@ -634,15 +714,18 @@
7584     return;
7585     }
7586     #endif
7587     - printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
7588     - reason, smp_processor_id());
7589     - printk("Dazed and confused, but trying to continue\n");
7590     - printk("Do you have a strange power saving mode enabled?\n");
7591     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
7592     + "CPU %d.\n", reason, smp_processor_id());
7593     + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
7594     + if (panic_on_unrecovered_nmi)
7595     + panic("NMI: Not continuing");
7596     +
7597     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
7598     }
7599    
7600     static DEFINE_SPINLOCK(nmi_print_lock);
7601    
7602     -void die_nmi (struct pt_regs *regs, const char *msg)
7603     +void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
7604     {
7605     if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
7606     NOTIFY_STOP)
7607     @@ -674,7 +757,7 @@
7608     do_exit(SIGSEGV);
7609     }
7610    
7611     -static void default_do_nmi(struct pt_regs * regs)
7612     +static __kprobes void default_do_nmi(struct pt_regs * regs)
7613     {
7614     unsigned char reason = 0;
7615    
7616     @@ -691,12 +774,12 @@
7617     * Ok, so this is none of the documented NMI sources,
7618     * so it must be the NMI watchdog.
7619     */
7620     - if (nmi_watchdog) {
7621     - nmi_watchdog_tick(regs);
7622     + if (nmi_watchdog_tick(regs, reason))
7623     return;
7624     - }
7625     + if (!do_nmi_callback(regs, smp_processor_id()))
7626     #endif
7627     - unknown_nmi_error(reason, regs);
7628     + unknown_nmi_error(reason, regs);
7629     +
7630     return;
7631     }
7632     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
7633     @@ -712,14 +795,7 @@
7634     reassert_nmi();
7635     }
7636    
7637     -static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
7638     -{
7639     - return 0;
7640     -}
7641     -
7642     -static nmi_callback_t nmi_callback = dummy_nmi_callback;
7643     -
7644     -fastcall void do_nmi(struct pt_regs * regs, long error_code)
7645     +fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
7646     {
7647     int cpu;
7648    
7649     @@ -729,25 +805,11 @@
7650    
7651     ++nmi_count(cpu);
7652    
7653     - if (!rcu_dereference(nmi_callback)(regs, cpu))
7654     - default_do_nmi(regs);
7655     + default_do_nmi(regs);
7656    
7657     nmi_exit();
7658     }
7659    
7660     -void set_nmi_callback(nmi_callback_t callback)
7661     -{
7662     - vmalloc_sync_all();
7663     - rcu_assign_pointer(nmi_callback, callback);
7664     -}
7665     -EXPORT_SYMBOL_GPL(set_nmi_callback);
7666     -
7667     -void unset_nmi_callback(void)
7668     -{
7669     - nmi_callback = dummy_nmi_callback;
7670     -}
7671     -EXPORT_SYMBOL_GPL(unset_nmi_callback);
7672     -
7673     #ifdef CONFIG_KPROBES
7674     fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
7675     {
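
The traps_32 rework above is mostly a policy/mechanism split: dump_trace() walks frames and reports through a struct stacktrace_ops, while print_trace_ops reinstates the old printk output (the log_lvl prefix travels in the opaque data pointer). A compact userspace sketch of that callback shape, with a faked frame list:

    #include <stdio.h>

    struct stacktrace_ops {
        void (*warning)(void *data, char *msg);
        int  (*stack)(void *data, char *name);
        void (*address)(void *data, unsigned long addr);
    };

    static void walk_fake_stack(struct stacktrace_ops *ops, void *data)
    {
        static const unsigned long frames[] = { 0xc0101234UL, 0xc010abcdUL };
        unsigned i;

        ops->warning(data, "Inexact backtrace:");
        if (ops->stack(data, "IRQ") < 0)   /* a callback can abort the walk */
            return;
        for (i = 0; i < sizeof(frames) / sizeof(frames[0]); i++)
            ops->address(data, frames[i]);
    }

    static void print_warning(void *data, char *msg)
    {
        printf("%s%s\n", (char *)data, msg);
    }

    static int print_stack(void *data, char *name)
    {
        printf("%s <%s>\n", (char *)data, name);
        return 0;
    }

    static void print_address(void *data, unsigned long addr)
    {
        printf("%s [<%08lx>]\n", (char *)data, addr);
    }

    static struct stacktrace_ops print_ops = {
        .warning = print_warning,
        .stack   = print_stack,
        .address = print_address,
    };

    int main(void)
    {
        walk_fake_stack(&print_ops, "");  /* data carries the log prefix */
        return 0;
    }
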
7676 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/traps_64-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c
7677     --- linux-2.6.25/arch/x86/kernel/traps_64-xen.c 2008-05-23 20:51:11.000000000 +0200
7678     +++ linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c 2008-05-23 20:39:03.000000000 +0200
7679 niro 609 @@ -23,6 +23,7 @@
7680     #include <linux/delay.h>
7681     #include <linux/spinlock.h>
7682     #include <linux/interrupt.h>
7683     +#include <linux/kallsyms.h>
7684     #include <linux/module.h>
7685     #include <linux/moduleparam.h>
7686     #include <linux/nmi.h>
7687     @@ -45,6 +46,7 @@
7688     #include <asm/pda.h>
7689     #include <asm/proto.h>
7690     #include <asm/nmi.h>
7691     +#include <asm/stacktrace.h>
7692    
7693     asmlinkage void divide_error(void);
7694     asmlinkage void debug(void);
7695     @@ -114,7 +116,6 @@
7696     #endif
7697    
7698     #ifdef CONFIG_KALLSYMS
7699     -# include <linux/kallsyms.h>
7700     void printk_address(unsigned long address)
7701     {
7702     unsigned long offset = 0, symsize;
7703     @@ -142,7 +143,7 @@
7704     #endif
7705    
7706     static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
7707     - unsigned *usedp, const char **idp)
7708     + unsigned *usedp, char **idp)
7709     {
7710     #ifndef CONFIG_X86_NO_TSS
7711     static char ids[][8] = {
7712     @@ -162,26 +163,7 @@
7713     * 'stack' is in one of them:
7714     */
7715     for (k = 0; k < N_EXCEPTION_STACKS; k++) {
7716     - unsigned long end;
7717     -
7718     - /*
7719     - * set 'end' to the end of the exception stack.
7720     - */
7721     - switch (k + 1) {
7722     - /*
7723     - * TODO: this block is not needed i think, because
7724     - * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
7725     - * properly too.
7726     - */
7727     -#if DEBUG_STKSZ > EXCEPTION_STKSZ
7728     - case DEBUG_STACK:
7729     - end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
7730     - break;
7731     -#endif
7732     - default:
7733     - end = per_cpu(orig_ist, cpu).ist[k];
7734     - break;
7735     - }
7736     + unsigned long end = per_cpu(orig_ist, cpu).ist[k];
7737     /*
7738     * Is 'stack' above this exception frame's end?
7739     * If yes then skip to the next frame.
7740     @@ -236,13 +218,19 @@
7741     return NULL;
7742     }
7743    
7744     -static int show_trace_unwind(struct unwind_frame_info *info, void *context)
7745     +struct ops_and_data {
7746     + struct stacktrace_ops *ops;
7747     + void *data;
7748     +};
7749     +
7750     +static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
7751     {
7752     + struct ops_and_data *oad = (struct ops_and_data *)context;
7753     int n = 0;
7754    
7755     while (unwind(info) == 0 && UNW_PC(info)) {
7756     n++;
7757     - printk_address(UNW_PC(info));
7758     + oad->ops->address(oad->data, UNW_PC(info));
7759     if (arch_unw_user_mode(info))
7760     break;
7761     }
7762     @@ -256,13 +244,19 @@
7763     * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
7764     */
7765    
7766     -void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
7767     +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
7768     {
7769     - const unsigned cpu = safe_smp_processor_id();
7770     + void *t = (void *)tinfo;
7771     + return p > t && p < t + THREAD_SIZE - 3;
7772     +}
7773     +
7774     +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
7775     + struct stacktrace_ops *ops, void *data)
7776     +{
7777     + const unsigned cpu = smp_processor_id();
7778     unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
7779     unsigned used = 0;
7780     -
7781     - printk("\nCall Trace:\n");
7782     + struct thread_info *tinfo;
7783    
7784     if (!tsk)
7785     tsk = current;
7786     @@ -270,32 +264,47 @@
7787     if (call_trace >= 0) {
7788     int unw_ret = 0;
7789     struct unwind_frame_info info;
7790     + struct ops_and_data oad = { .ops = ops, .data = data };
7791    
7792     if (regs) {
7793     if (unwind_init_frame_info(&info, tsk, regs) == 0)
7794     - unw_ret = show_trace_unwind(&info, NULL);
7795     + unw_ret = dump_trace_unwind(&info, &oad);
7796     } else if (tsk == current)
7797     - unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
7798     + unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
7799     else {
7800     if (unwind_init_blocked(&info, tsk) == 0)
7801     - unw_ret = show_trace_unwind(&info, NULL);
7802     + unw_ret = dump_trace_unwind(&info, &oad);
7803     }
7804     if (unw_ret > 0) {
7805     if (call_trace == 1 && !arch_unw_user_mode(&info)) {
7806     - print_symbol("DWARF2 unwinder stuck at %s\n",
7807     + ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
7808     UNW_PC(&info));
7809     if ((long)UNW_SP(&info) < 0) {
7810     - printk("Leftover inexact backtrace:\n");
7811     + ops->warning(data, "Leftover inexact backtrace:\n");
7812     stack = (unsigned long *)UNW_SP(&info);
7813     + if (!stack)
7814     + return;
7815     } else
7816     - printk("Full inexact backtrace again:\n");
7817     + ops->warning(data, "Full inexact backtrace again:\n");
7818     } else if (call_trace >= 1)
7819     return;
7820     else
7821     - printk("Full inexact backtrace again:\n");
7822     + ops->warning(data, "Full inexact backtrace again:\n");
7823     } else
7824     - printk("Inexact backtrace:\n");
7825     + ops->warning(data, "Inexact backtrace:\n");
7826     + }
7827     + if (!stack) {
7828     + unsigned long dummy;
7829     + stack = &dummy;
7830     + if (tsk && tsk != current)
7831     + stack = (unsigned long *)tsk->thread.rsp;
7832     }
7833     + /*
7834     + * Align the stack pointer on word boundary, later loops
7835     + * rely on that (and corruption / debug info bugs can cause
7836     + * unaligned values here):
7837     + */
7838     + stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
7839    
7840     /*
7841     * Print function call entries within a stack. 'cond' is the
7842     @@ -305,7 +314,9 @@
7843     #define HANDLE_STACK(cond) \
7844     do while (cond) { \
7845     unsigned long addr = *stack++; \
7846     - if (kernel_text_address(addr)) { \
7847     + if (oops_in_progress ? \
7848     + __kernel_text_address(addr) : \
7849     + kernel_text_address(addr)) { \
7850     /* \
7851     * If the address is either in the text segment of the \
7852     * kernel, or in the region which contains vmalloc'ed \
7853     @@ -314,7 +325,7 @@
7854     * down the cause of the crash will be able to figure \
7855     * out the call path that was taken. \
7856     */ \
7857     - printk_address(addr); \
7858     + ops->address(data, addr); \
7859     } \
7860     } while (0)
7861    
7862     @@ -323,16 +334,17 @@
7863     * current stack address. If the stacks consist of nested
7864     * exceptions
7865     */
7866     - for ( ; ; ) {
7867     - const char *id;
7868     + for (;;) {
7869     + char *id;
7870     unsigned long *estack_end;
7871     estack_end = in_exception_stack(cpu, (unsigned long)stack,
7872     &used, &id);
7873    
7874     if (estack_end) {
7875     - printk(" <%s>", id);
7876     + if (ops->stack(data, id) < 0)
7877     + break;
7878     HANDLE_STACK (stack < estack_end);
7879     - printk(" <EOE>");
7880     + ops->stack(data, "<EOE>");
7881     /*
7882     * We link to the next stack via the
7883     * second-to-last pointer (index -2 to end) in the
7884     @@ -347,7 +359,8 @@
7885     (IRQSTACKSIZE - 64) / sizeof(*irqstack);
7886    
7887     if (stack >= irqstack && stack < irqstack_end) {
7888     - printk(" <IRQ>");
7889     + if (ops->stack(data, "IRQ") < 0)
7890     + break;
7891     HANDLE_STACK (stack < irqstack_end);
7892     /*
7893     * We link to the next stack (which would be
7894     @@ -356,7 +369,7 @@
7895     */
7896     stack = (unsigned long *) (irqstack_end[-1]);
7897     irqstack_end = NULL;
7898     - printk(" <EOI>");
7899     + ops->stack(data, "EOI");
7900     continue;
7901     }
7902     }
7903     @@ -364,19 +377,58 @@
7904     }
7905    
7906     /*
7907     - * This prints the process stack:
7908     + * This handles the process stack:
7909     */
7910     - HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
7911     + tinfo = current_thread_info();
7912     + HANDLE_STACK (valid_stack_ptr(tinfo, stack));
7913     #undef HANDLE_STACK
7914     +}
7915     +EXPORT_SYMBOL(dump_trace);
7916     +
7917     +static void
7918     +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
7919     +{
7920     + print_symbol(msg, symbol);
7921     + printk("\n");
7922     +}
7923     +
7924     +static void print_trace_warning(void *data, char *msg)
7925     +{
7926     + printk("%s\n", msg);
7927     +}
7928     +
7929     +static int print_trace_stack(void *data, char *name)
7930     +{
7931     + printk(" <%s> ", name);
7932     + return 0;
7933     +}
7934     +
7935     +static void print_trace_address(void *data, unsigned long addr)
7936     +{
7937     + printk_address(addr);
7938     +}
7939     +
7940     +static struct stacktrace_ops print_trace_ops = {
7941     + .warning = print_trace_warning,
7942     + .warning_symbol = print_trace_warning_symbol,
7943     + .stack = print_trace_stack,
7944     + .address = print_trace_address,
7945     +};
7946    
7947     +void
7948     +show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
7949     +{
7950     + printk("\nCall Trace:\n");
7951     + dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
7952     printk("\n");
7953     }
7954    
7955     -static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
7956     +static void
7957     +_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
7958     {
7959     unsigned long *stack;
7960     int i;
7961     - const int cpu = safe_smp_processor_id();
7962     + const int cpu = smp_processor_id();
7963     unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
7964     unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
7965    
7966     @@ -430,7 +482,7 @@
7967     int i;
7968     int in_kernel = !user_mode(regs);
7969     unsigned long rsp;
7970     - const int cpu = safe_smp_processor_id();
7971     + const int cpu = smp_processor_id();
7972     struct task_struct *cur = cpu_pda(cpu)->pcurrent;
7973    
7974     rsp = regs->rsp;
7975     @@ -505,9 +557,11 @@
7976    
7977     unsigned __kprobes long oops_begin(void)
7978     {
7979     - int cpu = safe_smp_processor_id();
7980     + int cpu = smp_processor_id();
7981     unsigned long flags;
7982    
7983     + oops_enter();
7984     +
7985     /* racy, but better than risking deadlock. */
7986     local_irq_save(flags);
7987     if (!spin_trylock(&die_lock)) {
7988     @@ -536,6 +590,7 @@
7989     spin_unlock_irqrestore(&die_lock, flags);
7990     if (panic_on_oops)
7991     panic("Fatal exception");
7992     + oops_exit();
7993     }
7994    
7995     void __kprobes __die(const char * str, struct pt_regs * regs, long err)
7996     @@ -573,7 +628,7 @@
7997     }
7998    
7999     #ifdef CONFIG_X86_LOCAL_APIC
8000     -void __kprobes die_nmi(char *str, struct pt_regs *regs)
8001     +void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
8002     {
8003     unsigned long flags = oops_begin();
8004    
8005     @@ -581,13 +636,12 @@
8006     * We are in trouble anyway, lets at least try
8007     * to get a message out.
8008     */
8009     - printk(str, safe_smp_processor_id());
8010     + printk(str, smp_processor_id());
8011     show_registers(regs);
8012     if (kexec_should_crash(current))
8013     crash_kexec(regs);
8014     - if (panic_on_timeout || panic_on_oops)
8015     - panic("nmi watchdog");
8016     - printk("console shuts up ...\n");
8017     + if (do_panic || panic_on_oops)
8018     + panic("Non maskable interrupt");
8019     oops_end(flags);
8020     nmi_exit();
8021     local_irq_enable();
8022     @@ -734,8 +788,15 @@
8023     static __kprobes void
8024     mem_parity_error(unsigned char reason, struct pt_regs * regs)
8025     {
8026     - printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
8027     - printk("You probably have a hardware problem with your RAM chips\n");
8028     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8029     + reason);
8030     + printk(KERN_EMERG "You probably have a hardware problem with your "
8031     + "RAM chips\n");
8032     +
8033     + if (panic_on_unrecovered_nmi)
8034     + panic("NMI: Not continuing");
8035     +
8036     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8037    
8038     #if 0 /* XEN */
8039     /* Clear and disable the memory parity error line. */
8040     @@ -762,9 +823,15 @@
8041    
8042     static __kprobes void
8043     unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
8044     -{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
8045     - printk("Dazed and confused, but trying to continue\n");
8046     - printk("Do you have a strange power saving mode enabled?\n");
8047     +{
8048     + printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
8049     + reason);
8050     + printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
8051     +
8052     + if (panic_on_unrecovered_nmi)
8053     + panic("NMI: Not continuing");
8054     +
8055     + printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
8056     }
8057    
8058     /* Runs on IST stack. This code must keep interrupts off all the time.
8059     @@ -789,12 +856,12 @@
8060     * Ok, so this is none of the documented NMI sources,
8061     * so it must be the NMI watchdog.
8062     */
8063     - if (nmi_watchdog > 0) {
8064     - nmi_watchdog_tick(regs,reason);
8065     + if (nmi_watchdog_tick(regs,reason))
8066     return;
8067     - }
8068     #endif
8069     - unknown_nmi_error(reason, regs);
8070     + if (!do_nmi_callback(regs,cpu))
8071     + unknown_nmi_error(reason, regs);
8072     +
8073     return;
8074     }
8075     if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
8076     @@ -1081,6 +1148,7 @@
8077     init_fpu(me);
8078     restore_fpu_checking(&me->thread.i387.fxsave);
8079     task_thread_info(me)->status |= TS_USEDFPU;
8080     + me->fpu_counter++;
8081     }
8082    
8083    
8084     @@ -1141,24 +1209,30 @@
8085     }
8086    
8087    
8088     -/* Actual parsing is done early in setup.c. */
8089     -static int __init oops_dummy(char *s)
8090     +static int __init oops_setup(char *s)
8091     {
8092     - panic_on_oops = 1;
8093     - return 1;
8094     + if (!s)
8095     + return -EINVAL;
8096     + if (!strcmp(s, "panic"))
8097     + panic_on_oops = 1;
8098     + return 0;
8099     }
8100     -__setup("oops=", oops_dummy);
8101     +early_param("oops", oops_setup);
8102    
8103     static int __init kstack_setup(char *s)
8104     {
8105     + if (!s)
8106     + return -EINVAL;
8107     kstack_depth_to_print = simple_strtoul(s,NULL,0);
8108     - return 1;
8109     + return 0;
8110     }
8111     -__setup("kstack=", kstack_setup);
8112     +early_param("kstack", kstack_setup);
8113    
8114     #ifdef CONFIG_STACK_UNWIND
8115     static int __init call_trace_setup(char *s)
8116     {
8117     + if (!s)
8118     + return -EINVAL;
8119     if (strcmp(s, "old") == 0)
8120     call_trace = -1;
8121     else if (strcmp(s, "both") == 0)
8122     @@ -1167,7 +1241,7 @@
8123     call_trace = 1;
8124     else if (strcmp(s, "new") == 0)
8125     call_trace = 2;
8126     - return 1;
8127     + return 0;
8128     }
8129     -__setup("call_trace=", call_trace_setup);
8130     +early_param("call_trace", call_trace_setup);
8131     #endif
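
The vsyscall_64 diff that follows adds vgetcpu(), which relies on a single per-CPU word encoded as (node << 12) | cpu — stored via the RDTSCP aux register or a GDT segment limit — and decoded with a mask and a shift. A minimal sketch of just that encoding, assuming the CPU number fits in 12 bits:

    #include <assert.h>
    #include <stdio.h>

    /* Pack a NUMA node and CPU number into one word, as the
     * vsyscall_set_cpu()/vgetcpu() pair below does. */
    static unsigned encode(unsigned node, unsigned cpu)
    {
        return (node << 12) | cpu;
    }

    int main(void)
    {
        unsigned p = encode(3, 42);
        unsigned cpu  = p & 0xfff;  /* low 12 bits: CPU number */
        unsigned node = p >> 12;    /* remaining bits: NUMA node */

        assert(cpu == 42 && node == 3);
        printf("packed=%#x cpu=%u node=%u\n", p, cpu, node);
        return 0;
    }
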
8132 niro 611 diff -Naur linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c
8133     --- linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8134     +++ linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8135 niro 609 @@ -26,6 +26,10 @@
8136     #include <linux/seqlock.h>
8137     #include <linux/jiffies.h>
8138     #include <linux/sysctl.h>
8139     +#include <linux/getcpu.h>
8140     +#include <linux/cpu.h>
8141     +#include <linux/smp.h>
8142     +#include <linux/notifier.h>
8143    
8144     #include <asm/vsyscall.h>
8145     #include <asm/pgtable.h>
8146     @@ -33,11 +37,15 @@
8147     #include <asm/fixmap.h>
8148     #include <asm/errno.h>
8149     #include <asm/io.h>
8150     +#include <asm/segment.h>
8151     +#include <asm/desc.h>
8152     +#include <asm/topology.h>
8153    
8154     #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
8155    
8156     int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
8157     seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
8158     +int __vgetcpu_mode __section_vgetcpu_mode;
8159    
8160     #include <asm/unistd.h>
8161    
8162     @@ -61,8 +69,7 @@
8163     sequence = read_seqbegin(&__xtime_lock);
8164    
8165     sec = __xtime.tv_sec;
8166     - usec = (__xtime.tv_nsec / 1000) +
8167     - (__jiffies - __wall_jiffies) * (1000000 / HZ);
8168     + usec = __xtime.tv_nsec / 1000;
8169    
8170     if (__vxtime.mode != VXTIME_HPET) {
8171     t = get_cycles_sync();
8172     @@ -72,7 +79,8 @@
8173     __vxtime.tsc_quot) >> 32;
8174     /* See comment in x86_64 do_gettimeofday. */
8175     } else {
8176     - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8177     + usec += ((readl((void __iomem *)
8178     + fix_to_virt(VSYSCALL_HPET) + 0xf0) -
8179     __vxtime.last) * __vxtime.quot) >> 32;
8180     }
8181     } while (read_seqretry(&__xtime_lock, sequence));
8182     @@ -127,9 +135,46 @@
8183     return __xtime.tv_sec;
8184     }
8185    
8186     -long __vsyscall(2) venosys_0(void)
8187     -{
8188     - return -ENOSYS;
8189     +/* Fast way to get current CPU and node.
8190     + This helps to do per node and per CPU caches in user space.
8191     + The result is not guaranteed without CPU affinity, but usually
8192     + works out because the scheduler tries to keep a thread on the same
8193     + CPU.
8194     +
8195     + tcache must point to a two element sized long array.
8196     + All arguments can be NULL. */
8197     +long __vsyscall(2)
8198     +vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
8199     +{
8200     + unsigned int dummy, p;
8201     + unsigned long j = 0;
8202     +
8203     + /* Fast cache - only recompute value once per jiffies and avoid
8204     + relatively costly rdtscp/cpuid otherwise.
8205     + This works because the scheduler usually keeps the process
8206     + on the same CPU and this syscall doesn't guarantee its
8207     + results anyways.
8208     + We do this here because otherwise user space would do it on
8209     + its own in a likely inferior way (no access to jiffies).
8210     + If you don't like it pass NULL. */
8211     + if (tcache && tcache->blob[0] == (j = __jiffies)) {
8212     + p = tcache->blob[1];
8213     + } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
8214     + /* Load per CPU data from RDTSCP */
8215     + rdtscp(dummy, dummy, p);
8216     + } else {
8217     + /* Load per CPU data from GDT */
8218     + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
8219     + }
8220     + if (tcache) {
8221     + tcache->blob[0] = j;
8222     + tcache->blob[1] = p;
8223     + }
8224     + if (cpu)
8225     + *cpu = p & 0xfff;
8226     + if (node)
8227     + *node = p >> 12;
8228     + return 0;
8229     }
8230    
8231     long __vsyscall(3) venosys_1(void)
8232     @@ -149,7 +194,8 @@
8233     void __user *buffer, size_t *lenp, loff_t *ppos)
8234     {
8235     extern u16 vsysc1, vsysc2;
8236     - u16 *map1, *map2;
8237     + u16 __iomem *map1;
8238     + u16 __iomem *map2;
8239     int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
8240     if (!write)
8241     return ret;
8242     @@ -164,11 +210,11 @@
8243     goto out;
8244     }
8245     if (!sysctl_vsyscall) {
8246     - *map1 = SYSCALL;
8247     - *map2 = SYSCALL;
8248     + writew(SYSCALL, map1);
8249     + writew(SYSCALL, map2);
8250     } else {
8251     - *map1 = NOP2;
8252     - *map2 = NOP2;
8253     + writew(NOP2, map1);
8254     + writew(NOP2, map2);
8255     }
8256     iounmap(map2);
8257     out:
8258     @@ -200,6 +246,48 @@
8259    
8260     #endif
8261    
8262     +/* Assume __initcall executes before all user space. Hopefully kmod
8263     + doesn't violate that. We'll find out if it does. */
8264     +static void __cpuinit vsyscall_set_cpu(int cpu)
8265     +{
8266     + unsigned long d;
8267     + unsigned long node = 0;
8268     +#ifdef CONFIG_NUMA
8269     + node = cpu_to_node[cpu];
8270     +#endif
8271     + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
8272     + write_rdtscp_aux((node << 12) | cpu);
8273     +
8274     + /* Store cpu number in limit so that it can be loaded quickly
8275     + in user space in vgetcpu.
8276     + 12 bits for the CPU and 8 bits for the node. */
8277     + d = 0x0f40000000000ULL;
8278     + d |= cpu;
8279     + d |= (node & 0xf) << 12;
8280     + d |= (node >> 4) << 48;
8281     + if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
8282     + + GDT_ENTRY_PER_CPU),
8283     + d))
8284     + BUG();
8285     +}
8286     +
8287     +static void __cpuinit cpu_vsyscall_init(void *arg)
8288     +{
8289     + /* preemption should be already off */
8290     + vsyscall_set_cpu(raw_smp_processor_id());
8291     +}
8292     +
8293     +#ifdef CONFIG_HOTPLUG_CPU
8294     +static int __cpuinit
8295     +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
8296     +{
8297     + long cpu = (long)arg;
8298     + if (action == CPU_ONLINE)
8299     + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
8300     + return NOTIFY_DONE;
8301     +}
8302     +#endif
8303     +
8304     static void __init map_vsyscall(void)
8305     {
8306     extern char __vsyscall_0;
8307     @@ -214,13 +302,20 @@
8308     VSYSCALL_ADDR(__NR_vgettimeofday)));
8309     BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
8310     BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
8311     + BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
8312     map_vsyscall();
8313     #ifdef CONFIG_XEN
8314     sysctl_vsyscall = 0; /* disable vgettimeofay() */
8315     + if (boot_cpu_has(X86_FEATURE_RDTSCP))
8316     + vgetcpu_mode = VGETCPU_RDTSCP;
8317     + else
8318     + vgetcpu_mode = VGETCPU_LSL;
8319     #endif
8320     #ifdef CONFIG_SYSCTL
8321     register_sysctl_table(kernel_root_table2, 0);
8322     #endif
8323     + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
8324     + hotcpu_notifier(cpu_vsyscall_notifier, 0);
8325     return 0;
8326     }
8327    
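The vsyscall hunks above wire the sys_getcpu fast path into vsyscall slot 2 (replacing the venosys_0 stub), load the per-CPU value from RDTSCP where available and otherwise from a segment limit in the GDT, and refresh each CPU's descriptor via cpu_vsyscall_init(). The CPU number is packed into the low 12 bits and the node into the bits above, exactly as vgetcpu() unpacks them. For reference, a user-space sketch of calling it — not part of the patch; 0xffffffffff600800 is vsyscall slot 2 on x86-64 (VSYSCALL_START + 2 * 1024), and the two-word cache mirrors the blob[0]/blob[1] usage above:

#include <stdio.h>

/* illustration only: just the two words vgetcpu actually touches */
struct getcpu_cache { unsigned long blob[2]; };

typedef long (*vgetcpu_t)(unsigned *cpu, unsigned *node,
			  struct getcpu_cache *tcache);

int main(void)
{
	vgetcpu_t vgetcpu = (vgetcpu_t)0xffffffffff600800UL; /* slot 2 */
	unsigned cpu = 0, node = 0;
	struct getcpu_cache cache = { { 0, 0 } };

	if (vgetcpu(&cpu, &node, &cache) == 0)
		printf("running on cpu %u, node %u\n", cpu, node);
	return 0;
}
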
8328 niro 611 diff -Naur linux-2.6.25/arch/x86/mach-xen/setup.c linux-2.6.25-xen/arch/x86/mach-xen/setup.c
8329     --- linux-2.6.25/arch/x86/mach-xen/setup.c 2008-05-23 20:51:11.000000000 +0200
8330     +++ linux-2.6.25-xen/arch/x86/mach-xen/setup.c 2008-05-23 20:39:03.000000000 +0200
8331 niro 609 @@ -103,8 +103,10 @@
8332    
8333     setup_xen_features();
8334    
8335     - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
8336     - set_fixaddr_top(pp.virt_start);
8337     + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
8338     + hypervisor_virt_start = pp.virt_start;
8339     + reserve_top_address(0UL - pp.virt_start);
8340     + }
8341    
8342     if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
8343     machine_to_phys_mapping = (unsigned long *)mapping.v_start;
8344 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/fault_32-xen.c linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c
8345     --- linux-2.6.25/arch/x86/mm/fault_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8346     +++ linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8347 niro 609 @@ -27,21 +27,24 @@
8348     #include <asm/uaccess.h>
8349     #include <asm/desc.h>
8350     #include <asm/kdebug.h>
8351     +#include <asm/segment.h>
8352    
8353     extern void die(const char *,struct pt_regs *,long);
8354    
8355     -#ifdef CONFIG_KPROBES
8356     -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8357     +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8358     +
8359     int register_page_fault_notifier(struct notifier_block *nb)
8360     {
8361     vmalloc_sync_all();
8362     return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8363     }
8364     +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8365    
8366     int unregister_page_fault_notifier(struct notifier_block *nb)
8367     {
8368     return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8369     }
8370     +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8371    
8372     static inline int notify_page_fault(enum die_val val, const char *str,
8373     struct pt_regs *regs, long err, int trap, int sig)
8374     @@ -55,14 +58,6 @@
8375     };
8376     return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8377     }
8378     -#else
8379     -static inline int notify_page_fault(enum die_val val, const char *str,
8380     - struct pt_regs *regs, long err, int trap, int sig)
8381     -{
8382     - return NOTIFY_DONE;
8383     -}
8384     -#endif
8385     -
8386    
8387     /*
8388     * Unlock any spinlocks which will prevent us from getting the
8389     @@ -119,10 +114,10 @@
8390     }
8391    
8392     /* The standard kernel/user address space limit. */
8393     - *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
8394     + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
8395    
8396     /* By far the most common cases. */
8397     - if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
8398     + if (likely(SEGMENT_IS_FLAT_CODE(seg)))
8399     return eip;
8400    
8401     /* Check the segment exists, is within the current LDT/GDT size,
8402     @@ -559,11 +554,7 @@
8403     write = 0;
8404     switch (error_code & 3) {
8405     default: /* 3: write, present */
8406     -#ifdef TEST_VERIFY_AREA
8407     - if (regs->cs == GET_KERNEL_CS())
8408     - printk("WP fault at %08lx\n", regs->eip);
8409     -#endif
8410     - /* fall through */
8411     + /* fall through */
8412     case 2: /* write, not present */
8413     if (!(vma->vm_flags & VM_WRITE))
8414     goto bad_area;
8415     @@ -572,7 +563,7 @@
8416     case 1: /* read, present */
8417     goto bad_area;
8418     case 0: /* read, not present */
8419     - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8420     + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8421     goto bad_area;
8422     }
8423    
8424     @@ -704,7 +695,7 @@
8425     */
8426     out_of_memory:
8427     up_read(&mm->mmap_sem);
8428     - if (tsk->pid == 1) {
8429     + if (is_init(tsk)) {
8430     yield();
8431     down_read(&mm->mmap_sem);
8432     goto survive;
8433 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/fault_64-xen.c linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c
8434     --- linux-2.6.25/arch/x86/mm/fault_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8435     +++ linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8436 niro 609 @@ -40,8 +40,7 @@
8437     #define PF_RSVD (1<<3)
8438     #define PF_INSTR (1<<4)
8439    
8440     -#ifdef CONFIG_KPROBES
8441     -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8442     +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
8443    
8444     /* Hook to register for page fault notifications */
8445     int register_page_fault_notifier(struct notifier_block *nb)
8446     @@ -49,11 +48,13 @@
8447     vmalloc_sync_all();
8448     return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
8449     }
8450     +EXPORT_SYMBOL_GPL(register_page_fault_notifier);
8451    
8452     int unregister_page_fault_notifier(struct notifier_block *nb)
8453     {
8454     return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
8455     }
8456     +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
8457    
8458     static inline int notify_page_fault(enum die_val val, const char *str,
8459     struct pt_regs *regs, long err, int trap, int sig)
8460     @@ -67,13 +68,6 @@
8461     };
8462     return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
8463     }
8464     -#else
8465     -static inline int notify_page_fault(enum die_val val, const char *str,
8466     - struct pt_regs *regs, long err, int trap, int sig)
8467     -{
8468     - return NOTIFY_DONE;
8469     -}
8470     -#endif
8471    
8472     void bust_spinlocks(int yes)
8473     {
8474     @@ -102,7 +96,7 @@
8475     static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
8476     unsigned long error_code)
8477     {
8478     - unsigned char *instr;
8479     + unsigned char __user *instr;
8480     int scan_more = 1;
8481     int prefetch = 0;
8482     unsigned char *max_instr;
8483     @@ -111,7 +105,7 @@
8484     if (error_code & PF_INSTR)
8485     return 0;
8486    
8487     - instr = (unsigned char *)convert_rip_to_linear(current, regs);
8488     + instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
8489     max_instr = instr + 15;
8490    
8491     if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
8492     @@ -122,7 +116,7 @@
8493     unsigned char instr_hi;
8494     unsigned char instr_lo;
8495    
8496     - if (__get_user(opcode, instr))
8497     + if (__get_user(opcode, (char __user *)instr))
8498     break;
8499    
8500     instr_hi = opcode & 0xf0;
8501     @@ -160,7 +154,7 @@
8502     case 0x00:
8503     /* Prefetch instruction is 0x0F0D or 0x0F18 */
8504     scan_more = 0;
8505     - if (__get_user(opcode, instr))
8506     + if (__get_user(opcode, (char __user *)instr))
8507     break;
8508     prefetch = (instr_lo == 0xF) &&
8509     (opcode == 0x0D || opcode == 0x18);
8510     @@ -176,7 +170,7 @@
8511     static int bad_address(void *p)
8512     {
8513     unsigned long dummy;
8514     - return __get_user(dummy, (unsigned long *)p);
8515     + return __get_user(dummy, (unsigned long __user *)p);
8516     }
8517    
8518     void dump_pagetable(unsigned long address)
8519     @@ -248,7 +242,7 @@
8520    
8521     int unhandled_signal(struct task_struct *tsk, int sig)
8522     {
8523     - if (tsk->pid == 1)
8524     + if (is_init(tsk))
8525     return 1;
8526     if (tsk->ptrace & PT_PTRACED)
8527     return 0;
8528     @@ -300,7 +294,7 @@
8529     if (pgd_none(*pgd))
8530     set_pgd(pgd, *pgd_ref);
8531     else
8532     - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8533     + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8534    
8535     /* Below here mismatches are bugs because these lower tables
8536     are shared */
8537     @@ -309,7 +303,7 @@
8538     pud_ref = pud_offset(pgd_ref, address);
8539     if (pud_none(*pud_ref))
8540     return -1;
8541     - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
8542     + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
8543     BUG();
8544     pmd = pmd_offset(pud, address);
8545     pmd_ref = pmd_offset(pud_ref, address);
8546     @@ -531,7 +525,7 @@
8547     case PF_PROT: /* read, present */
8548     goto bad_area;
8549     case 0: /* read, not present */
8550     - if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
8551     + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
8552     goto bad_area;
8553     }
8554    
8555     @@ -647,7 +641,7 @@
8556     */
8557     out_of_memory:
8558     up_read(&mm->mmap_sem);
8559     - if (current->pid == 1) {
8560     + if (is_init(current)) {
8561     yield();
8562     goto again;
8563     }
8564     @@ -702,7 +696,7 @@
8565     if (pgd_none(*pgd))
8566     set_pgd(pgd, *pgd_ref);
8567     else
8568     - BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
8569     + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
8570     }
8571     spin_unlock(&pgd_lock);
8572     set_bit(pgd_index(address), insync);
8573 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/highmem_32-xen.c linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c
8574     --- linux-2.6.25/arch/x86/mm/highmem_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8575     +++ linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8576 niro 609 @@ -38,11 +38,9 @@
8577    
8578     idx = type + KM_TYPE_NR*smp_processor_id();
8579     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8580     -#ifdef CONFIG_DEBUG_HIGHMEM
8581     if (!pte_none(*(kmap_pte-idx)))
8582     BUG();
8583     -#endif
8584     - set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8585     + set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
8586    
8587     return (void*) vaddr;
8588     }
8589     @@ -62,36 +60,26 @@
8590    
8591     void kunmap_atomic(void *kvaddr, enum km_type type)
8592     {
8593     -#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
8594     unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
8595     enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
8596    
8597     - if (vaddr < FIXADDR_START) { // FIXME
8598     +#ifdef CONFIG_DEBUG_HIGHMEM
8599     + if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
8600     dec_preempt_count();
8601     preempt_check_resched();
8602     return;
8603     }
8604     -#endif
8605    
8606     -#if defined(CONFIG_DEBUG_HIGHMEM)
8607     if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
8608     BUG();
8609     -
8610     - /*
8611     - * force other mappings to Oops if they'll try to access
8612     - * this pte without first remap it
8613     - */
8614     - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8615     - __flush_tlb_one(vaddr);
8616     -#elif defined(CONFIG_XEN)
8617     +#endif
8618     /*
8619     - * We must ensure there are no dangling pagetable references when
8620     - * returning memory to Xen (decrease_reservation).
8621     - * XXX TODO: We could make this faster by only zapping when
8622     - * kmap_flush_unused is called but that is trickier and more invasive.
8623     + * Force other mappings to Oops if they'll try to access this pte
8624     + * without first remap it. Keeping stale mappings around is a bad idea
8625     + * also, in case the page changes cacheability attributes or becomes
8626     + * a protected page in a hypervisor.
8627     */
8628     - pte_clear(&init_mm, vaddr, kmap_pte-idx);
8629     -#endif
8630     + kpte_clear_flush(kmap_pte-idx, vaddr);
8631    
8632     dec_preempt_count();
8633     preempt_check_resched();
8634     @@ -110,7 +98,6 @@
8635     idx = type + KM_TYPE_NR*smp_processor_id();
8636     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
8637     set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
8638     - __flush_tlb_one(vaddr);
8639    
8640     return (void*) vaddr;
8641     }
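The highmem rework above folds the old CONFIG_XEN-only pte-zapping branch and the CONFIG_DEBUG_HIGHMEM branch into a single kpte_clear_flush(), so every kunmap_atomic() now tears down the fixmap pte (which also keeps Xen's decrease_reservation safe from dangling references). The caller contract is unchanged; a typical 2.6-era user of the API, shown for context and not taken from this patch:

#include <linux/highmem.h>
#include <linux/string.h>

/* Example only: copy a (possibly highmem) page through a per-cpu
 * fixmap slot. The mapping must be undone with the same km_type slot;
 * kmap_atomic() disables preemption for the duration of the mapping. */
static void copy_from_highpage(struct page *page, void *to)
{
	void *from = kmap_atomic(page, KM_USER0);

	memcpy(to, from, PAGE_SIZE);
	kunmap_atomic(from, KM_USER0);	/* pte zapped and flushed here */
}
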
8642 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/hypervisor.c linux-2.6.25-xen/arch/x86/mm/hypervisor.c
8643     --- linux-2.6.25/arch/x86/mm/hypervisor.c 2008-05-23 20:51:11.000000000 +0200
8644     +++ linux-2.6.25-xen/arch/x86/mm/hypervisor.c 2008-05-23 20:39:03.000000000 +0200
8645 niro 609 @@ -569,7 +569,8 @@
8646     #define MAX_BATCHED_FULL_PTES 32
8647    
8648     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
8649     - unsigned long addr, unsigned long end, pgprot_t newprot)
8650     + unsigned long addr, unsigned long end, pgprot_t newprot,
8651     + int dirty_accountable)
8652     {
8653     int rc = 0, i = 0;
8654     mmu_update_t u[MAX_BATCHED_FULL_PTES];
8655     @@ -582,10 +583,14 @@
8656     pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
8657     do {
8658     if (pte_present(*pte)) {
8659     + pte_t ptent = pte_modify(*pte, newprot);
8660     +
8661     + if (dirty_accountable && pte_dirty(ptent))
8662     + ptent = pte_mkwrite(ptent);
8663     u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
8664     | ((unsigned long)pte & ~PAGE_MASK)
8665     | MMU_PT_UPDATE_PRESERVE_AD;
8666     - u[i].val = __pte_val(pte_modify(*pte, newprot));
8667     + u[i].val = __pte_val(ptent);
8668     if (++i == MAX_BATCHED_FULL_PTES) {
8669     if ((rc = HYPERVISOR_mmu_update(
8670     &u[0], i, NULL, DOMID_SELF)) != 0)
8671 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/init_32-xen.c linux-2.6.25-xen/arch/x86/mm/init_32-xen.c
8672     --- linux-2.6.25/arch/x86/mm/init_32-xen.c 2008-05-23 20:51:11.000000000 +0200
8673     +++ linux-2.6.25-xen/arch/x86/mm/init_32-xen.c 2008-05-23 20:39:03.000000000 +0200
8674 niro 609 @@ -464,16 +464,22 @@
8675     * on Enable
8676     * off Disable
8677     */
8678     -void __init noexec_setup(const char *str)
8679     +static int __init noexec_setup(char *str)
8680     {
8681     - if (!strncmp(str, "on",2) && cpu_has_nx) {
8682     - __supported_pte_mask |= _PAGE_NX;
8683     - disable_nx = 0;
8684     - } else if (!strncmp(str,"off",3)) {
8685     + if (!str || !strcmp(str, "on")) {
8686     + if (cpu_has_nx) {
8687     + __supported_pte_mask |= _PAGE_NX;
8688     + disable_nx = 0;
8689     + }
8690     + } else if (!strcmp(str,"off")) {
8691     disable_nx = 1;
8692     __supported_pte_mask &= ~_PAGE_NX;
8693     - }
8694     + } else
8695     + return -EINVAL;
8696     +
8697     + return 0;
8698     }
8699     +early_param("noexec", noexec_setup);
8700    
8701     int nx_enabled = 0;
8702     #ifdef CONFIG_X86_PAE
8703     @@ -516,6 +522,7 @@
8704     pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
8705     else
8706     pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
8707     + pte_update_defer(&init_mm, vaddr, pte);
8708     __flush_tlb_all();
8709     out:
8710     return ret;
8711     @@ -598,18 +605,6 @@
8712     }
8713     }
8714    
8715     -static void __init set_max_mapnr_init(void)
8716     -{
8717     -#ifdef CONFIG_HIGHMEM
8718     - num_physpages = highend_pfn;
8719     -#else
8720     - num_physpages = max_low_pfn;
8721     -#endif
8722     -#ifdef CONFIG_FLATMEM
8723     - max_mapnr = num_physpages;
8724     -#endif
8725     -}
8726     -
8727     static struct kcore_list kcore_mem, kcore_vmalloc;
8728    
8729     void __init mem_init(void)
8730     @@ -630,8 +625,7 @@
8731     #endif
8732    
8733     #ifdef CONFIG_FLATMEM
8734     - if (!mem_map)
8735     - BUG();
8736     + BUG_ON(!mem_map);
8737     #endif
8738    
8739     bad_ppro = ppro_with_ram_bug();
8740     @@ -646,17 +640,6 @@
8741     }
8742     #endif
8743    
8744     - set_max_mapnr_init();
8745     -
8746     -#ifdef CONFIG_HIGHMEM
8747     - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
8748     -#else
8749     - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
8750     -#endif
8751     - printk("vmalloc area: %lx-%lx, maxmem %lx\n",
8752     - VMALLOC_START,VMALLOC_END,MAXMEM);
8753     - BUG_ON(VMALLOC_START > VMALLOC_END);
8754     -
8755     /* this will put all low memory onto the freelists */
8756     totalram_pages += free_all_bootmem();
8757     /* XEN: init and count low-mem pages outside initial allocation. */
8758     @@ -694,6 +677,48 @@
8759     (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
8760     );
8761    
8762     +#if 1 /* double-sanity-check paranoia */
8763     + printk("virtual kernel memory layout:\n"
8764     + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8765     +#ifdef CONFIG_HIGHMEM
8766     + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
8767     +#endif
8768     + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
8769     + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
8770     + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
8771     + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
8772     + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
8773     + FIXADDR_START, FIXADDR_TOP,
8774     + (FIXADDR_TOP - FIXADDR_START) >> 10,
8775     +
8776     +#ifdef CONFIG_HIGHMEM
8777     + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
8778     + (LAST_PKMAP*PAGE_SIZE) >> 10,
8779     +#endif
8780     +
8781     + VMALLOC_START, VMALLOC_END,
8782     + (VMALLOC_END - VMALLOC_START) >> 20,
8783     +
8784     + (unsigned long)__va(0), (unsigned long)high_memory,
8785     + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
8786     +
8787     + (unsigned long)&__init_begin, (unsigned long)&__init_end,
8788     + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
8789     +
8790     + (unsigned long)&_etext, (unsigned long)&_edata,
8791     + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
8792     +
8793     + (unsigned long)&_text, (unsigned long)&_etext,
8794     + ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
8795     +
8796     +#ifdef CONFIG_HIGHMEM
8797     + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
8798     + BUG_ON(VMALLOC_END > PKMAP_BASE);
8799     +#endif
8800     + BUG_ON(VMALLOC_START > VMALLOC_END);
8801     + BUG_ON((unsigned long)high_memory > VMALLOC_START);
8802     +#endif /* double-sanity-check paranoia */
8803     +
8804     #ifdef CONFIG_X86_PAE
8805     if (!cpu_has_pae)
8806     panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
8807     @@ -724,7 +749,7 @@
8808     int arch_add_memory(int nid, u64 start, u64 size)
8809     {
8810     struct pglist_data *pgdata = &contig_page_data;
8811     - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
8812     + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
8813     unsigned long start_pfn = start >> PAGE_SHIFT;
8814     unsigned long nr_pages = size >> PAGE_SHIFT;
8815    
8816 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/init_64-xen.c linux-2.6.25-xen/arch/x86/mm/init_64-xen.c
8817     --- linux-2.6.25/arch/x86/mm/init_64-xen.c 2008-05-23 20:51:11.000000000 +0200
8818     +++ linux-2.6.25-xen/arch/x86/mm/init_64-xen.c 2008-05-23 20:39:03.000000000 +0200
8819 niro 609 @@ -61,8 +61,6 @@
8820    
8821     extern unsigned long *contiguous_bitmap;
8822    
8823     -static unsigned long dma_reserve __initdata;
8824     -
8825     DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
8826     extern unsigned long start_pfn;
8827    
8828     @@ -416,7 +414,6 @@
8829    
8830     /* actually usually some more */
8831     if (size >= LARGE_PAGE_SIZE) {
8832     - printk("SMBIOS area too long %lu\n", size);
8833     return NULL;
8834     }
8835     set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
8836     @@ -438,13 +435,15 @@
8837     #endif
8838    
8839     static void __meminit
8840     -phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
8841     +phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
8842     {
8843     - int i, k;
8844     + int i = pmd_index(address);
8845    
8846     - for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
8847     + for (; i < PTRS_PER_PMD; i++) {
8848     unsigned long pte_phys;
8849     + pmd_t *pmd = pmd_page + i;
8850     pte_t *pte, *pte_save;
8851     + int k;
8852    
8853     if (address >= end) {
8854     if (!after_bootmem)
8855     @@ -452,6 +451,12 @@
8856     set_pmd(pmd, __pmd(0));
8857     break;
8858     }
8859     +
8860     + if (__pmd_val(*pmd)) {
8861     + address += PMD_SIZE;
8862     + continue;
8863     + }
8864     +
8865     pte = alloc_static_page(&pte_phys);
8866     pte_save = pte;
8867     for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
8868     @@ -474,40 +479,35 @@
8869     static void __meminit
8870     phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
8871     {
8872     - pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
8873     -
8874     - if (pmd_none(*pmd)) {
8875     - spin_lock(&init_mm.page_table_lock);
8876     - phys_pmd_init(pmd, address, end);
8877     - spin_unlock(&init_mm.page_table_lock);
8878     - __flush_tlb_all();
8879     - }
8880     + pmd_t *pmd = pmd_offset(pud,0);
8881     + spin_lock(&init_mm.page_table_lock);
8882     + phys_pmd_init(pmd, address, end);
8883     + spin_unlock(&init_mm.page_table_lock);
8884     + __flush_tlb_all();
8885     }
8886    
8887     -static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
8888     +static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
8889     {
8890     - long i = pud_index(address);
8891     -
8892     - pud = pud + i;
8893     -
8894     - if (after_bootmem && pud_val(*pud)) {
8895     - phys_pmd_update(pud, address, end);
8896     - return;
8897     - }
8898     + int i = pud_index(addr);
8899    
8900     - for (; i < PTRS_PER_PUD; pud++, i++) {
8901     - unsigned long paddr, pmd_phys;
8902     + for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
8903     + unsigned long pmd_phys;
8904     + pud_t *pud = pud_page + pud_index(addr);
8905     pmd_t *pmd;
8906    
8907     - paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
8908     - if (paddr >= end)
8909     + if (addr >= end)
8910     break;
8911    
8912     + if (__pud_val(*pud)) {
8913     + phys_pmd_update(pud, addr, end);
8914     + continue;
8915     + }
8916     +
8917     pmd = alloc_static_page(&pmd_phys);
8918     early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
8919     spin_lock(&init_mm.page_table_lock);
8920     set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
8921     - phys_pmd_init(pmd, paddr, end);
8922     + phys_pmd_init(pmd, addr, end);
8923     spin_unlock(&init_mm.page_table_lock);
8924     }
8925     __flush_tlb();
8926     @@ -771,69 +771,18 @@
8927     #endif
8928     }
8929    
8930     -/* Compute zone sizes for the DMA and DMA32 zones in a node. */
8931     -__init void
8932     -size_zones(unsigned long *z, unsigned long *h,
8933     - unsigned long start_pfn, unsigned long end_pfn)
8934     -{
8935     - int i;
8936     - unsigned long w;
8937     -
8938     - for (i = 0; i < MAX_NR_ZONES; i++)
8939     - z[i] = 0;
8940     -
8941     - if (start_pfn < MAX_DMA_PFN)
8942     - z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
8943     - if (start_pfn < MAX_DMA32_PFN) {
8944     - unsigned long dma32_pfn = MAX_DMA32_PFN;
8945     - if (dma32_pfn > end_pfn)
8946     - dma32_pfn = end_pfn;
8947     - z[ZONE_DMA32] = dma32_pfn - start_pfn;
8948     - }
8949     - z[ZONE_NORMAL] = end_pfn - start_pfn;
8950     -
8951     - /* Remove lower zones from higher ones. */
8952     - w = 0;
8953     - for (i = 0; i < MAX_NR_ZONES; i++) {
8954     - if (z[i])
8955     - z[i] -= w;
8956     - w += z[i];
8957     - }
8958     -
8959     - /* Compute holes */
8960     - w = start_pfn;
8961     - for (i = 0; i < MAX_NR_ZONES; i++) {
8962     - unsigned long s = w;
8963     - w += z[i];
8964     - h[i] = e820_hole_size(s, w);
8965     - }
8966     -
8967     - /* Add the space pace needed for mem_map to the holes too. */
8968     - for (i = 0; i < MAX_NR_ZONES; i++)
8969     - h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
8970     -
8971     - /* The 16MB DMA zone has the kernel and other misc mappings.
8972     - Account them too */
8973     - if (h[ZONE_DMA]) {
8974     - h[ZONE_DMA] += dma_reserve;
8975     - if (h[ZONE_DMA] >= z[ZONE_DMA]) {
8976     - printk(KERN_WARNING
8977     - "Kernel too large and filling up ZONE_DMA?\n");
8978     - h[ZONE_DMA] = z[ZONE_DMA];
8979     - }
8980     - }
8981     -}
8982     -
8983     #ifndef CONFIG_NUMA
8984     void __init paging_init(void)
8985     {
8986     - unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
8987     + unsigned long max_zone_pfns[MAX_NR_ZONES];
8988     + memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
8989     + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
8990     + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
8991     + max_zone_pfns[ZONE_NORMAL] = end_pfn;
8992    
8993     memory_present(0, 0, end_pfn);
8994     sparse_init();
8995     - size_zones(zones, holes, 0, end_pfn);
8996     - free_area_init_node(0, NODE_DATA(0), zones,
8997     - __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
8998     + free_area_init_nodes(max_zone_pfns);
8999    
9000     init_mm.context.pinned = 1;
9001     }
9002     @@ -887,36 +836,23 @@
9003    
9004     #ifdef CONFIG_MEMORY_HOTPLUG
9005     /*
9006     - * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
9007     - * via probe interface of sysfs. If acpi notifies hot-add event, then it
9008     - * can tell node id by searching dsdt. But, probe interface doesn't have
9009     - * node id. So, return 0 as node id at this time.
9010     - */
9011     -#ifdef CONFIG_NUMA
9012     -int memory_add_physaddr_to_nid(u64 start)
9013     -{
9014     - return 0;
9015     -}
9016     -#endif
9017     -
9018     -/*
9019     * Memory is added always to NORMAL zone. This means you will never get
9020     * additional DMA/DMA32 memory.
9021     */
9022     int arch_add_memory(int nid, u64 start, u64 size)
9023     {
9024     struct pglist_data *pgdat = NODE_DATA(nid);
9025     - struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
9026     + struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
9027     unsigned long start_pfn = start >> PAGE_SHIFT;
9028     unsigned long nr_pages = size >> PAGE_SHIFT;
9029     int ret;
9030    
9031     + init_memory_mapping(start, (start + size -1));
9032     +
9033     ret = __add_pages(zone, start_pfn, nr_pages);
9034     if (ret)
9035     goto error;
9036    
9037     - init_memory_mapping(start, (start + size -1));
9038     -
9039     return ret;
9040     error:
9041     printk("%s: Problem encountered in __add_pages!\n", __func__);
9042     @@ -930,7 +866,17 @@
9043     }
9044     EXPORT_SYMBOL_GPL(remove_memory);
9045    
9046     -#else /* CONFIG_MEMORY_HOTPLUG */
9047     +#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
9048     +int memory_add_physaddr_to_nid(u64 start)
9049     +{
9050     + return 0;
9051     +}
9052     +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
9053     +#endif
9054     +
9055     +#endif /* CONFIG_MEMORY_HOTPLUG */
9056     +
9057     +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
9058     /*
9059     * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
9060     * just online the pages.
9061     @@ -956,7 +902,7 @@
9062     }
9063     return err;
9064     }
9065     -#endif /* CONFIG_MEMORY_HOTPLUG */
9066     +#endif
9067    
9068     static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
9069     kcore_vsyscall;
9070     @@ -973,12 +919,6 @@
9071    
9072     pci_iommu_alloc();
9073    
9074     - /* How many end-of-memory variables you have, grandma! */
9075     - max_low_pfn = end_pfn;
9076     - max_pfn = end_pfn;
9077     - num_physpages = end_pfn;
9078     - high_memory = (void *) __va(end_pfn * PAGE_SIZE);
9079     -
9080     /* clear the zero-page */
9081     memset(empty_zero_page, 0, PAGE_SIZE);
9082    
9083     @@ -996,7 +936,8 @@
9084     init_page_count(pfn_to_page(pfn));
9085     totalram_pages++;
9086     }
9087     - reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
9088     + reservedpages = end_pfn - totalram_pages -
9089     + absent_pages_in_range(0, end_pfn);
9090    
9091     after_bootmem = 1;
9092    
9093     @@ -1103,15 +1044,34 @@
9094    
9095     void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
9096     {
9097     - /* Should check here against the e820 map to avoid double free */
9098     #ifdef CONFIG_NUMA
9099     int nid = phys_to_nid(phys);
9100     +#endif
9101     + unsigned long pfn = phys >> PAGE_SHIFT;
9102     + if (pfn >= end_pfn) {
9103     + /* This can happen with kdump kernels when accessing firmware
9104     + tables. */
9105     + if (pfn < end_pfn_map)
9106     + return;
9107     + printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
9108     + phys, len);
9109     + return;
9110     + }
9111     +
9112     + /* Should check here against the e820 map to avoid double free */
9113     +#ifdef CONFIG_NUMA
9114     reserve_bootmem_node(NODE_DATA(nid), phys, len);
9115     #else
9116     reserve_bootmem(phys, len);
9117     #endif
9118     - if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
9119     +#ifndef CONFIG_XEN
9120     + if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
9121     + static unsigned long dma_reserve __initdata;
9122     +
9123     dma_reserve += len / PAGE_SIZE;
9124     + set_dma_reserve(dma_reserve);
9125     + }
9126     +#endif
9127     }
9128    
9129     int kern_addr_valid(unsigned long addr)
9130 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/ioremap_32-xen.c linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c
9131     --- linux-2.6.25/arch/x86/mm/ioremap_32-xen.c 2008-05-23 20:51:11.000000000 +0200
9132     +++ linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c 2008-05-23 20:39:03.000000000 +0200
9133 niro 609 @@ -12,7 +12,7 @@
9134     #include <linux/init.h>
9135     #include <linux/slab.h>
9136     #include <linux/module.h>
9137     -#include <asm/io.h>
9138     +#include <linux/io.h>
9139     #include <asm/fixmap.h>
9140     #include <asm/cacheflush.h>
9141     #include <asm/tlbflush.h>
9142     @@ -118,7 +118,7 @@
9143     if (domid == DOMID_SELF)
9144     return -EINVAL;
9145    
9146     - vma->vm_flags |= VM_IO | VM_RESERVED;
9147     + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
9148    
9149     vma->vm_mm->context.has_foreign_mappings = 1;
9150    
9151     @@ -203,6 +203,7 @@
9152     void __iomem * addr;
9153     struct vm_struct * area;
9154     unsigned long offset, last_addr;
9155     + pgprot_t prot;
9156     domid_t domid = DOMID_IO;
9157    
9158     /* Don't allow wraparound or zero size */
9159     @@ -234,6 +235,8 @@
9160     domid = DOMID_SELF;
9161     }
9162    
9163     + prot = __pgprot(_KERNPG_TABLE | flags);
9164     +
9165     /*
9166     * Mappings have to be page-aligned
9167     */
9168     @@ -249,10 +252,9 @@
9169     return NULL;
9170     area->phys_addr = phys_addr;
9171     addr = (void __iomem *) area->addr;
9172     - flags |= _KERNPG_TABLE;
9173     if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
9174     phys_addr>>PAGE_SHIFT,
9175     - size, __pgprot(flags), domid)) {
9176     + size, prot, domid)) {
9177     vunmap((void __force *) addr);
9178     return NULL;
9179     }
9180 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/pageattr_64-xen.c linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c
9181     --- linux-2.6.25/arch/x86/mm/pageattr_64-xen.c 2008-05-23 20:51:11.000000000 +0200
9182     +++ linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c 2008-05-23 20:39:03.000000000 +0200
9183 niro 609 @@ -371,8 +371,8 @@
9184     BUG_ON(pud_none(*pud));
9185     pmd = pmd_offset(pud, address);
9186     BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
9187     - pgprot_val(ref_prot) |= _PAGE_PSE;
9188     large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
9189     + large_pte = pte_mkhuge(large_pte);
9190     set_pte((pte_t *)pmd, large_pte);
9191     }
9192    
9193     @@ -382,32 +382,28 @@
9194     {
9195     pte_t *kpte;
9196     struct page *kpte_page;
9197     - unsigned kpte_flags;
9198     pgprot_t ref_prot2;
9199     kpte = lookup_address(address);
9200     if (!kpte) return 0;
9201     kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
9202     - kpte_flags = pte_val(*kpte);
9203     if (pgprot_val(prot) != pgprot_val(ref_prot)) {
9204     - if ((kpte_flags & _PAGE_PSE) == 0) {
9205     + if (!pte_huge(*kpte)) {
9206     set_pte(kpte, pfn_pte(pfn, prot));
9207     } else {
9208     /*
9209     * split_large_page will take the reference for this
9210     * change_page_attr on the split page.
9211     */
9212     -
9213     struct page *split;
9214     - ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
9215     -
9216     + ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
9217     split = split_large_page(address, prot, ref_prot2);
9218     if (!split)
9219     return -ENOMEM;
9220     - set_pte(kpte,mk_pte(split, ref_prot2));
9221     + set_pte(kpte, mk_pte(split, ref_prot2));
9222     kpte_page = split;
9223     - }
9224     + }
9225     page_private(kpte_page)++;
9226     - } else if ((kpte_flags & _PAGE_PSE) == 0) {
9227     + } else if (!pte_huge(*kpte)) {
9228     set_pte(kpte, pfn_pte(pfn, ref_prot));
9229     BUG_ON(page_private(kpte_page) == 0);
9230     page_private(kpte_page)--;
9231     @@ -464,10 +460,12 @@
9232     * lowmem */
9233     if (__pa(address) < KERNEL_TEXT_SIZE) {
9234     unsigned long addr2;
9235     - pgprot_t prot2 = prot;
9236     + pgprot_t prot2;
9237     addr2 = __START_KERNEL_map + __pa(address);
9238     - pgprot_val(prot2) &= ~_PAGE_NX;
9239     - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
9240     + /* Make sure the kernel mappings stay executable */
9241     + prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
9242     + err = __change_page_attr(addr2, pfn, prot2,
9243     + PAGE_KERNEL_EXEC);
9244     }
9245     }
9246     up_write(&init_mm.mmap_sem);
9247 niro 611 diff -Naur linux-2.6.25/arch/x86/mm/pgtable_32-xen.c linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c
9248     --- linux-2.6.25/arch/x86/mm/pgtable_32-xen.c 2008-05-23 20:51:11.000000000 +0200
9249     +++ linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c 2008-05-23 20:39:03.000000000 +0200
9250 niro 609 @@ -68,7 +68,9 @@
9251     printk(KERN_INFO "%lu pages writeback\n",
9252     global_page_state(NR_WRITEBACK));
9253     printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
9254     - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
9255     + printk(KERN_INFO "%lu pages slab\n",
9256     + global_page_state(NR_SLAB_RECLAIMABLE) +
9257     + global_page_state(NR_SLAB_UNRECLAIMABLE));
9258     printk(KERN_INFO "%lu pages pagetables\n",
9259     global_page_state(NR_PAGETABLE));
9260     }
9261     @@ -108,18 +110,11 @@
9262     __flush_tlb_one(vaddr);
9263     }
9264    
9265     -static int nr_fixmaps = 0;
9266     +static int fixmaps;
9267     unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
9268     -unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
9269     +unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
9270     EXPORT_SYMBOL(__FIXADDR_TOP);
9271    
9272     -void __init set_fixaddr_top(unsigned long top)
9273     -{
9274     - BUG_ON(nr_fixmaps > 0);
9275     - hypervisor_virt_start = top;
9276     - __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
9277     -}
9278     -
9279     void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
9280     {
9281     unsigned long address = __fix_to_virt(idx);
9282     @@ -141,7 +136,21 @@
9283     if (HYPERVISOR_update_va_mapping(address, pte,
9284     UVMF_INVLPG|UVMF_ALL))
9285     BUG();
9286     - nr_fixmaps++;
9287     + fixmaps++;
9288     +}
9289     +
9290     +/**
9291     + * reserve_top_address - reserves a hole in the top of kernel address space
9292     + * @reserve - size of hole to reserve
9293     + *
9294     + * Can be used to relocate the fixmap area and poke a hole in the top
9295     + * of kernel address space to make room for a hypervisor.
9296     + */
9297     +void __init reserve_top_address(unsigned long reserve)
9298     +{
9299     + BUG_ON(fixmaps > 0);
9300     + __FIXADDR_TOP = -reserve - PAGE_SIZE;
9301     + __VMALLOC_RESERVE += reserve;
9302     }
9303    
9304     pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
9305 niro 611 diff -Naur linux-2.6.25/arch/x86/pci/irq-xen.c linux-2.6.25-xen/arch/x86/pci/irq-xen.c
9306     --- linux-2.6.25/arch/x86/pci/irq-xen.c 2008-05-23 20:51:11.000000000 +0200
9307     +++ linux-2.6.25-xen/arch/x86/pci/irq-xen.c 2008-05-23 20:39:03.000000000 +0200
9308 niro 609 @@ -991,10 +991,6 @@
9309     pci_name(bridge), 'A' + pin, irq);
9310     }
9311     if (irq >= 0) {
9312     - if (use_pci_vector() &&
9313     - !platform_legacy_irq(irq))
9314     - irq = IO_APIC_VECTOR(irq);
9315     -
9316     printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9317     pci_name(dev), 'A' + pin, irq);
9318     dev->irq = irq;
9319     @@ -1155,10 +1151,6 @@
9320     }
9321     dev = temp_dev;
9322     if (irq >= 0) {
9323     -#ifdef CONFIG_PCI_MSI
9324     - if (!platform_legacy_irq(irq))
9325     - irq = IO_APIC_VECTOR(irq);
9326     -#endif
9327     printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
9328     pci_name(dev), 'A' + pin, irq);
9329     dev->irq = irq;
9330     @@ -1179,33 +1171,3 @@
9331     }
9332     return 0;
9333     }
9334     -
9335     -int pci_vector_resources(int last, int nr_released)
9336     -{
9337     - int count = nr_released;
9338     -
9339     - int next = last;
9340     - int offset = (last % 8);
9341     -
9342     - while (next < FIRST_SYSTEM_VECTOR) {
9343     - next += 8;
9344     -#ifdef CONFIG_X86_64
9345     - if (next == IA32_SYSCALL_VECTOR)
9346     - continue;
9347     -#else
9348     - if (next == SYSCALL_VECTOR)
9349     - continue;
9350     -#endif
9351     - count++;
9352     - if (next >= FIRST_SYSTEM_VECTOR) {
9353     - if (offset%8) {
9354     - next = FIRST_DEVICE_VECTOR + offset;
9355     - offset++;
9356     - continue;
9357     - }
9358     - count--;
9359     - }
9360     - }
9361     -
9362     - return count;
9363     -}
9364 niro 611 diff -Naur linux-2.6.25/drivers/char/tpm/tpm_xen.c linux-2.6.25-xen/drivers/char/tpm/tpm_xen.c
9365     --- linux-2.6.25/drivers/char/tpm/tpm_xen.c 2008-05-23 20:51:15.000000000 +0200
9366     +++ linux-2.6.25-xen/drivers/char/tpm/tpm_xen.c 2008-05-23 20:39:03.000000000 +0200
9367 niro 609 @@ -85,8 +85,7 @@
9368    
9369     /* local function prototypes */
9370     static irqreturn_t tpmif_int(int irq,
9371     - void *tpm_priv,
9372     - struct pt_regs *ptregs);
9373     + void *tpm_priv);
9374     static void tpmif_rx_action(unsigned long unused);
9375     static int tpmif_connect(struct xenbus_device *dev,
9376     struct tpm_private *tp,
9377     @@ -559,7 +558,7 @@
9378     }
9379    
9380    
9381     -static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
9382     +static irqreturn_t tpmif_int(int irq, void *tpm_priv)
9383     {
9384     struct tpm_private *tp = tpm_priv;
9385     unsigned long flags;
9386 niro 611 diff -Naur linux-2.6.25/drivers/pci/Kconfig linux-2.6.25-xen/drivers/pci/Kconfig
9387     --- linux-2.6.25/drivers/pci/Kconfig 2008-04-17 04:49:44.000000000 +0200
9388     +++ linux-2.6.25-xen/drivers/pci/Kconfig 2008-05-23 20:39:03.000000000 +0200
9389 niro 609 @@ -45,7 +45,7 @@
9390     config HT_IRQ
9391     bool "Interrupts on hypertransport devices"
9392     default y
9393     - depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
9394     + depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
9395     help
9396     This allows native hypertransport devices to use interrupts.
9397    
9398 niro 611 diff -Naur linux-2.6.25/drivers/xen/balloon/balloon.c linux-2.6.25-xen/drivers/xen/balloon/balloon.c
9399     --- linux-2.6.25/drivers/xen/balloon/balloon.c 2008-05-23 20:51:11.000000000 +0200
9400     +++ linux-2.6.25-xen/drivers/xen/balloon/balloon.c 2008-05-23 20:39:03.000000000 +0200
9401 niro 609 @@ -84,7 +84,7 @@
9402     /* VM /proc information for memory */
9403     extern unsigned long totalram_pages;
9404    
9405     -#ifndef MODULE
9406     +#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
9407     extern unsigned long totalhigh_pages;
9408     #define inc_totalhigh_pages() (totalhigh_pages++)
9409     #define dec_totalhigh_pages() (totalhigh_pages--)
9410 niro 611 diff -Naur linux-2.6.25/drivers/xen/blkback/blkback.c linux-2.6.25-xen/drivers/xen/blkback/blkback.c
9411     --- linux-2.6.25/drivers/xen/blkback/blkback.c 2008-05-23 20:51:11.000000000 +0200
9412     +++ linux-2.6.25-xen/drivers/xen/blkback/blkback.c 2008-05-23 20:39:03.000000000 +0200
9413 niro 609 @@ -288,7 +288,7 @@
9414     wake_up(&blkif->wq);
9415     }
9416    
9417     -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9418     +irqreturn_t blkif_be_int(int irq, void *dev_id)
9419     {
9420     blkif_notify_work(dev_id);
9421     return IRQ_HANDLED;
9422 niro 611 diff -Naur linux-2.6.25/drivers/xen/blkback/common.h linux-2.6.25-xen/drivers/xen/blkback/common.h
9423     --- linux-2.6.25/drivers/xen/blkback/common.h 2008-05-23 20:51:11.000000000 +0200
9424     +++ linux-2.6.25-xen/drivers/xen/blkback/common.h 2008-05-23 20:39:03.000000000 +0200
9425 niro 609 @@ -130,7 +130,7 @@
9426    
9427     void blkif_xenbus_init(void);
9428    
9429     -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9430     +irqreturn_t blkif_be_int(int irq, void *dev_id);
9431     int blkif_schedule(void *arg);
9432    
9433     int blkback_barrier(struct xenbus_transaction xbt,
9434 niro 611 diff -Naur linux-2.6.25/drivers/xen/blkfront/blkfront.c linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c
9435     --- linux-2.6.25/drivers/xen/blkfront/blkfront.c 2008-05-23 20:51:11.000000000 +0200
9436     +++ linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c 2008-05-23 20:39:03.000000000 +0200
9437 niro 609 @@ -69,7 +69,7 @@
9438    
9439     static void kick_pending_request_queues(struct blkfront_info *);
9440    
9441     -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9442     +static irqreturn_t blkif_int(int irq, void *dev_id);
9443     static void blkif_restart_queue(void *arg);
9444     static void blkif_recover(struct blkfront_info *);
9445     static void blkif_completion(struct blk_shadow *);
9446     @@ -698,7 +698,7 @@
9447     }
9448    
9449    
9450     -static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9451     +static irqreturn_t blkif_int(int irq, void *dev_id)
9452     {
9453     struct request *req;
9454     blkif_response_t *bret;
9455 niro 611 diff -Naur linux-2.6.25/drivers/xen/blktap/blktap.c linux-2.6.25-xen/drivers/xen/blktap/blktap.c
9456     --- linux-2.6.25/drivers/xen/blktap/blktap.c 2008-05-23 20:51:11.000000000 +0200
9457     +++ linux-2.6.25-xen/drivers/xen/blktap/blktap.c 2008-05-23 20:39:03.000000000 +0200
9458 niro 609 @@ -1175,7 +1175,7 @@
9459     wake_up(&blkif->wq);
9460     }
9461    
9462     -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9463     +irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
9464     {
9465     blkif_notify_work(dev_id);
9466     return IRQ_HANDLED;
9467 niro 611 diff -Naur linux-2.6.25/drivers/xen/blktap/common.h linux-2.6.25-xen/drivers/xen/blktap/common.h
9468     --- linux-2.6.25/drivers/xen/blktap/common.h 2008-05-23 20:51:11.000000000 +0200
9469     +++ linux-2.6.25-xen/drivers/xen/blktap/common.h 2008-05-23 20:39:03.000000000 +0200
9470 niro 609 @@ -112,7 +112,7 @@
9471    
9472     void tap_blkif_xenbus_init(void);
9473    
9474     -irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9475     +irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
9476     int tap_blkif_schedule(void *arg);
9477    
9478     int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
9479 niro 611 diff -Naur linux-2.6.25/drivers/xen/console/console.c linux-2.6.25-xen/drivers/xen/console/console.c
9480     --- linux-2.6.25/drivers/xen/console/console.c 2008-05-23 20:51:11.000000000 +0200
9481     +++ linux-2.6.25-xen/drivers/xen/console/console.c 2008-05-23 20:39:03.000000000 +0200
9482 niro 609 @@ -345,7 +345,7 @@
9483     static int xencons_priv_irq;
9484     static char x_char;
9485    
9486     -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
9487     +void xencons_rx(char *buf, unsigned len)
9488     {
9489     int i;
9490     unsigned long flags;
9491     @@ -370,8 +370,7 @@
9492     if (time_before(jiffies, sysrq_timeout)) {
9493     spin_unlock_irqrestore(
9494     &xencons_lock, flags);
9495     - handle_sysrq(
9496     - buf[i], regs, xencons_tty);
9497     + handle_sysrq(buf[i], xencons_tty);
9498     spin_lock_irqsave(
9499     &xencons_lock, flags);
9500     continue;
9501     @@ -436,14 +435,13 @@
9502     }
9503    
9504     /* Privileged receive callback and transmit kicker. */
9505     -static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
9506     - struct pt_regs *regs)
9507     +static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
9508     {
9509     static char rbuf[16];
9510     int l;
9511    
9512     while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
9513     - xencons_rx(rbuf, l, regs);
9514     + xencons_rx(rbuf, l);
9515    
9516     xencons_tx();
9517    
9518 niro 611 diff -Naur linux-2.6.25/drivers/xen/console/xencons_ring.c linux-2.6.25-xen/drivers/xen/console/xencons_ring.c
9519     --- linux-2.6.25/drivers/xen/console/xencons_ring.c 2008-05-23 20:51:11.000000000 +0200
9520     +++ linux-2.6.25-xen/drivers/xen/console/xencons_ring.c 2008-05-23 20:39:03.000000000 +0200
9521 niro 609 @@ -83,7 +83,7 @@
9522     return sent;
9523     }
9524    
9525     -static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
9526     +static irqreturn_t handle_input(int irq, void *unused)
9527     {
9528     struct xencons_interface *intf = xencons_interface();
9529     XENCONS_RING_IDX cons, prod;
9530     @@ -94,7 +94,7 @@
9531     BUG_ON((prod - cons) > sizeof(intf->in));
9532    
9533     while (cons != prod) {
9534     - xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
9535     + xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
9536     cons++;
9537     }
9538    
9539 niro 611 diff -Naur linux-2.6.25/drivers/xen/core/evtchn.c linux-2.6.25-xen/drivers/xen/core/evtchn.c
9540     --- linux-2.6.25/drivers/xen/core/evtchn.c 2008-05-23 20:51:11.000000000 +0200
9541     +++ linux-2.6.25-xen/drivers/xen/core/evtchn.c 2008-05-23 20:39:03.000000000 +0200
9542 niro 609 @@ -507,7 +507,7 @@
9543    
9544     int bind_caller_port_to_irqhandler(
9545     unsigned int caller_port,
9546     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9547     + irq_handler_t handler,
9548     unsigned long irqflags,
9549     const char *devname,
9550     void *dev_id)
9551     @@ -530,7 +530,7 @@
9552    
9553     int bind_listening_port_to_irqhandler(
9554     unsigned int remote_domain,
9555     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9556     + irq_handler_t handler,
9557     unsigned long irqflags,
9558     const char *devname,
9559     void *dev_id)
9560     @@ -554,7 +554,7 @@
9561     int bind_interdomain_evtchn_to_irqhandler(
9562     unsigned int remote_domain,
9563     unsigned int remote_port,
9564     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9565     + irq_handler_t handler,
9566     unsigned long irqflags,
9567     const char *devname,
9568     void *dev_id)
9569     @@ -578,7 +578,7 @@
9570     int bind_virq_to_irqhandler(
9571     unsigned int virq,
9572     unsigned int cpu,
9573     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9574     + irq_handler_t handler,
9575     unsigned long irqflags,
9576     const char *devname,
9577     void *dev_id)
9578     @@ -602,7 +602,7 @@
9579     int bind_ipi_to_irqhandler(
9580     unsigned int ipi,
9581     unsigned int cpu,
9582     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
9583     + irq_handler_t handler,
9584     unsigned long irqflags,
9585     const char *devname,
9586     void *dev_id)
9587     @@ -687,15 +687,7 @@
9588     return 0;
9589     }
9590    
9591     -static void shutdown_dynirq(unsigned int irq)
9592     -{
9593     - int evtchn = evtchn_from_irq(irq);
9594     -
9595     - if (VALID_EVTCHN(evtchn))
9596     - mask_evtchn(evtchn);
9597     -}
9598     -
9599     -static void enable_dynirq(unsigned int irq)
9600     +static void unmask_dynirq(unsigned int irq)
9601     {
9602     int evtchn = evtchn_from_irq(irq);
9603    
9604     @@ -703,7 +695,7 @@
9605     unmask_evtchn(evtchn);
9606     }
9607    
9608     -static void disable_dynirq(unsigned int irq)
9609     +static void mask_dynirq(unsigned int irq)
9610     {
9611     int evtchn = evtchn_from_irq(irq);
9612    
9613     @@ -731,12 +723,12 @@
9614     unmask_evtchn(evtchn);
9615     }
9616    
9617     -static struct hw_interrupt_type dynirq_type = {
9618     - .typename = "Dynamic-irq",
9619     +static struct irq_chip dynirq_chip = {
9620     + .name = "Dynamic-irq",
9621     .startup = startup_dynirq,
9622     - .shutdown = shutdown_dynirq,
9623     - .enable = enable_dynirq,
9624     - .disable = disable_dynirq,
9625     + .mask = mask_dynirq,
9626     + .unmask = unmask_dynirq,
9627     + .mask_ack = ack_dynirq,
9628     .ack = ack_dynirq,
9629     .end = end_dynirq,
9630     #ifdef CONFIG_SMP
9631     @@ -820,12 +812,12 @@
9632     irq_info[irq] = IRQ_UNBOUND;
9633     }
9634    
9635     -static void enable_pirq(unsigned int irq)
9636     +static void unmask_pirq(unsigned int irq)
9637     {
9638     startup_pirq(irq);
9639     }
9640    
9641     -static void disable_pirq(unsigned int irq)
9642     +static void mask_pirq(unsigned int irq)
9643     {
9644     }
9645    
9646     @@ -854,12 +846,14 @@
9647     }
9648     }
9649    
9650     -static struct hw_interrupt_type pirq_type = {
9651     +static struct irq_chip pirq_chip = {
9652     + .name = "Phys-irq",
9653     .typename = "Phys-irq",
9654     .startup = startup_pirq,
9655     .shutdown = shutdown_pirq,
9656     - .enable = enable_pirq,
9657     - .disable = disable_pirq,
9658     + .mask = mask_pirq,
9659     + .unmask = unmask_pirq,
9660     + .mask_ack = ack_pirq,
9661     .ack = ack_pirq,
9662     .end = end_pirq,
9663     #ifdef CONFIG_SMP
9664     @@ -1043,7 +1037,8 @@
9665     irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED;
9666     irq_desc[dynirq_to_irq(i)].action = NULL;
9667     irq_desc[dynirq_to_irq(i)].depth = 1;
9668     - irq_desc[dynirq_to_irq(i)].chip = &dynirq_type;
9669     + set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip,
9670     + handle_level_irq, "level");
9671     }
9672    
9673     /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
9674     @@ -1059,6 +1054,7 @@
9675     irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
9676     irq_desc[pirq_to_irq(i)].action = NULL;
9677     irq_desc[pirq_to_irq(i)].depth = 1;
9678     - irq_desc[pirq_to_irq(i)].chip = &pirq_type;
9679     + set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip,
9680     + handle_level_irq, "level");
9681     }
9682     }
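A note on the conversions in this file: 2.6.19 removed the struct
pt_regs argument from interrupt handlers and introduced the
irq_handler_t typedef, which is why every event-channel binding above
shrinks to the two-argument form. A minimal sketch of a converted
handler, using the bind_virq_to_irqhandler() prototype shown above
(the handler and device names are illustrative only):

	static irqreturn_t example_handler(int irq, void *dev_id)
	{
		/* dev_id is the cookie supplied when binding */
		return IRQ_HANDLED;
	}

	/* irq = bind_virq_to_irqhandler(VIRQ_DEBUG, 0, example_handler,
	   0, "example", NULL); */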
9683 niro 611 diff -Naur linux-2.6.25/drivers/xen/core/reboot.c linux-2.6.25-xen/drivers/xen/core/reboot.c
9684     --- linux-2.6.25/drivers/xen/core/reboot.c 2008-05-23 20:51:11.000000000 +0200
9685     +++ linux-2.6.25-xen/drivers/xen/core/reboot.c 2008-05-23 20:39:03.000000000 +0200
9686 niro 609 @@ -13,6 +13,7 @@
9687    
9688     #ifdef HAVE_XEN_PLATFORM_COMPAT_H
9689     #include <xen/platform-compat.h>
9690     +#undef handle_sysrq
9691     #endif
9692    
9693     MODULE_LICENSE("Dual BSD/GPL");
9694     @@ -203,7 +204,7 @@
9695    
9696     #ifdef CONFIG_MAGIC_SYSRQ
9697     if (sysrq_key != '\0')
9698     - handle_sysrq(sysrq_key, NULL, NULL);
9699     + handle_sysrq(sysrq_key, NULL);
9700     #endif
9701     }
9702    
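The handle_sysrq() change is the same pt_regs removal as above: from
2.6.19 the helper takes only the key and an optional originating tty,
i.e. something like

	void handle_sysrq(int key, struct tty_struct *tty);

so the NULL here stands for "no tty". The added #undef is presumably
there to reach the kernel's own two-argument function rather than the
compatibility wrapper that the Xen platform-compat header defines for
older kernels.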
9703 niro 611 diff -Naur linux-2.6.25/drivers/xen/core/smpboot.c linux-2.6.25-xen/drivers/xen/core/smpboot.c
9704     --- linux-2.6.25/drivers/xen/core/smpboot.c 2008-05-23 20:51:11.000000000 +0200
9705     +++ linux-2.6.25-xen/drivers/xen/core/smpboot.c 2008-05-23 20:39:03.000000000 +0200
9706 niro 609 @@ -25,8 +25,8 @@
9707     #include <xen/cpu_hotplug.h>
9708     #include <xen/xenbus.h>
9709    
9710     -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
9711     -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
9712     +extern irqreturn_t smp_reschedule_interrupt(int, void *);
9713     +extern irqreturn_t smp_call_function_interrupt(int, void *);
9714    
9715     extern int local_setup_timer(unsigned int cpu);
9716     extern void local_teardown_timer(unsigned int cpu);
9717     @@ -66,8 +66,6 @@
9718     #if defined(__i386__)
9719     u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
9720     EXPORT_SYMBOL(x86_cpu_to_apicid);
9721     -#elif !defined(CONFIG_X86_IO_APIC)
9722     -unsigned int maxcpus = NR_CPUS;
9723     #endif
9724    
9725     void __init prefill_possible_map(void)
9726 niro 611 diff -Naur linux-2.6.25/drivers/xen/fbfront/xenfb.c linux-2.6.25-xen/drivers/xen/fbfront/xenfb.c
9727     --- linux-2.6.25/drivers/xen/fbfront/xenfb.c 2008-05-23 20:51:11.000000000 +0200
9728     +++ linux-2.6.25-xen/drivers/xen/fbfront/xenfb.c 2008-05-23 20:39:03.000000000 +0200
9729 niro 609 @@ -523,8 +523,7 @@
9730     .fb_set_par = xenfb_set_par,
9731     };
9732    
9733     -static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
9734     - struct pt_regs *regs)
9735     +static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
9736     {
9737     /*
9738     * No in events recognized, simply ignore them all.
9739 niro 611 diff -Naur linux-2.6.25/drivers/xen/fbfront/xenkbd.c linux-2.6.25-xen/drivers/xen/fbfront/xenkbd.c
9740     --- linux-2.6.25/drivers/xen/fbfront/xenkbd.c 2008-05-23 20:51:11.000000000 +0200
9741     +++ linux-2.6.25-xen/drivers/xen/fbfront/xenkbd.c 2008-05-23 20:39:03.000000000 +0200
9742 niro 609 @@ -46,7 +46,7 @@
9743     * to do that.
9744     */
9745    
9746     -static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
9747     +static irqreturn_t input_handler(int rq, void *dev_id)
9748     {
9749     struct xenkbd_info *info = dev_id;
9750     struct xenkbd_page *page = info->page;
9751 niro 611 diff -Naur linux-2.6.25/drivers/xen/gntdev/gntdev.c linux-2.6.25-xen/drivers/xen/gntdev/gntdev.c
9752     --- linux-2.6.25/drivers/xen/gntdev/gntdev.c 2008-05-23 20:51:11.000000000 +0200
9753     +++ linux-2.6.25-xen/drivers/xen/gntdev/gntdev.c 2008-05-23 20:39:03.000000000 +0200
9754 niro 609 @@ -755,9 +755,6 @@
9755     BUG();
9756     }
9757    
9758     - /* Copy the existing value of the PTE for returning. */
9759     - copy = *ptep;
9760     -
9761     /* Calculate the grant relating to this PTE. */
9762     slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
9763    
9764     @@ -772,6 +769,10 @@
9765     GNTDEV_INVALID_HANDLE &&
9766     !xen_feature(XENFEAT_auto_translated_physmap)) {
9767     /* NOT USING SHADOW PAGE TABLES. */
9768     +
9769     + /* Copy the existing value of the PTE for returning. */
9770     + copy = *ptep;
9771     +
9772     gnttab_set_unmap_op(&op, virt_to_machine(ptep),
9773     GNTMAP_contains_pte,
9774     private_data->grants[slot_index]
9775     @@ -784,7 +785,7 @@
9776     op.status);
9777     } else {
9778     /* USING SHADOW PAGE TABLES. */
9779     - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9780     + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9781     }
9782    
9783     /* Finally, we unmap the grant from kernel space. */
9784     @@ -812,7 +813,7 @@
9785     >> PAGE_SHIFT, INVALID_P2M_ENTRY);
9786    
9787     } else {
9788     - pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9789     + copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
9790     }
9791    
9792     return copy;
9793 niro 611 diff -Naur linux-2.6.25/drivers/xen/Kconfig linux-2.6.25-xen/drivers/xen/Kconfig
9794     --- linux-2.6.25/drivers/xen/Kconfig 2008-05-23 20:51:14.000000000 +0200
9795     +++ linux-2.6.25-xen/drivers/xen/Kconfig 2008-05-23 20:39:03.000000000 +0200
9796     @@ -278,6 +278,9 @@
9797     config HAVE_IRQ_IGNORE_UNHANDLED
9798     def_bool y
9799    
9800     +config GENERIC_HARDIRQS_NO__DO_IRQ
9801     + def_bool y
9802     +
9803     config NO_IDLE_HZ
9804     def_bool y
9805    
9806     diff -Naur linux-2.6.25/drivers/xen/netback/accel.c linux-2.6.25-xen/drivers/xen/netback/accel.c
9807     --- linux-2.6.25/drivers/xen/netback/accel.c 2008-05-23 20:51:11.000000000 +0200
9808     +++ linux-2.6.25-xen/drivers/xen/netback/accel.c 2008-05-23 20:39:03.000000000 +0200
9809 niro 609 @@ -65,7 +65,7 @@
9810    
9811     if (IS_ERR(eth_name)) {
9812     /* Probably means not present */
9813     - DPRINTK("%s: no match due to xenbus_read accel error %d\n",
9814     + DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
9815     __FUNCTION__, PTR_ERR(eth_name));
9816     return 0;
9817     } else {
9818 niro 611 diff -Naur linux-2.6.25/drivers/xen/netback/common.h linux-2.6.25-xen/drivers/xen/netback/common.h
9819     --- linux-2.6.25/drivers/xen/netback/common.h 2008-05-23 20:51:11.000000000 +0200
9820     +++ linux-2.6.25-xen/drivers/xen/netback/common.h 2008-05-23 20:39:03.000000000 +0200
9821 niro 609 @@ -200,7 +200,7 @@
9822    
9823     int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
9824     struct net_device_stats *netif_be_get_stats(struct net_device *dev);
9825     -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
9826     +irqreturn_t netif_be_int(int irq, void *dev_id);
9827    
9828     static inline int netbk_can_queue(struct net_device *dev)
9829     {
9830 niro 611 diff -Naur linux-2.6.25/drivers/xen/netback/loopback.c linux-2.6.25-xen/drivers/xen/netback/loopback.c
9831     --- linux-2.6.25/drivers/xen/netback/loopback.c 2008-05-23 20:51:11.000000000 +0200
9832     +++ linux-2.6.25-xen/drivers/xen/netback/loopback.c 2008-05-23 20:39:03.000000000 +0200
9833 niro 609 @@ -151,7 +151,7 @@
9834     np->stats.rx_bytes += skb->len;
9835     np->stats.rx_packets++;
9836    
9837     - if (skb->ip_summed == CHECKSUM_HW) {
9838     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
9839     /* Defer checksum calculation. */
9840     skb->proto_csum_blank = 1;
9841     /* Must be a local packet: assert its integrity. */
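The CHECKSUM_HW rewrites here and in the netback/netfront hunks that
follow track 2.6.19's split of CHECKSUM_HW into CHECKSUM_PARTIAL
(transmit side: the checksum still has to be filled in) and
CHECKSUM_COMPLETE (receive side: the hardware already computed it). In
these drivers the flag marks locally generated packets whose checksum
is still pending, so CHECKSUM_PARTIAL is the correct replacement
throughout; schematically, as in the netback hunk below:

	if (skb->ip_summed == CHECKSUM_PARTIAL)	/* local packet? */
		flags |= NETRXF_csum_blank | NETRXF_data_validated;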
9842 niro 611 diff -Naur linux-2.6.25/drivers/xen/netback/netback.c linux-2.6.25-xen/drivers/xen/netback/netback.c
9843     --- linux-2.6.25/drivers/xen/netback/netback.c 2008-05-23 20:51:11.000000000 +0200
9844     +++ linux-2.6.25-xen/drivers/xen/netback/netback.c 2008-05-23 20:39:03.000000000 +0200
9845 niro 609 @@ -677,7 +677,7 @@
9846     id = meta[npo.meta_cons].id;
9847     flags = nr_frags ? NETRXF_more_data : 0;
9848    
9849     - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9850     + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9851     flags |= NETRXF_csum_blank | NETRXF_data_validated;
9852     else if (skb->proto_data_valid) /* remote but checksummed? */
9853     flags |= NETRXF_data_validated;
9854     @@ -1441,7 +1441,7 @@
9855     netif_idx_release(netif_page_index(page));
9856     }
9857    
9858     -irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
9859     +irqreturn_t netif_be_int(int irq, void *dev_id)
9860     {
9861     netif_t *netif = dev_id;
9862    
9863     @@ -1508,7 +1508,7 @@
9864     }
9865    
9866     #ifdef NETBE_DEBUG_INTERRUPT
9867     -static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
9868     +static irqreturn_t netif_be_dbg(int irq, void *dev_id)
9869     {
9870     struct list_head *ent;
9871     netif_t *netif;
9872 niro 611 diff -Naur linux-2.6.25/drivers/xen/netfront/netfront.c linux-2.6.25-xen/drivers/xen/netfront/netfront.c
9873     --- linux-2.6.25/drivers/xen/netfront/netfront.c 2008-05-23 20:51:11.000000000 +0200
9874     +++ linux-2.6.25-xen/drivers/xen/netfront/netfront.c 2008-05-23 20:39:03.000000000 +0200
9875 niro 609 @@ -136,7 +136,7 @@
9876     {
9877     return skb_is_gso(skb) &&
9878     (!skb_gso_ok(skb, dev->features) ||
9879     - unlikely(skb->ip_summed != CHECKSUM_HW));
9880     + unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
9881     }
9882     #else
9883     #define HAVE_GSO 0
9884     @@ -222,7 +222,7 @@
9885     static void network_alloc_rx_buffers(struct net_device *);
9886     static void send_fake_arp(struct net_device *);
9887    
9888     -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
9889     +static irqreturn_t netif_int(int irq, void *dev_id);
9890    
9891     #ifdef CONFIG_SYSFS
9892     static int xennet_sysfs_addif(struct net_device *netdev);
9893     @@ -992,7 +992,7 @@
9894     tx->flags = 0;
9895     extra = NULL;
9896    
9897     - if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
9898     + if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
9899     tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
9900     #ifdef CONFIG_XEN
9901     if (skb->proto_data_valid) /* remote but checksummed? */
9902     @@ -1049,7 +1049,7 @@
9903     return 0;
9904     }
9905    
9906     -static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
9907     +static irqreturn_t netif_int(int irq, void *dev_id)
9908     {
9909     struct net_device *dev = dev_id;
9910     struct netfront_info *np = netdev_priv(dev);
9911 niro 611 diff -Naur linux-2.6.25/drivers/xen/pciback/pciback.h linux-2.6.25-xen/drivers/xen/pciback/pciback.h
9912     --- linux-2.6.25/drivers/xen/pciback/pciback.h 2008-05-23 20:51:11.000000000 +0200
9913     +++ linux-2.6.25-xen/drivers/xen/pciback/pciback.h 2008-05-23 20:39:03.000000000 +0200
9914 niro 609 @@ -87,7 +87,7 @@
9915     void pciback_release_devices(struct pciback_device *pdev);
9916    
9917     /* Handles events from front-end */
9918     -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
9919     +irqreturn_t pciback_handle_event(int irq, void *dev_id);
9920     void pciback_do_op(void *data);
9921    
9922     int pciback_xenbus_register(void);
9923 niro 611 diff -Naur linux-2.6.25/drivers/xen/pciback/pciback_ops.c linux-2.6.25-xen/drivers/xen/pciback/pciback_ops.c
9924     --- linux-2.6.25/drivers/xen/pciback/pciback_ops.c 2008-05-23 20:51:11.000000000 +0200
9925     +++ linux-2.6.25-xen/drivers/xen/pciback/pciback_ops.c 2008-05-23 20:39:03.000000000 +0200
9926 niro 609 @@ -85,7 +85,7 @@
9927     test_and_schedule_op(pdev);
9928     }
9929    
9930     -irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
9931     +irqreturn_t pciback_handle_event(int irq, void *dev_id)
9932     {
9933     struct pciback_device *pdev = dev_id;
9934    
9935 niro 611 diff -Naur linux-2.6.25/drivers/xen/pcifront/pci_op.c linux-2.6.25-xen/drivers/xen/pcifront/pci_op.c
9936     --- linux-2.6.25/drivers/xen/pcifront/pci_op.c 2008-05-23 20:51:11.000000000 +0200
9937     +++ linux-2.6.25-xen/drivers/xen/pcifront/pci_op.c 2008-05-23 20:39:03.000000000 +0200
9938 niro 609 @@ -392,10 +392,16 @@
9939    
9940     d = pci_scan_single_device(b, devfn);
9941     if (d) {
9942     + int err;
9943     +
9944     dev_info(&pdev->xdev->dev, "New device on "
9945     "%04x:%02x:%02x.%02x found.\n", domain, bus,
9946     PCI_SLOT(devfn), PCI_FUNC(devfn));
9947     - pci_bus_add_device(d);
9948     + err = pci_bus_add_device(d);
9949     + if (err)
9950     + dev_err(&pdev->xdev->dev,
9951     + "error %d adding device, continuing.\n",
9952     + err);
9953     }
9954     }
9955    
9956 niro 611 diff -Naur linux-2.6.25/drivers/xen/privcmd/compat_privcmd.c linux-2.6.25-xen/drivers/xen/privcmd/compat_privcmd.c
9957     --- linux-2.6.25/drivers/xen/privcmd/compat_privcmd.c 2008-05-23 20:51:11.000000000 +0200
9958     +++ linux-2.6.25-xen/drivers/xen/privcmd/compat_privcmd.c 2008-05-23 20:39:03.000000000 +0200
9959 niro 609 @@ -18,7 +18,6 @@
9960     * Authors: Jimi Xenidis <jimix@watson.ibm.com>
9961     */
9962    
9963     -#include <linux/config.h>
9964     #include <linux/compat.h>
9965     #include <linux/ioctl.h>
9966     #include <linux/syscalls.h>
9967 niro 611 diff -Naur linux-2.6.25/drivers/xen/privcmd/privcmd.c linux-2.6.25-xen/drivers/xen/privcmd/privcmd.c
9968     --- linux-2.6.25/drivers/xen/privcmd/privcmd.c 2008-05-23 20:51:11.000000000 +0200
9969     +++ linux-2.6.25-xen/drivers/xen/privcmd/privcmd.c 2008-05-23 20:39:03.000000000 +0200
9970 niro 609 @@ -236,7 +236,7 @@
9971     #endif
9972    
9973     /* DONTCOPY is essential for Xen as copy_page_range is broken. */
9974     - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
9975     + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
9976     vma->vm_ops = &privcmd_vm_ops;
9977     vma->vm_private_data = NULL;
9978    
9979 niro 611 diff -Naur linux-2.6.25/drivers/xen/sfc_netback/accel_xenbus.c linux-2.6.25-xen/drivers/xen/sfc_netback/accel_xenbus.c
9980     --- linux-2.6.25/drivers/xen/sfc_netback/accel_xenbus.c 2008-05-23 20:51:11.000000000 +0200
9981     +++ linux-2.6.25-xen/drivers/xen/sfc_netback/accel_xenbus.c 2008-05-23 20:39:03.000000000 +0200
9982 niro 609 @@ -68,8 +68,7 @@
9983    
9984    
9985     /* Demultiplex a message IRQ from the frontend driver. */
9986     -static irqreturn_t msgirq_from_frontend(int irq, void *context,
9987     - struct pt_regs *unused)
9988     +static irqreturn_t msgirq_from_frontend(int irq, void *context)
9989     {
9990     struct xenbus_device *dev = context;
9991     struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
9992     @@ -84,8 +83,7 @@
9993     * functionally, but we need it to pass to the bind function, and may
9994     * get called spuriously
9995     */
9996     -static irqreturn_t netirq_from_frontend(int irq, void *context,
9997     - struct pt_regs *unused)
9998     +static irqreturn_t netirq_from_frontend(int irq, void *context)
9999     {
10000     VPRINTK("netirq %d from device %s\n", irq,
10001     ((struct xenbus_device *)context)->nodename);
10002 niro 611 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel.h linux-2.6.25-xen/drivers/xen/sfc_netfront/accel.h
10003     --- linux-2.6.25/drivers/xen/sfc_netfront/accel.h 2008-05-23 20:51:11.000000000 +0200
10004     +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel.h 2008-05-23 20:39:03.000000000 +0200
10005 niro 609 @@ -449,10 +449,8 @@
10006     u32 ip, u16 port, u8 protocol);
10007    
10008     /* Process an IRQ received from back end driver */
10009     -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10010     - struct pt_regs *unused);
10011     -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10012     - struct pt_regs *unused);
10013     +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
10014     +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
10015    
10016     #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
10017     extern void netfront_accel_msg_from_bend(struct work_struct *context);
10018 niro 611 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_msg.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_msg.c
10019     --- linux-2.6.25/drivers/xen/sfc_netfront/accel_msg.c 2008-05-23 20:51:11.000000000 +0200
10020     +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_msg.c 2008-05-23 20:39:03.000000000 +0200
10021 niro 609 @@ -490,8 +490,7 @@
10022     }
10023    
10024    
10025     -irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
10026     - struct pt_regs *unused)
10027     +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
10028     {
10029     netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10030     VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
10031     @@ -502,8 +501,7 @@
10032     }
10033    
10034     /* Process an interrupt received from the NIC via backend */
10035     -irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
10036     - struct pt_regs *unused)
10037     +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
10038     {
10039     netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
10040     struct net_device *net_dev = vnic->net_dev;
10041 niro 611 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_tso.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_tso.c
10042     --- linux-2.6.25/drivers/xen/sfc_netfront/accel_tso.c 2008-05-23 20:51:11.000000000 +0200
10043     +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_tso.c 2008-05-23 20:39:03.000000000 +0200
10044 niro 609 @@ -363,7 +363,7 @@
10045    
10046     tso_check_safe(skb);
10047    
10048     - if (skb->ip_summed != CHECKSUM_HW)
10049     + if (skb->ip_summed != CHECKSUM_PARTIAL)
10050     EPRINTK("Trying to TSO send a packet without HW checksum\n");
10051    
10052     tso_start(&state, skb);
10053 niro 611 diff -Naur linux-2.6.25/drivers/xen/sfc_netfront/accel_vi.c linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_vi.c
10054     --- linux-2.6.25/drivers/xen/sfc_netfront/accel_vi.c 2008-05-23 20:51:11.000000000 +0200
10055     +++ linux-2.6.25-xen/drivers/xen/sfc_netfront/accel_vi.c 2008-05-23 20:39:03.000000000 +0200
10056 niro 609 @@ -461,7 +461,7 @@
10057    
10058     frag_i = -1;
10059    
10060     - if (skb->ip_summed == CHECKSUM_HW) {
10061     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10062     /* Set to zero to encourage falcon to work it out for us */
10063     *(u16*)(skb->h.raw + skb->csum) = 0;
10064     }
10065     @@ -580,7 +580,7 @@
10066    
10067     kva = buf->pkt_kva;
10068    
10069     - if (skb->ip_summed == CHECKSUM_HW) {
10070     + if (skb->ip_summed == CHECKSUM_PARTIAL) {
10071     /* Set to zero to encourage falcon to work it out for us */
10072     *(u16*)(skb->h.raw + skb->csum) = 0;
10073     }
10074 niro 611 diff -Naur linux-2.6.25/drivers/xen/tpmback/common.h linux-2.6.25-xen/drivers/xen/tpmback/common.h
10075     --- linux-2.6.25/drivers/xen/tpmback/common.h 2008-05-23 20:51:11.000000000 +0200
10076     +++ linux-2.6.25-xen/drivers/xen/tpmback/common.h 2008-05-23 20:39:03.000000000 +0200
10077 niro 609 @@ -61,7 +61,7 @@
10078     void tpmif_xenbus_init(void);
10079     void tpmif_xenbus_exit(void);
10080     int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
10081     -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
10082     +irqreturn_t tpmif_be_int(int irq, void *dev_id);
10083    
10084     long int tpmback_get_instance(struct backend_info *bi);
10085    
10086 niro 611 diff -Naur linux-2.6.25/drivers/xen/tpmback/tpmback.c linux-2.6.25-xen/drivers/xen/tpmback/tpmback.c
10087     --- linux-2.6.25/drivers/xen/tpmback/tpmback.c 2008-05-23 20:51:11.000000000 +0200
10088     +++ linux-2.6.25-xen/drivers/xen/tpmback/tpmback.c 2008-05-23 20:39:03.000000000 +0200
10089 niro 609 @@ -502,7 +502,7 @@
10090     list_del(&pak->next);
10091     write_unlock_irqrestore(&dataex.pak_lock, flags);
10092    
10093     - DPRINTK("size given by app: %d, available: %d\n", size, left);
10094     + DPRINTK("size given by app: %zu, available: %u\n", size, left);
10095    
10096     ret_size = min_t(size_t, size, left);
10097    
10098     @@ -899,7 +899,7 @@
10099     }
10100     }
10101    
10102     -irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
10103     +irqreturn_t tpmif_be_int(int irq, void *dev_id)
10104     {
10105     tpmif_t *tpmif = (tpmif_t *) dev_id;
10106    
10107 niro 611 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c
10108     --- linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 20:51:19.000000000 +0200
10109     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 20:39:03.000000000 +0200
10110 niro 609 @@ -55,7 +55,7 @@
10111    
10112     static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
10113    
10114     -static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
10115     +static irqreturn_t wake_waiting(int irq, void *unused)
10116     {
10117     if (unlikely(xenstored_ready == 0)) {
10118     xenstored_ready = 1;
10119 niro 611 diff -Naur linux-2.6.25/drivers/xen/xenoprof/xenoprofile.c linux-2.6.25-xen/drivers/xen/xenoprof/xenoprofile.c
10120     --- linux-2.6.25/drivers/xen/xenoprof/xenoprofile.c 2008-05-23 20:51:11.000000000 +0200
10121     +++ linux-2.6.25-xen/drivers/xen/xenoprof/xenoprofile.c 2008-05-23 20:39:03.000000000 +0200
10122 niro 609 @@ -195,7 +195,7 @@
10123     }
10124    
10125     static irqreturn_t
10126     -xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
10127     +xenoprof_ovf_interrupt(int irq, void * dev_id)
10128     {
10129     struct xenoprof_buf * buf;
10130     static unsigned long flag;
10131 niro 611 diff -Naur linux-2.6.25/include/asm-generic/pgtable.h linux-2.6.25-xen/include/asm-generic/pgtable.h
10132     --- linux-2.6.25/include/asm-generic/pgtable.h 2008-05-23 20:51:15.000000000 +0200
10133     +++ linux-2.6.25-xen/include/asm-generic/pgtable.h 2008-05-23 20:39:03.000000000 +0200
10134 niro 609 @@ -100,7 +100,7 @@
10135     #endif
10136    
10137     #ifndef arch_change_pte_range
10138     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
10139     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
10140     #endif
10141    
10142     #ifndef __HAVE_ARCH_PTE_SAME
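The new dirty_accountable parameter follows the dirty-page accounting
work merged in 2.6.19, which threads that flag through
change_protection(). The stub above still expands to 0, so the generic
PTE loop stays in charge unless an architecture overrides the hook; the
Xen override that actually consumes the flag appears in the pgtable_32.h
hunk further down:

	#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
		xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)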
10143 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/desc_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/desc_32.h
10144     --- linux-2.6.25/include/asm-x86/mach-xen/asm/desc_32.h 2008-05-23 20:51:11.000000000 +0200
10145     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/desc_32.h 2008-05-23 20:39:03.000000000 +0200
10146 niro 609 @@ -32,52 +32,110 @@
10147     return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
10148     }
10149    
10150     +/*
10151     + * This is the ldt that every process will get unless we need
10152     + * something other than this.
10153     + */
10154     +extern struct desc_struct default_ldt[];
10155     +extern struct desc_struct idt_table[];
10156     +extern void set_intr_gate(unsigned int irq, void * addr);
10157     +
10158     +static inline void pack_descriptor(__u32 *a, __u32 *b,
10159     + unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
10160     +{
10161     + *a = ((base & 0xffff) << 16) | (limit & 0xffff);
10162     + *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
10163     + (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
10164     +}
10165     +
10166     +static inline void pack_gate(__u32 *a, __u32 *b,
10167     + unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
10168     +{
10169     + *a = (seg << 16) | (base & 0xffff);
10170     + *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
10171     +}
10172     +
10173     +#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
10174     +#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
10175     +#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
10176     +#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
10177     +#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
10178     +#define DESCTYPE_DPL3 0x60 /* DPL-3 */
10179     +#define DESCTYPE_S 0x10 /* !system */
10180     +
10181     #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
10182     #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
10183    
10184     #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
10185     #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
10186     -#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
10187     -#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
10188     +#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
10189     +#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
10190    
10191     #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
10192     #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
10193     -#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
10194     -#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
10195     +#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
10196     +#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
10197    
10198     -/*
10199     - * This is the ldt that every process will get unless we need
10200     - * something other than this.
10201     - */
10202     -extern struct desc_struct default_ldt[];
10203     -extern void set_intr_gate(unsigned int irq, void * addr);
10204     +#if TLS_SIZE != 24
10205     +# error update this code.
10206     +#endif
10207     +
10208     +static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10209     +{
10210     +#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10211     + *(u64 *)&t->tls_array[i])) \
10212     + BUG();
10213     + C(0); C(1); C(2);
10214     +#undef C
10215     +}
10216    
10217     -#define _set_tssldt_desc(n,addr,limit,type) \
10218     -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
10219     - "movw %w1,2(%2)\n\t" \
10220     - "rorl $16,%1\n\t" \
10221     - "movb %b1,4(%2)\n\t" \
10222     - "movb %4,5(%2)\n\t" \
10223     - "movb $0,6(%2)\n\t" \
10224     - "movb %h1,7(%2)\n\t" \
10225     - "rorl $16,%1" \
10226     - : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
10227     +#ifndef CONFIG_XEN
10228     +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
10229     +{
10230     + __u32 *lp = (__u32 *)((char *)dt + entry*8);
10231     + *lp = entry_a;
10232     + *(lp+1) = entry_b;
10233     +}
10234    
10235     -#ifndef CONFIG_X86_NO_TSS
10236     -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
10237     +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10238     +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10239     +#else
10240     +extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10241     +extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
10242     +#endif
10243     +#ifndef CONFIG_X86_NO_IDT
10244     +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
10245     +
10246     +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
10247     {
10248     - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
10249     - offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
10250     + __u32 a, b;
10251     + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
10252     + write_idt_entry(idt_table, gate, a, b);
10253     }
10254     +#endif
10255    
10256     -#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10257     +#ifndef CONFIG_X86_NO_TSS
10258     +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
10259     +{
10260     + __u32 a, b;
10261     + pack_descriptor(&a, &b, (unsigned long)addr,
10262     + offsetof(struct tss_struct, __cacheline_filler) - 1,
10263     + DESCTYPE_TSS, 0);
10264     + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
10265     +}
10266     #endif
10267    
10268     -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
10269     +static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
10270     {
10271     - _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
10272     + __u32 a, b;
10273     + pack_descriptor(&a, &b, (unsigned long)addr,
10274     + entries * sizeof(struct desc_struct) - 1,
10275     + DESCTYPE_LDT, 0);
10276     + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
10277     }
10278    
10279     +#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
10280     +
10281     #define LDT_entry_a(info) \
10282     ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
10283    
10284     @@ -103,21 +161,6 @@
10285     (info)->seg_not_present == 1 && \
10286     (info)->useable == 0 )
10287    
10288     -extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
10289     -
10290     -#if TLS_SIZE != 24
10291     -# error update this code.
10292     -#endif
10293     -
10294     -static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
10295     -{
10296     -#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
10297     - *(u64 *)&t->tls_array[i])) \
10298     - BUG();
10299     - C(0); C(1); C(2);
10300     -#undef C
10301     -}
10302     -
10303     static inline void clear_LDT(void)
10304     {
10305     int cpu = get_cpu();
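For orientation, pack_descriptor() above assembles the two 32-bit
halves of a GDT entry: the low word carries base[15:0] and limit[15:0];
the high word keeps base[31:24] in place, moves base[23:16] into its
low byte, and folds in limit[19:16], the type byte (bits 15:8) and the
flags nibble (bits 23:20). A worked use, mirroring set_ldt_desc() for
an eight-entry LDT (the ldt pointer and cpu value are placeholders):

	__u32 a, b;

	pack_descriptor(&a, &b, (unsigned long)ldt,
			8 * sizeof(struct desc_struct) - 1,
			DESCTYPE_LDT, 0);
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);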
10306 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/dma-mapping_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/dma-mapping_64.h
10307     --- linux-2.6.25/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-05-23 20:51:11.000000000 +0200
10308     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/dma-mapping_64.h 2008-05-23 20:39:03.000000000 +0200
10309 niro 609 @@ -55,13 +55,6 @@
10310     extern struct dma_mapping_ops* dma_ops;
10311     extern int iommu_merge;
10312    
10313     -static inline int valid_dma_direction(int dma_direction)
10314     -{
10315     - return ((dma_direction == DMA_BIDIRECTIONAL) ||
10316     - (dma_direction == DMA_TO_DEVICE) ||
10317     - (dma_direction == DMA_FROM_DEVICE));
10318     -}
10319     -
10320     #if 0
10321     static inline int dma_mapping_error(dma_addr_t dma_addr)
10322     {
10323 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h
10324     --- linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 20:51:11.000000000 +0200
10325     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 20:39:03.000000000 +0200
10326 niro 609 @@ -19,13 +19,9 @@
10327    
10328     #define E820_RAM 1
10329     #define E820_RESERVED 2
10330     -#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
10331     +#define E820_ACPI 3
10332     #define E820_NVS 4
10333    
10334     -#define HIGH_MEMORY (1024*1024)
10335     -
10336     -#define LOWMEMSIZE() (0x9f000)
10337     -
10338     #ifndef __ASSEMBLY__
10339     struct e820entry {
10340     u64 addr; /* start of memory segment */
10341     @@ -46,17 +42,16 @@
10342     extern void contig_e820_setup(void);
10343     extern unsigned long e820_end_of_ram(void);
10344     extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
10345     +extern void e820_mark_nosave_regions(void);
10346     extern void e820_print_map(char *who);
10347     extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
10348     extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
10349    
10350     -extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
10351     extern void e820_setup_gap(struct e820entry *e820, int nr_map);
10352     -extern unsigned long e820_hole_size(unsigned long start_pfn,
10353     - unsigned long end_pfn);
10354     +extern void e820_register_active_regions(int nid,
10355     + unsigned long start_pfn, unsigned long end_pfn);
10356    
10357     -extern void __init parse_memopt(char *p, char **end);
10358     -extern void __init parse_memmapopt(char *p, char **end);
10359     +extern void finish_e820_parsing(void);
10360    
10361     extern struct e820map e820;
10362    
10363 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h
10364     --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 20:51:11.000000000 +0200
10365     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 20:39:03.000000000 +0200
10366 niro 609 @@ -55,7 +55,7 @@
10367     #ifdef CONFIG_X86_LOCAL_APIC
10368     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10369     #endif
10370     -#ifdef CONFIG_X86_IO_APIC
10371     +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
10372     FIX_IO_APIC_BASE_0,
10373     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10374     #endif
10375     @@ -95,10 +95,9 @@
10376     __end_of_fixed_addresses
10377     };
10378    
10379     -extern void set_fixaddr_top(unsigned long top);
10380     -
10381     extern void __set_fixmap(enum fixed_addresses idx,
10382     maddr_t phys, pgprot_t flags);
10383     +extern void reserve_top_address(unsigned long reserve);
10384    
10385     #define set_fixmap(idx, phys) \
10386     __set_fixmap(idx, phys, PAGE_KERNEL)
10387 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h
10388     --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 20:51:11.000000000 +0200
10389     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 20:39:03.000000000 +0200
10390 niro 609 @@ -41,7 +41,7 @@
10391     #ifdef CONFIG_X86_LOCAL_APIC
10392     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
10393     #endif
10394     -#ifdef CONFIG_X86_IO_APIC
10395     +#ifndef CONFIG_XEN
10396     FIX_IO_APIC_BASE_0,
10397     FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
10398     #endif
10399 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_32.h
10400     --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_32.h 2008-05-23 20:51:11.000000000 +0200
10401     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_32.h 2008-05-23 20:39:03.000000000 +0200
10402 niro 609 @@ -17,8 +17,6 @@
10403     #include <asm/irq.h>
10404     #include <asm/sections.h>
10405    
10406     -struct hw_interrupt_type;
10407     -
10408     #define NMI_VECTOR 0x02
10409    
10410     /*
10411     @@ -28,10 +26,6 @@
10412     * Interrupt entry/exit code at both C and assembly level
10413     */
10414    
10415     -extern u8 irq_vector[NR_IRQ_VECTORS];
10416     -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10417     -#define AUTO_ASSIGN -1
10418     -
10419     extern void (*interrupt[NR_IRQS])(void);
10420    
10421     #ifdef CONFIG_SMP
10422     @@ -44,7 +38,7 @@
10423     fastcall void apic_timer_interrupt(void);
10424     fastcall void error_interrupt(void);
10425     fastcall void spurious_interrupt(void);
10426     -fastcall void thermal_interrupt(struct pt_regs *);
10427     +fastcall void thermal_interrupt(void);
10428     #define platform_legacy_irq(irq) ((irq) < 16)
10429     #endif
10430    
10431 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h
10432     --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 20:51:11.000000000 +0200
10433     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 20:39:03.000000000 +0200
10434 niro 609 @@ -19,8 +19,7 @@
10435     #include <asm/irq.h>
10436     #include <linux/profile.h>
10437     #include <linux/smp.h>
10438     -
10439     -struct hw_interrupt_type;
10440     +#include <linux/percpu.h>
10441     #endif
10442    
10443     #define NMI_VECTOR 0x02
10444     @@ -77,9 +76,10 @@
10445    
10446    
10447     #ifndef __ASSEMBLY__
10448     -extern u8 irq_vector[NR_IRQ_VECTORS];
10449     -#define IO_APIC_VECTOR(irq) (irq_vector[irq])
10450     -#define AUTO_ASSIGN -1
10451     +typedef int vector_irq_t[NR_VECTORS];
10452     +DECLARE_PER_CPU(vector_irq_t, vector_irq);
10453     +extern void __setup_vector_irq(int cpu);
10454     +extern spinlock_t vector_lock;
10455    
10456     /*
10457     * Various low-level irq details needed by irq.c, process.c,
10458 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h
10459     --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 20:51:11.000000000 +0200
10460     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 20:39:03.000000000 +0200
10461 niro 609 @@ -237,33 +237,6 @@
10462    
10463     #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
10464    
10465     -/**
10466     - * check_signature - find BIOS signatures
10467     - * @io_addr: mmio address to check
10468     - * @signature: signature block
10469     - * @length: length of signature
10470     - *
10471     - * Perform a signature comparison with the mmio address io_addr. This
10472     - * address should have been obtained by ioremap.
10473     - * Returns 1 on a match.
10474     - */
10475     -
10476     -static inline int check_signature(volatile void __iomem * io_addr,
10477     - const unsigned char *signature, int length)
10478     -{
10479     - int retval = 0;
10480     - do {
10481     - if (readb(io_addr) != *signature)
10482     - goto out;
10483     - io_addr++;
10484     - signature++;
10485     - length--;
10486     - } while (length);
10487     - retval = 1;
10488     -out:
10489     - return retval;
10490     -}
10491     -
10492     /*
10493     * Cache management
10494     *
10495 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h
10496     --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 20:51:11.000000000 +0200
10497     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 20:39:03.000000000 +0200
10498 niro 609 @@ -273,33 +273,6 @@
10499    
10500     #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
10501    
10502     -/**
10503     - * check_signature - find BIOS signatures
10504     - * @io_addr: mmio address to check
10505     - * @signature: signature block
10506     - * @length: length of signature
10507     - *
10508     - * Perform a signature comparison with the mmio address io_addr. This
10509     - * address should have been obtained by ioremap.
10510     - * Returns 1 on a match.
10511     - */
10512     -
10513     -static inline int check_signature(void __iomem *io_addr,
10514     - const unsigned char *signature, int length)
10515     -{
10516     - int retval = 0;
10517     - do {
10518     - if (readb(io_addr) != *signature)
10519     - goto out;
10520     - io_addr++;
10521     - signature++;
10522     - length--;
10523     - } while (length);
10524     - retval = 1;
10525     -out:
10526     - return retval;
10527     -}
10528     -
10529     /* Nothing to do */
10530    
10531     #define dma_cache_inv(_start,_size) do { } while (0)
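This hunk and the matching io_32.h hunk above delete check_signature()
without a replacement because 2.6.19 turned it into a generic library
routine (lib/check_signature.c, declared in <linux/io.h>), so existing
callers keep building unchanged. If memory of that tree serves, the
generic declaration is:

	int check_signature(const volatile void __iomem *io_addr,
			    const unsigned char *signature, int length);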
10532 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h
10533     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 20:51:11.000000000 +0200
10534     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 20:39:03.000000000 +0200
10535 niro 609 @@ -23,14 +23,6 @@
10536     set_pte((ptep), (pteval)); \
10537     } while (0)
10538    
10539     -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10540     - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10541     - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10542     - set_pte((ptep), (pteval)); \
10543     - xen_invlpg((addr)); \
10544     - } \
10545     -} while (0)
10546     -
10547     #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
10548    
10549     #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
10550     @@ -40,6 +32,7 @@
10551    
10552     #define pte_none(x) (!(x).pte_low)
10553    
10554     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10555     static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10556     {
10557     pte_t pte = *ptep;
10558     @@ -51,6 +44,7 @@
10559     return pte;
10560     }
10561    
10562     +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10563     #define ptep_clear_flush(vma, addr, ptep) \
10564     ({ \
10565     pte_t *__ptep = (ptep); \
10566     @@ -66,8 +60,6 @@
10567     __res; \
10568     })
10569    
10570     -#define pte_same(a, b) ((a).pte_low == (b).pte_low)
10571     -
10572     #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
10573     #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
10574     __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
10575 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h
10576     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 20:51:11.000000000 +0200
10577     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 20:39:03.000000000 +0200
10578 niro 609 @@ -260,31 +260,89 @@
10579     # include <asm/pgtable-2level.h>
10580     #endif
10581    
10582     -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10583     +/*
10584     + * Rules for using pte_update - it must be called after any PTE update which
10585     + * has not been done using the set_pte / clear_pte interfaces. It is used by
10586     + * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
10587     + * updates should either be sets, clears, or set_pte_atomic for P->P
10588     + * transitions, which means this hook should only be called for user PTEs.
10589     + * This hook implies a P->P protection or access change has taken place, which
10590     + * requires a subsequent TLB flush. The notification can optionally be delayed
10591     + * until the TLB flush event by using the pte_update_defer form of the
10592     + * interface, but care must be taken to assure that the flush happens while
10593     + * still holding the same page table lock so that the shadow and primary pages
10594     + * do not become out of sync on SMP.
10595     + */
10596     +#define pte_update(mm, addr, ptep) do { } while (0)
10597     +#define pte_update_defer(mm, addr, ptep) do { } while (0)
10598     +
10599     +
10600     +/*
10601     + * We only update the dirty/accessed state if we set
10602     + * the dirty bit by hand in the kernel, since the hardware
10603     + * will do the accessed bit for us, and we don't want to
10604     + * race with other CPU's that might be updating the dirty
10605     + * bit at the same time.
10606     + */
10607     +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10608     +#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10609     +do { \
10610     + if (dirty) \
10611     + ptep_establish(vma, address, ptep, entry); \
10612     +} while (0)
10613     +
10614     +/*
10615     + * We don't actually have these, but we want to advertise them so that
10616     + * we can encompass the flush here.
10617     + */
10618     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10619     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10620     +
10621     +/*
10622     + * Rules for using ptep_establish: the pte MUST be a user pte, and
10623     + * must be a present->present transition.
10624     + */
10625     +#define __HAVE_ARCH_PTEP_ESTABLISH
10626     +#define ptep_establish(vma, address, ptep, pteval) \
10627     +do { \
10628     + if ( likely((vma)->vm_mm == current->mm) ) { \
10629     + BUG_ON(HYPERVISOR_update_va_mapping(address, \
10630     + pteval, \
10631     + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10632     + UVMF_INVLPG|UVMF_MULTI)); \
10633     + } else { \
10634     + xen_l1_entry_update(ptep, pteval); \
10635     + flush_tlb_page(vma, address); \
10636     + } \
10637     +} while (0)
10638     +
10639     +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
10640     +#define ptep_clear_flush_dirty(vma, address, ptep) \
10641     ({ \
10642     pte_t __pte = *(ptep); \
10643     - int __ret = pte_dirty(__pte); \
10644     - if (__ret) { \
10645     - __pte = pte_mkclean(__pte); \
10646     - if ((vma)->vm_mm != current->mm || \
10647     - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10648     - (ptep)->pte_low = __pte.pte_low; \
10649     - } \
10650     - __ret; \
10651     + int __dirty = pte_dirty(__pte); \
10652     + __pte = pte_mkclean(__pte); \
10653     + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10654     + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
10655     + else if (__dirty) \
10656     + (ptep)->pte_low = __pte.pte_low; \
10657     + __dirty; \
10658     })
10659    
10660     -#define ptep_test_and_clear_young(vma, addr, ptep) \
10661     +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
10662     +#define ptep_clear_flush_young(vma, address, ptep) \
10663     ({ \
10664     pte_t __pte = *(ptep); \
10665     - int __ret = pte_young(__pte); \
10666     - if (__ret) \
10667     - __pte = pte_mkold(__pte); \
10668     - if ((vma)->vm_mm != current->mm || \
10669     - HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
10670     - (ptep)->pte_low = __pte.pte_low; \
10671     - __ret; \
10672     + int __young = pte_young(__pte); \
10673     + __pte = pte_mkold(__pte); \
10674     + if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
10675     + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
10676     + else if (__young) \
10677     + (ptep)->pte_low = __pte.pte_low; \
10678     + __young; \
10679     })
10680    
10681     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10682     #define ptep_get_and_clear_full(mm, addr, ptep, full) \
10683     ((full) ? ({ \
10684     pte_t __res = *(ptep); \
10685     @@ -296,6 +354,7 @@
10686     }) : \
10687     ptep_get_and_clear(mm, addr, ptep))
10688    
10689     +#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10690     static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10691     {
10692     pte_t pte = *ptep;
10693     @@ -391,11 +450,11 @@
10694     #define pte_index(address) \
10695     (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10696     #define pte_offset_kernel(dir, address) \
10697     - ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
10698     + ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
10699    
10700     #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10701    
10702     -#define pmd_page_kernel(pmd) \
10703     +#define pmd_page_vaddr(pmd) \
10704     ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
10705    
10706     /*
10707     @@ -418,8 +477,6 @@
10708     static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
10709     #endif
10710    
10711     -extern void noexec_setup(const char *str);
10712     -
10713     #if defined(CONFIG_HIGHPTE)
10714     #define pte_offset_map(dir, address) \
10715     ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
10716     @@ -437,37 +494,17 @@
10717     #define pte_unmap_nested(pte) do { } while (0)
10718     #endif
10719    
10720     -#define __HAVE_ARCH_PTEP_ESTABLISH
10721     -#define ptep_establish(vma, address, ptep, pteval) \
10722     - do { \
10723     - if ( likely((vma)->vm_mm == current->mm) ) { \
10724     - BUG_ON(HYPERVISOR_update_va_mapping(address, \
10725     - pteval, \
10726     - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
10727     - UVMF_INVLPG|UVMF_MULTI)); \
10728     - } else { \
10729     - xen_l1_entry_update(ptep, pteval); \
10730     - flush_tlb_page(vma, address); \
10731     - } \
10732     - } while (0)
10733     +/* Clear a kernel PTE and flush it from the TLB */
10734     +#define kpte_clear_flush(ptep, vaddr) do { \
10735     + if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
10736     + BUG(); \
10737     +} while (0)
10738    
10739     /*
10740     * The i386 doesn't have any external MMU info: the kernel page
10741     * tables contain all the necessary information.
10742     - *
10743     - * Also, we only update the dirty/accessed state if we set
10744     - * the dirty bit by hand in the kernel, since the hardware
10745     - * will do the accessed bit for us, and we don't want to
10746     - * race with other CPU's that might be updating the dirty
10747     - * bit at the same time.
10748     */
10749     #define update_mmu_cache(vma,address,pte) do { } while (0)
10750     -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
10751     -#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
10752     - do { \
10753     - if (dirty) \
10754     - ptep_establish(vma, address, ptep, entry); \
10755     - } while (0)
10756    
10757     #include <xen/features.h>
10758     void make_lowmem_page_readonly(void *va, unsigned int feature);
10759     @@ -516,10 +553,11 @@
10760     unsigned long size);
10761    
10762     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
10763     - unsigned long addr, unsigned long end, pgprot_t newprot);
10764     + unsigned long addr, unsigned long end, pgprot_t newprot,
10765     + int dirty_accountable);
10766    
10767     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
10768     - xen_change_pte_range(mm, pmd, addr, end, newprot)
10769     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
10770     + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
10771    
10772     #define io_remap_pfn_range(vma,from,pfn,size,prot) \
10773     direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
10774     @@ -528,13 +566,6 @@
10775     #define GET_IOSPACE(pfn) 0
10776     #define GET_PFN(pfn) (pfn)
10777    
10778     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10779     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10780     -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10781     -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
10782     -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10783     -#define __HAVE_ARCH_PTEP_SET_WRPROTECT
10784     -#define __HAVE_ARCH_PTE_SAME
10785     #include <asm-generic/pgtable.h>
10786    
10787     #endif /* _I386_PGTABLE_H */
10788 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h
10789     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 20:51:11.000000000 +0200
10790     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 20:39:03.000000000 +0200
10791     @@ -53,7 +53,6 @@
10792     * not possible, use pte_get_and_clear to obtain the old pte
10793     * value and then use set_pte to update it. -ben
10794     */
10795     -#define __HAVE_ARCH_SET_PTE_ATOMIC
10796    
10797     static inline void set_pte(pte_t *ptep, pte_t pte)
10798     {
10799     @@ -70,14 +69,6 @@
10800     set_pte((ptep), (pteval)); \
10801     } while (0)
10802    
10803     -#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
10804     - if (((_mm) != current->mm && (_mm) != &init_mm) || \
10805     - HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
10806     - set_pte((ptep), (pteval)); \
10807     - xen_invlpg((addr)); \
10808     - } \
10809     -} while (0)
10810     -
10811     #define set_pmd(pmdptr,pmdval) \
10812     xen_l2_entry_update((pmdptr), (pmdval))
10813     #define set_pud(pudptr,pudval) \
10814     @@ -94,7 +85,7 @@
10815     #define pud_page(pud) \
10816     ((struct page *) __va(pud_val(pud) & PAGE_MASK))
10817    
10818     -#define pud_page_kernel(pud) \
10819     +#define pud_page_vaddr(pud) \
10820     ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
10821    
10822    
10823     @@ -124,6 +115,7 @@
10824    
10825     #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
10826    
10827     +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
10828     static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10829     {
10830     pte_t pte = *ptep;
10831     @@ -142,6 +134,7 @@
10832     return pte;
10833     }
10834    
10835     +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
10836     #define ptep_clear_flush(vma, addr, ptep) \
10837     ({ \
10838     pte_t *__ptep = (ptep); \
10839     @@ -159,6 +152,7 @@
10840     __res; \
10841     })
10842    
10843     +#define __HAVE_ARCH_PTE_SAME
10844     static inline int pte_same(pte_t a, pte_t b)
10845     {
10846     return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
10847     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h
10848     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 20:51:11.000000000 +0200
10849     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 20:39:03.000000000 +0200
10850 niro 609 @@ -43,12 +43,9 @@
10851    
10852     #define swapper_pg_dir init_level4_pgt
10853    
10854     -extern int nonx_setup(char *str);
10855     extern void paging_init(void);
10856     extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
10857    
10858     -extern unsigned long pgkern_mask;
10859     -
10860     /*
10861     * ZERO_PAGE is a global shared page that is always zero: used
10862     * for zero-mapped memory areas etc..
10863     @@ -118,9 +115,6 @@
10864     set_pgd(__user_pgd(pgd), __pgd(0));
10865     }
10866    
10867     -#define pud_page(pud) \
10868     - ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10869     -
10870     #define pte_same(a, b) ((a).pte == (b).pte)
10871    
10872     #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
10873     @@ -332,7 +326,7 @@
10874     #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
10875     static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10876     static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10877     -static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
10878     +static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
10879     static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
10880     static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
10881     static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
10882     @@ -345,29 +339,12 @@
10883     static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
10884     static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
10885     static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10886     -static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
10887     +static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
10888     static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
10889     static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
10890     static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
10891     static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
10892     -
10893     -#define ptep_test_and_clear_dirty(vma, addr, ptep) \
10894     -({ \
10895     - pte_t __pte = *(ptep); \
10896     - int __ret = pte_dirty(__pte); \
10897     - if (__ret) \
10898     - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
10899     - __ret; \
10900     -})
10901     -
10902     -#define ptep_test_and_clear_young(vma, addr, ptep) \
10903     -({ \
10904     - pte_t __pte = *(ptep); \
10905     - int __ret = pte_young(__pte); \
10906     - if (__ret) \
10907     - set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
10908     - __ret; \
10909     -})
10910     +static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
10911    
10912     static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
10913     {
10914     @@ -395,7 +372,8 @@
10915     * Level 4 access.
10916     * Never use these in the common code.
10917     */
10918     -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10919     +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
10920     +#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
10921     #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
10922     #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
10923     #define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
10924     @@ -404,16 +382,18 @@
10925    
10926     /* PUD - Level3 access */
10927     /* to find an entry in a page-table-directory. */
10928     +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
10929     +#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
10930     #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
10931     -#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
10932     +#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
10933     #define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
10934    
10935     /* PMD - Level 2 access */
10936     -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10937     +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
10938     #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
10939    
10940     #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
10941     -#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
10942     +#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
10943     pmd_index(address))
10944     #define pmd_none(x) (!__pmd_val(x))
10945     #if CONFIG_XEN_COMPAT <= 0x030002
10946     @@ -444,6 +424,7 @@
10947     {
10948     unsigned long pteval;
10949     pteval = physpage | pgprot_val(pgprot);
10950     + pteval &= __supported_pte_mask;
10951     return __pte(pteval);
10952     }
10953    
10954     @@ -465,7 +446,7 @@
10955    
10956     #define pte_index(address) \
10957     (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
10958     -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
10959     +#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
10960     pte_index(address))
10961    
10962     /* x86-64 always has all page tables mapped. */
10963     @@ -506,6 +487,40 @@
10964     ptep_establish(vma, address, ptep, entry); \
10965     } while (0)
10966    
10967     +
10968     +/*
10969     + * As on i386: these are not provided as separate primitives here, but
10970     + * advertising them lets the flush be folded into the macros below.
10971     + */
10972     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
10973     +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
10974     +
10975     +#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
10976     +#define ptep_clear_flush_dirty(vma, address, ptep) \
10977     +({ \
10978     + pte_t __pte = *(ptep); \
10979     + int __dirty = pte_dirty(__pte); \
10980     + __pte = pte_mkclean(__pte); \
10981     + if ((vma)->vm_mm->context.pinned) \
10982     + ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
10983     + else if (__dirty) \
10984     + set_pte(ptep, __pte); \
10985     + __dirty; \
10986     +})
10987     +
10988     +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
10989     +#define ptep_clear_flush_young(vma, address, ptep) \
10990     +({ \
10991     + pte_t __pte = *(ptep); \
10992     + int __young = pte_young(__pte); \
10993     + __pte = pte_mkold(__pte); \
10994     + if ((vma)->vm_mm->context.pinned) \
10995     + ptep_set_access_flags(vma, address, ptep, __pte, __young); \
10996     + else if (__young) \
10997     + set_pte(ptep, __pte); \
10998     + __young; \
10999     +})
11000     +
11001     /* Encode and de-code a swap entry */
11002     #define __swp_type(x) (((x).val >> 1) & 0x3f)
11003     #define __swp_offset(x) ((x).val >> 8)
11004     @@ -547,10 +562,11 @@
11005     unsigned long size);
11006    
11007     int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
11008     - unsigned long addr, unsigned long end, pgprot_t newprot);
11009     + unsigned long addr, unsigned long end, pgprot_t newprot,
11010     + int dirty_accountable);
11011    
11012     -#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
11013     - xen_change_pte_range(mm, pmd, addr, end, newprot)
11014     +#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
11015     + xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
11016    
11017     #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
11018     direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
11019     @@ -572,8 +588,6 @@
11020     #define kc_offset_to_vaddr(o) \
11021     (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
11022    
11023     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
11024     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
11025     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
11026     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
11027     #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
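
The pte_exec()/pte_mkexec() hunk above replaces a test of _PAGE_USER with one of _PAGE_NX: a page is executable when the no-execute bit is clear, which is independent of whether it is user-accessible. A self-contained sketch of the before/after semantics (the two bit positions match x86; everything else here is illustrative):

#include <stdint.h>
#include <stdio.h>

#define _PAGE_USER  (1ULL << 2)        /* user-accessible */
#define _PAGE_NX    (1ULL << 63)       /* no-execute */

static int pte_exec_new(uint64_t pteval)
{
        return !(pteval & _PAGE_NX);           /* correct: test NX */
}

static int pte_exec_old(uint64_t pteval)
{
        return !!(pteval & _PAGE_USER);        /* old: user != executable */
}

int main(void)
{
        /* A user page that is marked no-execute. */
        uint64_t user_noexec = _PAGE_USER | _PAGE_NX;
        printf("new test: %d, old test: %d\n",
               pte_exec_new(user_noexec), pte_exec_old(user_noexec));
        return 0;
}
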
11028 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h
11029     --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 20:51:22.000000000 +0200
11030     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 20:39:03.000000000 +0200
11031 niro 609 @@ -146,6 +146,18 @@
11032     #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
11033     #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
11034    
11035     +static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
11036     + unsigned int *ecx, unsigned int *edx)
11037     +{
11038     + /* ecx is often an input as well as an output. */
11039     + __asm__(XEN_CPUID
11040     + : "=a" (*eax),
11041     + "=b" (*ebx),
11042     + "=c" (*ecx),
11043     + "=d" (*edx)
11044     + : "0" (*eax), "2" (*ecx));
11045     +}
11046     +
11047     /*
11048     * Generic CPUID function
11049     * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
11050     @@ -153,24 +165,18 @@
11051     */
11052     static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
11053     {
11054     - __asm__(XEN_CPUID
11055     - : "=a" (*eax),
11056     - "=b" (*ebx),
11057     - "=c" (*ecx),
11058     - "=d" (*edx)
11059     - : "0" (op), "c"(0));
11060     + *eax = op;
11061     + *ecx = 0;
11062     + __cpuid(eax, ebx, ecx, edx);
11063     }
11064    
11065     /* Some CPUID calls want 'count' to be placed in ecx */
11066     static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
11067     - int *edx)
11068     + int *edx)
11069     {
11070     - __asm__(XEN_CPUID
11071     - : "=a" (*eax),
11072     - "=b" (*ebx),
11073     - "=c" (*ecx),
11074     - "=d" (*edx)
11075     - : "0" (op), "c" (count));
11076     + *eax = op;
11077     + *ecx = count;
11078     + __cpuid(eax, ebx, ecx, edx);
11079     }
11080    
11081     /*
11082     @@ -178,42 +184,30 @@
11083     */
11084     static inline unsigned int cpuid_eax(unsigned int op)
11085     {
11086     - unsigned int eax;
11087     + unsigned int eax, ebx, ecx, edx;
11088    
11089     - __asm__(XEN_CPUID
11090     - : "=a" (eax)
11091     - : "0" (op)
11092     - : "bx", "cx", "dx");
11093     + cpuid(op, &eax, &ebx, &ecx, &edx);
11094     return eax;
11095     }
11096     static inline unsigned int cpuid_ebx(unsigned int op)
11097     {
11098     - unsigned int eax, ebx;
11099     + unsigned int eax, ebx, ecx, edx;
11100    
11101     - __asm__(XEN_CPUID
11102     - : "=a" (eax), "=b" (ebx)
11103     - : "0" (op)
11104     - : "cx", "dx" );
11105     + cpuid(op, &eax, &ebx, &ecx, &edx);
11106     return ebx;
11107     }
11108     static inline unsigned int cpuid_ecx(unsigned int op)
11109     {
11110     - unsigned int eax, ecx;
11111     + unsigned int eax, ebx, ecx, edx;
11112    
11113     - __asm__(XEN_CPUID
11114     - : "=a" (eax), "=c" (ecx)
11115     - : "0" (op)
11116     - : "bx", "dx" );
11117     + cpuid(op, &eax, &ebx, &ecx, &edx);
11118     return ecx;
11119     }
11120     static inline unsigned int cpuid_edx(unsigned int op)
11121     {
11122     - unsigned int eax, edx;
11123     + unsigned int eax, ebx, ecx, edx;
11124    
11125     - __asm__(XEN_CPUID
11126     - : "=a" (eax), "=d" (edx)
11127     - : "0" (op)
11128     - : "bx", "cx");
11129     + cpuid(op, &eax, &ebx, &ecx, &edx);
11130     return edx;
11131     }
11132    
11133     @@ -315,6 +309,8 @@
11134     : :"a" (eax), "c" (ecx));
11135     }
11136    
11137     +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11138     +
11139     /* from system description table in BIOS. Mostly for MCA use, but
11140     others may find it useful. */
11141     extern unsigned int machine_id;
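
The cpuid rework above funnels every helper through one __cpuid() primitive that takes %ecx as an input as well as an output, so sub-leaf queries and the plain helpers share a single asm statement. A userspace sketch of the same layering, using the native cpuid instruction rather than the header's XEN_CPUID (x86 only; names are illustrative):

#include <stdio.h>

static inline void my_cpuid(unsigned int *eax, unsigned int *ebx,
                            unsigned int *ecx, unsigned int *edx)
{
        /* ecx is an input (sub-leaf) as well as an output. */
        __asm__("cpuid"
                : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                : "0" (*eax), "2" (*ecx));
}

static inline void my_cpuid_count(unsigned int op, unsigned int count,
                                  unsigned int *eax, unsigned int *ebx,
                                  unsigned int *ecx, unsigned int *edx)
{
        *eax = op;
        *ecx = count;   /* sub-leaf goes in ecx */
        my_cpuid(eax, ebx, ecx, edx);
}

static inline unsigned int my_cpuid_eax(unsigned int op)
{
        unsigned int eax, ebx, ecx, edx;
        my_cpuid_count(op, 0, &eax, &ebx, &ecx, &edx);
        return eax;
}

int main(void)
{
        printf("max basic cpuid leaf: 0x%x\n", my_cpuid_eax(0));
        return 0;
}
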
11142 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h
11143     --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 20:51:11.000000000 +0200
11144     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 20:39:03.000000000 +0200
11145 niro 609 @@ -484,6 +484,8 @@
11146     : :"a" (eax), "c" (ecx));
11147     }
11148    
11149     +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
11150     +
11151     #define stack_current() \
11152     ({ \
11153     struct thread_info *ti; \
11154 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/segment_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/segment_32.h
11155     --- linux-2.6.25/include/asm-x86/mach-xen/asm/segment_32.h 2008-05-23 20:51:11.000000000 +0200
11156     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/segment_32.h 2008-05-23 20:39:03.000000000 +0200
11157 niro 609 @@ -61,11 +61,9 @@
11158    
11159     #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
11160     #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
11161     -#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11162    
11163     #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
11164     #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
11165     -#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
11166    
11167     #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
11168     #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
11169     @@ -85,6 +83,11 @@
11170    
11171     #define GDT_SIZE (GDT_ENTRIES * 8)
11172    
11173     +/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
11174     +#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
11175     +/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
11176     +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
11177     +
11178     /* Simple and small GDT entries for booting only */
11179    
11180     #define GDT_ENTRY_BOOT_CS 2
11181     @@ -114,4 +117,16 @@
11182     */
11183     #define IDT_ENTRIES 256
11184    
11185     +/* Bottom two bits of selector give the ring privilege level */
11186     +#define SEGMENT_RPL_MASK 0x3
11187     +/* Bit 2 is table indicator (LDT/GDT) */
11188     +#define SEGMENT_TI_MASK 0x4
11189     +
11190     +/* User mode is privilege level 3 */
11191     +#define USER_RPL 0x3
11192     +/* LDT segment has TI set, GDT has it cleared */
11193     +#define SEGMENT_LDT 0x4
11194     +#define SEGMENT_GDT 0x0
11195     +
11196     +#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
11197     #endif
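
The selector masks added above decode an x86 segment selector: bits 0-1 hold the requested privilege level, bit 2 picks the LDT or GDT, and the remaining bits index the descriptor table. A small stand-alone sketch of that decoding (the example selector value is made up):

#include <stdio.h>

#define SEGMENT_RPL_MASK 0x3    /* bottom two bits: privilege level */
#define SEGMENT_TI_MASK  0x4    /* bit 2: table indicator */
#define USER_RPL         0x3    /* user mode is ring 3 */

int main(void)
{
        unsigned int sel = 0x73;        /* example: GDT index 14, RPL 3 */

        printf("index=%u table=%s rpl=%u user=%d\n",
               sel >> 3,
               (sel & SEGMENT_TI_MASK) ? "LDT" : "GDT",
               sel & SEGMENT_RPL_MASK,
               (sel & SEGMENT_RPL_MASK) == USER_RPL);
        return 0;
}
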
11198 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/smp_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_32.h
11199     --- linux-2.6.25/include/asm-x86/mach-xen/asm/smp_32.h 2008-05-23 20:51:11.000000000 +0200
11200     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_32.h 2008-05-23 20:39:03.000000000 +0200
11201 niro 609 @@ -79,25 +79,36 @@
11202     return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
11203     }
11204     #endif
11205     -
11206     -static __inline int logical_smp_processor_id(void)
11207     -{
11208     - /* we don't want to mark this access volatile - bad code generation */
11209     - return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11210     -}
11211     -
11212     #endif
11213    
11214     +#define safe_smp_processor_id() smp_processor_id()
11215     extern int __cpu_disable(void);
11216     extern void __cpu_die(unsigned int cpu);
11217     extern void prefill_possible_map(void);
11218     +extern unsigned int num_processors;
11219     +
11220     #endif /* !__ASSEMBLY__ */
11221    
11222     #else /* CONFIG_SMP */
11223    
11224     +#define safe_smp_processor_id() 0
11225     #define cpu_physical_id(cpu) boot_cpu_physical_apicid
11226    
11227     #define NO_PROC_ID 0xFF /* No processor magic marker */
11228    
11229     #endif
11230     +
11231     +#ifndef __ASSEMBLY__
11232     +
11233     +extern u8 apicid_2_node[];
11234     +
11235     +#ifdef CONFIG_X86_LOCAL_APIC
11236     +static __inline int logical_smp_processor_id(void)
11237     +{
11238     + /* we don't want to mark this access volatile - bad code generation */
11239     + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11240     +}
11241     +#endif
11242     +#endif
11243     +
11244     #endif
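
logical_smp_processor_id() is moved out of the CONFIG_SMP block above so it is available to any CONFIG_X86_LOCAL_APIC build; it simply extracts the logical APIC ID from the top byte of the LDR register. A sketch of that extraction with a made-up register value (the shift/mask mirror GET_APIC_LOGICAL_ID):

#include <stdio.h>

#define GET_APIC_LOGICAL_ID(x) (((x) >> 24) & 0xFFu)

int main(void)
{
        unsigned long ldr = 0x02000000ul;       /* illustrative LDR value */
        printf("logical apic id: %lu\n", GET_APIC_LOGICAL_ID(ldr));
        return 0;
}
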
11245 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/smp_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_64.h
11246     --- linux-2.6.25/include/asm-x86/mach-xen/asm/smp_64.h 2008-05-23 20:51:11.000000000 +0200
11247     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/smp_64.h 2008-05-23 20:39:03.000000000 +0200
11248 niro 609 @@ -4,15 +4,12 @@
11249     /*
11250     * We need the APIC definitions automatically as part of 'smp.h'
11251     */
11252     -#ifndef __ASSEMBLY__
11253     #include <linux/threads.h>
11254     #include <linux/cpumask.h>
11255     #include <linux/bitops.h>
11256     extern int disable_apic;
11257     -#endif
11258    
11259     #ifdef CONFIG_X86_LOCAL_APIC
11260     -#ifndef __ASSEMBLY__
11261     #include <asm/fixmap.h>
11262     #include <asm/mpspec.h>
11263     #ifdef CONFIG_X86_IO_APIC
11264     @@ -21,10 +18,8 @@
11265     #include <asm/apic.h>
11266     #include <asm/thread_info.h>
11267     #endif
11268     -#endif
11269    
11270     #ifdef CONFIG_SMP
11271     -#ifndef ASSEMBLY
11272    
11273     #include <asm/pda.h>
11274    
11275     @@ -41,14 +36,11 @@
11276    
11277     extern void smp_alloc_memory(void);
11278     extern volatile unsigned long smp_invalidate_needed;
11279     -extern int pic_mode;
11280     extern void lock_ipi_call_lock(void);
11281     extern void unlock_ipi_call_lock(void);
11282     extern int smp_num_siblings;
11283     extern void smp_send_reschedule(int cpu);
11284     void smp_stop_cpu(void);
11285     -extern int smp_call_function_single(int cpuid, void (*func) (void *info),
11286     - void *info, int retry, int wait);
11287    
11288     extern cpumask_t cpu_sibling_map[NR_CPUS];
11289     extern cpumask_t cpu_core_map[NR_CPUS];
11290     @@ -77,20 +69,16 @@
11291     }
11292     #endif
11293    
11294     -extern int safe_smp_processor_id(void);
11295     extern int __cpu_disable(void);
11296     extern void __cpu_die(unsigned int cpu);
11297     extern void prefill_possible_map(void);
11298     extern unsigned num_processors;
11299     extern unsigned disabled_cpus;
11300    
11301     -#endif /* !ASSEMBLY */
11302     -
11303     #define NO_PROC_ID 0xFF /* No processor magic marker */
11304    
11305     #endif
11306    
11307     -#ifndef ASSEMBLY
11308     /*
11309     * Some lowlevel functions might want to know about
11310     * the real APIC ID <-> CPU # mapping.
11311     @@ -114,11 +102,8 @@
11312     }
11313     #endif
11314    
11315     -#endif /* !ASSEMBLY */
11316     -
11317     #ifndef CONFIG_SMP
11318     #define stack_smp_processor_id() 0
11319     -#define safe_smp_processor_id() 0
11320     #define cpu_logical_map(x) (x)
11321     #else
11322     #include <asm/thread_info.h>
11323     @@ -130,7 +115,6 @@
11324     })
11325     #endif
11326    
11327     -#ifndef __ASSEMBLY__
11328     #ifdef CONFIG_X86_LOCAL_APIC
11329     static __inline int logical_smp_processor_id(void)
11330     {
11331     @@ -138,13 +122,18 @@
11332     return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
11333     }
11334     #endif
11335     -#endif
11336    
11337     #ifdef CONFIG_SMP
11338     #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
11339     #else
11340     #define cpu_physical_id(cpu) boot_cpu_id
11341     -#endif
11342     -
11343     +static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
11344     + void *info, int retry, int wait)
11345     +{
11346     + /* Single CPU: call the function directly; interrupts are not disabled here. */
11347     + func(info);
11348     + return 0;
11349     +}
11350     +#endif /* !CONFIG_SMP */
11351     #endif
11352    
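
The !CONFIG_SMP branch above gains an inline smp_call_function_single(): with a single CPU there is no remote processor to interrupt, so the request degenerates to a direct call. A userspace sketch of that fallback shape (names and the demo callback are illustrative, not the kernel implementation):

#include <stdio.h>

static int up_call_function_single(int cpuid, void (*func)(void *info),
                                   void *info, int retry, int wait)
{
        (void)cpuid; (void)retry; (void)wait;   /* single CPU: ignored */
        func(info);                             /* run it right here */
        return 0;
}

static void hello(void *info)
{
        printf("running on the only cpu: %s\n", (const char *)info);
}

int main(void)
{
        return up_call_function_single(0, hello, "up", 0, 1);
}
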
11353 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h
11354     --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 20:51:11.000000000 +0200
11355     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 20:39:03.000000000 +0200
11356 niro 609 @@ -267,6 +267,9 @@
11357     #define cmpxchg(ptr,o,n)\
11358     ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
11359     (unsigned long)(n),sizeof(*(ptr))))
11360     +#define sync_cmpxchg(ptr,o,n)\
11361     + ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
11362     + (unsigned long)(n),sizeof(*(ptr))))
11363     #endif
11364    
11365     static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
11366 niro 611 @@ -296,6 +299,39 @@
11367     return old;
11368     }
11369    
11370 niro 609 +/*
11371     + * Always use locked operations when touching memory shared with a
11372     + * hypervisor, since the system may be SMP even if the guest kernel
11373     + * isn't.
11374     + */
11375     +static inline unsigned long __sync_cmpxchg(volatile void *ptr,
11376     + unsigned long old,
11377     + unsigned long new, int size)
11378     +{
11379     + unsigned long prev;
11380     + switch (size) {
11381     + case 1:
11382     + __asm__ __volatile__("lock; cmpxchgb %b1,%2"
11383     + : "=a"(prev)
11384     + : "q"(new), "m"(*__xg(ptr)), "0"(old)
11385     + : "memory");
11386     + return prev;
11387     + case 2:
11388     + __asm__ __volatile__("lock; cmpxchgw %w1,%2"
11389     + : "=a"(prev)
11390     + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11391     + : "memory");
11392     + return prev;
11393     + case 4:
11394     + __asm__ __volatile__("lock; cmpxchgl %1,%2"
11395     + : "=a"(prev)
11396     + : "r"(new), "m"(*__xg(ptr)), "0"(old)
11397     + : "memory");
11398 niro 611 + return prev;
11399     + }
11400     + return old;
11401     +}
11402     +
11403     #ifndef CONFIG_X86_CMPXCHG
11404     /*
11405     * Building a kernel capable running on 80386. It may be necessary to
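
__sync_cmpxchg() above always emits the lock prefix because memory shared with the hypervisor can be touched by other physical CPUs even when the guest kernel is built UP. A userspace sketch of the 4-byte case using the same instruction (illustrative, not the kernel routine):

#include <stdio.h>

static unsigned int sync_cmpxchg4(volatile unsigned int *ptr,
                                  unsigned int old, unsigned int new)
{
        unsigned int prev;
        /* lock prefix unconditionally: safe against other physical CPUs */
        __asm__ __volatile__("lock; cmpxchgl %1,%2"
                             : "=a" (prev)
                             : "r" (new), "m" (*ptr), "0" (old)
                             : "memory");
        return prev;
}

int main(void)
{
        volatile unsigned int v = 1;
        unsigned int prev = sync_cmpxchg4(&v, 1, 2);
        printf("prev=%u, now=%u\n", prev, v);
        return 0;
}
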
11406     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h
11407     --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 20:51:11.000000000 +0200
11408     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 20:39:03.000000000 +0200
11409 niro 609 @@ -24,6 +24,7 @@
11410     #define __EXTRA_CLOBBER \
11411     ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
11412    
11413     +/* Save and restore flags across the switch so a leaking NT flag is cleared */
11414     #define switch_to(prev,next,last) \
11415     asm volatile(SAVE_CONTEXT \
11416     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
11417 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h
11418     --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 20:51:11.000000000 +0200
11419     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 20:39:03.000000000 +0200
11420 niro 609 @@ -8,8 +8,6 @@
11421     #define __flush_tlb_global() xen_tlb_flush()
11422     #define __flush_tlb_all() xen_tlb_flush()
11423    
11424     -extern unsigned long pgkern_mask;
11425     -
11426     #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
11427    
11428     #define __flush_tlb_single(addr) xen_invlpg(addr)
11429 niro 611 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h
11430     --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 20:51:11.000000000 +0200
11431     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 20:39:03.000000000 +0200
11432 niro 609 @@ -12,9 +12,6 @@
11433     */
11434     #define __flush_tlb_global() xen_tlb_flush()
11435    
11436     -
11437     -extern unsigned long pgkern_mask;
11438     -
11439     #define __flush_tlb_all() __flush_tlb_global()
11440    
11441     #define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
11442 niro 611 diff -Naur linux-2.6.25/include/asm-x86/thread_info_64.h linux-2.6.25-xen/include/asm-x86/thread_info_64.h
11443     --- linux-2.6.25/include/asm-x86/thread_info_64.h 2008-04-17 04:49:44.000000000 +0200
11444     +++ linux-2.6.25-xen/include/asm-x86/thread_info_64.h 2008-05-23 20:39:03.000000000 +0200
11445 niro 609 @@ -157,10 +157,14 @@
11446     (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
11447    
11448     /* flags to check in __switch_to() */
11449     +#ifndef CONFIG_XEN
11450     #define _TIF_WORK_CTXSW \
11451     (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS)
11452     #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
11453     #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
11454     +#else
11455     +#define _TIF_WORK_CTXSW _TIF_DEBUG
11456     +#endif
11457    
11458     #define PREEMPT_ACTIVE 0x10000000
11459    
11460 niro 611 diff -Naur linux-2.6.25/include/linux/skbuff.h linux-2.6.25-xen/include/linux/skbuff.h
11461     --- linux-2.6.25/include/linux/skbuff.h 2008-05-23 20:51:15.000000000 +0200
11462     +++ linux-2.6.25-xen/include/linux/skbuff.h 2008-05-23 20:39:03.000000000 +0200
11463 niro 609 @@ -1821,5 +1821,12 @@
11464     }
11465    
11466     bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
11467     +
11468     +#ifdef CONFIG_XEN
11469     +int skb_checksum_setup(struct sk_buff *skb);
11470     +#else
11471     +static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
11472     +#endif
11473     +
11474     #endif /* __KERNEL__ */
11475     #endif /* _LINUX_SKBUFF_H */
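
The skbuff.h hunk uses the usual conditional-stub pattern: under CONFIG_XEN the real skb_checksum_setup() is declared, otherwise callers see a static inline returning 0, so call sites stay free of #ifdefs. A generic sketch of the pattern with made-up names:

#include <stdio.h>

/* #define CONFIG_FEATURE_X 1 */

#ifdef CONFIG_FEATURE_X
int feature_x_setup(void *obj);         /* real implementation elsewhere */
#else
static inline int feature_x_setup(void *obj) { (void)obj; return 0; }
#endif

int main(void)
{
        int err = feature_x_setup(NULL); /* compiles either way, no #ifdef */
        printf("setup: %d\n", err);
        return 0;
}
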
11476 niro 611 diff -Naur linux-2.6.25/include/xen/evtchn.h linux-2.6.25-xen/include/xen/evtchn.h
11477     --- linux-2.6.25/include/xen/evtchn.h 2008-05-23 20:51:11.000000000 +0200
11478     +++ linux-2.6.25-xen/include/xen/evtchn.h 2008-05-23 20:39:03.000000000 +0200
11479 niro 609 @@ -54,34 +54,34 @@
11480     */
11481     int bind_caller_port_to_irqhandler(
11482     unsigned int caller_port,
11483     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11484     + irq_handler_t handler,
11485     unsigned long irqflags,
11486     const char *devname,
11487     void *dev_id);
11488     int bind_listening_port_to_irqhandler(
11489     unsigned int remote_domain,
11490     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11491     + irq_handler_t handler,
11492     unsigned long irqflags,
11493     const char *devname,
11494     void *dev_id);
11495     int bind_interdomain_evtchn_to_irqhandler(
11496     unsigned int remote_domain,
11497     unsigned int remote_port,
11498     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11499     + irq_handler_t handler,
11500     unsigned long irqflags,
11501     const char *devname,
11502     void *dev_id);
11503     int bind_virq_to_irqhandler(
11504     unsigned int virq,
11505     unsigned int cpu,
11506     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11507     + irq_handler_t handler,
11508     unsigned long irqflags,
11509     const char *devname,
11510     void *dev_id);
11511     int bind_ipi_to_irqhandler(
11512     unsigned int ipi,
11513     unsigned int cpu,
11514     - irqreturn_t (*handler)(int, void *, struct pt_regs *),
11515     + irq_handler_t handler,
11516     unsigned long irqflags,
11517     const char *devname,
11518     void *dev_id);
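
These prototypes drop the explicit three-argument handler type in favour of irq_handler_t, which as of 2.6.19 takes only the IRQ number and dev_id (struct pt_regs is gone). A stand-alone sketch of a handler matching that shape, with local stand-ins for the kernel types:

#include <stdio.h>

typedef int irqreturn_t;
#define IRQ_HANDLED 1

typedef irqreturn_t (*irq_handler_t)(int, void *);

static irqreturn_t my_handler(int irq, void *dev_id)
{
        printf("irq %d for dev %p\n", irq, dev_id);
        return IRQ_HANDLED;
}

int main(void)
{
        irq_handler_t h = my_handler;   /* matches the new prototype */
        return h(7, NULL) == IRQ_HANDLED ? 0 : 1;
}
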
11519 niro 611 diff -Naur linux-2.6.25/include/xen/xencons.h linux-2.6.25-xen/include/xen/xencons.h
11520     --- linux-2.6.25/include/xen/xencons.h 2008-05-23 20:51:11.000000000 +0200
11521     +++ linux-2.6.25-xen/include/xen/xencons.h 2008-05-23 20:39:03.000000000 +0200
11522 niro 609 @@ -8,7 +8,7 @@
11523     void xencons_resume(void);
11524    
11525     /* Interrupt work hooks. Receive data, or kick data out. */
11526     -void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
11527     +void xencons_rx(char *buf, unsigned len);
11528     void xencons_tx(void);
11529    
11530     int xencons_ring_init(void);
11531 niro 611 diff -Naur linux-2.6.25/mm/mprotect.c linux-2.6.25-xen/mm/mprotect.c
11532     --- linux-2.6.25/mm/mprotect.c 2008-05-23 20:51:15.000000000 +0200
11533     +++ linux-2.6.25-xen/mm/mprotect.c 2008-05-23 20:39:03.000000000 +0200
11534 niro 609 @@ -86,7 +86,7 @@
11535     next = pmd_addr_end(addr, end);
11536     if (pmd_none_or_clear_bad(pmd))
11537     continue;
11538     - if (arch_change_pte_range(mm, pmd, addr, next, newprot))
11539     + if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
11540     continue;
11541     change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
11542     } while (pmd++, addr = next, addr != end);
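
The call above follows the hook protocol where arch_change_pte_range() returns nonzero if it handled the whole pmd range and zero to fall through to the generic change_pte_range(). A sketch of the assumed fallback shape for architectures without the hook (the macro and guard name are illustrative, not taken from the patch):

#include <stdio.h>

/* Assumed fallback when no arch hook exists: report "not handled"
 * so the generic pte walk runs. Illustrative only. */
#ifndef ARCH_HAS_CHANGE_PTE_RANGE
#define arch_change_pte_range(mm, pmd, addr, end, prot, dirty) 0
#endif

int main(void)
{
        if (arch_change_pte_range(0, 0, 0, 0, 0, 0))
                printf("range fully handled by the arch hook\n");
        else
                printf("falling through to generic change_pte_range()\n");
        return 0;
}
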
11543 niro 611 diff -Naur linux-2.6.25/net/core/dev.c linux-2.6.25-xen/net/core/dev.c
11544     --- linux-2.6.25/net/core/dev.c 2008-05-23 20:51:15.000000000 +0200
11545     +++ linux-2.6.25-xen/net/core/dev.c 2008-05-23 20:39:03.000000000 +0200
11546     @@ -1607,15 +1607,14 @@
11547 niro 609 }
11548     if ((skb->h.raw + skb->csum + 2) > skb->tail)
11549     goto out;
11550     - skb->ip_summed = CHECKSUM_HW;
11551     + skb->ip_summed = CHECKSUM_PARTIAL;
11552     skb->proto_csum_blank = 0;
11553     }
11554     return 0;
11555     out:
11556     return -EPROTO;
11557     }
11558     -#else
11559     -inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
11560     +EXPORT_SYMBOL(skb_checksum_setup);
11561     #endif
11562    
11563     /**
11564 niro 611 @@ -2111,7 +2110,7 @@
11565 niro 609 case CHECKSUM_UNNECESSARY:
11566     skb->proto_data_valid = 1;
11567     break;
11568     - case CHECKSUM_HW:
11569     + case CHECKSUM_PARTIAL:
11570     /* XXX Implement me. */
11571     default:
11572     skb->proto_data_valid = 0;
11573 niro 611 @@ -4644,7 +4643,6 @@
11574 niro 609 EXPORT_SYMBOL(net_enable_timestamp);
11575     EXPORT_SYMBOL(net_disable_timestamp);
11576     EXPORT_SYMBOL(dev_get_flags);
11577     -EXPORT_SYMBOL(skb_checksum_setup);
11578    
11579     #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
11580     EXPORT_SYMBOL(br_handle_frame_hook);
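
These dev.c hunks track the 2.6.19 split of CHECKSUM_HW into CHECKSUM_PARTIAL (transmit path, checksum still to be filled in) and CHECKSUM_COMPLETE (receive path, device supplied a full checksum). A sketch of dispatching on the new states; the enum values are local stand-ins, not the kernel's definitions:

#include <stdio.h>

enum { CHECKSUM_NONE, CHECKSUM_UNNECESSARY,
       CHECKSUM_COMPLETE, CHECKSUM_PARTIAL };

static const char *csum_name(int ip_summed)
{
        switch (ip_summed) {
        case CHECKSUM_PARTIAL:     return "partial (tx: fill in csum)";
        case CHECKSUM_COMPLETE:    return "complete (rx: full csum supplied)";
        case CHECKSUM_UNNECESSARY: return "unnecessary";
        default:                   return "none";
        }
}

int main(void)
{
        printf("%s\n", csum_name(CHECKSUM_PARTIAL));
        return 0;
}
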