Magellan Linux

Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1024-2.6.25-xen-patch-2.6.23.patch

Parent Directory | Revision Log


Revision 609 - (hide annotations) (download)
Fri May 23 17:35:37 2008 UTC (16 years ago) by niro
File size: 171857 byte(s)
-using opensuse xen patchset, updated kernel configs

1 niro 609 diff -Naur linux-2.6.25/arch/x86/ia32/ia32entry-xen.S linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S
2     --- linux-2.6.25/arch/x86/ia32/ia32entry-xen.S 2008-05-23 18:33:27.000000000 +0200
3     +++ linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S 2008-05-23 18:27:40.000000000 +0200
4     @@ -105,7 +105,7 @@
5     movl $VSYSCALL32_SYSEXIT,8(%rsp)
6     movq %rax,(%rsp)
7     cld
8     - SAVE_ARGS 0,0,0
9     + SAVE_ARGS 0,0,1
10     /* no need to do an access_ok check here because rbp has been
11     32bit zero extended */
12     1: movl (%rbp),%r9d
13     @@ -244,7 +244,7 @@
14     */
15    
16     ENTRY(ia32_syscall)
17     - CFI_STARTPROC simple
18     + CFI_STARTPROC32 simple
19     CFI_SIGNAL_FRAME
20     CFI_DEF_CFA rsp,SS+8-RIP+16
21     /*CFI_REL_OFFSET ss,SS-RIP+16*/
22     @@ -280,6 +280,7 @@
23    
24     ia32_tracesys:
25     SAVE_REST
26     + CLEAR_RREGS
27     movq $-ENOSYS,RAX(%rsp) /* really needed? */
28     movq %rsp,%rdi /* &pt_regs -> arg1 */
29     call syscall_trace_enter
30     @@ -476,7 +477,7 @@
31     .quad sys_init_module
32     .quad sys_delete_module
33     .quad quiet_ni_syscall /* 130 get_kernel_syms */
34     - .quad sys_quotactl
35     + .quad sys32_quotactl
36     .quad sys_getpgid
37     .quad sys_fchdir
38     .quad quiet_ni_syscall /* bdflush */
39     @@ -669,4 +670,5 @@
40     .quad compat_sys_signalfd
41     .quad compat_sys_timerfd
42     .quad sys_eventfd
43     + .quad sys32_fallocate
44     ia32_syscall_end:
45     diff -Naur linux-2.6.25/arch/x86/kernel/acpi/sleep_32-xen.c linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_32-xen.c
46     --- linux-2.6.25/arch/x86/kernel/acpi/sleep_32-xen.c 2008-05-23 18:32:29.000000000 +0200
47     +++ linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_32-xen.c 2008-05-23 18:27:40.000000000 +0200
48     @@ -15,7 +15,7 @@
49     #ifndef CONFIG_ACPI_PV_SLEEP
50     /* address in low memory of the wakeup routine. */
51     unsigned long acpi_wakeup_address = 0;
52     -unsigned long acpi_video_flags;
53     +unsigned long acpi_realmode_flags;
54     extern char wakeup_start, wakeup_end;
55    
56     extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
57     @@ -74,9 +74,11 @@
58     {
59     while ((str != NULL) && (*str != '\0')) {
60     if (strncmp(str, "s3_bios", 7) == 0)
61     - acpi_video_flags = 1;
62     + acpi_realmode_flags |= 1;
63     if (strncmp(str, "s3_mode", 7) == 0)
64     - acpi_video_flags |= 2;
65     + acpi_realmode_flags |= 2;
66     + if (strncmp(str, "s3_beep", 7) == 0)
67     + acpi_realmode_flags |= 4;
68     str = strchr(str, ',');
69     if (str != NULL)
70     str += strspn(str, ", \t");
71     @@ -86,9 +88,11 @@
72    
73     __setup("acpi_sleep=", acpi_sleep_setup);
74    
75     +/* Ouch, we want to delete this. We already have a better version in userspace,
76     + in s2ram from the suspend.sf.net project. */
77     static __init int reset_videomode_after_s3(struct dmi_system_id *d)
78     {
79     - acpi_video_flags |= 2;
80     + acpi_realmode_flags |= 2;
81     return 0;
82     }
83    
84     diff -Naur linux-2.6.25/arch/x86/kernel/acpi/sleep_64-xen.c linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_64-xen.c
85     --- linux-2.6.25/arch/x86/kernel/acpi/sleep_64-xen.c 2008-05-23 18:33:27.000000000 +0200
86     +++ linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_64-xen.c 2008-05-23 18:27:40.000000000 +0200
87     @@ -51,12 +51,10 @@
88     Low-Level Sleep Support
89     -------------------------------------------------------------------------- */
90    
91     -#ifdef CONFIG_ACPI_SLEEP
92     -
93     #ifndef CONFIG_ACPI_PV_SLEEP
94     /* address in low memory of the wakeup routine. */
95     unsigned long acpi_wakeup_address = 0;
96     -unsigned long acpi_video_flags;
97     +unsigned long acpi_realmode_flags;
98     extern char wakeup_start, wakeup_end;
99    
100     extern unsigned long acpi_copy_wakeup_routine(unsigned long);
101     @@ -109,9 +107,11 @@
102     {
103     while ((str != NULL) && (*str != '\0')) {
104     if (strncmp(str, "s3_bios", 7) == 0)
105     - acpi_video_flags = 1;
106     + acpi_realmode_flags |= 1;
107     if (strncmp(str, "s3_mode", 7) == 0)
108     - acpi_video_flags |= 2;
109     + acpi_realmode_flags |= 2;
110     + if (strncmp(str, "s3_beep", 7) == 0)
111     + acpi_realmode_flags |= 4;
112     str = strchr(str, ',');
113     if (str != NULL)
114     str += strspn(str, ", \t");
115     @@ -123,8 +123,6 @@
116     __setup("acpi_sleep=", acpi_sleep_setup);
117     #endif /* CONFIG_ACPI_PV_SLEEP */
118    
119     -#endif /*CONFIG_ACPI_SLEEP */
120     -
121     void acpi_pci_link_exit(void)
122     {
123     }
124     diff -Naur linux-2.6.25/arch/x86/kernel/apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c
125     --- linux-2.6.25/arch/x86/kernel/apic_64-xen.c 2008-05-23 18:33:27.000000000 +0200
126     +++ linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c 2008-05-23 18:27:40.000000000 +0200
127     @@ -50,7 +50,7 @@
128     * holds up an irq slot - in excessive cases (when multiple
129     * unexpected vectors occur) that might lock up the APIC
130     * completely.
131     - * But don't ack when the APIC is disabled. -AK
132     + * But don't ack when the APIC is disabled. -AK
133     */
134     if (!disable_apic)
135     ack_APIC_irq();
136     @@ -132,20 +132,6 @@
137     if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
138     ack_APIC_irq();
139    
140     -#if 0
141     - static unsigned long last_warning;
142     - static unsigned long skipped;
143     -
144     - /* see sw-dev-man vol 3, chapter 7.4.13.5 */
145     - if (time_before(last_warning+30*HZ,jiffies)) {
146     - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
147     - smp_processor_id(), skipped);
148     - last_warning = jiffies;
149     - skipped = 0;
150     - } else {
151     - skipped++;
152     - }
153     -#endif
154     irq_exit();
155     }
156    
157     @@ -177,7 +163,7 @@
158     7: Illegal register address
159     */
160     printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
161     - smp_processor_id(), v , v1);
162     + smp_processor_id(), v , v1);
163     irq_exit();
164     }
165    
166     diff -Naur linux-2.6.25/arch/x86/kernel/asm-offsets_32.c linux-2.6.25-xen/arch/x86/kernel/asm-offsets_32.c
167     --- linux-2.6.25/arch/x86/kernel/asm-offsets_32.c 2008-05-23 18:33:26.000000000 +0200
168     +++ linux-2.6.25-xen/arch/x86/kernel/asm-offsets_32.c 2008-05-23 18:27:40.000000000 +0200
169     @@ -18,7 +18,9 @@
170     #include <asm/bootparam.h>
171     #include <asm/elf.h>
172    
173     +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
174     #include <xen/interface/xen.h>
175     +#endif
176    
177     #include <linux/lguest.h>
178     #include "../../../drivers/lguest/lg.h"
179     @@ -133,7 +135,7 @@
180     OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
181     #endif
182    
183     -#ifdef CONFIG_XEN
184     +#ifdef CONFIG_PARAVIRT_XEN
185     BLANK();
186     OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
187     OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
188     diff -Naur linux-2.6.25/arch/x86/kernel/cpu/common-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c
189     --- linux-2.6.25/arch/x86/kernel/cpu/common-xen.c 2008-05-23 18:33:27.000000000 +0200
190     +++ linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c 2008-05-23 18:27:40.000000000 +0200
191     @@ -360,6 +360,8 @@
192     if ( xlvl >= 0x80000004 )
193     get_model_name(c); /* Default name */
194     }
195     +
196     + init_scattered_cpuid_features(c);
197     }
198    
199     early_intel_workaround(c);
200     @@ -611,7 +613,6 @@
201     extern int amd_init_cpu(void);
202     extern int centaur_init_cpu(void);
203     extern int transmeta_init_cpu(void);
204     -extern int rise_init_cpu(void);
205     extern int nexgen_init_cpu(void);
206     extern int umc_init_cpu(void);
207    
208     @@ -623,7 +624,6 @@
209     amd_init_cpu();
210     centaur_init_cpu();
211     transmeta_init_cpu();
212     - rise_init_cpu();
213     nexgen_init_cpu();
214     umc_init_cpu();
215     early_cpu_detect();
216     diff -Naur linux-2.6.25/arch/x86/kernel/cpu/mtrr/main-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/mtrr/main-xen.c
217     --- linux-2.6.25/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-05-23 18:33:27.000000000 +0200
218     +++ linux-2.6.25-xen/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-05-23 18:27:40.000000000 +0200
219     @@ -167,7 +167,7 @@
220     EXPORT_SYMBOL(mtrr_add);
221     EXPORT_SYMBOL(mtrr_del);
222    
223     -__init void mtrr_bp_init(void)
224     +void __init mtrr_bp_init(void)
225     {
226     }
227    
228     diff -Naur linux-2.6.25/arch/x86/kernel/e820_32-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_32-xen.c
229     --- linux-2.6.25/arch/x86/kernel/e820_32-xen.c 2008-05-23 18:33:27.000000000 +0200
230     +++ linux-2.6.25-xen/arch/x86/kernel/e820_32-xen.c 2008-05-23 18:27:40.000000000 +0200
231     @@ -10,6 +10,7 @@
232     #include <linux/efi.h>
233     #include <linux/pfn.h>
234     #include <linux/uaccess.h>
235     +#include <linux/suspend.h>
236    
237     #include <asm/pgtable.h>
238     #include <asm/page.h>
239     @@ -343,6 +344,37 @@
240    
241     subsys_initcall(request_standard_resources);
242    
243     +#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
244     +/**
245     + * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
246     + * correspond to e820 RAM areas and mark the corresponding pages as nosave for
247     + * hibernation.
248     + *
249     + * This function requires the e820 map to be sorted and without any
250     + * overlapping entries and assumes the first e820 area to be RAM.
251     + */
252     +void __init e820_mark_nosave_regions(void)
253     +{
254     + int i;
255     + unsigned long pfn;
256     +
257     + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); /* pfn tracks the end of the previous entry */
258     + for (i = 1; i < e820.nr_map; i++) {
259     + struct e820entry *ei = &e820.map[i];
260     +
261     + if (pfn < PFN_UP(ei->addr)) /* hole between the previous entry and this one */
262     + register_nosave_region(pfn, PFN_UP(ei->addr));
263     +
264     + pfn = PFN_DOWN(ei->addr + ei->size);
265     + if (ei->type != E820_RAM) /* the entry itself is not RAM */
266     + register_nosave_region(PFN_UP(ei->addr), pfn);
267     +
268     + if (pfn >= max_low_pfn) /* stop once the walk reaches max_low_pfn */
269     + break;
270     + }
271     +}
272     +#endif
273     +
274     void __init add_memory_region(unsigned long long start,
275     unsigned long long size, int type)
276     {
277     @@ -789,7 +821,7 @@
278     case E820_NVS:
279     printk("(ACPI NVS)\n");
280     break;
281     - default: printk("type %lu\n", e820.map[i].type);
282     + default: printk("type %u\n", e820.map[i].type);
283     break;
284     }
285     }
286     diff -Naur linux-2.6.25/arch/x86/kernel/e820_64-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c
287     --- linux-2.6.25/arch/x86/kernel/e820_64-xen.c 2008-05-23 18:33:27.000000000 +0200
288     +++ linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c 2008-05-23 18:27:40.000000000 +0200
289     @@ -224,37 +224,6 @@
290     }
291    
292     /*
293     - * Find the hole size in the range.
294     - */
295     -unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
296     -{
297     - unsigned long ram = 0;
298     - int i;
299     -
300     - for (i = 0; i < e820.nr_map; i++) {
301     - struct e820entry *ei = &e820.map[i];
302     - unsigned long last, addr;
303     -
304     - if (ei->type != E820_RAM ||
305     - ei->addr+ei->size <= start ||
306     - ei->addr >= end)
307     - continue;
308     -
309     - addr = round_up(ei->addr, PAGE_SIZE);
310     - if (addr < start)
311     - addr = start;
312     -
313     - last = round_down(ei->addr + ei->size, PAGE_SIZE);
314     - if (last >= end)
315     - last = end;
316     -
317     - if (last > addr)
318     - ram += last - addr;
319     - }
320     - return ((end - start) - ram);
321     -}
322     -
323     -/*
324     * Mark e820 reserved areas as busy for the resource manager.
325     */
326     void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
327     @@ -327,47 +296,61 @@
328     }
329     #endif
330    
331     +/*
332     + * Clips the whole-page RAM part of e820 entry ei to [start_pfn, end_pfn) and to
333     + * end_user_pfn, storing it in ei_startpfn/ei_endpfn. Returns 1 if found, else 0.
334     + */
335     +static int __init e820_find_active_region(const struct e820entry *ei,
336     + unsigned long start_pfn,
337     + unsigned long end_pfn,
338     + unsigned long *ei_startpfn,
339     + unsigned long *ei_endpfn)
340     +{
341     + *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; /* first whole page inside the entry */
342     + *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT; /* one past the last whole page */
343     +
344     + /* Skip map entries that do not cover a whole aligned page */
345     + if (*ei_startpfn >= *ei_endpfn)
346     + return 0;
347     +
348     + /* Side effect: a non-RAM entry can still raise the global end_pfn_map */
349     + if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
350     + end_pfn_map = *ei_endpfn;
351     +
352     + /* Skip non-RAM entries and entries entirely outside [start_pfn, end_pfn) */
353     + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
354     + *ei_startpfn >= end_pfn)
355     + return 0;
356     +
357     + /* Clamp the range to [start_pfn, end_pfn) */
358     + if (*ei_startpfn < start_pfn)
359     + *ei_startpfn = start_pfn;
360     + if (*ei_endpfn > end_pfn)
361     + *ei_endpfn = end_pfn;
362     +
363     + /* Obey end_user_pfn to save on memmap: drop or truncate ranges above it */
364     + if (*ei_startpfn >= end_user_pfn)
365     + return 0;
366     + if (*ei_endpfn > end_user_pfn)
367     + *ei_endpfn = end_user_pfn;
368     +
369     + return 1;
370     +}
371     +
372     /* Walk the e820 map and register active regions within a node */
373     void __init
374     e820_register_active_regions(int nid, unsigned long start_pfn,
375     unsigned long end_pfn)
376     {
377     + unsigned long ei_startpfn;
378     + unsigned long ei_endpfn;
379     int i;
380     - unsigned long ei_startpfn, ei_endpfn;
381     - for (i = 0; i < e820.nr_map; i++) {
382     - struct e820entry *ei = &e820.map[i];
383     - ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
384     - ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
385     - >> PAGE_SHIFT;
386     -
387     - /* Skip map entries smaller than a page */
388     - if (ei_startpfn >= ei_endpfn)
389     - continue;
390     -
391     - /* Check if end_pfn_map should be updated */
392     - if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
393     - end_pfn_map = ei_endpfn;
394     -
395     - /* Skip if map is outside the node */
396     - if (ei->type != E820_RAM ||
397     - ei_endpfn <= start_pfn ||
398     - ei_startpfn >= end_pfn)
399     - continue;
400     -
401     - /* Check for overlaps */
402     - if (ei_startpfn < start_pfn)
403     - ei_startpfn = start_pfn;
404     - if (ei_endpfn > end_pfn)
405     - ei_endpfn = end_pfn;
406     -
407     - /* Obey end_user_pfn to save on memmap */
408     - if (ei_startpfn >= end_user_pfn)
409     - continue;
410     - if (ei_endpfn > end_user_pfn)
411     - ei_endpfn = end_user_pfn;
412    
413     - add_active_range(nid, ei_startpfn, ei_endpfn);
414     - }
415     + for (i = 0; i < e820.nr_map; i++)
416     + if (e820_find_active_region(&e820.map[i],
417     + start_pfn, end_pfn,
418     + &ei_startpfn, &ei_endpfn))
419     + add_active_range(nid, ei_startpfn, ei_endpfn);
420     }
421    
422     /*
423     @@ -388,12 +371,35 @@
424     e820.nr_map++;
425     }
426    
427     +/*
428     + * Find the hole size (in bytes) in the memory range: its length minus the active RAM in it.
429     + * @start: starting address of the memory range to scan
430     + * @end: ending address of the memory range to scan
431     + */
432     +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
433     +{
434     + unsigned long start_pfn = start >> PAGE_SHIFT; /* both bounds are truncated to page frame numbers */
435     + unsigned long end_pfn = end >> PAGE_SHIFT;
436     + unsigned long ei_startpfn;
437     + unsigned long ei_endpfn;
438     + unsigned long ram = 0; /* active RAM found so far, in pages */
439     + int i;
440     +
441     + for (i = 0; i < e820.nr_map; i++) {
442     + if (e820_find_active_region(&e820.map[i],
443     + start_pfn, end_pfn,
444     + &ei_startpfn, &ei_endpfn))
445     + ram += ei_endpfn - ei_startpfn;
446     + }
447     + return end - start - (ram << PAGE_SHIFT); /* range length minus RAM bytes */
448     +}
449     +
450     void __init e820_print_map(char *who)
451     {
452     int i;
453    
454     for (i = 0; i < e820.nr_map; i++) {
455     - printk(" %s: %016Lx - %016Lx ", who,
456     + printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
457     (unsigned long long) e820.map[i].addr,
458     (unsigned long long) (e820.map[i].addr + e820.map[i].size));
459     switch (e820.map[i].type) {
460     diff -Naur linux-2.6.25/arch/x86/kernel/early_printk-xen.c linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c
461     --- linux-2.6.25/arch/x86/kernel/early_printk-xen.c 2008-05-23 18:33:27.000000000 +0200
462     +++ linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c 2008-05-23 18:27:40.000000000 +0200
463     @@ -174,6 +174,7 @@
464     * mappings. Someone should fix this for domain 0. For now, use fake serial.
465     */
466     #define early_vga_console early_serial_console
467     +#define xenboot_console early_serial_console
468    
469     #endif
470    
471     @@ -261,20 +262,22 @@
472     } else if (!strncmp(buf, "ttyS", 4)) {
473     early_serial_init(buf);
474     early_console = &early_serial_console;
475     - } else if (!strncmp(buf, "vga", 3)
476     + } else if (!strncmp(buf, "vga", 3)) {
477     #ifndef CONFIG_XEN
478     && SCREEN_INFO.orig_video_isVGA == 1) {
479     max_xpos = SCREEN_INFO.orig_video_cols;
480     max_ypos = SCREEN_INFO.orig_video_lines;
481     current_ypos = SCREEN_INFO.orig_y;
482     -#else
483     - || !strncmp(buf, "xen", 3)) {
484     #endif
485     early_console = &early_vga_console;
486     } else if (!strncmp(buf, "simnow", 6)) {
487     simnow_init(buf + 6);
488     early_console = &simnow_console;
489     keep_early = 1;
490     +#ifdef CONFIG_XEN
491     + } else if (!strncmp(buf, "xen", 3)) {
492     + early_console = &xenboot_console;
493     +#endif
494     }
495    
496     if (keep_early)
497     diff -Naur linux-2.6.25/arch/x86/kernel/entry_32.S linux-2.6.25-xen/arch/x86/kernel/entry_32.S
498     --- linux-2.6.25/arch/x86/kernel/entry_32.S 2008-05-23 18:32:54.000000000 +0200
499     +++ linux-2.6.25-xen/arch/x86/kernel/entry_32.S 2008-05-23 18:27:40.000000000 +0200
500     @@ -1022,7 +1022,7 @@
501     CFI_ENDPROC
502     ENDPROC(kernel_thread_helper)
503    
504     -#ifdef CONFIG_XEN
505     +#ifdef CONFIG_PARAVIRT_XEN
506     ENTRY(xen_hypervisor_callback)
507     CFI_STARTPROC
508     pushl $0
509     @@ -1105,7 +1105,7 @@
510     .previous
511     ENDPROC(xen_failsafe_callback)
512    
513     -#endif /* CONFIG_XEN */
514     +#endif /* CONFIG_PARAVIRT_XEN */
515    
516     .section .rodata,"a"
517     #include "syscall_table_32.S"
518     diff -Naur linux-2.6.25/arch/x86/kernel/entry_32-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S
519     --- linux-2.6.25/arch/x86/kernel/entry_32-xen.S 2008-05-23 18:33:27.000000000 +0200
520     +++ linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S 2008-05-23 18:27:40.000000000 +0200
521     @@ -452,9 +452,6 @@
522     1: INTERRUPT_RETURN
523     .section .fixup,"ax"
524     iret_exc:
525     -#ifndef CONFIG_XEN
526     - ENABLE_INTERRUPTS(CLBR_NONE)
527     -#endif
528     pushl $0 # no error code
529     pushl $do_iret_error
530     jmp error_code
531     diff -Naur linux-2.6.25/arch/x86/kernel/entry_64-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S
532     --- linux-2.6.25/arch/x86/kernel/entry_64-xen.S 2008-05-23 18:33:27.000000000 +0200
533     +++ linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S 2008-05-23 18:27:40.000000000 +0200
534     @@ -310,7 +310,7 @@
535     TRACE_IRQS_ON
536     /* sti */
537     XEN_UNBLOCK_EVENTS(%rsi)
538     - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
539     + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
540     jz 1f
541    
542     /* Really a signal */
543     @@ -409,7 +409,7 @@
544     jmp int_restore_rest
545    
546     int_signal:
547     - testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
548     + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
549     jz 1f
550     movq %rsp,%rdi # &ptregs -> arg1
551     xorl %esi,%esi # oldset -> arg2
552     @@ -552,7 +552,7 @@
553     jmp retint_check
554    
555     retint_signal:
556     - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
557     + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
558     jz retint_restore_args
559     TRACE_IRQS_ON
560     XEN_UNBLOCK_EVENTS(%rsi)
561     diff -Naur linux-2.6.25/arch/x86/kernel/head_32-xen.S linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S
562     --- linux-2.6.25/arch/x86/kernel/head_32-xen.S 2008-05-23 18:33:27.000000000 +0200
563     +++ linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S 2008-05-23 18:27:40.000000000 +0200
564     @@ -86,7 +86,10 @@
565     /*
566     * BSS section
567     */
568     -.section ".bss.page_aligned","w"
569     +.section ".bss.page_aligned","wa"
570     + .align PAGE_SIZE_asm
571     +ENTRY(swapper_pg_pmd)
572     + .fill 1024,4,0
573     ENTRY(empty_zero_page)
574     .fill 4096,1,0
575    
576     @@ -136,25 +139,25 @@
577     #endif /* CONFIG_XEN_COMPAT <= 0x030002 */
578    
579    
580     - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
581     - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
582     - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
583     - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET)
584     + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
585     + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
586     + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
587     + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET)
588     #if CONFIG_XEN_COMPAT <= 0x030002
589     - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET)
590     + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long __PAGE_OFFSET)
591     #else
592     - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0)
593     + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long 0)
594     #endif
595     - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32)
596     - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
597     - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
598     - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
599     + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_32)
600     + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
601     + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long HYPERVISOR_VIRT_START)
602     + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
603     #ifdef CONFIG_X86_PAE
604     - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
605     - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT)
606     + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
607     + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT)
608     #else
609     - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
610     - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT)
611     + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
612     + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long _PAGE_PRESENT, _PAGE_PRESENT)
613     #endif
614     - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
615     - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
616     + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
617     + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
618     diff -Naur linux-2.6.25/arch/x86/kernel/head64-xen.c linux-2.6.25-xen/arch/x86/kernel/head64-xen.c
619     --- linux-2.6.25/arch/x86/kernel/head64-xen.c 2008-05-23 18:33:27.000000000 +0200
620     +++ linux-2.6.25-xen/arch/x86/kernel/head64-xen.c 2008-05-23 18:27:40.000000000 +0200
621     @@ -87,7 +87,7 @@
622     unsigned long machine_to_phys_nr_ents;
623     int i;
624    
625     - setup_xen_features();
626     + xen_setup_features();
627    
628     xen_start_info = (struct start_info *)real_mode_data;
629     if (!xen_feature(XENFEAT_auto_translated_physmap))
630     diff -Naur linux-2.6.25/arch/x86/kernel/head_64-xen.S linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S
631     --- linux-2.6.25/arch/x86/kernel/head_64-xen.S 2008-05-23 18:33:27.000000000 +0200
632     +++ linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S 2008-05-23 18:27:40.000000000 +0200
633     @@ -23,7 +23,7 @@
634     #include <asm/dwarf2.h>
635     #include <xen/interface/elfnote.h>
636    
637     - .section .bootstrap.text, "ax", @progbits
638     + .section .text.head, "ax", @progbits
639     .code64
640     .globl startup_64
641     startup_64:
642     @@ -47,7 +47,7 @@
643    
644     #define NEXT_PAGE(name) \
645     .balign PAGE_SIZE; \
646     - phys_##name = . - .bootstrap.text; \
647     + phys_##name = . - .text.head; \
648     ENTRY(name)
649    
650     NEXT_PAGE(init_level4_pgt)
651     @@ -75,6 +75,12 @@
652     NEXT_PAGE(level2_kernel_pgt)
653     .fill 512,8,0
654    
655     +NEXT_PAGE(level2_fixmap_pgt)
656     + .fill 512,8,0
657     +
658     +NEXT_PAGE(level1_fixmap_pgt)
659     + .fill 512,8,0
660     +
661     NEXT_PAGE(hypercall_page)
662     CFI_STARTPROC
663     .rept 0x1000 / 0x20
664     @@ -189,18 +195,18 @@
665     .byte 0
666     #endif /* CONFIG_XEN_COMPAT <= 0x030002 */
667    
668     - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
669     - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
670     - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
671     - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, __START_KERNEL_map)
672     + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
673     + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
674     + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
675     + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad __START_KERNEL_map)
676     #if CONFIG_XEN_COMPAT <= 0x030002
677     - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, __START_KERNEL_map)
678     + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad __START_KERNEL_map)
679     #else
680     - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0)
681     + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad 0)
682     #endif
683     - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, startup_64)
684     - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page)
685     - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT)
686     - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
687     - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
688     - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
689     + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad startup_64)
690     + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad hypercall_page)
691     + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT)
692     + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
693     + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
694     + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
695     diff -Naur linux-2.6.25/arch/x86/kernel/init_task-xen.c linux-2.6.25-xen/arch/x86/kernel/init_task-xen.c
696     --- linux-2.6.25/arch/x86/kernel/init_task-xen.c 2008-05-23 18:32:29.000000000 +0200
697     +++ linux-2.6.25-xen/arch/x86/kernel/init_task-xen.c 2008-05-23 18:27:40.000000000 +0200
698     @@ -46,6 +46,6 @@
699     * per-CPU TSS segments. Threads are completely 'soft' on Linux,
700     * no more per-task TSS's.
701     */
702     -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
703     +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
704     #endif
705    
706     diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c
707     --- linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 18:33:27.000000000 +0200
708     +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 18:27:40.000000000 +0200
709     @@ -397,14 +397,6 @@
710     # include <linux/slab.h> /* kmalloc() */
711     # include <linux/timer.h> /* time_after() */
712    
713     -#ifdef CONFIG_BALANCED_IRQ_DEBUG
714     -# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
715     -# define Dprintk(x...) do { TDprintk(x); } while (0)
716     -# else
717     -# define TDprintk(x...)
718     -# define Dprintk(x...)
719     -# endif
720     -
721     #define IRQBALANCE_CHECK_ARCH -999
722     #define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
723     #define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
724     @@ -487,7 +479,7 @@
725     static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
726     {
727     int i, j;
728     - Dprintk("Rotating IRQs among CPUs.\n");
729     +
730     for_each_online_cpu(i) {
731     for (j = 0; j < NR_IRQS; j++) {
732     if (!irq_desc[j].action)
733     @@ -604,19 +596,11 @@
734     max_loaded = tmp_loaded; /* processor */
735     imbalance = (max_cpu_irq - min_cpu_irq) / 2;
736    
737     - Dprintk("max_loaded cpu = %d\n", max_loaded);
738     - Dprintk("min_loaded cpu = %d\n", min_loaded);
739     - Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
740     - Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
741     - Dprintk("load imbalance = %lu\n", imbalance);
742     -
743     /* if imbalance is less than approx 10% of max load, then
744     * observe diminishing returns action. - quit
745     */
746     - if (imbalance < (max_cpu_irq >> 3)) {
747     - Dprintk("Imbalance too trivial\n");
748     + if (imbalance < (max_cpu_irq >> 3))
749     goto not_worth_the_effort;
750     - }
751    
752     tryanotherirq:
753     /* if we select an IRQ to move that can't go where we want, then
754     @@ -673,9 +657,6 @@
755     cpus_and(tmp, target_cpu_mask, allowed_mask);
756    
757     if (!cpus_empty(tmp)) {
758     -
759     - Dprintk("irq = %d moved to cpu = %d\n",
760     - selected_irq, min_loaded);
761     /* mark for change destination */
762     set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
763    
764     @@ -695,7 +676,6 @@
765     */
766     balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
767     balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
768     - Dprintk("IRQ worth rotating not found\n");
769     return;
770     }
771    
772     @@ -711,6 +691,7 @@
773     set_pending_irq(i, cpumask_of_cpu(0));
774     }
775    
776     + set_freezable();
777     for ( ; ; ) {
778     time_remaining = schedule_timeout_interruptible(time_remaining);
779     try_to_freeze();
780     @@ -820,14 +801,6 @@
781     static int pirqs_enabled;
782     int skip_ioapic_setup;
783    
784     -static int __init ioapic_setup(char *str)
785     -{
786     - skip_ioapic_setup = 1;
787     - return 1;
788     -}
789     -
790     -__setup("noapic", ioapic_setup);
791     -
792     static int __init ioapic_pirq_setup(char *str)
793     {
794     int i, max;
795     @@ -1313,12 +1286,15 @@
796     static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
797     {
798     if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
799     - trigger == IOAPIC_LEVEL)
800     + trigger == IOAPIC_LEVEL) {
801     + irq_desc[irq].status |= IRQ_LEVEL;
802     set_irq_chip_and_handler_name(irq, &ioapic_chip,
803     handle_fasteoi_irq, "fasteoi");
804     - else
805     + } else {
806     + irq_desc[irq].status &= ~IRQ_LEVEL;
807     set_irq_chip_and_handler_name(irq, &ioapic_chip,
808     handle_edge_irq, "edge");
809     + }
810     set_intr_gate(vector, interrupt[irq]);
811     }
812     #else
813     @@ -1943,7 +1919,7 @@
814     * - if this function detects that timer IRQs are defunct, then we fall
815     * back to ISA timer IRQs
816     */
817     -int __init timer_irq_works(void)
818     +static int __init timer_irq_works(void)
819     {
820     unsigned long t1 = jiffies;
821    
822     diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c
823     --- linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 18:33:27.000000000 +0200
824     +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 18:27:40.000000000 +0200
825     @@ -164,7 +164,9 @@
826     #endif
827     }
828    
829     -#ifndef CONFIG_XEN
830     +#ifdef CONFIG_XEN
831     +#define io_apic_modify io_apic_write
832     +#else
833     /*
834     * Re-write a value: to be used for read-modify-write
835     * cycles where the read already set up the index register.
836     @@ -174,8 +176,32 @@
837     struct io_apic __iomem *io_apic = io_apic_base(apic);
838     writel(value, &io_apic->data);
839     }
840     -#else
841     -#define io_apic_modify io_apic_write
842     +
843     +static int io_apic_level_ack_pending(unsigned int irq)
844     +{
845     + struct irq_pin_list *entry;
846     + unsigned long flags;
847     + int pending = 0;
848     +
849     + spin_lock_irqsave(&ioapic_lock, flags);
850     + entry = irq_2_pin + irq;
851     + for (;;) {
852     + unsigned int reg;
853     + int pin;
854     +
855     + pin = entry->pin;
856     + if (pin == -1)
857     + break;
858     + reg = io_apic_read(entry->apic, 0x10 + pin*2);
859     + /* Is the remote IRR bit set? */
860     + pending |= (reg >> 14) & 1;
861     + if (!entry->next)
862     + break;
863     + entry = irq_2_pin + entry->next;
864     + }
865     + spin_unlock_irqrestore(&ioapic_lock, flags);
866     + return pending;
867     +}
868     #endif
869    
870     /*
871     @@ -403,14 +429,12 @@
872     int skip_ioapic_setup;
873     int ioapic_force;
874    
875     -/* dummy parsing: see setup.c */
876     -
877     -static int __init disable_ioapic_setup(char *str)
878     +static int __init parse_noapic(char *str)
879     {
880     - skip_ioapic_setup = 1;
881     + disable_ioapic_setup();
882     return 0;
883     }
884     -early_param("noapic", disable_ioapic_setup);
885     +early_param("noapic", parse_noapic);
886    
887     /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
888     static int __init disable_timer_pin_setup(char *arg)
889     @@ -758,12 +782,15 @@
890    
891     static void ioapic_register_intr(int irq, unsigned long trigger)
892     {
893     - if (trigger)
894     + if (trigger) {
895     + irq_desc[irq].status |= IRQ_LEVEL;
896     set_irq_chip_and_handler_name(irq, &ioapic_chip,
897     handle_fasteoi_irq, "fasteoi");
898     - else
899     + } else {
900     + irq_desc[irq].status &= ~IRQ_LEVEL;
901     set_irq_chip_and_handler_name(irq, &ioapic_chip,
902     handle_edge_irq, "edge");
903     + }
904     }
905     #else
906     #define ioapic_register_intr(irq,trigger) ((void)0)
907     @@ -1412,9 +1439,37 @@
908     ack_APIC_irq();
909    
910     /* Now we can move and renable the irq */
911     - move_masked_irq(irq);
912     - if (unlikely(do_unmask_irq))
913     + if (unlikely(do_unmask_irq)) {
914     + /* Only migrate the irq if the ack has been received.
915     + *
916     + * On rare occasions the broadcast level triggered ack gets
917     + * delayed going to ioapics, and if we reprogram the
918     + * vector while Remote IRR is still set the irq will never
919     + * fire again.
920     + *
921     + * To prevent this scenario we read the Remote IRR bit
922     + * of the ioapic. This has two effects.
923     + * - On any sane system the read of the ioapic will
924     + * flush writes (and acks) going to the ioapic from
925     + * this cpu.
926     + * - We get to see if the ACK has actually been delivered.
927     + *
928     + * Based on failed experiments of reprogramming the
929     + * ioapic entry from outside of irq context starting
930     + * with masking the ioapic entry and then polling until
931     + * Remote IRR was clear before reprogramming the
932     + * ioapic I don't trust the Remote IRR bit to be
933     + * completely accurate.
934     + *
935     + * However there appears to be no other way to plug
936     + * this race, so if the Remote IRR bit is not
937     + * accurate and is causing problems then it is a hardware bug
938     + * and you can go talk to the chipset vendor about it.
939     + */
940     + if (!io_apic_level_ack_pending(irq))
941     + move_masked_irq(irq);
942     unmask_IO_APIC_irq(irq);
943     + }
944     }
945    
946     static struct irq_chip ioapic_chip __read_mostly = {
947     diff -Naur linux-2.6.25/arch/x86/kernel/irq_32-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c
948     --- linux-2.6.25/arch/x86/kernel/irq_32-xen.c 2008-05-23 18:33:27.000000000 +0200
949     +++ linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c 2008-05-23 18:27:40.000000000 +0200
950     @@ -21,7 +21,7 @@
951     #include <asm/apic.h>
952     #include <asm/uaccess.h>
953    
954     -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
955     +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
956     EXPORT_PER_CPU_SYMBOL(irq_stat);
957    
958     DEFINE_PER_CPU(struct pt_regs *, irq_regs);
959     @@ -149,15 +149,11 @@
960    
961     #ifdef CONFIG_4KSTACKS
962    
963     -/*
964     - * These should really be __section__(".bss.page_aligned") as well, but
965     - * gcc's 3.0 and earlier don't handle that correctly.
966     - */
967     static char softirq_stack[NR_CPUS * THREAD_SIZE]
968     - __attribute__((__aligned__(THREAD_SIZE)));
969     + __attribute__((__section__(".bss.page_aligned")));
970    
971     static char hardirq_stack[NR_CPUS * THREAD_SIZE]
972     - __attribute__((__aligned__(THREAD_SIZE)));
973     + __attribute__((__section__(".bss.page_aligned")));
974    
975     /*
976     * allocate per-cpu stacks for hardirq and for softirq processing
977     diff -Naur linux-2.6.25/arch/x86/kernel/ldt_64-xen.c linux-2.6.25-xen/arch/x86/kernel/ldt_64-xen.c
978     --- linux-2.6.25/arch/x86/kernel/ldt_64-xen.c 2008-05-23 18:33:27.000000000 +0200
979     +++ linux-2.6.25-xen/arch/x86/kernel/ldt_64-xen.c 2008-05-23 18:27:40.000000000 +0200
980     @@ -114,6 +114,8 @@
981     memset(&mm->context, 0, sizeof(mm->context));
982     init_MUTEX(&mm->context.sem);
983     old_mm = current->mm;
984     + if (old_mm)
985     + mm->context.vdso = old_mm->context.vdso;
986     if (old_mm && old_mm->context.size > 0) {
987     down(&old_mm->context.sem);
988     retval = copy_ldt(&mm->context, &old_mm->context);
989     diff -Naur linux-2.6.25/arch/x86/kernel/microcode-xen.c linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c
990     --- linux-2.6.25/arch/x86/kernel/microcode-xen.c 2008-05-23 18:33:27.000000000 +0200
991     +++ linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c 2008-05-23 18:27:40.000000000 +0200
992     @@ -33,6 +33,7 @@
993     #include <linux/miscdevice.h>
994     #include <linux/spinlock.h>
995     #include <linux/mm.h>
996     +#include <linux/fs.h>
997     #include <linux/mutex.h>
998     #include <linux/cpu.h>
999     #include <linux/firmware.h>
1000     diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c
1001     --- linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1002     +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1003     @@ -32,7 +32,6 @@
1004    
1005     /* Have we found an MP table */
1006     int smp_found_config;
1007     -unsigned int __initdata maxcpus = NR_CPUS;
1008    
1009     /*
1010     * Various Linux-internal data structures created from the
1011     @@ -657,6 +656,20 @@
1012     return -1;
1013     }
1014    
1015     +static u8 uniq_ioapic_id(u8 id)
1016     +{
1017     + int i;
1018     + DECLARE_BITMAP(used, 256);
1019     + bitmap_zero(used, 256);
1020     + for (i = 0; i < nr_ioapics; i++) {
1021     + struct mpc_config_ioapic *ia = &mp_ioapics[i];
1022     + __set_bit(ia->mpc_apicid, used);
1023     + }
1024     + if (!test_bit(id, used))
1025     + return id;
1026     + return find_first_zero_bit(used, 256);
1027     +}
1028     +
1029     void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1030     {
1031     int idx = 0;
1032     @@ -664,7 +677,7 @@
1033     if (bad_ioapic(address))
1034     return;
1035    
1036     - idx = nr_ioapics++;
1037     + idx = nr_ioapics;
1038    
1039     mp_ioapics[idx].mpc_type = MP_IOAPIC;
1040     mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
1041     @@ -673,7 +686,7 @@
1042     #ifndef CONFIG_XEN
1043     set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
1044     #endif
1045     - mp_ioapics[idx].mpc_apicid = id;
1046     + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
1047     mp_ioapics[idx].mpc_apicver = 0;
1048    
1049     /*
1050     @@ -690,6 +703,8 @@
1051     mp_ioapics[idx].mpc_apicaddr,
1052     mp_ioapic_routing[idx].gsi_start,
1053     mp_ioapic_routing[idx].gsi_end);
1054     +
1055     + nr_ioapics++;
1056     }
1057    
1058     void __init
1059     diff -Naur linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c
1060     --- linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1061     +++ linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1062     @@ -24,7 +24,7 @@
1063     #include <asm/bug.h>
1064    
1065     #ifdef __x86_64__
1066     -#include <asm/proto.h>
1067     +#include <asm/iommu.h>
1068    
1069     int iommu_merge __read_mostly = 0;
1070     EXPORT_SYMBOL(iommu_merge);
1071     diff -Naur linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c
1072     --- linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1073     +++ linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1074     @@ -5,7 +5,7 @@
1075     #include <linux/module.h>
1076     #include <linux/dma-mapping.h>
1077    
1078     -#include <asm/proto.h>
1079     +#include <asm/iommu.h>
1080     #include <asm/swiotlb.h>
1081     #include <asm/dma.h>
1082    
1083     diff -Naur linux-2.6.25/arch/x86/kernel/process_32-xen.c linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c
1084     --- linux-2.6.25/arch/x86/kernel/process_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1085     +++ linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1086     @@ -241,6 +241,7 @@
1087     void show_regs(struct pt_regs * regs)
1088     {
1089     unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
1090     + unsigned long d0, d1, d2, d3, d6, d7;
1091    
1092     printk("\n");
1093     printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
1094     @@ -265,6 +266,17 @@
1095     cr3 = read_cr3();
1096     cr4 = read_cr4_safe();
1097     printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
1098     +
1099     + get_debugreg(d0, 0);
1100     + get_debugreg(d1, 1);
1101     + get_debugreg(d2, 2);
1102     + get_debugreg(d3, 3);
1103     + printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
1104     + d0, d1, d2, d3);
1105     + get_debugreg(d6, 6);
1106     + get_debugreg(d7, 7);
1107     + printk("DR6: %08lx DR7: %08lx\n", d6, d7);
1108     +
1109     show_trace(NULL, regs, &regs->esp);
1110     }
1111    
1112     @@ -473,7 +485,30 @@
1113     return 1;
1114     }
1115    
1116     -static noinline void __switch_to_xtra(struct task_struct *next_p)
1117     +#ifdef CONFIG_SECCOMP
1118     +void hard_disable_TSC(void)
1119     +{
1120     + write_cr4(read_cr4() | X86_CR4_TSD);
1121     +}
1122     +void disable_TSC(void)
1123     +{
1124     + preempt_disable();
1125     + if (!test_and_set_thread_flag(TIF_NOTSC))
1126     + /*
1127     + * Must flip the CPU state synchronously with
1128     + * TIF_NOTSC in the current running context.
1129     + */
1130     + hard_disable_TSC();
1131     + preempt_enable();
1132     +}
1133     +void hard_enable_TSC(void)
1134     +{
1135     + write_cr4(read_cr4() & ~X86_CR4_TSD);
1136     +}
1137     +#endif /* CONFIG_SECCOMP */
1138     +
1139     +static noinline void
1140     +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
1141     {
1142     struct thread_struct *next;
1143    
1144     @@ -488,33 +523,17 @@
1145     set_debugreg(next->debugreg[6], 6);
1146     set_debugreg(next->debugreg[7], 7);
1147     }
1148     -}
1149    
1150     -/*
1151     - * This function selects if the context switch from prev to next
1152     - * has to tweak the TSC disable bit in the cr4.
1153     - */
1154     -static inline void disable_tsc(struct task_struct *prev_p,
1155     - struct task_struct *next_p)
1156     -{
1157     - struct thread_info *prev, *next;
1158     -
1159     - /*
1160     - * gcc should eliminate the ->thread_info dereference if
1161     - * has_secure_computing returns 0 at compile time (SECCOMP=n).
1162     - */
1163     - prev = task_thread_info(prev_p);
1164     - next = task_thread_info(next_p);
1165     -
1166     - if (has_secure_computing(prev) || has_secure_computing(next)) {
1167     - /* slow path here */
1168     - if (has_secure_computing(prev) &&
1169     - !has_secure_computing(next)) {
1170     - write_cr4(read_cr4() & ~X86_CR4_TSD);
1171     - } else if (!has_secure_computing(prev) &&
1172     - has_secure_computing(next))
1173     - write_cr4(read_cr4() | X86_CR4_TSD);
1174     +#ifdef CONFIG_SECCOMP
1175     + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
1176     + test_tsk_thread_flag(next_p, TIF_NOTSC)) {
1177     + /* prev and next are different */
1178     + if (test_tsk_thread_flag(next_p, TIF_NOTSC))
1179     + hard_disable_TSC();
1180     + else
1181     + hard_enable_TSC();
1182     }
1183     +#endif
1184     }
1185    
1186     /*
1187     @@ -628,10 +647,9 @@
1188     /*
1189     * Now maybe handle debug registers
1190     */
1191     - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
1192     - __switch_to_xtra(next_p);
1193     -
1194     - disable_tsc(prev_p, next_p);
1195     + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
1196     + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
1197     + __switch_to_xtra(prev_p, next_p);
1198    
1199     /*
1200     * Leave lazy mode, flushing any hypercalls made here.
1201     diff -Naur linux-2.6.25/arch/x86/kernel/process_64-xen.c linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c
1202     --- linux-2.6.25/arch/x86/kernel/process_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1203     +++ linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1204     @@ -26,6 +26,7 @@
1205     #include <linux/sched.h>
1206     #include <linux/kernel.h>
1207     #include <linux/mm.h>
1208     +#include <linux/fs.h>
1209     #include <linux/elfcore.h>
1210     #include <linux/smp.h>
1211     #include <linux/slab.h>
1212     @@ -249,6 +250,7 @@
1213     void __show_regs(struct pt_regs * regs)
1214     {
1215     unsigned long fs, gs, shadowgs;
1216     + unsigned long d0, d1, d2, d3, d6, d7;
1217     unsigned int fsindex,gsindex;
1218     unsigned int ds,cs,es;
1219    
1220     @@ -288,6 +290,14 @@
1221     fs,fsindex,gs,gsindex,shadowgs);
1222     printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es);
1223    
1224     + get_debugreg(d0, 0);
1225     + get_debugreg(d1, 1);
1226     + get_debugreg(d2, 2);
1227     + printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
1228     + get_debugreg(d3, 3);
1229     + get_debugreg(d6, 6);
1230     + get_debugreg(d7, 7);
1231     + printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
1232     }
1233    
1234     void show_regs(struct pt_regs *regs)
1235     diff -Naur linux-2.6.25/arch/x86/kernel/setup_32-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c
1236     --- linux-2.6.25/arch/x86/kernel/setup_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1237     +++ linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1238     @@ -117,19 +117,10 @@
1239     /*
1240     * Setup options
1241     */
1242     -struct drive_info_struct { char dummy[32]; } drive_info;
1243     -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
1244     - defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
1245     -EXPORT_SYMBOL(drive_info);
1246     -#endif
1247     struct screen_info screen_info;
1248     EXPORT_SYMBOL(screen_info);
1249     struct apm_info apm_info;
1250     EXPORT_SYMBOL(apm_info);
1251     -struct sys_desc_table_struct {
1252     - unsigned short length;
1253     - unsigned char table[0];
1254     -};
1255     struct edid_info edid_info;
1256     EXPORT_SYMBOL_GPL(edid_info);
1257     #ifndef CONFIG_XEN
1258     @@ -152,7 +143,7 @@
1259    
1260     static char __initdata command_line[COMMAND_LINE_SIZE];
1261    
1262     -unsigned char __initdata boot_params[PARAM_SIZE];
1263     +struct boot_params __initdata boot_params;
1264    
1265     /*
1266     * Point at the empty zero page to start with. We map the real shared_info
1267     @@ -319,18 +310,18 @@
1268     printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1269     MAXMEM>>20);
1270     if (max_pfn > MAX_NONPAE_PFN)
1271     - printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1272     + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
1273     else
1274     printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1275     max_pfn = MAXMEM_PFN;
1276     #else /* !CONFIG_HIGHMEM */
1277     -#ifndef CONFIG_X86_PAE
1278     +#ifndef CONFIG_HIGHMEM64G
1279     if (max_pfn > MAX_NONPAE_PFN) {
1280     max_pfn = MAX_NONPAE_PFN;
1281     printk(KERN_WARNING "Warning only 4GB will be used.\n");
1282     - printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1283     + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
1284     }
1285     -#endif /* !CONFIG_X86_PAE */
1286     +#endif /* !CONFIG_HIGHMEM64G */
1287     #endif /* !CONFIG_HIGHMEM */
1288     } else {
1289     if (highmem_pages == -1)
1290     @@ -517,7 +508,7 @@
1291     *
1292     * This should all compile down to nothing when NUMA is off.
1293     */
1294     -void __init remapped_pgdat_init(void)
1295     +static void __init remapped_pgdat_init(void)
1296     {
1297     int nid;
1298    
1299     @@ -592,7 +583,6 @@
1300     properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1301     */
1302     ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1303     - drive_info = DRIVE_INFO;
1304     screen_info = SCREEN_INFO;
1305     copy_edid();
1306     apm_info.bios = APM_BIOS_INFO;
1307     @@ -770,6 +760,8 @@
1308     * NOTE: at this point the bootmem allocator is fully available.
1309     */
1310    
1311     + paravirt_post_allocator_init();
1312     +
1313     if (is_initial_xendomain())
1314     dmi_scan_machine();
1315    
1316     @@ -817,6 +809,7 @@
1317     #endif
1318    
1319     e820_register_memory();
1320     + e820_mark_nosave_regions();
1321    
1322     if (is_initial_xendomain()) {
1323     #ifdef CONFIG_VT
1324     diff -Naur linux-2.6.25/arch/x86/kernel/setup64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c
1325     --- linux-2.6.25/arch/x86/kernel/setup64-xen.c 2008-05-23 18:33:27.000000000 +0200
1326     +++ linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c 2008-05-23 18:27:40.000000000 +0200
1327     @@ -125,11 +125,14 @@
1328     }
1329    
1330     #ifdef CONFIG_XEN
1331     -static void switch_pt(void)
1332     +static void __init_refok switch_pt(int cpu)
1333     {
1334     + if (cpu == 0)
1335     + xen_init_pt();
1336     xen_pt_switch(__pa_symbol(init_level4_pgt));
1337     xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
1338     }
1339     +#define switch_pt() switch_pt(cpu)
1340    
1341     static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
1342     {
1343     @@ -185,9 +188,6 @@
1344     pda->mmu_state = 0;
1345    
1346     if (cpu == 0) {
1347     -#ifdef CONFIG_XEN
1348     - xen_init_pt();
1349     -#endif
1350     /* others are initialized in smpboot.c */
1351     pda->pcurrent = &init_task;
1352     pda->irqstackptr = boot_cpu_stack;
1353     diff -Naur linux-2.6.25/arch/x86/kernel/setup_64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c
1354     --- linux-2.6.25/arch/x86/kernel/setup_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1355     +++ linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1356     @@ -840,6 +840,8 @@
1357     level = cpuid_eax(1);
1358     if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
1359     set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
1360     + if (c->x86 == 0x10)
1361     + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
1362    
1363     /* Enable workaround for FXSAVE leak */
1364     if (c->x86 >= 6)
1365     @@ -865,8 +867,14 @@
1366     if (c->extended_cpuid_level >= 0x80000008)
1367     amd_detect_cmp(c);
1368    
1369     - /* Fix cpuid4 emulation for more */
1370     - num_cache_leaves = 3;
1371     + if (c->extended_cpuid_level >= 0x80000006 &&
1372     + (cpuid_edx(0x80000006) & 0xf000))
1373     + num_cache_leaves = 4;
1374     + else
1375     + num_cache_leaves = 3;
1376     +
1377     + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
1378     + set_bit(X86_FEATURE_K8, &c->x86_capability);
1379    
1380     /* RDTSC can be speculated around */
1381     clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1382     @@ -1111,6 +1119,8 @@
1383     c->x86_capability[2] = cpuid_edx(0x80860001);
1384     }
1385    
1386     + init_scattered_cpuid_features(c);
1387     +
1388     c->apicid = phys_pkg_id(0);
1389    
1390     /*
1391     @@ -1196,7 +1206,7 @@
1392     "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1393     "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1394     "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1395     - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1396     + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
1397    
1398     /* AMD-defined */
1399     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1400     @@ -1212,10 +1222,11 @@
1401     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1402    
1403     /* Other (Linux-defined) */
1404     - "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1405     - "constant_tsc", NULL, NULL,
1406     - "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1407     - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1408     + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
1409     + NULL, NULL, NULL, NULL,
1410     + "constant_tsc", "up", NULL, "arch_perfmon",
1411     + "pebs", "bts", NULL, "sync_rdtsc",
1412     + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1413     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1414    
1415     /* Intel-defined (#2) */
1416     @@ -1226,7 +1237,7 @@
1417    
1418     /* VIA/Cyrix/Centaur-defined */
1419     NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1420     - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1421     + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
1422     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1423     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1424    
1425     @@ -1237,6 +1248,12 @@
1426     "osvw", "ibs", NULL, NULL, NULL, NULL,
1427     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1428     NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1429     +
1430     + /* Auxiliary (Linux-defined) */
1431     + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1432     + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1433     + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1434     + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1435     };
1436     static char *x86_power_flags[] = {
1437     "ts", /* temperature sensor */
1438     diff -Naur linux-2.6.25/arch/x86/kernel/smp_32-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c
1439     --- linux-2.6.25/arch/x86/kernel/smp_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1440     +++ linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1441     @@ -22,6 +22,7 @@
1442    
1443     #include <asm/mtrr.h>
1444     #include <asm/tlbflush.h>
1445     +#include <asm/mmu_context.h>
1446     #if 0
1447     #include <mach_apic.h>
1448     #endif
1449     @@ -217,13 +218,13 @@
1450     static DEFINE_SPINLOCK(tlbstate_lock);
1451    
1452     /*
1453     - * We cannot call mmdrop() because we are in interrupt context,
1454     + * We cannot call mmdrop() because we are in interrupt context,
1455     * instead update mm->cpu_vm_mask.
1456     *
1457     * We need to reload %cr3 since the page tables may be going
1458     * away from under us..
1459     */
1460     -static inline void leave_mm (unsigned long cpu)
1461     +void leave_mm(unsigned long cpu)
1462     {
1463     if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
1464     BUG();
1465     diff -Naur linux-2.6.25/arch/x86/kernel/smp_64-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c
1466     --- linux-2.6.25/arch/x86/kernel/smp_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1467     +++ linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1468     @@ -362,7 +362,7 @@
1469     }
1470    
1471     /*
1472     - * smp_call_function_single - Run a function on another CPU
1473     + * smp_call_function_single - Run a function on a specific CPU
1474     * @func: The function to run. This must be fast and non-blocking.
1475     * @info: An arbitrary pointer to pass to the function.
1476     * @nonatomic: Currently unused.
1477     @@ -379,17 +379,21 @@
1478     {
1479     /* prevent preemption and reschedule on another processor */
1480     int me = get_cpu();
1481     +
1482     + /* Can deadlock when called with interrupts disabled */
1483     + WARN_ON(irqs_disabled());
1484     +
1485     if (cpu == me) {
1486     + local_irq_disable();
1487     + func(info);
1488     + local_irq_enable();
1489     put_cpu();
1490     return 0;
1491     }
1492    
1493     - /* Can deadlock when called with interrupts disabled */
1494     - WARN_ON(irqs_disabled());
1495     -
1496     - spin_lock_bh(&call_lock);
1497     + spin_lock(&call_lock);
1498     __smp_call_function_single(cpu, func, info, nonatomic, wait);
1499     - spin_unlock_bh(&call_lock);
1500     + spin_unlock(&call_lock);
1501     put_cpu();
1502     return 0;
1503     }
1504     diff -Naur linux-2.6.25/arch/x86/kernel/time_32-xen.c linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c
1505     --- linux-2.6.25/arch/x86/kernel/time_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1506     +++ linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1507     @@ -76,11 +76,12 @@
1508     #include <xen/evtchn.h>
1509     #include <xen/interface/vcpu.h>
1510    
1511     -#ifdef CONFIG_X86_32
1512     #include <asm/i8253.h>
1513     DEFINE_SPINLOCK(i8253_lock);
1514     EXPORT_SYMBOL(i8253_lock);
1515     -#else
1516     +
1517     +#ifdef CONFIG_X86_64
1518     +#include <asm/vsyscall.h>
1519     volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
1520     #endif
1521    
1522     @@ -209,6 +210,26 @@
1523     return product;
1524     }
1525    
1526     +static inline u64 get64(volatile u64 *ptr)
1527     +{
1528     +#ifndef CONFIG_64BIT
1529     + return cmpxchg64(ptr, 0, 0);
1530     +#else
1531     + return *ptr;
1532     +#define cmpxchg64 cmpxchg
1533     +#endif
1534     +}
1535     +
1536     +static inline u64 get64_local(volatile u64 *ptr)
1537     +{
1538     +#ifndef CONFIG_64BIT
1539     + return cmpxchg64_local(ptr, 0, 0);
1540     +#else
1541     + return *ptr;
1542     +#define cmpxchg64_local cmpxchg_local
1543     +#endif
1544     +}
1545     +
1546     #if 0 /* defined (__i386__) */
1547     int read_current_timer(unsigned long *timer_val)
1548     {
1549     @@ -391,7 +412,7 @@
1550     return retval;
1551     }
1552    
1553     -unsigned long long sched_clock(void)
1554     +static unsigned long long local_clock(void)
1555     {
1556     unsigned int cpu = get_cpu();
1557     struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1558     @@ -412,6 +433,61 @@
1559     return time;
1560     }
1561    
1562     +/*
1563     + * Runstate accounting
1564     + */
1565     +static void get_runstate_snapshot(struct vcpu_runstate_info *res)
1566     +{
1567     + u64 state_time;
1568     + struct vcpu_runstate_info *state;
1569     +
1570     + BUG_ON(preemptible());
1571     +
1572     + state = &__get_cpu_var(runstate);
1573     +
1574     + do {
1575     + state_time = get64_local(&state->state_entry_time);
1576     + *res = *state;
1577     + } while (get64_local(&state->state_entry_time) != state_time);
1578     +
1579     + WARN_ON_ONCE(res->state != RUNSTATE_running);
1580     +}
1581     +
1582     +/*
1583     + * Xen sched_clock implementation. Returns the number of unstolen
1584     + * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
1585     + * states.
1586     + */
1587     +unsigned long long sched_clock(void)
1588     +{
1589     + struct vcpu_runstate_info runstate;
1590     + cycle_t now;
1591     + u64 ret;
1592     + s64 offset;
1593     +
1594     + /*
1595     + * Ideally sched_clock should be called on a per-cpu basis
1596     + * anyway, so preempt should already be disabled, but that's
1597     + * not current practice at the moment.
1598     + */
1599     + preempt_disable();
1600     +
1601     + now = local_clock();
1602     +
1603     + get_runstate_snapshot(&runstate);
1604     +
1605     + offset = now - runstate.state_entry_time;
1606     + if (offset < 0)
1607     + offset = 0;
1608     +
1609     + ret = offset + runstate.time[RUNSTATE_running]
1610     + + runstate.time[RUNSTATE_blocked];
1611     +
1612     + preempt_enable();
1613     +
1614     + return ret;
1615     +}
1616     +
1617     unsigned long profile_pc(struct pt_regs *regs)
1618     {
1619     unsigned long pc = instruction_pointer(regs);
1620     @@ -459,10 +535,9 @@
1621     irqreturn_t timer_interrupt(int irq, void *dev_id)
1622     {
1623     s64 delta, delta_cpu, stolen, blocked;
1624     - u64 sched_time;
1625     unsigned int i, cpu = smp_processor_id();
1626     struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1627     - struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
1628     + struct vcpu_runstate_info runstate;
1629    
1630     /*
1631     * Here we are in the timer irq handler. We just have irqs locally
1632     @@ -482,20 +557,7 @@
1633     delta -= processed_system_time;
1634     delta_cpu -= per_cpu(processed_system_time, cpu);
1635    
1636     - /*
1637     - * Obtain a consistent snapshot of stolen/blocked cycles. We
1638     - * can use state_entry_time to detect if we get preempted here.
1639     - */
1640     - do {
1641     - sched_time = runstate->state_entry_time;
1642     - barrier();
1643     - stolen = runstate->time[RUNSTATE_runnable] +
1644     - runstate->time[RUNSTATE_offline] -
1645     - per_cpu(processed_stolen_time, cpu);
1646     - blocked = runstate->time[RUNSTATE_blocked] -
1647     - per_cpu(processed_blocked_time, cpu);
1648     - barrier();
1649     - } while (sched_time != runstate->state_entry_time);
1650     + get_runstate_snapshot(&runstate);
1651     } while (!time_values_up_to_date(cpu));
1652    
1653     if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
1654     @@ -537,6 +599,9 @@
1655     * HACK: Passing NULL to account_steal_time()
1656     * ensures that the ticks are accounted as stolen.
1657     */
1658     + stolen = runstate.time[RUNSTATE_runnable]
1659     + + runstate.time[RUNSTATE_offline]
1660     + - per_cpu(processed_stolen_time, cpu);
1661     if ((stolen > 0) && (delta_cpu > 0)) {
1662     delta_cpu -= stolen;
1663     if (unlikely(delta_cpu < 0))
1664     @@ -552,6 +617,8 @@
1665     * HACK: Passing idle_task to account_steal_time()
1666     * ensures that the ticks are accounted as idle/wait.
1667     */
1668     + blocked = runstate.time[RUNSTATE_blocked]
1669     + - per_cpu(processed_blocked_time, cpu);
1670     if ((blocked > 0) && (delta_cpu > 0)) {
1671     delta_cpu -= blocked;
1672     if (unlikely(delta_cpu < 0))
1673     @@ -600,16 +667,11 @@
1674    
1675     static cycle_t xen_clocksource_read(void)
1676     {
1677     - cycle_t ret = sched_clock();
1678     + cycle_t ret = local_clock();
1679    
1680     #ifdef CONFIG_SMP
1681     for (;;) {
1682     -#ifndef CONFIG_64BIT
1683     - cycle_t last = cmpxchg64(&cs_last, 0, 0);
1684     -#else
1685     - cycle_t last = cs_last;
1686     -#define cmpxchg64 cmpxchg
1687     -#endif
1688     + cycle_t last = get64(&cs_last);
1689    
1690     if ((s64)(ret - last) < 0) {
1691     if (last - ret > permitted_clock_jitter
1692     @@ -640,7 +702,7 @@
1693     extern void time_resume(void);
1694    
1695     time_resume();
1696     - cs_last = sched_clock();
1697     + cs_last = local_clock();
1698     }
1699    
1700     static struct clocksource clocksource_xen = {
1701     @@ -688,56 +750,10 @@
1702     return retval;
1703     }
1704    
1705     -static void sync_cmos_clock(unsigned long dummy);
1706     -
1707     -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
1708     -int no_sync_cmos_clock;
1709     -
1710     -static void sync_cmos_clock(unsigned long dummy)
1711     -{
1712     - struct timeval now, next;
1713     - int fail = 1;
1714     -
1715     - /*
1716     - * If we have an externally synchronized Linux clock, then update
1717     - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
1718     - * called as close as possible to 500 ms before the new second starts.
1719     - * This code is run on a timer. If the clock is set, that timer
1720     - * may not expire at the correct time. Thus, we adjust...
1721     - */
1722     - if (!ntp_synced())
1723     - /*
1724     - * Not synced, exit, do not restart a timer (if one is
1725     - * running, let it run out).
1726     - */
1727     - return;
1728     -
1729     - do_gettimeofday(&now);
1730     - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
1731     - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
1732     - fail = set_rtc_mmss(now.tv_sec);
1733     -
1734     - next.tv_usec = USEC_AFTER - now.tv_usec;
1735     - if (next.tv_usec <= 0)
1736     - next.tv_usec += USEC_PER_SEC;
1737     -
1738     - if (!fail)
1739     - next.tv_sec = 659;
1740     - else
1741     - next.tv_sec = 0;
1742     -
1743     - if (next.tv_usec >= USEC_PER_SEC) {
1744     - next.tv_sec++;
1745     - next.tv_usec -= USEC_PER_SEC;
1746     - }
1747     - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
1748     -}
1749     -
1750     -void notify_arch_cmos_timer(void)
1751     +int update_persistent_clock(struct timespec now)
1752     {
1753     - if (!no_sync_cmos_clock)
1754     - mod_timer(&sync_cmos_timer, jiffies + 1);
1755     mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
1756     + return set_rtc_mmss(now.tv_sec);
1757     }
1758    
1759     extern void (*late_time_init)(void);
1760     diff -Naur linux-2.6.25/arch/x86/kernel/traps_32-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c
1761     --- linux-2.6.25/arch/x86/kernel/traps_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1762     +++ linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1763     @@ -41,6 +41,10 @@
1764     #include <linux/mca.h>
1765     #endif
1766    
1767     +#if defined(CONFIG_EDAC)
1768     +#include <linux/edac.h>
1769     +#endif
1770     +
1771     #include <asm/processor.h>
1772     #include <asm/system.h>
1773     #include <asm/io.h>
1774     @@ -102,36 +106,45 @@
1775     int kstack_depth_to_print = 24;
1776     static unsigned int code_bytes = 64;
1777    
1778     -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
1779     +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
1780     {
1781     return p > (void *)tinfo &&
1782     - p < (void *)tinfo + THREAD_SIZE - 3;
1783     + p <= (void *)tinfo + THREAD_SIZE - size;
1784     }
1785    
1786     +/* The form of the top of the frame on the stack */
1787     +struct stack_frame {
1788     + struct stack_frame *next_frame;
1789     + unsigned long return_address;
1790     +};
1791     +
1792     static inline unsigned long print_context_stack(struct thread_info *tinfo,
1793     unsigned long *stack, unsigned long ebp,
1794     struct stacktrace_ops *ops, void *data)
1795     {
1796     - unsigned long addr;
1797     -
1798     #ifdef CONFIG_FRAME_POINTER
1799     - while (valid_stack_ptr(tinfo, (void *)ebp)) {
1800     - unsigned long new_ebp;
1801     - addr = *(unsigned long *)(ebp + 4);
1802     + struct stack_frame *frame = (struct stack_frame *)ebp;
1803     + while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
1804     + struct stack_frame *next;
1805     + unsigned long addr;
1806     +
1807     + addr = frame->return_address;
1808     ops->address(data, addr);
1809     /*
1810     * break out of recursive entries (such as
1811     * end_of_stack_stop_unwind_function). Also,
1812     * we can never allow a frame pointer to
1813     * move downwards!
1814     - */
1815     - new_ebp = *(unsigned long *)ebp;
1816     - if (new_ebp <= ebp)
1817     + */
1818     + next = frame->next_frame;
1819     + if (next <= frame)
1820     break;
1821     - ebp = new_ebp;
1822     + frame = next;
1823     }
1824     #else
1825     - while (valid_stack_ptr(tinfo, stack)) {
1826     + while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
1827     + unsigned long addr;
1828     +
1829     addr = *stack++;
1830     if (__kernel_text_address(addr))
1831     ops->address(data, addr);
1832     @@ -154,7 +167,7 @@
1833     if (!stack) {
1834     unsigned long dummy;
1835     stack = &dummy;
1836     - if (task && task != current)
1837     + if (task != current)
1838     stack = (unsigned long *)task->thread.esp;
1839     }
1840    
1841     @@ -213,6 +226,7 @@
1842     {
1843     printk("%s [<%08lx>] ", (char *)data, addr);
1844     print_symbol("%s\n", addr);
1845     + touch_nmi_watchdog();
1846     }
1847    
1848     static struct stacktrace_ops print_trace_ops = {
1849     @@ -396,7 +410,7 @@
1850     unsigned long esp;
1851     unsigned short ss;
1852    
1853     - report_bug(regs->eip);
1854     + report_bug(regs->eip, regs);
1855    
1856     printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
1857     #ifdef CONFIG_PREEMPT
1858     @@ -439,6 +453,7 @@
1859    
1860     bust_spinlocks(0);
1861     die.lock_owner = -1;
1862     + add_taint(TAINT_DIE);
1863     spin_unlock_irqrestore(&die.lock, flags);
1864    
1865     if (!regs)
1866     @@ -523,10 +538,12 @@
1867     do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
1868     }
1869    
1870     -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
1871     +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
1872     fastcall void do_##name(struct pt_regs * regs, long error_code) \
1873     { \
1874     siginfo_t info; \
1875     + if (irq) \
1876     + local_irq_enable(); \
1877     info.si_signo = signr; \
1878     info.si_errno = 0; \
1879     info.si_code = sicode; \
1880     @@ -566,13 +583,13 @@
1881     #endif
1882     DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
1883     DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
1884     -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip)
1885     +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
1886     DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
1887     DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
1888     DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
1889     DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
1890     -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
1891     -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
1892     +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
1893     +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
1894    
1895     fastcall void __kprobes do_general_protection(struct pt_regs * regs,
1896     long error_code)
1897     @@ -585,6 +602,13 @@
1898    
1899     current->thread.error_code = error_code;
1900     current->thread.trap_no = 13;
1901     + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
1902     + printk_ratelimit())
1903     + printk(KERN_INFO
1904     + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
1905     + current->comm, current->pid,
1906     + regs->eip, regs->esp, error_code);
1907     +
1908     force_sig(SIGSEGV, current);
1909     return;
1910    
1911     @@ -610,6 +634,14 @@
1912     printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
1913     "CPU %d.\n", reason, smp_processor_id());
1914     printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
1915     +
1916     +#if defined(CONFIG_EDAC)
1917     + if(edac_handler_set()) {
1918     + edac_atomic_assert_error();
1919     + return;
1920     + }
1921     +#endif
1922     +
1923     if (panic_on_unrecovered_nmi)
1924     panic("NMI: Not continuing");
1925    
1926     @@ -720,6 +752,8 @@
1927     reassert_nmi();
1928     }
1929    
1930     +static int ignore_nmis;
1931     +
1932     fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
1933     {
1934     int cpu;
1935     @@ -730,11 +764,24 @@
1936    
1937     ++nmi_count(cpu);
1938    
1939     - default_do_nmi(regs);
1940     + if (!ignore_nmis)
1941     + default_do_nmi(regs);
1942    
1943     nmi_exit();
1944     }
1945    
1946     +void stop_nmi(void)
1947     +{
1948     + acpi_nmi_disable();
1949     + ignore_nmis++;
1950     +}
1951     +
1952     +void restart_nmi(void)
1953     +{
1954     + ignore_nmis--;
1955     + acpi_nmi_enable();
1956     +}
1957     +
1958     #ifdef CONFIG_KPROBES
1959     fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
1960     {
1961     @@ -1023,6 +1070,7 @@
1962     thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1963     tsk->fpu_counter++;
1964     }
1965     +EXPORT_SYMBOL_GPL(math_state_restore);
1966    
1967     #ifndef CONFIG_MATH_EMULATION
1968    
1969     diff -Naur linux-2.6.25/arch/x86/kernel/traps_64-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c
1970     --- linux-2.6.25/arch/x86/kernel/traps_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1971     +++ linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1972     @@ -34,6 +34,10 @@
1973     #include <linux/bug.h>
1974     #include <linux/kdebug.h>
1975    
1976     +#if defined(CONFIG_EDAC)
1977     +#include <linux/edac.h>
1978     +#endif
1979     +
1980     #include <asm/system.h>
1981     #include <asm/io.h>
1982     #include <asm/atomic.h>
1983     @@ -332,6 +336,7 @@
1984    
1985     static void print_trace_address(void *data, unsigned long addr)
1986     {
1987     + touch_nmi_watchdog();
1988     printk_address(addr);
1989     }
1990    
1991     @@ -520,6 +525,7 @@
1992     printk("\n");
1993     notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
1994     show_registers(regs);
1995     + add_taint(TAINT_DIE);
1996     /* Executive summary in case the oops scrolled away */
1997     printk(KERN_ALERT "RIP ");
1998     printk_address(regs->rip);
1999     @@ -533,7 +539,7 @@
2000     unsigned long flags = oops_begin();
2001    
2002     if (!user_mode(regs))
2003     - report_bug(regs->rip);
2004     + report_bug(regs->rip, regs);
2005    
2006     __die(str, regs, err);
2007     oops_end(flags);
2008     @@ -582,7 +588,8 @@
2009     tsk->thread.error_code = error_code;
2010     tsk->thread.trap_no = trapnr;
2011    
2012     - if (exception_trace && unhandled_signal(tsk, signr))
2013     + if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
2014     + printk_ratelimit())
2015     printk(KERN_INFO
2016     "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
2017     tsk->comm, tsk->pid, str,
2018     @@ -686,7 +693,8 @@
2019     tsk->thread.error_code = error_code;
2020     tsk->thread.trap_no = 13;
2021    
2022     - if (exception_trace && unhandled_signal(tsk, SIGSEGV))
2023     + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2024     + printk_ratelimit())
2025     printk(KERN_INFO
2026     "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
2027     tsk->comm, tsk->pid,
2028     @@ -721,6 +729,13 @@
2029     reason);
2030     printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
2031    
2032     +#if defined(CONFIG_EDAC)
2033     + if(edac_handler_set()) {
2034     + edac_atomic_assert_error();
2035     + return;
2036     + }
2037     +#endif
2038     +
2039     if (panic_on_unrecovered_nmi)
2040     panic("NMI: Not continuing");
2041    
2042     diff -Naur linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c
2043     --- linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2044     +++ linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2045     @@ -42,6 +42,7 @@
2046     #include <asm/segment.h>
2047     #include <asm/desc.h>
2048     #include <asm/topology.h>
2049     +#include <asm/vgtod.h>
2050    
2051     #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
2052     #define __syscall_clobber "r11","rcx","memory"
2053     @@ -57,26 +58,9 @@
2054     * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
2055     * Try to keep this structure as small as possible to avoid cache line ping pongs
2056     */
2057     -struct vsyscall_gtod_data_t {
2058     - seqlock_t lock;
2059     -
2060     - /* open coded 'struct timespec' */
2061     - time_t wall_time_sec;
2062     - u32 wall_time_nsec;
2063     -
2064     - int sysctl_enabled;
2065     - struct timezone sys_tz;
2066     - struct { /* extract of a clocksource struct */
2067     - cycle_t (*vread)(void);
2068     - cycle_t cycle_last;
2069     - cycle_t mask;
2070     - u32 mult;
2071     - u32 shift;
2072     - } clock;
2073     -};
2074     int __vgetcpu_mode __section_vgetcpu_mode;
2075    
2076     -struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
2077     +struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
2078     {
2079     .lock = SEQLOCK_UNLOCKED,
2080     .sysctl_enabled = 1,
2081     @@ -96,6 +80,8 @@
2082     vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
2083     vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
2084     vsyscall_gtod_data.sys_tz = sys_tz;
2085     + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
2086     + vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
2087     write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
2088     }
2089    
2090     diff -Naur linux-2.6.25/arch/x86/mach-xen/setup.c linux-2.6.25-xen/arch/x86/mach-xen/setup.c
2091     --- linux-2.6.25/arch/x86/mach-xen/setup.c 2008-05-23 18:33:23.000000000 +0200
2092     +++ linux-2.6.25-xen/arch/x86/mach-xen/setup.c 2008-05-23 18:27:40.000000000 +0200
2093     @@ -12,6 +12,7 @@
2094     #include <asm/e820.h>
2095     #include <asm/setup.h>
2096     #include <asm/fixmap.h>
2097     +#include <asm/pgtable.h>
2098    
2099     #include <xen/interface/callback.h>
2100     #include <xen/interface/memory.h>
2101     @@ -101,7 +102,7 @@
2102    
2103     init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;
2104    
2105     - setup_xen_features();
2106     + xen_setup_features();
2107    
2108     if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
2109     hypervisor_virt_start = pp.virt_start;
2110     @@ -157,4 +158,18 @@
2111     HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
2112     }
2113     #endif
2114     +
2115     + /* Do an early initialization of the fixmap area */
2116     + {
2117     + extern pte_t swapper_pg_pmd[PTRS_PER_PTE];
2118     + unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
2119     + pgd_t *pgd = (pgd_t *)xen_start_info->pt_base;
2120     + pud_t *pud = pud_offset(pgd + pgd_index(addr), addr);
2121     + pmd_t *pmd = pmd_offset(pud, addr);
2122     +
2123     + swapper_pg_dir = pgd;
2124     + init_mm.pgd = pgd;
2125     + make_lowmem_page_readonly(swapper_pg_pmd, XENFEAT_writable_page_tables);
2126     + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE));
2127     + }
2128     }
2129     diff -Naur linux-2.6.25/arch/x86/Makefile linux-2.6.25-xen/arch/x86/Makefile
2130     --- linux-2.6.25/arch/x86/Makefile 2008-05-23 18:32:52.000000000 +0200
2131     +++ linux-2.6.25-xen/arch/x86/Makefile 2008-05-23 18:27:40.000000000 +0200
2132     @@ -173,7 +173,7 @@
2133     core-y += $(fcore-y)
2134    
2135     # Xen paravirtualization support
2136     -core-$(CONFIG_XEN) += arch/x86/xen/
2137     +core-$(CONFIG_PARAVIRT_XEN) += arch/x86/xen/
2138    
2139     # lguest paravirtualization support
2140     core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
2141     diff -Naur linux-2.6.25/arch/x86/mm/fault_32-xen.c linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c
2142     --- linux-2.6.25/arch/x86/mm/fault_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2143     +++ linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2144     @@ -346,7 +346,7 @@
2145     pmd_k = pmd_offset(pud_k, address);
2146     if (!pmd_present(*pmd_k))
2147     return NULL;
2148     - if (!pmd_present(*pmd))
2149     + if (!pmd_present(*pmd)) {
2150     #if CONFIG_XEN_COMPAT > 0x030002
2151     set_pmd(pmd, *pmd_k);
2152     #else
2153     @@ -356,7 +356,8 @@
2154     */
2155     set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
2156     #endif
2157     - else
2158     + arch_flush_lazy_mmu_mode();
2159     + } else
2160     BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
2161     return pmd_k;
2162     }
2163     @@ -388,6 +389,8 @@
2164     return 0;
2165     }
2166    
2167     +int show_unhandled_signals = 1;
2168     +
2169     /*
2170     * This routine handles page faults. It determines the address,
2171     * and the problem, and then passes it off to one of the appropriate
2172     @@ -408,6 +411,7 @@
2173     struct vm_area_struct * vma;
2174     unsigned long address;
2175     int write, si_code;
2176     + int fault;
2177    
2178     /* get the address */
2179     address = read_cr2();
2180     @@ -541,20 +545,18 @@
2181     * make sure we exit gracefully rather than endlessly redo
2182     * the fault.
2183     */
2184     - switch (handle_mm_fault(mm, vma, address, write)) {
2185     - case VM_FAULT_MINOR:
2186     - tsk->min_flt++;
2187     - break;
2188     - case VM_FAULT_MAJOR:
2189     - tsk->maj_flt++;
2190     - break;
2191     - case VM_FAULT_SIGBUS:
2192     - goto do_sigbus;
2193     - case VM_FAULT_OOM:
2194     + fault = handle_mm_fault(mm, vma, address, write);
2195     + if (unlikely(fault & VM_FAULT_ERROR)) {
2196     + if (fault & VM_FAULT_OOM)
2197     goto out_of_memory;
2198     - default:
2199     - BUG();
2200     + else if (fault & VM_FAULT_SIGBUS)
2201     + goto do_sigbus;
2202     + BUG();
2203     }
2204     + if (fault & VM_FAULT_MAJOR)
2205     + tsk->maj_flt++;
2206     + else
2207     + tsk->min_flt++;
2208    
2209     /*
2210     * Did it hit the DOS screen memory VA from vm86 mode?
2211     @@ -589,6 +591,14 @@
2212     if (is_prefetch(regs, address, error_code))
2213     return;
2214    
2215     + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2216     + printk_ratelimit()) {
2217     + printk("%s%s[%d]: segfault at %08lx eip %08lx "
2218     + "esp %08lx error %lx\n",
2219     + tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
2220     + tsk->comm, tsk->pid, address, regs->eip,
2221     + regs->esp, error_code);
2222     + }
2223     tsk->thread.cr2 = address;
2224     /* Kernel addresses are always protection faults */
2225     tsk->thread.error_code = error_code | (address >= TASK_SIZE);
2226     diff -Naur linux-2.6.25/arch/x86/mm/fault_64-xen.c linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c
2227     --- linux-2.6.25/arch/x86/mm/fault_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2228     +++ linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2229     @@ -159,7 +159,9 @@
2230     pmd_t *pmd;
2231     pte_t *pte;
2232    
2233     - pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
2234     + pgd = (pgd_t *)read_cr3();
2235     +
2236     + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
2237     pgd += pgd_index(address);
2238     if (bad_address(pgd)) goto bad;
2239     printk("PGD %lx ", pgd_val(*pgd));
2240     @@ -219,16 +221,6 @@
2241     return 0;
2242     }
2243    
2244     -int unhandled_signal(struct task_struct *tsk, int sig)
2245     -{
2246     - if (is_init(tsk))
2247     - return 1;
2248     - if (tsk->ptrace & PT_PTRACED)
2249     - return 0;
2250     - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
2251     - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
2252     -}
2253     -
2254     static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
2255     unsigned long error_code)
2256     {
2257     @@ -302,8 +294,8 @@
2258     return 0;
2259     }
2260    
2261     -int page_fault_trace = 0;
2262     -int exception_trace = 1;
2263     +static int page_fault_trace;
2264     +int show_unhandled_signals = 1;
2265    
2266    
2267     #define MEM_VERBOSE 1
2268     @@ -372,7 +364,7 @@
2269     struct vm_area_struct * vma;
2270     unsigned long address;
2271     const struct exception_table_entry *fixup;
2272     - int write;
2273     + int write, fault;
2274     unsigned long flags;
2275     siginfo_t info;
2276    
2277     @@ -384,7 +376,7 @@
2278     prefetchw(&mm->mmap_sem);
2279    
2280     /* get the address */
2281     - address = current_vcpu_info()->arch.cr2;
2282     + address = read_cr2();
2283    
2284     info.si_code = SEGV_MAPERR;
2285    
2286     @@ -445,6 +437,13 @@
2287     if (unlikely(in_atomic() || !mm))
2288     goto bad_area_nosemaphore;
2289    
2290     + /*
2291     + * User-mode registers count as a user access even for any
2292     + * potential system fault or CPU buglet.
2293     + */
2294     + if (user_mode_vm(regs))
2295     + error_code |= PF_USER;
2296     +
2297     again:
2298     /* When running in the kernel we expect faults to occur only to
2299     * addresses in user space. All other faults represent errors in the
2300     @@ -511,19 +510,18 @@
2301     * make sure we exit gracefully rather than endlessly redo
2302     * the fault.
2303     */
2304     - switch (handle_mm_fault(mm, vma, address, write)) {
2305     - case VM_FAULT_MINOR:
2306     - tsk->min_flt++;
2307     - break;
2308     - case VM_FAULT_MAJOR:
2309     - tsk->maj_flt++;
2310     - break;
2311     - case VM_FAULT_SIGBUS:
2312     - goto do_sigbus;
2313     - default:
2314     - goto out_of_memory;
2315     + fault = handle_mm_fault(mm, vma, address, write);
2316     + if (unlikely(fault & VM_FAULT_ERROR)) {
2317     + if (fault & VM_FAULT_OOM)
2318     + goto out_of_memory;
2319     + else if (fault & VM_FAULT_SIGBUS)
2320     + goto do_sigbus;
2321     + BUG();
2322     }
2323     -
2324     + if (fault & VM_FAULT_MAJOR)
2325     + tsk->maj_flt++;
2326     + else
2327     + tsk->min_flt++;
2328     up_read(&mm->mmap_sem);
2329     return;
2330    
2331     @@ -556,7 +554,8 @@
2332     (address >> 32))
2333     return;
2334    
2335     - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
2336     + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2337     + printk_ratelimit()) {
2338     printk(
2339     "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
2340     tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
2341     @@ -630,7 +629,7 @@
2342     }
2343     printk("VM: killing process %s\n", tsk->comm);
2344     if (error_code & 4)
2345     - do_exit(SIGKILL);
2346     + do_group_exit(SIGKILL);
2347     goto no_context;
2348    
2349     do_sigbus:
2350     diff -Naur linux-2.6.25/arch/x86/mm/highmem_32-xen.c linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c
2351     --- linux-2.6.25/arch/x86/mm/highmem_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2352     +++ linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2353     @@ -34,17 +34,16 @@
2354     /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
2355     pagefault_disable();
2356    
2357     - idx = type + KM_TYPE_NR*smp_processor_id();
2358     - BUG_ON(!pte_none(*(kmap_pte-idx)));
2359     -
2360     if (!PageHighMem(page))
2361     return page_address(page);
2362    
2363     + idx = type + KM_TYPE_NR*smp_processor_id();
2364     vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
2365     + BUG_ON(!pte_none(*(kmap_pte-idx)));
2366     set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
2367     arch_flush_lazy_mmu_mode();
2368    
2369     - return (void*) vaddr;
2370     + return (void *)vaddr;
2371     }
2372    
2373     void *kmap_atomic(struct page *page, enum km_type type)
2374     diff -Naur linux-2.6.25/arch/x86/mm/init_32-xen.c linux-2.6.25-xen/arch/x86/mm/init_32-xen.c
2375     --- linux-2.6.25/arch/x86/mm/init_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2376     +++ linux-2.6.25-xen/arch/x86/mm/init_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2377     @@ -98,7 +98,7 @@
2378     #endif
2379     pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
2380    
2381     - paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
2382     + paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
2383     make_lowmem_page_readonly(page_table,
2384     XENFEAT_writable_page_tables);
2385     set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
2386     @@ -448,7 +448,7 @@
2387     xen_pagetable_setup_done(pgd_base);
2388     }
2389    
2390     -#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
2391     +#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
2392     /*
2393     * Swap suspend & friends need this for resume because things like the intel-agp
2394     * driver might have split up a kernel 4MB mapping.
2395     @@ -487,9 +487,13 @@
2396     flush_tlb_all();
2397     }
2398    
2399     +int nx_enabled = 0;
2400     +
2401     +#ifdef CONFIG_X86_PAE
2402     +
2403     static int disable_nx __initdata = 0;
2404     u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
2405     -EXPORT_SYMBOL(__supported_pte_mask);
2406     +EXPORT_SYMBOL_GPL(__supported_pte_mask);
2407    
2408     /*
2409     * noexec = on|off
2410     @@ -516,9 +520,6 @@
2411     }
2412     early_param("noexec", noexec_setup);
2413    
2414     -int nx_enabled = 0;
2415     -#ifdef CONFIG_X86_PAE
2416     -
2417     static void __init set_nx(void)
2418     {
2419     unsigned int v[4], l, h;
2420     @@ -803,8 +804,7 @@
2421     PTRS_PER_PMD*sizeof(pmd_t),
2422     PTRS_PER_PMD*sizeof(pmd_t),
2423     SLAB_PANIC,
2424     - pmd_ctor,
2425     - NULL);
2426     + pmd_ctor);
2427     if (!SHARED_KERNEL_PMD) {
2428     /* If we're in PAE mode and have a non-shared
2429     kernel pmd, then the pgd size must be a
2430     diff -Naur linux-2.6.25/arch/x86/mm/init_64-xen.c linux-2.6.25-xen/arch/x86/mm/init_64-xen.c
2431     --- linux-2.6.25/arch/x86/mm/init_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2432     +++ linux-2.6.25-xen/arch/x86/mm/init_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2433     @@ -66,6 +66,9 @@
2434     DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
2435     extern unsigned long start_pfn;
2436    
2437     +extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
2438     +extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
2439     +
2440     /*
2441     * Use this until direct mapping is established, i.e. before __va() is
2442     * available in init_memory_mapping().
2443     @@ -353,6 +356,10 @@
2444     set_pte_phys(address, phys, prot, 0);
2445     set_pte_phys(address, phys, prot, 1);
2446     break;
2447     + case FIX_EARLYCON_MEM_BASE:
2448     + xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
2449     + pfn_pte_ma(phys >> PAGE_SHIFT, prot));
2450     + break;
2451     default:
2452     set_pte_phys_ma(address, phys, prot);
2453     break;
2454     @@ -588,6 +595,13 @@
2455     __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
2456     __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
2457    
2458     + /* Do an early initialization of the fixmap area. */
2459     + addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
2460     + level3_kernel_pgt[pud_index(addr)] =
2461     + __pud(__pa_symbol(level2_fixmap_pgt) | _PAGE_TABLE);
2462     + level2_fixmap_pgt[pmd_index(addr)] =
2463     + __pmd(__pa_symbol(level1_fixmap_pgt) | _PAGE_TABLE);
2464     +
2465     early_make_page_readonly(init_level4_pgt,
2466     XENFEAT_writable_page_tables);
2467     early_make_page_readonly(__user_pgd(init_level4_pgt),
2468     @@ -598,6 +612,10 @@
2469     XENFEAT_writable_page_tables);
2470     early_make_page_readonly(level2_kernel_pgt,
2471     XENFEAT_writable_page_tables);
2472     + early_make_page_readonly(level2_fixmap_pgt,
2473     + XENFEAT_writable_page_tables);
2474     + early_make_page_readonly(level1_fixmap_pgt,
2475     + XENFEAT_writable_page_tables);
2476    
2477     if (!xen_feature(XENFEAT_writable_page_tables)) {
2478     xen_pgd_pin(__pa_symbol(init_level4_pgt));
2479     @@ -1121,41 +1139,6 @@
2480     return pfn_valid(pte_pfn(*pte));
2481     }
2482    
2483     -#ifdef CONFIG_SYSCTL
2484     -#include <linux/sysctl.h>
2485     -
2486     -extern int exception_trace, page_fault_trace;
2487     -
2488     -static ctl_table debug_table2[] = {
2489     - {
2490     - .ctl_name = 99,
2491     - .procname = "exception-trace",
2492     - .data = &exception_trace,
2493     - .maxlen = sizeof(int),
2494     - .mode = 0644,
2495     - .proc_handler = proc_dointvec
2496     - },
2497     - {}
2498     -};
2499     -
2500     -static ctl_table debug_root_table2[] = {
2501     - {
2502     - .ctl_name = CTL_DEBUG,
2503     - .procname = "debug",
2504     - .mode = 0555,
2505     - .child = debug_table2
2506     - },
2507     - {}
2508     -};
2509     -
2510     -static __init int x8664_sysctl_init(void)
2511     -{
2512     - register_sysctl_table(debug_root_table2);
2513     - return 0;
2514     -}
2515     -__initcall(x8664_sysctl_init);
2516     -#endif
2517     -
2518     /* A pseudo VMA to allow ptrace access for the vsyscall page. This only
2519     covers the 64bit vsyscall page now. 32bit has a real VMA now and does
2520     not need special handling anymore. */
2521     @@ -1194,9 +1177,18 @@
2522     }
2523    
2524     #ifndef CONFIG_XEN
2525     -void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
2526     +void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
2527     {
2528     return __alloc_bootmem_core(pgdat->bdata, size,
2529     SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
2530     }
2531     #endif
2532     +
2533     +const char *arch_vma_name(struct vm_area_struct *vma)
2534     +{
2535     + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
2536     + return "[vdso]";
2537     + if (vma == &gate_vma)
2538     + return "[vsyscall]";
2539     + return NULL;
2540     +}
2541     diff -Naur linux-2.6.25/arch/x86/mm/ioremap_32-xen.c linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c
2542     --- linux-2.6.25/arch/x86/mm/ioremap_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2543     +++ linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2544     @@ -363,9 +363,8 @@
2545    
2546     /* Reset the direct mapping. Can block */
2547     if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
2548     - /* p->size includes the guard page, but cpa doesn't like that */
2549     change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
2550     - (p->size - PAGE_SIZE) >> PAGE_SHIFT,
2551     + get_vm_area_size(p) >> PAGE_SHIFT,
2552     PAGE_KERNEL);
2553     global_flush_tlb();
2554     }
2555     diff -Naur linux-2.6.25/arch/x86/mm/pageattr_64-xen.c linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c
2556     --- linux-2.6.25/arch/x86/mm/pageattr_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2557     +++ linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2558     @@ -276,7 +276,7 @@
2559     }
2560     #endif /* CONFIG_XEN */
2561    
2562     -static inline pte_t *lookup_address(unsigned long address)
2563     +pte_t *lookup_address(unsigned long address)
2564     {
2565     pgd_t *pgd = pgd_offset_k(address);
2566     pud_t *pud;
2567     @@ -337,14 +337,13 @@
2568     struct page *pg;
2569    
2570     /* When clflush is available always use it because it is
2571     - much cheaper than WBINVD. Disable clflush for now because
2572     - the high level code is not ready yet */
2573     + much cheaper than WBINVD. */
2574     + /* clflush is still broken. Disable for now. */
2575     if (1 || !cpu_has_clflush)
2576     asm volatile("wbinvd" ::: "memory");
2577     else list_for_each_entry(pg, l, lru) {
2578     void *adr = page_address(pg);
2579     - if (cpu_has_clflush)
2580     - cache_flush_page(adr);
2581     + cache_flush_page(adr);
2582     }
2583     __flush_tlb_all();
2584     }
2585     @@ -358,7 +357,8 @@
2586    
2587     static inline void save_page(struct page *fpage)
2588     {
2589     - list_add(&fpage->lru, &deferred_pages);
2590     + if (!test_and_set_bit(PG_arch_1, &fpage->flags))
2591     + list_add(&fpage->lru, &deferred_pages);
2592     }
2593    
2594     /*
2595     @@ -392,9 +392,12 @@
2596     pte_t *kpte;
2597     struct page *kpte_page;
2598     pgprot_t ref_prot2;
2599     +
2600     kpte = lookup_address(address);
2601     if (!kpte) return 0;
2602     kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
2603     + BUG_ON(PageLRU(kpte_page));
2604     + BUG_ON(PageCompound(kpte_page));
2605     if (pgprot_val(prot) != pgprot_val(ref_prot)) {
2606     if (!pte_huge(*kpte)) {
2607     set_pte(kpte, pfn_pte(pfn, prot));
2608     @@ -433,10 +436,9 @@
2609     return 0;
2610     #endif
2611    
2612     - if (page_private(kpte_page) == 0) {
2613     - save_page(kpte_page);
2614     + save_page(kpte_page);
2615     + if (page_private(kpte_page) == 0)
2616     revert_page(address, ref_prot);
2617     - }
2618     return 0;
2619     }
2620    
2621     @@ -508,6 +510,10 @@
2622     flush_map(&l);
2623    
2624     list_for_each_entry_safe(pg, next, &l, lru) {
2625     + list_del(&pg->lru);
2626     + clear_bit(PG_arch_1, &pg->flags);
2627     + if (page_private(pg) != 0)
2628     + continue;
2629     ClearPagePrivate(pg);
2630     __free_page(pg);
2631     }
2632     diff -Naur linux-2.6.25/arch/x86/mm/pgtable_32-xen.c linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c
2633     --- linux-2.6.25/arch/x86/mm/pgtable_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2634     +++ linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2635     @@ -242,7 +242,7 @@
2636    
2637     #if (PTRS_PER_PMD == 1)
2638     /* Non-PAE pgd constructor */
2639     -void pgd_ctor(void *pgd)
2640     +static void pgd_ctor(void *pgd)
2641     {
2642     unsigned long flags;
2643    
2644     @@ -265,7 +265,7 @@
2645     }
2646     #else /* PTRS_PER_PMD > 1 */
2647     /* PAE pgd constructor */
2648     -void pgd_ctor(void *pgd)
2649     +static void pgd_ctor(void *pgd)
2650     {
2651     /* PAE, kernel PMD may be shared */
2652    
2653     @@ -284,7 +284,7 @@
2654     }
2655     #endif /* PTRS_PER_PMD */
2656    
2657     -void pgd_dtor(void *pgd)
2658     +static void pgd_dtor(void *pgd)
2659     {
2660     unsigned long flags; /* can be called from interrupt context */
2661    
2662     diff -Naur linux-2.6.25/arch/x86/pci/irq-xen.c linux-2.6.25-xen/arch/x86/pci/irq-xen.c
2663     --- linux-2.6.25/arch/x86/pci/irq-xen.c 2008-05-23 18:33:26.000000000 +0200
2664     +++ linux-2.6.25-xen/arch/x86/pci/irq-xen.c 2008-05-23 18:27:40.000000000 +0200
2665     @@ -142,8 +142,9 @@
2666     for(i = 1; i < 256; i++) {
2667     if (!busmap[i] || pci_find_bus(0, i))
2668     continue;
2669     - if (pci_scan_bus(i, &pci_root_ops, NULL))
2670     - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
2671     + if (pci_scan_bus_with_sysdata(i))
2672     + printk(KERN_INFO "PCI: Discovered primary peer "
2673     + "bus %02x [IRQ]\n", i);
2674     }
2675     pcibios_last_bus = -1;
2676     }
2677     @@ -553,6 +554,7 @@
2678     case PCI_DEVICE_ID_INTEL_ICH9_3:
2679     case PCI_DEVICE_ID_INTEL_ICH9_4:
2680     case PCI_DEVICE_ID_INTEL_ICH9_5:
2681     + case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
2682     r->name = "PIIX/ICH";
2683     r->get = pirq_piix_get;
2684     r->set = pirq_piix_set;
2685     diff -Naur linux-2.6.25/arch/x86/vdso/vdso32/note.S linux-2.6.25-xen/arch/x86/vdso/vdso32/note.S
2686     --- linux-2.6.25/arch/x86/vdso/vdso32/note.S 2008-04-17 04:49:44.000000000 +0200
2687     +++ linux-2.6.25-xen/arch/x86/vdso/vdso32/note.S 2008-05-23 18:27:40.000000000 +0200
2688     @@ -13,7 +13,7 @@
2689     .long LINUX_VERSION_CODE
2690     ELFNOTE_END
2691    
2692     -#ifdef CONFIG_XEN
2693     +#if defined(CONFIG_X86_XEN) || defined(CONFIG_PARAVIRT_XEN)
2694     /*
2695     * Add a special note telling glibc's dynamic linker a fake hardware
2696     * flavor that it will use to choose the search path for libraries in the
2697     @@ -37,8 +37,12 @@
2698    
2699     ELFNOTE_START(GNU, 2, "a")
2700     .long 1 /* ncaps */
2701     +#ifdef CONFIG_PARAVIRT_XEN
2702     VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
2703     .long 0 /* mask */
2704     +#else
2705     + .long 1 << VDSO_NOTE_NONEGSEG_BIT /* mask */
2706     +#endif
2707     .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
2708     ELFNOTE_END
2709     #endif
2710     diff -Naur linux-2.6.25/arch/x86/xen/Kconfig linux-2.6.25-xen/arch/x86/xen/Kconfig
2711     --- linux-2.6.25/arch/x86/xen/Kconfig 2008-04-17 04:49:44.000000000 +0200
2712     +++ linux-2.6.25-xen/arch/x86/xen/Kconfig 2008-05-23 18:27:40.000000000 +0200
2713     @@ -2,7 +2,7 @@
2714     # This Kconfig describes xen options
2715     #
2716    
2717     -config XEN
2718     +config PARAVIRT_XEN
2719     bool "Xen guest support"
2720     select PARAVIRT
2721     depends on X86_32
2722     diff -Naur linux-2.6.25/arch/x86/xen/xen-head.S linux-2.6.25-xen/arch/x86/xen/xen-head.S
2723     --- linux-2.6.25/arch/x86/xen/xen-head.S 2008-04-17 04:49:44.000000000 +0200
2724     +++ linux-2.6.25-xen/arch/x86/xen/xen-head.S 2008-05-23 18:27:40.000000000 +0200
2725     @@ -1,7 +1,7 @@
2726     /* Xen-specific pieces of head.S, intended to be included in the right
2727     place in head.S */
2728    
2729     -#ifdef CONFIG_XEN
2730     +#ifdef CONFIG_PARAVIRT_XEN
2731    
2732     #include <linux/elfnote.h>
2733     #include <linux/init.h>
2734     @@ -37,4 +37,4 @@
2735     #endif
2736     ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
2737    
2738     -#endif /*CONFIG_XEN */
2739     +#endif /* CONFIG_PARAVIRT_XEN */
2740     diff -Naur linux-2.6.25/drivers/block/Kconfig linux-2.6.25-xen/drivers/block/Kconfig
2741     --- linux-2.6.25/drivers/block/Kconfig 2008-04-17 04:49:44.000000000 +0200
2742     +++ linux-2.6.25-xen/drivers/block/Kconfig 2008-05-23 18:27:40.000000000 +0200
2743     @@ -417,9 +417,9 @@
2744     help
2745     Include support for the Xilinx SystemACE CompactFlash interface
2746    
2747     -config XEN_BLKDEV_FRONTEND
2748     +config XEN_BLKFRONT
2749     tristate "Xen virtual block device support"
2750     - depends on XEN
2751     + depends on PARAVIRT_XEN
2752     default y
2753     help
2754     This driver implements the front-end of the Xen virtual
2755     diff -Naur linux-2.6.25/drivers/block/Makefile linux-2.6.25-xen/drivers/block/Makefile
2756     --- linux-2.6.25/drivers/block/Makefile 2008-04-17 04:49:44.000000000 +0200
2757     +++ linux-2.6.25-xen/drivers/block/Makefile 2008-05-23 18:29:02.000000000 +0200
2758     @@ -30,4 +30,4 @@
2759     obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
2760     obj-$(CONFIG_BLK_DEV_UB) += ub.o
2761    
2762     -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
2763     +obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o
2764     diff -Naur linux-2.6.25/drivers/block/xen-blkfront.c linux-2.6.25-xen/drivers/block/xen-blkfront.c
2765     --- linux-2.6.25/drivers/block/xen-blkfront.c 2008-04-17 04:49:44.000000000 +0200
2766     +++ linux-2.6.25-xen/drivers/block/xen-blkfront.c 2008-05-23 18:27:40.000000000 +0200
2767     @@ -965,7 +965,6 @@
2768    
2769     static struct xenbus_driver blkfront = {
2770     .name = "vbd",
2771     - .owner = THIS_MODULE,
2772     .ids = blkfront_ids,
2773     .probe = blkfront_probe,
2774     .remove = blkfront_remove,
2775     diff -Naur linux-2.6.25/drivers/char/Kconfig linux-2.6.25-xen/drivers/char/Kconfig
2776     --- linux-2.6.25/drivers/char/Kconfig 2008-04-17 04:49:44.000000000 +0200
2777     +++ linux-2.6.25-xen/drivers/char/Kconfig 2008-05-23 18:27:40.000000000 +0200
2778     @@ -584,7 +584,7 @@
2779    
2780     config HVC_XEN
2781     bool "Xen Hypervisor Console support"
2782     - depends on XEN
2783     + depends on PARAVIRT_XEN
2784     select HVC_DRIVER
2785     default y
2786     help
2787     diff -Naur linux-2.6.25/drivers/Makefile linux-2.6.25-xen/drivers/Makefile
2788     --- linux-2.6.25/drivers/Makefile 2008-05-23 18:32:47.000000000 +0200
2789     +++ linux-2.6.25-xen/drivers/Makefile 2008-05-23 18:27:40.000000000 +0200
2790     @@ -19,7 +19,7 @@
2791     obj-$(CONFIG_PNP) += pnp/
2792     obj-$(CONFIG_ARM_AMBA) += amba/
2793    
2794     -obj-$(CONFIG_XEN) += xen/
2795     +obj-$(CONFIG_PARAVIRT_XEN) += xen/
2796    
2797     obj-$(CONFIG_CONNECTOR) += connector/
2798    
2799     diff -Naur linux-2.6.25/drivers/net/Kconfig linux-2.6.25-xen/drivers/net/Kconfig
2800     --- linux-2.6.25/drivers/net/Kconfig 2008-05-23 18:32:02.000000000 +0200
2801     +++ linux-2.6.25-xen/drivers/net/Kconfig 2008-05-23 18:27:40.000000000 +0200
2802     @@ -2690,9 +2690,9 @@
2803    
2804     source "drivers/s390/net/Kconfig"
2805    
2806     -config XEN_NETDEV_FRONTEND
2807     +config XEN_NETFRONT
2808     tristate "Xen network device frontend driver"
2809     - depends on XEN
2810     + depends on PARAVIRT_XEN
2811     default y
2812     help
2813     The network device frontend driver allows the kernel to
2814     diff -Naur linux-2.6.25/drivers/net/Makefile linux-2.6.25-xen/drivers/net/Makefile
2815     --- linux-2.6.25/drivers/net/Makefile 2008-05-23 18:32:02.000000000 +0200
2816     +++ linux-2.6.25-xen/drivers/net/Makefile 2008-05-23 18:27:40.000000000 +0200
2817     @@ -140,7 +140,7 @@
2818     obj-$(CONFIG_SLIP) += slip.o
2819     obj-$(CONFIG_SLHC) += slhc.o
2820    
2821     -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
2822     +obj-$(CONFIG_XEN_NETFRONT) += xen-netfront.o
2823    
2824     obj-$(CONFIG_DUMMY) += dummy.o
2825     obj-$(CONFIG_IFB) += ifb.o
2826     diff -Naur linux-2.6.25/drivers/net/xen-netfront.c linux-2.6.25-xen/drivers/net/xen-netfront.c
2827     --- linux-2.6.25/drivers/net/xen-netfront.c 2008-04-17 04:49:44.000000000 +0200
2828     +++ linux-2.6.25-xen/drivers/net/xen-netfront.c 2008-05-23 18:27:40.000000000 +0200
2829     @@ -36,8 +36,6 @@
2830     #include <linux/skbuff.h>
2831     #include <linux/ethtool.h>
2832     #include <linux/if_ether.h>
2833     -#include <linux/tcp.h>
2834     -#include <linux/udp.h>
2835     #include <linux/moduleparam.h>
2836     #include <linux/mm.h>
2837     #include <net/ip.h>
2838     @@ -754,45 +752,6 @@
2839     return cons;
2840     }
2841    
2842     -static int skb_checksum_setup(struct sk_buff *skb)
2843     -{
2844     - struct iphdr *iph;
2845     - unsigned char *th;
2846     - int err = -EPROTO;
2847     -
2848     - if (skb->protocol != htons(ETH_P_IP))
2849     - goto out;
2850     -
2851     - iph = (void *)skb->data;
2852     - th = skb->data + 4 * iph->ihl;
2853     - if (th >= skb_tail_pointer(skb))
2854     - goto out;
2855     -
2856     - skb->csum_start = th - skb->head;
2857     - switch (iph->protocol) {
2858     - case IPPROTO_TCP:
2859     - skb->csum_offset = offsetof(struct tcphdr, check);
2860     - break;
2861     - case IPPROTO_UDP:
2862     - skb->csum_offset = offsetof(struct udphdr, check);
2863     - break;
2864     - default:
2865     - if (net_ratelimit())
2866     - printk(KERN_ERR "Attempting to checksum a non-"
2867     - "TCP/UDP packet, dropping a protocol"
2868     - " %d packet", iph->protocol);
2869     - goto out;
2870     - }
2871     -
2872     - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
2873     - goto out;
2874     -
2875     - err = 0;
2876     -
2877     -out:
2878     - return err;
2879     -}
2880     -
2881     static int handle_incoming_queue(struct net_device *dev,
2882     struct sk_buff_head *rxq)
2883     {
2884     @@ -1775,7 +1734,6 @@
2885    
2886     static struct xenbus_driver netfront = {
2887     .name = "vif",
2888     - .owner = THIS_MODULE,
2889     .ids = netfront_ids,
2890     .probe = netfront_probe,
2891     .remove = __devexit_p(xennet_remove),
2892     diff -Naur linux-2.6.25/drivers/xen/blkback/blkback.c linux-2.6.25-xen/drivers/xen/blkback/blkback.c
2893     --- linux-2.6.25/drivers/xen/blkback/blkback.c 2008-05-23 18:33:26.000000000 +0200
2894     +++ linux-2.6.25-xen/drivers/xen/blkback/blkback.c 2008-05-23 18:27:40.000000000 +0200
2895     @@ -154,7 +154,7 @@
2896    
2897     static void plug_queue(blkif_t *blkif, struct bio *bio)
2898     {
2899     - request_queue_t *q = bdev_get_queue(bio->bi_bdev);
2900     + struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2901    
2902     if (q == blkif->plug)
2903     return;
2904     diff -Naur linux-2.6.25/drivers/xen/blkback/common.h linux-2.6.25-xen/drivers/xen/blkback/common.h
2905     --- linux-2.6.25/drivers/xen/blkback/common.h 2008-05-23 18:33:23.000000000 +0200
2906     +++ linux-2.6.25-xen/drivers/xen/blkback/common.h 2008-05-23 18:27:40.000000000 +0200
2907     @@ -79,7 +79,7 @@
2908     wait_queue_head_t wq;
2909     struct task_struct *xenblkd;
2910     unsigned int waiting_reqs;
2911     - request_queue_t *plug;
2912     + struct request_queue *plug;
2913    
2914     /* statistics */
2915     unsigned long st_print;
2916     diff -Naur linux-2.6.25/drivers/xen/blkback/interface.c linux-2.6.25-xen/drivers/xen/blkback/interface.c
2917     --- linux-2.6.25/drivers/xen/blkback/interface.c 2008-05-23 18:33:26.000000000 +0200
2918     +++ linux-2.6.25-xen/drivers/xen/blkback/interface.c 2008-05-23 18:27:40.000000000 +0200
2919     @@ -177,5 +177,5 @@
2920     void __init blkif_interface_init(void)
2921     {
2922     blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
2923     - 0, 0, NULL, NULL);
2924     + 0, 0, NULL);
2925     }
2926     diff -Naur linux-2.6.25/drivers/xen/blkfront/blkfront.c linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c
2927     --- linux-2.6.25/drivers/xen/blkfront/blkfront.c 2008-05-23 18:33:27.000000000 +0200
2928     +++ linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c 2008-05-23 18:27:40.000000000 +0200
2929     @@ -653,7 +653,7 @@
2930     * do_blkif_request
2931     * read a block; request is in a request queue
2932     */
2933     -void do_blkif_request(request_queue_t *rq)
2934     +void do_blkif_request(struct request_queue *rq)
2935     {
2936     struct blkfront_info *info = NULL;
2937     struct request *req;
2938     diff -Naur linux-2.6.25/drivers/xen/blkfront/block.h linux-2.6.25-xen/drivers/xen/blkfront/block.h
2939     --- linux-2.6.25/drivers/xen/blkfront/block.h 2008-05-23 18:32:32.000000000 +0200
2940     +++ linux-2.6.25-xen/drivers/xen/blkfront/block.h 2008-05-23 18:27:40.000000000 +0200
2941     @@ -105,7 +105,7 @@
2942     blkif_front_ring_t ring;
2943     unsigned int irq;
2944     struct xlbd_major_info *mi;
2945     - request_queue_t *rq;
2946     + struct request_queue *rq;
2947     struct work_struct work;
2948     struct gnttab_free_callback callback;
2949     struct blk_shadow shadow[BLK_RING_SIZE];
2950     @@ -129,7 +129,7 @@
2951     extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
2952     extern int blkif_check(dev_t dev);
2953     extern int blkif_revalidate(dev_t dev);
2954     -extern void do_blkif_request (request_queue_t *rq);
2955     +extern void do_blkif_request (struct request_queue *rq);
2956    
2957     /* Virtual block-device subsystem. */
2958     /* Note that xlvbd_add doesn't call add_disk for you: you're expected
2959     diff -Naur linux-2.6.25/drivers/xen/blkfront/vbd.c linux-2.6.25-xen/drivers/xen/blkfront/vbd.c
2960     --- linux-2.6.25/drivers/xen/blkfront/vbd.c 2008-05-23 18:32:32.000000000 +0200
2961     +++ linux-2.6.25-xen/drivers/xen/blkfront/vbd.c 2008-05-23 18:27:40.000000000 +0200
2962     @@ -186,7 +186,7 @@
2963     static int
2964     xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
2965     {
2966     - request_queue_t *rq;
2967     + struct request_queue *rq;
2968    
2969     rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
2970     if (rq == NULL)
2971     diff -Naur linux-2.6.25/drivers/xen/blktap/common.h linux-2.6.25-xen/drivers/xen/blktap/common.h
2972     --- linux-2.6.25/drivers/xen/blktap/common.h 2008-05-23 18:33:23.000000000 +0200
2973     +++ linux-2.6.25-xen/drivers/xen/blktap/common.h 2008-05-23 18:27:40.000000000 +0200
2974     @@ -68,7 +68,7 @@
2975     wait_queue_head_t wq;
2976     struct task_struct *xenblkd;
2977     unsigned int waiting_reqs;
2978     - request_queue_t *plug;
2979     + struct request_queue *plug;
2980    
2981     /* statistics */
2982     unsigned long st_print;
2983     diff -Naur linux-2.6.25/drivers/xen/blktap/interface.c linux-2.6.25-xen/drivers/xen/blktap/interface.c
2984     --- linux-2.6.25/drivers/xen/blktap/interface.c 2008-05-23 18:33:26.000000000 +0200
2985     +++ linux-2.6.25-xen/drivers/xen/blktap/interface.c 2008-05-23 18:27:40.000000000 +0200
2986     @@ -170,5 +170,5 @@
2987     void __init tap_blkif_interface_init(void)
2988     {
2989     blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t),
2990     - 0, 0, NULL, NULL);
2991     + 0, 0, NULL);
2992     }
2993     diff -Naur linux-2.6.25/drivers/xen/core/features.c linux-2.6.25-xen/drivers/xen/core/features.c
2994     --- linux-2.6.25/drivers/xen/core/features.c 2008-05-23 18:32:32.000000000 +0200
2995     +++ linux-2.6.25-xen/drivers/xen/core/features.c 2008-05-23 18:27:40.000000000 +0200
2996     @@ -19,7 +19,7 @@
2997     /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
2998     EXPORT_SYMBOL(xen_features);
2999    
3000     -void setup_xen_features(void)
3001     +void xen_setup_features(void)
3002     {
3003     xen_feature_info_t fi;
3004     int i, j;
3005     diff -Naur linux-2.6.25/drivers/xen/core/reboot.c linux-2.6.25-xen/drivers/xen/core/reboot.c
3006     --- linux-2.6.25/drivers/xen/core/reboot.c 2008-05-23 18:33:26.000000000 +0200
3007     +++ linux-2.6.25-xen/drivers/xen/core/reboot.c 2008-05-23 18:27:40.000000000 +0200
3008     @@ -4,6 +4,7 @@
3009     #include <linux/unistd.h>
3010     #include <linux/module.h>
3011     #include <linux/reboot.h>
3012     +#include <linux/sched.h>
3013     #include <linux/sysrq.h>
3014     #include <asm/hypervisor.h>
3015     #include <xen/xenbus.h>
3016     diff -Naur linux-2.6.25/drivers/xen/Makefile linux-2.6.25-xen/drivers/xen/Makefile
3017     --- linux-2.6.25/drivers/xen/Makefile 2008-05-23 18:33:03.000000000 +0200
3018     +++ linux-2.6.25-xen/drivers/xen/Makefile 2008-05-23 18:27:40.000000000 +0200
3019     @@ -1,10 +1,12 @@
3020     -obj-y += core/
3021     -obj-y += console/
3022     -obj-y += evtchn/
3023     -obj-y += xenbus/
3024     -obj-y += char/
3025     +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o
3026    
3027     -obj-y += util.o
3028     +obj-$(CONFIG_XEN) += core/
3029     +obj-$(CONFIG_XEN) += console/
3030     +obj-$(CONFIG_XEN) += evtchn/
3031     +obj-y += xenbus/
3032     +obj-$(CONFIG_XEN) += char/
3033     +
3034     +obj-$(CONFIG_XEN) += util.o
3035     obj-$(CONFIG_XEN_BALLOON) += balloon/
3036     obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
3037     obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
3038     diff -Naur linux-2.6.25/drivers/xen/tpmback/interface.c linux-2.6.25-xen/drivers/xen/tpmback/interface.c
3039     --- linux-2.6.25/drivers/xen/tpmback/interface.c 2008-05-23 18:33:27.000000000 +0200
3040     +++ linux-2.6.25-xen/drivers/xen/tpmback/interface.c 2008-05-23 18:27:40.000000000 +0200
3041     @@ -12,6 +12,7 @@
3042     */
3043    
3044     #include "common.h"
3045     +#include <linux/err.h>
3046     #include <xen/balloon.h>
3047     #include <xen/gnttab.h>
3048    
3049     @@ -159,7 +160,7 @@
3050     int __init tpmif_interface_init(void)
3051     {
3052     tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
3053     - 0, 0, NULL, NULL);
3054     + 0, 0, NULL);
3055     return tpmif_cachep ? 0 : -ENOMEM;
3056     }
3057    
3058     diff -Naur linux-2.6.25/drivers/xen/util.c linux-2.6.25-xen/drivers/xen/util.c
3059     --- linux-2.6.25/drivers/xen/util.c 2008-05-23 18:32:32.000000000 +0200
3060     +++ linux-2.6.25-xen/drivers/xen/util.c 2008-05-23 18:27:40.000000000 +0200
3061     @@ -1,8 +1,5 @@
3062     -#include <linux/mm.h>
3063     +#include <linux/err.h>
3064     #include <linux/module.h>
3065     -#include <linux/slab.h>
3066     -#include <linux/vmalloc.h>
3067     -#include <asm/uaccess.h>
3068     #include <xen/driver_util.h>
3069    
3070     struct class *get_xen_class(void)
3071     @@ -21,45 +18,3 @@
3072     return xen_class;
3073     }
3074     EXPORT_SYMBOL_GPL(get_xen_class);
3075     -
3076     -#ifdef CONFIG_X86
3077     -static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
3078     -{
3079     - /* apply_to_page_range() does all the hard work. */
3080     - return 0;
3081     -}
3082     -
3083     -struct vm_struct *alloc_vm_area(unsigned long size)
3084     -{
3085     - struct vm_struct *area;
3086     -
3087     - area = get_vm_area(size, VM_IOREMAP);
3088     - if (area == NULL)
3089     - return NULL;
3090     -
3091     - /*
3092     - * This ensures that page tables are constructed for this region
3093     - * of kernel virtual address space and mapped into init_mm.
3094     - */
3095     - if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
3096     - area->size, f, NULL)) {
3097     - free_vm_area(area);
3098     - return NULL;
3099     - }
3100     -
3101     - /* Map page directories into every address space. */
3102     - vmalloc_sync_all();
3103     -
3104     - return area;
3105     -}
3106     -EXPORT_SYMBOL_GPL(alloc_vm_area);
3107     -
3108     -void free_vm_area(struct vm_struct *area)
3109     -{
3110     - struct vm_struct *ret;
3111     - ret = remove_vm_area(area->addr);
3112     - BUG_ON(ret != area);
3113     - kfree(area);
3114     -}
3115     -EXPORT_SYMBOL_GPL(free_vm_area);
3116     -#endif /* CONFIG_X86 */
3117     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_client.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_client.c
3118     --- linux-2.6.25/drivers/xen/xenbus/xenbus_client.c 2008-05-23 18:33:03.000000000 +0200
3119     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_client.c 2008-05-23 18:27:40.000000000 +0200
3120     @@ -30,19 +30,26 @@
3121     * IN THE SOFTWARE.
3122     */
3123    
3124     +#if defined(CONFIG_XEN) || defined(MODULE)
3125     #include <linux/slab.h>
3126     #include <xen/evtchn.h>
3127     #include <xen/gnttab.h>
3128     -#include <xen/xenbus.h>
3129     #include <xen/driver_util.h>
3130     +#else
3131     +#include <linux/types.h>
3132     +#include <linux/vmalloc.h>
3133     +#include <asm/xen/hypervisor.h>
3134     +#include <xen/interface/xen.h>
3135     +#include <xen/interface/event_channel.h>
3136     +#include <xen/events.h>
3137     +#include <xen/grant_table.h>
3138     +#endif
3139     +#include <xen/xenbus.h>
3140    
3141     #ifdef HAVE_XEN_PLATFORM_COMPAT_H
3142     #include <xen/platform-compat.h>
3143     #endif
3144    
3145     -#define DPRINTK(fmt, args...) \
3146     - pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
3147     -
3148     const char *xenbus_strstate(enum xenbus_state state)
3149     {
3150     static const char *const name[] = {
3151     @@ -58,6 +65,20 @@
3152     }
3153     EXPORT_SYMBOL_GPL(xenbus_strstate);
3154    
3155     +/**
3156     + * xenbus_watch_path - register a watch
3157     + * @dev: xenbus device
3158     + * @path: path to watch
3159     + * @watch: watch to register
3160     + * @callback: callback to register
3161     + *
3162     + * Register a @watch on the given path, using the given xenbus_watch structure
3163     + * for storage, and the given @callback function as the callback. Return 0 on
3164     + * success, or -errno on error. On success, the given @path will be saved as
3165     + * @watch->node, and remains the caller's to free. On error, @watch->node will
3166     + * be NULL, the device will switch to %XenbusStateClosing, and the error will
3167     + * be saved in the store.
3168     + */
3169     int xenbus_watch_path(struct xenbus_device *dev, const char *path,
3170     struct xenbus_watch *watch,
3171     void (*callback)(struct xenbus_watch *,
3172     @@ -81,6 +102,7 @@
3173     EXPORT_SYMBOL_GPL(xenbus_watch_path);
3174    
3175    
3176     +#if defined(CONFIG_XEN) || defined(MODULE)
3177     int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
3178     const char *path2, struct xenbus_watch *watch,
3179     void (*callback)(struct xenbus_watch *,
3180     @@ -99,8 +121,60 @@
3181     return err;
3182     }
3183     EXPORT_SYMBOL_GPL(xenbus_watch_path2);
3184     +#else
3185     +/**
3186     + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
3187     + * @dev: xenbus device
3188     + * @watch: watch to register
3189     + * @callback: callback to register
3190     + * @pathfmt: format of path to watch
3191     + *
3192     + * Register a watch on the path formatted from @pathfmt, using the given
3193     + * xenbus_watch structure for storage, and the given @callback function as the
3194     + * callback. Return 0 on success, or -errno on error. On success, the watched
3195     + * path will be saved as @watch->node, and becomes the caller's to
3196     + * kfree(). On error, watch->node will be NULL, so the caller has nothing to
3197     + * free, the device will switch to %XenbusStateClosing, and the error will be
3198     + * saved in the store.
3199     + */
3200     +int xenbus_watch_pathfmt(struct xenbus_device *dev,
3201     + struct xenbus_watch *watch,
3202     + void (*callback)(struct xenbus_watch *,
3203     + const char **, unsigned int),
3204     + const char *pathfmt, ...)
3205     +{
3206     + int err;
3207     + va_list ap;
3208     + char *path;
3209    
3210     + va_start(ap, pathfmt);
3211     + path = kvasprintf(GFP_KERNEL, pathfmt, ap);
3212     + va_end(ap);
3213    
3214     + if (!path) {
3215     + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
3216     + return -ENOMEM;
3217     + }
3218     + err = xenbus_watch_path(dev, path, watch, callback);
3219     +
3220     + if (err)
3221     + kfree(path);
3222     + return err;
3223     +}
3224     +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
3225     +#endif
3226     +
3227     +
3228     +/**
3229     + * xenbus_switch_state
3230     + * @dev: xenbus device
3231     + * @xbt: transaction handle
3232     + * @state: new state
3233     + *
3234     + * Advertise in the store a change of the given driver to the given @state.
3235     + * Return 0 on success, or -errno on error. On error, the device will switch
3236     + * to XenbusStateClosing, and the error will be saved in the store.
3237     + */
3238     int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
3239     {
3240     /* We check whether the state is currently set to the given value, and
3241     @@ -159,8 +233,8 @@
3242     }
3243    
3244    
3245     -void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
3246     - va_list ap)
3247     +static void _dev_error(struct xenbus_device *dev, int err,
3248     + const char *fmt, va_list ap)
3249     {
3250     int ret;
3251     unsigned int len;
3252     @@ -181,14 +255,16 @@
3253     path_buffer = error_path(dev);
3254    
3255     if (path_buffer == NULL) {
3256     - printk("xenbus: failed to write error node for %s (%s)\n",
3257     - dev->nodename, printf_buffer);
3258     + dev_err(&dev->dev,
3259     + "xenbus: failed to write error node for %s (%s)\n",
3260     + dev->nodename, printf_buffer);
3261     goto fail;
3262     }
3263    
3264     if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
3265     - printk("xenbus: failed to write error node for %s (%s)\n",
3266     - dev->nodename, printf_buffer);
3267     + dev_err(&dev->dev,
3268     + "xenbus: failed to write error node for %s (%s)\n",
3269     + dev->nodename, printf_buffer);
3270     goto fail;
3271     }
3272    
3273     @@ -200,6 +276,15 @@
3274     }
3275    
3276    
3277     +/**
3278     + * xenbus_dev_error
3279     + * @dev: xenbus device
3280     + * @err: error to report
3281     + * @fmt: error message format
3282     + *
3283     + * Report the given negative errno into the store, along with the given
3284     + * formatted message.
3285     + */
3286     void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
3287     ...)
3288     {
3289     @@ -212,6 +297,16 @@
3290     EXPORT_SYMBOL_GPL(xenbus_dev_error);
3291    
3292    
3293     +/**
3294     + * xenbus_dev_fatal
3295     + * @dev: xenbus device
3296     + * @err: error to report
3297     + * @fmt: error message format
3298     + *
3299     + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
3300     + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
3301     + * closedown of this driver and its peer.
3302     + */
3303     void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
3304     ...)
3305     {
3306     @@ -226,6 +321,15 @@
3307     EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
3308    
3309    
3310     +/**
3311     + * xenbus_grant_ring
3312     + * @dev: xenbus device
3313     + * @ring_mfn: mfn of ring to grant
3314     + *
3315     + * Grant access to the given @ring_mfn to the peer of the given device. Return
3316     + * 0 on success, or -errno on error. On error, the device will switch to
3317     + * XenbusStateClosing, and the error will be saved in the store.
3318     + */
3319     int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
3320     {
3321     int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
3322     @@ -236,6 +340,12 @@
3323     EXPORT_SYMBOL_GPL(xenbus_grant_ring);
3324    
3325    
3326     +/**
3327     + * Allocate an event channel for the given xenbus_device, assigning the newly
3328     + * created local port to *port. Return 0 on success, or -errno on error. On
3329     + * error, the device will switch to XenbusStateClosing, and the error will be
3330     + * saved in the store.
3331     + */
3332     int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
3333     {
3334     struct evtchn_alloc_unbound alloc_unbound;
3335     @@ -256,6 +366,38 @@
3336     EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
3337    
3338    
3339     +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */
3340     +/**
3341     + * Bind to an existing interdomain event channel in another domain. Returns 0
3342     + * on success and stores the local port in *port. On error, returns -errno,
3343     + * switches the device to XenbusStateClosing, and saves the error in XenStore.
3344     + */
3345     +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
3346     +{
3347     + struct evtchn_bind_interdomain bind_interdomain;
3348     + int err;
3349     +
3350     + bind_interdomain.remote_dom = dev->otherend_id;
3351     + bind_interdomain.remote_port = remote_port;
3352     +
3353     + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
3354     + &bind_interdomain);
3355     + if (err)
3356     + xenbus_dev_fatal(dev, err,
3357     + "binding to event channel %d from domain %d",
3358     + remote_port, dev->otherend_id);
3359     + else
3360     + *port = bind_interdomain.local_port;
3361     +
3362     + return err;
3363     +}
3364     +EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
3365     +#endif
3366     +
3367     +
3368     +/**
3369     + * Free an existing event channel. Returns 0 on success or -errno on error.
3370     + */
3371     int xenbus_free_evtchn(struct xenbus_device *dev, int port)
3372     {
3373     struct evtchn_close close;
3374     @@ -272,6 +414,191 @@
3375     EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
3376    
3377    
3378     +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */
3379     +/**
3380     + * xenbus_map_ring_valloc
3381     + * @dev: xenbus device
3382     + * @gnt_ref: grant reference
3383     + * @vaddr: pointer to address to be filled out by mapping
3384     + *
3385     + * Based on Rusty Russell's skeleton driver's map_page.
3386     + * Map a page of memory into this domain from another domain's grant table.
3387     + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
3388     + * page to that address, and sets *vaddr to that address.
3389     + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
3390     + * or -ENOMEM on error. If an error is returned, device will switch to
3391     + * XenbusStateClosing and the error message will be saved in XenStore.
3392     + */
3393     +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
3394     +{
3395     + struct gnttab_map_grant_ref op = {
3396     + .flags = GNTMAP_host_map,
3397     + .ref = gnt_ref,
3398     + .dom = dev->otherend_id,
3399     + };
3400     + struct vm_struct *area;
3401     +
3402     + *vaddr = NULL;
3403     +
3404     + area = alloc_vm_area(PAGE_SIZE);
3405     + if (!area)
3406     + return -ENOMEM;
3407     +
3408     + op.host_addr = (unsigned long)area->addr;
3409     +
3410     + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
3411     + BUG();
3412     +
3413     + if (op.status != GNTST_okay) {
3414     + free_vm_area(area);
3415     + xenbus_dev_fatal(dev, op.status,
3416     + "mapping in shared page %d from domain %d",
3417     + gnt_ref, dev->otherend_id);
3418     + return op.status;
3419     + }
3420     +
3421     + /* Stuff the handle in an unused field */
3422     + area->phys_addr = (unsigned long)op.handle;
3423     +
3424     + *vaddr = area->addr;
3425     + return 0;
3426     +}
3427     +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
3428     +
3429     +
3430     +/**
3431     + * xenbus_map_ring
3432     + * @dev: xenbus device
3433     + * @gnt_ref: grant reference
3434     + * @handle: pointer to grant handle to be filled
3435     + * @vaddr: address to be mapped to
3436     + *
3437     + * Map a page of memory into this domain from another domain's grant table.
3438     + * xenbus_map_ring does not allocate the virtual address space (you must do
3439     + * this yourself!). It only maps in the page to the specified address.
3440     + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
3441     + * on error. If an error is returned, device will switch to
3442     + * XenbusStateClosing and the error message will be saved in XenStore.
3443     + */
3444     +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
3445     + grant_handle_t *handle, void *vaddr)
3446     +{
3447     + struct gnttab_map_grant_ref op = {
3448     + .host_addr = (unsigned long)vaddr,
3449     + .flags = GNTMAP_host_map,
3450     + .ref = gnt_ref,
3451     + .dom = dev->otherend_id,
3452     + };
3453     +
3454     + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
3455     + BUG();
3456     +
3457     + if (op.status != GNTST_okay) {
3458     + xenbus_dev_fatal(dev, op.status,
3459     + "mapping in shared page %d from domain %d",
3460     + gnt_ref, dev->otherend_id);
3461     + } else
3462     + *handle = op.handle;
3463     +
3464     + return op.status;
3465     +}
3466     +EXPORT_SYMBOL_GPL(xenbus_map_ring);
3467     +
3468     +
3469     +/**
3470     + * xenbus_unmap_ring_vfree
3471     + * @dev: xenbus device
3472     + * @vaddr: addr to unmap
3473     + *
3474     + * Based on Rusty Russell's skeleton driver's unmap_page.
3475     + * Unmap a page of memory in this domain that was imported from another domain.
3476     + * Use xenbus_unmap_ring_vfree if you mapped in your memory with
3477     + * xenbus_map_ring_valloc (it will free the virtual address space).
3478     + * Returns 0 on success and returns GNTST_* on error
3479     + * (see xen/include/interface/grant_table.h).
3480     + */
3481     +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
3482     +{
3483     + struct vm_struct *area;
3484     + struct gnttab_unmap_grant_ref op = {
3485     + .host_addr = (unsigned long)vaddr,
3486     + };
3487     +
3488     + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
3489     + * method so that we don't have to muck with vmalloc internals here.
3490     + * We could force the user to hang on to their struct vm_struct from
3491     + * xenbus_map_ring_valloc, but these 6 lines considerably simplify
3492     + * this API.
3493     + */
3494     + read_lock(&vmlist_lock);
3495     + for (area = vmlist; area != NULL; area = area->next) {
3496     + if (area->addr == vaddr)
3497     + break;
3498     + }
3499     + read_unlock(&vmlist_lock);
3500     +
3501     + if (!area) {
3502     + xenbus_dev_error(dev, -ENOENT,
3503     + "can't find mapped virtual address %p", vaddr);
3504     + return GNTST_bad_virt_addr;
3505     + }
3506     +
3507     + op.handle = (grant_handle_t)area->phys_addr;
3508     +
3509     + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
3510     + BUG();
3511     +
3512     + if (op.status == GNTST_okay)
3513     + free_vm_area(area);
3514     + else
3515     + xenbus_dev_error(dev, op.status,
3516     + "unmapping page at handle %d error %d",
3517     + (int16_t)area->phys_addr, op.status);
3518     +
3519     + return op.status;
3520     +}
3521     +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
3522     +
3523     +
3524     +/**
3525     + * xenbus_unmap_ring
3526     + * @dev: xenbus device
3527     + * @handle: grant handle
3528     + * @vaddr: addr to unmap
3529     + *
3530     + * Unmap a page of memory in this domain that was imported from another domain.
3531     + * Returns 0 on success and returns GNTST_* on error
3532     + * (see xen/include/interface/grant_table.h).
3533     + */
3534     +int xenbus_unmap_ring(struct xenbus_device *dev,
3535     + grant_handle_t handle, void *vaddr)
3536     +{
3537     + struct gnttab_unmap_grant_ref op = {
3538     + .host_addr = (unsigned long)vaddr,
3539     + .handle = handle,
3540     + };
3541     +
3542     + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
3543     + BUG();
3544     +
3545     + if (op.status != GNTST_okay)
3546     + xenbus_dev_error(dev, op.status,
3547     + "unmapping page at handle %d error %d",
3548     + handle, op.status);
3549     +
3550     + return op.status;
3551     +}
3552     +EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
3553     +#endif
3554     +
3555     +
3556     +/**
3557     + * xenbus_read_driver_state
3558     + * @path: path for driver
3559     + *
3560     + * Return the state of the driver rooted at the given store path, or
3561     + * XenbusStateUnknown if no state can be read.
3562     + */
3563     enum xenbus_state xenbus_read_driver_state(const char *path)
3564     {
3565     enum xenbus_state result;
3566     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c
3567     --- linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 18:33:26.000000000 +0200
3568     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 18:27:40.000000000 +0200
3569     @@ -34,12 +34,15 @@
3570     #include <linux/interrupt.h>
3571     #include <linux/sched.h>
3572     #include <linux/err.h>
3573     -#include <linux/ptrace.h>
3574     -#include <linux/workqueue.h>
3575     -#include <xen/evtchn.h>
3576     #include <xen/xenbus.h>
3577     -
3578     +#if defined(CONFIG_XEN) || defined(MODULE)
3579     +#include <xen/evtchn.h>
3580     #include <asm/hypervisor.h>
3581     +#else
3582     +#include <asm/xen/hypervisor.h>
3583     +#include <xen/events.h>
3584     +#include <xen/page.h>
3585     +#endif
3586    
3587     #include "xenbus_comms.h"
3588    
3589     @@ -91,6 +94,13 @@
3590     return buf + MASK_XENSTORE_IDX(cons);
3591     }
3592    
3593     +/**
3594     + * xb_write - low level write
3595     + * @data: buffer to send
3596     + * @len: length of buffer
3597     + *
3598     + * Returns 0 on success, error otherwise.
3599     + */
3600     int xb_write(const void *data, unsigned len)
3601     {
3602     struct xenstore_domain_interface *intf = xen_store_interface;
3603     @@ -199,7 +209,9 @@
3604     return 0;
3605     }
3606    
3607     -/* Set up interrupt handler off store event channel. */
3608     +/**
3609     + * xb_init_comms - Set up interrupt handler off store event channel.
3610     + */
3611     int xb_init_comms(void)
3612     {
3613     struct xenstore_domain_interface *intf = xen_store_interface;
3614     @@ -219,7 +231,11 @@
3615     if (xenbus_irq)
3616     unbind_from_irqhandler(xenbus_irq, &xb_waitq);
3617    
3618     +#if defined(CONFIG_XEN) || defined(MODULE)
3619     err = bind_caller_port_to_irqhandler(
3620     +#else
3621     + err = bind_evtchn_to_irqhandler(
3622     +#endif
3623     xen_store_evtchn, wake_waiting,
3624     0, "xenbus", &xb_waitq);
3625     if (err <= 0) {
3626     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe_backend.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe_backend.c
3627     --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe_backend.c 2008-05-23 18:33:27.000000000 +0200
3628     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe_backend.c 2008-05-23 18:27:40.000000000 +0200
3629     @@ -236,7 +236,7 @@
3630     {
3631     DPRINTK("");
3632    
3633     - dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
3634     + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
3635     }
3636    
3637     static struct xenbus_watch be_watch = {
3638     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.c
3639     --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe.c 2008-05-23 18:33:27.000000000 +0200
3640     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.c 2008-05-23 18:27:40.000000000 +0200
3641     @@ -43,12 +43,11 @@
3642     #include <linux/mm.h>
3643     #include <linux/notifier.h>
3644     #include <linux/mutex.h>
3645     -#include <linux/module.h>
3646     +#include <linux/io.h>
3647    
3648     -#include <asm/io.h>
3649     #include <asm/page.h>
3650     -#include <asm/maddr.h>
3651     #include <asm/pgtable.h>
3652     +#if defined(CONFIG_XEN) || defined(MODULE)
3653     #include <asm/hypervisor.h>
3654     #include <xen/xenbus.h>
3655     #include <xen/xen_proc.h>
3656     @@ -57,6 +56,12 @@
3657     #ifdef MODULE
3658     #include <xen/hvm.h>
3659     #endif
3660     +#else
3661     +#include <asm/xen/hypervisor.h>
3662     +#include <xen/xenbus.h>
3663     +#include <xen/events.h>
3664     +#include <xen/page.h>
3665     +#endif
3666    
3667     #include "xenbus_comms.h"
3668     #include "xenbus_probe.h"
3669     @@ -168,7 +173,7 @@
3670     return read_otherend_details(xendev, "backend-id", "backend");
3671     }
3672    
3673     -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3674     +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
3675     static int xenbus_uevent_frontend(struct device *dev, char **envp,
3676     int num_envp, char *buffer, int buffer_size)
3677     {
3678     @@ -207,12 +212,16 @@
3679     .probe = xenbus_dev_probe,
3680     .remove = xenbus_dev_remove,
3681     .shutdown = xenbus_dev_shutdown,
3682     +#if defined(CONFIG_XEN) || defined(MODULE)
3683     .uevent = xenbus_uevent_frontend,
3684     #endif
3685     +#endif
3686     },
3687     +#if defined(CONFIG_XEN) || defined(MODULE)
3688     .dev = {
3689     .bus_id = "xen",
3690     },
3691     +#endif
3692     };
3693    
3694     static void otherend_changed(struct xenbus_watch *watch,
3695     @@ -228,14 +237,15 @@
3696     if (!dev->otherend ||
3697     strncmp(dev->otherend, vec[XS_WATCH_PATH],
3698     strlen(dev->otherend))) {
3699     - DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
3700     + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]);
3701     return;
3702     }
3703    
3704     state = xenbus_read_driver_state(dev->otherend);
3705    
3706     - DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
3707     - dev->otherend_watch.node, vec[XS_WATCH_PATH]);
3708     + dev_dbg(&dev->dev, "state is %d (%s), %s, %s",
3709     + state, xenbus_strstate(state), dev->otherend_watch.node,
3710     + vec[XS_WATCH_PATH]);
3711    
3712     #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3713     /*
3714     @@ -271,8 +281,13 @@
3715    
3716     static int watch_otherend(struct xenbus_device *dev)
3717     {
3718     +#if defined(CONFIG_XEN) || defined(MODULE)
3719     return xenbus_watch_path2(dev, dev->otherend, "state",
3720     &dev->otherend_watch, otherend_changed);
3721     +#else
3722     + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
3723     + "%s/%s", dev->otherend, "state");
3724     +#endif
3725     }
3726    
3727    
3728     @@ -298,9 +313,9 @@
3729    
3730     err = talk_to_otherend(dev);
3731     if (err) {
3732     - printk(KERN_WARNING
3733     - "xenbus_probe: talk_to_otherend on %s failed.\n",
3734     - dev->nodename);
3735     + dev_warn(&dev->dev,
3736     + "xenbus_probe: talk_to_otherend on %s failed.\n",
3737     + dev->nodename);
3738     return err;
3739     }
3740    
3741     @@ -310,9 +325,9 @@
3742    
3743     err = watch_otherend(dev);
3744     if (err) {
3745     - printk(KERN_WARNING
3746     - "xenbus_probe: watch_otherend on %s failed.\n",
3747     - dev->nodename);
3748     + dev_warn(&dev->dev,
3749     + "xenbus_probe: watch_otherend on %s failed.\n",
3750     + dev->nodename);
3751     return err;
3752     }
3753    
3754     @@ -349,14 +364,15 @@
3755    
3756     get_device(&dev->dev);
3757     if (dev->state != XenbusStateConnected) {
3758     - printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
3759     - dev->nodename, xenbus_strstate(dev->state));
3760     + dev_info(&dev->dev, "%s: %s: %s != Connected, skipping\n", __FUNCTION__,
3761     + dev->nodename, xenbus_strstate(dev->state));
3762     goto out;
3763     }
3764     xenbus_switch_state(dev, XenbusStateClosing);
3765     timeout = wait_for_completion_timeout(&dev->down, timeout);
3766     if (!timeout)
3767     - printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
3768     + dev_info(&dev->dev, "%s: %s timeout closing device\n",
3769     + __FUNCTION__, dev->nodename);
3770     out:
3771     put_device(&dev->dev);
3772     }
3773     @@ -544,7 +560,9 @@
3774     xendev->devicetype = tmpstring;
3775     init_completion(&xendev->down);
3776    
3777     +#if defined(CONFIG_XEN) || defined(MODULE)
3778     xendev->dev.parent = &bus->dev;
3779     +#endif
3780     xendev->dev.bus = &bus->bus;
3781     xendev->dev.release = xenbus_dev_release;
3782    
3783     @@ -559,15 +577,16 @@
3784    
3785     err = device_create_file(&xendev->dev, &dev_attr_nodename);
3786     if (err)
3787     - goto unregister;
3788     + goto fail_unregister;
3789     +
3790     err = device_create_file(&xendev->dev, &dev_attr_devtype);
3791     if (err)
3792     - goto unregister;
3793     + goto fail_remove_file;
3794    
3795     return 0;
3796     -unregister:
3797     +fail_remove_file:
3798     device_remove_file(&xendev->dev, &dev_attr_nodename);
3799     - device_remove_file(&xendev->dev, &dev_attr_devtype);
3800     +fail_unregister:
3801     device_unregister(&xendev->dev);
3802     fail:
3803     kfree(xendev);
3804     @@ -580,7 +599,8 @@
3805     char *nodename;
3806     int err;
3807    
3808     - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
3809     + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
3810     + xenbus_frontend.root, type, name);
3811     if (!nodename)
3812     return -ENOMEM;
3813    
3814     @@ -656,7 +676,7 @@
3815     return (len == 0) ? i : -ERANGE;
3816     }
3817    
3818     -void dev_changed(const char *node, struct xen_bus_type *bus)
3819     +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
3820     {
3821     int exists, rootlen;
3822     struct xenbus_device *dev;
3823     @@ -664,7 +684,7 @@
3824     const char *p, *root;
3825    
3826     if (bus->error || char_count(node, '/') < 2)
3827     - return;
3828     + return;
3829    
3830     exists = xenbus_exists(XBT_NIL, node, "");
3831     if (!exists) {
3832     @@ -698,7 +718,7 @@
3833     {
3834     DPRINTK("");
3835    
3836     - dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
3837     + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
3838     }
3839    
3840     /* We watch for devices appearing and vanishing. */
3841     @@ -774,9 +794,9 @@
3842    
3843     if (drv->resume) {
3844     err = drv->resume(xdev);
3845     - if (err) {
3846     + if (err) {
3847     printk(KERN_WARNING
3848     - "xenbus: resume %s failed: %i\n",
3849     + "xenbus: resume %s failed: %i\n",
3850     dev->bus_id, err);
3851     return err;
3852     }
3853     @@ -901,10 +921,16 @@
3854     }
3855     #endif
3856    
3857     -static int xenbus_probe_init(void)
3858     +#ifndef MODULE
3859     +static int __init xenbus_probe_init(void)
3860     +#else
3861     +static int __devinit xenbus_probe_init(void)
3862     +#endif
3863     {
3864     int err = 0;
3865     +#if defined(CONFIG_XEN) || defined(MODULE)
3866     unsigned long page = 0;
3867     +#endif
3868    
3869     DPRINTK("");
3870    
3871     @@ -923,6 +949,7 @@
3872     * Domain0 doesn't have a store_evtchn or store_mfn yet.
3873     */
3874     if (is_initial_xendomain()) {
3875     +#if defined(CONFIG_XEN) || defined(MODULE)
3876     struct evtchn_alloc_unbound alloc_unbound;
3877    
3878     /* Allocate page. */
3879     @@ -960,10 +987,13 @@
3880     if (xsd_port_intf)
3881     xsd_port_intf->read_proc = xsd_port_read;
3882     #endif
3883     +#else
3884     + /* dom0 not yet supported */
3885     +#endif
3886     xen_store_interface = mfn_to_virt(xen_store_mfn);
3887     } else {
3888     xenstored_ready = 1;
3889     -#ifdef CONFIG_XEN
3890     +#ifndef MODULE
3891     xen_store_evtchn = xen_start_info->store_evtchn;
3892     xen_store_mfn = xen_start_info->store_mfn;
3893     xen_store_interface = mfn_to_virt(xen_store_mfn);
3894     @@ -976,7 +1006,9 @@
3895     }
3896    
3897    
3898     +#if defined(CONFIG_XEN) || defined(MODULE)
3899     xenbus_dev_init();
3900     +#endif
3901    
3902     /* Initialize the interface to xenstore. */
3903     err = xs_init();
3904     @@ -986,6 +1018,7 @@
3905     goto err;
3906     }
3907    
3908     +#if defined(CONFIG_XEN) || defined(MODULE)
3909     /* Register ourselves with the kernel device subsystem */
3910     if (!xenbus_frontend.error) {
3911     xenbus_frontend.error = device_register(&xenbus_frontend.dev);
3912     @@ -996,6 +1029,7 @@
3913     xenbus_frontend.error);
3914     }
3915     }
3916     +#endif
3917     xenbus_backend_device_register();
3918    
3919     if (!is_initial_xendomain())
3920     @@ -1004,8 +1038,10 @@
3921     return 0;
3922    
3923     err:
3924     +#if defined(CONFIG_XEN) || defined(MODULE)
3925     if (page)
3926     free_page(page);
3927     +#endif
3928    
3929     /*
3930     * Do not unregister the xenbus front/backend buses here. The buses
3931     @@ -1016,11 +1052,15 @@
3932     return err;
3933     }
3934    
3935     -#ifdef CONFIG_XEN
3936     +#ifndef MODULE
3937     postcore_initcall(xenbus_probe_init);
3938     +#ifdef CONFIG_XEN
3939     MODULE_LICENSE("Dual BSD/GPL");
3940     #else
3941     -int xenbus_init(void)
3942     +MODULE_LICENSE("GPL");
3943     +#endif
3944     +#else
3945     +int __devinit xenbus_init(void)
3946     {
3947     return xenbus_probe_init();
3948     }
3949     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe.h linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.h
3950     --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe.h 2008-05-23 18:33:27.000000000 +0200
3951     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.h 2008-05-23 18:27:40.000000000 +0200
3952     @@ -56,7 +56,9 @@
3953     int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
3954     int (*probe)(const char *type, const char *dir);
3955     struct bus_type bus;
3956     +#if defined(CONFIG_XEN) || defined(MODULE)
3957     struct device dev;
3958     +#endif
3959     };
3960    
3961     extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
3962     @@ -71,7 +73,7 @@
3963     const char *nodename);
3964     extern int xenbus_probe_devices(struct xen_bus_type *bus);
3965    
3966     -extern void dev_changed(const char *node, struct xen_bus_type *bus);
3967     +extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
3968    
3969     #endif
3970    
3971     diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_xs.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_xs.c
3972     --- linux-2.6.25/drivers/xen/xenbus/xenbus_xs.c 2008-05-23 18:33:03.000000000 +0200
3973     +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_xs.c 2008-05-23 18:27:40.000000000 +0200
3974     @@ -221,7 +221,7 @@
3975     }
3976    
3977     for (i = 0; i < num_vecs; i++) {
3978     - err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
3979     + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
3980     if (err) {
3981     mutex_unlock(&xs_state.request_mutex);
3982     return ERR_PTR(err);
3983     @@ -629,7 +629,9 @@
3984     char token[sizeof(watch) * 2 + 1];
3985     int err;
3986    
3987     +#if defined(CONFIG_XEN) || defined(MODULE)
3988     BUG_ON(watch->flags & XBWF_new_thread);
3989     +#endif
3990    
3991     sprintf(token, "%lX", (long)watch);
3992    
3993     @@ -648,6 +650,11 @@
3994    
3995     up_read(&xs_state.watch_mutex);
3996    
3997     + /* Make sure there are no callbacks running currently (unless
3998     + it's us) */
3999     + if (current->pid != xenwatch_pid)
4000     + mutex_lock(&xenwatch_mutex);
4001     +
4002     /* Cancel pending watch events. */
4003     spin_lock(&watch_events_lock);
4004     list_for_each_entry_safe(msg, tmp, &watch_events, list) {
4005     @@ -659,11 +666,8 @@
4006     }
4007     spin_unlock(&watch_events_lock);
4008    
4009     - /* Flush any currently-executing callback, unless we are it. :-) */
4010     - if (current->pid != xenwatch_pid) {
4011     - mutex_lock(&xenwatch_mutex);
4012     + if (current->pid != xenwatch_pid)
4013     mutex_unlock(&xenwatch_mutex);
4014     - }
4015     }
4016     EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
4017    
4018     @@ -701,6 +705,7 @@
4019     up_write(&xs_state.transaction_mutex);
4020     }
4021    
4022     +#if defined(CONFIG_XEN) || defined(MODULE)
4023     static int xenwatch_handle_callback(void *data)
4024     {
4025     struct xs_stored_msg *msg = data;
4026     @@ -718,6 +723,7 @@
4027    
4028     return 0;
4029     }
4030     +#endif
4031    
4032     static int xenwatch_thread(void *unused)
4033     {
4034     @@ -747,6 +753,7 @@
4035    
4036     msg = list_entry(ent, struct xs_stored_msg, list);
4037    
4038     +#if defined(CONFIG_XEN) || defined(MODULE)
4039     /*
4040     * Unlock the mutex before running an XBWF_new_thread
4041     * handler. kthread_run can block which can deadlock
4042     @@ -763,6 +770,15 @@
4043     xenwatch_handle_callback(msg);
4044     mutex_unlock(&xenwatch_mutex);
4045     }
4046     +#else
4047     + msg->u.watch.handle->callback(
4048     + msg->u.watch.handle,
4049     + (const char **)msg->u.watch.vec,
4050     + msg->u.watch.vec_size);
4051     + mutex_unlock(&xenwatch_mutex);
4052     + kfree(msg->u.watch.vec);
4053     + kfree(msg);
4054     +#endif
4055     }
4056    
4057     return 0;
4058     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h
4059     --- linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 18:33:27.000000000 +0200
4060     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 18:27:40.000000000 +0200
4061     @@ -11,8 +11,6 @@
4062     #ifndef __E820_HEADER
4063     #define __E820_HEADER
4064    
4065     -#include <linux/mmzone.h>
4066     -
4067     #define E820MAP 0x2d0 /* our map */
4068     #define E820MAX 128 /* number of entries in E820MAP */
4069     #define E820NR 0x1e8 /* # entries in E820MAP */
4070     @@ -30,7 +28,7 @@
4071     } __attribute__((packed));
4072    
4073     struct e820map {
4074     - int nr_map;
4075     + u32 nr_map;
4076     struct e820entry map[E820MAX];
4077     };
4078    
4079     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h
4080     --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 18:33:27.000000000 +0200
4081     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 18:27:40.000000000 +0200
4082     @@ -53,6 +53,8 @@
4083     enum fixed_addresses {
4084     FIX_HOLE,
4085     FIX_VDSO,
4086     + FIX_DBGP_BASE,
4087     + FIX_EARLYCON_MEM_BASE,
4088     #ifdef CONFIG_X86_LOCAL_APIC
4089     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
4090     #endif
4091     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h
4092     --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 18:33:27.000000000 +0200
4093     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 18:27:40.000000000 +0200
4094     @@ -23,9 +23,9 @@
4095     * compile time, but to set the physical address only
4096     * in the boot process.
4097     *
4098     - * these 'compile-time allocated' memory buffers are
4099     - * fixed-size 4k pages. (or larger if used with an increment
4100     - * highger than 1) use fixmap_set(idx,phys) to associate
4101     + * These 'compile-time allocated' memory buffers are
4102     + * fixed-size 4k pages (or larger if used with an increment
4103     + * higher than 1). Use set_fixmap(idx,phys) to associate
4104     * physical memory with fixmap indices.
4105     *
4106     * TLB entries of such buffers will not be flushed across
4107     @@ -36,6 +36,8 @@
4108     VSYSCALL_LAST_PAGE,
4109     VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
4110     VSYSCALL_HPET,
4111     + FIX_DBGP_BASE,
4112     + FIX_EARLYCON_MEM_BASE,
4113     FIX_HPET_BASE,
4114     #ifdef CONFIG_X86_LOCAL_APIC
4115     FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
4116     @@ -105,7 +107,7 @@
4117     if (idx >= __end_of_fixed_addresses)
4118     __this_fixmap_does_not_exist();
4119    
4120     - return __fix_to_virt(idx);
4121     + return __fix_to_virt(idx);
4122     }
4123    
4124     #endif
4125     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h
4126     --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 18:33:27.000000000 +0200
4127     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 18:27:40.000000000 +0200
4128     @@ -97,6 +97,26 @@
4129     #endif
4130    
4131     #ifndef __ASSEMBLY__
4132     +
4133     +/* Interrupt handlers registered during init_IRQ */
4134     +void apic_timer_interrupt(void);
4135     +void spurious_interrupt(void);
4136     +void error_interrupt(void);
4137     +void reschedule_interrupt(void);
4138     +void call_function_interrupt(void);
4139     +void irq_move_cleanup_interrupt(void);
4140     +void invalidate_interrupt0(void);
4141     +void invalidate_interrupt1(void);
4142     +void invalidate_interrupt2(void);
4143     +void invalidate_interrupt3(void);
4144     +void invalidate_interrupt4(void);
4145     +void invalidate_interrupt5(void);
4146     +void invalidate_interrupt6(void);
4147     +void invalidate_interrupt7(void);
4148     +void thermal_interrupt(void);
4149     +void threshold_interrupt(void);
4150     +void i8254_timer_resume(void);
4151     +
4152     typedef int vector_irq_t[NR_VECTORS];
4153     DECLARE_PER_CPU(vector_irq_t, vector_irq);
4154     extern void __setup_vector_irq(int cpu);
4155     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h
4156     --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 18:33:27.000000000 +0200
4157     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 18:27:40.000000000 +0200
4158     @@ -125,6 +125,9 @@
4159     * writew/writel functions and the other mmio helpers. The returned
4160     * address is not guaranteed to be usable directly as a virtual
4161     * address.
4162     + *
4163     + * If the area you are trying to map is a PCI BAR you should have a
4164     + * look at pci_iomap().
4165     */
4166    
4167     static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
4168     @@ -142,6 +145,7 @@
4169     */
4170     extern void *bt_ioremap(unsigned long offset, unsigned long size);
4171     extern void bt_iounmap(void *addr, unsigned long size);
4172     +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
4173    
4174     /* Use early IO mappings for DMI because it's initialized early */
4175     #define dmi_ioremap bt_ioremap
4176     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h
4177     --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 18:33:27.000000000 +0200
4178     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 18:27:40.000000000 +0200
4179     @@ -162,6 +162,7 @@
4180     */
4181     extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
4182     extern void iounmap(volatile void __iomem *addr);
4183     +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
4184    
4185     /*
4186     * ISA I/O bus memory addresses are 1:1 with the physical address.
4187     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/maddr_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/maddr_32.h
4188     --- linux-2.6.25/include/asm-x86/mach-xen/asm/maddr_32.h 2008-05-23 18:32:29.000000000 +0200
4189     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/maddr_32.h 2008-05-23 18:27:40.000000000 +0200
4190     @@ -155,6 +155,7 @@
4191    
4192     #ifdef CONFIG_X86_PAE
4193     #define __pte_ma(x) ((pte_t) { (x), (maddr_t)(x) >> 32 } )
4194     +extern unsigned long long __supported_pte_mask;
4195     static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot)
4196     {
4197     pte_t pte;
4198     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_64.h
4199     --- linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_64.h 2008-05-23 18:33:27.000000000 +0200
4200     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_64.h 2008-05-23 18:27:40.000000000 +0200
4201     @@ -15,6 +15,7 @@
4202     rwlock_t ldtlock;
4203     int size;
4204     struct semaphore sem;
4205     + void *vdso;
4206     #ifdef CONFIG_XEN
4207     unsigned pinned:1;
4208     unsigned has_foreign_mappings:1;
4209     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_context_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_context_32.h
4210     --- linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-05-23 18:33:27.000000000 +0200
4211     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-05-23 18:27:40.000000000 +0200
4212     @@ -51,6 +51,8 @@
4213     : : "r" (0) );
4214     }
4215    
4216     +void leave_mm(unsigned long cpu);
4217     +
4218     static inline void switch_mm(struct mm_struct *prev,
4219     struct mm_struct *next,
4220     struct task_struct *tsk)
4221     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/page_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/page_64.h
4222     --- linux-2.6.25/include/asm-x86/mach-xen/asm/page_64.h 2008-05-23 18:33:27.000000000 +0200
4223     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/page_64.h 2008-05-23 18:27:40.000000000 +0200
4224     @@ -72,7 +72,8 @@
4225     #define clear_user_page(page, vaddr, pg) clear_page(page)
4226     #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
4227    
4228     -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
4229     +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
4230     + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
4231     #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
4232    
4233     /*
4234     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pci_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_32.h
4235     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pci_32.h 2008-05-23 18:32:29.000000000 +0200
4236     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_32.h 2008-05-23 18:27:40.000000000 +0200
4237     @@ -3,6 +3,14 @@
4238    
4239    
4240     #ifdef __KERNEL__
4241     +
4242     +struct pci_sysdata {
4243     + int node; /* NUMA node */
4244     +};
4245     +
4246     +/* scan a bus after allocating a pci_sysdata for it */
4247     +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
4248     +
4249     #include <linux/mm.h> /* for struct page */
4250    
4251     /* Can be used to override the logic in pci_scan_bus for skipping
4252     @@ -81,48 +89,11 @@
4253    
4254     #endif
4255    
4256     -/* This is always fine. */
4257     -#define pci_dac_dma_supported(pci_dev, mask) (1)
4258     -
4259     -static inline dma64_addr_t
4260     -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
4261     -{
4262     - return ((dma64_addr_t) page_to_phys(page) +
4263     - (dma64_addr_t) offset);
4264     -}
4265     -
4266     -static inline struct page *
4267     -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
4268     -{
4269     - return pfn_to_page(dma_addr >> PAGE_SHIFT);
4270     -}
4271     -
4272     -static inline unsigned long
4273     -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
4274     -{
4275     - return (dma_addr & ~PAGE_MASK);
4276     -}
4277     -
4278     -static inline void
4279     -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4280     -{
4281     -}
4282     -
4283     -static inline void
4284     -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4285     -{
4286     - flush_write_buffers();
4287     -}
4288     -
4289     #define HAVE_PCI_MMAP
4290     extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
4291     enum pci_mmap_state mmap_state, int write_combine);
4292    
4293    
4294     -static inline void pcibios_add_platform_entries(struct pci_dev *dev)
4295     -{
4296     -}
4297     -
4298     #ifdef CONFIG_PCI
4299     static inline void pci_dma_burst_advice(struct pci_dev *pdev,
4300     enum pci_dma_burst_strategy *strat,
4301     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pci_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_64.h
4302     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pci_64.h 2008-05-23 18:32:29.000000000 +0200
4303     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_64.h 2008-05-23 18:27:40.000000000 +0200
4304     @@ -5,6 +5,27 @@
4305    
4306     #ifdef __KERNEL__
4307    
4308     +struct pci_sysdata {
4309     + int node; /* NUMA node */
4310     + void* iommu; /* IOMMU private data */
4311     +};
4312     +
4313     +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
4314     +
4315     +#ifdef CONFIG_CALGARY_IOMMU
4316     +static inline void* pci_iommu(struct pci_bus *bus)
4317     +{
4318     + struct pci_sysdata *sd = bus->sysdata;
4319     + return sd->iommu;
4320     +}
4321     +
4322     +static inline void set_pci_iommu(struct pci_bus *bus, void *val)
4323     +{
4324     + struct pci_sysdata *sd = bus->sysdata;
4325     + sd->iommu = val;
4326     +}
4327     +#endif /* CONFIG_CALGARY_IOMMU */
4328     +
4329     #include <linux/mm.h> /* for struct page */
4330    
4331     /* Can be used to override the logic in pci_scan_bus for skipping
4332     @@ -56,14 +77,6 @@
4333    
4334     #if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
4335    
4336     -/*
4337     - * x86-64 always supports DAC, but sometimes it is useful to force
4338     - * devices through the IOMMU to get automatic sg list merging.
4339     - * Optional right now.
4340     - */
4341     -extern int iommu_sac_force;
4342     -#define pci_dac_dma_supported(pci_dev, mask) (!iommu_sac_force)
4343     -
4344     #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
4345     dma_addr_t ADDR_NAME;
4346     #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
4347     @@ -97,8 +110,6 @@
4348     #else
4349     /* No IOMMU */
4350    
4351     -#define pci_dac_dma_supported(pci_dev, mask) 1
4352     -
4353     #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
4354     #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
4355     #define pci_unmap_addr(PTR, ADDR_NAME) (0)
4356     @@ -110,36 +121,6 @@
4357    
4358     #include <asm-generic/pci-dma-compat.h>
4359    
4360     -static inline dma64_addr_t
4361     -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
4362     -{
4363     - return ((dma64_addr_t) page_to_phys(page) +
4364     - (dma64_addr_t) offset);
4365     -}
4366     -
4367     -static inline struct page *
4368     -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
4369     -{
4370     - return virt_to_page(__va(dma_addr));
4371     -}
4372     -
4373     -static inline unsigned long
4374     -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
4375     -{
4376     - return (dma_addr & ~PAGE_MASK);
4377     -}
4378     -
4379     -static inline void
4380     -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4381     -{
4382     -}
4383     -
4384     -static inline void
4385     -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4386     -{
4387     - flush_write_buffers();
4388     -}
4389     -
4390     #ifdef CONFIG_PCI
4391     static inline void pci_dma_burst_advice(struct pci_dev *pdev,
4392     enum pci_dma_burst_strategy *strat,
4393     @@ -154,10 +135,6 @@
4394     extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
4395     enum pci_mmap_state mmap_state, int write_combine);
4396    
4397     -static inline void pcibios_add_platform_entries(struct pci_dev *dev)
4398     -{
4399     -}
4400     -
4401     #endif /* __KERNEL__ */
4402    
4403     /* generic pci stuff */
4404     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgalloc_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgalloc_32.h
4405     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-05-23 18:33:27.000000000 +0200
4406     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-05-23 18:27:40.000000000 +0200
4407     @@ -5,7 +5,7 @@
4408     #include <linux/mm.h> /* for struct page */
4409     #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
4410    
4411     -#define paravirt_alloc_pt(pfn) do { } while (0)
4412     +#define paravirt_alloc_pt(mm, pfn) do { } while (0)
4413     #define paravirt_alloc_pd(pfn) do { } while (0)
4414     #define paravirt_alloc_pd(pfn) do { } while (0)
4415     #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
4416     @@ -14,14 +14,14 @@
4417    
4418     #define pmd_populate_kernel(mm, pmd, pte) \
4419     do { \
4420     - paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
4421     + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \
4422     set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
4423     } while (0)
4424    
4425     #define pmd_populate(mm, pmd, pte) \
4426     do { \
4427     unsigned long pfn = page_to_pfn(pte); \
4428     - paravirt_alloc_pt(pfn); \
4429     + paravirt_alloc_pt(mm, pfn); \
4430     if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \
4431     if (!PageHighMem(pte)) \
4432     BUG_ON(HYPERVISOR_update_va_mapping( \
4433     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h
4434     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 18:33:27.000000000 +0200
4435     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 18:27:40.000000000 +0200
4436     @@ -82,14 +82,6 @@
4437     #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
4438    
4439     /*
4440     - * All present user pages are user-executable:
4441     - */
4442     -static inline int pte_exec(pte_t pte)
4443     -{
4444     - return pte_user(pte);
4445     -}
4446     -
4447     -/*
4448     * All present pages are kernel-executable:
4449     */
4450     static inline int pte_exec_kernel(pte_t pte)
4451     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h
4452     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 18:33:27.000000000 +0200
4453     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 18:27:40.000000000 +0200
4454     @@ -26,9 +26,6 @@
4455     #include <linux/spinlock.h>
4456     #include <linux/sched.h>
4457    
4458     -/* Is this pagetable pinned? */
4459     -#define PG_pinned PG_arch_1
4460     -
4461     struct vm_area_struct;
4462    
4463     /*
4464     @@ -82,7 +79,7 @@
4465     * area for the same reason. ;)
4466     */
4467     #define VMALLOC_OFFSET (8*1024*1024)
4468     -#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \
4469     +#define VMALLOC_START (((unsigned long) high_memory + \
4470     2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
4471     #ifdef CONFIG_HIGHMEM
4472     # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
4473     @@ -231,8 +228,6 @@
4474     * The following only work if pte_present() is true.
4475     * Undefined behaviour if not..
4476     */
4477     -static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
4478     -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
4479     static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
4480     static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
4481     static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
4482     @@ -243,13 +238,9 @@
4483     */
4484     static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; }
4485    
4486     -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
4487     -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
4488     static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
4489     static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
4490     static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
4491     -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
4492     -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
4493     static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
4494     static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
4495     static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
4496     @@ -295,22 +286,20 @@
4497     #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
4498     ({ \
4499     int __changed = !pte_same(*(ptep), entry); \
4500     - if (__changed && (dirty)) \
4501     - ptep_establish(vma, address, ptep, entry); \
4502     + if (__changed && (dirty)) { \
4503     + if ( likely((vma)->vm_mm == current->mm) ) { \
4504     + BUG_ON(HYPERVISOR_update_va_mapping(address, \
4505     + entry, \
4506     + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
4507     + UVMF_INVLPG|UVMF_MULTI)); \
4508     + } else { \
4509     + xen_l1_entry_update(ptep, entry); \
4510     + flush_tlb_page(vma, address); \
4511     + } \
4512     + } \
4513     __changed; \
4514     })
4515    
4516     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
4517     -#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \
4518     - int __ret = 0; \
4519     - if (pte_dirty(*(ptep))) \
4520     - __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \
4521     - &(ptep)->pte_low); \
4522     - if (__ret) \
4523     - pte_update((vma)->vm_mm, addr, ptep); \
4524     - __ret; \
4525     -})
4526     -
4527     #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
4528     #define ptep_test_and_clear_young(vma, addr, ptep) ({ \
4529     int __ret = 0; \
4530     @@ -322,37 +311,6 @@
4531     __ret; \
4532     })
4533    
4534     -/*
4535     - * Rules for using ptep_establish: the pte MUST be a user pte, and
4536     - * must be a present->present transition.
4537     - */
4538     -#define __HAVE_ARCH_PTEP_ESTABLISH
4539     -#define ptep_establish(vma, address, ptep, pteval) \
4540     -do { \
4541     - if ( likely((vma)->vm_mm == current->mm) ) { \
4542     - BUG_ON(HYPERVISOR_update_va_mapping(address, \
4543     - pteval, \
4544     - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
4545     - UVMF_INVLPG|UVMF_MULTI)); \
4546     - } else { \
4547     - xen_l1_entry_update(ptep, pteval); \
4548     - flush_tlb_page(vma, address); \
4549     - } \
4550     -} while (0)
4551     -
4552     -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
4553     -#define ptep_clear_flush_dirty(vma, address, ptep) \
4554     -({ \
4555     - pte_t __pte = *(ptep); \
4556     - int __dirty = pte_dirty(__pte); \
4557     - __pte = pte_mkclean(__pte); \
4558     - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
4559     - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
4560     - else if (__dirty) \
4561     - (ptep)->pte_low = __pte.pte_low; \
4562     - __dirty; \
4563     -})
4564     -
4565     #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
4566     #define ptep_clear_flush_young(vma, address, ptep) \
4567     ({ \
4568     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h
4569     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 18:33:27.000000000 +0200
4570     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 18:27:40.000000000 +0200
4571     @@ -23,26 +23,11 @@
4572     #define pud_present(pud) 1
4573    
4574     /*
4575     - * Is the pte executable?
4576     - */
4577     -static inline int pte_x(pte_t pte)
4578     -{
4579     - return !(__pte_val(pte) & _PAGE_NX);
4580     -}
4581     -
4582     -/*
4583     - * All present user-pages with !NX bit are user-executable:
4584     - */
4585     -static inline int pte_exec(pte_t pte)
4586     -{
4587     - return pte_user(pte) && pte_x(pte);
4588     -}
4589     -/*
4590     * All present pages with !NX bit are kernel-executable:
4591     */
4592     static inline int pte_exec_kernel(pte_t pte)
4593     {
4594     - return pte_x(pte);
4595     + return !(__pte_val(pte) & _PAGE_NX);
4596     }
4597    
4598     /* Rules for using set_pte: the pte being assigned *must* be
4599     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h
4600     --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 18:33:27.000000000 +0200
4601     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 18:27:40.000000000 +0200
4602     @@ -332,21 +332,15 @@
4603     * Undefined behaviour if not..
4604     */
4605     #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
4606     -static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
4607     -static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
4608     -static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
4609     static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
4610     static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
4611     static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
4612     static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
4613     static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
4614    
4615     -static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
4616     -static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
4617     static inline pte_t pte_mkclean(pte_t pte) { __pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
4618     static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
4619     static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
4620     -static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
4621     static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
4622     static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
4623     static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
4624     @@ -354,13 +348,6 @@
4625     static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
4626     static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
4627    
4628     -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
4629     -{
4630     - if (!pte_dirty(*ptep))
4631     - return 0;
4632     - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
4633     -}
4634     -
4635     static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
4636     {
4637     if (!pte_young(*ptep))
4638     @@ -500,19 +487,6 @@
4639     __changed; \
4640     })
4641    
4642     -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
4643     -#define ptep_clear_flush_dirty(vma, address, ptep) \
4644     -({ \
4645     - pte_t __pte = *(ptep); \
4646     - int __dirty = pte_dirty(__pte); \
4647     - __pte = pte_mkclean(__pte); \
4648     - if ((vma)->vm_mm->context.pinned) \
4649     - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
4650     - else if (__dirty) \
4651     - set_pte(ptep, __pte); \
4652     - __dirty; \
4653     -})
4654     -
4655     #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
4656     #define ptep_clear_flush_young(vma, address, ptep) \
4657     ({ \
4658     @@ -570,6 +544,8 @@
4659     #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
4660     xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
4661    
4662     +pte_t *lookup_address(unsigned long addr);
4663     +
4664     #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
4665     direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
4666    
4667     @@ -587,7 +563,6 @@
4668     (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
4669    
4670     #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
4671     -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
4672     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
4673     #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
4674     #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
4675     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h
4676     --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 18:33:27.000000000 +0200
4677     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 18:27:40.000000000 +0200
4678     @@ -89,7 +89,6 @@
4679     #define X86_VENDOR_UMC 3
4680     #define X86_VENDOR_NEXGEN 4
4681     #define X86_VENDOR_CENTAUR 5
4682     -#define X86_VENDOR_RISE 6
4683     #define X86_VENDOR_TRANSMETA 7
4684     #define X86_VENDOR_NSC 8
4685     #define X86_VENDOR_NUM 9
4686     @@ -122,6 +121,7 @@
4687     extern void identify_boot_cpu(void);
4688     extern void identify_secondary_cpu(struct cpuinfo_x86 *);
4689     extern void print_cpu_info(struct cpuinfo_x86 *);
4690     +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
4691     extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
4692     extern unsigned short num_cache_leaves;
4693    
4694     @@ -171,17 +171,6 @@
4695     write_cr4(cr4);
4696     }
4697    
4698     -/*
4699     - * NSC/Cyrix CPU indexed register access macros
4700     - */
4701     -
4702     -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
4703     -
4704     -#define setCx86(reg, data) do { \
4705     - outb((reg), 0x22); \
4706     - outb((data), 0x23); \
4707     -} while (0)
4708     -
4709     /* Stop speculative execution */
4710     static inline void sync_core(void)
4711     {
4712     @@ -230,6 +219,10 @@
4713    
4714     #define HAVE_ARCH_PICK_MMAP_LAYOUT
4715    
4716     +extern void hard_disable_TSC(void);
4717     +extern void disable_TSC(void);
4718     +extern void hard_enable_TSC(void);
4719     +
4720     /*
4721     * Size of io_bitmap.
4722     */
4723     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h
4724     --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 18:33:27.000000000 +0200
4725     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 18:27:40.000000000 +0200
4726     @@ -83,7 +83,6 @@
4727     #define X86_VENDOR_UMC 3
4728     #define X86_VENDOR_NEXGEN 4
4729     #define X86_VENDOR_CENTAUR 5
4730     -#define X86_VENDOR_RISE 6
4731     #define X86_VENDOR_TRANSMETA 7
4732     #define X86_VENDOR_NUM 8
4733     #define X86_VENDOR_UNKNOWN 0xff
4734     @@ -100,6 +99,7 @@
4735    
4736     extern void identify_cpu(struct cpuinfo_x86 *);
4737     extern void print_cpu_info(struct cpuinfo_x86 *);
4738     +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
4739     extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
4740     extern unsigned short num_cache_leaves;
4741    
4742     @@ -377,12 +377,10 @@
4743     asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
4744     }
4745    
4746     -#define cpu_has_fpu 1
4747     -
4748     #define ARCH_HAS_PREFETCH
4749     static inline void prefetch(void *x)
4750     {
4751     - asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
4752     + asm volatile("prefetcht0 (%0)" :: "r" (x));
4753     }
4754    
4755     #define ARCH_HAS_PREFETCHW 1
4756     @@ -400,17 +398,6 @@
4757    
4758     #define cpu_relax() rep_nop()
4759    
4760     -/*
4761     - * NSC/Cyrix CPU indexed register access macros
4762     - */
4763     -
4764     -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
4765     -
4766     -#define setCx86(reg, data) do { \
4767     - outb((reg), 0x22); \
4768     - outb((data), 0x23); \
4769     -} while (0)
4770     -
4771     static inline void serialize_cpu(void)
4772     {
4773     __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
4774     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h
4775     --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 18:33:27.000000000 +0200
4776     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 18:27:40.000000000 +0200
4777     @@ -205,11 +205,6 @@
4778     */
4779    
4780    
4781     -/*
4782     - * Actually only lfence would be needed for mb() because all stores done
4783     - * by the kernel should be already ordered. But keep a full barrier for now.
4784     - */
4785     -
4786     #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
4787     #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
4788    
4789     @@ -301,15 +296,6 @@
4790     extern int es7000_plat;
4791     void cpu_idle_wait(void);
4792    
4793     -/*
4794     - * On SMP systems, when the scheduler does migration-cost autodetection,
4795     - * it needs a way to flush as much of the CPU's caches as possible:
4796     - */
4797     -static inline void sched_cacheflush(void)
4798     -{
4799     - wbinvd();
4800     -}
4801     -
4802     extern unsigned long arch_align_stack(unsigned long sp);
4803     extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
4804    
4805     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h
4806     --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 18:33:27.000000000 +0200
4807     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 18:27:40.000000000 +0200
4808     @@ -79,12 +79,16 @@
4809     unsigned long cr0;
4810     asm volatile("movq %%cr0,%0" : "=r" (cr0));
4811     return cr0;
4812     -}
4813     +}
4814    
4815     static inline void write_cr0(unsigned long val)
4816     {
4817     asm volatile("movq %0,%%cr0" :: "r" (val));
4818     -}
4819     +}
4820     +
4821     +#define read_cr2() current_vcpu_info()->arch.cr2
4822     +
4823     +#define write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val)))
4824    
4825     #define read_cr3() ({ \
4826     unsigned long __dummy; \
4827     @@ -103,27 +107,28 @@
4828     unsigned long cr4;
4829     asm("movq %%cr4,%0" : "=r" (cr4));
4830     return cr4;
4831     -}
4832     +}
4833    
4834     static inline void write_cr4(unsigned long val)
4835     {
4836     asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
4837     -}
4838     -
4839     -#define stts() (HYPERVISOR_fpu_taskswitch(1))
4840     +}
4841    
4842     -#define wbinvd() \
4843     - __asm__ __volatile__ ("wbinvd": : :"memory");
4844     +static inline unsigned long read_cr8(void)
4845     +{
4846     + return 0;
4847     +}
4848    
4849     -/*
4850     - * On SMP systems, when the scheduler does migration-cost autodetection,
4851     - * it needs a way to flush as much of the CPU's caches as possible.
4852     - */
4853     -static inline void sched_cacheflush(void)
4854     +static inline void write_cr8(unsigned long val)
4855     {
4856     - wbinvd();
4857     + BUG_ON(val);
4858     }
4859    
4860     +#define stts() (HYPERVISOR_fpu_taskswitch(1))
4861     +
4862     +#define wbinvd() \
4863     + __asm__ __volatile__ ("wbinvd": : :"memory")
4864     +
4865     #endif /* __KERNEL__ */
4866    
4867     #define nop() __asm__ __volatile__ ("nop")
4868     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h
4869     --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 18:33:27.000000000 +0200
4870     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 18:27:40.000000000 +0200
4871     @@ -91,7 +91,11 @@
4872     DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
4873     #endif /* SMP */
4874    
4875     -#define flush_tlb_kernel_range(start, end) flush_tlb_all()
4876     +static inline void flush_tlb_kernel_range(unsigned long start,
4877     + unsigned long end)
4878     +{
4879     + flush_tlb_all();
4880     +}
4881    
4882     static inline void flush_tlb_pgtables(struct mm_struct *mm,
4883     unsigned long start, unsigned long end)
4884     diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h
4885     --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 18:33:27.000000000 +0200
4886     +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 18:27:40.000000000 +0200
4887     @@ -89,7 +89,11 @@
4888    
4889     #endif
4890    
4891     -#define flush_tlb_kernel_range(start, end) flush_tlb_all()
4892     +static inline void flush_tlb_kernel_range(unsigned long start,
4893     + unsigned long end)
4894     +{
4895     + flush_tlb_all();
4896     +}
4897    
4898     static inline void flush_tlb_pgtables(struct mm_struct *mm,
4899     unsigned long start, unsigned long end)
4900     diff -Naur linux-2.6.25/include/asm-x86/thread_info_32.h linux-2.6.25-xen/include/asm-x86/thread_info_32.h
4901     --- linux-2.6.25/include/asm-x86/thread_info_32.h 2008-05-23 18:33:22.000000000 +0200
4902     +++ linux-2.6.25-xen/include/asm-x86/thread_info_32.h 2008-05-23 18:27:40.000000000 +0200
4903     @@ -177,7 +177,8 @@
4904     #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
4905     #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
4906     #else
4907     -#define _TIF_WORK_CTXSW _TIF_DEBUG
4908     +#define _TIF_WORK_CTXSW_NEXT (_TIF_NOTSC | _TIF_DEBUG)
4909     +#define _TIF_WORK_CTXSW_PREV (_TIF_NOTSC)
4910     #endif
4911    
4912    
4913     diff -Naur linux-2.6.25/include/asm-x86/xen/interface.h linux-2.6.25-xen/include/asm-x86/xen/interface.h
4914     --- linux-2.6.25/include/asm-x86/xen/interface.h 2008-04-17 04:49:44.000000000 +0200
4915     +++ linux-2.6.25-xen/include/asm-x86/xen/interface.h 2008-05-23 18:27:40.000000000 +0200
4916     @@ -10,27 +10,21 @@
4917     #define __XEN_PUBLIC_ARCH_X86_32_H__
4918    
4919     #ifdef __XEN__
4920     -#define __DEFINE_GUEST_HANDLE(name, type) \
4921     +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
4922     typedef struct { type *p; } __guest_handle_ ## name
4923     #else
4924     -#define __DEFINE_GUEST_HANDLE(name, type) \
4925     +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
4926     typedef type * __guest_handle_ ## name
4927     #endif
4928    
4929     -#define DEFINE_GUEST_HANDLE_STRUCT(name) \
4930     - __DEFINE_GUEST_HANDLE(name, struct name)
4931     -#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
4932     -#define GUEST_HANDLE(name) __guest_handle_ ## name
4933     +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) \
4934     + __DEFINE_XEN_GUEST_HANDLE(name, struct name)
4935     +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
4936     +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
4937    
4938     #ifndef __ASSEMBLY__
4939     -/* Guest handles for primitive C types. */
4940     -__DEFINE_GUEST_HANDLE(uchar, unsigned char);
4941     -__DEFINE_GUEST_HANDLE(uint, unsigned int);
4942     -__DEFINE_GUEST_HANDLE(ulong, unsigned long);
4943     -DEFINE_GUEST_HANDLE(char);
4944     -DEFINE_GUEST_HANDLE(int);
4945     -DEFINE_GUEST_HANDLE(long);
4946     -DEFINE_GUEST_HANDLE(void);
4947     +typedef unsigned long xen_pfn_t;
4948     +typedef unsigned long xen_ulong_t;
4949     #endif
4950    
4951     /*
4952     @@ -105,7 +99,7 @@
4953     uint16_t cs; /* code selector */
4954     unsigned long address; /* code offset */
4955     };
4956     -DEFINE_GUEST_HANDLE_STRUCT(trap_info);
4957     +DEFINE_XEN_GUEST_HANDLE_STRUCT(trap_info);
4958    
4959     struct cpu_user_regs {
4960     uint32_t ebx;
4961     @@ -129,7 +123,7 @@
4962     uint16_t fs, _pad4;
4963     uint16_t gs, _pad5;
4964     };
4965     -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
4966     +DEFINE_XEN_GUEST_HANDLE_STRUCT(cpu_user_regs);
4967    
4968     typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
4969    
4970     @@ -157,7 +151,7 @@
4971     unsigned long failsafe_callback_eip;
4972     unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
4973     };
4974     -DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
4975     +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_guest_context);
4976    
4977     struct arch_shared_info {
4978     unsigned long max_pfn; /* max pfn that appears in table */
4979     diff -Naur linux-2.6.25/include/linux/elfnote.h linux-2.6.25-xen/include/linux/elfnote.h
4980     --- linux-2.6.25/include/linux/elfnote.h 2008-04-17 04:49:44.000000000 +0200
4981     +++ linux-2.6.25-xen/include/linux/elfnote.h 2008-05-23 18:27:40.000000000 +0200
4982     @@ -52,7 +52,7 @@
4983     4484:.balign 4 ; \
4984     .popsection ;
4985    
4986     -#define ELFNOTE(name, type, desc) \
4987     +#define ELFNOTE(name, type, desc...) \
4988     ELFNOTE_START(name, type, "") \
4989     desc ; \
4990     ELFNOTE_END
4991     diff -Naur linux-2.6.25/include/linux/page-flags.h linux-2.6.25-xen/include/linux/page-flags.h
4992     --- linux-2.6.25/include/linux/page-flags.h 2008-05-23 18:33:11.000000000 +0200
4993     +++ linux-2.6.25-xen/include/linux/page-flags.h 2008-05-23 18:27:40.000000000 +0200
4994     @@ -83,6 +83,11 @@
4995     #define PG_private 11 /* If pagecache, has fs-private data */
4996    
4997     #define PG_writeback 12 /* Page is under writeback */
4998     +#ifdef CONFIG_XEN
4999     +/* Cannot alias with PG_owner_priv_1 since bad_page() checks include this bit.
5000     + * Also cannot use PG_arch_1 since that now has a different purpose on x86. */
5001     +#define PG_pinned 13
5002     +#endif
5003     #define PG_compound 14 /* Part of a compound page */
5004     #define PG_swapcache 15 /* Swap page: swp_entry_t in private */
5005    
5006     @@ -96,7 +101,9 @@
5007    
5008     /* PG_owner_priv_1 users should have descriptive aliases */
5009     #define PG_checked PG_owner_priv_1 /* Used by some filesystems */
5010     +#ifdef CONFIG_PARAVIRT_XEN
5011     #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */
5012     +#endif
5013    
5014     #if (BITS_PER_LONG > 32)
5015     /*
5016     diff -Naur linux-2.6.25/include/linux/skbuff.h linux-2.6.25-xen/include/linux/skbuff.h
5017     --- linux-2.6.25/include/linux/skbuff.h 2008-05-23 18:33:24.000000000 +0200
5018     +++ linux-2.6.25-xen/include/linux/skbuff.h 2008-05-23 18:27:40.000000000 +0200
5019     @@ -1822,7 +1822,7 @@
5020    
5021     bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
5022    
5023     -#ifdef CONFIG_XEN
5024     +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5025     int skb_checksum_setup(struct sk_buff *skb);
5026     #else
5027     static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
5028     diff -Naur linux-2.6.25/include/xen/driver_util.h linux-2.6.25-xen/include/xen/driver_util.h
5029     --- linux-2.6.25/include/xen/driver_util.h 2008-05-23 18:32:29.000000000 +0200
5030     +++ linux-2.6.25-xen/include/xen/driver_util.h 2008-05-23 18:27:40.000000000 +0200
5031     @@ -5,10 +5,6 @@
5032     #include <linux/vmalloc.h>
5033     #include <linux/device.h>
5034    
5035     -/* Allocate/destroy a 'vmalloc' VM area. */
5036     -extern struct vm_struct *alloc_vm_area(unsigned long size);
5037     -extern void free_vm_area(struct vm_struct *area);
5038     -
5039     extern struct class *get_xen_class(void);
5040    
5041     #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
5042     diff -Naur linux-2.6.25/include/xen/features.h linux-2.6.25-xen/include/xen/features.h
5043     --- linux-2.6.25/include/xen/features.h 2008-04-17 04:49:44.000000000 +0200
5044     +++ linux-2.6.25-xen/include/xen/features.h 2008-05-23 18:27:40.000000000 +0200
5045     @@ -10,6 +10,7 @@
5046     #define __XEN_FEATURES_H__
5047    
5048     #include <xen/interface/features.h>
5049     +#include <xen/interface/version.h>
5050    
5051     void xen_setup_features(void);
5052    
5053     @@ -20,4 +21,4 @@
5054     return xen_features[flag];
5055     }
5056    
5057     -#endif /* __ASM_XEN_FEATURES_H__ */
5058     +#endif /* __XEN_FEATURES_H__ */
5059     diff -Naur linux-2.6.25/include/xen/interface/arch-x86/xen.h linux-2.6.25-xen/include/xen/interface/arch-x86/xen.h
5060     --- linux-2.6.25/include/xen/interface/arch-x86/xen.h 2008-05-23 18:32:37.000000000 +0200
5061     +++ linux-2.6.25-xen/include/xen/interface/arch-x86/xen.h 2008-05-23 18:27:40.000000000 +0200
5062     @@ -49,6 +49,9 @@
5063     #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
5064     #endif
5065    
5066     +/* Allow co-existing Linux 2.6.23+ Xen interface definitions. */
5067     +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) struct name
5068     +
5069     #if defined(__i386__)
5070     #include "xen-x86_32.h"
5071     #elif defined(__x86_64__)
5072     diff -Naur linux-2.6.25/include/xen/interface/event_channel.h linux-2.6.25-xen/include/xen/interface/event_channel.h
5073     --- linux-2.6.25/include/xen/interface/event_channel.h 2008-05-23 18:33:03.000000000 +0200
5074     +++ linux-2.6.25-xen/include/xen/interface/event_channel.h 2008-05-23 18:27:40.000000000 +0200
5075     @@ -248,6 +248,7 @@
5076     struct evtchn_unmask unmask;
5077     } u;
5078     };
5079     +DEFINE_XEN_GUEST_HANDLE_STRUCT(evtchn_op);
5080     typedef struct evtchn_op evtchn_op_t;
5081     DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
5082    
5083     diff -Naur linux-2.6.25/include/xen/interface/io/netif.h linux-2.6.25-xen/include/xen/interface/io/netif.h
5084     --- linux-2.6.25/include/xen/interface/io/netif.h 2008-05-23 18:33:03.000000000 +0200
5085     +++ linux-2.6.25-xen/include/xen/interface/io/netif.h 2008-05-23 18:27:40.000000000 +0200
5086     @@ -183,8 +183,22 @@
5087     * Generate netif ring structures and types.
5088     */
5089    
5090     +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5091     DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
5092     DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
5093     +#else
5094     +#define xen_netif_tx_request netif_tx_request
5095     +#define xen_netif_rx_request netif_rx_request
5096     +#define xen_netif_tx_response netif_tx_response
5097     +#define xen_netif_rx_response netif_rx_response
5098     +DEFINE_RING_TYPES(xen_netif_tx,
5099     + struct xen_netif_tx_request,
5100     + struct xen_netif_tx_response);
5101     +DEFINE_RING_TYPES(xen_netif_rx,
5102     + struct xen_netif_rx_request,
5103     + struct xen_netif_rx_response);
5104     +#define xen_netif_extra_info netif_extra_info
5105     +#endif
5106    
5107     #define NETIF_RSP_DROPPED -2
5108     #define NETIF_RSP_ERROR -1
5109     diff -Naur linux-2.6.25/include/xen/interface/memory.h linux-2.6.25-xen/include/xen/interface/memory.h
5110     --- linux-2.6.25/include/xen/interface/memory.h 2008-05-23 18:33:03.000000000 +0200
5111     +++ linux-2.6.25-xen/include/xen/interface/memory.h 2008-05-23 18:27:40.000000000 +0200
5112     @@ -67,6 +67,7 @@
5113     */
5114     domid_t domid;
5115     };
5116     +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
5117     typedef struct xen_memory_reservation xen_memory_reservation_t;
5118     DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
5119    
5120     @@ -160,6 +161,7 @@
5121     */
5122     unsigned int nr_extents;
5123     };
5124     +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
5125     typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
5126     DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
5127    
5128     @@ -198,6 +200,7 @@
5129     /* GPFN where the source mapping page should appear. */
5130     xen_pfn_t gpfn;
5131     };
5132     +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
5133     typedef struct xen_add_to_physmap xen_add_to_physmap_t;
5134     DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
5135    
5136     @@ -222,6 +225,7 @@
5137     */
5138     XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
5139     };
5140     +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
5141     typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
5142     DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
5143    
5144     diff -Naur linux-2.6.25/include/xen/interface/sched.h linux-2.6.25-xen/include/xen/interface/sched.h
5145     --- linux-2.6.25/include/xen/interface/sched.h 2008-05-23 18:33:03.000000000 +0200
5146     +++ linux-2.6.25-xen/include/xen/interface/sched.h 2008-05-23 18:27:40.000000000 +0200
5147     @@ -67,6 +67,7 @@
5148     struct sched_shutdown {
5149     unsigned int reason; /* SHUTDOWN_* */
5150     };
5151     +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_shutdown);
5152     typedef struct sched_shutdown sched_shutdown_t;
5153     DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
5154    
5155     @@ -81,6 +82,7 @@
5156     unsigned int nr_ports;
5157     uint64_t timeout;
5158     };
5159     +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_poll);
5160     typedef struct sched_poll sched_poll_t;
5161     DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
5162    
5163     diff -Naur linux-2.6.25/include/xen/interface/version.h linux-2.6.25-xen/include/xen/interface/version.h
5164     --- linux-2.6.25/include/xen/interface/version.h 2008-05-23 18:33:03.000000000 +0200
5165     +++ linux-2.6.25-xen/include/xen/interface/version.h 2008-05-23 18:27:40.000000000 +0200
5166     @@ -36,6 +36,9 @@
5167     /* arg == xen_extraversion_t. */
5168     #define XENVER_extraversion 1
5169     typedef char xen_extraversion_t[16];
5170     +struct xen_extraversion {
5171     + xen_extraversion_t extraversion;
5172     +};
5173     #define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
5174    
5175     /* arg == xen_compile_info_t. */
5176     @@ -50,10 +53,16 @@
5177    
5178     #define XENVER_capabilities 3
5179     typedef char xen_capabilities_info_t[1024];
5180     +struct xen_capabilities_info {
5181     + xen_capabilities_info_t info;
5182     +};
5183     #define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
5184    
5185     #define XENVER_changeset 4
5186     typedef char xen_changeset_info_t[64];
5187     +struct xen_changeset_info {
5188     + xen_changeset_info_t info;
5189     +};
5190     #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
5191    
5192     #define XENVER_platform_parameters 5
5193     diff -Naur linux-2.6.25/include/xen/interface/xen.h linux-2.6.25-xen/include/xen/interface/xen.h
5194     --- linux-2.6.25/include/xen/interface/xen.h 2008-05-23 18:33:03.000000000 +0200
5195     +++ linux-2.6.25-xen/include/xen/interface/xen.h 2008-05-23 18:27:40.000000000 +0200
5196     @@ -29,7 +29,9 @@
5197    
5198     #include "xen-compat.h"
5199    
5200     -#if defined(__i386__) || defined(__x86_64__)
5201     +#if defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
5202     +#include <asm/xen/interface.h>
5203     +#elif defined(__i386__) || defined(__x86_64__)
5204     #include "arch-x86/xen.h"
5205     #elif defined(__ia64__)
5206     #include "arch-ia64.h"
5207     @@ -109,7 +111,7 @@
5208     */
5209    
5210     /* New sched_op hypercall introduced in 0x00030101. */
5211     -#if __XEN_INTERFACE_VERSION__ < 0x00030101
5212     +#if __XEN_INTERFACE_VERSION__ < 0x00030101 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
5213     #undef __HYPERVISOR_sched_op
5214     #define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
5215     #endif
5216     @@ -123,7 +125,7 @@
5217     #endif
5218    
5219     /* New platform_op hypercall introduced in 0x00030204. */
5220     -#if __XEN_INTERFACE_VERSION__ < 0x00030204
5221     +#if __XEN_INTERFACE_VERSION__ < 0x00030204 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
5222     #define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
5223     #endif
5224    
5225     @@ -270,6 +272,7 @@
5226     #endif
5227     } arg2;
5228     };
5229     +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmuext_op);
5230     typedef struct mmuext_op mmuext_op_t;
5231     DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
5232     #endif
5233     @@ -352,6 +355,7 @@
5234     uint64_t ptr; /* Machine address of PTE. */
5235     uint64_t val; /* New contents of PTE. */
5236     };
5237     +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmu_update);
5238     typedef struct mmu_update mmu_update_t;
5239     DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
5240    
5241     @@ -360,9 +364,15 @@
5242     * NB. The fields are natural register size for this architecture.
5243     */
5244     struct multicall_entry {
5245     - unsigned long op, result;
5246     + unsigned long op;
5247     +#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5248     + unsigned long result;
5249     +#else
5250     + long result;
5251     +#endif
5252     unsigned long args[6];
5253     };
5254     +DEFINE_XEN_GUEST_HANDLE_STRUCT(multicall_entry);
5255     typedef struct multicall_entry multicall_entry_t;
5256     DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
5257    
5258     diff -Naur linux-2.6.25/include/xen/xenbus.h linux-2.6.25-xen/include/xen/xenbus.h
5259     --- linux-2.6.25/include/xen/xenbus.h 2008-05-23 18:33:27.000000000 +0200
5260     +++ linux-2.6.25-xen/include/xen/xenbus.h 2008-05-23 18:27:40.000000000 +0200
5261     @@ -57,16 +57,20 @@
5262     void (*callback)(struct xenbus_watch *,
5263     const char **vec, unsigned int len);
5264    
5265     +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5266     /* See XBWF_ definitions below. */
5267     unsigned long flags;
5268     +#endif
5269     };
5270    
5271     +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5272     /*
5273     * Execute callback in its own kthread. Useful if the callback is long
5274     * running or heavily serialised, to avoid taking out the main xenwatch thread
5275     * for a long period of time (or even unwittingly causing a deadlock).
5276     */
5277     #define XBWF_new_thread 1
5278     +#endif
5279    
5280     /* A xenbus device. */
5281     struct xenbus_device {
5282     @@ -214,6 +218,7 @@
5283     const char **, unsigned int));
5284    
5285    
5286     +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5287     /**
5288     * Register a watch on the given path/path2, using the given xenbus_watch
5289     * structure for storage, and the given callback function as the callback.
5290     @@ -227,7 +232,13 @@
5291     const char *path2, struct xenbus_watch *watch,
5292     void (*callback)(struct xenbus_watch *,
5293     const char **, unsigned int));
5294     -
5295     +#else
5296     +int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
5297     + void (*callback)(struct xenbus_watch *,
5298     + const char **, unsigned int),
5299     + const char *pathfmt, ...)
5300     + __attribute__ ((format (printf, 4, 5)));
5301     +#endif
5302    
5303     /**
5304     * Advertise in the store a change of the given driver to the given new_state.
5305     diff -Naur linux-2.6.25/net/core/dev.c linux-2.6.25-xen/net/core/dev.c
5306     --- linux-2.6.25/net/core/dev.c 2008-05-23 18:33:27.000000000 +0200
5307     +++ linux-2.6.25-xen/net/core/dev.c 2008-05-23 18:27:40.000000000 +0200
5308     @@ -122,7 +122,7 @@
5309    
5310     #include "net-sysfs.h"
5311    
5312     -#ifdef CONFIG_XEN
5313     +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5314     #include <net/ip.h>
5315     #include <linux/tcp.h>
5316     #include <linux/udp.h>
5317     @@ -1582,42 +1582,54 @@
5318     return 0;
5319     }
5320    
5321     -#ifdef CONFIG_XEN
5322     +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5323     inline int skb_checksum_setup(struct sk_buff *skb)
5324     {
5325     - if (skb->proto_csum_blank) {
5326     - struct iphdr *iph;
5327     - unsigned char *th;
5328     + struct iphdr *iph;
5329     + unsigned char *th;
5330     + int err = -EPROTO;
5331    
5332     - if (skb->protocol != htons(ETH_P_IP))
5333     - goto out;
5334     - iph = ip_hdr(skb);
5335     - th = skb_network_header(skb) + 4 * iph->ihl;
5336     - if (th >= skb_tail_pointer(skb))
5337     - goto out;
5338     - skb->csum_start = th - skb->head;
5339     - switch (iph->protocol) {
5340     - case IPPROTO_TCP:
5341     - skb->csum_offset = offsetof(struct tcphdr, check);
5342     - break;
5343     - case IPPROTO_UDP:
5344     - skb->csum_offset = offsetof(struct udphdr, check);
5345     - break;
5346     - default:
5347     - if (net_ratelimit())
5348     - printk(KERN_ERR "Attempting to checksum a non-"
5349     - "TCP/UDP packet, dropping a protocol"
5350     - " %d packet", iph->protocol);
5351     - goto out;
5352     - }
5353     - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
5354     - goto out;
5355     - skb->ip_summed = CHECKSUM_PARTIAL;
5356     - skb->proto_csum_blank = 0;
5357     +#ifdef CONFIG_XEN
5358     + if (!skb->proto_csum_blank)
5359     + return 0;
5360     +#endif
5361     +
5362     + if (skb->protocol != htons(ETH_P_IP))
5363     + goto out;
5364     +
5365     + iph = ip_hdr(skb);
5366     + th = skb_network_header(skb) + 4 * iph->ihl;
5367     + if (th >= skb_tail_pointer(skb))
5368     + goto out;
5369     +
5370     + skb->csum_start = th - skb->head;
5371     + switch (iph->protocol) {
5372     + case IPPROTO_TCP:
5373     + skb->csum_offset = offsetof(struct tcphdr, check);
5374     + break;
5375     + case IPPROTO_UDP:
5376     + skb->csum_offset = offsetof(struct udphdr, check);
5377     + break;
5378     + default:
5379     + if (net_ratelimit())
5380     + printk(KERN_ERR "Attempting to checksum a non-"
5381     + "TCP/UDP packet, dropping a protocol"
5382     + " %d packet", iph->protocol);
5383     + goto out;
5384     }
5385     - return 0;
5386     +
5387     + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
5388     + goto out;
5389     +
5390     +#ifdef CONFIG_XEN
5391     + skb->ip_summed = CHECKSUM_PARTIAL;
5392     + skb->proto_csum_blank = 0;
5393     +#endif
5394     +
5395     + err = 0;
5396     +
5397     out:
5398     - return -EPROTO;
5399     + return err;
5400     }
5401     EXPORT_SYMBOL(skb_checksum_setup);
5402     #endif