Magellan Linux

Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1024-2.6.25-xen-patch-2.6.23.patch

Revision 609
Fri May 23 17:35:37 2008 UTC by niro
File size: 171857 bytes
-using opensuse xen patchset, updated kernel configs

1 diff -Naur linux-2.6.25/arch/x86/ia32/ia32entry-xen.S linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S
2 --- linux-2.6.25/arch/x86/ia32/ia32entry-xen.S 2008-05-23 18:33:27.000000000 +0200
3 +++ linux-2.6.25-xen/arch/x86/ia32/ia32entry-xen.S 2008-05-23 18:27:40.000000000 +0200
4 @@ -105,7 +105,7 @@
5 movl $VSYSCALL32_SYSEXIT,8(%rsp)
6 movq %rax,(%rsp)
7 cld
8 - SAVE_ARGS 0,0,0
9 + SAVE_ARGS 0,0,1
10 /* no need to do an access_ok check here because rbp has been
11 32bit zero extended */
12 1: movl (%rbp),%r9d
13 @@ -244,7 +244,7 @@
14 */
15
16 ENTRY(ia32_syscall)
17 - CFI_STARTPROC simple
18 + CFI_STARTPROC32 simple
19 CFI_SIGNAL_FRAME
20 CFI_DEF_CFA rsp,SS+8-RIP+16
21 /*CFI_REL_OFFSET ss,SS-RIP+16*/
22 @@ -280,6 +280,7 @@
23
24 ia32_tracesys:
25 SAVE_REST
26 + CLEAR_RREGS
27 movq $-ENOSYS,RAX(%rsp) /* really needed? */
28 movq %rsp,%rdi /* &pt_regs -> arg1 */
29 call syscall_trace_enter
30 @@ -476,7 +477,7 @@
31 .quad sys_init_module
32 .quad sys_delete_module
33 .quad quiet_ni_syscall /* 130 get_kernel_syms */
34 - .quad sys_quotactl
35 + .quad sys32_quotactl
36 .quad sys_getpgid
37 .quad sys_fchdir
38 .quad quiet_ni_syscall /* bdflush */
39 @@ -669,4 +670,5 @@
40 .quad compat_sys_signalfd
41 .quad compat_sys_timerfd
42 .quad sys_eventfd
43 + .quad sys32_fallocate
44 ia32_syscall_end:
45 diff -Naur linux-2.6.25/arch/x86/kernel/acpi/sleep_32-xen.c linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_32-xen.c
46 --- linux-2.6.25/arch/x86/kernel/acpi/sleep_32-xen.c 2008-05-23 18:32:29.000000000 +0200
47 +++ linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_32-xen.c 2008-05-23 18:27:40.000000000 +0200
48 @@ -15,7 +15,7 @@
49 #ifndef CONFIG_ACPI_PV_SLEEP
50 /* address in low memory of the wakeup routine. */
51 unsigned long acpi_wakeup_address = 0;
52 -unsigned long acpi_video_flags;
53 +unsigned long acpi_realmode_flags;
54 extern char wakeup_start, wakeup_end;
55
56 extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
57 @@ -74,9 +74,11 @@
58 {
59 while ((str != NULL) && (*str != '\0')) {
60 if (strncmp(str, "s3_bios", 7) == 0)
61 - acpi_video_flags = 1;
62 + acpi_realmode_flags |= 1;
63 if (strncmp(str, "s3_mode", 7) == 0)
64 - acpi_video_flags |= 2;
65 + acpi_realmode_flags |= 2;
66 + if (strncmp(str, "s3_beep", 7) == 0)
67 + acpi_realmode_flags |= 4;
68 str = strchr(str, ',');
69 if (str != NULL)
70 str += strspn(str, ", \t");
71 @@ -86,9 +88,11 @@
72
73 __setup("acpi_sleep=", acpi_sleep_setup);
74
75 +/* Ouch, we want to delete this. We already have better version in userspace, in
76 + s2ram from suspend.sf.net project */
77 static __init int reset_videomode_after_s3(struct dmi_system_id *d)
78 {
79 - acpi_video_flags |= 2;
80 + acpi_realmode_flags |= 2;
81 return 0;
82 }
83
84 diff -Naur linux-2.6.25/arch/x86/kernel/acpi/sleep_64-xen.c linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_64-xen.c
85 --- linux-2.6.25/arch/x86/kernel/acpi/sleep_64-xen.c 2008-05-23 18:33:27.000000000 +0200
86 +++ linux-2.6.25-xen/arch/x86/kernel/acpi/sleep_64-xen.c 2008-05-23 18:27:40.000000000 +0200
87 @@ -51,12 +51,10 @@
88 Low-Level Sleep Support
89 -------------------------------------------------------------------------- */
90
91 -#ifdef CONFIG_ACPI_SLEEP
92 -
93 #ifndef CONFIG_ACPI_PV_SLEEP
94 /* address in low memory of the wakeup routine. */
95 unsigned long acpi_wakeup_address = 0;
96 -unsigned long acpi_video_flags;
97 +unsigned long acpi_realmode_flags;
98 extern char wakeup_start, wakeup_end;
99
100 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
101 @@ -109,9 +107,11 @@
102 {
103 while ((str != NULL) && (*str != '\0')) {
104 if (strncmp(str, "s3_bios", 7) == 0)
105 - acpi_video_flags = 1;
106 + acpi_realmode_flags |= 1;
107 if (strncmp(str, "s3_mode", 7) == 0)
108 - acpi_video_flags |= 2;
109 + acpi_realmode_flags |= 2;
110 + if (strncmp(str, "s3_beep", 7) == 0)
111 + acpi_realmode_flags |= 4;
112 str = strchr(str, ',');
113 if (str != NULL)
114 str += strspn(str, ", \t");
115 @@ -123,8 +123,6 @@
116 __setup("acpi_sleep=", acpi_sleep_setup);
117 #endif /* CONFIG_ACPI_PV_SLEEP */
118
119 -#endif /*CONFIG_ACPI_SLEEP */
120 -
121 void acpi_pci_link_exit(void)
122 {
123 }
124 diff -Naur linux-2.6.25/arch/x86/kernel/apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c
125 --- linux-2.6.25/arch/x86/kernel/apic_64-xen.c 2008-05-23 18:33:27.000000000 +0200
126 +++ linux-2.6.25-xen/arch/x86/kernel/apic_64-xen.c 2008-05-23 18:27:40.000000000 +0200
127 @@ -50,7 +50,7 @@
128 * holds up an irq slot - in excessive cases (when multiple
129 * unexpected vectors occur) that might lock up the APIC
130 * completely.
131 - * But don't ack when the APIC is disabled. -AK
132 + * But don't ack when the APIC is disabled. -AK
133 */
134 if (!disable_apic)
135 ack_APIC_irq();
136 @@ -132,20 +132,6 @@
137 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
138 ack_APIC_irq();
139
140 -#if 0
141 - static unsigned long last_warning;
142 - static unsigned long skipped;
143 -
144 - /* see sw-dev-man vol 3, chapter 7.4.13.5 */
145 - if (time_before(last_warning+30*HZ,jiffies)) {
146 - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
147 - smp_processor_id(), skipped);
148 - last_warning = jiffies;
149 - skipped = 0;
150 - } else {
151 - skipped++;
152 - }
153 -#endif
154 irq_exit();
155 }
156
157 @@ -177,7 +163,7 @@
158 7: Illegal register address
159 */
160 printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
161 - smp_processor_id(), v , v1);
162 + smp_processor_id(), v , v1);
163 irq_exit();
164 }
165
166 diff -Naur linux-2.6.25/arch/x86/kernel/asm-offsets_32.c linux-2.6.25-xen/arch/x86/kernel/asm-offsets_32.c
167 --- linux-2.6.25/arch/x86/kernel/asm-offsets_32.c 2008-05-23 18:33:26.000000000 +0200
168 +++ linux-2.6.25-xen/arch/x86/kernel/asm-offsets_32.c 2008-05-23 18:27:40.000000000 +0200
169 @@ -18,7 +18,9 @@
170 #include <asm/bootparam.h>
171 #include <asm/elf.h>
172
173 +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
174 #include <xen/interface/xen.h>
175 +#endif
176
177 #include <linux/lguest.h>
178 #include "../../../drivers/lguest/lg.h"
179 @@ -133,7 +135,7 @@
180 OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
181 #endif
182
183 -#ifdef CONFIG_XEN
184 +#ifdef CONFIG_PARAVIRT_XEN
185 BLANK();
186 OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
187 OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
188 diff -Naur linux-2.6.25/arch/x86/kernel/cpu/common-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c
189 --- linux-2.6.25/arch/x86/kernel/cpu/common-xen.c 2008-05-23 18:33:27.000000000 +0200
190 +++ linux-2.6.25-xen/arch/x86/kernel/cpu/common-xen.c 2008-05-23 18:27:40.000000000 +0200
191 @@ -360,6 +360,8 @@
192 if ( xlvl >= 0x80000004 )
193 get_model_name(c); /* Default name */
194 }
195 +
196 + init_scattered_cpuid_features(c);
197 }
198
199 early_intel_workaround(c);
200 @@ -611,7 +613,6 @@
201 extern int amd_init_cpu(void);
202 extern int centaur_init_cpu(void);
203 extern int transmeta_init_cpu(void);
204 -extern int rise_init_cpu(void);
205 extern int nexgen_init_cpu(void);
206 extern int umc_init_cpu(void);
207
208 @@ -623,7 +624,6 @@
209 amd_init_cpu();
210 centaur_init_cpu();
211 transmeta_init_cpu();
212 - rise_init_cpu();
213 nexgen_init_cpu();
214 umc_init_cpu();
215 early_cpu_detect();
216 diff -Naur linux-2.6.25/arch/x86/kernel/cpu/mtrr/main-xen.c linux-2.6.25-xen/arch/x86/kernel/cpu/mtrr/main-xen.c
217 --- linux-2.6.25/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-05-23 18:33:27.000000000 +0200
218 +++ linux-2.6.25-xen/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-05-23 18:27:40.000000000 +0200
219 @@ -167,7 +167,7 @@
220 EXPORT_SYMBOL(mtrr_add);
221 EXPORT_SYMBOL(mtrr_del);
222
223 -__init void mtrr_bp_init(void)
224 +void __init mtrr_bp_init(void)
225 {
226 }
227
228 diff -Naur linux-2.6.25/arch/x86/kernel/e820_32-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_32-xen.c
229 --- linux-2.6.25/arch/x86/kernel/e820_32-xen.c 2008-05-23 18:33:27.000000000 +0200
230 +++ linux-2.6.25-xen/arch/x86/kernel/e820_32-xen.c 2008-05-23 18:27:40.000000000 +0200
231 @@ -10,6 +10,7 @@
232 #include <linux/efi.h>
233 #include <linux/pfn.h>
234 #include <linux/uaccess.h>
235 +#include <linux/suspend.h>
236
237 #include <asm/pgtable.h>
238 #include <asm/page.h>
239 @@ -343,6 +344,37 @@
240
241 subsys_initcall(request_standard_resources);
242
243 +#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
244 +/**
245 + * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
246 + * correspond to e820 RAM areas and mark the corresponding pages as nosave for
247 + * hibernation.
248 + *
249 + * This function requires the e820 map to be sorted and without any
250 + * overlapping entries and assumes the first e820 area to be RAM.
251 + */
252 +void __init e820_mark_nosave_regions(void)
253 +{
254 + int i;
255 + unsigned long pfn;
256 +
257 + pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
258 + for (i = 1; i < e820.nr_map; i++) {
259 + struct e820entry *ei = &e820.map[i];
260 +
261 + if (pfn < PFN_UP(ei->addr))
262 + register_nosave_region(pfn, PFN_UP(ei->addr));
263 +
264 + pfn = PFN_DOWN(ei->addr + ei->size);
265 + if (ei->type != E820_RAM)
266 + register_nosave_region(PFN_UP(ei->addr), pfn);
267 +
268 + if (pfn >= max_low_pfn)
269 + break;
270 + }
271 +}
272 +#endif
273 +
274 void __init add_memory_region(unsigned long long start,
275 unsigned long long size, int type)
276 {
277 @@ -789,7 +821,7 @@
278 case E820_NVS:
279 printk("(ACPI NVS)\n");
280 break;
281 - default: printk("type %lu\n", e820.map[i].type);
282 + default: printk("type %u\n", e820.map[i].type);
283 break;
284 }
285 }
286 diff -Naur linux-2.6.25/arch/x86/kernel/e820_64-xen.c linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c
287 --- linux-2.6.25/arch/x86/kernel/e820_64-xen.c 2008-05-23 18:33:27.000000000 +0200
288 +++ linux-2.6.25-xen/arch/x86/kernel/e820_64-xen.c 2008-05-23 18:27:40.000000000 +0200
289 @@ -224,37 +224,6 @@
290 }
291
292 /*
293 - * Find the hole size in the range.
294 - */
295 -unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
296 -{
297 - unsigned long ram = 0;
298 - int i;
299 -
300 - for (i = 0; i < e820.nr_map; i++) {
301 - struct e820entry *ei = &e820.map[i];
302 - unsigned long last, addr;
303 -
304 - if (ei->type != E820_RAM ||
305 - ei->addr+ei->size <= start ||
306 - ei->addr >= end)
307 - continue;
308 -
309 - addr = round_up(ei->addr, PAGE_SIZE);
310 - if (addr < start)
311 - addr = start;
312 -
313 - last = round_down(ei->addr + ei->size, PAGE_SIZE);
314 - if (last >= end)
315 - last = end;
316 -
317 - if (last > addr)
318 - ram += last - addr;
319 - }
320 - return ((end - start) - ram);
321 -}
322 -
323 -/*
324 * Mark e820 reserved areas as busy for the resource manager.
325 */
326 void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
327 @@ -327,47 +296,61 @@
328 }
329 #endif
330
331 +/*
332 + * Finds an active region in the address range from start_pfn to end_pfn and
333 + * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
334 + */
335 +static int __init e820_find_active_region(const struct e820entry *ei,
336 + unsigned long start_pfn,
337 + unsigned long end_pfn,
338 + unsigned long *ei_startpfn,
339 + unsigned long *ei_endpfn)
340 +{
341 + *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
342 + *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
343 +
344 + /* Skip map entries smaller than a page */
345 + if (*ei_startpfn >= *ei_endpfn)
346 + return 0;
347 +
348 + /* Check if end_pfn_map should be updated */
349 + if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
350 + end_pfn_map = *ei_endpfn;
351 +
352 + /* Skip if map is outside the node */
353 + if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
354 + *ei_startpfn >= end_pfn)
355 + return 0;
356 +
357 + /* Check for overlaps */
358 + if (*ei_startpfn < start_pfn)
359 + *ei_startpfn = start_pfn;
360 + if (*ei_endpfn > end_pfn)
361 + *ei_endpfn = end_pfn;
362 +
363 + /* Obey end_user_pfn to save on memmap */
364 + if (*ei_startpfn >= end_user_pfn)
365 + return 0;
366 + if (*ei_endpfn > end_user_pfn)
367 + *ei_endpfn = end_user_pfn;
368 +
369 + return 1;
370 +}
371 +
372 /* Walk the e820 map and register active regions within a node */
373 void __init
374 e820_register_active_regions(int nid, unsigned long start_pfn,
375 unsigned long end_pfn)
376 {
377 + unsigned long ei_startpfn;
378 + unsigned long ei_endpfn;
379 int i;
380 - unsigned long ei_startpfn, ei_endpfn;
381 - for (i = 0; i < e820.nr_map; i++) {
382 - struct e820entry *ei = &e820.map[i];
383 - ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
384 - ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
385 - >> PAGE_SHIFT;
386 -
387 - /* Skip map entries smaller than a page */
388 - if (ei_startpfn >= ei_endpfn)
389 - continue;
390 -
391 - /* Check if end_pfn_map should be updated */
392 - if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
393 - end_pfn_map = ei_endpfn;
394 -
395 - /* Skip if map is outside the node */
396 - if (ei->type != E820_RAM ||
397 - ei_endpfn <= start_pfn ||
398 - ei_startpfn >= end_pfn)
399 - continue;
400 -
401 - /* Check for overlaps */
402 - if (ei_startpfn < start_pfn)
403 - ei_startpfn = start_pfn;
404 - if (ei_endpfn > end_pfn)
405 - ei_endpfn = end_pfn;
406 -
407 - /* Obey end_user_pfn to save on memmap */
408 - if (ei_startpfn >= end_user_pfn)
409 - continue;
410 - if (ei_endpfn > end_user_pfn)
411 - ei_endpfn = end_user_pfn;
412
413 - add_active_range(nid, ei_startpfn, ei_endpfn);
414 - }
415 + for (i = 0; i < e820.nr_map; i++)
416 + if (e820_find_active_region(&e820.map[i],
417 + start_pfn, end_pfn,
418 + &ei_startpfn, &ei_endpfn))
419 + add_active_range(nid, ei_startpfn, ei_endpfn);
420 }
421
422 /*
423 @@ -388,12 +371,35 @@
424 e820.nr_map++;
425 }
426
427 +/*
428 + * Find the hole size (in bytes) in the memory range.
429 + * @start: starting address of the memory range to scan
430 + * @end: ending address of the memory range to scan
431 + */
432 +unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
433 +{
434 + unsigned long start_pfn = start >> PAGE_SHIFT;
435 + unsigned long end_pfn = end >> PAGE_SHIFT;
436 + unsigned long ei_startpfn;
437 + unsigned long ei_endpfn;
438 + unsigned long ram = 0;
439 + int i;
440 +
441 + for (i = 0; i < e820.nr_map; i++) {
442 + if (e820_find_active_region(&e820.map[i],
443 + start_pfn, end_pfn,
444 + &ei_startpfn, &ei_endpfn))
445 + ram += ei_endpfn - ei_startpfn;
446 + }
447 + return end - start - (ram << PAGE_SHIFT);
448 +}
449 +
450 void __init e820_print_map(char *who)
451 {
452 int i;
453
454 for (i = 0; i < e820.nr_map; i++) {
455 - printk(" %s: %016Lx - %016Lx ", who,
456 + printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
457 (unsigned long long) e820.map[i].addr,
458 (unsigned long long) (e820.map[i].addr + e820.map[i].size));
459 switch (e820.map[i].type) {
460 diff -Naur linux-2.6.25/arch/x86/kernel/early_printk-xen.c linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c
461 --- linux-2.6.25/arch/x86/kernel/early_printk-xen.c 2008-05-23 18:33:27.000000000 +0200
462 +++ linux-2.6.25-xen/arch/x86/kernel/early_printk-xen.c 2008-05-23 18:27:40.000000000 +0200
463 @@ -174,6 +174,7 @@
464 * mappings. Someone should fix this for domain 0. For now, use fake serial.
465 */
466 #define early_vga_console early_serial_console
467 +#define xenboot_console early_serial_console
468
469 #endif
470
471 @@ -261,20 +262,22 @@
472 } else if (!strncmp(buf, "ttyS", 4)) {
473 early_serial_init(buf);
474 early_console = &early_serial_console;
475 - } else if (!strncmp(buf, "vga", 3)
476 + } else if (!strncmp(buf, "vga", 3)) {
477 #ifndef CONFIG_XEN
478 && SCREEN_INFO.orig_video_isVGA == 1) {
479 max_xpos = SCREEN_INFO.orig_video_cols;
480 max_ypos = SCREEN_INFO.orig_video_lines;
481 current_ypos = SCREEN_INFO.orig_y;
482 -#else
483 - || !strncmp(buf, "xen", 3)) {
484 #endif
485 early_console = &early_vga_console;
486 } else if (!strncmp(buf, "simnow", 6)) {
487 simnow_init(buf + 6);
488 early_console = &simnow_console;
489 keep_early = 1;
490 +#ifdef CONFIG_XEN
491 + } else if (!strncmp(buf, "xen", 3)) {
492 + early_console = &xenboot_console;
493 +#endif
494 }
495
496 if (keep_early)
497 diff -Naur linux-2.6.25/arch/x86/kernel/entry_32.S linux-2.6.25-xen/arch/x86/kernel/entry_32.S
498 --- linux-2.6.25/arch/x86/kernel/entry_32.S 2008-05-23 18:32:54.000000000 +0200
499 +++ linux-2.6.25-xen/arch/x86/kernel/entry_32.S 2008-05-23 18:27:40.000000000 +0200
500 @@ -1022,7 +1022,7 @@
501 CFI_ENDPROC
502 ENDPROC(kernel_thread_helper)
503
504 -#ifdef CONFIG_XEN
505 +#ifdef CONFIG_PARAVIRT_XEN
506 ENTRY(xen_hypervisor_callback)
507 CFI_STARTPROC
508 pushl $0
509 @@ -1105,7 +1105,7 @@
510 .previous
511 ENDPROC(xen_failsafe_callback)
512
513 -#endif /* CONFIG_XEN */
514 +#endif /* CONFIG_PARAVIRT_XEN */
515
516 .section .rodata,"a"
517 #include "syscall_table_32.S"
518 diff -Naur linux-2.6.25/arch/x86/kernel/entry_32-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S
519 --- linux-2.6.25/arch/x86/kernel/entry_32-xen.S 2008-05-23 18:33:27.000000000 +0200
520 +++ linux-2.6.25-xen/arch/x86/kernel/entry_32-xen.S 2008-05-23 18:27:40.000000000 +0200
521 @@ -452,9 +452,6 @@
522 1: INTERRUPT_RETURN
523 .section .fixup,"ax"
524 iret_exc:
525 -#ifndef CONFIG_XEN
526 - ENABLE_INTERRUPTS(CLBR_NONE)
527 -#endif
528 pushl $0 # no error code
529 pushl $do_iret_error
530 jmp error_code
531 diff -Naur linux-2.6.25/arch/x86/kernel/entry_64-xen.S linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S
532 --- linux-2.6.25/arch/x86/kernel/entry_64-xen.S 2008-05-23 18:33:27.000000000 +0200
533 +++ linux-2.6.25-xen/arch/x86/kernel/entry_64-xen.S 2008-05-23 18:27:40.000000000 +0200
534 @@ -310,7 +310,7 @@
535 TRACE_IRQS_ON
536 /* sti */
537 XEN_UNBLOCK_EVENTS(%rsi)
538 - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
539 + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
540 jz 1f
541
542 /* Really a signal */
543 @@ -409,7 +409,7 @@
544 jmp int_restore_rest
545
546 int_signal:
547 - testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
548 + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
549 jz 1f
550 movq %rsp,%rdi # &ptregs -> arg1
551 xorl %esi,%esi # oldset -> arg2
552 @@ -552,7 +552,7 @@
553 jmp retint_check
554
555 retint_signal:
556 - testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
557 + testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
558 jz retint_restore_args
559 TRACE_IRQS_ON
560 XEN_UNBLOCK_EVENTS(%rsi)
561 diff -Naur linux-2.6.25/arch/x86/kernel/head_32-xen.S linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S
562 --- linux-2.6.25/arch/x86/kernel/head_32-xen.S 2008-05-23 18:33:27.000000000 +0200
563 +++ linux-2.6.25-xen/arch/x86/kernel/head_32-xen.S 2008-05-23 18:27:40.000000000 +0200
564 @@ -86,7 +86,10 @@
565 /*
566 * BSS section
567 */
568 -.section ".bss.page_aligned","w"
569 +.section ".bss.page_aligned","wa"
570 + .align PAGE_SIZE_asm
571 +ENTRY(swapper_pg_pmd)
572 + .fill 1024,4,0
573 ENTRY(empty_zero_page)
574 .fill 4096,1,0
575
576 @@ -136,25 +139,25 @@
577 #endif /* CONFIG_XEN_COMPAT <= 0x030002 */
578
579
580 - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
581 - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
582 - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
583 - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, __PAGE_OFFSET)
584 + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
585 + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
586 + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
587 + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET)
588 #if CONFIG_XEN_COMPAT <= 0x030002
589 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, __PAGE_OFFSET)
590 + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long __PAGE_OFFSET)
591 #else
592 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, 0)
593 + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long 0)
594 #endif
595 - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32)
596 - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
597 - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
598 - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
599 + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_32)
600 + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
601 + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long HYPERVISOR_VIRT_START)
602 + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
603 #ifdef CONFIG_X86_PAE
604 - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
605 - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT)
606 + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
607 + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT)
608 #else
609 - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no")
610 - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, _PAGE_PRESENT,_PAGE_PRESENT)
611 + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
612 + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long _PAGE_PRESENT, _PAGE_PRESENT)
613 #endif
614 - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
615 - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
616 + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
617 + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
618 diff -Naur linux-2.6.25/arch/x86/kernel/head64-xen.c linux-2.6.25-xen/arch/x86/kernel/head64-xen.c
619 --- linux-2.6.25/arch/x86/kernel/head64-xen.c 2008-05-23 18:33:27.000000000 +0200
620 +++ linux-2.6.25-xen/arch/x86/kernel/head64-xen.c 2008-05-23 18:27:40.000000000 +0200
621 @@ -87,7 +87,7 @@
622 unsigned long machine_to_phys_nr_ents;
623 int i;
624
625 - setup_xen_features();
626 + xen_setup_features();
627
628 xen_start_info = (struct start_info *)real_mode_data;
629 if (!xen_feature(XENFEAT_auto_translated_physmap))
630 diff -Naur linux-2.6.25/arch/x86/kernel/head_64-xen.S linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S
631 --- linux-2.6.25/arch/x86/kernel/head_64-xen.S 2008-05-23 18:33:27.000000000 +0200
632 +++ linux-2.6.25-xen/arch/x86/kernel/head_64-xen.S 2008-05-23 18:27:40.000000000 +0200
633 @@ -23,7 +23,7 @@
634 #include <asm/dwarf2.h>
635 #include <xen/interface/elfnote.h>
636
637 - .section .bootstrap.text, "ax", @progbits
638 + .section .text.head, "ax", @progbits
639 .code64
640 .globl startup_64
641 startup_64:
642 @@ -47,7 +47,7 @@
643
644 #define NEXT_PAGE(name) \
645 .balign PAGE_SIZE; \
646 - phys_##name = . - .bootstrap.text; \
647 + phys_##name = . - .text.head; \
648 ENTRY(name)
649
650 NEXT_PAGE(init_level4_pgt)
651 @@ -75,6 +75,12 @@
652 NEXT_PAGE(level2_kernel_pgt)
653 .fill 512,8,0
654
655 +NEXT_PAGE(level2_fixmap_pgt)
656 + .fill 512,8,0
657 +
658 +NEXT_PAGE(level1_fixmap_pgt)
659 + .fill 512,8,0
660 +
661 NEXT_PAGE(hypercall_page)
662 CFI_STARTPROC
663 .rept 0x1000 / 0x20
664 @@ -189,18 +195,18 @@
665 .byte 0
666 #endif /* CONFIG_XEN_COMPAT <= 0x030002 */
667
668 - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "linux")
669 - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "2.6")
670 - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0")
671 - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, __START_KERNEL_map)
672 + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
673 + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
674 + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
675 + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad __START_KERNEL_map)
676 #if CONFIG_XEN_COMPAT <= 0x030002
677 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, __START_KERNEL_map)
678 + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad __START_KERNEL_map)
679 #else
680 - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0)
681 + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad 0)
682 #endif
683 - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, startup_64)
684 - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page)
685 - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad, _PAGE_PRESENT,_PAGE_PRESENT)
686 - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
687 - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic")
688 - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1)
689 + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad startup_64)
690 + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad hypercall_page)
691 + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .quad _PAGE_PRESENT, _PAGE_PRESENT)
692 + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
693 + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
694 + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
695 diff -Naur linux-2.6.25/arch/x86/kernel/init_task-xen.c linux-2.6.25-xen/arch/x86/kernel/init_task-xen.c
696 --- linux-2.6.25/arch/x86/kernel/init_task-xen.c 2008-05-23 18:32:29.000000000 +0200
697 +++ linux-2.6.25-xen/arch/x86/kernel/init_task-xen.c 2008-05-23 18:27:40.000000000 +0200
698 @@ -46,6 +46,6 @@
699 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
700 * no more per-task TSS's.
701 */
702 -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
703 +DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
704 #endif
705
706 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c
707 --- linux-2.6.25/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 18:33:27.000000000 +0200
708 +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_32-xen.c 2008-05-23 18:27:40.000000000 +0200
709 @@ -397,14 +397,6 @@
710 # include <linux/slab.h> /* kmalloc() */
711 # include <linux/timer.h> /* time_after() */
712
713 -#ifdef CONFIG_BALANCED_IRQ_DEBUG
714 -# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
715 -# define Dprintk(x...) do { TDprintk(x); } while (0)
716 -# else
717 -# define TDprintk(x...)
718 -# define Dprintk(x...)
719 -# endif
720 -
721 #define IRQBALANCE_CHECK_ARCH -999
722 #define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
723 #define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
724 @@ -487,7 +479,7 @@
725 static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
726 {
727 int i, j;
728 - Dprintk("Rotating IRQs among CPUs.\n");
729 +
730 for_each_online_cpu(i) {
731 for (j = 0; j < NR_IRQS; j++) {
732 if (!irq_desc[j].action)
733 @@ -604,19 +596,11 @@
734 max_loaded = tmp_loaded; /* processor */
735 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
736
737 - Dprintk("max_loaded cpu = %d\n", max_loaded);
738 - Dprintk("min_loaded cpu = %d\n", min_loaded);
739 - Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
740 - Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
741 - Dprintk("load imbalance = %lu\n", imbalance);
742 -
743 /* if imbalance is less than approx 10% of max load, then
744 * observe diminishing returns action. - quit
745 */
746 - if (imbalance < (max_cpu_irq >> 3)) {
747 - Dprintk("Imbalance too trivial\n");
748 + if (imbalance < (max_cpu_irq >> 3))
749 goto not_worth_the_effort;
750 - }
751
752 tryanotherirq:
753 /* if we select an IRQ to move that can't go where we want, then
754 @@ -673,9 +657,6 @@
755 cpus_and(tmp, target_cpu_mask, allowed_mask);
756
757 if (!cpus_empty(tmp)) {
758 -
759 - Dprintk("irq = %d moved to cpu = %d\n",
760 - selected_irq, min_loaded);
761 /* mark for change destination */
762 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
763
764 @@ -695,7 +676,6 @@
765 */
766 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
767 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
768 - Dprintk("IRQ worth rotating not found\n");
769 return;
770 }
771
772 @@ -711,6 +691,7 @@
773 set_pending_irq(i, cpumask_of_cpu(0));
774 }
775
776 + set_freezable();
777 for ( ; ; ) {
778 time_remaining = schedule_timeout_interruptible(time_remaining);
779 try_to_freeze();
780 @@ -820,14 +801,6 @@
781 static int pirqs_enabled;
782 int skip_ioapic_setup;
783
784 -static int __init ioapic_setup(char *str)
785 -{
786 - skip_ioapic_setup = 1;
787 - return 1;
788 -}
789 -
790 -__setup("noapic", ioapic_setup);
791 -
792 static int __init ioapic_pirq_setup(char *str)
793 {
794 int i, max;
795 @@ -1313,12 +1286,15 @@
796 static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
797 {
798 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
799 - trigger == IOAPIC_LEVEL)
800 + trigger == IOAPIC_LEVEL) {
801 + irq_desc[irq].status |= IRQ_LEVEL;
802 set_irq_chip_and_handler_name(irq, &ioapic_chip,
803 handle_fasteoi_irq, "fasteoi");
804 - else
805 + } else {
806 + irq_desc[irq].status &= ~IRQ_LEVEL;
807 set_irq_chip_and_handler_name(irq, &ioapic_chip,
808 handle_edge_irq, "edge");
809 + }
810 set_intr_gate(vector, interrupt[irq]);
811 }
812 #else
813 @@ -1943,7 +1919,7 @@
814 * - if this function detects that timer IRQs are defunct, then we fall
815 * back to ISA timer IRQs
816 */
817 -int __init timer_irq_works(void)
818 +static int __init timer_irq_works(void)
819 {
820 unsigned long t1 = jiffies;
821
822 diff -Naur linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c
823 --- linux-2.6.25/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 18:33:27.000000000 +0200
824 +++ linux-2.6.25-xen/arch/x86/kernel/io_apic_64-xen.c 2008-05-23 18:27:40.000000000 +0200
825 @@ -164,7 +164,9 @@
826 #endif
827 }
828
829 -#ifndef CONFIG_XEN
830 +#ifdef CONFIG_XEN
831 +#define io_apic_modify io_apic_write
832 +#else
833 /*
834 * Re-write a value: to be used for read-modify-write
835 * cycles where the read already set up the index register.
836 @@ -174,8 +176,32 @@
837 struct io_apic __iomem *io_apic = io_apic_base(apic);
838 writel(value, &io_apic->data);
839 }
840 -#else
841 -#define io_apic_modify io_apic_write
842 +
843 +static int io_apic_level_ack_pending(unsigned int irq)
844 +{
845 + struct irq_pin_list *entry;
846 + unsigned long flags;
847 + int pending = 0;
848 +
849 + spin_lock_irqsave(&ioapic_lock, flags);
850 + entry = irq_2_pin + irq;
851 + for (;;) {
852 + unsigned int reg;
853 + int pin;
854 +
855 + pin = entry->pin;
856 + if (pin == -1)
857 + break;
858 + reg = io_apic_read(entry->apic, 0x10 + pin*2);
859 + /* Is the remote IRR bit set? */
860 + pending |= (reg >> 14) & 1;
861 + if (!entry->next)
862 + break;
863 + entry = irq_2_pin + entry->next;
864 + }
865 + spin_unlock_irqrestore(&ioapic_lock, flags);
866 + return pending;
867 +}
868 #endif
869
870 /*
871 @@ -403,14 +429,12 @@
872 int skip_ioapic_setup;
873 int ioapic_force;
874
875 -/* dummy parsing: see setup.c */
876 -
877 -static int __init disable_ioapic_setup(char *str)
878 +static int __init parse_noapic(char *str)
879 {
880 - skip_ioapic_setup = 1;
881 + disable_ioapic_setup();
882 return 0;
883 }
884 -early_param("noapic", disable_ioapic_setup);
885 +early_param("noapic", parse_noapic);
886
887 /* Actually the next is obsolete, but keep it for paranoid reasons -AK */
888 static int __init disable_timer_pin_setup(char *arg)
889 @@ -758,12 +782,15 @@
890
891 static void ioapic_register_intr(int irq, unsigned long trigger)
892 {
893 - if (trigger)
894 + if (trigger) {
895 + irq_desc[irq].status |= IRQ_LEVEL;
896 set_irq_chip_and_handler_name(irq, &ioapic_chip,
897 handle_fasteoi_irq, "fasteoi");
898 - else
899 + } else {
900 + irq_desc[irq].status &= ~IRQ_LEVEL;
901 set_irq_chip_and_handler_name(irq, &ioapic_chip,
902 handle_edge_irq, "edge");
903 + }
904 }
905 #else
906 #define ioapic_register_intr(irq,trigger) ((void)0)
907 @@ -1412,9 +1439,37 @@
908 ack_APIC_irq();
909
910 /* Now we can move and renable the irq */
911 - move_masked_irq(irq);
912 - if (unlikely(do_unmask_irq))
913 + if (unlikely(do_unmask_irq)) {
914 + /* Only migrate the irq if the ack has been received.
915 + *
916 + * On rare occasions the broadcast level triggered ack gets
917 + * delayed going to ioapics, and if we reprogram the
918 + * vector while Remote IRR is still set the irq will never
919 + * fire again.
920 + *
921 + * To prevent this scenario we read the Remote IRR bit
922 + * of the ioapic. This has two effects.
923 + * - On any sane system the read of the ioapic will
924 + * flush writes (and acks) going to the ioapic from
925 + * this cpu.
926 + * - We get to see if the ACK has actually been delivered.
927 + *
928 + * Based on failed experiments of reprogramming the
929 + * ioapic entry from outside of irq context starting
930 + * with masking the ioapic entry and then polling until
931 + * Remote IRR was clear before reprogramming the
932 + * ioapic I don't trust the Remote IRR bit to be
933 + * completey accurate.
934 + *
935 + * However there appears to be no other way to plug
936 + * this race, so if the Remote IRR bit is not
937 + * accurate and is causing problems then it is a hardware bug
938 + * and you can go talk to the chipset vendor about it.
939 + */
940 + if (!io_apic_level_ack_pending(irq))
941 + move_masked_irq(irq);
942 unmask_IO_APIC_irq(irq);
943 + }
944 }
945
946 static struct irq_chip ioapic_chip __read_mostly = {
947 diff -Naur linux-2.6.25/arch/x86/kernel/irq_32-xen.c linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c
948 --- linux-2.6.25/arch/x86/kernel/irq_32-xen.c 2008-05-23 18:33:27.000000000 +0200
949 +++ linux-2.6.25-xen/arch/x86/kernel/irq_32-xen.c 2008-05-23 18:27:40.000000000 +0200
950 @@ -21,7 +21,7 @@
951 #include <asm/apic.h>
952 #include <asm/uaccess.h>
953
954 -DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
955 +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
956 EXPORT_PER_CPU_SYMBOL(irq_stat);
957
958 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
959 @@ -149,15 +149,11 @@
960
961 #ifdef CONFIG_4KSTACKS
962
963 -/*
964 - * These should really be __section__(".bss.page_aligned") as well, but
965 - * gcc's 3.0 and earlier don't handle that correctly.
966 - */
967 static char softirq_stack[NR_CPUS * THREAD_SIZE]
968 - __attribute__((__aligned__(THREAD_SIZE)));
969 + __attribute__((__section__(".bss.page_aligned")));
970
971 static char hardirq_stack[NR_CPUS * THREAD_SIZE]
972 - __attribute__((__aligned__(THREAD_SIZE)));
973 + __attribute__((__section__(".bss.page_aligned")));
974
975 /*
976 * allocate per-cpu stacks for hardirq and for softirq processing
977 diff -Naur linux-2.6.25/arch/x86/kernel/ldt_64-xen.c linux-2.6.25-xen/arch/x86/kernel/ldt_64-xen.c
978 --- linux-2.6.25/arch/x86/kernel/ldt_64-xen.c 2008-05-23 18:33:27.000000000 +0200
979 +++ linux-2.6.25-xen/arch/x86/kernel/ldt_64-xen.c 2008-05-23 18:27:40.000000000 +0200
980 @@ -114,6 +114,8 @@
981 memset(&mm->context, 0, sizeof(mm->context));
982 init_MUTEX(&mm->context.sem);
983 old_mm = current->mm;
984 + if (old_mm)
985 + mm->context.vdso = old_mm->context.vdso;
986 if (old_mm && old_mm->context.size > 0) {
987 down(&old_mm->context.sem);
988 retval = copy_ldt(&mm->context, &old_mm->context);
989 diff -Naur linux-2.6.25/arch/x86/kernel/microcode-xen.c linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c
990 --- linux-2.6.25/arch/x86/kernel/microcode-xen.c 2008-05-23 18:33:27.000000000 +0200
991 +++ linux-2.6.25-xen/arch/x86/kernel/microcode-xen.c 2008-05-23 18:27:40.000000000 +0200
992 @@ -33,6 +33,7 @@
993 #include <linux/miscdevice.h>
994 #include <linux/spinlock.h>
995 #include <linux/mm.h>
996 +#include <linux/fs.h>
997 #include <linux/mutex.h>
998 #include <linux/cpu.h>
999 #include <linux/firmware.h>
1000 diff -Naur linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c
1001 --- linux-2.6.25/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1002 +++ linux-2.6.25-xen/arch/x86/kernel/mpparse_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1003 @@ -32,7 +32,6 @@
1004
1005 /* Have we found an MP table */
1006 int smp_found_config;
1007 -unsigned int __initdata maxcpus = NR_CPUS;
1008
1009 /*
1010 * Various Linux-internal data structures created from the
1011 @@ -657,6 +656,20 @@
1012 return -1;
1013 }
1014
1015 +static u8 uniq_ioapic_id(u8 id)
1016 +{
1017 + int i;
1018 + DECLARE_BITMAP(used, 256);
1019 + bitmap_zero(used, 256);
1020 + for (i = 0; i < nr_ioapics; i++) {
1021 + struct mpc_config_ioapic *ia = &mp_ioapics[i];
1022 + __set_bit(ia->mpc_apicid, used);
1023 + }
1024 + if (!test_bit(id, used))
1025 + return id;
1026 + return find_first_zero_bit(used, 256);
1027 +}
1028 +
1029 void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
1030 {
1031 int idx = 0;
1032 @@ -664,7 +677,7 @@
1033 if (bad_ioapic(address))
1034 return;
1035
1036 - idx = nr_ioapics++;
1037 + idx = nr_ioapics;
1038
1039 mp_ioapics[idx].mpc_type = MP_IOAPIC;
1040 mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
1041 @@ -673,7 +686,7 @@
1042 #ifndef CONFIG_XEN
1043 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
1044 #endif
1045 - mp_ioapics[idx].mpc_apicid = id;
1046 + mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
1047 mp_ioapics[idx].mpc_apicver = 0;
1048
1049 /*
1050 @@ -690,6 +703,8 @@
1051 mp_ioapics[idx].mpc_apicaddr,
1052 mp_ioapic_routing[idx].gsi_start,
1053 mp_ioapic_routing[idx].gsi_end);
1054 +
1055 + nr_ioapics++;
1056 }
1057
1058 void __init
1059 diff -Naur linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c
1060 --- linux-2.6.25/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1061 +++ linux-2.6.25-xen/arch/x86/kernel/pci-dma_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1062 @@ -24,7 +24,7 @@
1063 #include <asm/bug.h>
1064
1065 #ifdef __x86_64__
1066 -#include <asm/proto.h>
1067 +#include <asm/iommu.h>
1068
1069 int iommu_merge __read_mostly = 0;
1070 EXPORT_SYMBOL(iommu_merge);
1071 diff -Naur linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c
1072 --- linux-2.6.25/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1073 +++ linux-2.6.25-xen/arch/x86/kernel/pci-swiotlb_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1074 @@ -5,7 +5,7 @@
1075 #include <linux/module.h>
1076 #include <linux/dma-mapping.h>
1077
1078 -#include <asm/proto.h>
1079 +#include <asm/iommu.h>
1080 #include <asm/swiotlb.h>
1081 #include <asm/dma.h>
1082
1083 diff -Naur linux-2.6.25/arch/x86/kernel/process_32-xen.c linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c
1084 --- linux-2.6.25/arch/x86/kernel/process_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1085 +++ linux-2.6.25-xen/arch/x86/kernel/process_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1086 @@ -241,6 +241,7 @@
1087 void show_regs(struct pt_regs * regs)
1088 {
1089 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
1090 + unsigned long d0, d1, d2, d3, d6, d7;
1091
1092 printk("\n");
1093 printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
1094 @@ -265,6 +266,17 @@
1095 cr3 = read_cr3();
1096 cr4 = read_cr4_safe();
1097 printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
1098 +
1099 + get_debugreg(d0, 0);
1100 + get_debugreg(d1, 1);
1101 + get_debugreg(d2, 2);
1102 + get_debugreg(d3, 3);
1103 + printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
1104 + d0, d1, d2, d3);
1105 + get_debugreg(d6, 6);
1106 + get_debugreg(d7, 7);
1107 + printk("DR6: %08lx DR7: %08lx\n", d6, d7);
1108 +
1109 show_trace(NULL, regs, &regs->esp);
1110 }
1111
1112 @@ -473,7 +485,30 @@
1113 return 1;
1114 }
1115
1116 -static noinline void __switch_to_xtra(struct task_struct *next_p)
1117 +#ifdef CONFIG_SECCOMP
1118 +void hard_disable_TSC(void)
1119 +{
1120 + write_cr4(read_cr4() | X86_CR4_TSD);
1121 +}
1122 +void disable_TSC(void)
1123 +{
1124 + preempt_disable();
1125 + if (!test_and_set_thread_flag(TIF_NOTSC))
1126 + /*
1127 + * Must flip the CPU state synchronously with
1128 + * TIF_NOTSC in the current running context.
1129 + */
1130 + hard_disable_TSC();
1131 + preempt_enable();
1132 +}
1133 +void hard_enable_TSC(void)
1134 +{
1135 + write_cr4(read_cr4() & ~X86_CR4_TSD);
1136 +}
1137 +#endif /* CONFIG_SECCOMP */
1138 +
1139 +static noinline void
1140 +__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
1141 {
1142 struct thread_struct *next;
1143
1144 @@ -488,33 +523,17 @@
1145 set_debugreg(next->debugreg[6], 6);
1146 set_debugreg(next->debugreg[7], 7);
1147 }
1148 -}
1149
1150 -/*
1151 - * This function selects if the context switch from prev to next
1152 - * has to tweak the TSC disable bit in the cr4.
1153 - */
1154 -static inline void disable_tsc(struct task_struct *prev_p,
1155 - struct task_struct *next_p)
1156 -{
1157 - struct thread_info *prev, *next;
1158 -
1159 - /*
1160 - * gcc should eliminate the ->thread_info dereference if
1161 - * has_secure_computing returns 0 at compile time (SECCOMP=n).
1162 - */
1163 - prev = task_thread_info(prev_p);
1164 - next = task_thread_info(next_p);
1165 -
1166 - if (has_secure_computing(prev) || has_secure_computing(next)) {
1167 - /* slow path here */
1168 - if (has_secure_computing(prev) &&
1169 - !has_secure_computing(next)) {
1170 - write_cr4(read_cr4() & ~X86_CR4_TSD);
1171 - } else if (!has_secure_computing(prev) &&
1172 - has_secure_computing(next))
1173 - write_cr4(read_cr4() | X86_CR4_TSD);
1174 +#ifdef CONFIG_SECCOMP
1175 + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
1176 + test_tsk_thread_flag(next_p, TIF_NOTSC)) {
1177 + /* prev and next are different */
1178 + if (test_tsk_thread_flag(next_p, TIF_NOTSC))
1179 + hard_disable_TSC();
1180 + else
1181 + hard_enable_TSC();
1182 }
1183 +#endif
1184 }
1185
1186 /*
1187 @@ -628,10 +647,9 @@
1188 /*
1189 * Now maybe handle debug registers
1190 */
1191 - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
1192 - __switch_to_xtra(next_p);
1193 -
1194 - disable_tsc(prev_p, next_p);
1195 + if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
1196 + task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
1197 + __switch_to_xtra(prev_p, next_p);
1198
1199 /*
1200 * Leave lazy mode, flushing any hypercalls made here.
1201 diff -Naur linux-2.6.25/arch/x86/kernel/process_64-xen.c linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c
1202 --- linux-2.6.25/arch/x86/kernel/process_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1203 +++ linux-2.6.25-xen/arch/x86/kernel/process_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1204 @@ -26,6 +26,7 @@
1205 #include <linux/sched.h>
1206 #include <linux/kernel.h>
1207 #include <linux/mm.h>
1208 +#include <linux/fs.h>
1209 #include <linux/elfcore.h>
1210 #include <linux/smp.h>
1211 #include <linux/slab.h>
1212 @@ -249,6 +250,7 @@
1213 void __show_regs(struct pt_regs * regs)
1214 {
1215 unsigned long fs, gs, shadowgs;
1216 + unsigned long d0, d1, d2, d3, d6, d7;
1217 unsigned int fsindex,gsindex;
1218 unsigned int ds,cs,es;
1219
1220 @@ -288,6 +290,14 @@
1221 fs,fsindex,gs,gsindex,shadowgs);
1222 printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es);
1223
1224 + get_debugreg(d0, 0);
1225 + get_debugreg(d1, 1);
1226 + get_debugreg(d2, 2);
1227 + printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
1228 + get_debugreg(d3, 3);
1229 + get_debugreg(d6, 6);
1230 + get_debugreg(d7, 7);
1231 + printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
1232 }
1233
1234 void show_regs(struct pt_regs *regs)
1235 diff -Naur linux-2.6.25/arch/x86/kernel/setup_32-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c
1236 --- linux-2.6.25/arch/x86/kernel/setup_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1237 +++ linux-2.6.25-xen/arch/x86/kernel/setup_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1238 @@ -117,19 +117,10 @@
1239 /*
1240 * Setup options
1241 */
1242 -struct drive_info_struct { char dummy[32]; } drive_info;
1243 -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
1244 - defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
1245 -EXPORT_SYMBOL(drive_info);
1246 -#endif
1247 struct screen_info screen_info;
1248 EXPORT_SYMBOL(screen_info);
1249 struct apm_info apm_info;
1250 EXPORT_SYMBOL(apm_info);
1251 -struct sys_desc_table_struct {
1252 - unsigned short length;
1253 - unsigned char table[0];
1254 -};
1255 struct edid_info edid_info;
1256 EXPORT_SYMBOL_GPL(edid_info);
1257 #ifndef CONFIG_XEN
1258 @@ -152,7 +143,7 @@
1259
1260 static char __initdata command_line[COMMAND_LINE_SIZE];
1261
1262 -unsigned char __initdata boot_params[PARAM_SIZE];
1263 +struct boot_params __initdata boot_params;
1264
1265 /*
1266 * Point at the empty zero page to start with. We map the real shared_info
1267 @@ -319,18 +310,18 @@
1268 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1269 MAXMEM>>20);
1270 if (max_pfn > MAX_NONPAE_PFN)
1271 - printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1272 + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
1273 else
1274 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1275 max_pfn = MAXMEM_PFN;
1276 #else /* !CONFIG_HIGHMEM */
1277 -#ifndef CONFIG_X86_PAE
1278 +#ifndef CONFIG_HIGHMEM64G
1279 if (max_pfn > MAX_NONPAE_PFN) {
1280 max_pfn = MAX_NONPAE_PFN;
1281 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1282 - printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1283 + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
1284 }
1285 -#endif /* !CONFIG_X86_PAE */
1286 +#endif /* !CONFIG_HIGHMEM64G */
1287 #endif /* !CONFIG_HIGHMEM */
1288 } else {
1289 if (highmem_pages == -1)
1290 @@ -517,7 +508,7 @@
1291 *
1292 * This should all compile down to nothing when NUMA is off.
1293 */
1294 -void __init remapped_pgdat_init(void)
1295 +static void __init remapped_pgdat_init(void)
1296 {
1297 int nid;
1298
1299 @@ -592,7 +583,6 @@
1300 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1301 */
1302 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1303 - drive_info = DRIVE_INFO;
1304 screen_info = SCREEN_INFO;
1305 copy_edid();
1306 apm_info.bios = APM_BIOS_INFO;
1307 @@ -770,6 +760,8 @@
1308 * NOTE: at this point the bootmem allocator is fully available.
1309 */
1310
1311 + paravirt_post_allocator_init();
1312 +
1313 if (is_initial_xendomain())
1314 dmi_scan_machine();
1315
1316 @@ -817,6 +809,7 @@
1317 #endif
1318
1319 e820_register_memory();
1320 + e820_mark_nosave_regions();
1321
1322 if (is_initial_xendomain()) {
1323 #ifdef CONFIG_VT
1324 diff -Naur linux-2.6.25/arch/x86/kernel/setup64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c
1325 --- linux-2.6.25/arch/x86/kernel/setup64-xen.c 2008-05-23 18:33:27.000000000 +0200
1326 +++ linux-2.6.25-xen/arch/x86/kernel/setup64-xen.c 2008-05-23 18:27:40.000000000 +0200
1327 @@ -125,11 +125,14 @@
1328 }
1329
1330 #ifdef CONFIG_XEN
1331 -static void switch_pt(void)
1332 +static void __init_refok switch_pt(int cpu)
1333 {
1334 + if (cpu == 0)
1335 + xen_init_pt();
1336 xen_pt_switch(__pa_symbol(init_level4_pgt));
1337 xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
1338 }
1339 +#define switch_pt() switch_pt(cpu)
1340
1341 static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
1342 {
1343 @@ -185,9 +188,6 @@
1344 pda->mmu_state = 0;
1345
1346 if (cpu == 0) {
1347 -#ifdef CONFIG_XEN
1348 - xen_init_pt();
1349 -#endif
1350 /* others are initialized in smpboot.c */
1351 pda->pcurrent = &init_task;
1352 pda->irqstackptr = boot_cpu_stack;
1353 diff -Naur linux-2.6.25/arch/x86/kernel/setup_64-xen.c linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c
1354 --- linux-2.6.25/arch/x86/kernel/setup_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1355 +++ linux-2.6.25-xen/arch/x86/kernel/setup_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1356 @@ -840,6 +840,8 @@
1357 level = cpuid_eax(1);
1358 if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
1359 set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
1360 + if (c->x86 == 0x10)
1361 + set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
1362
1363 /* Enable workaround for FXSAVE leak */
1364 if (c->x86 >= 6)
1365 @@ -865,8 +867,14 @@
1366 if (c->extended_cpuid_level >= 0x80000008)
1367 amd_detect_cmp(c);
1368
1369 - /* Fix cpuid4 emulation for more */
1370 - num_cache_leaves = 3;
1371 + if (c->extended_cpuid_level >= 0x80000006 &&
1372 + (cpuid_edx(0x80000006) & 0xf000))
1373 + num_cache_leaves = 4;
1374 + else
1375 + num_cache_leaves = 3;
1376 +
1377 + if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
1378 + set_bit(X86_FEATURE_K8, &c->x86_capability);
1379
1380 /* RDTSC can be speculated around */
1381 clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
1382 @@ -1111,6 +1119,8 @@
1383 c->x86_capability[2] = cpuid_edx(0x80860001);
1384 }
1385
1386 + init_scattered_cpuid_features(c);
1387 +
1388 c->apicid = phys_pkg_id(0);
1389
1390 /*
1391 @@ -1196,7 +1206,7 @@
1392 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
1393 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
1394 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
1395 - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
1396 + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
1397
1398 /* AMD-defined */
1399 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1400 @@ -1212,10 +1222,11 @@
1401 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1402
1403 /* Other (Linux-defined) */
1404 - "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
1405 - "constant_tsc", NULL, NULL,
1406 - "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1407 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1408 + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
1409 + NULL, NULL, NULL, NULL,
1410 + "constant_tsc", "up", NULL, "arch_perfmon",
1411 + "pebs", "bts", NULL, "sync_rdtsc",
1412 + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1413 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1414
1415 /* Intel-defined (#2) */
1416 @@ -1226,7 +1237,7 @@
1417
1418 /* VIA/Cyrix/Centaur-defined */
1419 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
1420 - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1421 + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
1422 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1423 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1424
1425 @@ -1237,6 +1248,12 @@
1426 "osvw", "ibs", NULL, NULL, NULL, NULL,
1427 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1428 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1429 +
1430 + /* Auxiliary (Linux-defined) */
1431 + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1432 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1433 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1434 + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1435 };
1436 static char *x86_power_flags[] = {
1437 "ts", /* temperature sensor */
1438 diff -Naur linux-2.6.25/arch/x86/kernel/smp_32-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c
1439 --- linux-2.6.25/arch/x86/kernel/smp_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1440 +++ linux-2.6.25-xen/arch/x86/kernel/smp_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1441 @@ -22,6 +22,7 @@
1442
1443 #include <asm/mtrr.h>
1444 #include <asm/tlbflush.h>
1445 +#include <asm/mmu_context.h>
1446 #if 0
1447 #include <mach_apic.h>
1448 #endif
1449 @@ -217,13 +218,13 @@
1450 static DEFINE_SPINLOCK(tlbstate_lock);
1451
1452 /*
1453 - * We cannot call mmdrop() because we are in interrupt context,
1454 + * We cannot call mmdrop() because we are in interrupt context,
1455 * instead update mm->cpu_vm_mask.
1456 *
1457 * We need to reload %cr3 since the page tables may be going
1458 * away from under us..
1459 */
1460 -static inline void leave_mm (unsigned long cpu)
1461 +void leave_mm(unsigned long cpu)
1462 {
1463 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
1464 BUG();
1465 diff -Naur linux-2.6.25/arch/x86/kernel/smp_64-xen.c linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c
1466 --- linux-2.6.25/arch/x86/kernel/smp_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1467 +++ linux-2.6.25-xen/arch/x86/kernel/smp_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1468 @@ -362,7 +362,7 @@
1469 }
1470
1471 /*
1472 - * smp_call_function_single - Run a function on another CPU
1473 + * smp_call_function_single - Run a function on a specific CPU
1474 * @func: The function to run. This must be fast and non-blocking.
1475 * @info: An arbitrary pointer to pass to the function.
1476 * @nonatomic: Currently unused.
1477 @@ -379,17 +379,21 @@
1478 {
1479 /* prevent preemption and reschedule on another processor */
1480 int me = get_cpu();
1481 +
1482 + /* Can deadlock when called with interrupts disabled */
1483 + WARN_ON(irqs_disabled());
1484 +
1485 if (cpu == me) {
1486 + local_irq_disable();
1487 + func(info);
1488 + local_irq_enable();
1489 put_cpu();
1490 return 0;
1491 }
1492
1493 - /* Can deadlock when called with interrupts disabled */
1494 - WARN_ON(irqs_disabled());
1495 -
1496 - spin_lock_bh(&call_lock);
1497 + spin_lock(&call_lock);
1498 __smp_call_function_single(cpu, func, info, nonatomic, wait);
1499 - spin_unlock_bh(&call_lock);
1500 + spin_unlock(&call_lock);
1501 put_cpu();
1502 return 0;
1503 }
1504 diff -Naur linux-2.6.25/arch/x86/kernel/time_32-xen.c linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c
1505 --- linux-2.6.25/arch/x86/kernel/time_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1506 +++ linux-2.6.25-xen/arch/x86/kernel/time_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1507 @@ -76,11 +76,12 @@
1508 #include <xen/evtchn.h>
1509 #include <xen/interface/vcpu.h>
1510
1511 -#ifdef CONFIG_X86_32
1512 #include <asm/i8253.h>
1513 DEFINE_SPINLOCK(i8253_lock);
1514 EXPORT_SYMBOL(i8253_lock);
1515 -#else
1516 +
1517 +#ifdef CONFIG_X86_64
1518 +#include <asm/vsyscall.h>
1519 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
1520 #endif
1521
1522 @@ -209,6 +210,26 @@
1523 return product;
1524 }
1525
1526 +static inline u64 get64(volatile u64 *ptr)
1527 +{
1528 +#ifndef CONFIG_64BIT
1529 + return cmpxchg64(ptr, 0, 0);
1530 +#else
1531 + return *ptr;
1532 +#define cmpxchg64 cmpxchg
1533 +#endif
1534 +}
1535 +
1536 +static inline u64 get64_local(volatile u64 *ptr)
1537 +{
1538 +#ifndef CONFIG_64BIT
1539 + return cmpxchg64_local(ptr, 0, 0);
1540 +#else
1541 + return *ptr;
1542 +#define cmpxchg64_local cmpxchg_local
1543 +#endif
1544 +}
1545 +
1546 #if 0 /* defined (__i386__) */
1547 int read_current_timer(unsigned long *timer_val)
1548 {
1549 @@ -391,7 +412,7 @@
1550 return retval;
1551 }
1552
1553 -unsigned long long sched_clock(void)
1554 +static unsigned long long local_clock(void)
1555 {
1556 unsigned int cpu = get_cpu();
1557 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1558 @@ -412,6 +433,61 @@
1559 return time;
1560 }
1561
1562 +/*
1563 + * Runstate accounting
1564 + */
1565 +static void get_runstate_snapshot(struct vcpu_runstate_info *res)
1566 +{
1567 + u64 state_time;
1568 + struct vcpu_runstate_info *state;
1569 +
1570 + BUG_ON(preemptible());
1571 +
1572 + state = &__get_cpu_var(runstate);
1573 +
1574 + do {
1575 + state_time = get64_local(&state->state_entry_time);
1576 + *res = *state;
1577 + } while (get64_local(&state->state_entry_time) != state_time);
1578 +
1579 + WARN_ON_ONCE(res->state != RUNSTATE_running);
1580 +}
1581 +
1582 +/*
1583 + * Xen sched_clock implementation. Returns the number of unstolen
1584 + * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
1585 + * states.
1586 + */
1587 +unsigned long long sched_clock(void)
1588 +{
1589 + struct vcpu_runstate_info runstate;
1590 + cycle_t now;
1591 + u64 ret;
1592 + s64 offset;
1593 +
1594 + /*
1595 + * Ideally sched_clock should be called on a per-cpu basis
1596 + * anyway, so preempt should already be disabled, but that's
1597 + * not current practice at the moment.
1598 + */
1599 + preempt_disable();
1600 +
1601 + now = local_clock();
1602 +
1603 + get_runstate_snapshot(&runstate);
1604 +
1605 + offset = now - runstate.state_entry_time;
1606 + if (offset < 0)
1607 + offset = 0;
1608 +
1609 + ret = offset + runstate.time[RUNSTATE_running]
1610 + + runstate.time[RUNSTATE_blocked];
1611 +
1612 + preempt_enable();
1613 +
1614 + return ret;
1615 +}
1616 +
1617 unsigned long profile_pc(struct pt_regs *regs)
1618 {
1619 unsigned long pc = instruction_pointer(regs);
1620 @@ -459,10 +535,9 @@
1621 irqreturn_t timer_interrupt(int irq, void *dev_id)
1622 {
1623 s64 delta, delta_cpu, stolen, blocked;
1624 - u64 sched_time;
1625 unsigned int i, cpu = smp_processor_id();
1626 struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
1627 - struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
1628 + struct vcpu_runstate_info runstate;
1629
1630 /*
1631 * Here we are in the timer irq handler. We just have irqs locally
1632 @@ -482,20 +557,7 @@
1633 delta -= processed_system_time;
1634 delta_cpu -= per_cpu(processed_system_time, cpu);
1635
1636 - /*
1637 - * Obtain a consistent snapshot of stolen/blocked cycles. We
1638 - * can use state_entry_time to detect if we get preempted here.
1639 - */
1640 - do {
1641 - sched_time = runstate->state_entry_time;
1642 - barrier();
1643 - stolen = runstate->time[RUNSTATE_runnable] +
1644 - runstate->time[RUNSTATE_offline] -
1645 - per_cpu(processed_stolen_time, cpu);
1646 - blocked = runstate->time[RUNSTATE_blocked] -
1647 - per_cpu(processed_blocked_time, cpu);
1648 - barrier();
1649 - } while (sched_time != runstate->state_entry_time);
1650 + get_runstate_snapshot(&runstate);
1651 } while (!time_values_up_to_date(cpu));
1652
1653 if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
1654 @@ -537,6 +599,9 @@
1655 * HACK: Passing NULL to account_steal_time()
1656 * ensures that the ticks are accounted as stolen.
1657 */
1658 + stolen = runstate.time[RUNSTATE_runnable]
1659 + + runstate.time[RUNSTATE_offline]
1660 + - per_cpu(processed_stolen_time, cpu);
1661 if ((stolen > 0) && (delta_cpu > 0)) {
1662 delta_cpu -= stolen;
1663 if (unlikely(delta_cpu < 0))
1664 @@ -552,6 +617,8 @@
1665 * HACK: Passing idle_task to account_steal_time()
1666 * ensures that the ticks are accounted as idle/wait.
1667 */
1668 + blocked = runstate.time[RUNSTATE_blocked]
1669 + - per_cpu(processed_blocked_time, cpu);
1670 if ((blocked > 0) && (delta_cpu > 0)) {
1671 delta_cpu -= blocked;
1672 if (unlikely(delta_cpu < 0))
1673 @@ -600,16 +667,11 @@
1674
1675 static cycle_t xen_clocksource_read(void)
1676 {
1677 - cycle_t ret = sched_clock();
1678 + cycle_t ret = local_clock();
1679
1680 #ifdef CONFIG_SMP
1681 for (;;) {
1682 -#ifndef CONFIG_64BIT
1683 - cycle_t last = cmpxchg64(&cs_last, 0, 0);
1684 -#else
1685 - cycle_t last = cs_last;
1686 -#define cmpxchg64 cmpxchg
1687 -#endif
1688 + cycle_t last = get64(&cs_last);
1689
1690 if ((s64)(ret - last) < 0) {
1691 if (last - ret > permitted_clock_jitter
1692 @@ -640,7 +702,7 @@
1693 extern void time_resume(void);
1694
1695 time_resume();
1696 - cs_last = sched_clock();
1697 + cs_last = local_clock();
1698 }
1699
1700 static struct clocksource clocksource_xen = {
1701 @@ -688,56 +750,10 @@
1702 return retval;
1703 }
1704
1705 -static void sync_cmos_clock(unsigned long dummy);
1706 -
1707 -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
1708 -int no_sync_cmos_clock;
1709 -
1710 -static void sync_cmos_clock(unsigned long dummy)
1711 -{
1712 - struct timeval now, next;
1713 - int fail = 1;
1714 -
1715 - /*
1716 - * If we have an externally synchronized Linux clock, then update
1717 - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
1718 - * called as close as possible to 500 ms before the new second starts.
1719 - * This code is run on a timer. If the clock is set, that timer
1720 - * may not expire at the correct time. Thus, we adjust...
1721 - */
1722 - if (!ntp_synced())
1723 - /*
1724 - * Not synced, exit, do not restart a timer (if one is
1725 - * running, let it run out).
1726 - */
1727 - return;
1728 -
1729 - do_gettimeofday(&now);
1730 - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
1731 - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
1732 - fail = set_rtc_mmss(now.tv_sec);
1733 -
1734 - next.tv_usec = USEC_AFTER - now.tv_usec;
1735 - if (next.tv_usec <= 0)
1736 - next.tv_usec += USEC_PER_SEC;
1737 -
1738 - if (!fail)
1739 - next.tv_sec = 659;
1740 - else
1741 - next.tv_sec = 0;
1742 -
1743 - if (next.tv_usec >= USEC_PER_SEC) {
1744 - next.tv_sec++;
1745 - next.tv_usec -= USEC_PER_SEC;
1746 - }
1747 - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
1748 -}
1749 -
1750 -void notify_arch_cmos_timer(void)
1751 +int update_persistent_clock(struct timespec now)
1752 {
1753 - if (!no_sync_cmos_clock)
1754 - mod_timer(&sync_cmos_timer, jiffies + 1);
1755 mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
1756 + return set_rtc_mmss(now.tv_sec);
1757 }
1758
1759 extern void (*late_time_init)(void);
1760 diff -Naur linux-2.6.25/arch/x86/kernel/traps_32-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c
1761 --- linux-2.6.25/arch/x86/kernel/traps_32-xen.c 2008-05-23 18:33:27.000000000 +0200
1762 +++ linux-2.6.25-xen/arch/x86/kernel/traps_32-xen.c 2008-05-23 18:27:40.000000000 +0200
1763 @@ -41,6 +41,10 @@
1764 #include <linux/mca.h>
1765 #endif
1766
1767 +#if defined(CONFIG_EDAC)
1768 +#include <linux/edac.h>
1769 +#endif
1770 +
1771 #include <asm/processor.h>
1772 #include <asm/system.h>
1773 #include <asm/io.h>
1774 @@ -102,36 +106,45 @@
1775 int kstack_depth_to_print = 24;
1776 static unsigned int code_bytes = 64;
1777
1778 -static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
1779 +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
1780 {
1781 return p > (void *)tinfo &&
1782 - p < (void *)tinfo + THREAD_SIZE - 3;
1783 + p <= (void *)tinfo + THREAD_SIZE - size;
1784 }
1785
1786 +/* The form of the top of the frame on the stack */
1787 +struct stack_frame {
1788 + struct stack_frame *next_frame;
1789 + unsigned long return_address;
1790 +};
1791 +
1792 static inline unsigned long print_context_stack(struct thread_info *tinfo,
1793 unsigned long *stack, unsigned long ebp,
1794 struct stacktrace_ops *ops, void *data)
1795 {
1796 - unsigned long addr;
1797 -
1798 #ifdef CONFIG_FRAME_POINTER
1799 - while (valid_stack_ptr(tinfo, (void *)ebp)) {
1800 - unsigned long new_ebp;
1801 - addr = *(unsigned long *)(ebp + 4);
1802 + struct stack_frame *frame = (struct stack_frame *)ebp;
1803 + while (valid_stack_ptr(tinfo, frame, sizeof(*frame))) {
1804 + struct stack_frame *next;
1805 + unsigned long addr;
1806 +
1807 + addr = frame->return_address;
1808 ops->address(data, addr);
1809 /*
1810 * break out of recursive entries (such as
1811 * end_of_stack_stop_unwind_function). Also,
1812 * we can never allow a frame pointer to
1813 * move downwards!
1814 - */
1815 - new_ebp = *(unsigned long *)ebp;
1816 - if (new_ebp <= ebp)
1817 + */
1818 + next = frame->next_frame;
1819 + if (next <= frame)
1820 break;
1821 - ebp = new_ebp;
1822 + frame = next;
1823 }
1824 #else
1825 - while (valid_stack_ptr(tinfo, stack)) {
1826 + while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
1827 + unsigned long addr;
1828 +
1829 addr = *stack++;
1830 if (__kernel_text_address(addr))
1831 ops->address(data, addr);
1832 @@ -154,7 +167,7 @@
1833 if (!stack) {
1834 unsigned long dummy;
1835 stack = &dummy;
1836 - if (task && task != current)
1837 + if (task != current)
1838 stack = (unsigned long *)task->thread.esp;
1839 }
1840
1841 @@ -213,6 +226,7 @@
1842 {
1843 printk("%s [<%08lx>] ", (char *)data, addr);
1844 print_symbol("%s\n", addr);
1845 + touch_nmi_watchdog();
1846 }
1847
1848 static struct stacktrace_ops print_trace_ops = {
1849 @@ -396,7 +410,7 @@
1850 unsigned long esp;
1851 unsigned short ss;
1852
1853 - report_bug(regs->eip);
1854 + report_bug(regs->eip, regs);
1855
1856 printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
1857 #ifdef CONFIG_PREEMPT
1858 @@ -439,6 +453,7 @@
1859
1860 bust_spinlocks(0);
1861 die.lock_owner = -1;
1862 + add_taint(TAINT_DIE);
1863 spin_unlock_irqrestore(&die.lock, flags);
1864
1865 if (!regs)
1866 @@ -523,10 +538,12 @@
1867 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
1868 }
1869
1870 -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
1871 +#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
1872 fastcall void do_##name(struct pt_regs * regs, long error_code) \
1873 { \
1874 siginfo_t info; \
1875 + if (irq) \
1876 + local_irq_enable(); \
1877 info.si_signo = signr; \
1878 info.si_errno = 0; \
1879 info.si_code = sicode; \
1880 @@ -566,13 +583,13 @@
1881 #endif
1882 DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
1883 DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
1884 -DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip)
1885 +DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->eip, 0)
1886 DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
1887 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
1888 DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
1889 DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
1890 -DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
1891 -DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
1892 +DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
1893 +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
1894
1895 fastcall void __kprobes do_general_protection(struct pt_regs * regs,
1896 long error_code)
1897 @@ -585,6 +602,13 @@
1898
1899 current->thread.error_code = error_code;
1900 current->thread.trap_no = 13;
1901 + if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
1902 + printk_ratelimit())
1903 + printk(KERN_INFO
1904 + "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
1905 + current->comm, current->pid,
1906 + regs->eip, regs->esp, error_code);
1907 +
1908 force_sig(SIGSEGV, current);
1909 return;
1910
1911 @@ -610,6 +634,14 @@
1912 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
1913 "CPU %d.\n", reason, smp_processor_id());
1914 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
1915 +
1916 +#if defined(CONFIG_EDAC)
1917 + if(edac_handler_set()) {
1918 + edac_atomic_assert_error();
1919 + return;
1920 + }
1921 +#endif
1922 +
1923 if (panic_on_unrecovered_nmi)
1924 panic("NMI: Not continuing");
1925
1926 @@ -720,6 +752,8 @@
1927 reassert_nmi();
1928 }
1929
1930 +static int ignore_nmis;
1931 +
1932 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
1933 {
1934 int cpu;
1935 @@ -730,11 +764,24 @@
1936
1937 ++nmi_count(cpu);
1938
1939 - default_do_nmi(regs);
1940 + if (!ignore_nmis)
1941 + default_do_nmi(regs);
1942
1943 nmi_exit();
1944 }
1945
1946 +void stop_nmi(void)
1947 +{
1948 + acpi_nmi_disable();
1949 + ignore_nmis++;
1950 +}
1951 +
1952 +void restart_nmi(void)
1953 +{
1954 + ignore_nmis--;
1955 + acpi_nmi_enable();
1956 +}
1957 +
1958 #ifdef CONFIG_KPROBES
1959 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
1960 {
1961 @@ -1023,6 +1070,7 @@
1962 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1963 tsk->fpu_counter++;
1964 }
1965 +EXPORT_SYMBOL_GPL(math_state_restore);
1966
1967 #ifndef CONFIG_MATH_EMULATION
1968
1969 diff -Naur linux-2.6.25/arch/x86/kernel/traps_64-xen.c linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c
1970 --- linux-2.6.25/arch/x86/kernel/traps_64-xen.c 2008-05-23 18:33:27.000000000 +0200
1971 +++ linux-2.6.25-xen/arch/x86/kernel/traps_64-xen.c 2008-05-23 18:27:40.000000000 +0200
1972 @@ -34,6 +34,10 @@
1973 #include <linux/bug.h>
1974 #include <linux/kdebug.h>
1975
1976 +#if defined(CONFIG_EDAC)
1977 +#include <linux/edac.h>
1978 +#endif
1979 +
1980 #include <asm/system.h>
1981 #include <asm/io.h>
1982 #include <asm/atomic.h>
1983 @@ -332,6 +336,7 @@
1984
1985 static void print_trace_address(void *data, unsigned long addr)
1986 {
1987 + touch_nmi_watchdog();
1988 printk_address(addr);
1989 }
1990
1991 @@ -520,6 +525,7 @@
1992 printk("\n");
1993 notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
1994 show_registers(regs);
1995 + add_taint(TAINT_DIE);
1996 /* Executive summary in case the oops scrolled away */
1997 printk(KERN_ALERT "RIP ");
1998 printk_address(regs->rip);
1999 @@ -533,7 +539,7 @@
2000 unsigned long flags = oops_begin();
2001
2002 if (!user_mode(regs))
2003 - report_bug(regs->rip);
2004 + report_bug(regs->rip, regs);
2005
2006 __die(str, regs, err);
2007 oops_end(flags);
2008 @@ -582,7 +588,8 @@
2009 tsk->thread.error_code = error_code;
2010 tsk->thread.trap_no = trapnr;
2011
2012 - if (exception_trace && unhandled_signal(tsk, signr))
2013 + if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
2014 + printk_ratelimit())
2015 printk(KERN_INFO
2016 "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
2017 tsk->comm, tsk->pid, str,
2018 @@ -686,7 +693,8 @@
2019 tsk->thread.error_code = error_code;
2020 tsk->thread.trap_no = 13;
2021
2022 - if (exception_trace && unhandled_signal(tsk, SIGSEGV))
2023 + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2024 + printk_ratelimit())
2025 printk(KERN_INFO
2026 "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
2027 tsk->comm, tsk->pid,
2028 @@ -721,6 +729,13 @@
2029 reason);
2030 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
2031
2032 +#if defined(CONFIG_EDAC)
2033 + if(edac_handler_set()) {
2034 + edac_atomic_assert_error();
2035 + return;
2036 + }
2037 +#endif
2038 +
2039 if (panic_on_unrecovered_nmi)
2040 panic("NMI: Not continuing");
2041
2042 diff -Naur linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c
2043 --- linux-2.6.25/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2044 +++ linux-2.6.25-xen/arch/x86/kernel/vsyscall_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2045 @@ -42,6 +42,7 @@
2046 #include <asm/segment.h>
2047 #include <asm/desc.h>
2048 #include <asm/topology.h>
2049 +#include <asm/vgtod.h>
2050
2051 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
2052 #define __syscall_clobber "r11","rcx","memory"
2053 @@ -57,26 +58,9 @@
2054 * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
2055 * Try to keep this structure as small as possible to avoid cache line ping pongs
2056 */
2057 -struct vsyscall_gtod_data_t {
2058 - seqlock_t lock;
2059 -
2060 - /* open coded 'struct timespec' */
2061 - time_t wall_time_sec;
2062 - u32 wall_time_nsec;
2063 -
2064 - int sysctl_enabled;
2065 - struct timezone sys_tz;
2066 - struct { /* extract of a clocksource struct */
2067 - cycle_t (*vread)(void);
2068 - cycle_t cycle_last;
2069 - cycle_t mask;
2070 - u32 mult;
2071 - u32 shift;
2072 - } clock;
2073 -};
2074 int __vgetcpu_mode __section_vgetcpu_mode;
2075
2076 -struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
2077 +struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
2078 {
2079 .lock = SEQLOCK_UNLOCKED,
2080 .sysctl_enabled = 1,
2081 @@ -96,6 +80,8 @@
2082 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
2083 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
2084 vsyscall_gtod_data.sys_tz = sys_tz;
2085 + vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
2086 + vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
2087 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
2088 }
2089
2090 diff -Naur linux-2.6.25/arch/x86/mach-xen/setup.c linux-2.6.25-xen/arch/x86/mach-xen/setup.c
2091 --- linux-2.6.25/arch/x86/mach-xen/setup.c 2008-05-23 18:33:23.000000000 +0200
2092 +++ linux-2.6.25-xen/arch/x86/mach-xen/setup.c 2008-05-23 18:27:40.000000000 +0200
2093 @@ -12,6 +12,7 @@
2094 #include <asm/e820.h>
2095 #include <asm/setup.h>
2096 #include <asm/fixmap.h>
2097 +#include <asm/pgtable.h>
2098
2099 #include <xen/interface/callback.h>
2100 #include <xen/interface/memory.h>
2101 @@ -101,7 +102,7 @@
2102
2103 init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;
2104
2105 - setup_xen_features();
2106 + xen_setup_features();
2107
2108 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
2109 hypervisor_virt_start = pp.virt_start;
2110 @@ -157,4 +158,18 @@
2111 HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
2112 }
2113 #endif
2114 +
2115 + /* Do an early initialization of the fixmap area */
2116 + {
2117 + extern pte_t swapper_pg_pmd[PTRS_PER_PTE];
2118 + unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
2119 + pgd_t *pgd = (pgd_t *)xen_start_info->pt_base;
2120 + pud_t *pud = pud_offset(pgd + pgd_index(addr), addr);
2121 + pmd_t *pmd = pmd_offset(pud, addr);
2122 +
2123 + swapper_pg_dir = pgd;
2124 + init_mm.pgd = pgd;
2125 + make_lowmem_page_readonly(swapper_pg_pmd, XENFEAT_writable_page_tables);
2126 + set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_pmd) | _PAGE_TABLE));
2127 + }
2128 }
2129 diff -Naur linux-2.6.25/arch/x86/Makefile linux-2.6.25-xen/arch/x86/Makefile
2130 --- linux-2.6.25/arch/x86/Makefile 2008-05-23 18:32:52.000000000 +0200
2131 +++ linux-2.6.25-xen/arch/x86/Makefile 2008-05-23 18:27:40.000000000 +0200
2132 @@ -173,7 +173,7 @@
2133 core-y += $(fcore-y)
2134
2135 # Xen paravirtualization support
2136 -core-$(CONFIG_XEN) += arch/x86/xen/
2137 +core-$(CONFIG_PARAVIRT_XEN) += arch/x86/xen/
2138
2139 # lguest paravirtualization support
2140 core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
2141 diff -Naur linux-2.6.25/arch/x86/mm/fault_32-xen.c linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c
2142 --- linux-2.6.25/arch/x86/mm/fault_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2143 +++ linux-2.6.25-xen/arch/x86/mm/fault_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2144 @@ -346,7 +346,7 @@
2145 pmd_k = pmd_offset(pud_k, address);
2146 if (!pmd_present(*pmd_k))
2147 return NULL;
2148 - if (!pmd_present(*pmd))
2149 + if (!pmd_present(*pmd)) {
2150 #if CONFIG_XEN_COMPAT > 0x030002
2151 set_pmd(pmd, *pmd_k);
2152 #else
2153 @@ -356,7 +356,8 @@
2154 */
2155 set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
2156 #endif
2157 - else
2158 + arch_flush_lazy_mmu_mode();
2159 + } else
2160 BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
2161 return pmd_k;
2162 }
2163 @@ -388,6 +389,8 @@
2164 return 0;
2165 }
2166
2167 +int show_unhandled_signals = 1;
2168 +
2169 /*
2170 * This routine handles page faults. It determines the address,
2171 * and the problem, and then passes it off to one of the appropriate
2172 @@ -408,6 +411,7 @@
2173 struct vm_area_struct * vma;
2174 unsigned long address;
2175 int write, si_code;
2176 + int fault;
2177
2178 /* get the address */
2179 address = read_cr2();
2180 @@ -541,20 +545,18 @@
2181 * make sure we exit gracefully rather than endlessly redo
2182 * the fault.
2183 */
2184 - switch (handle_mm_fault(mm, vma, address, write)) {
2185 - case VM_FAULT_MINOR:
2186 - tsk->min_flt++;
2187 - break;
2188 - case VM_FAULT_MAJOR:
2189 - tsk->maj_flt++;
2190 - break;
2191 - case VM_FAULT_SIGBUS:
2192 - goto do_sigbus;
2193 - case VM_FAULT_OOM:
2194 + fault = handle_mm_fault(mm, vma, address, write);
2195 + if (unlikely(fault & VM_FAULT_ERROR)) {
2196 + if (fault & VM_FAULT_OOM)
2197 goto out_of_memory;
2198 - default:
2199 - BUG();
2200 + else if (fault & VM_FAULT_SIGBUS)
2201 + goto do_sigbus;
2202 + BUG();
2203 }
2204 + if (fault & VM_FAULT_MAJOR)
2205 + tsk->maj_flt++;
2206 + else
2207 + tsk->min_flt++;
2208
2209 /*
2210 * Did it hit the DOS screen memory VA from vm86 mode?
2211 @@ -589,6 +591,14 @@
2212 if (is_prefetch(regs, address, error_code))
2213 return;
2214
2215 + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2216 + printk_ratelimit()) {
2217 + printk("%s%s[%d]: segfault at %08lx eip %08lx "
2218 + "esp %08lx error %lx\n",
2219 + tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
2220 + tsk->comm, tsk->pid, address, regs->eip,
2221 + regs->esp, error_code);
2222 + }
2223 tsk->thread.cr2 = address;
2224 /* Kernel addresses are always protection faults */
2225 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
2226 diff -Naur linux-2.6.25/arch/x86/mm/fault_64-xen.c linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c
2227 --- linux-2.6.25/arch/x86/mm/fault_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2228 +++ linux-2.6.25-xen/arch/x86/mm/fault_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2229 @@ -159,7 +159,9 @@
2230 pmd_t *pmd;
2231 pte_t *pte;
2232
2233 - pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
2234 + pgd = (pgd_t *)read_cr3();
2235 +
2236 + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
2237 pgd += pgd_index(address);
2238 if (bad_address(pgd)) goto bad;
2239 printk("PGD %lx ", pgd_val(*pgd));
2240 @@ -219,16 +221,6 @@
2241 return 0;
2242 }
2243
2244 -int unhandled_signal(struct task_struct *tsk, int sig)
2245 -{
2246 - if (is_init(tsk))
2247 - return 1;
2248 - if (tsk->ptrace & PT_PTRACED)
2249 - return 0;
2250 - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
2251 - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
2252 -}
2253 -
2254 static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
2255 unsigned long error_code)
2256 {
2257 @@ -302,8 +294,8 @@
2258 return 0;
2259 }
2260
2261 -int page_fault_trace = 0;
2262 -int exception_trace = 1;
2263 +static int page_fault_trace;
2264 +int show_unhandled_signals = 1;
2265
2266
2267 #define MEM_VERBOSE 1
2268 @@ -372,7 +364,7 @@
2269 struct vm_area_struct * vma;
2270 unsigned long address;
2271 const struct exception_table_entry *fixup;
2272 - int write;
2273 + int write, fault;
2274 unsigned long flags;
2275 siginfo_t info;
2276
2277 @@ -384,7 +376,7 @@
2278 prefetchw(&mm->mmap_sem);
2279
2280 /* get the address */
2281 - address = current_vcpu_info()->arch.cr2;
2282 + address = read_cr2();
2283
2284 info.si_code = SEGV_MAPERR;
2285
2286 @@ -445,6 +437,13 @@
2287 if (unlikely(in_atomic() || !mm))
2288 goto bad_area_nosemaphore;
2289
2290 + /*
2291 + * User-mode registers count as a user access even for any
2292 + * potential system fault or CPU buglet.
2293 + */
2294 + if (user_mode_vm(regs))
2295 + error_code |= PF_USER;
2296 +
2297 again:
2298 /* When running in the kernel we expect faults to occur only to
2299 * addresses in user space. All other faults represent errors in the
2300 @@ -511,19 +510,18 @@
2301 * make sure we exit gracefully rather than endlessly redo
2302 * the fault.
2303 */
2304 - switch (handle_mm_fault(mm, vma, address, write)) {
2305 - case VM_FAULT_MINOR:
2306 - tsk->min_flt++;
2307 - break;
2308 - case VM_FAULT_MAJOR:
2309 - tsk->maj_flt++;
2310 - break;
2311 - case VM_FAULT_SIGBUS:
2312 - goto do_sigbus;
2313 - default:
2314 - goto out_of_memory;
2315 + fault = handle_mm_fault(mm, vma, address, write);
2316 + if (unlikely(fault & VM_FAULT_ERROR)) {
2317 + if (fault & VM_FAULT_OOM)
2318 + goto out_of_memory;
2319 + else if (fault & VM_FAULT_SIGBUS)
2320 + goto do_sigbus;
2321 + BUG();
2322 }
2323 -
2324 + if (fault & VM_FAULT_MAJOR)
2325 + tsk->maj_flt++;
2326 + else
2327 + tsk->min_flt++;
2328 up_read(&mm->mmap_sem);
2329 return;
2330
2331 @@ -556,7 +554,8 @@
2332 (address >> 32))
2333 return;
2334
2335 - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
2336 + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
2337 + printk_ratelimit()) {
2338 printk(
2339 "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
2340 tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
2341 @@ -630,7 +629,7 @@
2342 }
2343 printk("VM: killing process %s\n", tsk->comm);
2344 if (error_code & 4)
2345 - do_exit(SIGKILL);
2346 + do_group_exit(SIGKILL);
2347 goto no_context;
2348
2349 do_sigbus:
2350 diff -Naur linux-2.6.25/arch/x86/mm/highmem_32-xen.c linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c
2351 --- linux-2.6.25/arch/x86/mm/highmem_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2352 +++ linux-2.6.25-xen/arch/x86/mm/highmem_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2353 @@ -34,17 +34,16 @@
2354 /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
2355 pagefault_disable();
2356
2357 - idx = type + KM_TYPE_NR*smp_processor_id();
2358 - BUG_ON(!pte_none(*(kmap_pte-idx)));
2359 -
2360 if (!PageHighMem(page))
2361 return page_address(page);
2362
2363 + idx = type + KM_TYPE_NR*smp_processor_id();
2364 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
2365 + BUG_ON(!pte_none(*(kmap_pte-idx)));
2366 set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
2367 arch_flush_lazy_mmu_mode();
2368
2369 - return (void*) vaddr;
2370 + return (void *)vaddr;
2371 }
2372
2373 void *kmap_atomic(struct page *page, enum km_type type)
2374 diff -Naur linux-2.6.25/arch/x86/mm/init_32-xen.c linux-2.6.25-xen/arch/x86/mm/init_32-xen.c
2375 --- linux-2.6.25/arch/x86/mm/init_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2376 +++ linux-2.6.25-xen/arch/x86/mm/init_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2377 @@ -98,7 +98,7 @@
2378 #endif
2379 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
2380
2381 - paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
2382 + paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
2383 make_lowmem_page_readonly(page_table,
2384 XENFEAT_writable_page_tables);
2385 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
2386 @@ -448,7 +448,7 @@
2387 xen_pagetable_setup_done(pgd_base);
2388 }
2389
2390 -#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
2391 +#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI)
2392 /*
2393 * Swap suspend & friends need this for resume because things like the intel-agp
2394 * driver might have split up a kernel 4MB mapping.
2395 @@ -487,9 +487,13 @@
2396 flush_tlb_all();
2397 }
2398
2399 +int nx_enabled = 0;
2400 +
2401 +#ifdef CONFIG_X86_PAE
2402 +
2403 static int disable_nx __initdata = 0;
2404 u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
2405 -EXPORT_SYMBOL(__supported_pte_mask);
2406 +EXPORT_SYMBOL_GPL(__supported_pte_mask);
2407
2408 /*
2409 * noexec = on|off
2410 @@ -516,9 +520,6 @@
2411 }
2412 early_param("noexec", noexec_setup);
2413
2414 -int nx_enabled = 0;
2415 -#ifdef CONFIG_X86_PAE
2416 -
2417 static void __init set_nx(void)
2418 {
2419 unsigned int v[4], l, h;
2420 @@ -803,8 +804,7 @@
2421 PTRS_PER_PMD*sizeof(pmd_t),
2422 PTRS_PER_PMD*sizeof(pmd_t),
2423 SLAB_PANIC,
2424 - pmd_ctor,
2425 - NULL);
2426 + pmd_ctor);
2427 if (!SHARED_KERNEL_PMD) {
2428 /* If we're in PAE mode and have a non-shared
2429 kernel pmd, then the pgd size must be a
2430 diff -Naur linux-2.6.25/arch/x86/mm/init_64-xen.c linux-2.6.25-xen/arch/x86/mm/init_64-xen.c
2431 --- linux-2.6.25/arch/x86/mm/init_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2432 +++ linux-2.6.25-xen/arch/x86/mm/init_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2433 @@ -66,6 +66,9 @@
2434 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
2435 extern unsigned long start_pfn;
2436
2437 +extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
2438 +extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
2439 +
2440 /*
2441 * Use this until direct mapping is established, i.e. before __va() is
2442 * available in init_memory_mapping().
2443 @@ -353,6 +356,10 @@
2444 set_pte_phys(address, phys, prot, 0);
2445 set_pte_phys(address, phys, prot, 1);
2446 break;
2447 + case FIX_EARLYCON_MEM_BASE:
2448 + xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
2449 + pfn_pte_ma(phys >> PAGE_SHIFT, prot));
2450 + break;
2451 default:
2452 set_pte_phys_ma(address, phys, prot);
2453 break;
2454 @@ -588,6 +595,13 @@
2455 __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
2456 __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
2457
2458 + /* Do an early initialization of the fixmap area. */
2459 + addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
2460 + level3_kernel_pgt[pud_index(addr)] =
2461 + __pud(__pa_symbol(level2_fixmap_pgt) | _PAGE_TABLE);
2462 + level2_fixmap_pgt[pmd_index(addr)] =
2463 + __pmd(__pa_symbol(level1_fixmap_pgt) | _PAGE_TABLE);
2464 +
2465 early_make_page_readonly(init_level4_pgt,
2466 XENFEAT_writable_page_tables);
2467 early_make_page_readonly(__user_pgd(init_level4_pgt),
2468 @@ -598,6 +612,10 @@
2469 XENFEAT_writable_page_tables);
2470 early_make_page_readonly(level2_kernel_pgt,
2471 XENFEAT_writable_page_tables);
2472 + early_make_page_readonly(level2_fixmap_pgt,
2473 + XENFEAT_writable_page_tables);
2474 + early_make_page_readonly(level1_fixmap_pgt,
2475 + XENFEAT_writable_page_tables);
2476
2477 if (!xen_feature(XENFEAT_writable_page_tables)) {
2478 xen_pgd_pin(__pa_symbol(init_level4_pgt));
2479 @@ -1121,41 +1139,6 @@
2480 return pfn_valid(pte_pfn(*pte));
2481 }
2482
2483 -#ifdef CONFIG_SYSCTL
2484 -#include <linux/sysctl.h>
2485 -
2486 -extern int exception_trace, page_fault_trace;
2487 -
2488 -static ctl_table debug_table2[] = {
2489 - {
2490 - .ctl_name = 99,
2491 - .procname = "exception-trace",
2492 - .data = &exception_trace,
2493 - .maxlen = sizeof(int),
2494 - .mode = 0644,
2495 - .proc_handler = proc_dointvec
2496 - },
2497 - {}
2498 -};
2499 -
2500 -static ctl_table debug_root_table2[] = {
2501 - {
2502 - .ctl_name = CTL_DEBUG,
2503 - .procname = "debug",
2504 - .mode = 0555,
2505 - .child = debug_table2
2506 - },
2507 - {}
2508 -};
2509 -
2510 -static __init int x8664_sysctl_init(void)
2511 -{
2512 - register_sysctl_table(debug_root_table2);
2513 - return 0;
2514 -}
2515 -__initcall(x8664_sysctl_init);
2516 -#endif
2517 -
2518 /* A pseudo VMA to allow ptrace access for the vsyscall page. This only
2519 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
2520 not need special handling anymore. */
2521 @@ -1194,9 +1177,18 @@
2522 }
2523
2524 #ifndef CONFIG_XEN
2525 -void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
2526 +void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
2527 {
2528 return __alloc_bootmem_core(pgdat->bdata, size,
2529 SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
2530 }
2531 #endif
2532 +
2533 +const char *arch_vma_name(struct vm_area_struct *vma)
2534 +{
2535 + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
2536 + return "[vdso]";
2537 + if (vma == &gate_vma)
2538 + return "[vsyscall]";
2539 + return NULL;
2540 +}
2541 diff -Naur linux-2.6.25/arch/x86/mm/ioremap_32-xen.c linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c
2542 --- linux-2.6.25/arch/x86/mm/ioremap_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2543 +++ linux-2.6.25-xen/arch/x86/mm/ioremap_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2544 @@ -363,9 +363,8 @@
2545
2546 /* Reset the direct mapping. Can block */
2547 if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
2548 - /* p->size includes the guard page, but cpa doesn't like that */
2549 change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
2550 - (p->size - PAGE_SIZE) >> PAGE_SHIFT,
2551 + get_vm_area_size(p) >> PAGE_SHIFT,
2552 PAGE_KERNEL);
2553 global_flush_tlb();
2554 }
2555 diff -Naur linux-2.6.25/arch/x86/mm/pageattr_64-xen.c linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c
2556 --- linux-2.6.25/arch/x86/mm/pageattr_64-xen.c 2008-05-23 18:33:27.000000000 +0200
2557 +++ linux-2.6.25-xen/arch/x86/mm/pageattr_64-xen.c 2008-05-23 18:27:40.000000000 +0200
2558 @@ -276,7 +276,7 @@
2559 }
2560 #endif /* CONFIG_XEN */
2561
2562 -static inline pte_t *lookup_address(unsigned long address)
2563 +pte_t *lookup_address(unsigned long address)
2564 {
2565 pgd_t *pgd = pgd_offset_k(address);
2566 pud_t *pud;
2567 @@ -337,14 +337,13 @@
2568 struct page *pg;
2569
2570 /* When clflush is available always use it because it is
2571 - much cheaper than WBINVD. Disable clflush for now because
2572 - the high level code is not ready yet */
2573 + much cheaper than WBINVD. */
2574 + /* clflush is still broken. Disable for now. */
2575 if (1 || !cpu_has_clflush)
2576 asm volatile("wbinvd" ::: "memory");
2577 else list_for_each_entry(pg, l, lru) {
2578 void *adr = page_address(pg);
2579 - if (cpu_has_clflush)
2580 - cache_flush_page(adr);
2581 + cache_flush_page(adr);
2582 }
2583 __flush_tlb_all();
2584 }
2585 @@ -358,7 +357,8 @@
2586
2587 static inline void save_page(struct page *fpage)
2588 {
2589 - list_add(&fpage->lru, &deferred_pages);
2590 + if (!test_and_set_bit(PG_arch_1, &fpage->flags))
2591 + list_add(&fpage->lru, &deferred_pages);
2592 }
2593
2594 /*
2595 @@ -392,9 +392,12 @@
2596 pte_t *kpte;
2597 struct page *kpte_page;
2598 pgprot_t ref_prot2;
2599 +
2600 kpte = lookup_address(address);
2601 if (!kpte) return 0;
2602 kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
2603 + BUG_ON(PageLRU(kpte_page));
2604 + BUG_ON(PageCompound(kpte_page));
2605 if (pgprot_val(prot) != pgprot_val(ref_prot)) {
2606 if (!pte_huge(*kpte)) {
2607 set_pte(kpte, pfn_pte(pfn, prot));
2608 @@ -433,10 +436,9 @@
2609 return 0;
2610 #endif
2611
2612 - if (page_private(kpte_page) == 0) {
2613 - save_page(kpte_page);
2614 + save_page(kpte_page);
2615 + if (page_private(kpte_page) == 0)
2616 revert_page(address, ref_prot);
2617 - }
2618 return 0;
2619 }
2620
2621 @@ -508,6 +510,10 @@
2622 flush_map(&l);
2623
2624 list_for_each_entry_safe(pg, next, &l, lru) {
2625 + list_del(&pg->lru);
2626 + clear_bit(PG_arch_1, &pg->flags);
2627 + if (page_private(pg) != 0)
2628 + continue;
2629 ClearPagePrivate(pg);
2630 __free_page(pg);
2631 }
2632 diff -Naur linux-2.6.25/arch/x86/mm/pgtable_32-xen.c linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c
2633 --- linux-2.6.25/arch/x86/mm/pgtable_32-xen.c 2008-05-23 18:33:27.000000000 +0200
2634 +++ linux-2.6.25-xen/arch/x86/mm/pgtable_32-xen.c 2008-05-23 18:27:40.000000000 +0200
2635 @@ -242,7 +242,7 @@
2636
2637 #if (PTRS_PER_PMD == 1)
2638 /* Non-PAE pgd constructor */
2639 -void pgd_ctor(void *pgd)
2640 +static void pgd_ctor(void *pgd)
2641 {
2642 unsigned long flags;
2643
2644 @@ -265,7 +265,7 @@
2645 }
2646 #else /* PTRS_PER_PMD > 1 */
2647 /* PAE pgd constructor */
2648 -void pgd_ctor(void *pgd)
2649 +static void pgd_ctor(void *pgd)
2650 {
2651 /* PAE, kernel PMD may be shared */
2652
2653 @@ -284,7 +284,7 @@
2654 }
2655 #endif /* PTRS_PER_PMD */
2656
2657 -void pgd_dtor(void *pgd)
2658 +static void pgd_dtor(void *pgd)
2659 {
2660 unsigned long flags; /* can be called from interrupt context */
2661
2662 diff -Naur linux-2.6.25/arch/x86/pci/irq-xen.c linux-2.6.25-xen/arch/x86/pci/irq-xen.c
2663 --- linux-2.6.25/arch/x86/pci/irq-xen.c 2008-05-23 18:33:26.000000000 +0200
2664 +++ linux-2.6.25-xen/arch/x86/pci/irq-xen.c 2008-05-23 18:27:40.000000000 +0200
2665 @@ -142,8 +142,9 @@
2666 for(i = 1; i < 256; i++) {
2667 if (!busmap[i] || pci_find_bus(0, i))
2668 continue;
2669 - if (pci_scan_bus(i, &pci_root_ops, NULL))
2670 - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
2671 + if (pci_scan_bus_with_sysdata(i))
2672 + printk(KERN_INFO "PCI: Discovered primary peer "
2673 + "bus %02x [IRQ]\n", i);
2674 }
2675 pcibios_last_bus = -1;
2676 }
2677 @@ -553,6 +554,7 @@
2678 case PCI_DEVICE_ID_INTEL_ICH9_3:
2679 case PCI_DEVICE_ID_INTEL_ICH9_4:
2680 case PCI_DEVICE_ID_INTEL_ICH9_5:
2681 + case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
2682 r->name = "PIIX/ICH";
2683 r->get = pirq_piix_get;
2684 r->set = pirq_piix_set;
2685 diff -Naur linux-2.6.25/arch/x86/vdso/vdso32/note.S linux-2.6.25-xen/arch/x86/vdso/vdso32/note.S
2686 --- linux-2.6.25/arch/x86/vdso/vdso32/note.S 2008-04-17 04:49:44.000000000 +0200
2687 +++ linux-2.6.25-xen/arch/x86/vdso/vdso32/note.S 2008-05-23 18:27:40.000000000 +0200
2688 @@ -13,7 +13,7 @@
2689 .long LINUX_VERSION_CODE
2690 ELFNOTE_END
2691
2692 -#ifdef CONFIG_XEN
2693 +#if defined(CONFIG_X86_XEN) || defined(CONFIG_PARAVIRT_XEN)
2694 /*
2695 * Add a special note telling glibc's dynamic linker a fake hardware
2696 * flavor that it will use to choose the search path for libraries in the
2697 @@ -37,8 +37,12 @@
2698
2699 ELFNOTE_START(GNU, 2, "a")
2700 .long 1 /* ncaps */
2701 +#ifdef CONFIG_PARAVIRT_XEN
2702 VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
2703 .long 0 /* mask */
2704 +#else
2705 + .long 1 << VDSO_NOTE_NONEGSEG_BIT /* mask */
2706 +#endif
2707 .byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
2708 ELFNOTE_END
2709 #endif
2710 diff -Naur linux-2.6.25/arch/x86/xen/Kconfig linux-2.6.25-xen/arch/x86/xen/Kconfig
2711 --- linux-2.6.25/arch/x86/xen/Kconfig 2008-04-17 04:49:44.000000000 +0200
2712 +++ linux-2.6.25-xen/arch/x86/xen/Kconfig 2008-05-23 18:27:40.000000000 +0200
2713 @@ -2,7 +2,7 @@
2714 # This Kconfig describes xen options
2715 #
2716
2717 -config XEN
2718 +config PARAVIRT_XEN
2719 bool "Xen guest support"
2720 select PARAVIRT
2721 depends on X86_32
2722 diff -Naur linux-2.6.25/arch/x86/xen/xen-head.S linux-2.6.25-xen/arch/x86/xen/xen-head.S
2723 --- linux-2.6.25/arch/x86/xen/xen-head.S 2008-04-17 04:49:44.000000000 +0200
2724 +++ linux-2.6.25-xen/arch/x86/xen/xen-head.S 2008-05-23 18:27:40.000000000 +0200
2725 @@ -1,7 +1,7 @@
2726 /* Xen-specific pieces of head.S, intended to be included in the right
2727 place in head.S */
2728
2729 -#ifdef CONFIG_XEN
2730 +#ifdef CONFIG_PARAVIRT_XEN
2731
2732 #include <linux/elfnote.h>
2733 #include <linux/init.h>
2734 @@ -37,4 +37,4 @@
2735 #endif
2736 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
2737
2738 -#endif /*CONFIG_XEN */
2739 +#endif /* CONFIG_PARAVIRT_XEN */
2740 diff -Naur linux-2.6.25/drivers/block/Kconfig linux-2.6.25-xen/drivers/block/Kconfig
2741 --- linux-2.6.25/drivers/block/Kconfig 2008-04-17 04:49:44.000000000 +0200
2742 +++ linux-2.6.25-xen/drivers/block/Kconfig 2008-05-23 18:27:40.000000000 +0200
2743 @@ -417,9 +417,9 @@
2744 help
2745 Include support for the Xilinx SystemACE CompactFlash interface
2746
2747 -config XEN_BLKDEV_FRONTEND
2748 +config XEN_BLKFRONT
2749 tristate "Xen virtual block device support"
2750 - depends on XEN
2751 + depends on PARAVIRT_XEN
2752 default y
2753 help
2754 This driver implements the front-end of the Xen virtual
2755 diff -Naur linux-2.6.25/drivers/block/Makefile linux-2.6.25-xen/drivers/block/Makefile
2756 --- linux-2.6.25/drivers/block/Makefile 2008-04-17 04:49:44.000000000 +0200
2757 +++ linux-2.6.25-xen/drivers/block/Makefile 2008-05-23 18:29:02.000000000 +0200
2758 @@ -30,4 +30,4 @@
2759 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
2760 obj-$(CONFIG_BLK_DEV_UB) += ub.o
2761
2762 -obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
2763 +obj-$(CONFIG_XEN_BLKFRONT) += xen-blkfront.o
2764 diff -Naur linux-2.6.25/drivers/block/xen-blkfront.c linux-2.6.25-xen/drivers/block/xen-blkfront.c
2765 --- linux-2.6.25/drivers/block/xen-blkfront.c 2008-04-17 04:49:44.000000000 +0200
2766 +++ linux-2.6.25-xen/drivers/block/xen-blkfront.c 2008-05-23 18:27:40.000000000 +0200
2767 @@ -965,7 +965,6 @@
2768
2769 static struct xenbus_driver blkfront = {
2770 .name = "vbd",
2771 - .owner = THIS_MODULE,
2772 .ids = blkfront_ids,
2773 .probe = blkfront_probe,
2774 .remove = blkfront_remove,
2775 diff -Naur linux-2.6.25/drivers/char/Kconfig linux-2.6.25-xen/drivers/char/Kconfig
2776 --- linux-2.6.25/drivers/char/Kconfig 2008-04-17 04:49:44.000000000 +0200
2777 +++ linux-2.6.25-xen/drivers/char/Kconfig 2008-05-23 18:27:40.000000000 +0200
2778 @@ -584,7 +584,7 @@
2779
2780 config HVC_XEN
2781 bool "Xen Hypervisor Console support"
2782 - depends on XEN
2783 + depends on PARAVIRT_XEN
2784 select HVC_DRIVER
2785 default y
2786 help
2787 diff -Naur linux-2.6.25/drivers/Makefile linux-2.6.25-xen/drivers/Makefile
2788 --- linux-2.6.25/drivers/Makefile 2008-05-23 18:32:47.000000000 +0200
2789 +++ linux-2.6.25-xen/drivers/Makefile 2008-05-23 18:27:40.000000000 +0200
2790 @@ -19,7 +19,7 @@
2791 obj-$(CONFIG_PNP) += pnp/
2792 obj-$(CONFIG_ARM_AMBA) += amba/
2793
2794 -obj-$(CONFIG_XEN) += xen/
2795 +obj-$(CONFIG_PARAVIRT_XEN) += xen/
2796
2797 obj-$(CONFIG_CONNECTOR) += connector/
2798
2799 diff -Naur linux-2.6.25/drivers/net/Kconfig linux-2.6.25-xen/drivers/net/Kconfig
2800 --- linux-2.6.25/drivers/net/Kconfig 2008-05-23 18:32:02.000000000 +0200
2801 +++ linux-2.6.25-xen/drivers/net/Kconfig 2008-05-23 18:27:40.000000000 +0200
2802 @@ -2690,9 +2690,9 @@
2803
2804 source "drivers/s390/net/Kconfig"
2805
2806 -config XEN_NETDEV_FRONTEND
2807 +config XEN_NETFRONT
2808 tristate "Xen network device frontend driver"
2809 - depends on XEN
2810 + depends on PARAVIRT_XEN
2811 default y
2812 help
2813 The network device frontend driver allows the kernel to
2814 diff -Naur linux-2.6.25/drivers/net/Makefile linux-2.6.25-xen/drivers/net/Makefile
2815 --- linux-2.6.25/drivers/net/Makefile 2008-05-23 18:32:02.000000000 +0200
2816 +++ linux-2.6.25-xen/drivers/net/Makefile 2008-05-23 18:27:40.000000000 +0200
2817 @@ -140,7 +140,7 @@
2818 obj-$(CONFIG_SLIP) += slip.o
2819 obj-$(CONFIG_SLHC) += slhc.o
2820
2821 -obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
2822 +obj-$(CONFIG_XEN_NETFRONT) += xen-netfront.o
2823
2824 obj-$(CONFIG_DUMMY) += dummy.o
2825 obj-$(CONFIG_IFB) += ifb.o
2826 diff -Naur linux-2.6.25/drivers/net/xen-netfront.c linux-2.6.25-xen/drivers/net/xen-netfront.c
2827 --- linux-2.6.25/drivers/net/xen-netfront.c 2008-04-17 04:49:44.000000000 +0200
2828 +++ linux-2.6.25-xen/drivers/net/xen-netfront.c 2008-05-23 18:27:40.000000000 +0200
2829 @@ -36,8 +36,6 @@
2830 #include <linux/skbuff.h>
2831 #include <linux/ethtool.h>
2832 #include <linux/if_ether.h>
2833 -#include <linux/tcp.h>
2834 -#include <linux/udp.h>
2835 #include <linux/moduleparam.h>
2836 #include <linux/mm.h>
2837 #include <net/ip.h>
2838 @@ -754,45 +752,6 @@
2839 return cons;
2840 }
2841
2842 -static int skb_checksum_setup(struct sk_buff *skb)
2843 -{
2844 - struct iphdr *iph;
2845 - unsigned char *th;
2846 - int err = -EPROTO;
2847 -
2848 - if (skb->protocol != htons(ETH_P_IP))
2849 - goto out;
2850 -
2851 - iph = (void *)skb->data;
2852 - th = skb->data + 4 * iph->ihl;
2853 - if (th >= skb_tail_pointer(skb))
2854 - goto out;
2855 -
2856 - skb->csum_start = th - skb->head;
2857 - switch (iph->protocol) {
2858 - case IPPROTO_TCP:
2859 - skb->csum_offset = offsetof(struct tcphdr, check);
2860 - break;
2861 - case IPPROTO_UDP:
2862 - skb->csum_offset = offsetof(struct udphdr, check);
2863 - break;
2864 - default:
2865 - if (net_ratelimit())
2866 - printk(KERN_ERR "Attempting to checksum a non-"
2867 - "TCP/UDP packet, dropping a protocol"
2868 - " %d packet", iph->protocol);
2869 - goto out;
2870 - }
2871 -
2872 - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
2873 - goto out;
2874 -
2875 - err = 0;
2876 -
2877 -out:
2878 - return err;
2879 -}
2880 -
2881 static int handle_incoming_queue(struct net_device *dev,
2882 struct sk_buff_head *rxq)
2883 {
2884 @@ -1775,7 +1734,6 @@
2885
2886 static struct xenbus_driver netfront = {
2887 .name = "vif",
2888 - .owner = THIS_MODULE,
2889 .ids = netfront_ids,
2890 .probe = netfront_probe,
2891 .remove = __devexit_p(xennet_remove),
2892 diff -Naur linux-2.6.25/drivers/xen/blkback/blkback.c linux-2.6.25-xen/drivers/xen/blkback/blkback.c
2893 --- linux-2.6.25/drivers/xen/blkback/blkback.c 2008-05-23 18:33:26.000000000 +0200
2894 +++ linux-2.6.25-xen/drivers/xen/blkback/blkback.c 2008-05-23 18:27:40.000000000 +0200
2895 @@ -154,7 +154,7 @@
2896
2897 static void plug_queue(blkif_t *blkif, struct bio *bio)
2898 {
2899 - request_queue_t *q = bdev_get_queue(bio->bi_bdev);
2900 + struct request_queue *q = bdev_get_queue(bio->bi_bdev);
2901
2902 if (q == blkif->plug)
2903 return;
2904 diff -Naur linux-2.6.25/drivers/xen/blkback/common.h linux-2.6.25-xen/drivers/xen/blkback/common.h
2905 --- linux-2.6.25/drivers/xen/blkback/common.h 2008-05-23 18:33:23.000000000 +0200
2906 +++ linux-2.6.25-xen/drivers/xen/blkback/common.h 2008-05-23 18:27:40.000000000 +0200
2907 @@ -79,7 +79,7 @@
2908 wait_queue_head_t wq;
2909 struct task_struct *xenblkd;
2910 unsigned int waiting_reqs;
2911 - request_queue_t *plug;
2912 + struct request_queue *plug;
2913
2914 /* statistics */
2915 unsigned long st_print;
2916 diff -Naur linux-2.6.25/drivers/xen/blkback/interface.c linux-2.6.25-xen/drivers/xen/blkback/interface.c
2917 --- linux-2.6.25/drivers/xen/blkback/interface.c 2008-05-23 18:33:26.000000000 +0200
2918 +++ linux-2.6.25-xen/drivers/xen/blkback/interface.c 2008-05-23 18:27:40.000000000 +0200
2919 @@ -177,5 +177,5 @@
2920 void __init blkif_interface_init(void)
2921 {
2922 blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
2923 - 0, 0, NULL, NULL);
2924 + 0, 0, NULL);
2925 }
2926 diff -Naur linux-2.6.25/drivers/xen/blkfront/blkfront.c linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c
2927 --- linux-2.6.25/drivers/xen/blkfront/blkfront.c 2008-05-23 18:33:27.000000000 +0200
2928 +++ linux-2.6.25-xen/drivers/xen/blkfront/blkfront.c 2008-05-23 18:27:40.000000000 +0200
2929 @@ -653,7 +653,7 @@
2930 * do_blkif_request
2931 * read a block; request is in a request queue
2932 */
2933 -void do_blkif_request(request_queue_t *rq)
2934 +void do_blkif_request(struct request_queue *rq)
2935 {
2936 struct blkfront_info *info = NULL;
2937 struct request *req;
2938 diff -Naur linux-2.6.25/drivers/xen/blkfront/block.h linux-2.6.25-xen/drivers/xen/blkfront/block.h
2939 --- linux-2.6.25/drivers/xen/blkfront/block.h 2008-05-23 18:32:32.000000000 +0200
2940 +++ linux-2.6.25-xen/drivers/xen/blkfront/block.h 2008-05-23 18:27:40.000000000 +0200
2941 @@ -105,7 +105,7 @@
2942 blkif_front_ring_t ring;
2943 unsigned int irq;
2944 struct xlbd_major_info *mi;
2945 - request_queue_t *rq;
2946 + struct request_queue *rq;
2947 struct work_struct work;
2948 struct gnttab_free_callback callback;
2949 struct blk_shadow shadow[BLK_RING_SIZE];
2950 @@ -129,7 +129,7 @@
2951 extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
2952 extern int blkif_check(dev_t dev);
2953 extern int blkif_revalidate(dev_t dev);
2954 -extern void do_blkif_request (request_queue_t *rq);
2955 +extern void do_blkif_request (struct request_queue *rq);
2956
2957 /* Virtual block-device subsystem. */
2958 /* Note that xlvbd_add doesn't call add_disk for you: you're expected
2959 diff -Naur linux-2.6.25/drivers/xen/blkfront/vbd.c linux-2.6.25-xen/drivers/xen/blkfront/vbd.c
2960 --- linux-2.6.25/drivers/xen/blkfront/vbd.c 2008-05-23 18:32:32.000000000 +0200
2961 +++ linux-2.6.25-xen/drivers/xen/blkfront/vbd.c 2008-05-23 18:27:40.000000000 +0200
2962 @@ -186,7 +186,7 @@
2963 static int
2964 xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
2965 {
2966 - request_queue_t *rq;
2967 + struct request_queue *rq;
2968
2969 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
2970 if (rq == NULL)
2971 diff -Naur linux-2.6.25/drivers/xen/blktap/common.h linux-2.6.25-xen/drivers/xen/blktap/common.h
2972 --- linux-2.6.25/drivers/xen/blktap/common.h 2008-05-23 18:33:23.000000000 +0200
2973 +++ linux-2.6.25-xen/drivers/xen/blktap/common.h 2008-05-23 18:27:40.000000000 +0200
2974 @@ -68,7 +68,7 @@
2975 wait_queue_head_t wq;
2976 struct task_struct *xenblkd;
2977 unsigned int waiting_reqs;
2978 - request_queue_t *plug;
2979 + struct request_queue *plug;
2980
2981 /* statistics */
2982 unsigned long st_print;
2983 diff -Naur linux-2.6.25/drivers/xen/blktap/interface.c linux-2.6.25-xen/drivers/xen/blktap/interface.c
2984 --- linux-2.6.25/drivers/xen/blktap/interface.c 2008-05-23 18:33:26.000000000 +0200
2985 +++ linux-2.6.25-xen/drivers/xen/blktap/interface.c 2008-05-23 18:27:40.000000000 +0200
2986 @@ -170,5 +170,5 @@
2987 void __init tap_blkif_interface_init(void)
2988 {
2989 blkif_cachep = kmem_cache_create("blktapif_cache", sizeof(blkif_t),
2990 - 0, 0, NULL, NULL);
2991 + 0, 0, NULL);
2992 }
2993 diff -Naur linux-2.6.25/drivers/xen/core/features.c linux-2.6.25-xen/drivers/xen/core/features.c
2994 --- linux-2.6.25/drivers/xen/core/features.c 2008-05-23 18:32:32.000000000 +0200
2995 +++ linux-2.6.25-xen/drivers/xen/core/features.c 2008-05-23 18:27:40.000000000 +0200
2996 @@ -19,7 +19,7 @@
2997 /* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
2998 EXPORT_SYMBOL(xen_features);
2999
3000 -void setup_xen_features(void)
3001 +void xen_setup_features(void)
3002 {
3003 xen_feature_info_t fi;
3004 int i, j;
3005 diff -Naur linux-2.6.25/drivers/xen/core/reboot.c linux-2.6.25-xen/drivers/xen/core/reboot.c
3006 --- linux-2.6.25/drivers/xen/core/reboot.c 2008-05-23 18:33:26.000000000 +0200
3007 +++ linux-2.6.25-xen/drivers/xen/core/reboot.c 2008-05-23 18:27:40.000000000 +0200
3008 @@ -4,6 +4,7 @@
3009 #include <linux/unistd.h>
3010 #include <linux/module.h>
3011 #include <linux/reboot.h>
3012 +#include <linux/sched.h>
3013 #include <linux/sysrq.h>
3014 #include <asm/hypervisor.h>
3015 #include <xen/xenbus.h>
3016 diff -Naur linux-2.6.25/drivers/xen/Makefile linux-2.6.25-xen/drivers/xen/Makefile
3017 --- linux-2.6.25/drivers/xen/Makefile 2008-05-23 18:33:03.000000000 +0200
3018 +++ linux-2.6.25-xen/drivers/xen/Makefile 2008-05-23 18:27:40.000000000 +0200
3019 @@ -1,10 +1,12 @@
3020 -obj-y += core/
3021 -obj-y += console/
3022 -obj-y += evtchn/
3023 -obj-y += xenbus/
3024 -obj-y += char/
3025 +obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o
3026
3027 -obj-y += util.o
3028 +obj-$(CONFIG_XEN) += core/
3029 +obj-$(CONFIG_XEN) += console/
3030 +obj-$(CONFIG_XEN) += evtchn/
3031 +obj-y += xenbus/
3032 +obj-$(CONFIG_XEN) += char/
3033 +
3034 +obj-$(CONFIG_XEN) += util.o
3035 obj-$(CONFIG_XEN_BALLOON) += balloon/
3036 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
3037 obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
3038 diff -Naur linux-2.6.25/drivers/xen/tpmback/interface.c linux-2.6.25-xen/drivers/xen/tpmback/interface.c
3039 --- linux-2.6.25/drivers/xen/tpmback/interface.c 2008-05-23 18:33:27.000000000 +0200
3040 +++ linux-2.6.25-xen/drivers/xen/tpmback/interface.c 2008-05-23 18:27:40.000000000 +0200
3041 @@ -12,6 +12,7 @@
3042 */
3043
3044 #include "common.h"
3045 +#include <linux/err.h>
3046 #include <xen/balloon.h>
3047 #include <xen/gnttab.h>
3048
3049 @@ -159,7 +160,7 @@
3050 int __init tpmif_interface_init(void)
3051 {
3052 tpmif_cachep = kmem_cache_create("tpmif_cache", sizeof (tpmif_t),
3053 - 0, 0, NULL, NULL);
3054 + 0, 0, NULL);
3055 return tpmif_cachep ? 0 : -ENOMEM;
3056 }
3057
3058 diff -Naur linux-2.6.25/drivers/xen/util.c linux-2.6.25-xen/drivers/xen/util.c
3059 --- linux-2.6.25/drivers/xen/util.c 2008-05-23 18:32:32.000000000 +0200
3060 +++ linux-2.6.25-xen/drivers/xen/util.c 2008-05-23 18:27:40.000000000 +0200
3061 @@ -1,8 +1,5 @@
3062 -#include <linux/mm.h>
3063 +#include <linux/err.h>
3064 #include <linux/module.h>
3065 -#include <linux/slab.h>
3066 -#include <linux/vmalloc.h>
3067 -#include <asm/uaccess.h>
3068 #include <xen/driver_util.h>
3069
3070 struct class *get_xen_class(void)
3071 @@ -21,45 +18,3 @@
3072 return xen_class;
3073 }
3074 EXPORT_SYMBOL_GPL(get_xen_class);
3075 -
3076 -#ifdef CONFIG_X86
3077 -static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
3078 -{
3079 - /* apply_to_page_range() does all the hard work. */
3080 - return 0;
3081 -}
3082 -
3083 -struct vm_struct *alloc_vm_area(unsigned long size)
3084 -{
3085 - struct vm_struct *area;
3086 -
3087 - area = get_vm_area(size, VM_IOREMAP);
3088 - if (area == NULL)
3089 - return NULL;
3090 -
3091 - /*
3092 - * This ensures that page tables are constructed for this region
3093 - * of kernel virtual address space and mapped into init_mm.
3094 - */
3095 - if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
3096 - area->size, f, NULL)) {
3097 - free_vm_area(area);
3098 - return NULL;
3099 - }
3100 -
3101 - /* Map page directories into every address space. */
3102 - vmalloc_sync_all();
3103 -
3104 - return area;
3105 -}
3106 -EXPORT_SYMBOL_GPL(alloc_vm_area);
3107 -
3108 -void free_vm_area(struct vm_struct *area)
3109 -{
3110 - struct vm_struct *ret;
3111 - ret = remove_vm_area(area->addr);
3112 - BUG_ON(ret != area);
3113 - kfree(area);
3114 -}
3115 -EXPORT_SYMBOL_GPL(free_vm_area);
3116 -#endif /* CONFIG_X86 */
3117 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_client.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_client.c
3118 --- linux-2.6.25/drivers/xen/xenbus/xenbus_client.c 2008-05-23 18:33:03.000000000 +0200
3119 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_client.c 2008-05-23 18:27:40.000000000 +0200
3120 @@ -30,19 +30,26 @@
3121 * IN THE SOFTWARE.
3122 */
3123
3124 +#if defined(CONFIG_XEN) || defined(MODULE)
3125 #include <linux/slab.h>
3126 #include <xen/evtchn.h>
3127 #include <xen/gnttab.h>
3128 -#include <xen/xenbus.h>
3129 #include <xen/driver_util.h>
3130 +#else
3131 +#include <linux/types.h>
3132 +#include <linux/vmalloc.h>
3133 +#include <asm/xen/hypervisor.h>
3134 +#include <xen/interface/xen.h>
3135 +#include <xen/interface/event_channel.h>
3136 +#include <xen/events.h>
3137 +#include <xen/grant_table.h>
3138 +#endif
3139 +#include <xen/xenbus.h>
3140
3141 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
3142 #include <xen/platform-compat.h>
3143 #endif
3144
3145 -#define DPRINTK(fmt, args...) \
3146 - pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
3147 -
3148 const char *xenbus_strstate(enum xenbus_state state)
3149 {
3150 static const char *const name[] = {
3151 @@ -58,6 +65,20 @@
3152 }
3153 EXPORT_SYMBOL_GPL(xenbus_strstate);
3154
3155 +/**
3156 + * xenbus_watch_path - register a watch
3157 + * @dev: xenbus device
3158 + * @path: path to watch
3159 + * @watch: watch to register
3160 + * @callback: callback to register
3161 + *
3162 + * Register a @watch on the given path, using the given xenbus_watch structure
3163 + * for storage, and the given @callback function as the callback. Return 0 on
3164 + * success, or -errno on error. On success, the given @path will be saved as
3165 + * @watch->node, and remains the caller's to free. On error, @watch->node will
3166 + * be NULL, the device will switch to %XenbusStateClosing, and the error will
3167 + * be saved in the store.
3168 + */
3169 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
3170 struct xenbus_watch *watch,
3171 void (*callback)(struct xenbus_watch *,
3172 @@ -81,6 +102,7 @@
3173 EXPORT_SYMBOL_GPL(xenbus_watch_path);
3174
3175
3176 +#if defined(CONFIG_XEN) || defined(MODULE)
3177 int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
3178 const char *path2, struct xenbus_watch *watch,
3179 void (*callback)(struct xenbus_watch *,
3180 @@ -99,8 +121,60 @@
3181 return err;
3182 }
3183 EXPORT_SYMBOL_GPL(xenbus_watch_path2);
3184 +#else
3185 +/**
3186 + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
3187 + * @dev: xenbus device
3188 + * @watch: watch to register
3189 + * @callback: callback to register
3190 + * @pathfmt: format of path to watch
3191 + *
3192 + * Register a watch on the given @path, using the given xenbus_watch
3193 + * structure for storage, and the given @callback function as the callback.
3194 + * Return 0 on success, or -errno on error. On success, the watched path
3195 + * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
3196 + * kfree(). On error, watch->node will be NULL, so the caller has nothing to
3197 + * free, the device will switch to %XenbusStateClosing, and the error will be
3198 + * saved in the store.
3199 + */
3200 +int xenbus_watch_pathfmt(struct xenbus_device *dev,
3201 + struct xenbus_watch *watch,
3202 + void (*callback)(struct xenbus_watch *,
3203 + const char **, unsigned int),
3204 + const char *pathfmt, ...)
3205 +{
3206 + int err;
3207 + va_list ap;
3208 + char *path;
3209
3210 + va_start(ap, pathfmt);
3211 + path = kvasprintf(GFP_KERNEL, pathfmt, ap);
3212 + va_end(ap);
3213
3214 + if (!path) {
3215 + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
3216 + return -ENOMEM;
3217 + }
3218 + err = xenbus_watch_path(dev, path, watch, callback);
3219 +
3220 + if (err)
3221 + kfree(path);
3222 + return err;
3223 +}
3224 +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
3225 +#endif
3226 +
3227 +
3228 +/**
3229 + * xenbus_switch_state
3230 + * @dev: xenbus device
3231 + * @xbt: transaction handle
3232 + * @state: new state
3233 + *
3234 + * Advertise in the store a change of the given driver to the given new_state.
3235 + * Return 0 on success, or -errno on error. On error, the device will switch
3236 + * to XenbusStateClosing, and the error will be saved in the store.
3237 + */
3238 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
3239 {
3240 /* We check whether the state is currently set to the given value, and
3241 @@ -159,8 +233,8 @@
3242 }
3243
3244
3245 -void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
3246 - va_list ap)
3247 +static void _dev_error(struct xenbus_device *dev, int err,
3248 + const char *fmt, va_list ap)
3249 {
3250 int ret;
3251 unsigned int len;
3252 @@ -181,14 +255,16 @@
3253 path_buffer = error_path(dev);
3254
3255 if (path_buffer == NULL) {
3256 - printk("xenbus: failed to write error node for %s (%s)\n",
3257 - dev->nodename, printf_buffer);
3258 + dev_err(&dev->dev,
3259 + "xenbus: failed to write error node for %s (%s)\n",
3260 + dev->nodename, printf_buffer);
3261 goto fail;
3262 }
3263
3264 if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
3265 - printk("xenbus: failed to write error node for %s (%s)\n",
3266 - dev->nodename, printf_buffer);
3267 + dev_err(&dev->dev,
3268 + "xenbus: failed to write error node for %s (%s)\n",
3269 + dev->nodename, printf_buffer);
3270 goto fail;
3271 }
3272
3273 @@ -200,6 +276,15 @@
3274 }
3275
3276
3277 +/**
3278 + * xenbus_dev_error
3279 + * @dev: xenbus device
3280 + * @err: error to report
3281 + * @fmt: error message format
3282 + *
3283 + * Report the given negative errno into the store, along with the given
3284 + * formatted message.
3285 + */
3286 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
3287 ...)
3288 {
3289 @@ -212,6 +297,16 @@
3290 EXPORT_SYMBOL_GPL(xenbus_dev_error);
3291
3292
3293 +/**
3294 + * xenbus_dev_fatal
3295 + * @dev: xenbus device
3296 + * @err: error to report
3297 + * @fmt: error message format
3298 + *
3299 + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
3300 + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
3301 + * closedown of this driver and its peer.
3302 + */
3303 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
3304 ...)
3305 {
3306 @@ -226,6 +321,15 @@
3307 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
3308
3309
3310 +/**
3311 + * xenbus_grant_ring
3312 + * @dev: xenbus device
3313 + * @ring_mfn: mfn of ring to grant
3314 + *
3315 + * Grant access to the given @ring_mfn to the peer of the given device. Return
3316 + * 0 on success, or -errno on error. On error, the device will switch to
3317 + * XenbusStateClosing, and the error will be saved in the store.
3318 + */
3319 int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
3320 {
3321 int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
3322 @@ -236,6 +340,12 @@
3323 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
3324
3325
3326 +/**
3327 + * Allocate an event channel for the given xenbus_device, assigning the newly
3328 + * created local port to *port. Return 0 on success, or -errno on error. On
3329 + * error, the device will switch to XenbusStateClosing, and the error will be
3330 + * saved in the store.
3331 + */
3332 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
3333 {
3334 struct evtchn_alloc_unbound alloc_unbound;
3335 @@ -256,6 +366,38 @@
3336 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
3337
3338
3339 +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */
3340 +/**
3341 + * Bind to an existing interdomain event channel in another domain. Returns 0
3342 + * on success and stores the local port in *port. On error, returns -errno,
3343 + * switches the device to XenbusStateClosing, and saves the error in XenStore.
3344 + */
3345 +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
3346 +{
3347 + struct evtchn_bind_interdomain bind_interdomain;
3348 + int err;
3349 +
3350 + bind_interdomain.remote_dom = dev->otherend_id;
3351 + bind_interdomain.remote_port = remote_port;
3352 +
3353 + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
3354 + &bind_interdomain);
3355 + if (err)
3356 + xenbus_dev_fatal(dev, err,
3357 + "binding to event channel %d from domain %d",
3358 + remote_port, dev->otherend_id);
3359 + else
3360 + *port = bind_interdomain.local_port;
3361 +
3362 + return err;
3363 +}
3364 +EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
3365 +#endif
3366 +
3367 +
3368 +/**
3369 + * Free an existing event channel. Returns 0 on success or -errno on error.
3370 + */
3371 int xenbus_free_evtchn(struct xenbus_device *dev, int port)
3372 {
3373 struct evtchn_close close;
3374 @@ -272,6 +414,191 @@
3375 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
3376
3377
3378 +#if 0 /* !defined(CONFIG_XEN) && !defined(MODULE) */
3379 +/**
3380 + * xenbus_map_ring_valloc
3381 + * @dev: xenbus device
3382 + * @gnt_ref: grant reference
3383 + * @vaddr: pointer to address to be filled out by mapping
3384 + *
3385 + * Based on Rusty Russell's skeleton driver's map_page.
3386 + * Map a page of memory into this domain from another domain's grant table.
3387 + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
3388 + * page to that address, and sets *vaddr to that address.
3389 + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
3390 + * or -ENOMEM on error. If an error is returned, the device will switch to
3391 + * XenbusStateClosing and the error message will be saved in XenStore.
3392 + */
3393 +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
3394 +{
3395 + struct gnttab_map_grant_ref op = {
3396 + .flags = GNTMAP_host_map,
3397 + .ref = gnt_ref,
3398 + .dom = dev->otherend_id,
3399 + };
3400 + struct vm_struct *area;
3401 +
3402 + *vaddr = NULL;
3403 +
3404 + area = alloc_vm_area(PAGE_SIZE);
3405 + if (!area)
3406 + return -ENOMEM;
3407 +
3408 + op.host_addr = (unsigned long)area->addr;
3409 +
3410 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
3411 + BUG();
3412 +
3413 + if (op.status != GNTST_okay) {
3414 + free_vm_area(area);
3415 + xenbus_dev_fatal(dev, op.status,
3416 + "mapping in shared page %d from domain %d",
3417 + gnt_ref, dev->otherend_id);
3418 + return op.status;
3419 + }
3420 +
3421 + /* Stuff the handle in an unused field */
3422 + area->phys_addr = (unsigned long)op.handle;
3423 +
3424 + *vaddr = area->addr;
3425 + return 0;
3426 +}
3427 +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
3428 +
3429 +
3430 +/**
3431 + * xenbus_map_ring
3432 + * @dev: xenbus device
3433 + * @gnt_ref: grant reference
3434 + * @handle: pointer to grant handle to be filled
3435 + * @vaddr: address to be mapped to
3436 + *
3437 + * Map a page of memory into this domain from another domain's grant table.
3438 + * xenbus_map_ring does not allocate the virtual address space (you must do
3439 + * this yourself!). It only maps in the page to the specified address.
3440 + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
3441 + * or -ENOMEM on error. If an error is returned, the device will switch to
3442 + * XenbusStateClosing and the error message will be saved in XenStore.
3443 + */
3444 +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
3445 + grant_handle_t *handle, void *vaddr)
3446 +{
3447 + struct gnttab_map_grant_ref op = {
3448 + .host_addr = (unsigned long)vaddr,
3449 + .flags = GNTMAP_host_map,
3450 + .ref = gnt_ref,
3451 + .dom = dev->otherend_id,
3452 + };
3453 +
3454 + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
3455 + BUG();
3456 +
3457 + if (op.status != GNTST_okay) {
3458 + xenbus_dev_fatal(dev, op.status,
3459 + "mapping in shared page %d from domain %d",
3460 + gnt_ref, dev->otherend_id);
3461 + } else
3462 + *handle = op.handle;
3463 +
3464 + return op.status;
3465 +}
3466 +EXPORT_SYMBOL_GPL(xenbus_map_ring);
3467 +
3468 +
3469 +/**
3470 + * xenbus_unmap_ring_vfree
3471 + * @dev: xenbus device
3472 + * @vaddr: addr to unmap
3473 + *
3474 + * Based on Rusty Russell's skeleton driver's unmap_page.
3475 + * Unmap a page of memory in this domain that was imported from another domain.
3476 + * Use xenbus_unmap_ring_vfree if you mapped in your memory with
3477 + * xenbus_map_ring_valloc (it will free the virtual address space).
3478 + * Returns 0 on success and returns GNTST_* on error
3479 + * (see xen/include/interface/grant_table.h).
3480 + */
3481 +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
3482 +{
3483 + struct vm_struct *area;
3484 + struct gnttab_unmap_grant_ref op = {
3485 + .host_addr = (unsigned long)vaddr,
3486 + };
3487 +
3488 + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
3489 + * method so that we don't have to muck with vmalloc internals here.
3490 + * We could force the user to hang on to their struct vm_struct from
3491 + * xenbus_map_ring_valloc, but these 6 lines considerably simplify
3492 + * this API.
3493 + */
3494 + read_lock(&vmlist_lock);
3495 + for (area = vmlist; area != NULL; area = area->next) {
3496 + if (area->addr == vaddr)
3497 + break;
3498 + }
3499 + read_unlock(&vmlist_lock);
3500 +
3501 + if (!area) {
3502 + xenbus_dev_error(dev, -ENOENT,
3503 + "can't find mapped virtual address %p", vaddr);
3504 + return GNTST_bad_virt_addr;
3505 + }
3506 +
3507 + op.handle = (grant_handle_t)area->phys_addr;
3508 +
3509 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
3510 + BUG();
3511 +
3512 + if (op.status == GNTST_okay)
3513 + free_vm_area(area);
3514 + else
3515 + xenbus_dev_error(dev, op.status,
3516 + "unmapping page at handle %d error %d",
3517 + (int16_t)area->phys_addr, op.status);
3518 +
3519 + return op.status;
3520 +}
3521 +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
3522 +
3523 +
3524 +/**
3525 + * xenbus_unmap_ring
3526 + * @dev: xenbus device
3527 + * @handle: grant handle
3528 + * @vaddr: addr to unmap
3529 + *
3530 + * Unmap a page of memory in this domain that was imported from another domain.
3531 + * Returns 0 on success and returns GNTST_* on error
3532 + * (see xen/include/interface/grant_table.h).
3533 + */
3534 +int xenbus_unmap_ring(struct xenbus_device *dev,
3535 + grant_handle_t handle, void *vaddr)
3536 +{
3537 + struct gnttab_unmap_grant_ref op = {
3538 + .host_addr = (unsigned long)vaddr,
3539 + .handle = handle,
3540 + };
3541 +
3542 + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
3543 + BUG();
3544 +
3545 + if (op.status != GNTST_okay)
3546 + xenbus_dev_error(dev, op.status,
3547 + "unmapping page at handle %d error %d",
3548 + handle, op.status);
3549 +
3550 + return op.status;
3551 +}
3552 +EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
3553 +#endif
3554 +
3555 +
3556 +/**
3557 + * xenbus_read_driver_state
3558 + * @path: path for driver
3559 + *
3560 + * Return the state of the driver rooted at the given store path, or
3561 + * XenbusStateUnknown if no state can be read.
3562 + */
3563 enum xenbus_state xenbus_read_driver_state(const char *path)
3564 {
3565 enum xenbus_state result;
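Note (not part of the patch): the kernel-doc blocks added to xenbus_client.c above document the helper API that split drivers use when connecting to their peer. A minimal, hedged sketch of a frontend's ring setup using those helpers follows; struct myfront_info, its fields and myfront_setup_ring() are made-up illustration names, only the xenbus_* calls come from this file.

#include <xen/xenbus.h>

/* Hypothetical per-device state used only for this sketch. */
struct myfront_info {
	int ring_ref;		/* grant reference of the shared page */
	int evtchn;		/* event channel for notifications */
	void *sring;		/* page shared with the backend */
};

static int myfront_setup_ring(struct xenbus_device *dev,
			      struct myfront_info *info)
{
	int err;

	/* Grant the backend access to the shared ring page; on success
	 * xenbus_grant_ring() returns the grant reference. */
	err = xenbus_grant_ring(dev, virt_to_mfn(info->sring));
	if (err < 0)
		return err;	/* error already reported via xenbus_dev_fatal() */
	info->ring_ref = err;

	/* Allocate an unbound event channel for the backend to bind to. */
	err = xenbus_alloc_evtchn(dev, &info->evtchn);
	if (err)
		return err;

	/* Advertise the new driver state in XenStore. */
	return xenbus_switch_state(dev, XenbusStateInitialised);
}

A real driver would additionally publish info->ring_ref and info->evtchn under its XenStore directory (normally inside a transaction) before switching state.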
3566 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c
3567 --- linux-2.6.25/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 18:33:26.000000000 +0200
3568 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_comms.c 2008-05-23 18:27:40.000000000 +0200
3569 @@ -34,12 +34,15 @@
3570 #include <linux/interrupt.h>
3571 #include <linux/sched.h>
3572 #include <linux/err.h>
3573 -#include <linux/ptrace.h>
3574 -#include <linux/workqueue.h>
3575 -#include <xen/evtchn.h>
3576 #include <xen/xenbus.h>
3577 -
3578 +#if defined(CONFIG_XEN) || defined(MODULE)
3579 +#include <xen/evtchn.h>
3580 #include <asm/hypervisor.h>
3581 +#else
3582 +#include <asm/xen/hypervisor.h>
3583 +#include <xen/events.h>
3584 +#include <xen/page.h>
3585 +#endif
3586
3587 #include "xenbus_comms.h"
3588
3589 @@ -91,6 +94,13 @@
3590 return buf + MASK_XENSTORE_IDX(cons);
3591 }
3592
3593 +/**
3594 + * xb_write - low level write
3595 + * @data: buffer to send
3596 + * @len: length of buffer
3597 + *
3598 + * Returns 0 on success, error otherwise.
3599 + */
3600 int xb_write(const void *data, unsigned len)
3601 {
3602 struct xenstore_domain_interface *intf = xen_store_interface;
3603 @@ -199,7 +209,9 @@
3604 return 0;
3605 }
3606
3607 -/* Set up interrupt handler off store event channel. */
3608 +/**
3609 + * xb_init_comms - Set up interrupt handler off store event channel.
3610 + */
3611 int xb_init_comms(void)
3612 {
3613 struct xenstore_domain_interface *intf = xen_store_interface;
3614 @@ -219,7 +231,11 @@
3615 if (xenbus_irq)
3616 unbind_from_irqhandler(xenbus_irq, &xb_waitq);
3617
3618 +#if defined(CONFIG_XEN) || defined(MODULE)
3619 err = bind_caller_port_to_irqhandler(
3620 +#else
3621 + err = bind_evtchn_to_irqhandler(
3622 +#endif
3623 xen_store_evtchn, wake_waiting,
3624 0, "xenbus", &xb_waitq);
3625 if (err <= 0) {
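Note (not part of the patch): both branches above bind xen_store_evtchn to the same handler; only the binding primitive differs between the classic Xen tree and the pvops tree. The handler itself is not in this hunk; roughly, it just wakes the xenbus wait queue (a simplified sketch -- the pvops variant additionally kicks off the initial probe work):

/* Simplified sketch of the handler bound above; see xenbus_comms.c for the
 * exact body. */
static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}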
3626 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe_backend.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe_backend.c
3627 --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe_backend.c 2008-05-23 18:33:27.000000000 +0200
3628 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe_backend.c 2008-05-23 18:27:40.000000000 +0200
3629 @@ -236,7 +236,7 @@
3630 {
3631 DPRINTK("");
3632
3633 - dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
3634 + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
3635 }
3636
3637 static struct xenbus_watch be_watch = {
3638 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.c
3639 --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe.c 2008-05-23 18:33:27.000000000 +0200
3640 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.c 2008-05-23 18:27:40.000000000 +0200
3641 @@ -43,12 +43,11 @@
3642 #include <linux/mm.h>
3643 #include <linux/notifier.h>
3644 #include <linux/mutex.h>
3645 -#include <linux/module.h>
3646 +#include <linux/io.h>
3647
3648 -#include <asm/io.h>
3649 #include <asm/page.h>
3650 -#include <asm/maddr.h>
3651 #include <asm/pgtable.h>
3652 +#if defined(CONFIG_XEN) || defined(MODULE)
3653 #include <asm/hypervisor.h>
3654 #include <xen/xenbus.h>
3655 #include <xen/xen_proc.h>
3656 @@ -57,6 +56,12 @@
3657 #ifdef MODULE
3658 #include <xen/hvm.h>
3659 #endif
3660 +#else
3661 +#include <asm/xen/hypervisor.h>
3662 +#include <xen/xenbus.h>
3663 +#include <xen/events.h>
3664 +#include <xen/page.h>
3665 +#endif
3666
3667 #include "xenbus_comms.h"
3668 #include "xenbus_probe.h"
3669 @@ -168,7 +173,7 @@
3670 return read_otherend_details(xendev, "backend-id", "backend");
3671 }
3672
3673 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3674 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE))
3675 static int xenbus_uevent_frontend(struct device *dev, char **envp,
3676 int num_envp, char *buffer, int buffer_size)
3677 {
3678 @@ -207,12 +212,16 @@
3679 .probe = xenbus_dev_probe,
3680 .remove = xenbus_dev_remove,
3681 .shutdown = xenbus_dev_shutdown,
3682 +#if defined(CONFIG_XEN) || defined(MODULE)
3683 .uevent = xenbus_uevent_frontend,
3684 #endif
3685 +#endif
3686 },
3687 +#if defined(CONFIG_XEN) || defined(MODULE)
3688 .dev = {
3689 .bus_id = "xen",
3690 },
3691 +#endif
3692 };
3693
3694 static void otherend_changed(struct xenbus_watch *watch,
3695 @@ -228,14 +237,15 @@
3696 if (!dev->otherend ||
3697 strncmp(dev->otherend, vec[XS_WATCH_PATH],
3698 strlen(dev->otherend))) {
3699 - DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
3700 + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]);
3701 return;
3702 }
3703
3704 state = xenbus_read_driver_state(dev->otherend);
3705
3706 - DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
3707 - dev->otherend_watch.node, vec[XS_WATCH_PATH]);
3708 + dev_dbg(&dev->dev, "state is %d (%s), %s, %s",
3709 + state, xenbus_strstate(state), dev->otherend_watch.node,
3710 + vec[XS_WATCH_PATH]);
3711
3712 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
3713 /*
3714 @@ -271,8 +281,13 @@
3715
3716 static int watch_otherend(struct xenbus_device *dev)
3717 {
3718 +#if defined(CONFIG_XEN) || defined(MODULE)
3719 return xenbus_watch_path2(dev, dev->otherend, "state",
3720 &dev->otherend_watch, otherend_changed);
3721 +#else
3722 + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
3723 + "%s/%s", dev->otherend, "state");
3724 +#endif
3725 }
3726
3727
3728 @@ -298,9 +313,9 @@
3729
3730 err = talk_to_otherend(dev);
3731 if (err) {
3732 - printk(KERN_WARNING
3733 - "xenbus_probe: talk_to_otherend on %s failed.\n",
3734 - dev->nodename);
3735 + dev_warn(&dev->dev,
3736 + "xenbus_probe: talk_to_otherend on %s failed.\n",
3737 + dev->nodename);
3738 return err;
3739 }
3740
3741 @@ -310,9 +325,9 @@
3742
3743 err = watch_otherend(dev);
3744 if (err) {
3745 - printk(KERN_WARNING
3746 - "xenbus_probe: watch_otherend on %s failed.\n",
3747 - dev->nodename);
3748 + dev_warn(&dev->dev,
3749 + "xenbus_probe: watch_otherend on %s failed.\n",
3750 + dev->nodename);
3751 return err;
3752 }
3753
3754 @@ -349,14 +364,15 @@
3755
3756 get_device(&dev->dev);
3757 if (dev->state != XenbusStateConnected) {
3758 - printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
3759 - dev->nodename, xenbus_strstate(dev->state));
3760 + dev_info(&dev->dev, "%s: %s: %s != Connected, skipping\n", __FUNCTION__,
3761 + dev->nodename, xenbus_strstate(dev->state));
3762 goto out;
3763 }
3764 xenbus_switch_state(dev, XenbusStateClosing);
3765 timeout = wait_for_completion_timeout(&dev->down, timeout);
3766 if (!timeout)
3767 - printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
3768 + dev_info(&dev->dev, "%s: %s timeout closing device\n",
3769 + __FUNCTION__, dev->nodename);
3770 out:
3771 put_device(&dev->dev);
3772 }
3773 @@ -544,7 +560,9 @@
3774 xendev->devicetype = tmpstring;
3775 init_completion(&xendev->down);
3776
3777 +#if defined(CONFIG_XEN) || defined(MODULE)
3778 xendev->dev.parent = &bus->dev;
3779 +#endif
3780 xendev->dev.bus = &bus->bus;
3781 xendev->dev.release = xenbus_dev_release;
3782
3783 @@ -559,15 +577,16 @@
3784
3785 err = device_create_file(&xendev->dev, &dev_attr_nodename);
3786 if (err)
3787 - goto unregister;
3788 + goto fail_unregister;
3789 +
3790 err = device_create_file(&xendev->dev, &dev_attr_devtype);
3791 if (err)
3792 - goto unregister;
3793 + goto fail_remove_file;
3794
3795 return 0;
3796 -unregister:
3797 +fail_remove_file:
3798 device_remove_file(&xendev->dev, &dev_attr_nodename);
3799 - device_remove_file(&xendev->dev, &dev_attr_devtype);
3800 +fail_unregister:
3801 device_unregister(&xendev->dev);
3802 fail:
3803 kfree(xendev);
3804 @@ -580,7 +599,8 @@
3805 char *nodename;
3806 int err;
3807
3808 - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
3809 + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
3810 + xenbus_frontend.root, type, name);
3811 if (!nodename)
3812 return -ENOMEM;
3813
3814 @@ -656,7 +676,7 @@
3815 return (len == 0) ? i : -ERANGE;
3816 }
3817
3818 -void dev_changed(const char *node, struct xen_bus_type *bus)
3819 +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
3820 {
3821 int exists, rootlen;
3822 struct xenbus_device *dev;
3823 @@ -664,7 +684,7 @@
3824 const char *p, *root;
3825
3826 if (bus->error || char_count(node, '/') < 2)
3827 - return;
3828 + return;
3829
3830 exists = xenbus_exists(XBT_NIL, node, "");
3831 if (!exists) {
3832 @@ -698,7 +718,7 @@
3833 {
3834 DPRINTK("");
3835
3836 - dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
3837 + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
3838 }
3839
3840 /* We watch for devices appearing and vanishing. */
3841 @@ -774,9 +794,9 @@
3842
3843 if (drv->resume) {
3844 err = drv->resume(xdev);
3845 - if (err) {
3846 + if (err) {
3847 printk(KERN_WARNING
3848 - "xenbus: resume %s failed: %i\n",
3849 + "xenbus: resume %s failed: %i\n",
3850 dev->bus_id, err);
3851 return err;
3852 }
3853 @@ -901,10 +921,16 @@
3854 }
3855 #endif
3856
3857 -static int xenbus_probe_init(void)
3858 +#ifndef MODULE
3859 +static int __init xenbus_probe_init(void)
3860 +#else
3861 +static int __devinit xenbus_probe_init(void)
3862 +#endif
3863 {
3864 int err = 0;
3865 +#if defined(CONFIG_XEN) || defined(MODULE)
3866 unsigned long page = 0;
3867 +#endif
3868
3869 DPRINTK("");
3870
3871 @@ -923,6 +949,7 @@
3872 * Domain0 doesn't have a store_evtchn or store_mfn yet.
3873 */
3874 if (is_initial_xendomain()) {
3875 +#if defined(CONFIG_XEN) || defined(MODULE)
3876 struct evtchn_alloc_unbound alloc_unbound;
3877
3878 /* Allocate page. */
3879 @@ -960,10 +987,13 @@
3880 if (xsd_port_intf)
3881 xsd_port_intf->read_proc = xsd_port_read;
3882 #endif
3883 +#else
3884 + /* dom0 not yet supported */
3885 +#endif
3886 xen_store_interface = mfn_to_virt(xen_store_mfn);
3887 } else {
3888 xenstored_ready = 1;
3889 -#ifdef CONFIG_XEN
3890 +#ifndef MODULE
3891 xen_store_evtchn = xen_start_info->store_evtchn;
3892 xen_store_mfn = xen_start_info->store_mfn;
3893 xen_store_interface = mfn_to_virt(xen_store_mfn);
3894 @@ -976,7 +1006,9 @@
3895 }
3896
3897
3898 +#if defined(CONFIG_XEN) || defined(MODULE)
3899 xenbus_dev_init();
3900 +#endif
3901
3902 /* Initialize the interface to xenstore. */
3903 err = xs_init();
3904 @@ -986,6 +1018,7 @@
3905 goto err;
3906 }
3907
3908 +#if defined(CONFIG_XEN) || defined(MODULE)
3909 /* Register ourselves with the kernel device subsystem */
3910 if (!xenbus_frontend.error) {
3911 xenbus_frontend.error = device_register(&xenbus_frontend.dev);
3912 @@ -996,6 +1029,7 @@
3913 xenbus_frontend.error);
3914 }
3915 }
3916 +#endif
3917 xenbus_backend_device_register();
3918
3919 if (!is_initial_xendomain())
3920 @@ -1004,8 +1038,10 @@
3921 return 0;
3922
3923 err:
3924 +#if defined(CONFIG_XEN) || defined(MODULE)
3925 if (page)
3926 free_page(page);
3927 +#endif
3928
3929 /*
3930 * Do not unregister the xenbus front/backend buses here. The buses
3931 @@ -1016,11 +1052,15 @@
3932 return err;
3933 }
3934
3935 -#ifdef CONFIG_XEN
3936 +#ifndef MODULE
3937 postcore_initcall(xenbus_probe_init);
3938 +#ifdef CONFIG_XEN
3939 MODULE_LICENSE("Dual BSD/GPL");
3940 #else
3941 -int xenbus_init(void)
3942 +MODULE_LICENSE("GPL");
3943 +#endif
3944 +#else
3945 +int __devinit xenbus_init(void)
3946 {
3947 return xenbus_probe_init();
3948 }
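Note (not part of the patch): otherend_changed() above turns the watch event into an enum xenbus_state via xenbus_read_driver_state() and, on 2.6.16+ kernels, forwards it to the driver's own otherend_changed hook. A hedged sketch of such a per-driver hook; myfront_connect() and myfront_closing() are hypothetical:

/* Hedged sketch of a frontend driver's backend-state hook, invoked by the
 * generic otherend_changed() watch callback shown in this file. */
static void myfront_backend_changed(struct xenbus_device *dev,
				    enum xenbus_state backend_state)
{
	switch (backend_state) {
	case XenbusStateConnected:
		myfront_connect(dev);		/* backend is ready: start I/O */
		break;
	case XenbusStateClosing:
	case XenbusStateClosed:
		myfront_closing(dev);		/* tear down and mirror the state */
		break;
	default:
		break;				/* ignore intermediate states */
	}
}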
3949 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_probe.h linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.h
3950 --- linux-2.6.25/drivers/xen/xenbus/xenbus_probe.h 2008-05-23 18:33:27.000000000 +0200
3951 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_probe.h 2008-05-23 18:27:40.000000000 +0200
3952 @@ -56,7 +56,9 @@
3953 int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
3954 int (*probe)(const char *type, const char *dir);
3955 struct bus_type bus;
3956 +#if defined(CONFIG_XEN) || defined(MODULE)
3957 struct device dev;
3958 +#endif
3959 };
3960
3961 extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
3962 @@ -71,7 +73,7 @@
3963 const char *nodename);
3964 extern int xenbus_probe_devices(struct xen_bus_type *bus);
3965
3966 -extern void dev_changed(const char *node, struct xen_bus_type *bus);
3967 +extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
3968
3969 #endif
3970
3971 diff -Naur linux-2.6.25/drivers/xen/xenbus/xenbus_xs.c linux-2.6.25-xen/drivers/xen/xenbus/xenbus_xs.c
3972 --- linux-2.6.25/drivers/xen/xenbus/xenbus_xs.c 2008-05-23 18:33:03.000000000 +0200
3973 +++ linux-2.6.25-xen/drivers/xen/xenbus/xenbus_xs.c 2008-05-23 18:27:40.000000000 +0200
3974 @@ -221,7 +221,7 @@
3975 }
3976
3977 for (i = 0; i < num_vecs; i++) {
3978 - err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
3979 + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
3980 if (err) {
3981 mutex_unlock(&xs_state.request_mutex);
3982 return ERR_PTR(err);
3983 @@ -629,7 +629,9 @@
3984 char token[sizeof(watch) * 2 + 1];
3985 int err;
3986
3987 +#if defined(CONFIG_XEN) || defined(MODULE)
3988 BUG_ON(watch->flags & XBWF_new_thread);
3989 +#endif
3990
3991 sprintf(token, "%lX", (long)watch);
3992
3993 @@ -648,6 +650,11 @@
3994
3995 up_read(&xs_state.watch_mutex);
3996
3997 + /* Make sure there are no callbacks running currently (unless
3998 + it's us) */
3999 + if (current->pid != xenwatch_pid)
4000 + mutex_lock(&xenwatch_mutex);
4001 +
4002 /* Cancel pending watch events. */
4003 spin_lock(&watch_events_lock);
4004 list_for_each_entry_safe(msg, tmp, &watch_events, list) {
4005 @@ -659,11 +666,8 @@
4006 }
4007 spin_unlock(&watch_events_lock);
4008
4009 - /* Flush any currently-executing callback, unless we are it. :-) */
4010 - if (current->pid != xenwatch_pid) {
4011 - mutex_lock(&xenwatch_mutex);
4012 + if (current->pid != xenwatch_pid)
4013 mutex_unlock(&xenwatch_mutex);
4014 - }
4015 }
4016 EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
4017
4018 @@ -701,6 +705,7 @@
4019 up_write(&xs_state.transaction_mutex);
4020 }
4021
4022 +#if defined(CONFIG_XEN) || defined(MODULE)
4023 static int xenwatch_handle_callback(void *data)
4024 {
4025 struct xs_stored_msg *msg = data;
4026 @@ -718,6 +723,7 @@
4027
4028 return 0;
4029 }
4030 +#endif
4031
4032 static int xenwatch_thread(void *unused)
4033 {
4034 @@ -747,6 +753,7 @@
4035
4036 msg = list_entry(ent, struct xs_stored_msg, list);
4037
4038 +#if defined(CONFIG_XEN) || defined(MODULE)
4039 /*
4040 * Unlock the mutex before running an XBWF_new_thread
4041 * handler. kthread_run can block which can deadlock
4042 @@ -763,6 +770,15 @@
4043 xenwatch_handle_callback(msg);
4044 mutex_unlock(&xenwatch_mutex);
4045 }
4046 +#else
4047 + msg->u.watch.handle->callback(
4048 + msg->u.watch.handle,
4049 + (const char **)msg->u.watch.vec,
4050 + msg->u.watch.vec_size);
4051 + mutex_unlock(&xenwatch_mutex);
4052 + kfree(msg->u.watch.vec);
4053 + kfree(msg);
4054 +#endif
4055 }
4056
4057 return 0;
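Note (not part of the patch): with the change above, unregister_xenbus_watch() holds xenwatch_mutex while cancelling pending events, so by the time it returns no callback for that watch is still running (unless the caller is the xenwatch thread itself). A hedged registration sketch; the node path and names are made up:

static void my_node_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	printk(KERN_DEBUG "xenstore node %s changed\n", vec[XS_WATCH_PATH]);
}

static struct xenbus_watch my_watch = {
	.node     = "device/vif/0/state",	/* hypothetical path */
	.callback = my_node_changed,
};

static int my_watch_init(void)
{
	return register_xenbus_watch(&my_watch);
}

static void my_watch_exit(void)
{
	unregister_xenbus_watch(&my_watch);	/* callbacks quiesced on return */
}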
4058 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h
4059 --- linux-2.6.25/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 18:33:27.000000000 +0200
4060 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/e820_64.h 2008-05-23 18:27:40.000000000 +0200
4061 @@ -11,8 +11,6 @@
4062 #ifndef __E820_HEADER
4063 #define __E820_HEADER
4064
4065 -#include <linux/mmzone.h>
4066 -
4067 #define E820MAP 0x2d0 /* our map */
4068 #define E820MAX 128 /* number of entries in E820MAP */
4069 #define E820NR 0x1e8 /* # entries in E820MAP */
4070 @@ -30,7 +28,7 @@
4071 } __attribute__((packed));
4072
4073 struct e820map {
4074 - int nr_map;
4075 + u32 nr_map;
4076 struct e820entry map[E820MAX];
4077 };
4078
4079 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h
4080 --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 18:33:27.000000000 +0200
4081 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_32.h 2008-05-23 18:27:40.000000000 +0200
4082 @@ -53,6 +53,8 @@
4083 enum fixed_addresses {
4084 FIX_HOLE,
4085 FIX_VDSO,
4086 + FIX_DBGP_BASE,
4087 + FIX_EARLYCON_MEM_BASE,
4088 #ifdef CONFIG_X86_LOCAL_APIC
4089 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
4090 #endif
4091 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h
4092 --- linux-2.6.25/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 18:33:27.000000000 +0200
4093 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/fixmap_64.h 2008-05-23 18:27:40.000000000 +0200
4094 @@ -23,9 +23,9 @@
4095 * compile time, but to set the physical address only
4096 * in the boot process.
4097 *
4098 - * these 'compile-time allocated' memory buffers are
4099 - * fixed-size 4k pages. (or larger if used with an increment
4100 - * highger than 1) use fixmap_set(idx,phys) to associate
4101 + * These 'compile-time allocated' memory buffers are
4102 + * fixed-size 4k pages (or larger if used with an increment
4103 + * higher than 1). Use set_fixmap(idx,phys) to associate
4104 * physical memory with fixmap indices.
4105 *
4106 * TLB entries of such buffers will not be flushed across
4107 @@ -36,6 +36,8 @@
4108 VSYSCALL_LAST_PAGE,
4109 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
4110 VSYSCALL_HPET,
4111 + FIX_DBGP_BASE,
4112 + FIX_EARLYCON_MEM_BASE,
4113 FIX_HPET_BASE,
4114 #ifdef CONFIG_X86_LOCAL_APIC
4115 FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
4116 @@ -105,7 +107,7 @@
4117 if (idx >= __end_of_fixed_addresses)
4118 __this_fixmap_does_not_exist();
4119
4120 - return __fix_to_virt(idx);
4121 + return __fix_to_virt(idx);
4122 }
4123
4124 #endif
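Note (not part of the patch): the reworded comment above points at set_fixmap(), and the two new slots are used by the EHCI debug port and early-console support in this kernel series. A hedged usage sketch; mmio_phys is a hypothetical physical address:

/* Map one MMIO page through the new FIX_EARLYCON_MEM_BASE slot and return
 * a virtual pointer into it. */
static void __iomem *map_early_mmio(unsigned long mmio_phys)
{
	set_fixmap_nocache(FIX_EARLYCON_MEM_BASE, mmio_phys & PAGE_MASK);
	return (void __iomem *)(fix_to_virt(FIX_EARLYCON_MEM_BASE) +
				(mmio_phys & ~PAGE_MASK));
}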
4125 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h
4126 --- linux-2.6.25/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 18:33:27.000000000 +0200
4127 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/hw_irq_64.h 2008-05-23 18:27:40.000000000 +0200
4128 @@ -97,6 +97,26 @@
4129 #endif
4130
4131 #ifndef __ASSEMBLY__
4132 +
4133 +/* Interrupt handlers registered during init_IRQ */
4134 +void apic_timer_interrupt(void);
4135 +void spurious_interrupt(void);
4136 +void error_interrupt(void);
4137 +void reschedule_interrupt(void);
4138 +void call_function_interrupt(void);
4139 +void irq_move_cleanup_interrupt(void);
4140 +void invalidate_interrupt0(void);
4141 +void invalidate_interrupt1(void);
4142 +void invalidate_interrupt2(void);
4143 +void invalidate_interrupt3(void);
4144 +void invalidate_interrupt4(void);
4145 +void invalidate_interrupt5(void);
4146 +void invalidate_interrupt6(void);
4147 +void invalidate_interrupt7(void);
4148 +void thermal_interrupt(void);
4149 +void threshold_interrupt(void);
4150 +void i8254_timer_resume(void);
4151 +
4152 typedef int vector_irq_t[NR_VECTORS];
4153 DECLARE_PER_CPU(vector_irq_t, vector_irq);
4154 extern void __setup_vector_irq(int cpu);
4155 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h
4156 --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 18:33:27.000000000 +0200
4157 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_32.h 2008-05-23 18:27:40.000000000 +0200
4158 @@ -125,6 +125,9 @@
4159 * writew/writel functions and the other mmio helpers. The returned
4160 * address is not guaranteed to be usable directly as a virtual
4161 * address.
4162 + *
4163 + * If the area you are trying to map is a PCI BAR you should have a
4164 + * look at pci_iomap().
4165 */
4166
4167 static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
4168 @@ -142,6 +145,7 @@
4169 */
4170 extern void *bt_ioremap(unsigned long offset, unsigned long size);
4171 extern void bt_iounmap(void *addr, unsigned long size);
4172 +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
4173
4174 /* Use early IO mappings for DMI because it's initialized early */
4175 #define dmi_ioremap bt_ioremap
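Note (not part of the patch): the added paragraph steers PCI BAR mappings towards pci_iomap(); for a plain MMIO region the documented ioremap() pattern looks roughly like this (phys_base, size and REG_STATUS are made-up values):

#define REG_STATUS 0x04		/* hypothetical register offset */

static int probe_mmio(unsigned long phys_base, unsigned long size)
{
	void __iomem *regs;
	u32 status;

	regs = ioremap(phys_base, size);
	if (!regs)
		return -ENOMEM;

	status = readl(regs + REG_STATUS);	/* only use read[bwl]/write[bwl] */
	printk(KERN_INFO "device status %#x\n", status);

	iounmap(regs);
	return 0;
}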
4176 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h
4177 --- linux-2.6.25/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 18:33:27.000000000 +0200
4178 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/io_64.h 2008-05-23 18:27:40.000000000 +0200
4179 @@ -162,6 +162,7 @@
4180 */
4181 extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
4182 extern void iounmap(volatile void __iomem *addr);
4183 +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
4184
4185 /*
4186 * ISA I/O bus memory addresses are 1:1 with the physical address.
4187 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/maddr_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/maddr_32.h
4188 --- linux-2.6.25/include/asm-x86/mach-xen/asm/maddr_32.h 2008-05-23 18:32:29.000000000 +0200
4189 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/maddr_32.h 2008-05-23 18:27:40.000000000 +0200
4190 @@ -155,6 +155,7 @@
4191
4192 #ifdef CONFIG_X86_PAE
4193 #define __pte_ma(x) ((pte_t) { (x), (maddr_t)(x) >> 32 } )
4194 +extern unsigned long long __supported_pte_mask;
4195 static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot)
4196 {
4197 pte_t pte;
4198 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_64.h
4199 --- linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_64.h 2008-05-23 18:33:27.000000000 +0200
4200 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_64.h 2008-05-23 18:27:40.000000000 +0200
4201 @@ -15,6 +15,7 @@
4202 rwlock_t ldtlock;
4203 int size;
4204 struct semaphore sem;
4205 + void *vdso;
4206 #ifdef CONFIG_XEN
4207 unsigned pinned:1;
4208 unsigned has_foreign_mappings:1;
4209 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_context_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_context_32.h
4210 --- linux-2.6.25/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-05-23 18:33:27.000000000 +0200
4211 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/mmu_context_32.h 2008-05-23 18:27:40.000000000 +0200
4212 @@ -51,6 +51,8 @@
4213 : : "r" (0) );
4214 }
4215
4216 +void leave_mm(unsigned long cpu);
4217 +
4218 static inline void switch_mm(struct mm_struct *prev,
4219 struct mm_struct *next,
4220 struct task_struct *tsk)
4221 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/page_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/page_64.h
4222 --- linux-2.6.25/include/asm-x86/mach-xen/asm/page_64.h 2008-05-23 18:33:27.000000000 +0200
4223 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/page_64.h 2008-05-23 18:27:40.000000000 +0200
4224 @@ -72,7 +72,8 @@
4225 #define clear_user_page(page, vaddr, pg) clear_page(page)
4226 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
4227
4228 -#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
4229 +#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
4230 + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
4231 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
4232
4233 /*
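Note (not part of the patch): the three-argument __alloc_zeroed_user_highpage form exists so that generic code can pass __GFP_MOVABLE. A sketch of how the wrapper in include/linux/highmem.h of this kernel series consumes it (shown for context only, not added by this patch):

static inline struct page *
alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
				   unsigned long vaddr)
{
	return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
}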
4234 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pci_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_32.h
4235 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pci_32.h 2008-05-23 18:32:29.000000000 +0200
4236 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_32.h 2008-05-23 18:27:40.000000000 +0200
4237 @@ -3,6 +3,14 @@
4238
4239
4240 #ifdef __KERNEL__
4241 +
4242 +struct pci_sysdata {
4243 + int node; /* NUMA node */
4244 +};
4245 +
4246 +/* scan a bus after allocating a pci_sysdata for it */
4247 +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
4248 +
4249 #include <linux/mm.h> /* for struct page */
4250
4251 /* Can be used to override the logic in pci_scan_bus for skipping
4252 @@ -81,48 +89,11 @@
4253
4254 #endif
4255
4256 -/* This is always fine. */
4257 -#define pci_dac_dma_supported(pci_dev, mask) (1)
4258 -
4259 -static inline dma64_addr_t
4260 -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
4261 -{
4262 - return ((dma64_addr_t) page_to_phys(page) +
4263 - (dma64_addr_t) offset);
4264 -}
4265 -
4266 -static inline struct page *
4267 -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
4268 -{
4269 - return pfn_to_page(dma_addr >> PAGE_SHIFT);
4270 -}
4271 -
4272 -static inline unsigned long
4273 -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
4274 -{
4275 - return (dma_addr & ~PAGE_MASK);
4276 -}
4277 -
4278 -static inline void
4279 -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4280 -{
4281 -}
4282 -
4283 -static inline void
4284 -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4285 -{
4286 - flush_write_buffers();
4287 -}
4288 -
4289 #define HAVE_PCI_MMAP
4290 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
4291 enum pci_mmap_state mmap_state, int write_combine);
4292
4293
4294 -static inline void pcibios_add_platform_entries(struct pci_dev *dev)
4295 -{
4296 -}
4297 -
4298 #ifdef CONFIG_PCI
4299 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
4300 enum pci_dma_burst_strategy *strat,
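Note (not part of the patch): struct pci_sysdata and pci_scan_bus_with_sysdata() are only declared here; the definitions live in the arch PCI code. A hedged sketch of what such a wrapper typically does, modelled on the mainline x86 implementation of the same era:

/* Allocate per-bus sysdata, default the NUMA node, then scan the bus.
 * Requires <linux/pci.h> and <linux/slab.h>; pci_root_ops comes from the
 * arch PCI core. */
struct pci_bus *pci_scan_bus_with_sysdata(int busno)
{
	struct pci_sysdata *sd;
	struct pci_bus *bus;

	sd = kzalloc(sizeof(*sd), GFP_KERNEL);
	if (!sd)
		return NULL;

	sd->node = -1;				/* NUMA node unknown */
	bus = pci_scan_bus(busno, &pci_root_ops, sd);
	if (!bus)
		kfree(sd);			/* nothing references sd on failure */

	return bus;
}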
4301 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pci_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_64.h
4302 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pci_64.h 2008-05-23 18:32:29.000000000 +0200
4303 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pci_64.h 2008-05-23 18:27:40.000000000 +0200
4304 @@ -5,6 +5,27 @@
4305
4306 #ifdef __KERNEL__
4307
4308 +struct pci_sysdata {
4309 + int node; /* NUMA node */
4310 + void* iommu; /* IOMMU private data */
4311 +};
4312 +
4313 +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
4314 +
4315 +#ifdef CONFIG_CALGARY_IOMMU
4316 +static inline void* pci_iommu(struct pci_bus *bus)
4317 +{
4318 + struct pci_sysdata *sd = bus->sysdata;
4319 + return sd->iommu;
4320 +}
4321 +
4322 +static inline void set_pci_iommu(struct pci_bus *bus, void *val)
4323 +{
4324 + struct pci_sysdata *sd = bus->sysdata;
4325 + sd->iommu = val;
4326 +}
4327 +#endif /* CONFIG_CALGARY_IOMMU */
4328 +
4329 #include <linux/mm.h> /* for struct page */
4330
4331 /* Can be used to override the logic in pci_scan_bus for skipping
4332 @@ -56,14 +77,6 @@
4333
4334 #if defined(CONFIG_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
4335
4336 -/*
4337 - * x86-64 always supports DAC, but sometimes it is useful to force
4338 - * devices through the IOMMU to get automatic sg list merging.
4339 - * Optional right now.
4340 - */
4341 -extern int iommu_sac_force;
4342 -#define pci_dac_dma_supported(pci_dev, mask) (!iommu_sac_force)
4343 -
4344 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
4345 dma_addr_t ADDR_NAME;
4346 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
4347 @@ -97,8 +110,6 @@
4348 #else
4349 /* No IOMMU */
4350
4351 -#define pci_dac_dma_supported(pci_dev, mask) 1
4352 -
4353 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
4354 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
4355 #define pci_unmap_addr(PTR, ADDR_NAME) (0)
4356 @@ -110,36 +121,6 @@
4357
4358 #include <asm-generic/pci-dma-compat.h>
4359
4360 -static inline dma64_addr_t
4361 -pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
4362 -{
4363 - return ((dma64_addr_t) page_to_phys(page) +
4364 - (dma64_addr_t) offset);
4365 -}
4366 -
4367 -static inline struct page *
4368 -pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
4369 -{
4370 - return virt_to_page(__va(dma_addr));
4371 -}
4372 -
4373 -static inline unsigned long
4374 -pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
4375 -{
4376 - return (dma_addr & ~PAGE_MASK);
4377 -}
4378 -
4379 -static inline void
4380 -pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4381 -{
4382 -}
4383 -
4384 -static inline void
4385 -pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
4386 -{
4387 - flush_write_buffers();
4388 -}
4389 -
4390 #ifdef CONFIG_PCI
4391 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
4392 enum pci_dma_burst_strategy *strat,
4393 @@ -154,10 +135,6 @@
4394 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
4395 enum pci_mmap_state mmap_state, int write_combine);
4396
4397 -static inline void pcibios_add_platform_entries(struct pci_dev *dev)
4398 -{
4399 -}
4400 -
4401 #endif /* __KERNEL__ */
4402
4403 /* generic pci stuff */
4404 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgalloc_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgalloc_32.h
4405 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-05-23 18:33:27.000000000 +0200
4406 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgalloc_32.h 2008-05-23 18:27:40.000000000 +0200
4407 @@ -5,7 +5,7 @@
4408 #include <linux/mm.h> /* for struct page */
4409 #include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
4410
4411 -#define paravirt_alloc_pt(pfn) do { } while (0)
4412 +#define paravirt_alloc_pt(mm, pfn) do { } while (0)
4413 #define paravirt_alloc_pd(pfn) do { } while (0)
4414 #define paravirt_alloc_pd(pfn) do { } while (0)
4415 #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0)
4416 @@ -14,14 +14,14 @@
4417
4418 #define pmd_populate_kernel(mm, pmd, pte) \
4419 do { \
4420 - paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \
4421 + paravirt_alloc_pt(mm, __pa(pte) >> PAGE_SHIFT); \
4422 set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \
4423 } while (0)
4424
4425 #define pmd_populate(mm, pmd, pte) \
4426 do { \
4427 unsigned long pfn = page_to_pfn(pte); \
4428 - paravirt_alloc_pt(pfn); \
4429 + paravirt_alloc_pt(mm, pfn); \
4430 if (test_bit(PG_pinned, &virt_to_page((mm)->pgd)->flags)) { \
4431 if (!PageHighMem(pte)) \
4432 BUG_ON(HYPERVISOR_update_va_mapping( \
4433 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h
4434 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 18:33:27.000000000 +0200
4435 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-2level.h 2008-05-23 18:27:40.000000000 +0200
4436 @@ -82,14 +82,6 @@
4437 #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
4438
4439 /*
4440 - * All present user pages are user-executable:
4441 - */
4442 -static inline int pte_exec(pte_t pte)
4443 -{
4444 - return pte_user(pte);
4445 -}
4446 -
4447 -/*
4448 * All present pages are kernel-executable:
4449 */
4450 static inline int pte_exec_kernel(pte_t pte)
4451 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h
4452 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 18:33:27.000000000 +0200
4453 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_32.h 2008-05-23 18:27:40.000000000 +0200
4454 @@ -26,9 +26,6 @@
4455 #include <linux/spinlock.h>
4456 #include <linux/sched.h>
4457
4458 -/* Is this pagetable pinned? */
4459 -#define PG_pinned PG_arch_1
4460 -
4461 struct vm_area_struct;
4462
4463 /*
4464 @@ -82,7 +79,7 @@
4465 * area for the same reason. ;)
4466 */
4467 #define VMALLOC_OFFSET (8*1024*1024)
4468 -#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \
4469 +#define VMALLOC_START (((unsigned long) high_memory + \
4470 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
4471 #ifdef CONFIG_HIGHMEM
4472 # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
4473 @@ -231,8 +228,6 @@
4474 * The following only work if pte_present() is true.
4475 * Undefined behaviour if not..
4476 */
4477 -static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
4478 -static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
4479 static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
4480 static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
4481 static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
4482 @@ -243,13 +238,9 @@
4483 */
4484 static inline int pte_file(pte_t pte) { return (pte).pte_low & _PAGE_FILE; }
4485
4486 -static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
4487 -static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; }
4488 static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
4489 static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
4490 static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; }
4491 -static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
4492 -static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; }
4493 static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
4494 static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
4495 static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
4496 @@ -295,22 +286,20 @@
4497 #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
4498 ({ \
4499 int __changed = !pte_same(*(ptep), entry); \
4500 - if (__changed && (dirty)) \
4501 - ptep_establish(vma, address, ptep, entry); \
4502 + if (__changed && (dirty)) { \
4503 + if ( likely((vma)->vm_mm == current->mm) ) { \
4504 + BUG_ON(HYPERVISOR_update_va_mapping(address, \
4505 + entry, \
4506 + (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
4507 + UVMF_INVLPG|UVMF_MULTI)); \
4508 + } else { \
4509 + xen_l1_entry_update(ptep, entry); \
4510 + flush_tlb_page(vma, address); \
4511 + } \
4512 + } \
4513 __changed; \
4514 })
4515
4516 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
4517 -#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \
4518 - int __ret = 0; \
4519 - if (pte_dirty(*(ptep))) \
4520 - __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \
4521 - &(ptep)->pte_low); \
4522 - if (__ret) \
4523 - pte_update((vma)->vm_mm, addr, ptep); \
4524 - __ret; \
4525 -})
4526 -
4527 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
4528 #define ptep_test_and_clear_young(vma, addr, ptep) ({ \
4529 int __ret = 0; \
4530 @@ -322,37 +311,6 @@
4531 __ret; \
4532 })
4533
4534 -/*
4535 - * Rules for using ptep_establish: the pte MUST be a user pte, and
4536 - * must be a present->present transition.
4537 - */
4538 -#define __HAVE_ARCH_PTEP_ESTABLISH
4539 -#define ptep_establish(vma, address, ptep, pteval) \
4540 -do { \
4541 - if ( likely((vma)->vm_mm == current->mm) ) { \
4542 - BUG_ON(HYPERVISOR_update_va_mapping(address, \
4543 - pteval, \
4544 - (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
4545 - UVMF_INVLPG|UVMF_MULTI)); \
4546 - } else { \
4547 - xen_l1_entry_update(ptep, pteval); \
4548 - flush_tlb_page(vma, address); \
4549 - } \
4550 -} while (0)
4551 -
4552 -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
4553 -#define ptep_clear_flush_dirty(vma, address, ptep) \
4554 -({ \
4555 - pte_t __pte = *(ptep); \
4556 - int __dirty = pte_dirty(__pte); \
4557 - __pte = pte_mkclean(__pte); \
4558 - if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
4559 - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
4560 - else if (__dirty) \
4561 - (ptep)->pte_low = __pte.pte_low; \
4562 - __dirty; \
4563 -})
4564 -
4565 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
4566 #define ptep_clear_flush_young(vma, address, ptep) \
4567 ({ \
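Note (not part of the patch): after this change the hypervisor-assisted PTE update that used to live in ptep_establish() is folded straight into ptep_set_access_flags(). A simplified sketch of the kind of caller it serves (the write-fault fixup path in generic mm code; not taken from this patch):

static void example_write_fault_fixup(struct vm_area_struct *vma,
				      unsigned long address, pte_t *ptep)
{
	pte_t entry = *ptep;

	/* Mark the PTE young and dirty, then let the arch macro push the
	 * update (here via HYPERVISOR_update_va_mapping or a direct
	 * xen_l1_entry_update, as defined above). */
	entry = pte_mkyoung(pte_mkdirty(entry));
	ptep_set_access_flags(vma, address, ptep, entry, 1 /* dirty */);
}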
4568 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h
4569 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 18:33:27.000000000 +0200
4570 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable-3level.h 2008-05-23 18:27:40.000000000 +0200
4571 @@ -23,26 +23,11 @@
4572 #define pud_present(pud) 1
4573
4574 /*
4575 - * Is the pte executable?
4576 - */
4577 -static inline int pte_x(pte_t pte)
4578 -{
4579 - return !(__pte_val(pte) & _PAGE_NX);
4580 -}
4581 -
4582 -/*
4583 - * All present user-pages with !NX bit are user-executable:
4584 - */
4585 -static inline int pte_exec(pte_t pte)
4586 -{
4587 - return pte_user(pte) && pte_x(pte);
4588 -}
4589 -/*
4590 * All present pages with !NX bit are kernel-executable:
4591 */
4592 static inline int pte_exec_kernel(pte_t pte)
4593 {
4594 - return pte_x(pte);
4595 + return !(__pte_val(pte) & _PAGE_NX);
4596 }
4597
4598 /* Rules for using set_pte: the pte being assigned *must* be
4599 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h
4600 --- linux-2.6.25/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 18:33:27.000000000 +0200
4601 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/pgtable_64.h 2008-05-23 18:27:40.000000000 +0200
4602 @@ -332,21 +332,15 @@
4603 * Undefined behaviour if not..
4604 */
4605 #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
4606 -static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
4607 -static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
4608 -static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
4609 static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
4610 static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
4611 static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
4612 static inline int pte_file(pte_t pte) { return __pte_val(pte) & _PAGE_FILE; }
4613 static inline int pte_huge(pte_t pte) { return __pte_val(pte) & _PAGE_PSE; }
4614
4615 -static inline pte_t pte_rdprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
4616 -static inline pte_t pte_exprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_USER; return pte; }
4617 static inline pte_t pte_mkclean(pte_t pte) { __pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
4618 static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
4619 static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
4620 -static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
4621 static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
4622 static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
4623 static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
4624 @@ -354,13 +348,6 @@
4625 static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
4626 static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
4627
4628 -static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
4629 -{
4630 - if (!pte_dirty(*ptep))
4631 - return 0;
4632 - return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
4633 -}
4634 -
4635 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
4636 {
4637 if (!pte_young(*ptep))
4638 @@ -500,19 +487,6 @@
4639 __changed; \
4640 })
4641
4642 -#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
4643 -#define ptep_clear_flush_dirty(vma, address, ptep) \
4644 -({ \
4645 - pte_t __pte = *(ptep); \
4646 - int __dirty = pte_dirty(__pte); \
4647 - __pte = pte_mkclean(__pte); \
4648 - if ((vma)->vm_mm->context.pinned) \
4649 - (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
4650 - else if (__dirty) \
4651 - set_pte(ptep, __pte); \
4652 - __dirty; \
4653 -})
4654 -
4655 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
4656 #define ptep_clear_flush_young(vma, address, ptep) \
4657 ({ \
4658 @@ -570,6 +544,8 @@
4659 #define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
4660 xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
4661
4662 +pte_t *lookup_address(unsigned long addr);
4663 +
4664 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
4665 direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
4666
4667 @@ -587,7 +563,6 @@
4668 (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
4669
4670 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
4671 -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
4672 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
4673 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
4674 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
4675 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h
4676 --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 18:33:27.000000000 +0200
4677 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_32.h 2008-05-23 18:27:40.000000000 +0200
4678 @@ -89,7 +89,6 @@
4679 #define X86_VENDOR_UMC 3
4680 #define X86_VENDOR_NEXGEN 4
4681 #define X86_VENDOR_CENTAUR 5
4682 -#define X86_VENDOR_RISE 6
4683 #define X86_VENDOR_TRANSMETA 7
4684 #define X86_VENDOR_NSC 8
4685 #define X86_VENDOR_NUM 9
4686 @@ -122,6 +121,7 @@
4687 extern void identify_boot_cpu(void);
4688 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
4689 extern void print_cpu_info(struct cpuinfo_x86 *);
4690 +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
4691 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
4692 extern unsigned short num_cache_leaves;
4693
4694 @@ -171,17 +171,6 @@
4695 write_cr4(cr4);
4696 }
4697
4698 -/*
4699 - * NSC/Cyrix CPU indexed register access macros
4700 - */
4701 -
4702 -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
4703 -
4704 -#define setCx86(reg, data) do { \
4705 - outb((reg), 0x22); \
4706 - outb((data), 0x23); \
4707 -} while (0)
4708 -
4709 /* Stop speculative execution */
4710 static inline void sync_core(void)
4711 {
4712 @@ -230,6 +219,10 @@
4713
4714 #define HAVE_ARCH_PICK_MMAP_LAYOUT
4715
4716 +extern void hard_disable_TSC(void);
4717 +extern void disable_TSC(void);
4718 +extern void hard_enable_TSC(void);
4719 +
4720 /*
4721 * Size of io_bitmap.
4722 */
4723 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h
4724 --- linux-2.6.25/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 18:33:27.000000000 +0200
4725 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/processor_64.h 2008-05-23 18:27:40.000000000 +0200
4726 @@ -83,7 +83,6 @@
4727 #define X86_VENDOR_UMC 3
4728 #define X86_VENDOR_NEXGEN 4
4729 #define X86_VENDOR_CENTAUR 5
4730 -#define X86_VENDOR_RISE 6
4731 #define X86_VENDOR_TRANSMETA 7
4732 #define X86_VENDOR_NUM 8
4733 #define X86_VENDOR_UNKNOWN 0xff
4734 @@ -100,6 +99,7 @@
4735
4736 extern void identify_cpu(struct cpuinfo_x86 *);
4737 extern void print_cpu_info(struct cpuinfo_x86 *);
4738 +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
4739 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
4740 extern unsigned short num_cache_leaves;
4741
4742 @@ -377,12 +377,10 @@
4743 asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
4744 }
4745
4746 -#define cpu_has_fpu 1
4747 -
4748 #define ARCH_HAS_PREFETCH
4749 static inline void prefetch(void *x)
4750 {
4751 - asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
4752 + asm volatile("prefetcht0 (%0)" :: "r" (x));
4753 }
4754
4755 #define ARCH_HAS_PREFETCHW 1
4756 @@ -400,17 +398,6 @@
4757
4758 #define cpu_relax() rep_nop()
4759
4760 -/*
4761 - * NSC/Cyrix CPU indexed register access macros
4762 - */
4763 -
4764 -#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
4765 -
4766 -#define setCx86(reg, data) do { \
4767 - outb((reg), 0x22); \
4768 - outb((data), 0x23); \
4769 -} while (0)
4770 -
4771 static inline void serialize_cpu(void)
4772 {
4773 __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
4774 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h
4775 --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 18:33:27.000000000 +0200
4776 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_32.h 2008-05-23 18:27:40.000000000 +0200
4777 @@ -205,11 +205,6 @@
4778 */
4779
4780
4781 -/*
4782 - * Actually only lfence would be needed for mb() because all stores done
4783 - * by the kernel should be already ordered. But keep a full barrier for now.
4784 - */
4785 -
4786 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
4787 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
4788
4789 @@ -301,15 +296,6 @@
4790 extern int es7000_plat;
4791 void cpu_idle_wait(void);
4792
4793 -/*
4794 - * On SMP systems, when the scheduler does migration-cost autodetection,
4795 - * it needs a way to flush as much of the CPU's caches as possible:
4796 - */
4797 -static inline void sched_cacheflush(void)
4798 -{
4799 - wbinvd();
4800 -}
4801 -
4802 extern unsigned long arch_align_stack(unsigned long sp);
4803 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
4804
4805 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h
4806 --- linux-2.6.25/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 18:33:27.000000000 +0200
4807 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/system_64.h 2008-05-23 18:27:40.000000000 +0200
4808 @@ -79,12 +79,16 @@
4809 unsigned long cr0;
4810 asm volatile("movq %%cr0,%0" : "=r" (cr0));
4811 return cr0;
4812 -}
4813 +}
4814
4815 static inline void write_cr0(unsigned long val)
4816 {
4817 asm volatile("movq %0,%%cr0" :: "r" (val));
4818 -}
4819 +}
4820 +
4821 +#define read_cr2() current_vcpu_info()->arch.cr2
4822 +
4823 +#define write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val)))
4824
4825 #define read_cr3() ({ \
4826 unsigned long __dummy; \
4827 @@ -103,27 +107,28 @@
4828 unsigned long cr4;
4829 asm("movq %%cr4,%0" : "=r" (cr4));
4830 return cr4;
4831 -}
4832 +}
4833
4834 static inline void write_cr4(unsigned long val)
4835 {
4836 asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
4837 -}
4838 -
4839 -#define stts() (HYPERVISOR_fpu_taskswitch(1))
4840 +}
4841
4842 -#define wbinvd() \
4843 - __asm__ __volatile__ ("wbinvd": : :"memory");
4844 +static inline unsigned long read_cr8(void)
4845 +{
4846 + return 0;
4847 +}
4848
4849 -/*
4850 - * On SMP systems, when the scheduler does migration-cost autodetection,
4851 - * it needs a way to flush as much of the CPU's caches as possible.
4852 - */
4853 -static inline void sched_cacheflush(void)
4854 +static inline void write_cr8(unsigned long val)
4855 {
4856 - wbinvd();
4857 + BUG_ON(val);
4858 }
4859
4860 +#define stts() (HYPERVISOR_fpu_taskswitch(1))
4861 +
4862 +#define wbinvd() \
4863 + __asm__ __volatile__ ("wbinvd": : :"memory")
4864 +
4865 #endif /* __KERNEL__ */
4866
4867 #define nop() __asm__ __volatile__ ("nop")
4868 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h
4869 --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 18:33:27.000000000 +0200
4870 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_32.h 2008-05-23 18:27:40.000000000 +0200
4871 @@ -91,7 +91,11 @@
4872 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
4873 #endif /* SMP */
4874
4875 -#define flush_tlb_kernel_range(start, end) flush_tlb_all()
4876 +static inline void flush_tlb_kernel_range(unsigned long start,
4877 + unsigned long end)
4878 +{
4879 + flush_tlb_all();
4880 +}
4881
4882 static inline void flush_tlb_pgtables(struct mm_struct *mm,
4883 unsigned long start, unsigned long end)
4884 diff -Naur linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h
4885 --- linux-2.6.25/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 18:33:27.000000000 +0200
4886 +++ linux-2.6.25-xen/include/asm-x86/mach-xen/asm/tlbflush_64.h 2008-05-23 18:27:40.000000000 +0200
4887 @@ -89,7 +89,11 @@
4888
4889 #endif
4890
4891 -#define flush_tlb_kernel_range(start, end) flush_tlb_all()
4892 +static inline void flush_tlb_kernel_range(unsigned long start,
4893 + unsigned long end)
4894 +{
4895 + flush_tlb_all();
4896 +}
4897
4898 static inline void flush_tlb_pgtables(struct mm_struct *mm,
4899 unsigned long start, unsigned long end)
4900 diff -Naur linux-2.6.25/include/asm-x86/thread_info_32.h linux-2.6.25-xen/include/asm-x86/thread_info_32.h
4901 --- linux-2.6.25/include/asm-x86/thread_info_32.h 2008-05-23 18:33:22.000000000 +0200
4902 +++ linux-2.6.25-xen/include/asm-x86/thread_info_32.h 2008-05-23 18:27:40.000000000 +0200
4903 @@ -177,7 +177,8 @@
4904 #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
4905 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW | _TIF_DEBUG)
4906 #else
4907 -#define _TIF_WORK_CTXSW _TIF_DEBUG
4908 +#define _TIF_WORK_CTXSW_NEXT (_TIF_NOTSC | _TIF_DEBUG)
4909 +#define _TIF_WORK_CTXSW_PREV (_TIF_NOTSC)
4910 #endif
4911
4912
4913 diff -Naur linux-2.6.25/include/asm-x86/xen/interface.h linux-2.6.25-xen/include/asm-x86/xen/interface.h
4914 --- linux-2.6.25/include/asm-x86/xen/interface.h 2008-04-17 04:49:44.000000000 +0200
4915 +++ linux-2.6.25-xen/include/asm-x86/xen/interface.h 2008-05-23 18:27:40.000000000 +0200
4916 @@ -10,27 +10,21 @@
4917 #define __XEN_PUBLIC_ARCH_X86_32_H__
4918
4919 #ifdef __XEN__
4920 -#define __DEFINE_GUEST_HANDLE(name, type) \
4921 +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
4922 typedef struct { type *p; } __guest_handle_ ## name
4923 #else
4924 -#define __DEFINE_GUEST_HANDLE(name, type) \
4925 +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
4926 typedef type * __guest_handle_ ## name
4927 #endif
4928
4929 -#define DEFINE_GUEST_HANDLE_STRUCT(name) \
4930 - __DEFINE_GUEST_HANDLE(name, struct name)
4931 -#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
4932 -#define GUEST_HANDLE(name) __guest_handle_ ## name
4933 +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) \
4934 + __DEFINE_XEN_GUEST_HANDLE(name, struct name)
4935 +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
4936 +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
4937
4938 #ifndef __ASSEMBLY__
4939 -/* Guest handles for primitive C types. */
4940 -__DEFINE_GUEST_HANDLE(uchar, unsigned char);
4941 -__DEFINE_GUEST_HANDLE(uint, unsigned int);
4942 -__DEFINE_GUEST_HANDLE(ulong, unsigned long);
4943 -DEFINE_GUEST_HANDLE(char);
4944 -DEFINE_GUEST_HANDLE(int);
4945 -DEFINE_GUEST_HANDLE(long);
4946 -DEFINE_GUEST_HANDLE(void);
4947 +typedef unsigned long xen_pfn_t;
4948 +typedef unsigned long xen_ulong_t;
4949 #endif
4950
4951 /*
4952 @@ -105,7 +99,7 @@
4953 uint16_t cs; /* code selector */
4954 unsigned long address; /* code offset */
4955 };
4956 -DEFINE_GUEST_HANDLE_STRUCT(trap_info);
4957 +DEFINE_XEN_GUEST_HANDLE_STRUCT(trap_info);
4958
4959 struct cpu_user_regs {
4960 uint32_t ebx;
4961 @@ -129,7 +123,7 @@
4962 uint16_t fs, _pad4;
4963 uint16_t gs, _pad5;
4964 };
4965 -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs);
4966 +DEFINE_XEN_GUEST_HANDLE_STRUCT(cpu_user_regs);
4967
4968 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
4969
4970 @@ -157,7 +151,7 @@
4971 unsigned long failsafe_callback_eip;
4972 unsigned long vm_assist; /* VMASST_TYPE_* bitmap */
4973 };
4974 -DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
4975 +DEFINE_XEN_GUEST_HANDLE_STRUCT(vcpu_guest_context);
4976
4977 struct arch_shared_info {
4978 unsigned long max_pfn; /* max pfn that appears in table */
4979 diff -Naur linux-2.6.25/include/linux/elfnote.h linux-2.6.25-xen/include/linux/elfnote.h
4980 --- linux-2.6.25/include/linux/elfnote.h 2008-04-17 04:49:44.000000000 +0200
4981 +++ linux-2.6.25-xen/include/linux/elfnote.h 2008-05-23 18:27:40.000000000 +0200
4982 @@ -52,7 +52,7 @@
4983 4484:.balign 4 ; \
4984 .popsection ;
4985
4986 -#define ELFNOTE(name, type, desc) \
4987 +#define ELFNOTE(name, type, desc...) \
4988 ELFNOTE_START(name, type, "") \
4989 desc ; \
4990 ELFNOTE_END
4991 diff -Naur linux-2.6.25/include/linux/page-flags.h linux-2.6.25-xen/include/linux/page-flags.h
4992 --- linux-2.6.25/include/linux/page-flags.h 2008-05-23 18:33:11.000000000 +0200
4993 +++ linux-2.6.25-xen/include/linux/page-flags.h 2008-05-23 18:27:40.000000000 +0200
4994 @@ -83,6 +83,11 @@
4995 #define PG_private 11 /* If pagecache, has fs-private data */
4996
4997 #define PG_writeback 12 /* Page is under writeback */
4998 +#ifdef CONFIG_XEN
4999 +/* Cannot alias with PG_owner_priv_1 since bad_page() checks include this bit.
5000 + * Also cannot use PG_arch_1 since that now has a different purpose on x86. */
5001 +#define PG_pinned 13
5002 +#endif
5003 #define PG_compound 14 /* Part of a compound page */
5004 #define PG_swapcache 15 /* Swap page: swp_entry_t in private */
5005
5006 @@ -96,7 +101,9 @@
5007
5008 /* PG_owner_priv_1 users should have descriptive aliases */
5009 #define PG_checked PG_owner_priv_1 /* Used by some filesystems */
5010 +#ifdef CONFIG_PARAVIRT_XEN
5011 #define PG_pinned PG_owner_priv_1 /* Xen pinned pagetable */
5012 +#endif
5013
5014 #if (BITS_PER_LONG > 32)
5015 /*
5016 diff -Naur linux-2.6.25/include/linux/skbuff.h linux-2.6.25-xen/include/linux/skbuff.h
5017 --- linux-2.6.25/include/linux/skbuff.h 2008-05-23 18:33:24.000000000 +0200
5018 +++ linux-2.6.25-xen/include/linux/skbuff.h 2008-05-23 18:27:40.000000000 +0200
5019 @@ -1822,7 +1822,7 @@
5020
5021 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
5022
5023 -#ifdef CONFIG_XEN
5024 +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5025 int skb_checksum_setup(struct sk_buff *skb);
5026 #else
5027 static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
5028 diff -Naur linux-2.6.25/include/xen/driver_util.h linux-2.6.25-xen/include/xen/driver_util.h
5029 --- linux-2.6.25/include/xen/driver_util.h 2008-05-23 18:32:29.000000000 +0200
5030 +++ linux-2.6.25-xen/include/xen/driver_util.h 2008-05-23 18:27:40.000000000 +0200
5031 @@ -5,10 +5,6 @@
5032 #include <linux/vmalloc.h>
5033 #include <linux/device.h>
5034
5035 -/* Allocate/destroy a 'vmalloc' VM area. */
5036 -extern struct vm_struct *alloc_vm_area(unsigned long size);
5037 -extern void free_vm_area(struct vm_struct *area);
5038 -
5039 extern struct class *get_xen_class(void);
5040
5041 #endif /* __ASM_XEN_DRIVER_UTIL_H__ */
5042 diff -Naur linux-2.6.25/include/xen/features.h linux-2.6.25-xen/include/xen/features.h
5043 --- linux-2.6.25/include/xen/features.h 2008-04-17 04:49:44.000000000 +0200
5044 +++ linux-2.6.25-xen/include/xen/features.h 2008-05-23 18:27:40.000000000 +0200
5045 @@ -10,6 +10,7 @@
5046 #define __XEN_FEATURES_H__
5047
5048 #include <xen/interface/features.h>
5049 +#include <xen/interface/version.h>
5050
5051 void xen_setup_features(void);
5052
5053 @@ -20,4 +21,4 @@
5054 return xen_features[flag];
5055 }
5056
5057 -#endif /* __ASM_XEN_FEATURES_H__ */
5058 +#endif /* __XEN_FEATURES_H__ */
5059 diff -Naur linux-2.6.25/include/xen/interface/arch-x86/xen.h linux-2.6.25-xen/include/xen/interface/arch-x86/xen.h
5060 --- linux-2.6.25/include/xen/interface/arch-x86/xen.h 2008-05-23 18:32:37.000000000 +0200
5061 +++ linux-2.6.25-xen/include/xen/interface/arch-x86/xen.h 2008-05-23 18:27:40.000000000 +0200
5062 @@ -49,6 +49,9 @@
5063 #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
5064 #endif
5065
5066 +/* Allow co-existing Linux 2.6.23+ Xen interface definitions. */
5067 +#define DEFINE_XEN_GUEST_HANDLE_STRUCT(name) struct name
5068 +
5069 #if defined(__i386__)
5070 #include "xen-x86_32.h"
5071 #elif defined(__x86_64__)
5072 diff -Naur linux-2.6.25/include/xen/interface/event_channel.h linux-2.6.25-xen/include/xen/interface/event_channel.h
5073 --- linux-2.6.25/include/xen/interface/event_channel.h 2008-05-23 18:33:03.000000000 +0200
5074 +++ linux-2.6.25-xen/include/xen/interface/event_channel.h 2008-05-23 18:27:40.000000000 +0200
5075 @@ -248,6 +248,7 @@
5076 struct evtchn_unmask unmask;
5077 } u;
5078 };
5079 +DEFINE_XEN_GUEST_HANDLE_STRUCT(evtchn_op);
5080 typedef struct evtchn_op evtchn_op_t;
5081 DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
5082
5083 diff -Naur linux-2.6.25/include/xen/interface/io/netif.h linux-2.6.25-xen/include/xen/interface/io/netif.h
5084 --- linux-2.6.25/include/xen/interface/io/netif.h 2008-05-23 18:33:03.000000000 +0200
5085 +++ linux-2.6.25-xen/include/xen/interface/io/netif.h 2008-05-23 18:27:40.000000000 +0200
5086 @@ -183,8 +183,22 @@
5087 * Generate netif ring structures and types.
5088 */
5089
5090 +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5091 DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
5092 DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
5093 +#else
5094 +#define xen_netif_tx_request netif_tx_request
5095 +#define xen_netif_rx_request netif_rx_request
5096 +#define xen_netif_tx_response netif_tx_response
5097 +#define xen_netif_rx_response netif_rx_response
5098 +DEFINE_RING_TYPES(xen_netif_tx,
5099 + struct xen_netif_tx_request,
5100 + struct xen_netif_tx_response);
5101 +DEFINE_RING_TYPES(xen_netif_rx,
5102 + struct xen_netif_rx_request,
5103 + struct xen_netif_rx_response);
5104 +#define xen_netif_extra_info netif_extra_info
5105 +#endif
5106
5107 #define NETIF_RSP_DROPPED -2
5108 #define NETIF_RSP_ERROR -1
5109 diff -Naur linux-2.6.25/include/xen/interface/memory.h linux-2.6.25-xen/include/xen/interface/memory.h
5110 --- linux-2.6.25/include/xen/interface/memory.h 2008-05-23 18:33:03.000000000 +0200
5111 +++ linux-2.6.25-xen/include/xen/interface/memory.h 2008-05-23 18:27:40.000000000 +0200
5112 @@ -67,6 +67,7 @@
5113 */
5114 domid_t domid;
5115 };
5116 +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_memory_reservation);
5117 typedef struct xen_memory_reservation xen_memory_reservation_t;
5118 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
5119
5120 @@ -160,6 +161,7 @@
5121 */
5122 unsigned int nr_extents;
5123 };
5124 +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
5125 typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
5126 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
5127
5128 @@ -198,6 +200,7 @@
5129 /* GPFN where the source mapping page should appear. */
5130 xen_pfn_t gpfn;
5131 };
5132 +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
5133 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
5134 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
5135
5136 @@ -222,6 +225,7 @@
5137 */
5138 XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
5139 };
5140 +DEFINE_XEN_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
5141 typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
5142 DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
5143
5144 diff -Naur linux-2.6.25/include/xen/interface/sched.h linux-2.6.25-xen/include/xen/interface/sched.h
5145 --- linux-2.6.25/include/xen/interface/sched.h 2008-05-23 18:33:03.000000000 +0200
5146 +++ linux-2.6.25-xen/include/xen/interface/sched.h 2008-05-23 18:27:40.000000000 +0200
5147 @@ -67,6 +67,7 @@
5148 struct sched_shutdown {
5149 unsigned int reason; /* SHUTDOWN_* */
5150 };
5151 +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_shutdown);
5152 typedef struct sched_shutdown sched_shutdown_t;
5153 DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
5154
5155 @@ -81,6 +82,7 @@
5156 unsigned int nr_ports;
5157 uint64_t timeout;
5158 };
5159 +DEFINE_XEN_GUEST_HANDLE_STRUCT(sched_poll);
5160 typedef struct sched_poll sched_poll_t;
5161 DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
5162
5163 diff -Naur linux-2.6.25/include/xen/interface/version.h linux-2.6.25-xen/include/xen/interface/version.h
5164 --- linux-2.6.25/include/xen/interface/version.h 2008-05-23 18:33:03.000000000 +0200
5165 +++ linux-2.6.25-xen/include/xen/interface/version.h 2008-05-23 18:27:40.000000000 +0200
5166 @@ -36,6 +36,9 @@
5167 /* arg == xen_extraversion_t. */
5168 #define XENVER_extraversion 1
5169 typedef char xen_extraversion_t[16];
5170 +struct xen_extraversion {
5171 + xen_extraversion_t extraversion;
5172 +};
5173 #define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
5174
5175 /* arg == xen_compile_info_t. */
5176 @@ -50,10 +53,16 @@
5177
5178 #define XENVER_capabilities 3
5179 typedef char xen_capabilities_info_t[1024];
5180 +struct xen_capabilities_info {
5181 + xen_capabilities_info_t info;
5182 +};
5183 #define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
5184
5185 #define XENVER_changeset 4
5186 typedef char xen_changeset_info_t[64];
5187 +struct xen_changeset_info {
5188 + xen_changeset_info_t info;
5189 +};
5190 #define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
5191
5192 #define XENVER_platform_parameters 5
5193 diff -Naur linux-2.6.25/include/xen/interface/xen.h linux-2.6.25-xen/include/xen/interface/xen.h
5194 --- linux-2.6.25/include/xen/interface/xen.h 2008-05-23 18:33:03.000000000 +0200
5195 +++ linux-2.6.25-xen/include/xen/interface/xen.h 2008-05-23 18:27:40.000000000 +0200
5196 @@ -29,7 +29,9 @@
5197
5198 #include "xen-compat.h"
5199
5200 -#if defined(__i386__) || defined(__x86_64__)
5201 +#if defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
5202 +#include <asm/xen/interface.h>
5203 +#elif defined(__i386__) || defined(__x86_64__)
5204 #include "arch-x86/xen.h"
5205 #elif defined(__ia64__)
5206 #include "arch-ia64.h"
5207 @@ -109,7 +111,7 @@
5208 */
5209
5210 /* New sched_op hypercall introduced in 0x00030101. */
5211 -#if __XEN_INTERFACE_VERSION__ < 0x00030101
5212 +#if __XEN_INTERFACE_VERSION__ < 0x00030101 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
5213 #undef __HYPERVISOR_sched_op
5214 #define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
5215 #endif
5216 @@ -123,7 +125,7 @@
5217 #endif
5218
5219 /* New platform_op hypercall introduced in 0x00030204. */
5220 -#if __XEN_INTERFACE_VERSION__ < 0x00030204
5221 +#if __XEN_INTERFACE_VERSION__ < 0x00030204 || (defined(CONFIG_PARAVIRT_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H))
5222 #define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
5223 #endif
5224
5225 @@ -270,6 +272,7 @@
5226 #endif
5227 } arg2;
5228 };
5229 +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmuext_op);
5230 typedef struct mmuext_op mmuext_op_t;
5231 DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
5232 #endif
5233 @@ -352,6 +355,7 @@
5234 uint64_t ptr; /* Machine address of PTE. */
5235 uint64_t val; /* New contents of PTE. */
5236 };
5237 +DEFINE_XEN_GUEST_HANDLE_STRUCT(mmu_update);
5238 typedef struct mmu_update mmu_update_t;
5239 DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
5240
5241 @@ -360,9 +364,15 @@
5242 * NB. The fields are natural register size for this architecture.
5243 */
5244 struct multicall_entry {
5245 - unsigned long op, result;
5246 + unsigned long op;
5247 +#if !defined(CONFIG_PARAVIRT_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5248 + unsigned long result;
5249 +#else
5250 + long result;
5251 +#endif
5252 unsigned long args[6];
5253 };
5254 +DEFINE_XEN_GUEST_HANDLE_STRUCT(multicall_entry);
5255 typedef struct multicall_entry multicall_entry_t;
5256 DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
5257
5258 diff -Naur linux-2.6.25/include/xen/xenbus.h linux-2.6.25-xen/include/xen/xenbus.h
5259 --- linux-2.6.25/include/xen/xenbus.h 2008-05-23 18:33:27.000000000 +0200
5260 +++ linux-2.6.25-xen/include/xen/xenbus.h 2008-05-23 18:27:40.000000000 +0200
5261 @@ -57,16 +57,20 @@
5262 void (*callback)(struct xenbus_watch *,
5263 const char **vec, unsigned int len);
5264
5265 +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5266 /* See XBWF_ definitions below. */
5267 unsigned long flags;
5268 +#endif
5269 };
5270
5271 +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5272 /*
5273 * Execute callback in its own kthread. Useful if the callback is long
5274 * running or heavily serialised, to avoid taking out the main xenwatch thread
5275 * for a long period of time (or even unwittingly causing a deadlock).
5276 */
5277 #define XBWF_new_thread 1
5278 +#endif
5279
5280 /* A xenbus device. */
5281 struct xenbus_device {
5282 @@ -214,6 +218,7 @@
5283 const char **, unsigned int));
5284
5285
5286 +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
5287 /**
5288 * Register a watch on the given path/path2, using the given xenbus_watch
5289 * structure for storage, and the given callback function as the callback.
5290 @@ -227,7 +232,13 @@
5291 const char *path2, struct xenbus_watch *watch,
5292 void (*callback)(struct xenbus_watch *,
5293 const char **, unsigned int));
5294 -
5295 +#else
5296 +int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
5297 + void (*callback)(struct xenbus_watch *,
5298 + const char **, unsigned int),
5299 + const char *pathfmt, ...)
5300 + __attribute__ ((format (printf, 4, 5)));
5301 +#endif
5302
5303 /**
5304 * Advertise in the store a change of the given driver to the given new_state.
5305 diff -Naur linux-2.6.25/net/core/dev.c linux-2.6.25-xen/net/core/dev.c
5306 --- linux-2.6.25/net/core/dev.c 2008-05-23 18:33:27.000000000 +0200
5307 +++ linux-2.6.25-xen/net/core/dev.c 2008-05-23 18:27:40.000000000 +0200
5308 @@ -122,7 +122,7 @@
5309
5310 #include "net-sysfs.h"
5311
5312 -#ifdef CONFIG_XEN
5313 +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5314 #include <net/ip.h>
5315 #include <linux/tcp.h>
5316 #include <linux/udp.h>
5317 @@ -1582,42 +1582,54 @@
5318 return 0;
5319 }
5320
5321 -#ifdef CONFIG_XEN
5322 +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
5323 inline int skb_checksum_setup(struct sk_buff *skb)
5324 {
5325 - if (skb->proto_csum_blank) {
5326 - struct iphdr *iph;
5327 - unsigned char *th;
5328 + struct iphdr *iph;
5329 + unsigned char *th;
5330 + int err = -EPROTO;
5331
5332 - if (skb->protocol != htons(ETH_P_IP))
5333 - goto out;
5334 - iph = ip_hdr(skb);
5335 - th = skb_network_header(skb) + 4 * iph->ihl;
5336 - if (th >= skb_tail_pointer(skb))
5337 - goto out;
5338 - skb->csum_start = th - skb->head;
5339 - switch (iph->protocol) {
5340 - case IPPROTO_TCP:
5341 - skb->csum_offset = offsetof(struct tcphdr, check);
5342 - break;
5343 - case IPPROTO_UDP:
5344 - skb->csum_offset = offsetof(struct udphdr, check);
5345 - break;
5346 - default:
5347 - if (net_ratelimit())
5348 - printk(KERN_ERR "Attempting to checksum a non-"
5349 - "TCP/UDP packet, dropping a protocol"
5350 - " %d packet", iph->protocol);
5351 - goto out;
5352 - }
5353 - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
5354 - goto out;
5355 - skb->ip_summed = CHECKSUM_PARTIAL;
5356 - skb->proto_csum_blank = 0;
5357 +#ifdef CONFIG_XEN
5358 + if (!skb->proto_csum_blank)
5359 + return 0;
5360 +#endif
5361 +
5362 + if (skb->protocol != htons(ETH_P_IP))
5363 + goto out;
5364 +
5365 + iph = ip_hdr(skb);
5366 + th = skb_network_header(skb) + 4 * iph->ihl;
5367 + if (th >= skb_tail_pointer(skb))
5368 + goto out;
5369 +
5370 + skb->csum_start = th - skb->head;
5371 + switch (iph->protocol) {
5372 + case IPPROTO_TCP:
5373 + skb->csum_offset = offsetof(struct tcphdr, check);
5374 + break;
5375 + case IPPROTO_UDP:
5376 + skb->csum_offset = offsetof(struct udphdr, check);
5377 + break;
5378 + default:
5379 + if (net_ratelimit())
5380 + printk(KERN_ERR "Attempting to checksum a non-"
5381 + "TCP/UDP packet, dropping a protocol"
5382 + " %d packet", iph->protocol);
5383 + goto out;
5384 }
5385 - return 0;
5386 +
5387 + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
5388 + goto out;
5389 +
5390 +#ifdef CONFIG_XEN
5391 + skb->ip_summed = CHECKSUM_PARTIAL;
5392 + skb->proto_csum_blank = 0;
5393 +#endif
5394 +
5395 + err = 0;
5396 +
5397 out:
5398 - return -EPROTO;
5399 + return err;
5400 }
5401 EXPORT_SYMBOL(skb_checksum_setup);
5402 #endif