Contents of /trunk/kernel26-xen/patches-2.6.25-r1/1021-2.6.25-xen-patch-2.6.20.patch
Parent Directory | Revision Log
Revision 609 -
(show annotations)
(download)
Fri May 23 17:35:37 2008 UTC (16 years, 4 months ago) by niro
File size: 199019 byte(s)
-using opensuse xen patchset, updated kernel configs
1 | From: www.kernel.org |
2 | Subject: Linux 2.6.20 |
3 | Patch-mainline: 2.6.20 |
4 | |
5 | Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py |
6 | |
7 | Acked-by: jbeulich@novell.com |
8 | |
9 | --- |
10 | arch/x86/Kconfig | 2 |
11 | arch/x86/kernel/asm-offsets_32.c | 6 |
12 | arch/x86/kernel/cpu/common-xen.c | 286 ++++--- |
13 | arch/x86/kernel/cpu/mtrr/main-xen.c | 5 |
14 | arch/x86/kernel/e820_32-xen.c | 1000 ++++++++++++++++++++++++++ |
15 | arch/x86/kernel/entry_32-xen.S | 387 ++++------ |
16 | arch/x86/kernel/entry_64-xen.S | 69 - |
17 | arch/x86/kernel/genapic_64-xen.c | 8 |
18 | arch/x86/kernel/head64-xen.c | 5 |
19 | arch/x86/kernel/head_32-xen.S | 63 + |
20 | arch/x86/kernel/io_apic_32-xen.c | 68 - |
21 | arch/x86/kernel/io_apic_64-xen.c | 133 ++- |
22 | arch/x86/kernel/irq_64-xen.c | 2 |
23 | arch/x86/kernel/ldt_32-xen.c | 4 |
24 | arch/x86/kernel/microcode-xen.c | 6 |
25 | arch/x86/kernel/mpparse_32-xen.c | 12 |
26 | arch/x86/kernel/mpparse_64-xen.c | 2 |
27 | arch/x86/kernel/pci-dma_32-xen.c | 10 |
28 | arch/x86/kernel/process_32-xen.c | 56 - |
29 | arch/x86/kernel/process_64-xen.c | 34 |
30 | arch/x86/kernel/quirks-xen.c | 61 + |
31 | arch/x86/kernel/setup_32-xen.c | 974 ------------------------- |
32 | arch/x86/kernel/setup_64-xen.c | 24 |
33 | arch/x86/kernel/smp_32-xen.c | 4 |
34 | arch/x86/kernel/smp_64-xen.c | 5 |
35 | arch/x86/kernel/time_32-xen.c | 17 |
36 | arch/x86/kernel/traps_32-xen.c | 204 +---- |
37 | arch/x86/kernel/traps_64-xen.c | 139 --- |
38 | arch/x86/kernel/vmlinux_32.lds.S | 6 |
39 | arch/x86/kernel/vsyscall_64-xen.c | 7 |
40 | arch/x86/kvm/Kconfig | 1 |
41 | arch/x86/mm/fault_32-xen.c | 12 |
42 | arch/x86/mm/fault_64-xen.c | 10 |
43 | arch/x86/mm/highmem_32-xen.c | 26 |
44 | arch/x86/mm/init_32-xen.c | 20 |
45 | arch/x86/mm/init_64-xen.c | 7 |
46 | arch/x86/mm/pageattr_64-xen.c | 58 - |
47 | arch/x86/mm/pgtable_32-xen.c | 6 |
48 | arch/x86/pci/irq-xen.c | 4 |
49 | drivers/xen/balloon/balloon.c | 6 |
50 | drivers/xen/blkback/blkback.c | 1 |
51 | drivers/xen/blkback/interface.c | 2 |
52 | drivers/xen/blkfront/blkfront.c | 8 |
53 | drivers/xen/blktap/blktap.c | 1 |
54 | drivers/xen/blktap/interface.c | 2 |
55 | drivers/xen/char/mem.c | 4 |
56 | drivers/xen/console/console.c | 13 |
57 | drivers/xen/core/reboot.c | 10 |
58 | drivers/xen/core/smpboot.c | 21 |
59 | drivers/xen/fbfront/xenfb.c | 1 |
60 | drivers/xen/netback/loopback.c | 1 |
61 | drivers/xen/pciback/conf_space_header.c | 4 |
62 | drivers/xen/pciback/pciback.h | 2 |
63 | drivers/xen/pciback/pciback_ops.c | 6 |
64 | drivers/xen/pciback/xenbus.c | 3 |
65 | drivers/xen/sfc_netfront/accel_vi.c | 4 |
66 | drivers/xen/tpmback/interface.c | 2 |
67 | drivers/xen/xenbus/xenbus_comms.c | 4 |
68 | drivers/xen/xenbus/xenbus_probe.c | 2 |
69 | include/asm-x86/mach-xen/asm/desc_32.h | 100 +- |
70 | include/asm-x86/mach-xen/asm/desc_64.h | 53 - |
71 | include/asm-x86/mach-xen/asm/dma-mapping_32.h | 4 |
72 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 8 |
73 | include/asm-x86/mach-xen/asm/fixmap_32.h | 5 |
74 | include/asm-x86/mach-xen/asm/hypervisor.h | 9 |
75 | include/asm-x86/mach-xen/asm/io_32.h | 4 |
76 | include/asm-x86/mach-xen/asm/irqflags_32.h | 68 + |
77 | include/asm-x86/mach-xen/asm/mmu_context_32.h | 19 |
78 | include/asm-x86/mach-xen/asm/pgtable-2level.h | 21 |
79 | include/asm-x86/mach-xen/asm/pgtable-3level.h | 67 - |
80 | include/asm-x86/mach-xen/asm/pgtable_32.h | 24 |
81 | include/asm-x86/mach-xen/asm/pgtable_64.h | 23 |
82 | include/asm-x86/mach-xen/asm/processor_32.h | 207 ++--- |
83 | include/asm-x86/mach-xen/asm/processor_64.h | 8 |
84 | include/asm-x86/mach-xen/asm/segment_32.h | 5 |
85 | include/asm-x86/mach-xen/asm/smp_32.h | 3 |
86 | include/asm-x86/mach-xen/asm/smp_64.h | 12 |
87 | include/asm-x86/mach-xen/asm/system_32.h | 12 |
88 | kernel/kexec.c | 2 |
89 | net/core/dev.c | 6 |
90 | 80 files changed, 2263 insertions(+), 2237 deletions(-) |
91 | |
92 | --- a/arch/x86/Kconfig |
93 | +++ b/arch/x86/Kconfig |
94 | @@ -1220,7 +1220,7 @@ |
95 | |
96 | config RELOCATABLE |
97 | bool "Build a relocatable kernel (EXPERIMENTAL)" |
98 | - depends on EXPERIMENTAL |
99 | + depends on EXPERIMENTAL && !X86_XEN |
100 | help |
101 | This builds a kernel image that retains relocation information |
102 | so it can be loaded someplace besides the default 1MB. |
103 | --- a/arch/x86/kernel/asm-offsets_32.c |
104 | +++ b/arch/x86/kernel/asm-offsets_32.c |
105 | @@ -61,6 +61,7 @@ |
106 | OFFSET(TI_exec_domain, thread_info, exec_domain); |
107 | OFFSET(TI_flags, thread_info, flags); |
108 | OFFSET(TI_status, thread_info, status); |
109 | + OFFSET(TI_cpu, thread_info, cpu); |
110 | OFFSET(TI_preempt_count, thread_info, preempt_count); |
111 | OFFSET(TI_addr_limit, thread_info, addr_limit); |
112 | OFFSET(TI_restart_block, thread_info, restart_block); |
113 | @@ -115,6 +116,11 @@ |
114 | |
115 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
116 | |
117 | +#ifdef CONFIG_XEN |
118 | + BLANK(); |
119 | + OFFSET(XEN_START_mfn_list, start_info, mfn_list); |
120 | +#endif |
121 | + |
122 | #ifdef CONFIG_PARAVIRT |
123 | BLANK(); |
124 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); |
125 | --- a/arch/x86/kernel/cpu/common-xen.c |
126 | +++ b/arch/x86/kernel/cpu/common-xen.c |
127 | @@ -22,6 +22,7 @@ |
128 | #define phys_pkg_id(a,b) a |
129 | #endif |
130 | #endif |
131 | +#include <asm/pda.h> |
132 | #include <asm/hypervisor.h> |
133 | |
134 | #include "cpu.h" |
135 | @@ -29,10 +30,8 @@ |
136 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
137 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); |
138 | |
139 | -#ifndef CONFIG_XEN |
140 | -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); |
141 | -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); |
142 | -#endif |
143 | +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; |
144 | +EXPORT_SYMBOL(_cpu_pda); |
145 | |
146 | static int cachesize_override __cpuinitdata = -1; |
147 | static int disable_x86_fxsr __cpuinitdata; |
148 | @@ -60,7 +59,7 @@ |
149 | .c_init = default_init, |
150 | .c_vendor = "Unknown", |
151 | }; |
152 | -static struct cpu_dev * this_cpu = &default_cpu; |
153 | +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; |
154 | |
155 | static int __init cachesize_setup(char *str) |
156 | { |
157 | @@ -242,29 +241,14 @@ |
158 | return flag_is_changeable_p(X86_EFLAGS_ID); |
159 | } |
160 | |
161 | -/* Do minimum CPU detection early. |
162 | - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. |
163 | - The others are not touched to avoid unwanted side effects. |
164 | - |
165 | - WARNING: this function is only called on the BP. Don't add code here |
166 | - that is supposed to run on all CPUs. */ |
167 | -static void __init early_cpu_detect(void) |
168 | +void __init cpu_detect(struct cpuinfo_x86 *c) |
169 | { |
170 | - struct cpuinfo_x86 *c = &boot_cpu_data; |
171 | - |
172 | - c->x86_cache_alignment = 32; |
173 | - |
174 | - if (!have_cpuid_p()) |
175 | - return; |
176 | - |
177 | /* Get vendor name */ |
178 | cpuid(0x00000000, &c->cpuid_level, |
179 | (int *)&c->x86_vendor_id[0], |
180 | (int *)&c->x86_vendor_id[8], |
181 | (int *)&c->x86_vendor_id[4]); |
182 | |
183 | - get_cpu_vendor(c, 1); |
184 | - |
185 | c->x86 = 4; |
186 | if (c->cpuid_level >= 0x00000001) { |
187 | u32 junk, tfms, cap0, misc; |
188 | @@ -281,6 +265,26 @@ |
189 | } |
190 | } |
191 | |
192 | +/* Do minimum CPU detection early. |
193 | + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. |
194 | + The others are not touched to avoid unwanted side effects. |
195 | + |
196 | + WARNING: this function is only called on the BP. Don't add code here |
197 | + that is supposed to run on all CPUs. */ |
198 | +static void __init early_cpu_detect(void) |
199 | +{ |
200 | + struct cpuinfo_x86 *c = &boot_cpu_data; |
201 | + |
202 | + c->x86_cache_alignment = 32; |
203 | + |
204 | + if (!have_cpuid_p()) |
205 | + return; |
206 | + |
207 | + cpu_detect(c); |
208 | + |
209 | + get_cpu_vendor(c, 1); |
210 | +} |
211 | + |
212 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
213 | { |
214 | u32 tfms, xlvl; |
215 | @@ -315,6 +319,8 @@ |
216 | #else |
217 | c->apicid = (ebx >> 24) & 0xFF; |
218 | #endif |
219 | + if (c->x86_capability[0] & (1<<19)) |
220 | + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; |
221 | } else { |
222 | /* Have CPUID level 0 only - unheard of */ |
223 | c->x86 = 4; |
224 | @@ -379,6 +385,7 @@ |
225 | c->x86_vendor_id[0] = '\0'; /* Unset */ |
226 | c->x86_model_id[0] = '\0'; /* Unset */ |
227 | c->x86_max_cores = 1; |
228 | + c->x86_clflush_size = 32; |
229 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
230 | |
231 | if (!have_cpuid_p()) { |
232 | @@ -599,61 +606,23 @@ |
233 | #endif |
234 | } |
235 | |
236 | -static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) |
237 | +/* Make sure %gs is initialized properly in idle threads */ |
238 | +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
239 | { |
240 | - unsigned long frames[16]; |
241 | - unsigned long va; |
242 | - int f; |
243 | - |
244 | - for (va = gdt_descr->address, f = 0; |
245 | - va < gdt_descr->address + gdt_descr->size; |
246 | - va += PAGE_SIZE, f++) { |
247 | - frames[f] = virt_to_mfn(va); |
248 | - make_lowmem_page_readonly( |
249 | - (void *)va, XENFEAT_writable_descriptor_tables); |
250 | - } |
251 | - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) |
252 | - BUG(); |
253 | + memset(regs, 0, sizeof(struct pt_regs)); |
254 | + regs->xgs = __KERNEL_PDA; |
255 | + return regs; |
256 | } |
257 | |
258 | -/* |
259 | - * cpu_init() initializes state that is per-CPU. Some data is already |
260 | - * initialized (naturally) in the bootstrap process, such as the GDT |
261 | - * and IDT. We reload them nevertheless, this function acts as a |
262 | - * 'CPU state barrier', nothing should get across. |
263 | - */ |
264 | -void __cpuinit cpu_init(void) |
265 | +static __cpuinit int alloc_gdt(int cpu) |
266 | { |
267 | - int cpu = smp_processor_id(); |
268 | -#ifndef CONFIG_X86_NO_TSS |
269 | - struct tss_struct * t = &per_cpu(init_tss, cpu); |
270 | -#endif |
271 | - struct thread_struct *thread = ¤t->thread; |
272 | - struct desc_struct *gdt; |
273 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
274 | + struct desc_struct *gdt; |
275 | + struct i386_pda *pda; |
276 | |
277 | - if (cpu_test_and_set(cpu, cpu_initialized)) { |
278 | - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); |
279 | - for (;;) local_irq_enable(); |
280 | - } |
281 | - printk(KERN_INFO "Initializing CPU#%d\n", cpu); |
282 | - |
283 | - if (cpu_has_vme || cpu_has_de) |
284 | - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
285 | - if (tsc_disable && cpu_has_tsc) { |
286 | - printk(KERN_NOTICE "Disabling TSC...\n"); |
287 | - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ |
288 | - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); |
289 | - set_in_cr4(X86_CR4_TSD); |
290 | - } |
291 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; |
292 | + pda = cpu_pda(cpu); |
293 | |
294 | -#ifndef CONFIG_XEN |
295 | - /* The CPU hotplug case */ |
296 | - if (cpu_gdt_descr->address) { |
297 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; |
298 | - memset(gdt, 0, PAGE_SIZE); |
299 | - goto old_gdt; |
300 | - } |
301 | /* |
302 | * This is a horrible hack to allocate the GDT. The problem |
303 | * is that cpu_init() is called really early for the boot CPU |
304 | @@ -661,54 +630,141 @@ |
305 | * CPUs, when bootmem will have gone away |
306 | */ |
307 | if (NODE_DATA(0)->bdata->node_bootmem_map) { |
308 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); |
309 | - /* alloc_bootmem_pages panics on failure, so no check */ |
310 | + BUG_ON(gdt != NULL || pda != NULL); |
311 | + |
312 | + gdt = alloc_bootmem_pages(PAGE_SIZE); |
313 | + pda = alloc_bootmem(sizeof(*pda)); |
314 | + /* alloc_bootmem(_pages) panics on failure, so no check */ |
315 | + |
316 | memset(gdt, 0, PAGE_SIZE); |
317 | + memset(pda, 0, sizeof(*pda)); |
318 | } else { |
319 | - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); |
320 | - if (unlikely(!gdt)) { |
321 | - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); |
322 | - for (;;) |
323 | - local_irq_enable(); |
324 | + /* GDT and PDA might already have been allocated if |
325 | + this is a CPU hotplug re-insertion. */ |
326 | + if (gdt == NULL) |
327 | + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); |
328 | + |
329 | + if (pda == NULL) |
330 | + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); |
331 | + |
332 | + if (unlikely(!gdt || !pda)) { |
333 | + free_pages((unsigned long)gdt, 0); |
334 | + kfree(pda); |
335 | + return 0; |
336 | } |
337 | } |
338 | -old_gdt: |
339 | + |
340 | + cpu_gdt_descr->address = (unsigned long)gdt; |
341 | + cpu_pda(cpu) = pda; |
342 | + |
343 | + return 1; |
344 | +} |
345 | + |
346 | +/* Initial PDA used by boot CPU */ |
347 | +struct i386_pda boot_pda = { |
348 | + ._pda = &boot_pda, |
349 | + .cpu_number = 0, |
350 | + .pcurrent = &init_task, |
351 | +}; |
352 | + |
353 | +static inline void set_kernel_gs(void) |
354 | +{ |
355 | + /* Set %gs for this CPU's PDA. Memory clobber is to create a |
356 | + barrier with respect to any PDA operations, so the compiler |
357 | + doesn't move any before here. */ |
358 | + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); |
359 | +} |
360 | + |
361 | +/* Initialize the CPU's GDT and PDA. The boot CPU does this for |
362 | + itself, but secondaries find this done for them. */ |
363 | +__cpuinit int init_gdt(int cpu, struct task_struct *idle) |
364 | +{ |
365 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
366 | + struct desc_struct *gdt; |
367 | + struct i386_pda *pda; |
368 | + |
369 | + /* For non-boot CPUs, the GDT and PDA should already have been |
370 | + allocated. */ |
371 | + if (!alloc_gdt(cpu)) { |
372 | + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); |
373 | + return 0; |
374 | + } |
375 | + |
376 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; |
377 | + pda = cpu_pda(cpu); |
378 | + |
379 | + BUG_ON(gdt == NULL || pda == NULL); |
380 | + |
381 | /* |
382 | * Initialize the per-CPU GDT with the boot GDT, |
383 | * and set up the GDT descriptor: |
384 | */ |
385 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); |
386 | + cpu_gdt_descr->size = GDT_SIZE - 1; |
387 | |
388 | - /* Set up GDT entry for 16bit stack */ |
389 | - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= |
390 | - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | |
391 | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | |
392 | - (CPU_16BIT_STACK_SIZE - 1); |
393 | + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, |
394 | + (u32 *)&gdt[GDT_ENTRY_PDA].b, |
395 | + (unsigned long)pda, sizeof(*pda) - 1, |
396 | + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ |
397 | + |
398 | + memset(pda, 0, sizeof(*pda)); |
399 | + pda->_pda = pda; |
400 | + pda->cpu_number = cpu; |
401 | + pda->pcurrent = idle; |
402 | |
403 | - cpu_gdt_descr->size = GDT_SIZE - 1; |
404 | - cpu_gdt_descr->address = (unsigned long)gdt; |
405 | -#else |
406 | - if (cpu == 0 && cpu_gdt_descr->address == 0) { |
407 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); |
408 | - /* alloc_bootmem_pages panics on failure, so no check */ |
409 | - memset(gdt, 0, PAGE_SIZE); |
410 | + return 1; |
411 | +} |
412 | |
413 | - memcpy(gdt, cpu_gdt_table, GDT_SIZE); |
414 | - |
415 | - cpu_gdt_descr->size = GDT_SIZE; |
416 | - cpu_gdt_descr->address = (unsigned long)gdt; |
417 | +void __cpuinit cpu_set_gdt(int cpu) |
418 | +{ |
419 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
420 | + unsigned long va, frames[16]; |
421 | + int f; |
422 | + |
423 | + for (va = cpu_gdt_descr->address, f = 0; |
424 | + va < cpu_gdt_descr->address + cpu_gdt_descr->size; |
425 | + va += PAGE_SIZE, f++) { |
426 | + frames[f] = virt_to_mfn(va); |
427 | + make_lowmem_page_readonly( |
428 | + (void *)va, XENFEAT_writable_descriptor_tables); |
429 | } |
430 | + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); |
431 | + |
432 | + set_kernel_gs(); |
433 | +} |
434 | + |
435 | +/* Common CPU init for both boot and secondary CPUs */ |
436 | +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) |
437 | +{ |
438 | +#ifndef CONFIG_X86_NO_TSS |
439 | + struct tss_struct * t = &per_cpu(init_tss, cpu); |
440 | #endif |
441 | + struct thread_struct *thread = &curr->thread; |
442 | + |
443 | + if (cpu_test_and_set(cpu, cpu_initialized)) { |
444 | + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); |
445 | + for (;;) local_irq_enable(); |
446 | + } |
447 | |
448 | - cpu_gdt_init(cpu_gdt_descr); |
449 | + printk(KERN_INFO "Initializing CPU#%d\n", cpu); |
450 | + |
451 | + if (cpu_has_vme || cpu_has_de) |
452 | + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
453 | + if (tsc_disable && cpu_has_tsc) { |
454 | + printk(KERN_NOTICE "Disabling TSC...\n"); |
455 | + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ |
456 | + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); |
457 | + set_in_cr4(X86_CR4_TSD); |
458 | + } |
459 | |
460 | /* |
461 | * Set up and load the per-CPU TSS and LDT |
462 | */ |
463 | atomic_inc(&init_mm.mm_count); |
464 | - current->active_mm = &init_mm; |
465 | - BUG_ON(current->mm); |
466 | - enter_lazy_tlb(&init_mm, current); |
467 | + curr->active_mm = &init_mm; |
468 | + if (curr->mm) |
469 | + BUG(); |
470 | + enter_lazy_tlb(&init_mm, curr); |
471 | |
472 | load_esp0(t, thread); |
473 | |
474 | @@ -719,8 +775,8 @@ |
475 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
476 | #endif |
477 | |
478 | - /* Clear %fs and %gs. */ |
479 | - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); |
480 | + /* Clear %fs. */ |
481 | + asm volatile ("mov %0, %%fs" : : "r" (0)); |
482 | |
483 | /* Clear all 6 debug registers: */ |
484 | set_debugreg(0, 0); |
485 | @@ -738,6 +794,38 @@ |
486 | mxcsr_feature_mask_init(); |
487 | } |
488 | |
489 | +/* Entrypoint to initialize secondary CPU */ |
490 | +void __cpuinit secondary_cpu_init(void) |
491 | +{ |
492 | + int cpu = smp_processor_id(); |
493 | + struct task_struct *curr = current; |
494 | + |
495 | + _cpu_init(cpu, curr); |
496 | +} |
497 | + |
498 | +/* |
499 | + * cpu_init() initializes state that is per-CPU. Some data is already |
500 | + * initialized (naturally) in the bootstrap process, such as the GDT |
501 | + * and IDT. We reload them nevertheless, this function acts as a |
502 | + * 'CPU state barrier', nothing should get across. |
503 | + */ |
504 | +void __cpuinit cpu_init(void) |
505 | +{ |
506 | + int cpu = smp_processor_id(); |
507 | + struct task_struct *curr = current; |
508 | + |
509 | + /* Set up the real GDT and PDA, so we can transition from the |
510 | + boot versions. */ |
511 | + if (!init_gdt(cpu, curr)) { |
512 | + /* failed to allocate something; not much we can do... */ |
513 | + for (;;) |
514 | + local_irq_enable(); |
515 | + } |
516 | + |
517 | + cpu_set_gdt(cpu); |
518 | + _cpu_init(cpu, curr); |
519 | +} |
520 | + |
521 | #ifdef CONFIG_HOTPLUG_CPU |
522 | void __cpuinit cpu_uninit(void) |
523 | { |
524 | --- a/arch/x86/kernel/cpu/mtrr/main-xen.c |
525 | +++ b/arch/x86/kernel/cpu/mtrr/main-xen.c |
526 | @@ -12,7 +12,7 @@ |
527 | static DEFINE_MUTEX(mtrr_mutex); |
528 | |
529 | void generic_get_mtrr(unsigned int reg, unsigned long *base, |
530 | - unsigned int *size, mtrr_type * type) |
531 | + unsigned long *size, mtrr_type * type) |
532 | { |
533 | struct xen_platform_op op; |
534 | |
535 | @@ -115,8 +115,7 @@ |
536 | { |
537 | unsigned i; |
538 | mtrr_type ltype; |
539 | - unsigned long lbase; |
540 | - unsigned int lsize; |
541 | + unsigned long lbase, lsize; |
542 | int error = -EINVAL; |
543 | struct xen_platform_op op; |
544 | |
545 | --- /dev/null |
546 | +++ b/arch/x86/kernel/e820_32-xen.c |
547 | @@ -0,0 +1,1000 @@ |
548 | +#include <linux/kernel.h> |
549 | +#include <linux/types.h> |
550 | +#include <linux/init.h> |
551 | +#include <linux/bootmem.h> |
552 | +#include <linux/ioport.h> |
553 | +#include <linux/string.h> |
554 | +#include <linux/kexec.h> |
555 | +#include <linux/module.h> |
556 | +#include <linux/mm.h> |
557 | +#include <linux/efi.h> |
558 | +#include <linux/pfn.h> |
559 | +#include <linux/uaccess.h> |
560 | + |
561 | +#include <asm/pgtable.h> |
562 | +#include <asm/page.h> |
563 | +#include <asm/e820.h> |
564 | +#include <xen/interface/memory.h> |
565 | + |
566 | +#ifdef CONFIG_EFI |
567 | +int efi_enabled = 0; |
568 | +EXPORT_SYMBOL(efi_enabled); |
569 | +#endif |
570 | + |
571 | +struct e820map e820; |
572 | +struct change_member { |
573 | + struct e820entry *pbios; /* pointer to original bios entry */ |
574 | + unsigned long long addr; /* address for this change point */ |
575 | +}; |
576 | +static struct change_member change_point_list[2*E820MAX] __initdata; |
577 | +static struct change_member *change_point[2*E820MAX] __initdata; |
578 | +static struct e820entry *overlap_list[E820MAX] __initdata; |
579 | +static struct e820entry new_bios[E820MAX] __initdata; |
580 | +/* For PCI or other memory-mapped resources */ |
581 | +unsigned long pci_mem_start = 0x10000000; |
582 | +#ifdef CONFIG_PCI |
583 | +EXPORT_SYMBOL(pci_mem_start); |
584 | +#endif |
585 | +extern int user_defined_memmap; |
586 | +struct resource data_resource = { |
587 | + .name = "Kernel data", |
588 | + .start = 0, |
589 | + .end = 0, |
590 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
591 | +}; |
592 | + |
593 | +struct resource code_resource = { |
594 | + .name = "Kernel code", |
595 | + .start = 0, |
596 | + .end = 0, |
597 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
598 | +}; |
599 | + |
600 | +static struct resource system_rom_resource = { |
601 | + .name = "System ROM", |
602 | + .start = 0xf0000, |
603 | + .end = 0xfffff, |
604 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
605 | +}; |
606 | + |
607 | +static struct resource extension_rom_resource = { |
608 | + .name = "Extension ROM", |
609 | + .start = 0xe0000, |
610 | + .end = 0xeffff, |
611 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
612 | +}; |
613 | + |
614 | +static struct resource adapter_rom_resources[] = { { |
615 | + .name = "Adapter ROM", |
616 | + .start = 0xc8000, |
617 | + .end = 0, |
618 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
619 | +}, { |
620 | + .name = "Adapter ROM", |
621 | + .start = 0, |
622 | + .end = 0, |
623 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
624 | +}, { |
625 | + .name = "Adapter ROM", |
626 | + .start = 0, |
627 | + .end = 0, |
628 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
629 | +}, { |
630 | + .name = "Adapter ROM", |
631 | + .start = 0, |
632 | + .end = 0, |
633 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
634 | +}, { |
635 | + .name = "Adapter ROM", |
636 | + .start = 0, |
637 | + .end = 0, |
638 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
639 | +}, { |
640 | + .name = "Adapter ROM", |
641 | + .start = 0, |
642 | + .end = 0, |
643 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
644 | +} }; |
645 | + |
646 | +static struct resource video_rom_resource = { |
647 | + .name = "Video ROM", |
648 | + .start = 0xc0000, |
649 | + .end = 0xc7fff, |
650 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
651 | +}; |
652 | + |
653 | +static struct resource video_ram_resource = { |
654 | + .name = "Video RAM area", |
655 | + .start = 0xa0000, |
656 | + .end = 0xbffff, |
657 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
658 | +}; |
659 | + |
660 | +static struct resource standard_io_resources[] = { { |
661 | + .name = "dma1", |
662 | + .start = 0x0000, |
663 | + .end = 0x001f, |
664 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
665 | +}, { |
666 | + .name = "pic1", |
667 | + .start = 0x0020, |
668 | + .end = 0x0021, |
669 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
670 | +}, { |
671 | + .name = "timer0", |
672 | + .start = 0x0040, |
673 | + .end = 0x0043, |
674 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
675 | +}, { |
676 | + .name = "timer1", |
677 | + .start = 0x0050, |
678 | + .end = 0x0053, |
679 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
680 | +}, { |
681 | + .name = "keyboard", |
682 | + .start = 0x0060, |
683 | + .end = 0x006f, |
684 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
685 | +}, { |
686 | + .name = "dma page reg", |
687 | + .start = 0x0080, |
688 | + .end = 0x008f, |
689 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
690 | +}, { |
691 | + .name = "pic2", |
692 | + .start = 0x00a0, |
693 | + .end = 0x00a1, |
694 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
695 | +}, { |
696 | + .name = "dma2", |
697 | + .start = 0x00c0, |
698 | + .end = 0x00df, |
699 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
700 | +}, { |
701 | + .name = "fpu", |
702 | + .start = 0x00f0, |
703 | + .end = 0x00ff, |
704 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO |
705 | +} }; |
706 | + |
707 | +static int romsignature(const unsigned char *x) |
708 | +{ |
709 | + unsigned short sig; |
710 | + int ret = 0; |
711 | + if (probe_kernel_address((const unsigned short *)x, sig) == 0) |
712 | + ret = (sig == 0xaa55); |
713 | + return ret; |
714 | +} |
715 | + |
716 | +static int __init romchecksum(unsigned char *rom, unsigned long length) |
717 | +{ |
718 | + unsigned char *p, sum = 0; |
719 | + |
720 | + for (p = rom; p < rom + length; p++) |
721 | + sum += *p; |
722 | + return sum == 0; |
723 | +} |
724 | + |
725 | +static void __init probe_roms(void) |
726 | +{ |
727 | + unsigned long start, length, upper; |
728 | + unsigned char *rom; |
729 | + int i; |
730 | + |
731 | +#ifdef CONFIG_XEN |
732 | + /* Nothing to do if not running in dom0. */ |
733 | + if (!is_initial_xendomain()) |
734 | + return; |
735 | +#endif |
736 | + |
737 | + /* video rom */ |
738 | + upper = adapter_rom_resources[0].start; |
739 | + for (start = video_rom_resource.start; start < upper; start += 2048) { |
740 | + rom = isa_bus_to_virt(start); |
741 | + if (!romsignature(rom)) |
742 | + continue; |
743 | + |
744 | + video_rom_resource.start = start; |
745 | + |
746 | + /* 0 < length <= 0x7f * 512, historically */ |
747 | + length = rom[2] * 512; |
748 | + |
749 | + /* if checksum okay, trust length byte */ |
750 | + if (length && romchecksum(rom, length)) |
751 | + video_rom_resource.end = start + length - 1; |
752 | + |
753 | + request_resource(&iomem_resource, &video_rom_resource); |
754 | + break; |
755 | + } |
756 | + |
757 | + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; |
758 | + if (start < upper) |
759 | + start = upper; |
760 | + |
761 | + /* system rom */ |
762 | + request_resource(&iomem_resource, &system_rom_resource); |
763 | + upper = system_rom_resource.start; |
764 | + |
765 | + /* check for extension rom (ignore length byte!) */ |
766 | + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); |
767 | + if (romsignature(rom)) { |
768 | + length = extension_rom_resource.end - extension_rom_resource.start + 1; |
769 | + if (romchecksum(rom, length)) { |
770 | + request_resource(&iomem_resource, &extension_rom_resource); |
771 | + upper = extension_rom_resource.start; |
772 | + } |
773 | + } |
774 | + |
775 | + /* check for adapter roms on 2k boundaries */ |
776 | + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { |
777 | + rom = isa_bus_to_virt(start); |
778 | + if (!romsignature(rom)) |
779 | + continue; |
780 | + |
781 | + /* 0 < length <= 0x7f * 512, historically */ |
782 | + length = rom[2] * 512; |
783 | + |
784 | + /* but accept any length that fits if checksum okay */ |
785 | + if (!length || start + length > upper || !romchecksum(rom, length)) |
786 | + continue; |
787 | + |
788 | + adapter_rom_resources[i].start = start; |
789 | + adapter_rom_resources[i].end = start + length - 1; |
790 | + request_resource(&iomem_resource, &adapter_rom_resources[i]); |
791 | + |
792 | + start = adapter_rom_resources[i++].end & ~2047UL; |
793 | + } |
794 | +} |
795 | + |
796 | +#ifdef CONFIG_XEN |
797 | +static struct e820map machine_e820 __initdata; |
798 | +#define e820 machine_e820 |
799 | +#endif |
800 | + |
801 | +/* |
802 | + * Request address space for all standard RAM and ROM resources |
803 | + * and also for regions reported as reserved by the e820. |
804 | + */ |
805 | +static void __init |
806 | +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) |
807 | +{ |
808 | + int i; |
809 | + |
810 | + probe_roms(); |
811 | + for (i = 0; i < e820.nr_map; i++) { |
812 | + struct resource *res; |
813 | +#ifndef CONFIG_RESOURCES_64BIT |
814 | + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) |
815 | + continue; |
816 | +#endif |
817 | + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); |
818 | + switch (e820.map[i].type) { |
819 | + case E820_RAM: res->name = "System RAM"; break; |
820 | + case E820_ACPI: res->name = "ACPI Tables"; break; |
821 | + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; |
822 | + default: res->name = "reserved"; |
823 | + } |
824 | + res->start = e820.map[i].addr; |
825 | + res->end = res->start + e820.map[i].size - 1; |
826 | + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
827 | + if (request_resource(&iomem_resource, res)) { |
828 | + kfree(res); |
829 | + continue; |
830 | + } |
831 | + if (e820.map[i].type == E820_RAM) { |
832 | + /* |
833 | + * We don't know which RAM region contains kernel data, |
834 | + * so we try it repeatedly and let the resource manager |
835 | + * test it. |
836 | + */ |
837 | +#ifndef CONFIG_XEN |
838 | + request_resource(res, code_resource); |
839 | + request_resource(res, data_resource); |
840 | +#endif |
841 | +#ifdef CONFIG_KEXEC |
842 | + request_resource(res, &crashk_res); |
843 | +#ifdef CONFIG_XEN |
844 | + xen_machine_kexec_register_resources(res); |
845 | +#endif |
846 | +#endif |
847 | + } |
848 | + } |
849 | +} |
850 | + |
851 | +#undef e820 |
852 | + |
853 | +/* |
854 | + * Request address space for all standard resources |
855 | + * |
856 | + * This is called just before pcibios_init(), which is also a |
857 | + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). |
858 | + */ |
859 | +static int __init request_standard_resources(void) |
860 | +{ |
861 | + int i; |
862 | + |
863 | + /* Nothing to do if not running in dom0. */ |
864 | + if (!is_initial_xendomain()) |
865 | + return 0; |
866 | + |
867 | + printk("Setting up standard PCI resources\n"); |
868 | + if (efi_enabled) |
869 | + efi_initialize_iomem_resources(&code_resource, &data_resource); |
870 | + else |
871 | + legacy_init_iomem_resources(&code_resource, &data_resource); |
872 | + |
873 | + /* EFI systems may still have VGA */ |
874 | + request_resource(&iomem_resource, &video_ram_resource); |
875 | + |
876 | + /* request I/O space for devices used on all i[345]86 PCs */ |
877 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
878 | + request_resource(&ioport_resource, &standard_io_resources[i]); |
879 | + return 0; |
880 | +} |
881 | + |
882 | +subsys_initcall(request_standard_resources); |
883 | + |
884 | +void __init add_memory_region(unsigned long long start, |
885 | + unsigned long long size, int type) |
886 | +{ |
887 | + int x; |
888 | + |
889 | + if (!efi_enabled) { |
890 | + x = e820.nr_map; |
891 | + |
892 | + if (x == E820MAX) { |
893 | + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
894 | + return; |
895 | + } |
896 | + |
897 | + e820.map[x].addr = start; |
898 | + e820.map[x].size = size; |
899 | + e820.map[x].type = type; |
900 | + e820.nr_map++; |
901 | + } |
902 | +} /* add_memory_region */ |
903 | + |
904 | +/* |
905 | + * Sanitize the BIOS e820 map. |
906 | + * |
907 | + * Some e820 responses include overlapping entries. The following |
908 | + * replaces the original e820 map with a new one, removing overlaps. |
909 | + * |
910 | + */ |
911 | +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) |
912 | +{ |
913 | + struct change_member *change_tmp; |
914 | + unsigned long current_type, last_type; |
915 | + unsigned long long last_addr; |
916 | + int chgidx, still_changing; |
917 | + int overlap_entries; |
918 | + int new_bios_entry; |
919 | + int old_nr, new_nr, chg_nr; |
920 | + int i; |
921 | + |
922 | + /* |
923 | + Visually we're performing the following (1,2,3,4 = memory types)... |
924 | + |
925 | + Sample memory map (w/overlaps): |
926 | + ____22__________________ |
927 | + ______________________4_ |
928 | + ____1111________________ |
929 | + _44_____________________ |
930 | + 11111111________________ |
931 | + ____________________33__ |
932 | + ___________44___________ |
933 | + __________33333_________ |
934 | + ______________22________ |
935 | + ___________________2222_ |
936 | + _________111111111______ |
937 | + _____________________11_ |
938 | + _________________4______ |
939 | + |
940 | + Sanitized equivalent (no overlap): |
941 | + 1_______________________ |
942 | + _44_____________________ |
943 | + ___1____________________ |
944 | + ____22__________________ |
945 | + ______11________________ |
946 | + _________1______________ |
947 | + __________3_____________ |
948 | + ___________44___________ |
949 | + _____________33_________ |
950 | + _______________2________ |
951 | + ________________1_______ |
952 | + _________________4______ |
953 | + ___________________2____ |
954 | + ____________________33__ |
955 | + ______________________4_ |
956 | + */ |
957 | + printk("sanitize start\n"); |
958 | + /* if there's only one memory region, don't bother */ |
959 | + if (*pnr_map < 2) { |
960 | + printk("sanitize bail 0\n"); |
961 | + return -1; |
962 | + } |
963 | + |
964 | + old_nr = *pnr_map; |
965 | + |
966 | + /* bail out if we find any unreasonable addresses in bios map */ |
967 | + for (i=0; i<old_nr; i++) |
968 | + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { |
969 | + printk("sanitize bail 1\n"); |
970 | + return -1; |
971 | + } |
972 | + |
973 | + /* create pointers for initial change-point information (for sorting) */ |
974 | + for (i=0; i < 2*old_nr; i++) |
975 | + change_point[i] = &change_point_list[i]; |
976 | + |
977 | + /* record all known change-points (starting and ending addresses), |
978 | + omitting those that are for empty memory regions */ |
979 | + chgidx = 0; |
980 | + for (i=0; i < old_nr; i++) { |
981 | + if (biosmap[i].size != 0) { |
982 | + change_point[chgidx]->addr = biosmap[i].addr; |
983 | + change_point[chgidx++]->pbios = &biosmap[i]; |
984 | + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; |
985 | + change_point[chgidx++]->pbios = &biosmap[i]; |
986 | + } |
987 | + } |
988 | + chg_nr = chgidx; /* true number of change-points */ |
989 | + |
990 | + /* sort change-point list by memory addresses (low -> high) */ |
991 | + still_changing = 1; |
992 | + while (still_changing) { |
993 | + still_changing = 0; |
994 | + for (i=1; i < chg_nr; i++) { |
995 | + /* if <current_addr> > <last_addr>, swap */ |
996 | + /* or, if current=<start_addr> & last=<end_addr>, swap */ |
997 | + if ((change_point[i]->addr < change_point[i-1]->addr) || |
998 | + ((change_point[i]->addr == change_point[i-1]->addr) && |
999 | + (change_point[i]->addr == change_point[i]->pbios->addr) && |
1000 | + (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) |
1001 | + ) |
1002 | + { |
1003 | + change_tmp = change_point[i]; |
1004 | + change_point[i] = change_point[i-1]; |
1005 | + change_point[i-1] = change_tmp; |
1006 | + still_changing=1; |
1007 | + } |
1008 | + } |
1009 | + } |
1010 | + |
1011 | + /* create a new bios memory map, removing overlaps */ |
1012 | + overlap_entries=0; /* number of entries in the overlap table */ |
1013 | + new_bios_entry=0; /* index for creating new bios map entries */ |
1014 | + last_type = 0; /* start with undefined memory type */ |
1015 | + last_addr = 0; /* start with 0 as last starting address */ |
1016 | + /* loop through change-points, determining affect on the new bios map */ |
1017 | + for (chgidx=0; chgidx < chg_nr; chgidx++) |
1018 | + { |
1019 | + /* keep track of all overlapping bios entries */ |
1020 | + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) |
1021 | + { |
1022 | + /* add map entry to overlap list (> 1 entry implies an overlap) */ |
1023 | + overlap_list[overlap_entries++]=change_point[chgidx]->pbios; |
1024 | + } |
1025 | + else |
1026 | + { |
1027 | + /* remove entry from list (order independent, so swap with last) */ |
1028 | + for (i=0; i<overlap_entries; i++) |
1029 | + { |
1030 | + if (overlap_list[i] == change_point[chgidx]->pbios) |
1031 | + overlap_list[i] = overlap_list[overlap_entries-1]; |
1032 | + } |
1033 | + overlap_entries--; |
1034 | + } |
1035 | + /* if there are overlapping entries, decide which "type" to use */ |
1036 | + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ |
1037 | + current_type = 0; |
1038 | + for (i=0; i<overlap_entries; i++) |
1039 | + if (overlap_list[i]->type > current_type) |
1040 | + current_type = overlap_list[i]->type; |
1041 | + /* continue building up new bios map based on this information */ |
1042 | + if (current_type != last_type) { |
1043 | + if (last_type != 0) { |
1044 | + new_bios[new_bios_entry].size = |
1045 | + change_point[chgidx]->addr - last_addr; |
1046 | + /* move forward only if the new size was non-zero */ |
1047 | + if (new_bios[new_bios_entry].size != 0) |
1048 | + if (++new_bios_entry >= E820MAX) |
1049 | + break; /* no more space left for new bios entries */ |
1050 | + } |
1051 | + if (current_type != 0) { |
1052 | + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; |
1053 | + new_bios[new_bios_entry].type = current_type; |
1054 | + last_addr=change_point[chgidx]->addr; |
1055 | + } |
1056 | + last_type = current_type; |
1057 | + } |
1058 | + } |
1059 | + new_nr = new_bios_entry; /* retain count for new bios entries */ |
1060 | + |
1061 | + /* copy new bios mapping into original location */ |
1062 | + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); |
1063 | + *pnr_map = new_nr; |
1064 | + |
1065 | + printk("sanitize end\n"); |
1066 | + return 0; |
1067 | +} |
1068 | + |
1069 | +/* |
1070 | + * Copy the BIOS e820 map into a safe place. |
1071 | + * |
1072 | + * Sanity-check it while we're at it.. |
1073 | + * |
1074 | + * If we're lucky and live on a modern system, the setup code |
1075 | + * will have given us a memory map that we can use to properly |
1076 | + * set up memory. If we aren't, we'll fake a memory map. |
1077 | + * |
1078 | + * We check to see that the memory map contains at least 2 elements |
1079 | + * before we'll use it, because the detection code in setup.S may |
1080 | + * not be perfect and most every PC known to man has two memory |
1081 | + * regions: one from 0 to 640k, and one from 1mb up. (The IBM |
1082 | + * thinkpad 560x, for example, does not cooperate with the memory |
1083 | + * detection code.) |
1084 | + */ |
1085 | +int __init copy_e820_map(struct e820entry * biosmap, int nr_map) |
1086 | +{ |
1087 | +#ifndef CONFIG_XEN |
1088 | + /* Only one memory region (or negative)? Ignore it */ |
1089 | + if (nr_map < 2) |
1090 | + return -1; |
1091 | +#else |
1092 | + BUG_ON(nr_map < 1); |
1093 | +#endif |
1094 | + |
1095 | + do { |
1096 | + unsigned long long start = biosmap->addr; |
1097 | + unsigned long long size = biosmap->size; |
1098 | + unsigned long long end = start + size; |
1099 | + unsigned long type = biosmap->type; |
1100 | + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); |
1101 | + |
1102 | + /* Overflow in 64 bits? Ignore the memory map. */ |
1103 | + if (start > end) |
1104 | + return -1; |
1105 | + |
1106 | +#ifndef CONFIG_XEN |
1107 | + /* |
1108 | + * Some BIOSes claim RAM in the 640k - 1M region. |
1109 | + * Not right. Fix it up. |
1110 | + */ |
1111 | + if (type == E820_RAM) { |
1112 | + printk("copy_e820_map() type is E820_RAM\n"); |
1113 | + if (start < 0x100000ULL && end > 0xA0000ULL) { |
1114 | + printk("copy_e820_map() lies in range...\n"); |
1115 | + if (start < 0xA0000ULL) { |
1116 | + printk("copy_e820_map() start < 0xA0000ULL\n"); |
1117 | + add_memory_region(start, 0xA0000ULL-start, type); |
1118 | + } |
1119 | + if (end <= 0x100000ULL) { |
1120 | + printk("copy_e820_map() end <= 0x100000ULL\n"); |
1121 | + continue; |
1122 | + } |
1123 | + start = 0x100000ULL; |
1124 | + size = end - start; |
1125 | + } |
1126 | + } |
1127 | +#endif |
1128 | + add_memory_region(start, size, type); |
1129 | + } while (biosmap++,--nr_map); |
1130 | + return 0; |
1131 | +} |
1132 | + |
1133 | +/* |
1134 | + * Callback for efi_memory_walk. |
1135 | + */ |
1136 | +static int __init |
1137 | +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) |
1138 | +{ |
1139 | + unsigned long *max_pfn = arg, pfn; |
1140 | + |
1141 | + if (start < end) { |
1142 | + pfn = PFN_UP(end -1); |
1143 | + if (pfn > *max_pfn) |
1144 | + *max_pfn = pfn; |
1145 | + } |
1146 | + return 0; |
1147 | +} |
1148 | + |
1149 | +static int __init |
1150 | +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) |
1151 | +{ |
1152 | + memory_present(0, PFN_UP(start), PFN_DOWN(end)); |
1153 | + return 0; |
1154 | +} |
1155 | + |
1156 | +/* |
1157 | + * Find the highest page frame number we have available |
1158 | + */ |
1159 | +void __init find_max_pfn(void) |
1160 | +{ |
1161 | + int i; |
1162 | + |
1163 | + max_pfn = 0; |
1164 | + if (efi_enabled) { |
1165 | + efi_memmap_walk(efi_find_max_pfn, &max_pfn); |
1166 | + efi_memmap_walk(efi_memory_present_wrapper, NULL); |
1167 | + return; |
1168 | + } |
1169 | + |
1170 | + for (i = 0; i < e820.nr_map; i++) { |
1171 | + unsigned long start, end; |
1172 | + /* RAM? */ |
1173 | + if (e820.map[i].type != E820_RAM) |
1174 | + continue; |
1175 | + start = PFN_UP(e820.map[i].addr); |
1176 | + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); |
1177 | + if (start >= end) |
1178 | + continue; |
1179 | + if (end > max_pfn) |
1180 | + max_pfn = end; |
1181 | + memory_present(0, start, end); |
1182 | + } |
1183 | +} |
1184 | + |
1185 | +/* |
1186 | + * Free all available memory for boot time allocation. Used |
1187 | + * as a callback function by efi_memory_walk() |
1188 | + */ |
1189 | + |
1190 | +static int __init |
1191 | +free_available_memory(unsigned long start, unsigned long end, void *arg) |
1192 | +{ |
1193 | + /* check max_low_pfn */ |
1194 | + if (start >= (max_low_pfn << PAGE_SHIFT)) |
1195 | + return 0; |
1196 | + if (end >= (max_low_pfn << PAGE_SHIFT)) |
1197 | + end = max_low_pfn << PAGE_SHIFT; |
1198 | + if (start < end) |
1199 | + free_bootmem(start, end - start); |
1200 | + |
1201 | + return 0; |
1202 | +} |
1203 | +/* |
1204 | + * Register fully available low RAM pages with the bootmem allocator. |
1205 | + */ |
1206 | +void __init register_bootmem_low_pages(unsigned long max_low_pfn) |
1207 | +{ |
1208 | + int i; |
1209 | + |
1210 | + if (efi_enabled) { |
1211 | + efi_memmap_walk(free_available_memory, NULL); |
1212 | + return; |
1213 | + } |
1214 | + for (i = 0; i < e820.nr_map; i++) { |
1215 | + unsigned long curr_pfn, last_pfn, size; |
1216 | + /* |
1217 | + * Reserve usable low memory |
1218 | + */ |
1219 | + if (e820.map[i].type != E820_RAM) |
1220 | + continue; |
1221 | + /* |
1222 | + * We are rounding up the start address of usable memory: |
1223 | + */ |
1224 | + curr_pfn = PFN_UP(e820.map[i].addr); |
1225 | + if (curr_pfn >= max_low_pfn) |
1226 | + continue; |
1227 | + /* |
1228 | + * ... and at the end of the usable range downwards: |
1229 | + */ |
1230 | + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); |
1231 | + |
1232 | +#ifdef CONFIG_XEN |
1233 | + /* |
1234 | + * Truncate to the number of actual pages currently |
1235 | + * present. |
1236 | + */ |
1237 | + if (last_pfn > xen_start_info->nr_pages) |
1238 | + last_pfn = xen_start_info->nr_pages; |
1239 | +#endif |
1240 | + |
1241 | + if (last_pfn > max_low_pfn) |
1242 | + last_pfn = max_low_pfn; |
1243 | + |
1244 | + /* |
1245 | + * .. finally, did all the rounding and playing |
1246 | + * around just make the area go away? |
1247 | + */ |
1248 | + if (last_pfn <= curr_pfn) |
1249 | + continue; |
1250 | + |
1251 | + size = last_pfn - curr_pfn; |
1252 | + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); |
1253 | + } |
1254 | +} |
1255 | + |
1256 | +void __init e820_register_memory(void) |
1257 | +{ |
1258 | + unsigned long gapstart, gapsize, round; |
1259 | + unsigned long long last; |
1260 | + int i; |
1261 | + |
1262 | +#ifdef CONFIG_XEN |
1263 | + if (is_initial_xendomain()) { |
1264 | + struct xen_memory_map memmap; |
1265 | + |
1266 | + memmap.nr_entries = E820MAX; |
1267 | + set_xen_guest_handle(memmap.buffer, machine_e820.map); |
1268 | + |
1269 | + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) |
1270 | + BUG(); |
1271 | + machine_e820.nr_map = memmap.nr_entries; |
1272 | + } |
1273 | + else |
1274 | + machine_e820 = e820; |
1275 | +#define e820 machine_e820 |
1276 | +#endif |
1277 | + |
1278 | + /* |
1279 | + * Search for the bigest gap in the low 32 bits of the e820 |
1280 | + * memory space. |
1281 | + */ |
1282 | + last = 0x100000000ull; |
1283 | + gapstart = 0x10000000; |
1284 | + gapsize = 0x400000; |
1285 | + i = e820.nr_map; |
1286 | + while (--i >= 0) { |
1287 | + unsigned long long start = e820.map[i].addr; |
1288 | + unsigned long long end = start + e820.map[i].size; |
1289 | + |
1290 | + /* |
1291 | + * Since "last" is at most 4GB, we know we'll |
1292 | + * fit in 32 bits if this condition is true |
1293 | + */ |
1294 | + if (last > end) { |
1295 | + unsigned long gap = last - end; |
1296 | + |
1297 | + if (gap > gapsize) { |
1298 | + gapsize = gap; |
1299 | + gapstart = end; |
1300 | + } |
1301 | + } |
1302 | + if (start < last) |
1303 | + last = start; |
1304 | + } |
1305 | +#undef e820 |
1306 | + |
1307 | + /* |
1308 | + * See how much we want to round up: start off with |
1309 | + * rounding to the next 1MB area. |
1310 | + */ |
1311 | + round = 0x100000; |
1312 | + while ((gapsize >> 4) > round) |
1313 | + round += round; |
1314 | + /* Fun with two's complement */ |
1315 | + pci_mem_start = (gapstart + round) & -round; |
1316 | + |
1317 | + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", |
1318 | + pci_mem_start, gapstart, gapsize); |
1319 | +} |
1320 | + |
1321 | +void __init print_memory_map(char *who) |
1322 | +{ |
1323 | + int i; |
1324 | + |
1325 | + for (i = 0; i < e820.nr_map; i++) { |
1326 | + printk(" %s: %016Lx - %016Lx ", who, |
1327 | + e820.map[i].addr, |
1328 | + e820.map[i].addr + e820.map[i].size); |
1329 | + switch (e820.map[i].type) { |
1330 | + case E820_RAM: printk("(usable)\n"); |
1331 | + break; |
1332 | + case E820_RESERVED: |
1333 | + printk("(reserved)\n"); |
1334 | + break; |
1335 | + case E820_ACPI: |
1336 | + printk("(ACPI data)\n"); |
1337 | + break; |
1338 | + case E820_NVS: |
1339 | + printk("(ACPI NVS)\n"); |
1340 | + break; |
1341 | + default: printk("type %lu\n", e820.map[i].type); |
1342 | + break; |
1343 | + } |
1344 | + } |
1345 | +} |
1346 | + |
1347 | +static __init __always_inline void efi_limit_regions(unsigned long long size) |
1348 | +{ |
1349 | + unsigned long long current_addr = 0; |
1350 | + efi_memory_desc_t *md, *next_md; |
1351 | + void *p, *p1; |
1352 | + int i, j; |
1353 | + |
1354 | + j = 0; |
1355 | + p1 = memmap.map; |
1356 | + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { |
1357 | + md = p; |
1358 | + next_md = p1; |
1359 | + current_addr = md->phys_addr + |
1360 | + PFN_PHYS(md->num_pages); |
1361 | + if (is_available_memory(md)) { |
1362 | + if (md->phys_addr >= size) continue; |
1363 | + memcpy(next_md, md, memmap.desc_size); |
1364 | + if (current_addr >= size) { |
1365 | + next_md->num_pages -= |
1366 | + PFN_UP(current_addr-size); |
1367 | + } |
1368 | + p1 += memmap.desc_size; |
1369 | + next_md = p1; |
1370 | + j++; |
1371 | + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == |
1372 | + EFI_MEMORY_RUNTIME) { |
1373 | + /* In order to make runtime services |
1374 | + * available we have to include runtime |
1375 | + * memory regions in memory map */ |
1376 | + memcpy(next_md, md, memmap.desc_size); |
1377 | + p1 += memmap.desc_size; |
1378 | + next_md = p1; |
1379 | + j++; |
1380 | + } |
1381 | + } |
1382 | + memmap.nr_map = j; |
1383 | + memmap.map_end = memmap.map + |
1384 | + (memmap.nr_map * memmap.desc_size); |
1385 | +} |
1386 | + |
1387 | +void __init limit_regions(unsigned long long size) |
1388 | +{ |
1389 | + unsigned long long current_addr = 0; |
1390 | + int i; |
1391 | + |
1392 | + print_memory_map("limit_regions start"); |
1393 | + if (efi_enabled) { |
1394 | + efi_limit_regions(size); |
1395 | + return; |
1396 | + } |
1397 | + for (i = 0; i < e820.nr_map; i++) { |
1398 | + current_addr = e820.map[i].addr + e820.map[i].size; |
1399 | + if (current_addr < size) |
1400 | + continue; |
1401 | + |
1402 | + if (e820.map[i].type != E820_RAM) |
1403 | + continue; |
1404 | + |
1405 | + if (e820.map[i].addr >= size) { |
1406 | + /* |
1407 | + * This region starts past the end of the |
1408 | + * requested size, skip it completely. |
1409 | + */ |
1410 | + e820.nr_map = i; |
1411 | + } else { |
1412 | + e820.nr_map = i + 1; |
1413 | + e820.map[i].size -= current_addr - size; |
1414 | + } |
1415 | + print_memory_map("limit_regions endfor"); |
1416 | + return; |
1417 | + } |
1418 | +#ifdef CONFIG_XEN |
1419 | + if (current_addr < size) { |
1420 | + /* |
1421 | + * The e820 map finished before our requested size so |
1422 | + * extend the final entry to the requested address. |
1423 | + */ |
1424 | + --i; |
1425 | + if (e820.map[i].type == E820_RAM) |
1426 | + e820.map[i].size -= current_addr - size; |
1427 | + else |
1428 | + add_memory_region(current_addr, size - current_addr, E820_RAM); |
1429 | + } |
1430 | +#endif |
1431 | + print_memory_map("limit_regions endfunc"); |
1432 | +} |
1433 | + |
1434 | +/* |
1435 | + * This function checks if any part of the range <start,end> is mapped |
1436 | + * with type. |
1437 | + */ |
1438 | +int |
1439 | +e820_any_mapped(u64 start, u64 end, unsigned type) |
1440 | +{ |
1441 | + int i; |
1442 | + |
1443 | +#ifndef CONFIG_XEN |
1444 | + for (i = 0; i < e820.nr_map; i++) { |
1445 | + const struct e820entry *ei = &e820.map[i]; |
1446 | +#else |
1447 | + if (!is_initial_xendomain()) |
1448 | + return 0; |
1449 | + for (i = 0; i < machine_e820.nr_map; ++i) { |
1450 | + const struct e820entry *ei = &machine_e820.map[i]; |
1451 | +#endif |
1452 | + |
1453 | + if (type && ei->type != type) |
1454 | + continue; |
1455 | + if (ei->addr >= end || ei->addr + ei->size <= start) |
1456 | + continue; |
1457 | + return 1; |
1458 | + } |
1459 | + return 0; |
1460 | +} |
1461 | +EXPORT_SYMBOL_GPL(e820_any_mapped); |
1462 | + |
1463 | + /* |
1464 | + * This function checks if the entire range <start,end> is mapped with type. |
1465 | + * |
1466 | + * Note: this function only works correct if the e820 table is sorted and |
1467 | + * not-overlapping, which is the case |
1468 | + */ |
1469 | +int __init |
1470 | +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) |
1471 | +{ |
1472 | + u64 start = s; |
1473 | + u64 end = e; |
1474 | + int i; |
1475 | + |
1476 | +#ifndef CONFIG_XEN |
1477 | + for (i = 0; i < e820.nr_map; i++) { |
1478 | + struct e820entry *ei = &e820.map[i]; |
1479 | +#else |
1480 | + if (!is_initial_xendomain()) |
1481 | + return 0; |
1482 | + for (i = 0; i < machine_e820.nr_map; ++i) { |
1483 | + const struct e820entry *ei = &machine_e820.map[i]; |
1484 | +#endif |
1485 | + |
1486 | + if (type && ei->type != type) |
1487 | + continue; |
1488 | + /* is the region (part) in overlap with the current region ?*/ |
1489 | + if (ei->addr >= end || ei->addr + ei->size <= start) |
1490 | + continue; |
1491 | + /* if the region is at the beginning of <start,end> we move |
1492 | + * start to the end of the region since it's ok until there |
1493 | + */ |
1494 | + if (ei->addr <= start) |
1495 | + start = ei->addr + ei->size; |
1496 | + /* if start is now at or beyond end, we're done, full |
1497 | + * coverage */ |
1498 | + if (start >= end) |
1499 | + return 1; /* we're done */ |
1500 | + } |
1501 | + return 0; |
1502 | +} |
1503 | + |
1504 | +static int __init parse_memmap(char *arg) |
1505 | +{ |
1506 | + if (!arg) |
1507 | + return -EINVAL; |
1508 | + |
1509 | + if (strcmp(arg, "exactmap") == 0) { |
1510 | +#ifdef CONFIG_CRASH_DUMP |
1511 | + /* If we are doing a crash dump, we |
1512 | + * still need to know the real mem |
1513 | + * size before original memory map is |
1514 | + * reset. |
1515 | + */ |
1516 | + find_max_pfn(); |
1517 | + saved_max_pfn = max_pfn; |
1518 | +#endif |
1519 | + e820.nr_map = 0; |
1520 | + user_defined_memmap = 1; |
1521 | + } else { |
1522 | + /* If the user specifies memory size, we |
1523 | + * limit the BIOS-provided memory map to |
1524 | + * that size. exactmap can be used to specify |
1525 | + * the exact map. mem=number can be used to |
1526 | + * trim the existing memory map. |
1527 | + */ |
1528 | + unsigned long long start_at, mem_size; |
1529 | + |
1530 | + mem_size = memparse(arg, &arg); |
1531 | + if (*arg == '@') { |
1532 | + start_at = memparse(arg+1, &arg); |
1533 | + add_memory_region(start_at, mem_size, E820_RAM); |
1534 | + } else if (*arg == '#') { |
1535 | + start_at = memparse(arg+1, &arg); |
1536 | + add_memory_region(start_at, mem_size, E820_ACPI); |
1537 | + } else if (*arg == '$') { |
1538 | + start_at = memparse(arg+1, &arg); |
1539 | + add_memory_region(start_at, mem_size, E820_RESERVED); |
1540 | + } else { |
1541 | + limit_regions(mem_size); |
1542 | + user_defined_memmap = 1; |
1543 | + } |
1544 | + } |
1545 | + return 0; |
1546 | +} |
1547 | +early_param("memmap", parse_memmap); |
1548 | --- a/arch/x86/kernel/entry_32-xen.S |
1549 | +++ b/arch/x86/kernel/entry_32-xen.S |
1550 | @@ -30,12 +30,13 @@ |
1551 | * 18(%esp) - %eax |
1552 | * 1C(%esp) - %ds |
1553 | * 20(%esp) - %es |
1554 | - * 24(%esp) - orig_eax |
1555 | - * 28(%esp) - %eip |
1556 | - * 2C(%esp) - %cs |
1557 | - * 30(%esp) - %eflags |
1558 | - * 34(%esp) - %oldesp |
1559 | - * 38(%esp) - %oldss |
1560 | + * 24(%esp) - %gs |
1561 | + * 28(%esp) - orig_eax |
1562 | + * 2C(%esp) - %eip |
1563 | + * 30(%esp) - %cs |
1564 | + * 34(%esp) - %eflags |
1565 | + * 38(%esp) - %oldesp |
1566 | + * 3C(%esp) - %oldss |
1567 | * |
1568 | * "current" is in register %ebx during any slow entries. |
1569 | */ |
1570 | @@ -48,27 +49,25 @@ |
1571 | #include <asm/smp.h> |
1572 | #include <asm/page.h> |
1573 | #include <asm/desc.h> |
1574 | +#include <asm/percpu.h> |
1575 | #include <asm/dwarf2.h> |
1576 | #include "irq_vectors.h" |
1577 | #include <xen/interface/xen.h> |
1578 | |
1579 | -#define nr_syscalls ((syscall_table_size)/4) |
1580 | +/* |
1581 | + * We use macros for low-level operations which need to be overridden |
1582 | + * for paravirtualization. The following will never clobber any registers: |
1583 | + * INTERRUPT_RETURN (aka. "iret") |
1584 | + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") |
1585 | + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). |
1586 | + * |
1587 | + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must |
1588 | + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). |
1589 | + * Allowing a register to be clobbered can shrink the paravirt replacement |
1590 | + * enough to patch inline, increasing performance. |
1591 | + */ |
1592 | |
1593 | -EBX = 0x00 |
1594 | -ECX = 0x04 |
1595 | -EDX = 0x08 |
1596 | -ESI = 0x0C |
1597 | -EDI = 0x10 |
1598 | -EBP = 0x14 |
1599 | -EAX = 0x18 |
1600 | -DS = 0x1C |
1601 | -ES = 0x20 |
1602 | -ORIG_EAX = 0x24 |
1603 | -EIP = 0x28 |
1604 | -CS = 0x2C |
1605 | -EFLAGS = 0x30 |
1606 | -OLDESP = 0x34 |
1607 | -OLDSS = 0x38 |
1608 | +#define nr_syscalls ((syscall_table_size)/4) |
1609 | |
1610 | CF_MASK = 0x00000001 |
1611 | TF_MASK = 0x00000100 |
1612 | @@ -79,61 +78,16 @@ |
1613 | /* Pseudo-eflags. */ |
1614 | NMI_MASK = 0x80000000 |
1615 | |
1616 | -#ifndef CONFIG_XEN |
1617 | -/* These are replaces for paravirtualization */ |
1618 | -#define DISABLE_INTERRUPTS cli |
1619 | -#define ENABLE_INTERRUPTS sti |
1620 | -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit |
1621 | -#define INTERRUPT_RETURN iret |
1622 | -#define GET_CR0_INTO_EAX movl %cr0, %eax |
1623 | -#else |
1624 | -/* Offsets into shared_info_t. */ |
1625 | -#define evtchn_upcall_pending /* 0 */ |
1626 | -#define evtchn_upcall_mask 1 |
1627 | - |
1628 | -#define sizeof_vcpu_shift 6 |
1629 | - |
1630 | -#ifdef CONFIG_SMP |
1631 | -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ |
1632 | - shl $sizeof_vcpu_shift,%esi ; \ |
1633 | - addl HYPERVISOR_shared_info,%esi |
1634 | -#else |
1635 | -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi |
1636 | -#endif |
1637 | - |
1638 | -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) |
1639 | -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) |
1640 | -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) |
1641 | -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ |
1642 | - __DISABLE_INTERRUPTS |
1643 | -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ |
1644 | - __ENABLE_INTERRUPTS |
1645 | -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ |
1646 | -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ |
1647 | - __TEST_PENDING ; \ |
1648 | - jnz 14f # process more events if necessary... ; \ |
1649 | - movl ESI(%esp), %esi ; \ |
1650 | - sysexit ; \ |
1651 | -14: __DISABLE_INTERRUPTS ; \ |
1652 | - TRACE_IRQS_OFF ; \ |
1653 | -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ |
1654 | - push %esp ; \ |
1655 | - call evtchn_do_upcall ; \ |
1656 | - add $4,%esp ; \ |
1657 | - jmp ret_from_intr |
1658 | -#define INTERRUPT_RETURN iret |
1659 | -#endif |
1660 | - |
1661 | #ifdef CONFIG_PREEMPT |
1662 | -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF |
1663 | +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
1664 | #else |
1665 | -#define preempt_stop |
1666 | +#define preempt_stop(clobbers) |
1667 | #define resume_kernel restore_nocheck |
1668 | #endif |
1669 | |
1670 | .macro TRACE_IRQS_IRET |
1671 | #ifdef CONFIG_TRACE_IRQFLAGS |
1672 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off? |
1673 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? |
1674 | jz 1f |
1675 | TRACE_IRQS_ON |
1676 | 1: |
1677 | @@ -148,6 +102,9 @@ |
1678 | |
1679 | #define SAVE_ALL \ |
1680 | cld; \ |
1681 | + pushl %gs; \ |
1682 | + CFI_ADJUST_CFA_OFFSET 4;\ |
1683 | + /*CFI_REL_OFFSET gs, 0;*/\ |
1684 | pushl %es; \ |
1685 | CFI_ADJUST_CFA_OFFSET 4;\ |
1686 | /*CFI_REL_OFFSET es, 0;*/\ |
1687 | @@ -177,7 +134,9 @@ |
1688 | CFI_REL_OFFSET ebx, 0;\ |
1689 | movl $(__USER_DS), %edx; \ |
1690 | movl %edx, %ds; \ |
1691 | - movl %edx, %es; |
1692 | + movl %edx, %es; \ |
1693 | + movl $(__KERNEL_PDA), %edx; \ |
1694 | + movl %edx, %gs |
1695 | |
1696 | #define RESTORE_INT_REGS \ |
1697 | popl %ebx; \ |
1698 | @@ -210,17 +169,22 @@ |
1699 | 2: popl %es; \ |
1700 | CFI_ADJUST_CFA_OFFSET -4;\ |
1701 | /*CFI_RESTORE es;*/\ |
1702 | -.section .fixup,"ax"; \ |
1703 | -3: movl $0,(%esp); \ |
1704 | - jmp 1b; \ |
1705 | +3: popl %gs; \ |
1706 | + CFI_ADJUST_CFA_OFFSET -4;\ |
1707 | + /*CFI_RESTORE gs;*/\ |
1708 | +.pushsection .fixup,"ax"; \ |
1709 | 4: movl $0,(%esp); \ |
1710 | + jmp 1b; \ |
1711 | +5: movl $0,(%esp); \ |
1712 | jmp 2b; \ |
1713 | -.previous; \ |
1714 | +6: movl $0,(%esp); \ |
1715 | + jmp 3b; \ |
1716 | .section __ex_table,"a";\ |
1717 | .align 4; \ |
1718 | - .long 1b,3b; \ |
1719 | - .long 2b,4b; \ |
1720 | -.previous |
1721 | + .long 1b,4b; \ |
1722 | + .long 2b,5b; \ |
1723 | + .long 3b,6b; \ |
1724 | +.popsection |
1725 | |
1726 | #define RING0_INT_FRAME \ |
1727 | CFI_STARTPROC simple;\ |
1728 | @@ -239,18 +203,18 @@ |
1729 | #define RING0_PTREGS_FRAME \ |
1730 | CFI_STARTPROC simple;\ |
1731 | CFI_SIGNAL_FRAME;\ |
1732 | - CFI_DEF_CFA esp, OLDESP-EBX;\ |
1733 | - /*CFI_OFFSET cs, CS-OLDESP;*/\ |
1734 | - CFI_OFFSET eip, EIP-OLDESP;\ |
1735 | - /*CFI_OFFSET es, ES-OLDESP;*/\ |
1736 | - /*CFI_OFFSET ds, DS-OLDESP;*/\ |
1737 | - CFI_OFFSET eax, EAX-OLDESP;\ |
1738 | - CFI_OFFSET ebp, EBP-OLDESP;\ |
1739 | - CFI_OFFSET edi, EDI-OLDESP;\ |
1740 | - CFI_OFFSET esi, ESI-OLDESP;\ |
1741 | - CFI_OFFSET edx, EDX-OLDESP;\ |
1742 | - CFI_OFFSET ecx, ECX-OLDESP;\ |
1743 | - CFI_OFFSET ebx, EBX-OLDESP |
1744 | + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ |
1745 | + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ |
1746 | + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ |
1747 | + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ |
1748 | + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ |
1749 | + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ |
1750 | + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ |
1751 | + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ |
1752 | + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ |
1753 | + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ |
1754 | + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ |
1755 | + CFI_OFFSET ebx, PT_EBX-PT_OLDESP |
1756 | |
1757 | ENTRY(ret_from_fork) |
1758 | CFI_STARTPROC |
1759 | @@ -278,17 +242,18 @@ |
1760 | ALIGN |
1761 | RING0_PTREGS_FRAME |
1762 | ret_from_exception: |
1763 | - preempt_stop |
1764 | + preempt_stop(CLBR_ANY) |
1765 | ret_from_intr: |
1766 | GET_THREAD_INFO(%ebp) |
1767 | check_userspace: |
1768 | - movl EFLAGS(%esp), %eax # mix EFLAGS and CS |
1769 | - movb CS(%esp), %al |
1770 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS |
1771 | + movb PT_CS(%esp), %al |
1772 | andl $(VM_MASK | SEGMENT_RPL_MASK), %eax |
1773 | cmpl $USER_RPL, %eax |
1774 | jb resume_kernel # not returning to v8086 or userspace |
1775 | + |
1776 | ENTRY(resume_userspace) |
1777 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
1778 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
1779 | # setting need_resched or sigpending |
1780 | # between sampling and the iret |
1781 | movl TI_flags(%ebp), %ecx |
1782 | @@ -299,14 +264,14 @@ |
1783 | |
1784 | #ifdef CONFIG_PREEMPT |
1785 | ENTRY(resume_kernel) |
1786 | - DISABLE_INTERRUPTS |
1787 | + DISABLE_INTERRUPTS(CLBR_ANY) |
1788 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
1789 | jnz restore_nocheck |
1790 | need_resched: |
1791 | movl TI_flags(%ebp), %ecx # need_resched set ? |
1792 | testb $_TIF_NEED_RESCHED, %cl |
1793 | jz restore_all |
1794 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? |
1795 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? |
1796 | jz restore_all |
1797 | call preempt_schedule_irq |
1798 | jmp need_resched |
1799 | @@ -328,7 +293,7 @@ |
1800 | * No need to follow this irqs on/off section: the syscall |
1801 | * disabled irqs and here we enable it straight after entry: |
1802 | */ |
1803 | - ENABLE_INTERRUPTS |
1804 | + ENABLE_INTERRUPTS(CLBR_NONE) |
1805 | pushl $(__USER_DS) |
1806 | CFI_ADJUST_CFA_OFFSET 4 |
1807 | /*CFI_REL_OFFSET ss, 0*/ |
1808 | @@ -340,12 +305,16 @@ |
1809 | pushl $(__USER_CS) |
1810 | CFI_ADJUST_CFA_OFFSET 4 |
1811 | /*CFI_REL_OFFSET cs, 0*/ |
1812 | +#ifndef CONFIG_COMPAT_VDSO |
1813 | /* |
1814 | * Push current_thread_info()->sysenter_return to the stack. |
1815 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
1816 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
1817 | */ |
1818 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
1819 | +#else |
1820 | + pushl $SYSENTER_RETURN |
1821 | +#endif |
1822 | CFI_ADJUST_CFA_OFFSET 4 |
1823 | CFI_REL_OFFSET eip, 0 |
1824 | |
1825 | @@ -372,19 +341,27 @@ |
1826 | cmpl $(nr_syscalls), %eax |
1827 | jae syscall_badsys |
1828 | call *sys_call_table(,%eax,4) |
1829 | - movl %eax,EAX(%esp) |
1830 | - DISABLE_INTERRUPTS |
1831 | + movl %eax,PT_EAX(%esp) |
1832 | + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) |
1833 | TRACE_IRQS_OFF |
1834 | movl TI_flags(%ebp), %ecx |
1835 | testw $_TIF_ALLWORK_MASK, %cx |
1836 | jne syscall_exit_work |
1837 | /* if something modifies registers it must also disable sysexit */ |
1838 | - movl EIP(%esp), %edx |
1839 | - movl OLDESP(%esp), %ecx |
1840 | + movl PT_EIP(%esp), %edx |
1841 | + movl PT_OLDESP(%esp), %ecx |
1842 | xorl %ebp,%ebp |
1843 | TRACE_IRQS_ON |
1844 | +1: mov PT_GS(%esp), %gs |
1845 | ENABLE_INTERRUPTS_SYSEXIT |
1846 | CFI_ENDPROC |
1847 | +.pushsection .fixup,"ax" |
1848 | +2: movl $0,PT_GS(%esp) |
1849 | + jmp 1b |
1850 | +.section __ex_table,"a" |
1851 | + .align 4 |
1852 | + .long 1b,2b |
1853 | +.popsection |
1854 | |
1855 | # pv sysenter call handler stub |
1856 | ENTRY(sysenter_entry_pv) |
1857 | @@ -419,7 +396,7 @@ |
1858 | CFI_ADJUST_CFA_OFFSET 4 |
1859 | SAVE_ALL |
1860 | GET_THREAD_INFO(%ebp) |
1861 | - testl $TF_MASK,EFLAGS(%esp) |
1862 | + testl $TF_MASK,PT_EFLAGS(%esp) |
1863 | jz no_singlestep |
1864 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) |
1865 | no_singlestep: |
1866 | @@ -431,9 +408,9 @@ |
1867 | jae syscall_badsys |
1868 | syscall_call: |
1869 | call *sys_call_table(,%eax,4) |
1870 | - movl %eax,EAX(%esp) # store the return value |
1871 | + movl %eax,PT_EAX(%esp) # store the return value |
1872 | syscall_exit: |
1873 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
1874 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
1875 | # setting need_resched or sigpending |
1876 | # between sampling and the iret |
1877 | TRACE_IRQS_OFF |
1878 | @@ -443,12 +420,12 @@ |
1879 | |
1880 | restore_all: |
1881 | #ifndef CONFIG_XEN |
1882 | - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
1883 | - # Warning: OLDSS(%esp) contains the wrong/random values if we |
1884 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
1885 | + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
1886 | # are returning to the kernel. |
1887 | # See comments in process.c:copy_thread() for details. |
1888 | - movb OLDSS(%esp), %ah |
1889 | - movb CS(%esp), %al |
1890 | + movb PT_OLDSS(%esp), %ah |
1891 | + movb PT_CS(%esp), %al |
1892 | andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
1893 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
1894 | CFI_REMEMBER_STATE |
1895 | @@ -456,7 +433,7 @@ |
1896 | restore_nocheck: |
1897 | #else |
1898 | restore_nocheck: |
1899 | - movl EFLAGS(%esp), %eax |
1900 | + movl PT_EFLAGS(%esp), %eax |
1901 | testl $(VM_MASK|NMI_MASK), %eax |
1902 | CFI_REMEMBER_STATE |
1903 | jnz hypervisor_iret |
1904 | @@ -470,13 +447,13 @@ |
1905 | TRACE_IRQS_IRET |
1906 | restore_nocheck_notrace: |
1907 | RESTORE_REGS |
1908 | - addl $4, %esp |
1909 | + addl $4, %esp # skip orig_eax/error_code |
1910 | CFI_ADJUST_CFA_OFFSET -4 |
1911 | 1: INTERRUPT_RETURN |
1912 | .section .fixup,"ax" |
1913 | iret_exc: |
1914 | #ifndef CONFIG_XEN |
1915 | - ENABLE_INTERRUPTS |
1916 | + ENABLE_INTERRUPTS(CLBR_NONE) |
1917 | #endif |
1918 | pushl $0 # no error code |
1919 | pushl $do_iret_error |
1920 | @@ -490,33 +467,42 @@ |
1921 | CFI_RESTORE_STATE |
1922 | #ifndef CONFIG_XEN |
1923 | ldt_ss: |
1924 | - larl OLDSS(%esp), %eax |
1925 | + larl PT_OLDSS(%esp), %eax |
1926 | jnz restore_nocheck |
1927 | testl $0x00400000, %eax # returning to 32bit stack? |
1928 | jnz restore_nocheck # allright, normal return |
1929 | + |
1930 | +#ifdef CONFIG_PARAVIRT |
1931 | + /* |
1932 | + * The kernel can't run on a non-flat stack if paravirt mode |
1933 | + * is active. Rather than try to fixup the high bits of |
1934 | + * ESP, bypass this code entirely. This may break DOSemu |
1935 | + * and/or Wine support in a paravirt VM, although the option |
1936 | + * is still available to implement the setting of the high |
1937 | + * 16-bits in the INTERRUPT_RETURN paravirt-op. |
1938 | + */ |
1939 | + cmpl $0, paravirt_ops+PARAVIRT_enabled |
1940 | + jne restore_nocheck |
1941 | +#endif |
1942 | + |
1943 | /* If returning to userspace with 16bit stack, |
1944 | * try to fix the higher word of ESP, as the CPU |
1945 | * won't restore it. |
1946 | * This is an "official" bug of all the x86-compatible |
1947 | * CPUs, which we can try to work around to make |
1948 | * dosemu and wine happy. */ |
1949 | - subl $8, %esp # reserve space for switch16 pointer |
1950 | - CFI_ADJUST_CFA_OFFSET 8 |
1951 | - DISABLE_INTERRUPTS |
1952 | + movl PT_OLDESP(%esp), %eax |
1953 | + movl %esp, %edx |
1954 | + call patch_espfix_desc |
1955 | + pushl $__ESPFIX_SS |
1956 | + CFI_ADJUST_CFA_OFFSET 4 |
1957 | + pushl %eax |
1958 | + CFI_ADJUST_CFA_OFFSET 4 |
1959 | + DISABLE_INTERRUPTS(CLBR_EAX) |
1960 | TRACE_IRQS_OFF |
1961 | - movl %esp, %eax |
1962 | - /* Set up the 16bit stack frame with switch32 pointer on top, |
1963 | - * and a switch16 pointer on top of the current frame. */ |
1964 | - call setup_x86_bogus_stack |
1965 | - CFI_ADJUST_CFA_OFFSET -8 # frame has moved |
1966 | - TRACE_IRQS_IRET |
1967 | - RESTORE_REGS |
1968 | - lss 20+4(%esp), %esp # switch to 16bit stack |
1969 | -1: INTERRUPT_RETURN |
1970 | -.section __ex_table,"a" |
1971 | - .align 4 |
1972 | - .long 1b,iret_exc |
1973 | -.previous |
1974 | + lss (%esp), %esp |
1975 | + CFI_ADJUST_CFA_OFFSET -8 |
1976 | + jmp restore_nocheck |
1977 | #else |
1978 | ALIGN |
1979 | restore_all_enable_events: |
1980 | @@ -540,7 +526,7 @@ |
1981 | |
1982 | CFI_RESTORE_STATE |
1983 | hypervisor_iret: |
1984 | - andl $~NMI_MASK, EFLAGS(%esp) |
1985 | + andl $~NMI_MASK, PT_EFLAGS(%esp) |
1986 | RESTORE_REGS |
1987 | addl $4, %esp |
1988 | CFI_ADJUST_CFA_OFFSET -4 |
1989 | @@ -556,7 +542,7 @@ |
1990 | jz work_notifysig |
1991 | work_resched: |
1992 | call schedule |
1993 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
1994 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt |
1995 | # setting need_resched or sigpending |
1996 | # between sampling and the iret |
1997 | TRACE_IRQS_OFF |
1998 | @@ -569,7 +555,8 @@ |
1999 | |
2000 | work_notifysig: # deal with pending signals and |
2001 | # notify-resume requests |
2002 | - testl $VM_MASK, EFLAGS(%esp) |
2003 | +#ifdef CONFIG_VM86 |
2004 | + testl $VM_MASK, PT_EFLAGS(%esp) |
2005 | movl %esp, %eax |
2006 | jne work_notifysig_v86 # returning to kernel-space or |
2007 | # vm86-space |
2008 | @@ -579,29 +566,30 @@ |
2009 | |
2010 | ALIGN |
2011 | work_notifysig_v86: |
2012 | -#ifdef CONFIG_VM86 |
2013 | pushl %ecx # save ti_flags for do_notify_resume |
2014 | CFI_ADJUST_CFA_OFFSET 4 |
2015 | call save_v86_state # %eax contains pt_regs pointer |
2016 | popl %ecx |
2017 | CFI_ADJUST_CFA_OFFSET -4 |
2018 | movl %eax, %esp |
2019 | +#else |
2020 | + movl %esp, %eax |
2021 | +#endif |
2022 | xorl %edx, %edx |
2023 | call do_notify_resume |
2024 | jmp resume_userspace_sig |
2025 | -#endif |
2026 | |
2027 | # perform syscall exit tracing |
2028 | ALIGN |
2029 | syscall_trace_entry: |
2030 | - movl $-ENOSYS,EAX(%esp) |
2031 | + movl $-ENOSYS,PT_EAX(%esp) |
2032 | movl %esp, %eax |
2033 | xorl %edx,%edx |
2034 | call do_syscall_trace |
2035 | cmpl $0, %eax |
2036 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, |
2037 | # so must skip actual syscall |
2038 | - movl ORIG_EAX(%esp), %eax |
2039 | + movl PT_ORIG_EAX(%esp), %eax |
2040 | cmpl $(nr_syscalls), %eax |
2041 | jnae syscall_call |
2042 | jmp syscall_exit |
2043 | @@ -612,7 +600,7 @@ |
2044 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl |
2045 | jz work_pending |
2046 | TRACE_IRQS_ON |
2047 | - ENABLE_INTERRUPTS # could let do_syscall_trace() call |
2048 | + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call |
2049 | # schedule() instead |
2050 | movl %esp, %eax |
2051 | movl $1, %edx |
2052 | @@ -626,40 +614,39 @@ |
2053 | CFI_ADJUST_CFA_OFFSET 4 |
2054 | SAVE_ALL |
2055 | GET_THREAD_INFO(%ebp) |
2056 | - movl $-EFAULT,EAX(%esp) |
2057 | + movl $-EFAULT,PT_EAX(%esp) |
2058 | jmp resume_userspace |
2059 | |
2060 | syscall_badsys: |
2061 | - movl $-ENOSYS,EAX(%esp) |
2062 | + movl $-ENOSYS,PT_EAX(%esp) |
2063 | jmp resume_userspace |
2064 | CFI_ENDPROC |
2065 | |
2066 | #ifndef CONFIG_XEN |
2067 | #define FIXUP_ESPFIX_STACK \ |
2068 | - movl %esp, %eax; \ |
2069 | - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ |
2070 | - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ |
2071 | - /* copy data from 16bit stack to 32bit stack */ \ |
2072 | - call fixup_x86_bogus_stack; \ |
2073 | - /* put ESP to the proper location */ \ |
2074 | - movl %eax, %esp; |
2075 | -#define UNWIND_ESPFIX_STACK \ |
2076 | + /* since we are on a wrong stack, we cant make it a C code :( */ \ |
2077 | + movl %gs:PDA_cpu, %ebx; \ |
2078 | + PER_CPU(cpu_gdt_descr, %ebx); \ |
2079 | + movl GDS_address(%ebx), %ebx; \ |
2080 | + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
2081 | + addl %esp, %eax; \ |
2082 | + pushl $__KERNEL_DS; \ |
2083 | + CFI_ADJUST_CFA_OFFSET 4; \ |
2084 | pushl %eax; \ |
2085 | CFI_ADJUST_CFA_OFFSET 4; \ |
2086 | + lss (%esp), %esp; \ |
2087 | + CFI_ADJUST_CFA_OFFSET -8; |
2088 | +#define UNWIND_ESPFIX_STACK \ |
2089 | movl %ss, %eax; \ |
2090 | - /* see if on 16bit stack */ \ |
2091 | + /* see if on espfix stack */ \ |
2092 | cmpw $__ESPFIX_SS, %ax; \ |
2093 | - je 28f; \ |
2094 | -27: popl %eax; \ |
2095 | - CFI_ADJUST_CFA_OFFSET -4; \ |
2096 | -.section .fixup,"ax"; \ |
2097 | -28: movl $__KERNEL_DS, %eax; \ |
2098 | + jne 27f; \ |
2099 | + movl $__KERNEL_DS, %eax; \ |
2100 | movl %eax, %ds; \ |
2101 | movl %eax, %es; \ |
2102 | - /* switch to 32bit stack */ \ |
2103 | + /* switch to normal stack */ \ |
2104 | FIXUP_ESPFIX_STACK; \ |
2105 | - jmp 27b; \ |
2106 | -.previous |
2107 | +27:; |
2108 | |
2109 | /* |
2110 | * Build the entry stubs and pointer table with |
2111 | @@ -723,13 +710,16 @@ |
2112 | CFI_ADJUST_CFA_OFFSET 4 |
2113 | ALIGN |
2114 | error_code: |
2115 | + /* the function address is in %gs's slot on the stack */ |
2116 | + pushl %es |
2117 | + CFI_ADJUST_CFA_OFFSET 4 |
2118 | + /*CFI_REL_OFFSET es, 0*/ |
2119 | pushl %ds |
2120 | CFI_ADJUST_CFA_OFFSET 4 |
2121 | /*CFI_REL_OFFSET ds, 0*/ |
2122 | pushl %eax |
2123 | CFI_ADJUST_CFA_OFFSET 4 |
2124 | CFI_REL_OFFSET eax, 0 |
2125 | - xorl %eax, %eax |
2126 | pushl %ebp |
2127 | CFI_ADJUST_CFA_OFFSET 4 |
2128 | CFI_REL_OFFSET ebp, 0 |
2129 | @@ -742,7 +732,6 @@ |
2130 | pushl %edx |
2131 | CFI_ADJUST_CFA_OFFSET 4 |
2132 | CFI_REL_OFFSET edx, 0 |
2133 | - decl %eax # eax = -1 |
2134 | pushl %ecx |
2135 | CFI_ADJUST_CFA_OFFSET 4 |
2136 | CFI_REL_OFFSET ecx, 0 |
2137 | @@ -750,18 +739,20 @@ |
2138 | CFI_ADJUST_CFA_OFFSET 4 |
2139 | CFI_REL_OFFSET ebx, 0 |
2140 | cld |
2141 | - pushl %es |
2142 | + pushl %gs |
2143 | CFI_ADJUST_CFA_OFFSET 4 |
2144 | - /*CFI_REL_OFFSET es, 0*/ |
2145 | + /*CFI_REL_OFFSET gs, 0*/ |
2146 | + movl $(__KERNEL_PDA), %ecx |
2147 | + movl %ecx, %gs |
2148 | UNWIND_ESPFIX_STACK |
2149 | popl %ecx |
2150 | CFI_ADJUST_CFA_OFFSET -4 |
2151 | /*CFI_REGISTER es, ecx*/ |
2152 | - movl ES(%esp), %edi # get the function address |
2153 | - movl ORIG_EAX(%esp), %edx # get the error code |
2154 | - movl %eax, ORIG_EAX(%esp) |
2155 | - movl %ecx, ES(%esp) |
2156 | - /*CFI_REL_OFFSET es, ES*/ |
2157 | + movl PT_GS(%esp), %edi # get the function address |
2158 | + movl PT_ORIG_EAX(%esp), %edx # get the error code |
2159 | + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
2160 | + mov %ecx, PT_GS(%esp) |
2161 | + /*CFI_REL_OFFSET gs, ES*/ |
2162 | movl $(__USER_DS), %ecx |
2163 | movl %ecx, %ds |
2164 | movl %ecx, %es |
2165 | @@ -793,7 +784,7 @@ |
2166 | pushl %eax |
2167 | CFI_ADJUST_CFA_OFFSET 4 |
2168 | SAVE_ALL |
2169 | - movl EIP(%esp),%eax |
2170 | + movl PT_EIP(%esp),%eax |
2171 | cmpl $scrit,%eax |
2172 | jb 11f |
2173 | cmpl $ecrit,%eax |
2174 | @@ -802,7 +793,7 @@ |
2175 | jb 11f |
2176 | cmpl $sysexit_ecrit,%eax |
2177 | ja 11f |
2178 | - addl $OLDESP,%esp # Remove eflags...ebx from stack frame. |
2179 | + addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame. |
2180 | 11: push %esp |
2181 | CFI_ADJUST_CFA_OFFSET 4 |
2182 | call evtchn_do_upcall |
2183 | @@ -824,7 +815,7 @@ |
2184 | jne 15f |
2185 | xorl %ecx,%ecx |
2186 | 15: leal (%esp,%ecx),%esi # %esi points at end of src region |
2187 | - leal OLDESP(%esp),%edi # %edi points at end of dst region |
2188 | + leal PT_OLDESP(%esp),%edi # %edi points at end of dst region |
2189 | shrl $2,%ecx # convert words to bytes |
2190 | je 17f # skip loop if nothing to copy |
2191 | 16: subl $4,%esi # pre-decrementing copy loop |
2192 | @@ -848,8 +839,9 @@ |
2193 | .byte 0x18 # pop %eax |
2194 | .byte 0x1c # pop %ds |
2195 | .byte 0x20 # pop %es |
2196 | - .byte 0x24,0x24,0x24 # add $4,%esp |
2197 | - .byte 0x28 # iret |
2198 | + .byte 0x24,0x24 # pop %gs |
2199 | + .byte 0x28,0x28,0x28 # add $4,%esp |
2200 | + .byte 0x2c # iret |
2201 | .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) |
2202 | .byte 0x00,0x00 # jmp 11b |
2203 | .previous |
2204 | @@ -940,7 +932,7 @@ |
2205 | jmp ret_from_exception |
2206 | device_available_emulate: |
2207 | #endif |
2208 | - preempt_stop |
2209 | + preempt_stop(CLBR_ANY) |
2210 | call math_state_restore |
2211 | jmp ret_from_exception |
2212 | CFI_ENDPROC |
2213 | @@ -1010,7 +1002,7 @@ |
2214 | cmpw $__ESPFIX_SS, %ax |
2215 | popl %eax |
2216 | CFI_ADJUST_CFA_OFFSET -4 |
2217 | - je nmi_16bit_stack |
2218 | + je nmi_espfix_stack |
2219 | cmpl $sysenter_entry,(%esp) |
2220 | je nmi_stack_fixup |
2221 | pushl %eax |
2222 | @@ -1053,7 +1045,7 @@ |
2223 | FIX_STACK(24,nmi_stack_correct, 1) |
2224 | jmp nmi_stack_correct |
2225 | |
2226 | -nmi_16bit_stack: |
2227 | +nmi_espfix_stack: |
2228 | /* We have a RING0_INT_FRAME here. |
2229 | * |
2230 | * create the pointer to lss back |
2231 | @@ -1062,7 +1054,6 @@ |
2232 | CFI_ADJUST_CFA_OFFSET 4 |
2233 | pushl %esp |
2234 | CFI_ADJUST_CFA_OFFSET 4 |
2235 | - movzwl %sp, %esp |
2236 | addw $4, (%esp) |
2237 | /* copy the iret frame of 12 bytes */ |
2238 | .rept 3 |
2239 | @@ -1073,11 +1064,11 @@ |
2240 | CFI_ADJUST_CFA_OFFSET 4 |
2241 | SAVE_ALL |
2242 | FIXUP_ESPFIX_STACK # %eax == %esp |
2243 | - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved |
2244 | xorl %edx,%edx # zero error code |
2245 | call do_nmi |
2246 | RESTORE_REGS |
2247 | - lss 12+4(%esp), %esp # back to 16bit stack |
2248 | + lss 12+4(%esp), %esp # back to espfix stack |
2249 | + CFI_ADJUST_CFA_OFFSET -24 |
2250 | 1: INTERRUPT_RETURN |
2251 | CFI_ENDPROC |
2252 | .section __ex_table,"a" |
2253 | @@ -1093,12 +1084,25 @@ |
2254 | xorl %edx,%edx # zero error code |
2255 | movl %esp,%eax # pt_regs pointer |
2256 | call do_nmi |
2257 | - orl $NMI_MASK, EFLAGS(%esp) |
2258 | + orl $NMI_MASK, PT_EFLAGS(%esp) |
2259 | jmp restore_all |
2260 | CFI_ENDPROC |
2261 | #endif |
2262 | KPROBE_END(nmi) |
2263 | |
2264 | +#ifdef CONFIG_PARAVIRT |
2265 | +ENTRY(native_iret) |
2266 | +1: iret |
2267 | +.section __ex_table,"a" |
2268 | + .align 4 |
2269 | + .long 1b,iret_exc |
2270 | +.previous |
2271 | + |
2272 | +ENTRY(native_irq_enable_sysexit) |
2273 | + sti |
2274 | + sysexit |
2275 | +#endif |
2276 | + |
2277 | KPROBE_ENTRY(int3) |
2278 | RING0_INT_FRAME |
2279 | pushl $-1 # mark this as an int |
2280 | @@ -1214,37 +1218,6 @@ |
2281 | CFI_ENDPROC |
2282 | #endif /* !CONFIG_XEN */ |
2283 | |
2284 | -#ifdef CONFIG_STACK_UNWIND |
2285 | -ENTRY(arch_unwind_init_running) |
2286 | - CFI_STARTPROC |
2287 | - movl 4(%esp), %edx |
2288 | - movl (%esp), %ecx |
2289 | - leal 4(%esp), %eax |
2290 | - movl %ebx, EBX(%edx) |
2291 | - xorl %ebx, %ebx |
2292 | - movl %ebx, ECX(%edx) |
2293 | - movl %ebx, EDX(%edx) |
2294 | - movl %esi, ESI(%edx) |
2295 | - movl %edi, EDI(%edx) |
2296 | - movl %ebp, EBP(%edx) |
2297 | - movl %ebx, EAX(%edx) |
2298 | - movl $__USER_DS, DS(%edx) |
2299 | - movl $__USER_DS, ES(%edx) |
2300 | - movl %ebx, ORIG_EAX(%edx) |
2301 | - movl %ecx, EIP(%edx) |
2302 | - movl 12(%esp), %ecx |
2303 | - movl $__KERNEL_CS, CS(%edx) |
2304 | - movl %ebx, EFLAGS(%edx) |
2305 | - movl %eax, OLDESP(%edx) |
2306 | - movl 8(%esp), %eax |
2307 | - movl %ecx, 8(%esp) |
2308 | - movl EBX(%edx), %ebx |
2309 | - movl $__KERNEL_DS, OLDSS(%edx) |
2310 | - jmpl *%eax |
2311 | - CFI_ENDPROC |
2312 | -ENDPROC(arch_unwind_init_running) |
2313 | -#endif |
2314 | - |
2315 | ENTRY(fixup_4gb_segment) |
2316 | RING0_EC_FRAME |
2317 | pushl $do_fixup_4gb_segment |
2318 | --- a/arch/x86/kernel/entry_64-xen.S |
2319 | +++ b/arch/x86/kernel/entry_64-xen.S |
2320 | @@ -261,7 +261,6 @@ |
2321 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
2322 | GET_THREAD_INFO(%rcx) |
2323 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) |
2324 | - CFI_REMEMBER_STATE |
2325 | jnz tracesys |
2326 | cmpq $__NR_syscall_max,%rax |
2327 | ja badsys |
2328 | @@ -272,7 +271,6 @@ |
2329 | * Syscall return path ending with SYSRET (fast path) |
2330 | * Has incomplete stack frame and undefined top of stack. |
2331 | */ |
2332 | - .globl ret_from_sys_call |
2333 | ret_from_sys_call: |
2334 | movl $_TIF_ALLWORK_MASK,%edi |
2335 | /* edi: flagmask */ |
2336 | @@ -282,8 +280,8 @@ |
2337 | TRACE_IRQS_OFF |
2338 | movl threadinfo_flags(%rcx),%edx |
2339 | andl %edi,%edx |
2340 | - CFI_REMEMBER_STATE |
2341 | jnz sysret_careful |
2342 | + CFI_REMEMBER_STATE |
2343 | /* |
2344 | * sysretq will re-enable interrupts: |
2345 | */ |
2346 | @@ -292,10 +290,10 @@ |
2347 | RESTORE_ARGS 0,8,0 |
2348 | HYPERVISOR_IRET VGCF_IN_SYSCALL |
2349 | |
2350 | + CFI_RESTORE_STATE |
2351 | /* Handle reschedules */ |
2352 | /* edx: work, edi: workmask */ |
2353 | sysret_careful: |
2354 | - CFI_RESTORE_STATE |
2355 | bt $TIF_NEED_RESCHED,%edx |
2356 | jnc sysret_signal |
2357 | TRACE_IRQS_ON |
2358 | @@ -334,7 +332,6 @@ |
2359 | |
2360 | /* Do syscall tracing */ |
2361 | tracesys: |
2362 | - CFI_RESTORE_STATE |
2363 | SAVE_REST |
2364 | movq $-ENOSYS,RAX(%rsp) |
2365 | FIXUP_TOP_OF_STACK %rdi |
2366 | @@ -350,32 +347,13 @@ |
2367 | call *sys_call_table(,%rax,8) |
2368 | 1: movq %rax,RAX-ARGOFFSET(%rsp) |
2369 | /* Use IRET because user could have changed frame */ |
2370 | - jmp int_ret_from_sys_call |
2371 | - CFI_ENDPROC |
2372 | -END(system_call) |
2373 | |
2374 | /* |
2375 | * Syscall return path ending with IRET. |
2376 | * Has correct top of stack, but partial stack frame. |
2377 | - */ |
2378 | -ENTRY(int_ret_from_sys_call) |
2379 | - CFI_STARTPROC simple |
2380 | - CFI_SIGNAL_FRAME |
2381 | - CFI_DEF_CFA rsp,SS+8-ARGOFFSET |
2382 | - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ |
2383 | - CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
2384 | - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ |
2385 | - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ |
2386 | - CFI_REL_OFFSET rip,RIP-ARGOFFSET |
2387 | - CFI_REL_OFFSET rdx,RDX-ARGOFFSET |
2388 | - CFI_REL_OFFSET rcx,RCX-ARGOFFSET |
2389 | - CFI_REL_OFFSET rax,RAX-ARGOFFSET |
2390 | - CFI_REL_OFFSET rdi,RDI-ARGOFFSET |
2391 | - CFI_REL_OFFSET rsi,RSI-ARGOFFSET |
2392 | - CFI_REL_OFFSET r8,R8-ARGOFFSET |
2393 | - CFI_REL_OFFSET r9,R9-ARGOFFSET |
2394 | - CFI_REL_OFFSET r10,R10-ARGOFFSET |
2395 | - CFI_REL_OFFSET r11,R11-ARGOFFSET |
2396 | + */ |
2397 | + .globl int_ret_from_sys_call |
2398 | +int_ret_from_sys_call: |
2399 | XEN_BLOCK_EVENTS(%rsi) |
2400 | TRACE_IRQS_OFF |
2401 | testb $3,CS-ARGOFFSET(%rsp) |
2402 | @@ -428,8 +406,6 @@ |
2403 | popq %rdi |
2404 | CFI_ADJUST_CFA_OFFSET -8 |
2405 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi |
2406 | - XEN_BLOCK_EVENTS(%rsi) |
2407 | - TRACE_IRQS_OFF |
2408 | jmp int_restore_rest |
2409 | |
2410 | int_signal: |
2411 | @@ -445,7 +421,7 @@ |
2412 | TRACE_IRQS_OFF |
2413 | jmp int_with_check |
2414 | CFI_ENDPROC |
2415 | -END(int_ret_from_sys_call) |
2416 | +END(system_call) |
2417 | |
2418 | /* |
2419 | * Certain special system calls that need to save a complete full stack frame. |
2420 | @@ -1275,36 +1251,3 @@ |
2421 | ret |
2422 | CFI_ENDPROC |
2423 | ENDPROC(call_softirq) |
2424 | - |
2425 | -#ifdef CONFIG_STACK_UNWIND |
2426 | -ENTRY(arch_unwind_init_running) |
2427 | - CFI_STARTPROC |
2428 | - movq %r15, R15(%rdi) |
2429 | - movq %r14, R14(%rdi) |
2430 | - xchgq %rsi, %rdx |
2431 | - movq %r13, R13(%rdi) |
2432 | - movq %r12, R12(%rdi) |
2433 | - xorl %eax, %eax |
2434 | - movq %rbp, RBP(%rdi) |
2435 | - movq %rbx, RBX(%rdi) |
2436 | - movq (%rsp), %rcx |
2437 | - movq %rax, R11(%rdi) |
2438 | - movq %rax, R10(%rdi) |
2439 | - movq %rax, R9(%rdi) |
2440 | - movq %rax, R8(%rdi) |
2441 | - movq %rax, RAX(%rdi) |
2442 | - movq %rax, RCX(%rdi) |
2443 | - movq %rax, RDX(%rdi) |
2444 | - movq %rax, RSI(%rdi) |
2445 | - movq %rax, RDI(%rdi) |
2446 | - movq %rax, ORIG_RAX(%rdi) |
2447 | - movq %rcx, RIP(%rdi) |
2448 | - leaq 8(%rsp), %rcx |
2449 | - movq $__KERNEL_CS, CS(%rdi) |
2450 | - movq %rax, EFLAGS(%rdi) |
2451 | - movq %rcx, RSP(%rdi) |
2452 | - movq $__KERNEL_DS, SS(%rdi) |
2453 | - jmpq *%rdx |
2454 | - CFI_ENDPROC |
2455 | -ENDPROC(arch_unwind_init_running) |
2456 | -#endif |
2457 | --- a/arch/x86/kernel/genapic_64-xen.c |
2458 | +++ b/arch/x86/kernel/genapic_64-xen.c |
2459 | @@ -34,6 +34,7 @@ |
2460 | |
2461 | #ifndef CONFIG_XEN |
2462 | struct genapic *genapic = &apic_flat; |
2463 | +struct genapic *genapic_force; |
2464 | #else |
2465 | extern struct genapic apic_xen; |
2466 | struct genapic *genapic = &apic_xen; |
2467 | @@ -52,6 +53,13 @@ |
2468 | u8 cluster_cnt[NUM_APIC_CLUSTERS]; |
2469 | int max_apic = 0; |
2470 | |
2471 | + /* genapic selection can be forced because of certain quirks. |
2472 | + */ |
2473 | + if (genapic_force) { |
2474 | + genapic = genapic_force; |
2475 | + goto print; |
2476 | + } |
2477 | + |
2478 | #if defined(CONFIG_ACPI) |
2479 | /* |
2480 | * Some x86_64 machines use physical APIC mode regardless of how many |
2481 | --- a/arch/x86/kernel/head64-xen.c |
2482 | +++ b/arch/x86/kernel/head64-xen.c |
2483 | @@ -101,7 +101,10 @@ |
2484 | machine_to_phys_order++; |
2485 | |
2486 | #if 0 |
2487 | - for (i = 0; i < 256; i++) |
2488 | + /* clear bss before set_intr_gate with early_idt_handler */ |
2489 | + clear_bss(); |
2490 | + |
2491 | + for (i = 0; i < IDT_ENTRIES; i++) |
2492 | set_intr_gate(i, early_idt_handler); |
2493 | asm volatile("lidt %0" :: "m" (idt_descr)); |
2494 | #endif |
2495 | --- a/arch/x86/kernel/head_32-xen.S |
2496 | +++ b/arch/x86/kernel/head_32-xen.S |
2497 | @@ -9,6 +9,7 @@ |
2498 | #include <asm/cache.h> |
2499 | #include <asm/thread_info.h> |
2500 | #include <asm/asm-offsets.h> |
2501 | +#include <asm/boot.h> |
2502 | #include <asm/dwarf2.h> |
2503 | #include <xen/interface/xen.h> |
2504 | #include <xen/interface/elfnote.h> |
2505 | @@ -35,6 +36,8 @@ |
2506 | /* Set up the stack pointer */ |
2507 | movl $(init_thread_union+THREAD_SIZE),%esp |
2508 | |
2509 | + call setup_pda |
2510 | + |
2511 | /* get vendor info */ |
2512 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID |
2513 | XEN_CPUID |
2514 | @@ -57,14 +60,58 @@ |
2515 | |
2516 | movb $1,X86_HARD_MATH |
2517 | |
2518 | - xorl %eax,%eax # Clear FS/GS and LDT |
2519 | + xorl %eax,%eax # Clear FS |
2520 | movl %eax,%fs |
2521 | - movl %eax,%gs |
2522 | + |
2523 | + movl $(__KERNEL_PDA),%eax |
2524 | + mov %eax,%gs |
2525 | + |
2526 | cld # gcc2 wants the direction flag cleared at all times |
2527 | |
2528 | pushl $0 # fake return address for unwinder |
2529 | jmp start_kernel |
2530 | |
2531 | +/* |
2532 | + * Point the GDT at this CPU's PDA. This will be |
2533 | + * cpu_gdt_table and boot_pda. |
2534 | + */ |
2535 | +setup_pda: |
2536 | + /* get the PDA pointer */ |
2537 | + movl $boot_pda, %eax |
2538 | + |
2539 | + /* slot the PDA address into the GDT */ |
2540 | + mov $cpu_gdt_table, %ecx |
2541 | + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ |
2542 | + shr $16, %eax |
2543 | + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ |
2544 | + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ |
2545 | + |
2546 | + # %esi still points to start_info, and no registers |
2547 | + # need to be preserved. |
2548 | + |
2549 | + movl XEN_START_mfn_list(%esi), %ebx |
2550 | + movl $(cpu_gdt_table - __PAGE_OFFSET), %eax |
2551 | + shrl $PAGE_SHIFT, %eax |
2552 | + movl (%ebx,%eax,4), %ecx |
2553 | + pushl %ecx # frame number for set_gdt below |
2554 | + |
2555 | + xorl %esi, %esi |
2556 | + xorl %edx, %edx |
2557 | + shldl $PAGE_SHIFT, %ecx, %edx |
2558 | + shll $PAGE_SHIFT, %ecx |
2559 | + orl $0x61, %ecx |
2560 | + movl $cpu_gdt_table, %ebx |
2561 | + movl $__HYPERVISOR_update_va_mapping, %eax |
2562 | + int $0x82 |
2563 | + |
2564 | + movl $(PAGE_SIZE_asm / 8), %ecx |
2565 | + movl %esp, %ebx |
2566 | + movl $__HYPERVISOR_set_gdt, %eax |
2567 | + int $0x82 |
2568 | + |
2569 | + popl %ecx |
2570 | + ret |
2571 | + |
2572 | #define HYPERCALL_PAGE_OFFSET 0x1000 |
2573 | .org HYPERCALL_PAGE_OFFSET |
2574 | ENTRY(hypercall_page) |
2575 | @@ -93,7 +140,8 @@ |
2576 | /* |
2577 | * The Global Descriptor Table contains 28 quadwords, per-CPU. |
2578 | */ |
2579 | - .align L1_CACHE_BYTES |
2580 | + .section .data.page_aligned, "aw" |
2581 | + .align PAGE_SIZE_asm |
2582 | ENTRY(cpu_gdt_table) |
2583 | .quad 0x0000000000000000 /* NULL descriptor */ |
2584 | .quad 0x0000000000000000 /* 0x0b reserved */ |
2585 | @@ -135,12 +183,13 @@ |
2586 | .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ |
2587 | .quad 0x0000000000000000 /* 0xc8 APM DS data */ |
2588 | |
2589 | - .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ |
2590 | - .quad 0x0000000000000000 /* 0xd8 - unused */ |
2591 | + .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ |
2592 | + .quad 0x00cf92000000ffff /* 0xd8 - PDA */ |
2593 | .quad 0x0000000000000000 /* 0xe0 - unused */ |
2594 | .quad 0x0000000000000000 /* 0xe8 - unused */ |
2595 | .quad 0x0000000000000000 /* 0xf0 - unused */ |
2596 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ |
2597 | + .align PAGE_SIZE_asm |
2598 | |
2599 | #if CONFIG_XEN_COMPAT <= 0x030002 |
2600 | /* |
2601 | @@ -165,9 +214,9 @@ |
2602 | .ascii ",ELF_PADDR_OFFSET=0x" |
2603 | utoa __PAGE_OFFSET |
2604 | .ascii ",VIRT_ENTRY=0x" |
2605 | - utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) |
2606 | + utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET) |
2607 | .ascii ",HYPERCALL_PAGE=0x" |
2608 | - utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) |
2609 | + utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) |
2610 | .ascii ",FEATURES=writable_page_tables" |
2611 | .ascii "|writable_descriptor_tables" |
2612 | .ascii "|auto_translated_physmap" |
2613 | --- a/arch/x86/kernel/io_apic_32-xen.c |
2614 | +++ b/arch/x86/kernel/io_apic_32-xen.c |
2615 | @@ -34,6 +34,7 @@ |
2616 | #include <linux/pci.h> |
2617 | #include <linux/msi.h> |
2618 | #include <linux/htirq.h> |
2619 | +#include <linux/freezer.h> |
2620 | |
2621 | #include <asm/io.h> |
2622 | #include <asm/smp.h> |
2623 | @@ -194,14 +195,20 @@ |
2624 | * the interrupt, and we need to make sure the entry is fully populated |
2625 | * before that happens. |
2626 | */ |
2627 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2628 | +static void |
2629 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2630 | { |
2631 | - unsigned long flags; |
2632 | union entry_union eu; |
2633 | eu.entry = e; |
2634 | - spin_lock_irqsave(&ioapic_lock, flags); |
2635 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
2636 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
2637 | +} |
2638 | + |
2639 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2640 | +{ |
2641 | + unsigned long flags; |
2642 | + spin_lock_irqsave(&ioapic_lock, flags); |
2643 | + __ioapic_write_entry(apic, pin, e); |
2644 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2645 | } |
2646 | |
2647 | @@ -883,8 +890,7 @@ |
2648 | |
2649 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || |
2650 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || |
2651 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || |
2652 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 |
2653 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA |
2654 | ) && |
2655 | (mp_irqs[i].mpc_irqtype == type) && |
2656 | (mp_irqs[i].mpc_srcbusirq == irq)) |
2657 | @@ -903,8 +909,7 @@ |
2658 | |
2659 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || |
2660 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || |
2661 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || |
2662 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 |
2663 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA |
2664 | ) && |
2665 | (mp_irqs[i].mpc_irqtype == type) && |
2666 | (mp_irqs[i].mpc_srcbusirq == irq)) |
2667 | @@ -1036,12 +1041,6 @@ |
2668 | #define default_MCA_trigger(idx) (1) |
2669 | #define default_MCA_polarity(idx) (0) |
2670 | |
2671 | -/* NEC98 interrupts are always polarity zero edge triggered, |
2672 | - * when listed as conforming in the MP table. */ |
2673 | - |
2674 | -#define default_NEC98_trigger(idx) (0) |
2675 | -#define default_NEC98_polarity(idx) (0) |
2676 | - |
2677 | static int __init MPBIOS_polarity(int idx) |
2678 | { |
2679 | int bus = mp_irqs[idx].mpc_srcbus; |
2680 | @@ -1076,11 +1075,6 @@ |
2681 | polarity = default_MCA_polarity(idx); |
2682 | break; |
2683 | } |
2684 | - case MP_BUS_NEC98: /* NEC 98 pin */ |
2685 | - { |
2686 | - polarity = default_NEC98_polarity(idx); |
2687 | - break; |
2688 | - } |
2689 | default: |
2690 | { |
2691 | printk(KERN_WARNING "broken BIOS!!\n"); |
2692 | @@ -1150,11 +1144,6 @@ |
2693 | trigger = default_MCA_trigger(idx); |
2694 | break; |
2695 | } |
2696 | - case MP_BUS_NEC98: /* NEC 98 pin */ |
2697 | - { |
2698 | - trigger = default_NEC98_trigger(idx); |
2699 | - break; |
2700 | - } |
2701 | default: |
2702 | { |
2703 | printk(KERN_WARNING "broken BIOS!!\n"); |
2704 | @@ -1216,7 +1205,6 @@ |
2705 | case MP_BUS_ISA: /* ISA pin */ |
2706 | case MP_BUS_EISA: |
2707 | case MP_BUS_MCA: |
2708 | - case MP_BUS_NEC98: |
2709 | { |
2710 | irq = mp_irqs[idx].mpc_srcbusirq; |
2711 | break; |
2712 | @@ -1284,7 +1272,7 @@ |
2713 | } |
2714 | |
2715 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ |
2716 | -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ |
2717 | +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ |
2718 | |
2719 | static int __assign_irq_vector(int irq) |
2720 | { |
2721 | @@ -1407,8 +1395,8 @@ |
2722 | if (!apic && (irq < 16)) |
2723 | disable_8259A_irq(irq); |
2724 | } |
2725 | - ioapic_write_entry(apic, pin, entry); |
2726 | spin_lock_irqsave(&ioapic_lock, flags); |
2727 | + __ioapic_write_entry(apic, pin, entry); |
2728 | set_native_irq_info(irq, TARGET_CPUS); |
2729 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2730 | } |
2731 | @@ -1974,6 +1962,15 @@ |
2732 | #endif |
2733 | |
2734 | #ifndef CONFIG_XEN |
2735 | +static int no_timer_check __initdata; |
2736 | + |
2737 | +static int __init notimercheck(char *s) |
2738 | +{ |
2739 | + no_timer_check = 1; |
2740 | + return 1; |
2741 | +} |
2742 | +__setup("no_timer_check", notimercheck); |
2743 | + |
2744 | /* |
2745 | * There is a nasty bug in some older SMP boards, their mptable lies |
2746 | * about the timer IRQ. We do the following to work around the situation: |
2747 | @@ -1982,10 +1979,13 @@ |
2748 | * - if this function detects that timer IRQs are defunct, then we fall |
2749 | * back to ISA timer IRQs |
2750 | */ |
2751 | -static int __init timer_irq_works(void) |
2752 | +int __init timer_irq_works(void) |
2753 | { |
2754 | unsigned long t1 = jiffies; |
2755 | |
2756 | + if (no_timer_check) |
2757 | + return 1; |
2758 | + |
2759 | local_irq_enable(); |
2760 | /* Let ten ticks pass... */ |
2761 | mdelay((10 * 1000) / HZ); |
2762 | @@ -2212,9 +2212,15 @@ |
2763 | unsigned char save_control, save_freq_select; |
2764 | |
2765 | pin = find_isa_irq_pin(8, mp_INT); |
2766 | + if (pin == -1) { |
2767 | + WARN_ON_ONCE(1); |
2768 | + return; |
2769 | + } |
2770 | apic = find_isa_irq_apic(8, mp_INT); |
2771 | - if (pin == -1) |
2772 | + if (apic == -1) { |
2773 | + WARN_ON_ONCE(1); |
2774 | return; |
2775 | + } |
2776 | |
2777 | entry0 = ioapic_read_entry(apic, pin); |
2778 | clear_IO_APIC_pin(apic, pin); |
2779 | @@ -2259,7 +2265,7 @@ |
2780 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast |
2781 | * fanatically on his truly buggy board. |
2782 | */ |
2783 | -static inline void check_timer(void) |
2784 | +static inline void __init check_timer(void) |
2785 | { |
2786 | int apic1, pin1, apic2, pin2; |
2787 | int vector; |
2788 | @@ -2543,7 +2549,7 @@ |
2789 | int create_irq(void) |
2790 | { |
2791 | /* Allocate an unused irq */ |
2792 | - int irq, new, vector; |
2793 | + int irq, new, vector = 0; |
2794 | unsigned long flags; |
2795 | |
2796 | irq = -ENOSPC; |
2797 | @@ -2923,8 +2929,8 @@ |
2798 | if (!ioapic && (irq < 16)) |
2799 | disable_8259A_irq(irq); |
2800 | |
2801 | - ioapic_write_entry(ioapic, pin, entry); |
2802 | spin_lock_irqsave(&ioapic_lock, flags); |
2803 | + __ioapic_write_entry(ioapic, pin, entry); |
2804 | set_native_irq_info(irq, TARGET_CPUS); |
2805 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2806 | |
2807 | --- a/arch/x86/kernel/io_apic_64-xen.c |
2808 | +++ b/arch/x86/kernel/io_apic_64-xen.c |
2809 | @@ -199,14 +199,20 @@ |
2810 | * the interrupt, and we need to make sure the entry is fully populated |
2811 | * before that happens. |
2812 | */ |
2813 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2814 | +static void |
2815 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2816 | { |
2817 | - unsigned long flags; |
2818 | union entry_union eu; |
2819 | eu.entry = e; |
2820 | - spin_lock_irqsave(&ioapic_lock, flags); |
2821 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
2822 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
2823 | +} |
2824 | + |
2825 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
2826 | +{ |
2827 | + unsigned long flags; |
2828 | + spin_lock_irqsave(&ioapic_lock, flags); |
2829 | + __ioapic_write_entry(apic, pin, e); |
2830 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2831 | } |
2832 | |
2833 | @@ -714,6 +720,22 @@ |
2834 | } |
2835 | |
2836 | #ifndef CONFIG_XEN |
2837 | +static void __clear_irq_vector(int irq) |
2838 | +{ |
2839 | + cpumask_t mask; |
2840 | + int cpu, vector; |
2841 | + |
2842 | + BUG_ON(!irq_vector[irq]); |
2843 | + |
2844 | + vector = irq_vector[irq]; |
2845 | + cpus_and(mask, irq_domain[irq], cpu_online_map); |
2846 | + for_each_cpu_mask(cpu, mask) |
2847 | + per_cpu(vector_irq, cpu)[vector] = -1; |
2848 | + |
2849 | + irq_vector[irq] = 0; |
2850 | + irq_domain[irq] = CPU_MASK_NONE; |
2851 | +} |
2852 | + |
2853 | void __setup_vector_irq(int cpu) |
2854 | { |
2855 | /* Initialize vector_irq on a new cpu */ |
2856 | @@ -761,26 +783,65 @@ |
2857 | #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) |
2858 | #endif /* !CONFIG_XEN */ |
2859 | |
2860 | -static void __init setup_IO_APIC_irqs(void) |
2861 | +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) |
2862 | { |
2863 | struct IO_APIC_route_entry entry; |
2864 | - int apic, pin, idx, irq, first_notcon = 1, vector; |
2865 | + int vector; |
2866 | unsigned long flags; |
2867 | |
2868 | - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
2869 | |
2870 | - for (apic = 0; apic < nr_ioapics; apic++) { |
2871 | - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
2872 | + /* |
2873 | + * add it to the IO-APIC irq-routing table: |
2874 | + */ |
2875 | + memset(&entry,0,sizeof(entry)); |
2876 | |
2877 | - /* |
2878 | - * add it to the IO-APIC irq-routing table: |
2879 | - */ |
2880 | - memset(&entry,0,sizeof(entry)); |
2881 | + entry.delivery_mode = INT_DELIVERY_MODE; |
2882 | + entry.dest_mode = INT_DEST_MODE; |
2883 | + entry.mask = 0; /* enable IRQ */ |
2884 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
2885 | |
2886 | - entry.delivery_mode = INT_DELIVERY_MODE; |
2887 | - entry.dest_mode = INT_DEST_MODE; |
2888 | - entry.mask = 0; /* enable IRQ */ |
2889 | + entry.trigger = irq_trigger(idx); |
2890 | + entry.polarity = irq_polarity(idx); |
2891 | + |
2892 | + if (irq_trigger(idx)) { |
2893 | + entry.trigger = 1; |
2894 | + entry.mask = 1; |
2895 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
2896 | + } |
2897 | + |
2898 | + if (/* !apic && */ !IO_APIC_IRQ(irq)) |
2899 | + return; |
2900 | + |
2901 | + if (IO_APIC_IRQ(irq)) { |
2902 | + cpumask_t mask; |
2903 | + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
2904 | + if (vector < 0) |
2905 | + return; |
2906 | + |
2907 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
2908 | + entry.vector = vector; |
2909 | + |
2910 | + ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
2911 | + if (!apic && (irq < 16)) |
2912 | + disable_8259A_irq(irq); |
2913 | + } |
2914 | + |
2915 | + ioapic_write_entry(apic, pin, entry); |
2916 | + |
2917 | + spin_lock_irqsave(&ioapic_lock, flags); |
2918 | + set_native_irq_info(irq, TARGET_CPUS); |
2919 | + spin_unlock_irqrestore(&ioapic_lock, flags); |
2920 | + |
2921 | +} |
2922 | + |
2923 | +static void __init setup_IO_APIC_irqs(void) |
2924 | +{ |
2925 | + int apic, pin, idx, irq, first_notcon = 1; |
2926 | + |
2927 | + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
2928 | + |
2929 | + for (apic = 0; apic < nr_ioapics; apic++) { |
2930 | + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
2931 | |
2932 | idx = find_irq_entry(apic,pin,mp_INT); |
2933 | if (idx == -1) { |
2934 | @@ -792,39 +853,11 @@ |
2935 | continue; |
2936 | } |
2937 | |
2938 | - entry.trigger = irq_trigger(idx); |
2939 | - entry.polarity = irq_polarity(idx); |
2940 | - |
2941 | - if (irq_trigger(idx)) { |
2942 | - entry.trigger = 1; |
2943 | - entry.mask = 1; |
2944 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); |
2945 | - } |
2946 | - |
2947 | irq = pin_2_irq(idx, apic, pin); |
2948 | add_pin_to_irq(irq, apic, pin); |
2949 | |
2950 | - if (/* !apic && */ !IO_APIC_IRQ(irq)) |
2951 | - continue; |
2952 | - |
2953 | - if (IO_APIC_IRQ(irq)) { |
2954 | - cpumask_t mask; |
2955 | - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); |
2956 | - if (vector < 0) |
2957 | - continue; |
2958 | - |
2959 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); |
2960 | - entry.vector = vector; |
2961 | + setup_IO_APIC_irq(apic, pin, idx, irq); |
2962 | |
2963 | - ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
2964 | - if (!apic && (irq < 16)) |
2965 | - disable_8259A_irq(irq); |
2966 | - } |
2967 | - ioapic_write_entry(apic, pin, entry); |
2968 | - |
2969 | - spin_lock_irqsave(&ioapic_lock, flags); |
2970 | - set_native_irq_info(irq, TARGET_CPUS); |
2971 | - spin_unlock_irqrestore(&ioapic_lock, flags); |
2972 | } |
2973 | } |
2974 | |
2975 | @@ -1819,7 +1852,7 @@ |
2976 | dynamic_irq_cleanup(irq); |
2977 | |
2978 | spin_lock_irqsave(&vector_lock, flags); |
2979 | - irq_vector[irq] = 0; |
2980 | + __clear_irq_vector(irq); |
2981 | spin_unlock_irqrestore(&vector_lock, flags); |
2982 | } |
2983 | #endif |
2984 | @@ -2123,7 +2156,15 @@ |
2985 | if (irq_entry == -1) |
2986 | continue; |
2987 | irq = pin_2_irq(irq_entry, ioapic, pin); |
2988 | - set_ioapic_affinity_irq(irq, TARGET_CPUS); |
2989 | + |
2990 | + /* setup_IO_APIC_irqs could fail to get vector for some device |
2991 | + * when you have too many devices, because at that time only boot |
2992 | + * cpu is online. |
2993 | + */ |
2994 | + if(!irq_vector[irq]) |
2995 | + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); |
2996 | + else |
2997 | + set_ioapic_affinity_irq(irq, TARGET_CPUS); |
2998 | } |
2999 | |
3000 | } |
3001 | --- a/arch/x86/kernel/irq_64-xen.c |
3002 | +++ b/arch/x86/kernel/irq_64-xen.c |
3003 | @@ -120,7 +120,7 @@ |
3004 | |
3005 | if (likely(irq < NR_IRQS)) |
3006 | generic_handle_irq(irq); |
3007 | - else |
3008 | + else if (printk_ratelimit()) |
3009 | printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", |
3010 | __func__, smp_processor_id(), irq); |
3011 | |
3012 | --- a/arch/x86/kernel/ldt_32-xen.c |
3013 | +++ b/arch/x86/kernel/ldt_32-xen.c |
3014 | @@ -177,16 +177,14 @@ |
3015 | { |
3016 | int err; |
3017 | unsigned long size; |
3018 | - void *address; |
3019 | |
3020 | err = 0; |
3021 | - address = &default_ldt[0]; |
3022 | size = 5*sizeof(struct desc_struct); |
3023 | if (size > bytecount) |
3024 | size = bytecount; |
3025 | |
3026 | err = size; |
3027 | - if (copy_to_user(ptr, address, size)) |
3028 | + if (clear_user(ptr, size)) |
3029 | err = -EFAULT; |
3030 | |
3031 | return err; |
3032 | --- a/arch/x86/kernel/microcode-xen.c |
3033 | +++ b/arch/x86/kernel/microcode-xen.c |
3034 | @@ -1,7 +1,7 @@ |
3035 | /* |
3036 | * Intel CPU Microcode Update Driver for Linux |
3037 | * |
3038 | - * Copyright (C) 2000-2004 Tigran Aivazian |
3039 | + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> |
3040 | * 2006 Shaohua Li <shaohua.li@intel.com> |
3041 | * |
3042 | * This driver allows to upgrade microcode on Intel processors |
3043 | @@ -43,7 +43,7 @@ |
3044 | #include <asm/processor.h> |
3045 | |
3046 | MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); |
3047 | -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); |
3048 | +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
3049 | MODULE_LICENSE("GPL"); |
3050 | |
3051 | static int verbose; |
3052 | @@ -195,7 +195,7 @@ |
3053 | request_microcode(); |
3054 | |
3055 | printk(KERN_INFO |
3056 | - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); |
3057 | + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); |
3058 | return 0; |
3059 | } |
3060 | |
3061 | --- a/arch/x86/kernel/mpparse_32-xen.c |
3062 | +++ b/arch/x86/kernel/mpparse_32-xen.c |
3063 | @@ -36,7 +36,7 @@ |
3064 | |
3065 | /* Have we found an MP table */ |
3066 | int smp_found_config; |
3067 | -unsigned int __initdata maxcpus = NR_CPUS; |
3068 | +unsigned int __cpuinitdata maxcpus = NR_CPUS; |
3069 | |
3070 | /* |
3071 | * Various Linux-internal data structures created from the |
3072 | @@ -102,10 +102,10 @@ |
3073 | */ |
3074 | |
3075 | static int mpc_record; |
3076 | -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; |
3077 | +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; |
3078 | |
3079 | #ifndef CONFIG_XEN |
3080 | -static void __devinit MP_processor_info (struct mpc_config_processor *m) |
3081 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) |
3082 | { |
3083 | int ver, apicid; |
3084 | physid_mask_t phys_cpu; |
3085 | @@ -221,7 +221,7 @@ |
3086 | bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; |
3087 | } |
3088 | #else |
3089 | -void __init MP_processor_info (struct mpc_config_processor *m) |
3090 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) |
3091 | { |
3092 | num_processors++; |
3093 | } |
3094 | @@ -256,8 +256,6 @@ |
3095 | mp_current_pci_id++; |
3096 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { |
3097 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; |
3098 | - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { |
3099 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; |
3100 | } else { |
3101 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
3102 | } |
3103 | @@ -842,7 +840,7 @@ |
3104 | #endif |
3105 | } |
3106 | |
3107 | -void __devinit mp_register_lapic (u8 id, u8 enabled) |
3108 | +void __cpuinit mp_register_lapic (u8 id, u8 enabled) |
3109 | { |
3110 | struct mpc_config_processor processor; |
3111 | int boot_cpu = 0; |
3112 | --- a/arch/x86/kernel/mpparse_64-xen.c |
3113 | +++ b/arch/x86/kernel/mpparse_64-xen.c |
3114 | @@ -35,8 +35,6 @@ |
3115 | int smp_found_config; |
3116 | unsigned int __initdata maxcpus = NR_CPUS; |
3117 | |
3118 | -int acpi_found_madt; |
3119 | - |
3120 | /* |
3121 | * Various Linux-internal data structures created from the |
3122 | * MP-table. |
3123 | --- a/arch/x86/kernel/pci-dma_32-xen.c |
3124 | +++ b/arch/x86/kernel/pci-dma_32-xen.c |
3125 | @@ -282,7 +282,7 @@ |
3126 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, |
3127 | dma_addr_t device_addr, size_t size, int flags) |
3128 | { |
3129 | - void __iomem *mem_base; |
3130 | + void __iomem *mem_base = NULL; |
3131 | int pages = size >> PAGE_SHIFT; |
3132 | int bitmap_size = (pages + 31)/32; |
3133 | |
3134 | @@ -299,14 +299,12 @@ |
3135 | if (!mem_base) |
3136 | goto out; |
3137 | |
3138 | - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); |
3139 | + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); |
3140 | if (!dev->dma_mem) |
3141 | goto out; |
3142 | - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); |
3143 | - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); |
3144 | + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); |
3145 | if (!dev->dma_mem->bitmap) |
3146 | goto free1_out; |
3147 | - memset(dev->dma_mem->bitmap, 0, bitmap_size); |
3148 | |
3149 | dev->dma_mem->virt_base = mem_base; |
3150 | dev->dma_mem->device_base = device_addr; |
3151 | @@ -321,6 +319,8 @@ |
3152 | free1_out: |
3153 | kfree(dev->dma_mem->bitmap); |
3154 | out: |
3155 | + if (mem_base) |
3156 | + iounmap(mem_base); |
3157 | return 0; |
3158 | } |
3159 | EXPORT_SYMBOL(dma_declare_coherent_memory); |
3160 | --- a/arch/x86/kernel/process_32-xen.c |
3161 | +++ b/arch/x86/kernel/process_32-xen.c |
3162 | @@ -60,6 +60,7 @@ |
3163 | |
3164 | #include <asm/tlbflush.h> |
3165 | #include <asm/cpu.h> |
3166 | +#include <asm/pda.h> |
3167 | |
3168 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
3169 | |
3170 | @@ -104,28 +105,24 @@ |
3171 | */ |
3172 | static void poll_idle (void) |
3173 | { |
3174 | - local_irq_enable(); |
3175 | - |
3176 | - asm volatile( |
3177 | - "2:" |
3178 | - "testl %0, %1;" |
3179 | - "rep; nop;" |
3180 | - "je 2b;" |
3181 | - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); |
3182 | + cpu_relax(); |
3183 | } |
3184 | |
3185 | static void xen_idle(void) |
3186 | { |
3187 | - local_irq_disable(); |
3188 | + current_thread_info()->status &= ~TS_POLLING; |
3189 | + /* |
3190 | + * TS_POLLING-cleared state must be visible before we |
3191 | + * test NEED_RESCHED: |
3192 | + */ |
3193 | + smp_mb(); |
3194 | |
3195 | - if (need_resched()) |
3196 | + local_irq_disable(); |
3197 | + if (!need_resched()) |
3198 | + safe_halt(); /* enables interrupts racelessly */ |
3199 | + else |
3200 | local_irq_enable(); |
3201 | - else { |
3202 | - current_thread_info()->status &= ~TS_POLLING; |
3203 | - smp_mb__after_clear_bit(); |
3204 | - safe_halt(); |
3205 | - current_thread_info()->status |= TS_POLLING; |
3206 | - } |
3207 | + current_thread_info()->status |= TS_POLLING; |
3208 | } |
3209 | #ifdef CONFIG_APM_MODULE |
3210 | EXPORT_SYMBOL(default_idle); |
3211 | @@ -250,8 +247,8 @@ |
3212 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
3213 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
3214 | regs->esi, regs->edi, regs->ebp); |
3215 | - printk(" DS: %04x ES: %04x\n", |
3216 | - 0xffff & regs->xds,0xffff & regs->xes); |
3217 | + printk(" DS: %04x ES: %04x GS: %04x\n", |
3218 | + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); |
3219 | |
3220 | cr0 = read_cr0(); |
3221 | cr2 = read_cr2(); |
3222 | @@ -282,6 +279,7 @@ |
3223 | |
3224 | regs.xds = __USER_DS; |
3225 | regs.xes = __USER_DS; |
3226 | + regs.xgs = __KERNEL_PDA; |
3227 | regs.orig_eax = -1; |
3228 | regs.eip = (unsigned long) kernel_thread_helper; |
3229 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
3230 | @@ -359,7 +357,6 @@ |
3231 | p->thread.eip = (unsigned long) ret_from_fork; |
3232 | |
3233 | savesegment(fs,p->thread.fs); |
3234 | - savesegment(gs,p->thread.gs); |
3235 | |
3236 | tsk = current; |
3237 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
3238 | @@ -438,7 +435,7 @@ |
3239 | dump->regs.ds = regs->xds; |
3240 | dump->regs.es = regs->xes; |
3241 | savesegment(fs,dump->regs.fs); |
3242 | - savesegment(gs,dump->regs.gs); |
3243 | + dump->regs.gs = regs->xgs; |
3244 | dump->regs.orig_eax = regs->orig_eax; |
3245 | dump->regs.eip = regs->eip; |
3246 | dump->regs.cs = regs->xcs; |
3247 | @@ -614,17 +611,19 @@ |
3248 | if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) |
3249 | BUG(); |
3250 | |
3251 | + /* we're going to use this soon, after a few expensive things */ |
3252 | + if (next_p->fpu_counter > 5) |
3253 | + prefetch(&next->i387.fxsave); |
3254 | + |
3255 | /* |
3256 | - * Restore %fs and %gs if needed. |
3257 | + * Restore %fs if needed. |
3258 | * |
3259 | - * Glibc normally makes %fs be zero, and %gs is one of |
3260 | - * the TLS segments. |
3261 | + * Glibc normally makes %fs be zero. |
3262 | */ |
3263 | if (unlikely(next->fs)) |
3264 | loadsegment(fs, next->fs); |
3265 | |
3266 | - if (next->gs) |
3267 | - loadsegment(gs, next->gs); |
3268 | + write_pda(pcurrent, next_p); |
3269 | |
3270 | /* |
3271 | * Now maybe handle debug registers |
3272 | @@ -634,6 +633,13 @@ |
3273 | |
3274 | disable_tsc(prev_p, next_p); |
3275 | |
3276 | + /* If the task has used fpu the last 5 timeslices, just do a full |
3277 | + * restore of the math state immediately to avoid the trap; the |
3278 | + * chances of needing FPU soon are obviously high now |
3279 | + */ |
3280 | + if (next_p->fpu_counter > 5) |
3281 | + math_state_restore(); |
3282 | + |
3283 | return prev_p; |
3284 | } |
3285 | |
3286 | --- a/arch/x86/kernel/process_64-xen.c |
3287 | +++ b/arch/x86/kernel/process_64-xen.c |
3288 | @@ -119,29 +119,23 @@ |
3289 | static void poll_idle (void) |
3290 | { |
3291 | local_irq_enable(); |
3292 | - |
3293 | - asm volatile( |
3294 | - "2:" |
3295 | - "testl %0,%1;" |
3296 | - "rep; nop;" |
3297 | - "je 2b;" |
3298 | - : : |
3299 | - "i" (_TIF_NEED_RESCHED), |
3300 | - "m" (current_thread_info()->flags)); |
3301 | + cpu_relax(); |
3302 | } |
3303 | |
3304 | static void xen_idle(void) |
3305 | { |
3306 | + current_thread_info()->status &= ~TS_POLLING; |
3307 | + /* |
3308 | + * TS_POLLING-cleared state must be visible before we |
3309 | + * test NEED_RESCHED: |
3310 | + */ |
3311 | + smp_mb(); |
3312 | local_irq_disable(); |
3313 | - |
3314 | - if (need_resched()) |
3315 | - local_irq_enable(); |
3316 | - else { |
3317 | - current_thread_info()->status &= ~TS_POLLING; |
3318 | - smp_mb__after_clear_bit(); |
3319 | + if (!need_resched()) |
3320 | safe_halt(); |
3321 | - current_thread_info()->status |= TS_POLLING; |
3322 | - } |
3323 | + else |
3324 | + local_irq_enable(); |
3325 | + current_thread_info()->status |= TS_POLLING; |
3326 | } |
3327 | |
3328 | #ifdef CONFIG_HOTPLUG_CPU |
3329 | @@ -181,6 +175,12 @@ |
3330 | idle = xen_idle; /* no alternatives */ |
3331 | if (cpu_is_offline(smp_processor_id())) |
3332 | play_dead(); |
3333 | + /* |
3334 | + * Idle routines should keep interrupts disabled |
3335 | + * from here on, until they go to idle. |
3336 | + * Otherwise, idle callbacks can misfire. |
3337 | + */ |
3338 | + local_irq_disable(); |
3339 | enter_idle(); |
3340 | idle(); |
3341 | /* In many cases the interrupt that ended idle |
3342 | --- a/arch/x86/kernel/quirks-xen.c |
3343 | +++ b/arch/x86/kernel/quirks-xen.c |
3344 | @@ -3,10 +3,12 @@ |
3345 | */ |
3346 | #include <linux/pci.h> |
3347 | #include <linux/irq.h> |
3348 | +#include <asm/pci-direct.h> |
3349 | +#include <asm/genapic.h> |
3350 | +#include <asm/cpu.h> |
3351 | |
3352 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) |
3353 | - |
3354 | -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) |
3355 | +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) |
3356 | { |
3357 | u8 config, rev; |
3358 | u32 word; |
3359 | @@ -14,14 +16,12 @@ |
3360 | /* BIOS may enable hardware IRQ balancing for |
3361 | * E7520/E7320/E7525(revision ID 0x9 and below) |
3362 | * based platforms. |
3363 | - * Disable SW irqbalance/affinity on those platforms. |
3364 | + * For those platforms, make sure that the genapic is set to 'flat' |
3365 | */ |
3366 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
3367 | if (rev > 0x9) |
3368 | return; |
3369 | |
3370 | - printk(KERN_INFO "Intel E7520/7320/7525 detected."); |
3371 | - |
3372 | /* enable access to config space*/ |
3373 | pci_read_config_byte(dev, 0xf4, &config); |
3374 | pci_write_config_byte(dev, 0xf4, config|0x2); |
3375 | @@ -30,6 +30,46 @@ |
3376 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); |
3377 | |
3378 | if (!(word & (1 << 13))) { |
3379 | +#ifndef CONFIG_XEN |
3380 | +#ifdef CONFIG_X86_64 |
3381 | + if (genapic != &apic_flat) |
3382 | + panic("APIC mode must be flat on this system\n"); |
3383 | +#elif defined(CONFIG_X86_GENERICARCH) |
3384 | + if (genapic != &apic_default) |
3385 | + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); |
3386 | +#endif |
3387 | +#endif |
3388 | + } |
3389 | + |
3390 | + /* put back the original value for config space*/ |
3391 | + if (!(config & 0x2)) |
3392 | + pci_write_config_byte(dev, 0xf4, config); |
3393 | +} |
3394 | + |
3395 | +void __init quirk_intel_irqbalance(void) |
3396 | +{ |
3397 | + u8 config, rev; |
3398 | + u32 word; |
3399 | + |
3400 | + /* BIOS may enable hardware IRQ balancing for |
3401 | + * E7520/E7320/E7525(revision ID 0x9 and below) |
3402 | + * based platforms. |
3403 | + * Disable SW irqbalance/affinity on those platforms. |
3404 | + */ |
3405 | + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); |
3406 | + if (rev > 0x9) |
3407 | + return; |
3408 | + |
3409 | + printk(KERN_INFO "Intel E7520/7320/7525 detected."); |
3410 | + |
3411 | + /* enable access to config space */ |
3412 | + config = read_pci_config_byte(0, 0, 0, 0xf4); |
3413 | + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); |
3414 | + |
3415 | + /* read xTPR register */ |
3416 | + word = read_pci_config_16(0, 0, 0x40, 0x4c); |
3417 | + |
3418 | + if (!(word & (1 << 13))) { |
3419 | struct xen_platform_op op; |
3420 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); |
3421 | op.cmd = XENPF_platform_quirk; |
3422 | @@ -37,11 +77,12 @@ |
3423 | WARN_ON(HYPERVISOR_platform_op(&op)); |
3424 | } |
3425 | |
3426 | - /* put back the original value for config space*/ |
3427 | + /* put back the original value for config space */ |
3428 | if (!(config & 0x2)) |
3429 | - pci_write_config_byte(dev, 0xf4, config); |
3430 | + write_pci_config_byte(0, 0, 0, 0xf4, config); |
3431 | } |
3432 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); |
3433 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); |
3434 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); |
3435 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); |
3436 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); |
3437 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); |
3438 | + |
3439 | #endif |
3440 | --- a/arch/x86/kernel/setup_32-xen.c |
3441 | +++ b/arch/x86/kernel/setup_32-xen.c |
3442 | @@ -76,9 +76,6 @@ |
3443 | #include <xen/interface/kexec.h> |
3444 | #endif |
3445 | |
3446 | -/* Forward Declaration. */ |
3447 | -void __init find_max_pfn(void); |
3448 | - |
3449 | static int xen_panic_event(struct notifier_block *, unsigned long, void *); |
3450 | static struct notifier_block xen_panic_block = { |
3451 | xen_panic_event, NULL, 0 /* try to go last */ |
3452 | @@ -92,14 +89,11 @@ |
3453 | /* |
3454 | * Machine setup.. |
3455 | */ |
3456 | - |
3457 | -#ifdef CONFIG_EFI |
3458 | -int efi_enabled = 0; |
3459 | -EXPORT_SYMBOL(efi_enabled); |
3460 | -#endif |
3461 | +extern struct resource code_resource; |
3462 | +extern struct resource data_resource; |
3463 | |
3464 | /* cpu data as detected by the assembly code in head.S */ |
3465 | -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
3466 | +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
3467 | /* common cpu data for all cpus */ |
3468 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; |
3469 | EXPORT_SYMBOL(boot_cpu_data); |
3470 | @@ -115,12 +109,6 @@ |
3471 | unsigned int BIOS_revision; |
3472 | unsigned int mca_pentium_flag; |
3473 | |
3474 | -/* For PCI or other memory-mapped resources */ |
3475 | -unsigned long pci_mem_start = 0x10000000; |
3476 | -#ifdef CONFIG_PCI |
3477 | -EXPORT_SYMBOL(pci_mem_start); |
3478 | -#endif |
3479 | - |
3480 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ |
3481 | int bootloader_type; |
3482 | |
3483 | @@ -153,10 +141,6 @@ |
3484 | defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) |
3485 | EXPORT_SYMBOL(ist_info); |
3486 | #endif |
3487 | -struct e820map e820; |
3488 | -#ifdef CONFIG_XEN |
3489 | -struct e820map machine_e820; |
3490 | -#endif |
3491 | |
3492 | extern void early_cpu_init(void); |
3493 | extern int root_mountflags; |
3494 | @@ -171,209 +155,6 @@ |
3495 | |
3496 | unsigned char __initdata boot_params[PARAM_SIZE]; |
3497 | |
3498 | -static struct resource data_resource = { |
3499 | - .name = "Kernel data", |
3500 | - .start = 0, |
3501 | - .end = 0, |
3502 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
3503 | -}; |
3504 | - |
3505 | -static struct resource code_resource = { |
3506 | - .name = "Kernel code", |
3507 | - .start = 0, |
3508 | - .end = 0, |
3509 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
3510 | -}; |
3511 | - |
3512 | -static struct resource system_rom_resource = { |
3513 | - .name = "System ROM", |
3514 | - .start = 0xf0000, |
3515 | - .end = 0xfffff, |
3516 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3517 | -}; |
3518 | - |
3519 | -static struct resource extension_rom_resource = { |
3520 | - .name = "Extension ROM", |
3521 | - .start = 0xe0000, |
3522 | - .end = 0xeffff, |
3523 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3524 | -}; |
3525 | - |
3526 | -static struct resource adapter_rom_resources[] = { { |
3527 | - .name = "Adapter ROM", |
3528 | - .start = 0xc8000, |
3529 | - .end = 0, |
3530 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3531 | -}, { |
3532 | - .name = "Adapter ROM", |
3533 | - .start = 0, |
3534 | - .end = 0, |
3535 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3536 | -}, { |
3537 | - .name = "Adapter ROM", |
3538 | - .start = 0, |
3539 | - .end = 0, |
3540 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3541 | -}, { |
3542 | - .name = "Adapter ROM", |
3543 | - .start = 0, |
3544 | - .end = 0, |
3545 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3546 | -}, { |
3547 | - .name = "Adapter ROM", |
3548 | - .start = 0, |
3549 | - .end = 0, |
3550 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3551 | -}, { |
3552 | - .name = "Adapter ROM", |
3553 | - .start = 0, |
3554 | - .end = 0, |
3555 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3556 | -} }; |
3557 | - |
3558 | -static struct resource video_rom_resource = { |
3559 | - .name = "Video ROM", |
3560 | - .start = 0xc0000, |
3561 | - .end = 0xc7fff, |
3562 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM |
3563 | -}; |
3564 | - |
3565 | -static struct resource video_ram_resource = { |
3566 | - .name = "Video RAM area", |
3567 | - .start = 0xa0000, |
3568 | - .end = 0xbffff, |
3569 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
3570 | -}; |
3571 | - |
3572 | -static struct resource standard_io_resources[] = { { |
3573 | - .name = "dma1", |
3574 | - .start = 0x0000, |
3575 | - .end = 0x001f, |
3576 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3577 | -}, { |
3578 | - .name = "pic1", |
3579 | - .start = 0x0020, |
3580 | - .end = 0x0021, |
3581 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3582 | -}, { |
3583 | - .name = "timer0", |
3584 | - .start = 0x0040, |
3585 | - .end = 0x0043, |
3586 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3587 | -}, { |
3588 | - .name = "timer1", |
3589 | - .start = 0x0050, |
3590 | - .end = 0x0053, |
3591 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3592 | -}, { |
3593 | - .name = "keyboard", |
3594 | - .start = 0x0060, |
3595 | - .end = 0x006f, |
3596 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3597 | -}, { |
3598 | - .name = "dma page reg", |
3599 | - .start = 0x0080, |
3600 | - .end = 0x008f, |
3601 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3602 | -}, { |
3603 | - .name = "pic2", |
3604 | - .start = 0x00a0, |
3605 | - .end = 0x00a1, |
3606 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3607 | -}, { |
3608 | - .name = "dma2", |
3609 | - .start = 0x00c0, |
3610 | - .end = 0x00df, |
3611 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3612 | -}, { |
3613 | - .name = "fpu", |
3614 | - .start = 0x00f0, |
3615 | - .end = 0x00ff, |
3616 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO |
3617 | -} }; |
3618 | - |
3619 | -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) |
3620 | - |
3621 | -static int __init romchecksum(unsigned char *rom, unsigned long length) |
3622 | -{ |
3623 | - unsigned char *p, sum = 0; |
3624 | - |
3625 | - for (p = rom; p < rom + length; p++) |
3626 | - sum += *p; |
3627 | - return sum == 0; |
3628 | -} |
3629 | - |
3630 | -static void __init probe_roms(void) |
3631 | -{ |
3632 | - unsigned long start, length, upper; |
3633 | - unsigned char *rom; |
3634 | - int i; |
3635 | - |
3636 | -#ifdef CONFIG_XEN |
3637 | - /* Nothing to do if not running in dom0. */ |
3638 | - if (!is_initial_xendomain()) |
3639 | - return; |
3640 | -#endif |
3641 | - |
3642 | - /* video rom */ |
3643 | - upper = adapter_rom_resources[0].start; |
3644 | - for (start = video_rom_resource.start; start < upper; start += 2048) { |
3645 | - rom = isa_bus_to_virt(start); |
3646 | - if (!romsignature(rom)) |
3647 | - continue; |
3648 | - |
3649 | - video_rom_resource.start = start; |
3650 | - |
3651 | - /* 0 < length <= 0x7f * 512, historically */ |
3652 | - length = rom[2] * 512; |
3653 | - |
3654 | - /* if checksum okay, trust length byte */ |
3655 | - if (length && romchecksum(rom, length)) |
3656 | - video_rom_resource.end = start + length - 1; |
3657 | - |
3658 | - request_resource(&iomem_resource, &video_rom_resource); |
3659 | - break; |
3660 | - } |
3661 | - |
3662 | - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; |
3663 | - if (start < upper) |
3664 | - start = upper; |
3665 | - |
3666 | - /* system rom */ |
3667 | - request_resource(&iomem_resource, &system_rom_resource); |
3668 | - upper = system_rom_resource.start; |
3669 | - |
3670 | - /* check for extension rom (ignore length byte!) */ |
3671 | - rom = isa_bus_to_virt(extension_rom_resource.start); |
3672 | - if (romsignature(rom)) { |
3673 | - length = extension_rom_resource.end - extension_rom_resource.start + 1; |
3674 | - if (romchecksum(rom, length)) { |
3675 | - request_resource(&iomem_resource, &extension_rom_resource); |
3676 | - upper = extension_rom_resource.start; |
3677 | - } |
3678 | - } |
3679 | - |
3680 | - /* check for adapter roms on 2k boundaries */ |
3681 | - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { |
3682 | - rom = isa_bus_to_virt(start); |
3683 | - if (!romsignature(rom)) |
3684 | - continue; |
3685 | - |
3686 | - /* 0 < length <= 0x7f * 512, historically */ |
3687 | - length = rom[2] * 512; |
3688 | - |
3689 | - /* but accept any length that fits if checksum okay */ |
3690 | - if (!length || start + length > upper || !romchecksum(rom, length)) |
3691 | - continue; |
3692 | - |
3693 | - adapter_rom_resources[i].start = start; |
3694 | - adapter_rom_resources[i].end = start + length - 1; |
3695 | - request_resource(&iomem_resource, &adapter_rom_resources[i]); |
3696 | - |
3697 | - start = adapter_rom_resources[i++].end & ~2047UL; |
3698 | - } |
3699 | -} |
3700 | - |
3701 | /* |
3702 | * Point at the empty zero page to start with. We map the real shared_info |
3703 | * page as soon as fixmap is up and running. |
3704 | @@ -389,338 +170,6 @@ |
3705 | start_info_t *xen_start_info; |
3706 | EXPORT_SYMBOL(xen_start_info); |
3707 | |
3708 | -void __init add_memory_region(unsigned long long start, |
3709 | - unsigned long long size, int type) |
3710 | -{ |
3711 | - int x; |
3712 | - |
3713 | - if (!efi_enabled) { |
3714 | - x = e820.nr_map; |
3715 | - |
3716 | - if (x == E820MAX) { |
3717 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
3718 | - return; |
3719 | - } |
3720 | - |
3721 | - e820.map[x].addr = start; |
3722 | - e820.map[x].size = size; |
3723 | - e820.map[x].type = type; |
3724 | - e820.nr_map++; |
3725 | - } |
3726 | -} /* add_memory_region */ |
3727 | - |
3728 | -static void __init limit_regions(unsigned long long size) |
3729 | -{ |
3730 | - unsigned long long current_addr = 0; |
3731 | - int i; |
3732 | - |
3733 | - if (efi_enabled) { |
3734 | - efi_memory_desc_t *md; |
3735 | - void *p; |
3736 | - |
3737 | - for (p = memmap.map, i = 0; p < memmap.map_end; |
3738 | - p += memmap.desc_size, i++) { |
3739 | - md = p; |
3740 | - current_addr = md->phys_addr + (md->num_pages << 12); |
3741 | - if (md->type == EFI_CONVENTIONAL_MEMORY) { |
3742 | - if (current_addr >= size) { |
3743 | - md->num_pages -= |
3744 | - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); |
3745 | - memmap.nr_map = i + 1; |
3746 | - return; |
3747 | - } |
3748 | - } |
3749 | - } |
3750 | - } |
3751 | - for (i = 0; i < e820.nr_map; i++) { |
3752 | - current_addr = e820.map[i].addr + e820.map[i].size; |
3753 | - if (current_addr < size) |
3754 | - continue; |
3755 | - |
3756 | - if (e820.map[i].type != E820_RAM) |
3757 | - continue; |
3758 | - |
3759 | - if (e820.map[i].addr >= size) { |
3760 | - /* |
3761 | - * This region starts past the end of the |
3762 | - * requested size, skip it completely. |
3763 | - */ |
3764 | - e820.nr_map = i; |
3765 | - } else { |
3766 | - e820.nr_map = i + 1; |
3767 | - e820.map[i].size -= current_addr - size; |
3768 | - } |
3769 | - return; |
3770 | - } |
3771 | -#ifdef CONFIG_XEN |
3772 | - if (i==e820.nr_map && current_addr < size) { |
3773 | - /* |
3774 | - * The e820 map finished before our requested size so |
3775 | - * extend the final entry to the requested address. |
3776 | - */ |
3777 | - --i; |
3778 | - if (e820.map[i].type == E820_RAM) |
3779 | - e820.map[i].size -= current_addr - size; |
3780 | - else |
3781 | - add_memory_region(current_addr, size - current_addr, E820_RAM); |
3782 | - } |
3783 | -#endif |
3784 | -} |
3785 | - |
3786 | -#define E820_DEBUG 1 |
3787 | - |
3788 | -static void __init print_memory_map(char *who) |
3789 | -{ |
3790 | - int i; |
3791 | - |
3792 | - for (i = 0; i < e820.nr_map; i++) { |
3793 | - printk(" %s: %016Lx - %016Lx ", who, |
3794 | - e820.map[i].addr, |
3795 | - e820.map[i].addr + e820.map[i].size); |
3796 | - switch (e820.map[i].type) { |
3797 | - case E820_RAM: printk("(usable)\n"); |
3798 | - break; |
3799 | - case E820_RESERVED: |
3800 | - printk("(reserved)\n"); |
3801 | - break; |
3802 | - case E820_ACPI: |
3803 | - printk("(ACPI data)\n"); |
3804 | - break; |
3805 | - case E820_NVS: |
3806 | - printk("(ACPI NVS)\n"); |
3807 | - break; |
3808 | - default: printk("type %lu\n", e820.map[i].type); |
3809 | - break; |
3810 | - } |
3811 | - } |
3812 | -} |
3813 | - |
3814 | -/* |
3815 | - * Sanitize the BIOS e820 map. |
3816 | - * |
3817 | - * Some e820 responses include overlapping entries. The following |
3818 | - * replaces the original e820 map with a new one, removing overlaps. |
3819 | - * |
3820 | - */ |
3821 | -struct change_member { |
3822 | - struct e820entry *pbios; /* pointer to original bios entry */ |
3823 | - unsigned long long addr; /* address for this change point */ |
3824 | -}; |
3825 | -static struct change_member change_point_list[2*E820MAX] __initdata; |
3826 | -static struct change_member *change_point[2*E820MAX] __initdata; |
3827 | -static struct e820entry *overlap_list[E820MAX] __initdata; |
3828 | -static struct e820entry new_bios[E820MAX] __initdata; |
3829 | - |
3830 | -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) |
3831 | -{ |
3832 | - struct change_member *change_tmp; |
3833 | - unsigned long current_type, last_type; |
3834 | - unsigned long long last_addr; |
3835 | - int chgidx, still_changing; |
3836 | - int overlap_entries; |
3837 | - int new_bios_entry; |
3838 | - int old_nr, new_nr, chg_nr; |
3839 | - int i; |
3840 | - |
3841 | - /* |
3842 | - Visually we're performing the following (1,2,3,4 = memory types)... |
3843 | - |
3844 | - Sample memory map (w/overlaps): |
3845 | - ____22__________________ |
3846 | - ______________________4_ |
3847 | - ____1111________________ |
3848 | - _44_____________________ |
3849 | - 11111111________________ |
3850 | - ____________________33__ |
3851 | - ___________44___________ |
3852 | - __________33333_________ |
3853 | - ______________22________ |
3854 | - ___________________2222_ |
3855 | - _________111111111______ |
3856 | - _____________________11_ |
3857 | - _________________4______ |
3858 | - |
3859 | - Sanitized equivalent (no overlap): |
3860 | - 1_______________________ |
3861 | - _44_____________________ |
3862 | - ___1____________________ |
3863 | - ____22__________________ |
3864 | - ______11________________ |
3865 | - _________1______________ |
3866 | - __________3_____________ |
3867 | - ___________44___________ |
3868 | - _____________33_________ |
3869 | - _______________2________ |
3870 | - ________________1_______ |
3871 | - _________________4______ |
3872 | - ___________________2____ |
3873 | - ____________________33__ |
3874 | - ______________________4_ |
3875 | - */ |
3876 | - |
3877 | - /* if there's only one memory region, don't bother */ |
3878 | - if (*pnr_map < 2) |
3879 | - return -1; |
3880 | - |
3881 | - old_nr = *pnr_map; |
3882 | - |
3883 | - /* bail out if we find any unreasonable addresses in bios map */ |
3884 | - for (i=0; i<old_nr; i++) |
3885 | - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) |
3886 | - return -1; |
3887 | - |
3888 | - /* create pointers for initial change-point information (for sorting) */ |
3889 | - for (i=0; i < 2*old_nr; i++) |
3890 | - change_point[i] = &change_point_list[i]; |
3891 | - |
3892 | - /* record all known change-points (starting and ending addresses), |
3893 | - omitting those that are for empty memory regions */ |
3894 | - chgidx = 0; |
3895 | - for (i=0; i < old_nr; i++) { |
3896 | - if (biosmap[i].size != 0) { |
3897 | - change_point[chgidx]->addr = biosmap[i].addr; |
3898 | - change_point[chgidx++]->pbios = &biosmap[i]; |
3899 | - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; |
3900 | - change_point[chgidx++]->pbios = &biosmap[i]; |
3901 | - } |
3902 | - } |
3903 | - chg_nr = chgidx; /* true number of change-points */ |
3904 | - |
3905 | - /* sort change-point list by memory addresses (low -> high) */ |
3906 | - still_changing = 1; |
3907 | - while (still_changing) { |
3908 | - still_changing = 0; |
3909 | - for (i=1; i < chg_nr; i++) { |
3910 | - /* if <current_addr> > <last_addr>, swap */ |
3911 | - /* or, if current=<start_addr> & last=<end_addr>, swap */ |
3912 | - if ((change_point[i]->addr < change_point[i-1]->addr) || |
3913 | - ((change_point[i]->addr == change_point[i-1]->addr) && |
3914 | - (change_point[i]->addr == change_point[i]->pbios->addr) && |
3915 | - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) |
3916 | - ) |
3917 | - { |
3918 | - change_tmp = change_point[i]; |
3919 | - change_point[i] = change_point[i-1]; |
3920 | - change_point[i-1] = change_tmp; |
3921 | - still_changing=1; |
3922 | - } |
3923 | - } |
3924 | - } |
3925 | - |
3926 | - /* create a new bios memory map, removing overlaps */ |
3927 | - overlap_entries=0; /* number of entries in the overlap table */ |
3928 | - new_bios_entry=0; /* index for creating new bios map entries */ |
3929 | - last_type = 0; /* start with undefined memory type */ |
3930 | - last_addr = 0; /* start with 0 as last starting address */ |
3931 | - /* loop through change-points, determining affect on the new bios map */ |
3932 | - for (chgidx=0; chgidx < chg_nr; chgidx++) |
3933 | - { |
3934 | - /* keep track of all overlapping bios entries */ |
3935 | - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) |
3936 | - { |
3937 | - /* add map entry to overlap list (> 1 entry implies an overlap) */ |
3938 | - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; |
3939 | - } |
3940 | - else |
3941 | - { |
3942 | - /* remove entry from list (order independent, so swap with last) */ |
3943 | - for (i=0; i<overlap_entries; i++) |
3944 | - { |
3945 | - if (overlap_list[i] == change_point[chgidx]->pbios) |
3946 | - overlap_list[i] = overlap_list[overlap_entries-1]; |
3947 | - } |
3948 | - overlap_entries--; |
3949 | - } |
3950 | - /* if there are overlapping entries, decide which "type" to use */ |
3951 | - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ |
3952 | - current_type = 0; |
3953 | - for (i=0; i<overlap_entries; i++) |
3954 | - if (overlap_list[i]->type > current_type) |
3955 | - current_type = overlap_list[i]->type; |
3956 | - /* continue building up new bios map based on this information */ |
3957 | - if (current_type != last_type) { |
3958 | - if (last_type != 0) { |
3959 | - new_bios[new_bios_entry].size = |
3960 | - change_point[chgidx]->addr - last_addr; |
3961 | - /* move forward only if the new size was non-zero */ |
3962 | - if (new_bios[new_bios_entry].size != 0) |
3963 | - if (++new_bios_entry >= E820MAX) |
3964 | - break; /* no more space left for new bios entries */ |
3965 | - } |
3966 | - if (current_type != 0) { |
3967 | - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; |
3968 | - new_bios[new_bios_entry].type = current_type; |
3969 | - last_addr=change_point[chgidx]->addr; |
3970 | - } |
3971 | - last_type = current_type; |
3972 | - } |
3973 | - } |
3974 | - new_nr = new_bios_entry; /* retain count for new bios entries */ |
3975 | - |
3976 | - /* copy new bios mapping into original location */ |
3977 | - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); |
3978 | - *pnr_map = new_nr; |
3979 | - |
3980 | - return 0; |
3981 | -} |
3982 | - |
3983 | -/* |
3984 | - * Copy the BIOS e820 map into a safe place. |
3985 | - * |
3986 | - * Sanity-check it while we're at it.. |
3987 | - * |
3988 | - * If we're lucky and live on a modern system, the setup code |
3989 | - * will have given us a memory map that we can use to properly |
3990 | - * set up memory. If we aren't, we'll fake a memory map. |
3991 | - * |
3992 | - * We check to see that the memory map contains at least 2 elements |
3993 | - * before we'll use it, because the detection code in setup.S may |
3994 | - * not be perfect and most every PC known to man has two memory |
3995 | - * regions: one from 0 to 640k, and one from 1mb up. (The IBM |
3996 | - * thinkpad 560x, for example, does not cooperate with the memory |
3997 | - * detection code.) |
3998 | - */ |
3999 | -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) |
4000 | -{ |
4001 | -#ifndef CONFIG_XEN |
4002 | - /* Only one memory region (or negative)? Ignore it */ |
4003 | - if (nr_map < 2) |
4004 | - return -1; |
4005 | -#else |
4006 | - BUG_ON(nr_map < 1); |
4007 | -#endif |
4008 | - |
4009 | - do { |
4010 | - unsigned long long start = biosmap->addr; |
4011 | - unsigned long long size = biosmap->size; |
4012 | - unsigned long long end = start + size; |
4013 | - unsigned long type = biosmap->type; |
4014 | - |
4015 | - /* Overflow in 64 bits? Ignore the memory map. */ |
4016 | - if (start > end) |
4017 | - return -1; |
4018 | - |
4019 | -#ifndef CONFIG_XEN |
4020 | - /* |
4021 | - * Some BIOSes claim RAM in the 640k - 1M region. |
4022 | - * Not right. Fix it up. |
4023 | - */ |
4024 | - if (type == E820_RAM) { |
4025 | - if (start < 0x100000ULL && end > 0xA0000ULL) { |
4026 | - if (start < 0xA0000ULL) |
4027 | - add_memory_region(start, 0xA0000ULL-start, type); |
4028 | - if (end <= 0x100000ULL) |
4029 | - continue; |
4030 | - start = 0x100000ULL; |
4031 | - size = end - start; |
4032 | - } |
4033 | - } |
4034 | -#endif |
4035 | - add_memory_region(start, size, type); |
4036 | - } while (biosmap++,--nr_map); |
4037 | - return 0; |
4038 | -} |
4039 | - |
4040 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
4041 | struct edd edd; |
4042 | #ifdef CONFIG_EDD_MODULE |
4043 | @@ -746,7 +195,7 @@ |
4044 | } |
4045 | #endif |
4046 | |
4047 | -static int __initdata user_defined_memmap = 0; |
4048 | +int __initdata user_defined_memmap = 0; |
4049 | |
4050 | /* |
4051 | * "mem=nopentium" disables the 4MB page tables. |
4052 | @@ -783,51 +232,6 @@ |
4053 | } |
4054 | early_param("mem", parse_mem); |
4055 | |
4056 | -static int __init parse_memmap(char *arg) |
4057 | -{ |
4058 | - if (!arg) |
4059 | - return -EINVAL; |
4060 | - |
4061 | - if (strcmp(arg, "exactmap") == 0) { |
4062 | -#ifdef CONFIG_CRASH_DUMP |
4063 | - /* If we are doing a crash dump, we |
4064 | - * still need to know the real mem |
4065 | - * size before original memory map is |
4066 | - * reset. |
4067 | - */ |
4068 | - find_max_pfn(); |
4069 | - saved_max_pfn = max_pfn; |
4070 | -#endif |
4071 | - e820.nr_map = 0; |
4072 | - user_defined_memmap = 1; |
4073 | - } else { |
4074 | - /* If the user specifies memory size, we |
4075 | - * limit the BIOS-provided memory map to |
4076 | - * that size. exactmap can be used to specify |
4077 | - * the exact map. mem=number can be used to |
4078 | - * trim the existing memory map. |
4079 | - */ |
4080 | - unsigned long long start_at, mem_size; |
4081 | - |
4082 | - mem_size = memparse(arg, &arg); |
4083 | - if (*arg == '@') { |
4084 | - start_at = memparse(arg+1, &arg); |
4085 | - add_memory_region(start_at, mem_size, E820_RAM); |
4086 | - } else if (*arg == '#') { |
4087 | - start_at = memparse(arg+1, &arg); |
4088 | - add_memory_region(start_at, mem_size, E820_ACPI); |
4089 | - } else if (*arg == '$') { |
4090 | - start_at = memparse(arg+1, &arg); |
4091 | - add_memory_region(start_at, mem_size, E820_RESERVED); |
4092 | - } else { |
4093 | - limit_regions(mem_size); |
4094 | - user_defined_memmap = 1; |
4095 | - } |
4096 | - } |
4097 | - return 0; |
4098 | -} |
4099 | -early_param("memmap", parse_memmap); |
4100 | - |
4101 | #ifdef CONFIG_PROC_VMCORE |
4102 | /* elfcorehdr= specifies the location of elf core header |
4103 | * stored by the crashed kernel. |
4104 | @@ -894,127 +298,6 @@ |
4105 | #endif |
4106 | |
4107 | /* |
4108 | - * Callback for efi_memory_walk. |
4109 | - */ |
4110 | -static int __init |
4111 | -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) |
4112 | -{ |
4113 | - unsigned long *max_pfn = arg, pfn; |
4114 | - |
4115 | - if (start < end) { |
4116 | - pfn = PFN_UP(end -1); |
4117 | - if (pfn > *max_pfn) |
4118 | - *max_pfn = pfn; |
4119 | - } |
4120 | - return 0; |
4121 | -} |
4122 | - |
4123 | -static int __init |
4124 | -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) |
4125 | -{ |
4126 | - memory_present(0, PFN_UP(start), PFN_DOWN(end)); |
4127 | - return 0; |
4128 | -} |
4129 | - |
4130 | -/* |
4131 | - * This function checks if any part of the range <start,end> is mapped |
4132 | - * with type. |
4133 | - */ |
4134 | -int |
4135 | -e820_any_mapped(u64 start, u64 end, unsigned type) |
4136 | -{ |
4137 | - int i; |
4138 | - |
4139 | -#ifndef CONFIG_XEN |
4140 | - for (i = 0; i < e820.nr_map; i++) { |
4141 | - const struct e820entry *ei = &e820.map[i]; |
4142 | -#else |
4143 | - if (!is_initial_xendomain()) |
4144 | - return 0; |
4145 | - for (i = 0; i < machine_e820.nr_map; ++i) { |
4146 | - const struct e820entry *ei = &machine_e820.map[i]; |
4147 | -#endif |
4148 | - |
4149 | - if (type && ei->type != type) |
4150 | - continue; |
4151 | - if (ei->addr >= end || ei->addr + ei->size <= start) |
4152 | - continue; |
4153 | - return 1; |
4154 | - } |
4155 | - return 0; |
4156 | -} |
4157 | -EXPORT_SYMBOL_GPL(e820_any_mapped); |
4158 | - |
4159 | - /* |
4160 | - * This function checks if the entire range <start,end> is mapped with type. |
4161 | - * |
4162 | - * Note: this function only works correct if the e820 table is sorted and |
4163 | - * not-overlapping, which is the case |
4164 | - */ |
4165 | -int __init |
4166 | -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) |
4167 | -{ |
4168 | - u64 start = s; |
4169 | - u64 end = e; |
4170 | - int i; |
4171 | - |
4172 | -#ifndef CONFIG_XEN |
4173 | - for (i = 0; i < e820.nr_map; i++) { |
4174 | - struct e820entry *ei = &e820.map[i]; |
4175 | -#else |
4176 | - if (!is_initial_xendomain()) |
4177 | - return 0; |
4178 | - for (i = 0; i < machine_e820.nr_map; ++i) { |
4179 | - const struct e820entry *ei = &machine_e820.map[i]; |
4180 | -#endif |
4181 | - if (type && ei->type != type) |
4182 | - continue; |
4183 | - /* is the region (part) in overlap with the current region ?*/ |
4184 | - if (ei->addr >= end || ei->addr + ei->size <= start) |
4185 | - continue; |
4186 | - /* if the region is at the beginning of <start,end> we move |
4187 | - * start to the end of the region since it's ok until there |
4188 | - */ |
4189 | - if (ei->addr <= start) |
4190 | - start = ei->addr + ei->size; |
4191 | - /* if start is now at or beyond end, we're done, full |
4192 | - * coverage */ |
4193 | - if (start >= end) |
4194 | - return 1; /* we're done */ |
4195 | - } |
4196 | - return 0; |
4197 | -} |
4198 | - |
4199 | -/* |
4200 | - * Find the highest page frame number we have available |
4201 | - */ |
4202 | -void __init find_max_pfn(void) |
4203 | -{ |
4204 | - int i; |
4205 | - |
4206 | - max_pfn = 0; |
4207 | - if (efi_enabled) { |
4208 | - efi_memmap_walk(efi_find_max_pfn, &max_pfn); |
4209 | - efi_memmap_walk(efi_memory_present_wrapper, NULL); |
4210 | - return; |
4211 | - } |
4212 | - |
4213 | - for (i = 0; i < e820.nr_map; i++) { |
4214 | - unsigned long start, end; |
4215 | - /* RAM? */ |
4216 | - if (e820.map[i].type != E820_RAM) |
4217 | - continue; |
4218 | - start = PFN_UP(e820.map[i].addr); |
4219 | - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); |
4220 | - if (start >= end) |
4221 | - continue; |
4222 | - if (end > max_pfn) |
4223 | - max_pfn = end; |
4224 | - memory_present(0, start, end); |
4225 | - } |
4226 | -} |
4227 | - |
4228 | -/* |
4229 | * Determine low and high memory ranges: |
4230 | */ |
4231 | unsigned long __init find_max_low_pfn(void) |
4232 | @@ -1073,77 +356,6 @@ |
4233 | return max_low_pfn; |
4234 | } |
4235 | |
4236 | -/* |
4237 | - * Free all available memory for boot time allocation. Used |
4238 | - * as a callback function by efi_memory_walk() |
4239 | - */ |
4240 | - |
4241 | -static int __init |
4242 | -free_available_memory(unsigned long start, unsigned long end, void *arg) |
4243 | -{ |
4244 | - /* check max_low_pfn */ |
4245 | - if (start >= (max_low_pfn << PAGE_SHIFT)) |
4246 | - return 0; |
4247 | - if (end >= (max_low_pfn << PAGE_SHIFT)) |
4248 | - end = max_low_pfn << PAGE_SHIFT; |
4249 | - if (start < end) |
4250 | - free_bootmem(start, end - start); |
4251 | - |
4252 | - return 0; |
4253 | -} |
4254 | -/* |
4255 | - * Register fully available low RAM pages with the bootmem allocator. |
4256 | - */ |
4257 | -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) |
4258 | -{ |
4259 | - int i; |
4260 | - |
4261 | - if (efi_enabled) { |
4262 | - efi_memmap_walk(free_available_memory, NULL); |
4263 | - return; |
4264 | - } |
4265 | - for (i = 0; i < e820.nr_map; i++) { |
4266 | - unsigned long curr_pfn, last_pfn, size; |
4267 | - /* |
4268 | - * Reserve usable low memory |
4269 | - */ |
4270 | - if (e820.map[i].type != E820_RAM) |
4271 | - continue; |
4272 | - /* |
4273 | - * We are rounding up the start address of usable memory: |
4274 | - */ |
4275 | - curr_pfn = PFN_UP(e820.map[i].addr); |
4276 | - if (curr_pfn >= max_low_pfn) |
4277 | - continue; |
4278 | - /* |
4279 | - * ... and at the end of the usable range downwards: |
4280 | - */ |
4281 | - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); |
4282 | - |
4283 | -#ifdef CONFIG_XEN |
4284 | - /* |
4285 | - * Truncate to the number of actual pages currently |
4286 | - * present. |
4287 | - */ |
4288 | - if (last_pfn > xen_start_info->nr_pages) |
4289 | - last_pfn = xen_start_info->nr_pages; |
4290 | -#endif |
4291 | - |
4292 | - if (last_pfn > max_low_pfn) |
4293 | - last_pfn = max_low_pfn; |
4294 | - |
4295 | - /* |
4296 | - * .. finally, did all the rounding and playing |
4297 | - * around just make the area go away? |
4298 | - */ |
4299 | - if (last_pfn <= curr_pfn) |
4300 | - continue; |
4301 | - |
4302 | - size = last_pfn - curr_pfn; |
4303 | - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); |
4304 | - } |
4305 | -} |
4306 | - |
4307 | #ifndef CONFIG_XEN |
4308 | /* |
4309 | * workaround for Dell systems that neglect to reserve EBDA |
4310 | @@ -1233,8 +445,8 @@ |
4311 | * the (very unlikely) case of us accidentally initializing the |
4312 | * bootmem allocator with an invalid RAM area. |
4313 | */ |
4314 | - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + |
4315 | - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); |
4316 | + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + |
4317 | + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); |
4318 | |
4319 | #ifndef CONFIG_XEN |
4320 | /* |
4321 | @@ -1316,170 +528,6 @@ |
4322 | } |
4323 | } |
4324 | |
4325 | -/* |
4326 | - * Request address space for all standard RAM and ROM resources |
4327 | - * and also for regions reported as reserved by the e820. |
4328 | - */ |
4329 | -static void __init |
4330 | -legacy_init_iomem_resources(struct e820entry *e820, int nr_map, |
4331 | - struct resource *code_resource, |
4332 | - struct resource *data_resource) |
4333 | -{ |
4334 | - int i; |
4335 | - |
4336 | - probe_roms(); |
4337 | - |
4338 | - for (i = 0; i < nr_map; i++) { |
4339 | - struct resource *res; |
4340 | -#ifndef CONFIG_RESOURCES_64BIT |
4341 | - if (e820[i].addr + e820[i].size > 0x100000000ULL) |
4342 | - continue; |
4343 | -#endif |
4344 | - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); |
4345 | - switch (e820[i].type) { |
4346 | - case E820_RAM: res->name = "System RAM"; break; |
4347 | - case E820_ACPI: res->name = "ACPI Tables"; break; |
4348 | - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; |
4349 | - default: res->name = "reserved"; |
4350 | - } |
4351 | - res->start = e820[i].addr; |
4352 | - res->end = res->start + e820[i].size - 1; |
4353 | - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4354 | - if (request_resource(&iomem_resource, res)) { |
4355 | - kfree(res); |
4356 | - continue; |
4357 | - } |
4358 | - if (e820[i].type == E820_RAM) { |
4359 | - /* |
4360 | - * We don't know which RAM region contains kernel data, |
4361 | - * so we try it repeatedly and let the resource manager |
4362 | - * test it. |
4363 | - */ |
4364 | -#ifndef CONFIG_XEN |
4365 | - request_resource(res, code_resource); |
4366 | - request_resource(res, data_resource); |
4367 | -#endif |
4368 | -#ifdef CONFIG_KEXEC |
4369 | - if (crashk_res.start != crashk_res.end) |
4370 | - request_resource(res, &crashk_res); |
4371 | -#ifdef CONFIG_XEN |
4372 | - xen_machine_kexec_register_resources(res); |
4373 | -#endif |
4374 | -#endif |
4375 | - } |
4376 | - } |
4377 | -} |
4378 | - |
4379 | -/* |
4380 | - * Locate a unused range of the physical address space below 4G which |
4381 | - * can be used for PCI mappings. |
4382 | - */ |
4383 | -static void __init |
4384 | -e820_setup_gap(struct e820entry *e820, int nr_map) |
4385 | -{ |
4386 | - unsigned long gapstart, gapsize, round; |
4387 | - unsigned long long last; |
4388 | - int i; |
4389 | - |
4390 | - /* |
4391 | - * Search for the bigest gap in the low 32 bits of the e820 |
4392 | - * memory space. |
4393 | - */ |
4394 | - last = 0x100000000ull; |
4395 | - gapstart = 0x10000000; |
4396 | - gapsize = 0x400000; |
4397 | - i = nr_map; |
4398 | - while (--i >= 0) { |
4399 | - unsigned long long start = e820[i].addr; |
4400 | - unsigned long long end = start + e820[i].size; |
4401 | - |
4402 | - /* |
4403 | - * Since "last" is at most 4GB, we know we'll |
4404 | - * fit in 32 bits if this condition is true |
4405 | - */ |
4406 | - if (last > end) { |
4407 | - unsigned long gap = last - end; |
4408 | - |
4409 | - if (gap > gapsize) { |
4410 | - gapsize = gap; |
4411 | - gapstart = end; |
4412 | - } |
4413 | - } |
4414 | - if (start < last) |
4415 | - last = start; |
4416 | - } |
4417 | - |
4418 | - /* |
4419 | - * See how much we want to round up: start off with |
4420 | - * rounding to the next 1MB area. |
4421 | - */ |
4422 | - round = 0x100000; |
4423 | - while ((gapsize >> 4) > round) |
4424 | - round += round; |
4425 | - /* Fun with two's complement */ |
4426 | - pci_mem_start = (gapstart + round) & -round; |
4427 | - |
4428 | - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", |
4429 | - pci_mem_start, gapstart, gapsize); |
4430 | -} |
4431 | - |
4432 | -/* |
4433 | - * Request address space for all standard resources |
4434 | - * |
4435 | - * This is called just before pcibios_init(), which is also a |
4436 | - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). |
4437 | - */ |
4438 | -static int __init request_standard_resources(void) |
4439 | -{ |
4440 | - int i; |
4441 | - |
4442 | - /* Nothing to do if not running in dom0. */ |
4443 | - if (!is_initial_xendomain()) |
4444 | - return 0; |
4445 | - |
4446 | - printk("Setting up standard PCI resources\n"); |
4447 | -#ifdef CONFIG_XEN |
4448 | - legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map, |
4449 | - &code_resource, &data_resource); |
4450 | -#else |
4451 | - if (efi_enabled) |
4452 | - efi_initialize_iomem_resources(&code_resource, &data_resource); |
4453 | - else |
4454 | - legacy_init_iomem_resources(e820.map, e820.nr_map, |
4455 | - &code_resource, &data_resource); |
4456 | -#endif |
4457 | - |
4458 | - /* EFI systems may still have VGA */ |
4459 | - request_resource(&iomem_resource, &video_ram_resource); |
4460 | - |
4461 | - /* request I/O space for devices used on all i[345]86 PCs */ |
4462 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) |
4463 | - request_resource(&ioport_resource, &standard_io_resources[i]); |
4464 | - return 0; |
4465 | -} |
4466 | - |
4467 | -subsys_initcall(request_standard_resources); |
4468 | - |
4469 | -static void __init register_memory(void) |
4470 | -{ |
4471 | -#ifdef CONFIG_XEN |
4472 | - if (is_initial_xendomain()) { |
4473 | - struct xen_memory_map memmap; |
4474 | - |
4475 | - memmap.nr_entries = E820MAX; |
4476 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); |
4477 | - |
4478 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) |
4479 | - BUG(); |
4480 | - |
4481 | - machine_e820.nr_map = memmap.nr_entries; |
4482 | - e820_setup_gap(machine_e820.map, machine_e820.nr_map); |
4483 | - } |
4484 | - else |
4485 | -#endif |
4486 | - e820_setup_gap(e820.map, e820.nr_map); |
4487 | -} |
4488 | - |
4489 | #ifdef CONFIG_MCA |
4490 | static void set_mca_bus(int x) |
4491 | { |
4492 | @@ -1489,6 +537,12 @@ |
4493 | static void set_mca_bus(int x) { } |
4494 | #endif |
4495 | |
4496 | +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ |
4497 | +char * __init __attribute__((weak)) memory_setup(void) |
4498 | +{ |
4499 | + return machine_specific_memory_setup(); |
4500 | +} |
4501 | + |
4502 | /* |
4503 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
4504 | * passed the efi memmap, systab, etc., so we should use these data structures |
4505 | @@ -1576,7 +630,7 @@ |
4506 | efi_init(); |
4507 | else { |
4508 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
4509 | - print_memory_map(machine_specific_memory_setup()); |
4510 | + print_memory_map(memory_setup()); |
4511 | } |
4512 | |
4513 | copy_edd(); |
4514 | @@ -1755,7 +809,7 @@ |
4515 | get_smp_config(); |
4516 | #endif |
4517 | |
4518 | - register_memory(); |
4519 | + e820_register_memory(); |
4520 | |
4521 | if (is_initial_xendomain()) { |
4522 | #ifdef CONFIG_VT |
4523 | --- a/arch/x86/kernel/setup_64-xen.c |
4524 | +++ b/arch/x86/kernel/setup_64-xen.c |
4525 | @@ -576,8 +576,7 @@ |
4526 | if (LOADER_TYPE && INITRD_START) { |
4527 | if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { |
4528 | reserve_bootmem_generic(INITRD_START, INITRD_SIZE); |
4529 | - initrd_start = |
4530 | - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; |
4531 | + initrd_start = INITRD_START + PAGE_OFFSET; |
4532 | initrd_end = initrd_start+INITRD_SIZE; |
4533 | } |
4534 | else { |
4535 | @@ -1003,11 +1002,8 @@ |
4536 | /* Fix cpuid4 emulation for more */ |
4537 | num_cache_leaves = 3; |
4538 | |
4539 | - /* When there is only one core no need to synchronize RDTSC */ |
4540 | - if (num_possible_cpus() == 1) |
4541 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4542 | - else |
4543 | - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4544 | + /* RDTSC can be speculated around */ |
4545 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4546 | } |
4547 | |
4548 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
4549 | @@ -1106,6 +1102,15 @@ |
4550 | set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); |
4551 | } |
4552 | |
4553 | + if (cpu_has_ds) { |
4554 | + unsigned int l1, l2; |
4555 | + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); |
4556 | + if (!(l1 & (1<<11))) |
4557 | + set_bit(X86_FEATURE_BTS, c->x86_capability); |
4558 | + if (!(l1 & (1<<12))) |
4559 | + set_bit(X86_FEATURE_PEBS, c->x86_capability); |
4560 | + } |
4561 | + |
4562 | n = c->extended_cpuid_level; |
4563 | if (n >= 0x80000008) { |
4564 | unsigned eax = cpuid_eax(0x80000008); |
4565 | @@ -1125,7 +1130,10 @@ |
4566 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
4567 | if (c->x86 == 6) |
4568 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); |
4569 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4570 | + if (c->x86 == 15) |
4571 | + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4572 | + else |
4573 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
4574 | c->x86_max_cores = intel_num_cpu_cores(c); |
4575 | |
4576 | srat_detect_node(); |
4577 | --- a/arch/x86/kernel/smp_32-xen.c |
4578 | +++ b/arch/x86/kernel/smp_32-xen.c |
4579 | @@ -659,6 +659,10 @@ |
4580 | put_cpu(); |
4581 | return -EBUSY; |
4582 | } |
4583 | + |
4584 | + /* Can deadlock when called with interrupts disabled */ |
4585 | + WARN_ON(irqs_disabled()); |
4586 | + |
4587 | spin_lock_bh(&call_lock); |
4588 | __smp_call_function_single(cpu, func, info, nonatomic, wait); |
4589 | spin_unlock_bh(&call_lock); |
4590 | --- a/arch/x86/kernel/smp_64-xen.c |
4591 | +++ b/arch/x86/kernel/smp_64-xen.c |
4592 | @@ -384,12 +384,17 @@ |
4593 | put_cpu(); |
4594 | return 0; |
4595 | } |
4596 | + |
4597 | + /* Can deadlock when called with interrupts disabled */ |
4598 | + WARN_ON(irqs_disabled()); |
4599 | + |
4600 | spin_lock_bh(&call_lock); |
4601 | __smp_call_function_single(cpu, func, info, nonatomic, wait); |
4602 | spin_unlock_bh(&call_lock); |
4603 | put_cpu(); |
4604 | return 0; |
4605 | } |
4606 | +EXPORT_SYMBOL(smp_call_function_single); |
4607 | |
4608 | /* |
4609 | * this function sends a 'generic call function' IPI to all other CPUs |
4610 | --- a/arch/x86/kernel/time_32-xen.c |
4611 | +++ b/arch/x86/kernel/time_32-xen.c |
4612 | @@ -61,6 +61,7 @@ |
4613 | #include <asm/uaccess.h> |
4614 | #include <asm/processor.h> |
4615 | #include <asm/timer.h> |
4616 | +#include <asm/time.h> |
4617 | #include <asm/sections.h> |
4618 | |
4619 | #include "mach_time.h" |
4620 | @@ -129,11 +130,11 @@ |
4621 | /* Must be signed, as it's compared with s64 quantities which can be -ve. */ |
4622 | #define NS_PER_TICK (1000000000LL/HZ) |
4623 | |
4624 | -static void __clock_was_set(void *unused) |
4625 | +static void __clock_was_set(struct work_struct *unused) |
4626 | { |
4627 | clock_was_set(); |
4628 | } |
4629 | -static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL); |
4630 | +static DECLARE_WORK(clock_was_set_work, __clock_was_set); |
4631 | |
4632 | static inline void __normalize_time(time_t *sec, s64 *nsec) |
4633 | { |
4634 | @@ -537,10 +538,7 @@ |
4635 | /* gets recalled with irq locally disabled */ |
4636 | /* XXX - does irqsave resolve this? -johnstul */ |
4637 | spin_lock_irqsave(&rtc_lock, flags); |
4638 | - if (efi_enabled) |
4639 | - retval = efi_set_rtc_mmss(nowtime); |
4640 | - else |
4641 | - retval = mach_set_rtc_mmss(nowtime); |
4642 | + retval = set_wallclock(nowtime); |
4643 | spin_unlock_irqrestore(&rtc_lock, flags); |
4644 | |
4645 | return retval; |
4646 | @@ -865,10 +863,7 @@ |
4647 | |
4648 | spin_lock_irqsave(&rtc_lock, flags); |
4649 | |
4650 | - if (efi_enabled) |
4651 | - retval = efi_get_time(); |
4652 | - else |
4653 | - retval = mach_get_cmos_time(); |
4654 | + retval = get_wallclock(); |
4655 | |
4656 | spin_unlock_irqrestore(&rtc_lock, flags); |
4657 | |
4658 | @@ -970,7 +965,7 @@ |
4659 | printk("Using HPET for base-timer\n"); |
4660 | } |
4661 | |
4662 | - time_init_hook(); |
4663 | + do_time_init(); |
4664 | } |
4665 | #endif |
4666 | |
4667 | --- a/arch/x86/kernel/traps_32-xen.c |
4668 | +++ b/arch/x86/kernel/traps_32-xen.c |
4669 | @@ -29,6 +29,8 @@ |
4670 | #include <linux/kexec.h> |
4671 | #include <linux/unwind.h> |
4672 | #include <linux/uaccess.h> |
4673 | +#include <linux/nmi.h> |
4674 | +#include <linux/bug.h> |
4675 | |
4676 | #ifdef CONFIG_EISA |
4677 | #include <linux/ioport.h> |
4678 | @@ -61,9 +63,6 @@ |
4679 | |
4680 | asmlinkage int system_call(void); |
4681 | |
4682 | -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, |
4683 | - { 0, 0 }, { 0, 0 } }; |
4684 | - |
4685 | /* Do we ignore FPU interrupts ? */ |
4686 | char ignore_fpu_irq = 0; |
4687 | |
4688 | @@ -100,12 +99,7 @@ |
4689 | #endif |
4690 | asmlinkage void machine_check(void); |
4691 | |
4692 | -static int kstack_depth_to_print = 24; |
4693 | -#ifdef CONFIG_STACK_UNWIND |
4694 | -static int call_trace = 1; |
4695 | -#else |
4696 | -#define call_trace (-1) |
4697 | -#endif |
4698 | +int kstack_depth_to_print = 24; |
4699 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
4700 | |
4701 | int register_die_notifier(struct notifier_block *nb) |
4702 | @@ -159,25 +153,7 @@ |
4703 | return ebp; |
4704 | } |
4705 | |
4706 | -struct ops_and_data { |
4707 | - struct stacktrace_ops *ops; |
4708 | - void *data; |
4709 | -}; |
4710 | - |
4711 | -static asmlinkage int |
4712 | -dump_trace_unwind(struct unwind_frame_info *info, void *data) |
4713 | -{ |
4714 | - struct ops_and_data *oad = (struct ops_and_data *)data; |
4715 | - int n = 0; |
4716 | - |
4717 | - while (unwind(info) == 0 && UNW_PC(info)) { |
4718 | - n++; |
4719 | - oad->ops->address(oad->data, UNW_PC(info)); |
4720 | - if (arch_unw_user_mode(info)) |
4721 | - break; |
4722 | - } |
4723 | - return n; |
4724 | -} |
4725 | +#define MSG(msg) ops->warning(data, msg) |
4726 | |
4727 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
4728 | unsigned long *stack, |
4729 | @@ -188,39 +164,6 @@ |
4730 | if (!task) |
4731 | task = current; |
4732 | |
4733 | - if (call_trace >= 0) { |
4734 | - int unw_ret = 0; |
4735 | - struct unwind_frame_info info; |
4736 | - struct ops_and_data oad = { .ops = ops, .data = data }; |
4737 | - |
4738 | - if (regs) { |
4739 | - if (unwind_init_frame_info(&info, task, regs) == 0) |
4740 | - unw_ret = dump_trace_unwind(&info, &oad); |
4741 | - } else if (task == current) |
4742 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
4743 | - else { |
4744 | - if (unwind_init_blocked(&info, task) == 0) |
4745 | - unw_ret = dump_trace_unwind(&info, &oad); |
4746 | - } |
4747 | - if (unw_ret > 0) { |
4748 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
4749 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
4750 | - UNW_PC(&info)); |
4751 | - if (UNW_SP(&info) >= PAGE_OFFSET) { |
4752 | - ops->warning(data, "Leftover inexact backtrace:\n"); |
4753 | - stack = (void *)UNW_SP(&info); |
4754 | - if (!stack) |
4755 | - return; |
4756 | - ebp = UNW_FP(&info); |
4757 | - } else |
4758 | - ops->warning(data, "Full inexact backtrace again:\n"); |
4759 | - } else if (call_trace >= 1) |
4760 | - return; |
4761 | - else |
4762 | - ops->warning(data, "Full inexact backtrace again:\n"); |
4763 | - } else |
4764 | - ops->warning(data, "Inexact backtrace:\n"); |
4765 | - } |
4766 | if (!stack) { |
4767 | unsigned long dummy; |
4768 | stack = &dummy; |
4769 | @@ -253,6 +196,7 @@ |
4770 | stack = (unsigned long*)context->previous_esp; |
4771 | if (!stack) |
4772 | break; |
4773 | + touch_nmi_watchdog(); |
4774 | } |
4775 | } |
4776 | EXPORT_SYMBOL(dump_trace); |
4777 | @@ -385,7 +329,7 @@ |
4778 | * time of the fault.. |
4779 | */ |
4780 | if (in_kernel) { |
4781 | - u8 __user *eip; |
4782 | + u8 *eip; |
4783 | int code_bytes = 64; |
4784 | unsigned char c; |
4785 | |
4786 | @@ -394,18 +338,20 @@ |
4787 | |
4788 | printk(KERN_EMERG "Code: "); |
4789 | |
4790 | - eip = (u8 __user *)regs->eip - 43; |
4791 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { |
4792 | + eip = (u8 *)regs->eip - 43; |
4793 | + if (eip < (u8 *)PAGE_OFFSET || |
4794 | + probe_kernel_address(eip, c)) { |
4795 | /* try starting at EIP */ |
4796 | - eip = (u8 __user *)regs->eip; |
4797 | + eip = (u8 *)regs->eip; |
4798 | code_bytes = 32; |
4799 | } |
4800 | for (i = 0; i < code_bytes; i++, eip++) { |
4801 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { |
4802 | + if (eip < (u8 *)PAGE_OFFSET || |
4803 | + probe_kernel_address(eip, c)) { |
4804 | printk(" Bad EIP value."); |
4805 | break; |
4806 | } |
4807 | - if (eip == (u8 __user *)regs->eip) |
4808 | + if (eip == (u8 *)regs->eip) |
4809 | printk("<%02x> ", c); |
4810 | else |
4811 | printk("%02x ", c); |
4812 | @@ -414,43 +360,22 @@ |
4813 | printk("\n"); |
4814 | } |
4815 | |
4816 | -static void handle_BUG(struct pt_regs *regs) |
4817 | +int is_valid_bugaddr(unsigned long eip) |
4818 | { |
4819 | - unsigned long eip = regs->eip; |
4820 | unsigned short ud2; |
4821 | |
4822 | if (eip < PAGE_OFFSET) |
4823 | - return; |
4824 | - if (probe_kernel_address((unsigned short __user *)eip, ud2)) |
4825 | - return; |
4826 | - if (ud2 != 0x0b0f) |
4827 | - return; |
4828 | + return 0; |
4829 | + if (probe_kernel_address((unsigned short *)eip, ud2)) |
4830 | + return 0; |
4831 | |
4832 | - printk(KERN_EMERG "------------[ cut here ]------------\n"); |
4833 | - |
4834 | -#ifdef CONFIG_DEBUG_BUGVERBOSE |
4835 | - do { |
4836 | - unsigned short line; |
4837 | - char *file; |
4838 | - char c; |
4839 | - |
4840 | - if (probe_kernel_address((unsigned short __user *)(eip + 2), |
4841 | - line)) |
4842 | - break; |
4843 | - if (__get_user(file, (char * __user *)(eip + 4)) || |
4844 | - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) |
4845 | - file = "<bad filename>"; |
4846 | - |
4847 | - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); |
4848 | - return; |
4849 | - } while (0); |
4850 | -#endif |
4851 | - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); |
4852 | + return ud2 == 0x0b0f; |
4853 | } |
4854 | |
4855 | -/* This is gone through when something in the kernel |
4856 | - * has done something bad and is about to be terminated. |
4857 | -*/ |
4858 | +/* |
4859 | + * This is gone through when something in the kernel has done something bad and |
4860 | + * is about to be terminated. |
4861 | + */ |
4862 | void die(const char * str, struct pt_regs * regs, long err) |
4863 | { |
4864 | static struct { |
4865 | @@ -458,7 +383,7 @@ |
4866 | u32 lock_owner; |
4867 | int lock_owner_depth; |
4868 | } die = { |
4869 | - .lock = SPIN_LOCK_UNLOCKED, |
4870 | + .lock = __SPIN_LOCK_UNLOCKED(die.lock), |
4871 | .lock_owner = -1, |
4872 | .lock_owner_depth = 0 |
4873 | }; |
4874 | @@ -482,7 +407,8 @@ |
4875 | unsigned long esp; |
4876 | unsigned short ss; |
4877 | |
4878 | - handle_BUG(regs); |
4879 | + report_bug(regs->eip); |
4880 | + |
4881 | printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); |
4882 | #ifdef CONFIG_PREEMPT |
4883 | printk(KERN_EMERG "PREEMPT "); |
4884 | @@ -682,8 +608,7 @@ |
4885 | { |
4886 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " |
4887 | "CPU %d.\n", reason, smp_processor_id()); |
4888 | - printk(KERN_EMERG "You probably have a hardware problem with your RAM " |
4889 | - "chips\n"); |
4890 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); |
4891 | if (panic_on_unrecovered_nmi) |
4892 | panic("NMI: Not continuing"); |
4893 | |
4894 | @@ -741,7 +666,6 @@ |
4895 | printk(" on CPU%d, eip %08lx, registers:\n", |
4896 | smp_processor_id(), regs->eip); |
4897 | show_registers(regs); |
4898 | - printk(KERN_EMERG "console shuts up ...\n"); |
4899 | console_silent(); |
4900 | spin_unlock(&nmi_print_lock); |
4901 | bust_spinlocks(0); |
4902 | @@ -1057,49 +981,24 @@ |
4903 | #endif |
4904 | } |
4905 | |
4906 | -fastcall void setup_x86_bogus_stack(unsigned char * stk) |
4907 | +fastcall unsigned long patch_espfix_desc(unsigned long uesp, |
4908 | + unsigned long kesp) |
4909 | { |
4910 | - unsigned long *switch16_ptr, *switch32_ptr; |
4911 | - struct pt_regs *regs; |
4912 | - unsigned long stack_top, stack_bot; |
4913 | - unsigned short iret_frame16_off; |
4914 | - int cpu = smp_processor_id(); |
4915 | - /* reserve the space on 32bit stack for the magic switch16 pointer */ |
4916 | - memmove(stk, stk + 8, sizeof(struct pt_regs)); |
4917 | - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); |
4918 | - regs = (struct pt_regs *)stk; |
4919 | - /* now the switch32 on 16bit stack */ |
4920 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); |
4921 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; |
4922 | - switch32_ptr = (unsigned long *)(stack_top - 8); |
4923 | - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; |
4924 | - /* copy iret frame on 16bit stack */ |
4925 | - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); |
4926 | - /* fill in the switch pointers */ |
4927 | - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; |
4928 | - switch16_ptr[1] = __ESPFIX_SS; |
4929 | - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + |
4930 | - 8 - CPU_16BIT_STACK_SIZE; |
4931 | - switch32_ptr[1] = __KERNEL_DS; |
4932 | -} |
4933 | - |
4934 | -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) |
4935 | -{ |
4936 | - unsigned long *switch32_ptr; |
4937 | - unsigned char *stack16, *stack32; |
4938 | - unsigned long stack_top, stack_bot; |
4939 | - int len; |
4940 | int cpu = smp_processor_id(); |
4941 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); |
4942 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; |
4943 | - switch32_ptr = (unsigned long *)(stack_top - 8); |
4944 | - /* copy the data from 16bit stack to 32bit stack */ |
4945 | - len = CPU_16BIT_STACK_SIZE - 8 - sp; |
4946 | - stack16 = (unsigned char *)(stack_bot + sp); |
4947 | - stack32 = (unsigned char *) |
4948 | - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); |
4949 | - memcpy(stack32, stack16, len); |
4950 | - return stack32; |
4951 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
4952 | + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; |
4953 | + unsigned long base = (kesp - uesp) & -THREAD_SIZE; |
4954 | + unsigned long new_kesp = kesp - base; |
4955 | + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; |
4956 | + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; |
4957 | + /* Set up base for espfix segment */ |
4958 | + desc &= 0x00f0ff0000000000ULL; |
4959 | + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | |
4960 | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | |
4961 | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | |
4962 | + (lim_pages & 0xffff); |
4963 | + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; |
4964 | + return new_kesp; |
4965 | } |
4966 | #endif |
4967 | |
4968 | @@ -1113,7 +1012,7 @@ |
4969 | * Must be called with kernel preemption disabled (in this case, |
4970 | * local interrupts are disabled at the call-site in entry.S). |
4971 | */ |
4972 | -asmlinkage void math_state_restore(struct pt_regs regs) |
4973 | +asmlinkage void math_state_restore(void) |
4974 | { |
4975 | struct thread_info *thread = current_thread_info(); |
4976 | struct task_struct *tsk = thread->task; |
4977 | @@ -1123,6 +1022,7 @@ |
4978 | init_fpu(tsk); |
4979 | restore_fpu(tsk); |
4980 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
4981 | + tsk->fpu_counter++; |
4982 | } |
4983 | |
4984 | #ifndef CONFIG_MATH_EMULATION |
4985 | @@ -1234,19 +1134,3 @@ |
4986 | return 1; |
4987 | } |
4988 | __setup("kstack=", kstack_setup); |
4989 | - |
4990 | -#ifdef CONFIG_STACK_UNWIND |
4991 | -static int __init call_trace_setup(char *s) |
4992 | -{ |
4993 | - if (strcmp(s, "old") == 0) |
4994 | - call_trace = -1; |
4995 | - else if (strcmp(s, "both") == 0) |
4996 | - call_trace = 0; |
4997 | - else if (strcmp(s, "newfallback") == 0) |
4998 | - call_trace = 1; |
4999 | - else if (strcmp(s, "new") == 2) |
5000 | - call_trace = 2; |
5001 | - return 1; |
5002 | -} |
5003 | -__setup("call_trace=", call_trace_setup); |
5004 | -#endif |
5005 | --- a/arch/x86/kernel/traps_64-xen.c |
5006 | +++ b/arch/x86/kernel/traps_64-xen.c |
5007 | @@ -30,9 +30,10 @@ |
5008 | #include <linux/kprobes.h> |
5009 | #include <linux/kexec.h> |
5010 | #include <linux/unwind.h> |
5011 | +#include <linux/uaccess.h> |
5012 | +#include <linux/bug.h> |
5013 | |
5014 | #include <asm/system.h> |
5015 | -#include <asm/uaccess.h> |
5016 | #include <asm/io.h> |
5017 | #include <asm/atomic.h> |
5018 | #include <asm/debugreg.h> |
5019 | @@ -108,12 +109,7 @@ |
5020 | preempt_enable_no_resched(); |
5021 | } |
5022 | |
5023 | -static int kstack_depth_to_print = 12; |
5024 | -#ifdef CONFIG_STACK_UNWIND |
5025 | -static int call_trace = 1; |
5026 | -#else |
5027 | -#define call_trace (-1) |
5028 | -#endif |
5029 | +int kstack_depth_to_print = 12; |
5030 | |
5031 | #ifdef CONFIG_KALLSYMS |
5032 | void printk_address(unsigned long address) |
5033 | @@ -218,24 +214,7 @@ |
5034 | return NULL; |
5035 | } |
5036 | |
5037 | -struct ops_and_data { |
5038 | - struct stacktrace_ops *ops; |
5039 | - void *data; |
5040 | -}; |
5041 | - |
5042 | -static int dump_trace_unwind(struct unwind_frame_info *info, void *context) |
5043 | -{ |
5044 | - struct ops_and_data *oad = (struct ops_and_data *)context; |
5045 | - int n = 0; |
5046 | - |
5047 | - while (unwind(info) == 0 && UNW_PC(info)) { |
5048 | - n++; |
5049 | - oad->ops->address(oad->data, UNW_PC(info)); |
5050 | - if (arch_unw_user_mode(info)) |
5051 | - break; |
5052 | - } |
5053 | - return n; |
5054 | -} |
5055 | +#define MSG(txt) ops->warning(data, txt) |
5056 | |
5057 | /* |
5058 | * x86-64 can have upto three kernel stacks: |
5059 | @@ -250,61 +229,24 @@ |
5060 | return p > t && p < t + THREAD_SIZE - 3; |
5061 | } |
5062 | |
5063 | -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, |
5064 | +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
5065 | + unsigned long *stack, |
5066 | struct stacktrace_ops *ops, void *data) |
5067 | { |
5068 | - const unsigned cpu = smp_processor_id(); |
5069 | - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
5070 | + const unsigned cpu = get_cpu(); |
5071 | + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; |
5072 | unsigned used = 0; |
5073 | struct thread_info *tinfo; |
5074 | |
5075 | if (!tsk) |
5076 | tsk = current; |
5077 | |
5078 | - if (call_trace >= 0) { |
5079 | - int unw_ret = 0; |
5080 | - struct unwind_frame_info info; |
5081 | - struct ops_and_data oad = { .ops = ops, .data = data }; |
5082 | - |
5083 | - if (regs) { |
5084 | - if (unwind_init_frame_info(&info, tsk, regs) == 0) |
5085 | - unw_ret = dump_trace_unwind(&info, &oad); |
5086 | - } else if (tsk == current) |
5087 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
5088 | - else { |
5089 | - if (unwind_init_blocked(&info, tsk) == 0) |
5090 | - unw_ret = dump_trace_unwind(&info, &oad); |
5091 | - } |
5092 | - if (unw_ret > 0) { |
5093 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
5094 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
5095 | - UNW_PC(&info)); |
5096 | - if ((long)UNW_SP(&info) < 0) { |
5097 | - ops->warning(data, "Leftover inexact backtrace:\n"); |
5098 | - stack = (unsigned long *)UNW_SP(&info); |
5099 | - if (!stack) |
5100 | - return; |
5101 | - } else |
5102 | - ops->warning(data, "Full inexact backtrace again:\n"); |
5103 | - } else if (call_trace >= 1) |
5104 | - return; |
5105 | - else |
5106 | - ops->warning(data, "Full inexact backtrace again:\n"); |
5107 | - } else |
5108 | - ops->warning(data, "Inexact backtrace:\n"); |
5109 | - } |
5110 | if (!stack) { |
5111 | unsigned long dummy; |
5112 | stack = &dummy; |
5113 | if (tsk && tsk != current) |
5114 | stack = (unsigned long *)tsk->thread.rsp; |
5115 | } |
5116 | - /* |
5117 | - * Align the stack pointer on word boundary, later loops |
5118 | - * rely on that (and corruption / debug info bugs can cause |
5119 | - * unaligned values here): |
5120 | - */ |
5121 | - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); |
5122 | |
5123 | /* |
5124 | * Print function call entries within a stack. 'cond' is the |
5125 | @@ -314,9 +256,9 @@ |
5126 | #define HANDLE_STACK(cond) \ |
5127 | do while (cond) { \ |
5128 | unsigned long addr = *stack++; \ |
5129 | - if (oops_in_progress ? \ |
5130 | - __kernel_text_address(addr) : \ |
5131 | - kernel_text_address(addr)) { \ |
5132 | + /* Use unlocked access here because except for NMIs \ |
5133 | + we should be already protected against module unloads */ \ |
5134 | + if (__kernel_text_address(addr)) { \ |
5135 | /* \ |
5136 | * If the address is either in the text segment of the \ |
5137 | * kernel, or in the region which contains vmalloc'ed \ |
5138 | @@ -379,9 +321,10 @@ |
5139 | /* |
5140 | * This handles the process stack: |
5141 | */ |
5142 | - tinfo = current_thread_info(); |
5143 | + tinfo = task_thread_info(tsk); |
5144 | HANDLE_STACK (valid_stack_ptr(tinfo, stack)); |
5145 | #undef HANDLE_STACK |
5146 | + put_cpu(); |
5147 | } |
5148 | EXPORT_SYMBOL(dump_trace); |
5149 | |
5150 | @@ -518,30 +461,15 @@ |
5151 | printk("\n"); |
5152 | } |
5153 | |
5154 | -void handle_BUG(struct pt_regs *regs) |
5155 | -{ |
5156 | - struct bug_frame f; |
5157 | - long len; |
5158 | - const char *prefix = ""; |
5159 | +int is_valid_bugaddr(unsigned long rip) |
5160 | +{ |
5161 | + unsigned short ud2; |
5162 | |
5163 | - if (user_mode(regs)) |
5164 | - return; |
5165 | - if (__copy_from_user(&f, (const void __user *) regs->rip, |
5166 | - sizeof(struct bug_frame))) |
5167 | - return; |
5168 | - if (f.filename >= 0 || |
5169 | - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) |
5170 | - return; |
5171 | - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; |
5172 | - if (len < 0 || len >= PATH_MAX) |
5173 | - f.filename = (int)(long)"unmapped filename"; |
5174 | - else if (len > 50) { |
5175 | - f.filename += len - 50; |
5176 | - prefix = "..."; |
5177 | - } |
5178 | - printk("----------- [cut here ] --------- [please bite here ] ---------\n"); |
5179 | - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); |
5180 | -} |
5181 | + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) |
5182 | + return 0; |
5183 | + |
5184 | + return ud2 == 0x0b0f; |
5185 | +} |
5186 | |
5187 | #ifdef CONFIG_BUG |
5188 | void out_of_line_bug(void) |
5189 | @@ -621,7 +549,9 @@ |
5190 | { |
5191 | unsigned long flags = oops_begin(); |
5192 | |
5193 | - handle_BUG(regs); |
5194 | + if (!user_mode(regs)) |
5195 | + report_bug(regs->rip); |
5196 | + |
5197 | __die(str, regs, err); |
5198 | oops_end(flags); |
5199 | do_exit(SIGSEGV); |
5200 | @@ -790,8 +720,7 @@ |
5201 | { |
5202 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
5203 | reason); |
5204 | - printk(KERN_EMERG "You probably have a hardware problem with your " |
5205 | - "RAM chips\n"); |
5206 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); |
5207 | |
5208 | if (panic_on_unrecovered_nmi) |
5209 | panic("NMI: Not continuing"); |
5210 | @@ -1227,21 +1156,3 @@ |
5211 | return 0; |
5212 | } |
5213 | early_param("kstack", kstack_setup); |
5214 | - |
5215 | -#ifdef CONFIG_STACK_UNWIND |
5216 | -static int __init call_trace_setup(char *s) |
5217 | -{ |
5218 | - if (!s) |
5219 | - return -EINVAL; |
5220 | - if (strcmp(s, "old") == 0) |
5221 | - call_trace = -1; |
5222 | - else if (strcmp(s, "both") == 0) |
5223 | - call_trace = 0; |
5224 | - else if (strcmp(s, "newfallback") == 0) |
5225 | - call_trace = 1; |
5226 | - else if (strcmp(s, "new") == 0) |
5227 | - call_trace = 2; |
5228 | - return 0; |
5229 | -} |
5230 | -early_param("call_trace", call_trace_setup); |
5231 | -#endif |
5232 | --- a/arch/x86/kernel/vmlinux_32.lds.S |
5233 | +++ b/arch/x86/kernel/vmlinux_32.lds.S |
5234 | @@ -29,6 +29,12 @@ |
5235 | SECTIONS |
5236 | { |
5237 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; |
5238 | + |
5239 | +#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002 |
5240 | +#undef LOAD_OFFSET |
5241 | +#define LOAD_OFFSET 0 |
5242 | +#endif |
5243 | + |
5244 | phys_startup_32 = startup_32 - LOAD_OFFSET; |
5245 | |
5246 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { |
5247 | --- a/arch/x86/kernel/vsyscall_64-xen.c |
5248 | +++ b/arch/x86/kernel/vsyscall_64-xen.c |
5249 | @@ -42,6 +42,7 @@ |
5250 | #include <asm/topology.h> |
5251 | |
5252 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
5253 | +#define __syscall_clobber "r11","rcx","memory" |
5254 | |
5255 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
5256 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
5257 | @@ -224,8 +225,7 @@ |
5258 | |
5259 | static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, |
5260 | void __user *oldval, size_t __user *oldlenp, |
5261 | - void __user *newval, size_t newlen, |
5262 | - void **context) |
5263 | + void __user *newval, size_t newlen) |
5264 | { |
5265 | return -ENOSYS; |
5266 | } |
5267 | @@ -277,7 +277,6 @@ |
5268 | vsyscall_set_cpu(raw_smp_processor_id()); |
5269 | } |
5270 | |
5271 | -#ifdef CONFIG_HOTPLUG_CPU |
5272 | static int __cpuinit |
5273 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
5274 | { |
5275 | @@ -286,13 +285,13 @@ |
5276 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); |
5277 | return NOTIFY_DONE; |
5278 | } |
5279 | -#endif |
5280 | |
5281 | static void __init map_vsyscall(void) |
5282 | { |
5283 | extern char __vsyscall_0; |
5284 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); |
5285 | |
5286 | + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ |
5287 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); |
5288 | } |
5289 | |
5290 | --- a/arch/x86/kvm/Kconfig |
5291 | +++ b/arch/x86/kvm/Kconfig |
5292 | @@ -7,6 +7,7 @@ |
5293 | menuconfig VIRTUALIZATION |
5294 | bool "Virtualization" |
5295 | depends on HAVE_KVM || X86 |
5296 | + depends on !XEN |
5297 | default y |
5298 | ---help--- |
5299 | Say Y here to get to see options for using your Linux host to run other |
5300 | --- a/arch/x86/mm/fault_32-xen.c |
5301 | +++ b/arch/x86/mm/fault_32-xen.c |
5302 | @@ -22,9 +22,9 @@ |
5303 | #include <linux/highmem.h> |
5304 | #include <linux/module.h> |
5305 | #include <linux/kprobes.h> |
5306 | +#include <linux/uaccess.h> |
5307 | |
5308 | #include <asm/system.h> |
5309 | -#include <asm/uaccess.h> |
5310 | #include <asm/desc.h> |
5311 | #include <asm/kdebug.h> |
5312 | #include <asm/segment.h> |
5313 | @@ -167,7 +167,7 @@ |
5314 | static int __is_prefetch(struct pt_regs *regs, unsigned long addr) |
5315 | { |
5316 | unsigned long limit; |
5317 | - unsigned long instr = get_segment_eip (regs, &limit); |
5318 | + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); |
5319 | int scan_more = 1; |
5320 | int prefetch = 0; |
5321 | int i; |
5322 | @@ -177,9 +177,9 @@ |
5323 | unsigned char instr_hi; |
5324 | unsigned char instr_lo; |
5325 | |
5326 | - if (instr > limit) |
5327 | + if (instr > (unsigned char *)limit) |
5328 | break; |
5329 | - if (__get_user(opcode, (unsigned char __user *) instr)) |
5330 | + if (probe_kernel_address(instr, opcode)) |
5331 | break; |
5332 | |
5333 | instr_hi = opcode & 0xf0; |
5334 | @@ -204,9 +204,9 @@ |
5335 | case 0x00: |
5336 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ |
5337 | scan_more = 0; |
5338 | - if (instr > limit) |
5339 | + if (instr > (unsigned char *)limit) |
5340 | break; |
5341 | - if (__get_user(opcode, (unsigned char __user *) instr)) |
5342 | + if (probe_kernel_address(instr, opcode)) |
5343 | break; |
5344 | prefetch = (instr_lo == 0xF) && |
5345 | (opcode == 0x0D || opcode == 0x18); |
5346 | --- a/arch/x86/mm/fault_64-xen.c |
5347 | +++ b/arch/x86/mm/fault_64-xen.c |
5348 | @@ -23,9 +23,9 @@ |
5349 | #include <linux/compiler.h> |
5350 | #include <linux/module.h> |
5351 | #include <linux/kprobes.h> |
5352 | +#include <linux/uaccess.h> |
5353 | |
5354 | #include <asm/system.h> |
5355 | -#include <asm/uaccess.h> |
5356 | #include <asm/pgalloc.h> |
5357 | #include <asm/smp.h> |
5358 | #include <asm/tlbflush.h> |
5359 | @@ -96,7 +96,7 @@ |
5360 | static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, |
5361 | unsigned long error_code) |
5362 | { |
5363 | - unsigned char __user *instr; |
5364 | + unsigned char *instr; |
5365 | int scan_more = 1; |
5366 | int prefetch = 0; |
5367 | unsigned char *max_instr; |
5368 | @@ -116,7 +116,7 @@ |
5369 | unsigned char instr_hi; |
5370 | unsigned char instr_lo; |
5371 | |
5372 | - if (__get_user(opcode, (char __user *)instr)) |
5373 | + if (probe_kernel_address(instr, opcode)) |
5374 | break; |
5375 | |
5376 | instr_hi = opcode & 0xf0; |
5377 | @@ -154,7 +154,7 @@ |
5378 | case 0x00: |
5379 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ |
5380 | scan_more = 0; |
5381 | - if (__get_user(opcode, (char __user *)instr)) |
5382 | + if (probe_kernel_address(instr, opcode)) |
5383 | break; |
5384 | prefetch = (instr_lo == 0xF) && |
5385 | (opcode == 0x0D || opcode == 0x18); |
5386 | @@ -170,7 +170,7 @@ |
5387 | static int bad_address(void *p) |
5388 | { |
5389 | unsigned long dummy; |
5390 | - return __get_user(dummy, (unsigned long __user *)p); |
5391 | + return probe_kernel_address((unsigned long *)p, dummy); |
5392 | } |
5393 | |
5394 | void dump_pagetable(unsigned long address) |
5395 | --- a/arch/x86/mm/highmem_32-xen.c |
5396 | +++ b/arch/x86/mm/highmem_32-xen.c |
5397 | @@ -32,7 +32,7 @@ |
5398 | unsigned long vaddr; |
5399 | |
5400 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ |
5401 | - inc_preempt_count(); |
5402 | + pagefault_disable(); |
5403 | if (!PageHighMem(page)) |
5404 | return page_address(page); |
5405 | |
5406 | @@ -63,26 +63,22 @@ |
5407 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; |
5408 | enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); |
5409 | |
5410 | -#ifdef CONFIG_DEBUG_HIGHMEM |
5411 | - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { |
5412 | - dec_preempt_count(); |
5413 | - preempt_check_resched(); |
5414 | - return; |
5415 | - } |
5416 | - |
5417 | - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
5418 | - BUG(); |
5419 | -#endif |
5420 | /* |
5421 | * Force other mappings to Oops if they'll try to access this pte |
5422 | * without first remap it. Keeping stale mappings around is a bad idea |
5423 | * also, in case the page changes cacheability attributes or becomes |
5424 | * a protected page in a hypervisor. |
5425 | */ |
5426 | - kpte_clear_flush(kmap_pte-idx, vaddr); |
5427 | + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
5428 | + kpte_clear_flush(kmap_pte-idx, vaddr); |
5429 | + else { |
5430 | +#ifdef CONFIG_DEBUG_HIGHMEM |
5431 | + BUG_ON(vaddr < PAGE_OFFSET); |
5432 | + BUG_ON(vaddr >= (unsigned long)high_memory); |
5433 | +#endif |
5434 | + } |
5435 | |
5436 | - dec_preempt_count(); |
5437 | - preempt_check_resched(); |
5438 | + pagefault_enable(); |
5439 | } |
5440 | |
5441 | /* This is the same as kmap_atomic() but can map memory that doesn't |
5442 | @@ -93,7 +89,7 @@ |
5443 | enum fixed_addresses idx; |
5444 | unsigned long vaddr; |
5445 | |
5446 | - inc_preempt_count(); |
5447 | + pagefault_disable(); |
5448 | |
5449 | idx = type + KM_TYPE_NR*smp_processor_id(); |
5450 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
5451 | --- a/arch/x86/mm/init_32-xen.c |
5452 | +++ b/arch/x86/mm/init_32-xen.c |
5453 | @@ -235,8 +235,6 @@ |
5454 | |
5455 | #endif |
5456 | |
5457 | -extern int is_available_memory(efi_memory_desc_t *); |
5458 | - |
5459 | int page_is_ram(unsigned long pagenr) |
5460 | { |
5461 | int i; |
5462 | @@ -329,7 +327,7 @@ |
5463 | SetPageReserved(page); |
5464 | } |
5465 | |
5466 | -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) |
5467 | +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) |
5468 | { |
5469 | free_new_highpage(page, pfn); |
5470 | totalram_pages++; |
5471 | @@ -346,7 +344,7 @@ |
5472 | * has been added dynamically that would be |
5473 | * onlined here is in HIGHMEM |
5474 | */ |
5475 | -void online_page(struct page *page) |
5476 | +void __meminit online_page(struct page *page) |
5477 | { |
5478 | ClearPageReserved(page); |
5479 | add_one_highpage_hotplug(page, page_to_pfn(page)); |
5480 | @@ -739,16 +737,10 @@ |
5481 | set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags); |
5482 | } |
5483 | |
5484 | -/* |
5485 | - * this is for the non-NUMA, single node SMP system case. |
5486 | - * Specifically, in the case of x86, we will always add |
5487 | - * memory to the highmem for now. |
5488 | - */ |
5489 | #ifdef CONFIG_MEMORY_HOTPLUG |
5490 | -#ifndef CONFIG_NEED_MULTIPLE_NODES |
5491 | int arch_add_memory(int nid, u64 start, u64 size) |
5492 | { |
5493 | - struct pglist_data *pgdata = &contig_page_data; |
5494 | + struct pglist_data *pgdata = NODE_DATA(nid); |
5495 | struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; |
5496 | unsigned long start_pfn = start >> PAGE_SHIFT; |
5497 | unsigned long nr_pages = size >> PAGE_SHIFT; |
5498 | @@ -760,11 +752,11 @@ |
5499 | { |
5500 | return -EINVAL; |
5501 | } |
5502 | -#endif |
5503 | +EXPORT_SYMBOL_GPL(remove_memory); |
5504 | #endif |
5505 | |
5506 | -kmem_cache_t *pgd_cache; |
5507 | -kmem_cache_t *pmd_cache; |
5508 | +struct kmem_cache *pgd_cache; |
5509 | +struct kmem_cache *pmd_cache; |
5510 | |
5511 | void __init pgtable_cache_init(void) |
5512 | { |
5513 | --- a/arch/x86/mm/init_64-xen.c |
5514 | +++ b/arch/x86/mm/init_64-xen.c |
5515 | @@ -1130,14 +1130,15 @@ |
5516 | __initcall(x8664_sysctl_init); |
5517 | #endif |
5518 | |
5519 | -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only |
5520 | +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only |
5521 | covers the 64bit vsyscall page now. 32bit has a real VMA now and does |
5522 | not need special handling anymore. */ |
5523 | |
5524 | static struct vm_area_struct gate_vma = { |
5525 | .vm_start = VSYSCALL_START, |
5526 | - .vm_end = VSYSCALL_END, |
5527 | - .vm_page_prot = PAGE_READONLY |
5528 | + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), |
5529 | + .vm_page_prot = PAGE_READONLY_EXEC, |
5530 | + .vm_flags = VM_READ | VM_EXEC |
5531 | }; |
5532 | |
5533 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
5534 | --- a/arch/x86/mm/pageattr_64-xen.c |
5535 | +++ b/arch/x86/mm/pageattr_64-xen.c |
5536 | @@ -324,34 +324,40 @@ |
5537 | return base; |
5538 | } |
5539 | |
5540 | - |
5541 | -static void flush_kernel_map(void *address) |
5542 | +static void cache_flush_page(void *adr) |
5543 | { |
5544 | - if (0 && address && cpu_has_clflush) { |
5545 | - /* is this worth it? */ |
5546 | - int i; |
5547 | - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) |
5548 | - asm volatile("clflush (%0)" :: "r" (address + i)); |
5549 | - } else |
5550 | - asm volatile("wbinvd":::"memory"); |
5551 | - if (address) |
5552 | - __flush_tlb_one(address); |
5553 | - else |
5554 | - __flush_tlb_all(); |
5555 | + int i; |
5556 | + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) |
5557 | + asm volatile("clflush (%0)" :: "r" (adr + i)); |
5558 | } |
5559 | |
5560 | +static void flush_kernel_map(void *arg) |
5561 | +{ |
5562 | + struct list_head *l = (struct list_head *)arg; |
5563 | + struct page *pg; |
5564 | |
5565 | -static inline void flush_map(unsigned long address) |
5566 | + /* When clflush is available always use it because it is |
5567 | + much cheaper than WBINVD */ |
5568 | + if (!cpu_has_clflush) |
5569 | + asm volatile("wbinvd" ::: "memory"); |
5570 | + list_for_each_entry(pg, l, lru) { |
5571 | + void *adr = page_address(pg); |
5572 | + if (cpu_has_clflush) |
5573 | + cache_flush_page(adr); |
5574 | + __flush_tlb_one(adr); |
5575 | + } |
5576 | +} |
5577 | + |
5578 | +static inline void flush_map(struct list_head *l) |
5579 | { |
5580 | - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); |
5581 | + on_each_cpu(flush_kernel_map, l, 1, 1); |
5582 | } |
5583 | |
5584 | -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ |
5585 | +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ |
5586 | |
5587 | static inline void save_page(struct page *fpage) |
5588 | { |
5589 | - fpage->lru.next = (struct list_head *)deferred_pages; |
5590 | - deferred_pages = fpage; |
5591 | + list_add(&fpage->lru, &deferred_pages); |
5592 | } |
5593 | |
5594 | /* |
5595 | @@ -481,18 +487,18 @@ |
5596 | |
5597 | void global_flush_tlb(void) |
5598 | { |
5599 | - struct page *dpage; |
5600 | + struct page *pg, *next; |
5601 | + struct list_head l; |
5602 | |
5603 | down_read(&init_mm.mmap_sem); |
5604 | - dpage = xchg(&deferred_pages, NULL); |
5605 | + list_replace_init(&deferred_pages, &l); |
5606 | up_read(&init_mm.mmap_sem); |
5607 | |
5608 | - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); |
5609 | - while (dpage) { |
5610 | - struct page *tmp = dpage; |
5611 | - dpage = (struct page *)dpage->lru.next; |
5612 | - ClearPagePrivate(tmp); |
5613 | - __free_page(tmp); |
5614 | + flush_map(&l); |
5615 | + |
5616 | + list_for_each_entry_safe(pg, next, &l, lru) { |
5617 | + ClearPagePrivate(pg); |
5618 | + __free_page(pg); |
5619 | } |
5620 | } |
5621 | |
5622 | --- a/arch/x86/mm/pgtable_32-xen.c |
5623 | +++ b/arch/x86/mm/pgtable_32-xen.c |
5624 | @@ -197,7 +197,7 @@ |
5625 | __free_page(pte); |
5626 | } |
5627 | |
5628 | -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) |
5629 | +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) |
5630 | { |
5631 | memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); |
5632 | } |
5633 | @@ -237,7 +237,7 @@ |
5634 | set_page_private(next, (unsigned long)pprev); |
5635 | } |
5636 | |
5637 | -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) |
5638 | +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
5639 | { |
5640 | unsigned long flags; |
5641 | |
5642 | @@ -258,7 +258,7 @@ |
5643 | } |
5644 | |
5645 | /* never called when PTRS_PER_PMD > 1 */ |
5646 | -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) |
5647 | +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
5648 | { |
5649 | unsigned long flags; /* can be called from interrupt context */ |
5650 | |
5651 | --- a/arch/x86/pci/irq-xen.c |
5652 | +++ b/arch/x86/pci/irq-xen.c |
5653 | @@ -768,7 +768,7 @@ |
5654 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", |
5655 | rt->rtr_vendor, rt->rtr_device); |
5656 | |
5657 | - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); |
5658 | + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); |
5659 | if (!pirq_router_dev) { |
5660 | DBG(KERN_DEBUG "PCI: Interrupt router not found at " |
5661 | "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); |
5662 | @@ -788,6 +788,8 @@ |
5663 | pirq_router_dev->vendor, |
5664 | pirq_router_dev->device, |
5665 | pci_name(pirq_router_dev)); |
5666 | + |
5667 | + /* The device remains referenced for the kernel lifetime */ |
5668 | } |
5669 | |
5670 | static struct irq_info *pirq_get_info(struct pci_dev *dev) |
5671 | --- a/drivers/xen/balloon/balloon.c |
5672 | +++ b/drivers/xen/balloon/balloon.c |
5673 | @@ -97,8 +97,8 @@ |
5674 | static LIST_HEAD(ballooned_pages); |
5675 | |
5676 | /* Main work function, always executed in process context. */ |
5677 | -static void balloon_process(void *unused); |
5678 | -static DECLARE_WORK(balloon_worker, balloon_process, NULL); |
5679 | +static void balloon_process(struct work_struct *unused); |
5680 | +static DECLARE_WORK(balloon_worker, balloon_process); |
5681 | static struct timer_list balloon_timer; |
5682 | |
5683 | /* When ballooning out (allocating memory to return to Xen) we don't really |
5684 | @@ -387,7 +387,7 @@ |
5685 | * by the balloon lock), or with changes to the Xen hard limit, but we will |
5686 | * recover from these in time. |
5687 | */ |
5688 | -static void balloon_process(void *unused) |
5689 | +static void balloon_process(struct work_struct *unused) |
5690 | { |
5691 | int need_sleep = 0; |
5692 | long credit; |
5693 | --- a/drivers/xen/blkback/blkback.c |
5694 | +++ b/drivers/xen/blkback/blkback.c |
5695 | @@ -37,6 +37,7 @@ |
5696 | |
5697 | #include <linux/spinlock.h> |
5698 | #include <linux/kthread.h> |
5699 | +#include <linux/freezer.h> |
5700 | #include <linux/list.h> |
5701 | #include <linux/delay.h> |
5702 | #include <xen/balloon.h> |
5703 | --- a/drivers/xen/blkback/interface.c |
5704 | +++ b/drivers/xen/blkback/interface.c |
5705 | @@ -34,7 +34,7 @@ |
5706 | #include <xen/evtchn.h> |
5707 | #include <linux/kthread.h> |
5708 | |
5709 | -static kmem_cache_t *blkif_cachep; |
5710 | +static struct kmem_cache *blkif_cachep; |
5711 | |
5712 | blkif_t *blkif_alloc(domid_t domid) |
5713 | { |
5714 | --- a/drivers/xen/blkfront/blkfront.c |
5715 | +++ b/drivers/xen/blkfront/blkfront.c |
5716 | @@ -70,7 +70,7 @@ |
5717 | static void kick_pending_request_queues(struct blkfront_info *); |
5718 | |
5719 | static irqreturn_t blkif_int(int irq, void *dev_id); |
5720 | -static void blkif_restart_queue(void *arg); |
5721 | +static void blkif_restart_queue(struct work_struct *arg); |
5722 | static void blkif_recover(struct blkfront_info *); |
5723 | static void blkif_completion(struct blk_shadow *); |
5724 | static void blkif_free(struct blkfront_info *, int); |
5725 | @@ -105,7 +105,7 @@ |
5726 | info->xbdev = dev; |
5727 | info->vdevice = vdevice; |
5728 | info->connected = BLKIF_STATE_DISCONNECTED; |
5729 | - INIT_WORK(&info->work, blkif_restart_queue, (void *)info); |
5730 | + INIT_WORK(&info->work, blkif_restart_queue); |
5731 | |
5732 | for (i = 0; i < BLK_RING_SIZE; i++) |
5733 | info->shadow[i].req.id = i+1; |
5734 | @@ -445,9 +445,9 @@ |
5735 | } |
5736 | } |
5737 | |
5738 | -static void blkif_restart_queue(void *arg) |
5739 | +static void blkif_restart_queue(struct work_struct *arg) |
5740 | { |
5741 | - struct blkfront_info *info = (struct blkfront_info *)arg; |
5742 | + struct blkfront_info *info = container_of(arg, struct blkfront_info, work); |
5743 | spin_lock_irq(&blkif_io_lock); |
5744 | if (info->connected == BLKIF_STATE_CONNECTED) |
5745 | kick_pending_request_queues(info); |
5746 | --- a/drivers/xen/blktap/blktap.c |
5747 | +++ b/drivers/xen/blktap/blktap.c |
5748 | @@ -40,6 +40,7 @@ |
5749 | |
5750 | #include <linux/spinlock.h> |
5751 | #include <linux/kthread.h> |
5752 | +#include <linux/freezer.h> |
5753 | #include <linux/list.h> |
5754 | #include <asm/hypervisor.h> |
5755 | #include "common.h" |
5756 | --- a/drivers/xen/blktap/interface.c |
5757 | +++ b/drivers/xen/blktap/interface.c |
5758 | @@ -34,7 +34,7 @@ |
5759 | #include "common.h" |
5760 | #include <xen/evtchn.h> |
5761 | |
5762 | -static kmem_cache_t *blkif_cachep; |
5763 | +static struct kmem_cache *blkif_cachep; |
5764 | |
5765 | blkif_t *tap_alloc_blkif(domid_t domid) |
5766 | { |
5767 | --- a/drivers/xen/char/mem.c |
5768 | +++ b/drivers/xen/char/mem.c |
5769 | @@ -157,7 +157,7 @@ |
5770 | { |
5771 | loff_t ret; |
5772 | |
5773 | - mutex_lock(&file->f_dentry->d_inode->i_mutex); |
5774 | + mutex_lock(&file->f_path.dentry->d_inode->i_mutex); |
5775 | switch (orig) { |
5776 | case 0: |
5777 | file->f_pos = offset; |
5778 | @@ -172,7 +172,7 @@ |
5779 | default: |
5780 | ret = -EINVAL; |
5781 | } |
5782 | - mutex_unlock(&file->f_dentry->d_inode->i_mutex); |
5783 | + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); |
5784 | return ret; |
5785 | } |
5786 | |
5787 | --- a/drivers/xen/console/console.c |
5788 | +++ b/drivers/xen/console/console.c |
5789 | @@ -80,11 +80,6 @@ |
5790 | #define XEN_XVC_MAJOR 204 |
5791 | #define XEN_XVC_MINOR 191 |
5792 | |
5793 | -#ifdef CONFIG_MAGIC_SYSRQ |
5794 | -static unsigned long sysrq_requested; |
5795 | -extern int sysrq_enabled; |
5796 | -#endif |
5797 | - |
5798 | static int __init xencons_setup(char *str) |
5799 | { |
5800 | char *q; |
5801 | @@ -339,8 +334,8 @@ |
5802 | #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \ |
5803 | ((_tty)->index != (xc_num - 1))) |
5804 | |
5805 | -static struct termios *xencons_termios[MAX_NR_CONSOLES]; |
5806 | -static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; |
5807 | +static struct ktermios *xencons_termios[MAX_NR_CONSOLES]; |
5808 | +static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES]; |
5809 | static struct tty_struct *xencons_tty; |
5810 | static int xencons_priv_irq; |
5811 | static char x_char; |
5812 | @@ -356,7 +351,9 @@ |
5813 | |
5814 | for (i = 0; i < len; i++) { |
5815 | #ifdef CONFIG_MAGIC_SYSRQ |
5816 | - if (sysrq_enabled) { |
5817 | + if (sysrq_on()) { |
5818 | + static unsigned long sysrq_requested; |
5819 | + |
5820 | if (buf[i] == '\x0f') { /* ^O */ |
5821 | if (!sysrq_requested) { |
5822 | sysrq_requested = jiffies; |
5823 | --- a/drivers/xen/core/reboot.c |
5824 | +++ b/drivers/xen/core/reboot.c |
5825 | @@ -30,8 +30,8 @@ |
5826 | /* Can we leave APs online when we suspend? */ |
5827 | static int fast_suspend; |
5828 | |
5829 | -static void __shutdown_handler(void *unused); |
5830 | -static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); |
5831 | +static void __shutdown_handler(struct work_struct *unused); |
5832 | +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); |
5833 | |
5834 | int __xen_suspend(int fast_suspend, void (*resume_notifier)(void)); |
5835 | |
5836 | @@ -96,7 +96,7 @@ |
5837 | case SHUTDOWN_RESUMING: |
5838 | break; |
5839 | default: |
5840 | - schedule_work(&shutdown_work); |
5841 | + schedule_delayed_work(&shutdown_work, 0); |
5842 | break; |
5843 | } |
5844 | |
5845 | @@ -108,7 +108,7 @@ |
5846 | return 0; |
5847 | } |
5848 | |
5849 | -static void __shutdown_handler(void *unused) |
5850 | +static void __shutdown_handler(struct work_struct *unused) |
5851 | { |
5852 | int err; |
5853 | |
5854 | @@ -169,7 +169,7 @@ |
5855 | if (new_state != SHUTDOWN_INVALID) { |
5856 | old_state = xchg(&shutting_down, new_state); |
5857 | if (old_state == SHUTDOWN_INVALID) |
5858 | - schedule_work(&shutdown_work); |
5859 | + schedule_delayed_work(&shutdown_work, 0); |
5860 | else |
5861 | BUG_ON(old_state != SHUTDOWN_RESUMING); |
5862 | } |
5863 | --- a/drivers/xen/core/smpboot.c |
5864 | +++ b/drivers/xen/core/smpboot.c |
5865 | @@ -165,7 +165,12 @@ |
5866 | |
5867 | void __cpuinit cpu_bringup(void) |
5868 | { |
5869 | +#ifdef __i386__ |
5870 | + cpu_set_gdt(current_thread_info()->cpu); |
5871 | + secondary_cpu_init(); |
5872 | +#else |
5873 | cpu_init(); |
5874 | +#endif |
5875 | identify_cpu(cpu_data + smp_processor_id()); |
5876 | touch_softlockup_watchdog(); |
5877 | preempt_disable(); |
5878 | @@ -304,11 +309,12 @@ |
5879 | if (cpu == 0) |
5880 | continue; |
5881 | |
5882 | + idle = fork_idle(cpu); |
5883 | + if (IS_ERR(idle)) |
5884 | + panic("failed fork for CPU %d", cpu); |
5885 | + |
5886 | #ifdef __x86_64__ |
5887 | gdt_descr = &cpu_gdt_descr[cpu]; |
5888 | -#else |
5889 | - gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
5890 | -#endif |
5891 | gdt_descr->address = get_zeroed_page(GFP_KERNEL); |
5892 | if (unlikely(!gdt_descr->address)) { |
5893 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", |
5894 | @@ -317,6 +323,11 @@ |
5895 | } |
5896 | gdt_descr->size = GDT_SIZE; |
5897 | memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); |
5898 | +#else |
5899 | + if (unlikely(!init_gdt(cpu, idle))) |
5900 | + continue; |
5901 | + gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
5902 | +#endif |
5903 | make_page_readonly( |
5904 | (void *)gdt_descr->address, |
5905 | XENFEAT_writable_descriptor_tables); |
5906 | @@ -336,10 +347,6 @@ |
5907 | cpu_2_logical_apicid[cpu] = apicid; |
5908 | x86_cpu_to_apicid[cpu] = apicid; |
5909 | |
5910 | - idle = fork_idle(cpu); |
5911 | - if (IS_ERR(idle)) |
5912 | - panic("failed fork for CPU %d", cpu); |
5913 | - |
5914 | #ifdef __x86_64__ |
5915 | cpu_pda(cpu)->pcurrent = idle; |
5916 | cpu_pda(cpu)->cpunumber = cpu; |
5917 | --- a/drivers/xen/fbfront/xenfb.c |
5918 | +++ b/drivers/xen/fbfront/xenfb.c |
5919 | @@ -25,6 +25,7 @@ |
5920 | #include <linux/vmalloc.h> |
5921 | #include <linux/mm.h> |
5922 | #include <linux/mutex.h> |
5923 | +#include <linux/freezer.h> |
5924 | #include <asm/hypervisor.h> |
5925 | #include <xen/evtchn.h> |
5926 | #include <xen/interface/io/fbif.h> |
5927 | --- a/drivers/xen/netback/loopback.c |
5928 | +++ b/drivers/xen/netback/loopback.c |
5929 | @@ -54,6 +54,7 @@ |
5930 | #include <net/dst.h> |
5931 | #include <net/xfrm.h> /* secpath_reset() */ |
5932 | #include <asm/hypervisor.h> /* is_initial_xendomain() */ |
5933 | +#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */ |
5934 | |
5935 | static int nloopbacks = -1; |
5936 | module_param(nloopbacks, int, 0); |
5937 | --- a/drivers/xen/pciback/conf_space_header.c |
5938 | +++ b/drivers/xen/pciback/conf_space_header.c |
5939 | @@ -22,14 +22,14 @@ |
5940 | { |
5941 | int err; |
5942 | |
5943 | - if (!dev->is_enabled && is_enable_cmd(value)) { |
5944 | + if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) { |
5945 | if (unlikely(verbose_request)) |
5946 | printk(KERN_DEBUG "pciback: %s: enable\n", |
5947 | pci_name(dev)); |
5948 | err = pci_enable_device(dev); |
5949 | if (err) |
5950 | return err; |
5951 | - } else if (dev->is_enabled && !is_enable_cmd(value)) { |
5952 | + } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) { |
5953 | if (unlikely(verbose_request)) |
5954 | printk(KERN_DEBUG "pciback: %s: disable\n", |
5955 | pci_name(dev)); |
5956 | --- a/drivers/xen/pciback/pciback.h |
5957 | +++ b/drivers/xen/pciback/pciback.h |
5958 | @@ -88,7 +88,7 @@ |
5959 | |
5960 | /* Handles events from front-end */ |
5961 | irqreturn_t pciback_handle_event(int irq, void *dev_id); |
5962 | -void pciback_do_op(void *data); |
5963 | +void pciback_do_op(struct work_struct *work); |
5964 | |
5965 | int pciback_xenbus_register(void); |
5966 | void pciback_xenbus_unregister(void); |
5967 | --- a/drivers/xen/pciback/pciback_ops.c |
5968 | +++ b/drivers/xen/pciback/pciback_ops.c |
5969 | @@ -25,7 +25,7 @@ |
5970 | |
5971 | pci_write_config_word(dev, PCI_COMMAND, 0); |
5972 | |
5973 | - dev->is_enabled = 0; |
5974 | + atomic_set(&dev->enable_cnt, 0); |
5975 | dev->is_busmaster = 0; |
5976 | } else { |
5977 | pci_read_config_word(dev, PCI_COMMAND, &cmd); |
5978 | @@ -51,9 +51,9 @@ |
5979 | * context because some of the pci_* functions can sleep (mostly due to ACPI |
5980 | * use of semaphores). This function is intended to be called from a work |
5981 | * queue in process context taking a struct pciback_device as a parameter */ |
5982 | -void pciback_do_op(void *data) |
5983 | +void pciback_do_op(struct work_struct *work) |
5984 | { |
5985 | - struct pciback_device *pdev = data; |
5986 | + struct pciback_device *pdev = container_of(work, struct pciback_device, op_work); |
5987 | struct pci_dev *dev; |
5988 | struct xen_pci_op *op = &pdev->sh_info->op; |
5989 | |
5990 | --- a/drivers/xen/pciback/xenbus.c |
5991 | +++ b/drivers/xen/pciback/xenbus.c |
5992 | @@ -32,7 +32,7 @@ |
5993 | pdev->evtchn_irq = INVALID_EVTCHN_IRQ; |
5994 | pdev->be_watching = 0; |
5995 | |
5996 | - INIT_WORK(&pdev->op_work, pciback_do_op, pdev); |
5997 | + INIT_WORK(&pdev->op_work, pciback_do_op); |
5998 | |
5999 | if (pciback_init_devices(pdev)) { |
6000 | kfree(pdev); |
6001 | @@ -53,7 +53,6 @@ |
6002 | |
6003 | /* If the driver domain started an op, make sure we complete it or |
6004 | * delete it before releasing the shared memory */ |
6005 | - cancel_delayed_work(&pdev->op_work); |
6006 | flush_scheduled_work(); |
6007 | |
6008 | if (pdev->sh_info) |
6009 | --- a/drivers/xen/sfc_netfront/accel_vi.c |
6010 | +++ b/drivers/xen/sfc_netfront/accel_vi.c |
6011 | @@ -463,7 +463,7 @@ |
6012 | |
6013 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
6014 | /* Set to zero to encourage falcon to work it out for us */ |
6015 | - *(u16*)(skb->h.raw + skb->csum) = 0; |
6016 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; |
6017 | } |
6018 | |
6019 | if (multi_post_start_new_buffer(vnic, &state)) { |
6020 | @@ -582,7 +582,7 @@ |
6021 | |
6022 | if (skb->ip_summed == CHECKSUM_PARTIAL) { |
6023 | /* Set to zero to encourage falcon to work it out for us */ |
6024 | - *(u16*)(skb->h.raw + skb->csum) = 0; |
6025 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; |
6026 | } |
6027 | NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT |
6028 | (skb, idx, frag_data, frag_len, { |
6029 | --- a/drivers/xen/tpmback/interface.c |
6030 | +++ b/drivers/xen/tpmback/interface.c |
6031 | @@ -15,7 +15,7 @@ |
6032 | #include <xen/balloon.h> |
6033 | #include <xen/gnttab.h> |
6034 | |
6035 | -static kmem_cache_t *tpmif_cachep; |
6036 | +static struct kmem_cache *tpmif_cachep; |
6037 | int num_frontends = 0; |
6038 | |
6039 | LIST_HEAD(tpmif_list); |
6040 | --- a/drivers/xen/xenbus/xenbus_comms.c |
6041 | +++ b/drivers/xen/xenbus/xenbus_comms.c |
6042 | @@ -49,9 +49,9 @@ |
6043 | |
6044 | static int xenbus_irq; |
6045 | |
6046 | -extern void xenbus_probe(void *); |
6047 | +extern void xenbus_probe(struct work_struct *); |
6048 | extern int xenstored_ready; |
6049 | -static DECLARE_WORK(probe_work, xenbus_probe, NULL); |
6050 | +static DECLARE_WORK(probe_work, xenbus_probe); |
6051 | |
6052 | static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); |
6053 | |
6054 | --- a/drivers/xen/xenbus/xenbus_probe.c |
6055 | +++ b/drivers/xen/xenbus/xenbus_probe.c |
6056 | @@ -840,7 +840,7 @@ |
6057 | EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); |
6058 | |
6059 | |
6060 | -void xenbus_probe(void *unused) |
6061 | +void xenbus_probe(struct work_struct *unused) |
6062 | { |
6063 | BUG_ON((xenstored_ready <= 0)); |
6064 | |
6065 | --- a/include/asm-x86/mach-xen/asm/desc_32.h |
6066 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h |
6067 | @@ -4,8 +4,6 @@ |
6068 | #include <asm/ldt.h> |
6069 | #include <asm/segment.h> |
6070 | |
6071 | -#define CPU_16BIT_STACK_SIZE 1024 |
6072 | - |
6073 | #ifndef __ASSEMBLY__ |
6074 | |
6075 | #include <linux/preempt.h> |
6076 | @@ -15,8 +13,6 @@ |
6077 | |
6078 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; |
6079 | |
6080 | -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); |
6081 | - |
6082 | struct Xgt_desc_struct { |
6083 | unsigned short size; |
6084 | unsigned long address __attribute__((packed)); |
6085 | @@ -32,11 +28,6 @@ |
6086 | return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; |
6087 | } |
6088 | |
6089 | -/* |
6090 | - * This is the ldt that every process will get unless we need |
6091 | - * something other than this. |
6092 | - */ |
6093 | -extern struct desc_struct default_ldt[]; |
6094 | extern struct desc_struct idt_table[]; |
6095 | extern void set_intr_gate(unsigned int irq, void * addr); |
6096 | |
6097 | @@ -63,8 +54,8 @@ |
6098 | #define DESCTYPE_DPL3 0x60 /* DPL-3 */ |
6099 | #define DESCTYPE_S 0x10 /* !system */ |
6100 | |
6101 | +#ifndef CONFIG_XEN |
6102 | #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) |
6103 | -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) |
6104 | |
6105 | #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) |
6106 | #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) |
6107 | @@ -75,6 +66,7 @@ |
6108 | #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) |
6109 | #define store_tr(tr) __asm__ ("str %0":"=m" (tr)) |
6110 | #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) |
6111 | +#endif |
6112 | |
6113 | #if TLS_SIZE != 24 |
6114 | # error update this code. |
6115 | @@ -90,22 +82,43 @@ |
6116 | } |
6117 | |
6118 | #ifndef CONFIG_XEN |
6119 | +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6120 | +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6121 | +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6122 | + |
6123 | static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) |
6124 | { |
6125 | __u32 *lp = (__u32 *)((char *)dt + entry*8); |
6126 | *lp = entry_a; |
6127 | *(lp+1) = entry_b; |
6128 | } |
6129 | - |
6130 | -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6131 | -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6132 | +#define set_ldt native_set_ldt |
6133 | #else |
6134 | extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); |
6135 | extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); |
6136 | +#define set_ldt xen_set_ldt |
6137 | +#endif |
6138 | + |
6139 | +#ifndef CONFIG_XEN |
6140 | +static inline fastcall void native_set_ldt(const void *addr, |
6141 | + unsigned int entries) |
6142 | +{ |
6143 | + if (likely(entries == 0)) |
6144 | + __asm__ __volatile__("lldt %w0"::"q" (0)); |
6145 | + else { |
6146 | + unsigned cpu = smp_processor_id(); |
6147 | + __u32 a, b; |
6148 | + |
6149 | + pack_descriptor(&a, &b, (unsigned long)addr, |
6150 | + entries * sizeof(struct desc_struct) - 1, |
6151 | + DESCTYPE_LDT, 0); |
6152 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); |
6153 | + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); |
6154 | + } |
6155 | +} |
6156 | #endif |
6157 | -#ifndef CONFIG_X86_NO_IDT |
6158 | -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) |
6159 | |
6160 | +#ifndef CONFIG_X86_NO_IDT |
6161 | static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) |
6162 | { |
6163 | __u32 a, b; |
6164 | @@ -125,14 +138,6 @@ |
6165 | } |
6166 | #endif |
6167 | |
6168 | -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) |
6169 | -{ |
6170 | - __u32 a, b; |
6171 | - pack_descriptor(&a, &b, (unsigned long)addr, |
6172 | - entries * sizeof(struct desc_struct) - 1, |
6173 | - DESCTYPE_LDT, 0); |
6174 | - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); |
6175 | -} |
6176 | |
6177 | #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) |
6178 | |
6179 | @@ -163,36 +168,22 @@ |
6180 | |
6181 | static inline void clear_LDT(void) |
6182 | { |
6183 | - int cpu = get_cpu(); |
6184 | - |
6185 | - /* |
6186 | - * NB. We load the default_ldt for lcall7/27 handling on demand, as |
6187 | - * it slows down context switching. Noone uses it anyway. |
6188 | - */ |
6189 | - cpu = cpu; /* XXX avoid compiler warning */ |
6190 | - xen_set_ldt(NULL, 0); |
6191 | - put_cpu(); |
6192 | + set_ldt(NULL, 0); |
6193 | } |
6194 | |
6195 | /* |
6196 | * load one particular LDT into the current CPU |
6197 | */ |
6198 | -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) |
6199 | +static inline void load_LDT_nolock(mm_context_t *pc) |
6200 | { |
6201 | - void *segments = pc->ldt; |
6202 | - int count = pc->size; |
6203 | - |
6204 | - if (likely(!count)) |
6205 | - segments = NULL; |
6206 | - |
6207 | - xen_set_ldt(segments, count); |
6208 | + set_ldt(pc->ldt, pc->size); |
6209 | } |
6210 | |
6211 | static inline void load_LDT(mm_context_t *pc) |
6212 | { |
6213 | - int cpu = get_cpu(); |
6214 | - load_LDT_nolock(pc, cpu); |
6215 | - put_cpu(); |
6216 | + preempt_disable(); |
6217 | + load_LDT_nolock(pc); |
6218 | + preempt_enable(); |
6219 | } |
6220 | |
6221 | static inline unsigned long get_desc_base(unsigned long *desc) |
6222 | @@ -204,6 +195,29 @@ |
6223 | return base; |
6224 | } |
6225 | |
6226 | +#else /* __ASSEMBLY__ */ |
6227 | + |
6228 | +/* |
6229 | + * GET_DESC_BASE reads the descriptor base of the specified segment. |
6230 | + * |
6231 | + * Args: |
6232 | + * idx - descriptor index |
6233 | + * gdt - GDT pointer |
6234 | + * base - 32bit register to which the base will be written |
6235 | + * lo_w - lo word of the "base" register |
6236 | + * lo_b - lo byte of the "base" register |
6237 | + * hi_b - hi byte of the low word of the "base" register |
6238 | + * |
6239 | + * Example: |
6240 | + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) |
6241 | + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. |
6242 | + */ |
6243 | +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ |
6244 | + movb idx*8+4(gdt), lo_b; \ |
6245 | + movb idx*8+7(gdt), hi_b; \ |
6246 | + shll $16, base; \ |
6247 | + movw idx*8+2(gdt), lo_w; |
6248 | + |
6249 | #endif /* !__ASSEMBLY__ */ |
6250 | |
6251 | #endif |
6252 | --- a/include/asm-x86/mach-xen/asm/desc_64.h |
6253 | +++ b/include/asm-x86/mach-xen/asm/desc_64.h |
6254 | @@ -9,62 +9,11 @@ |
6255 | |
6256 | #include <linux/string.h> |
6257 | #include <linux/smp.h> |
6258 | +#include <asm/desc_defs.h> |
6259 | |
6260 | #include <asm/segment.h> |
6261 | #include <asm/mmu.h> |
6262 | |
6263 | -// 8 byte segment descriptor |
6264 | -struct desc_struct { |
6265 | - u16 limit0; |
6266 | - u16 base0; |
6267 | - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; |
6268 | - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; |
6269 | -} __attribute__((packed)); |
6270 | - |
6271 | -struct n_desc_struct { |
6272 | - unsigned int a,b; |
6273 | -}; |
6274 | - |
6275 | -enum { |
6276 | - GATE_INTERRUPT = 0xE, |
6277 | - GATE_TRAP = 0xF, |
6278 | - GATE_CALL = 0xC, |
6279 | -}; |
6280 | - |
6281 | -// 16byte gate |
6282 | -struct gate_struct { |
6283 | - u16 offset_low; |
6284 | - u16 segment; |
6285 | - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; |
6286 | - u16 offset_middle; |
6287 | - u32 offset_high; |
6288 | - u32 zero1; |
6289 | -} __attribute__((packed)); |
6290 | - |
6291 | -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) |
6292 | -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) |
6293 | -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) |
6294 | - |
6295 | -enum { |
6296 | - DESC_TSS = 0x9, |
6297 | - DESC_LDT = 0x2, |
6298 | -}; |
6299 | - |
6300 | -// LDT or TSS descriptor in the GDT. 16 bytes. |
6301 | -struct ldttss_desc { |
6302 | - u16 limit0; |
6303 | - u16 base0; |
6304 | - unsigned base1 : 8, type : 5, dpl : 2, p : 1; |
6305 | - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; |
6306 | - u32 base3; |
6307 | - u32 zero1; |
6308 | -} __attribute__((packed)); |
6309 | - |
6310 | -struct desc_ptr { |
6311 | - unsigned short size; |
6312 | - unsigned long address; |
6313 | -} __attribute__((packed)) ; |
6314 | - |
6315 | extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS]; |
6316 | |
6317 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; |
6318 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_32.h |
6319 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_32.h |
6320 | @@ -127,10 +127,10 @@ |
6321 | return (1 << INTERNODE_CACHE_SHIFT); |
6322 | } |
6323 | |
6324 | -#define dma_is_consistent(d) (1) |
6325 | +#define dma_is_consistent(d, h) (1) |
6326 | |
6327 | static inline void |
6328 | -dma_cache_sync(void *vaddr, size_t size, |
6329 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
6330 | enum dma_data_direction direction) |
6331 | { |
6332 | flush_write_buffers(); |
6333 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
6334 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h |
6335 | @@ -64,6 +64,9 @@ |
6336 | return (dma_addr == bad_dma_address); |
6337 | } |
6338 | |
6339 | +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
6340 | +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) |
6341 | + |
6342 | extern void *dma_alloc_coherent(struct device *dev, size_t size, |
6343 | dma_addr_t *dma_handle, gfp_t gfp); |
6344 | extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, |
6345 | @@ -181,12 +184,13 @@ |
6346 | return boot_cpu_data.x86_clflush_size; |
6347 | } |
6348 | |
6349 | -#define dma_is_consistent(h) 1 |
6350 | +#define dma_is_consistent(d, h) 1 |
6351 | |
6352 | extern int dma_set_mask(struct device *dev, u64 mask); |
6353 | |
6354 | static inline void |
6355 | -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) |
6356 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
6357 | + enum dma_data_direction dir) |
6358 | { |
6359 | flush_write_buffers(); |
6360 | } |
6361 | --- a/include/asm-x86/mach-xen/asm/fixmap_32.h |
6362 | +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h |
6363 | @@ -13,13 +13,16 @@ |
6364 | #ifndef _ASM_FIXMAP_H |
6365 | #define _ASM_FIXMAP_H |
6366 | |
6367 | - |
6368 | /* used by vmalloc.c, vsyscall.lds.S. |
6369 | * |
6370 | * Leave one empty page between vmalloc'ed areas and |
6371 | * the start of the fixmap. |
6372 | */ |
6373 | extern unsigned long __FIXADDR_TOP; |
6374 | +#ifdef CONFIG_COMPAT_VDSO |
6375 | +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) |
6376 | +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) |
6377 | +#endif |
6378 | |
6379 | #ifndef __ASSEMBLY__ |
6380 | #include <linux/kernel.h> |
6381 | --- a/include/asm-x86/mach-xen/asm/hypervisor.h |
6382 | +++ b/include/asm-x86/mach-xen/asm/hypervisor.h |
6383 | @@ -45,15 +45,6 @@ |
6384 | #include <xen/interface/nmi.h> |
6385 | #include <asm/ptrace.h> |
6386 | #include <asm/page.h> |
6387 | -#if defined(__i386__) |
6388 | -# ifdef CONFIG_X86_PAE |
6389 | -# include <asm-generic/pgtable-nopud.h> |
6390 | -# else |
6391 | -# include <asm-generic/pgtable-nopmd.h> |
6392 | -# endif |
6393 | -#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) |
6394 | -# include <asm-generic/pgtable-nopud.h> |
6395 | -#endif |
6396 | |
6397 | extern shared_info_t *HYPERVISOR_shared_info; |
6398 | |
6399 | --- a/include/asm-x86/mach-xen/asm/io_32.h |
6400 | +++ b/include/asm-x86/mach-xen/asm/io_32.h |
6401 | @@ -269,11 +269,7 @@ |
6402 | |
6403 | #endif /* __KERNEL__ */ |
6404 | |
6405 | -#ifdef SLOW_IO_BY_JUMPING |
6406 | -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" |
6407 | -#else |
6408 | #define __SLOW_DOWN_IO "outb %%al,$0x80;" |
6409 | -#endif |
6410 | |
6411 | static inline void slow_down_io(void) { |
6412 | __asm__ __volatile__( |
6413 | --- a/include/asm-x86/mach-xen/asm/irqflags_32.h |
6414 | +++ b/include/asm-x86/mach-xen/asm/irqflags_32.h |
6415 | @@ -22,9 +22,6 @@ |
6416 | |
6417 | #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) |
6418 | |
6419 | -#define raw_local_save_flags(flags) \ |
6420 | - do { (flags) = __raw_local_save_flags(); } while (0) |
6421 | - |
6422 | #define raw_local_irq_restore(x) \ |
6423 | do { \ |
6424 | vcpu_info_t *_vcpu; \ |
6425 | @@ -66,18 +63,6 @@ |
6426 | */ |
6427 | void halt(void); |
6428 | |
6429 | -static inline int raw_irqs_disabled_flags(unsigned long flags) |
6430 | -{ |
6431 | - return (flags != 0); |
6432 | -} |
6433 | - |
6434 | -#define raw_irqs_disabled() \ |
6435 | -({ \ |
6436 | - unsigned long flags = __raw_local_save_flags(); \ |
6437 | - \ |
6438 | - raw_irqs_disabled_flags(flags); \ |
6439 | -}) |
6440 | - |
6441 | /* |
6442 | * For spinlocks, etc: |
6443 | */ |
6444 | @@ -90,9 +75,62 @@ |
6445 | flags; \ |
6446 | }) |
6447 | |
6448 | +#else |
6449 | +/* Offsets into shared_info_t. */ |
6450 | +#define evtchn_upcall_pending /* 0 */ |
6451 | +#define evtchn_upcall_mask 1 |
6452 | + |
6453 | +#define sizeof_vcpu_shift 6 |
6454 | + |
6455 | +#ifdef CONFIG_SMP |
6456 | +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ |
6457 | + shl $sizeof_vcpu_shift,%esi ; \ |
6458 | + addl HYPERVISOR_shared_info,%esi |
6459 | +#else |
6460 | +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi |
6461 | +#endif |
6462 | + |
6463 | +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) |
6464 | +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) |
6465 | +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) |
6466 | +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ |
6467 | + __DISABLE_INTERRUPTS |
6468 | +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ |
6469 | + __ENABLE_INTERRUPTS |
6470 | +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ |
6471 | +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ |
6472 | + __TEST_PENDING ; \ |
6473 | + jnz 14f /* process more events if necessary... */ ; \ |
6474 | + movl PT_ESI(%esp), %esi ; \ |
6475 | + sysexit ; \ |
6476 | +14: __DISABLE_INTERRUPTS ; \ |
6477 | + TRACE_IRQS_OFF ; \ |
6478 | +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ |
6479 | + push %esp ; \ |
6480 | + call evtchn_do_upcall ; \ |
6481 | + add $4,%esp ; \ |
6482 | + jmp ret_from_intr |
6483 | +#define INTERRUPT_RETURN iret |
6484 | +#endif /* __ASSEMBLY__ */ |
6485 | + |
6486 | +#ifndef __ASSEMBLY__ |
6487 | +#define raw_local_save_flags(flags) \ |
6488 | + do { (flags) = __raw_local_save_flags(); } while (0) |
6489 | + |
6490 | #define raw_local_irq_save(flags) \ |
6491 | do { (flags) = __raw_local_irq_save(); } while (0) |
6492 | |
6493 | +static inline int raw_irqs_disabled_flags(unsigned long flags) |
6494 | +{ |
6495 | + return (flags != 0); |
6496 | +} |
6497 | + |
6498 | +#define raw_irqs_disabled() \ |
6499 | +({ \ |
6500 | + unsigned long flags = __raw_local_save_flags(); \ |
6501 | + \ |
6502 | + raw_irqs_disabled_flags(flags); \ |
6503 | +}) |
6504 | #endif /* __ASSEMBLY__ */ |
6505 | |
6506 | /* |
6507 | --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h |
6508 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h |
6509 | @@ -27,14 +27,13 @@ |
6510 | static inline void __prepare_arch_switch(void) |
6511 | { |
6512 | /* |
6513 | - * Save away %fs and %gs. No need to save %es and %ds, as those |
6514 | - * are always kernel segments while inside the kernel. Must |
6515 | - * happen before reload of cr3/ldt (i.e., not in __switch_to). |
6516 | + * Save away %fs. No need to save %gs, as it was saved on the |
6517 | + * stack on entry. No need to save %es and %ds, as those are |
6518 | + * always kernel segments while inside the kernel. |
6519 | */ |
6520 | - asm volatile ( "mov %%fs,%0 ; mov %%gs,%1" |
6521 | - : "=m" (current->thread.fs), |
6522 | - "=m" (current->thread.gs)); |
6523 | - asm volatile ( "movl %0,%%fs ; movl %0,%%gs" |
6524 | + asm volatile ( "mov %%fs,%0" |
6525 | + : "=m" (current->thread.fs)); |
6526 | + asm volatile ( "movl %0,%%fs" |
6527 | : : "r" (0) ); |
6528 | } |
6529 | |
6530 | @@ -89,14 +88,14 @@ |
6531 | * tlb flush IPI delivery. We must reload %cr3. |
6532 | */ |
6533 | load_cr3(next->pgd); |
6534 | - load_LDT_nolock(&next->context, cpu); |
6535 | + load_LDT_nolock(&next->context); |
6536 | } |
6537 | } |
6538 | #endif |
6539 | } |
6540 | |
6541 | -#define deactivate_mm(tsk, mm) \ |
6542 | - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) |
6543 | +#define deactivate_mm(tsk, mm) \ |
6544 | + asm("movl %0,%%fs": :"r" (0)); |
6545 | |
6546 | static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) |
6547 | { |
6548 | --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h |
6549 | +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h |
6550 | @@ -1,8 +1,6 @@ |
6551 | #ifndef _I386_PGTABLE_2LEVEL_H |
6552 | #define _I386_PGTABLE_2LEVEL_H |
6553 | |
6554 | -#include <asm-generic/pgtable-nopmd.h> |
6555 | - |
6556 | #define pte_ERROR(e) \ |
6557 | printk("%s:%d: bad pte %08lx (pfn %05lx).\n", __FILE__, __LINE__, \ |
6558 | __pte_val(e), pte_pfn(e)) |
6559 | @@ -23,26 +21,14 @@ |
6560 | set_pte((ptep), (pteval)); \ |
6561 | } while (0) |
6562 | |
6563 | -#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) |
6564 | - |
6565 | #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) |
6566 | |
6567 | +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) |
6568 | + |
6569 | #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) |
6570 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
6571 | |
6572 | -#define pte_none(x) (!(x).pte_low) |
6573 | - |
6574 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
6575 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6576 | -{ |
6577 | - pte_t pte = *ptep; |
6578 | - if (!pte_none(pte)) { |
6579 | - if ((mm != &init_mm) || |
6580 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) |
6581 | - pte = __pte_ma(xchg(&ptep->pte_low, 0)); |
6582 | - } |
6583 | - return pte; |
6584 | -} |
6585 | +#define raw_ptep_get_and_clear(xp, pte) __pte_ma(xchg(&(xp)->pte_low, 0)) |
6586 | |
6587 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
6588 | #define ptep_clear_flush(vma, addr, ptep) \ |
6589 | @@ -69,6 +55,7 @@ |
6590 | __pte_mfn(_pte)) |
6591 | |
6592 | #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) |
6593 | +#define pte_none(x) (!(x).pte_low) |
6594 | |
6595 | #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) |
6596 | #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) |
6597 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h |
6598 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h |
6599 | @@ -1,8 +1,6 @@ |
6600 | #ifndef _I386_PGTABLE_3LEVEL_H |
6601 | #define _I386_PGTABLE_3LEVEL_H |
6602 | |
6603 | -#include <asm-generic/pgtable-nopud.h> |
6604 | - |
6605 | /* |
6606 | * Intel Physical Address Extension (PAE) Mode - three-level page |
6607 | * tables on PPro+ CPUs. |
6608 | @@ -75,6 +73,23 @@ |
6609 | xen_l3_entry_update((pudptr), (pudval)) |
6610 | |
6611 | /* |
6612 | + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table |
6613 | + * entry, so clear the bottom half first and enforce ordering with a compiler |
6614 | + * barrier. |
6615 | + */ |
6616 | +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6617 | +{ |
6618 | + if ((mm != current->mm && mm != &init_mm) |
6619 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { |
6620 | + ptep->pte_low = 0; |
6621 | + smp_wmb(); |
6622 | + ptep->pte_high = 0; |
6623 | + } |
6624 | +} |
6625 | + |
6626 | +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
6627 | + |
6628 | +/* |
6629 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush |
6630 | * the TLB via cr3 if the top-level pgd is changed... |
6631 | * We do not let the generic code free and clear pgd entries due to |
6632 | @@ -93,45 +108,16 @@ |
6633 | #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ |
6634 | pmd_index(address)) |
6635 | |
6636 | -static inline int pte_none(pte_t pte) |
6637 | -{ |
6638 | - return !(pte.pte_low | pte.pte_high); |
6639 | -} |
6640 | - |
6641 | -/* |
6642 | - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table |
6643 | - * entry, so clear the bottom half first and enforce ordering with a compiler |
6644 | - * barrier. |
6645 | - */ |
6646 | -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6647 | +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) |
6648 | { |
6649 | - if ((mm != current->mm && mm != &init_mm) |
6650 | - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { |
6651 | - ptep->pte_low = 0; |
6652 | - smp_wmb(); |
6653 | + uint64_t val = __pte_val(res); |
6654 | + if (__cmpxchg64(ptep, val, 0) != val) { |
6655 | + /* xchg acts as a barrier before the setting of the high bits */ |
6656 | + res.pte_low = xchg(&ptep->pte_low, 0); |
6657 | + res.pte_high = ptep->pte_high; |
6658 | ptep->pte_high = 0; |
6659 | } |
6660 | -} |
6661 | - |
6662 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
6663 | - |
6664 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
6665 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6666 | -{ |
6667 | - pte_t pte = *ptep; |
6668 | - if (!pte_none(pte)) { |
6669 | - if ((mm != &init_mm) || |
6670 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { |
6671 | - uint64_t val = __pte_val(pte); |
6672 | - if (__cmpxchg64(ptep, val, 0) != val) { |
6673 | - /* xchg acts as a barrier before the setting of the high bits */ |
6674 | - pte.pte_low = xchg(&ptep->pte_low, 0); |
6675 | - pte.pte_high = ptep->pte_high; |
6676 | - ptep->pte_high = 0; |
6677 | - } |
6678 | - } |
6679 | - } |
6680 | - return pte; |
6681 | + return res; |
6682 | } |
6683 | |
6684 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH |
6685 | @@ -160,6 +146,11 @@ |
6686 | |
6687 | #define pte_page(x) pfn_to_page(pte_pfn(x)) |
6688 | |
6689 | +static inline int pte_none(pte_t pte) |
6690 | +{ |
6691 | + return !(pte.pte_low | pte.pte_high); |
6692 | +} |
6693 | + |
6694 | #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ |
6695 | ((_pte).pte_high << (32-PAGE_SHIFT))) |
6696 | #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ |
6697 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h |
6698 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h |
6699 | @@ -38,14 +38,14 @@ |
6700 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) |
6701 | extern unsigned long empty_zero_page[1024]; |
6702 | extern pgd_t *swapper_pg_dir; |
6703 | -extern kmem_cache_t *pgd_cache; |
6704 | -extern kmem_cache_t *pmd_cache; |
6705 | +extern struct kmem_cache *pgd_cache; |
6706 | +extern struct kmem_cache *pmd_cache; |
6707 | extern spinlock_t pgd_lock; |
6708 | extern struct page *pgd_list; |
6709 | |
6710 | -void pmd_ctor(void *, kmem_cache_t *, unsigned long); |
6711 | -void pgd_ctor(void *, kmem_cache_t *, unsigned long); |
6712 | -void pgd_dtor(void *, kmem_cache_t *, unsigned long); |
6713 | +void pmd_ctor(void *, struct kmem_cache *, unsigned long); |
6714 | +void pgd_ctor(void *, struct kmem_cache *, unsigned long); |
6715 | +void pgd_dtor(void *, struct kmem_cache *, unsigned long); |
6716 | void pgtable_cache_init(void); |
6717 | void paging_init(void); |
6718 | |
6719 | @@ -276,7 +276,6 @@ |
6720 | #define pte_update(mm, addr, ptep) do { } while (0) |
6721 | #define pte_update_defer(mm, addr, ptep) do { } while (0) |
6722 | |
6723 | - |
6724 | /* |
6725 | * We only update the dirty/accessed state if we set |
6726 | * the dirty bit by hand in the kernel, since the hardware |
6727 | @@ -342,6 +341,19 @@ |
6728 | __young; \ |
6729 | }) |
6730 | |
6731 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR |
6732 | +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
6733 | +{ |
6734 | + pte_t pte = *ptep; |
6735 | + if (!pte_none(pte) |
6736 | + && (mm != &init_mm |
6737 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { |
6738 | + pte = raw_ptep_get_and_clear(ptep, pte); |
6739 | + pte_update(mm, addr, ptep); |
6740 | + } |
6741 | + return pte; |
6742 | +} |
6743 | + |
6744 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL |
6745 | #define ptep_get_and_clear_full(mm, addr, ptep, full) \ |
6746 | ((full) ? ({ \ |
6747 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h |
6748 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h |
6749 | @@ -236,19 +236,18 @@ |
6750 | |
6751 | static inline unsigned long pgd_bad(pgd_t pgd) |
6752 | { |
6753 | - unsigned long val = __pgd_val(pgd); |
6754 | - val &= ~PTE_MASK; |
6755 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); |
6756 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); |
6757 | + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); |
6758 | } |
6759 | |
6760 | -static inline unsigned long pud_bad(pud_t pud) |
6761 | -{ |
6762 | - unsigned long val = __pud_val(pud); |
6763 | - val &= ~PTE_MASK; |
6764 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); |
6765 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); |
6766 | -} |
6767 | +static inline unsigned long pud_bad(pud_t pud) |
6768 | +{ |
6769 | + return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); |
6770 | +} |
6771 | + |
6772 | +static inline unsigned long pmd_bad(pmd_t pmd) |
6773 | +{ |
6774 | + return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); |
6775 | +} |
6776 | |
6777 | #define set_pte_at(_mm,addr,ptep,pteval) do { \ |
6778 | if (((_mm) != current->mm && (_mm) != &init_mm) || \ |
6779 | @@ -404,8 +403,6 @@ |
6780 | #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) |
6781 | #endif |
6782 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) |
6783 | -#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ |
6784 | - != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) |
6785 | #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) |
6786 | #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) |
6787 | |
6788 | --- a/include/asm-x86/mach-xen/asm/processor_32.h |
6789 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h |
6790 | @@ -20,6 +20,7 @@ |
6791 | #include <linux/threads.h> |
6792 | #include <asm/percpu.h> |
6793 | #include <linux/cpumask.h> |
6794 | +#include <linux/init.h> |
6795 | #include <xen/interface/physdev.h> |
6796 | |
6797 | /* flag for disabling the tsc */ |
6798 | @@ -73,6 +74,7 @@ |
6799 | #endif |
6800 | unsigned char x86_max_cores; /* cpuid returned max cores value */ |
6801 | unsigned char apicid; |
6802 | + unsigned short x86_clflush_size; |
6803 | #ifdef CONFIG_SMP |
6804 | unsigned char booted_cores; /* number of cores as seen by OS */ |
6805 | __u8 phys_proc_id; /* Physical processor id. */ |
6806 | @@ -114,6 +116,8 @@ |
6807 | extern int cpu_llc_id[NR_CPUS]; |
6808 | extern char ignore_fpu_irq; |
6809 | |
6810 | +void __init cpu_detect(struct cpuinfo_x86 *c); |
6811 | + |
6812 | extern void identify_cpu(struct cpuinfo_x86 *); |
6813 | extern void print_cpu_info(struct cpuinfo_x86 *); |
6814 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); |
6815 | @@ -146,8 +150,8 @@ |
6816 | #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ |
6817 | #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ |
6818 | |
6819 | -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, |
6820 | - unsigned int *ecx, unsigned int *edx) |
6821 | +static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, |
6822 | + unsigned int *ecx, unsigned int *edx) |
6823 | { |
6824 | /* ecx is often an input as well as an output. */ |
6825 | __asm__(XEN_CPUID |
6826 | @@ -158,59 +162,6 @@ |
6827 | : "0" (*eax), "2" (*ecx)); |
6828 | } |
6829 | |
6830 | -/* |
6831 | - * Generic CPUID function |
6832 | - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx |
6833 | - * resulting in stale register contents being returned. |
6834 | - */ |
6835 | -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) |
6836 | -{ |
6837 | - *eax = op; |
6838 | - *ecx = 0; |
6839 | - __cpuid(eax, ebx, ecx, edx); |
6840 | -} |
6841 | - |
6842 | -/* Some CPUID calls want 'count' to be placed in ecx */ |
6843 | -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, |
6844 | - int *edx) |
6845 | -{ |
6846 | - *eax = op; |
6847 | - *ecx = count; |
6848 | - __cpuid(eax, ebx, ecx, edx); |
6849 | -} |
6850 | - |
6851 | -/* |
6852 | - * CPUID functions returning a single datum |
6853 | - */ |
6854 | -static inline unsigned int cpuid_eax(unsigned int op) |
6855 | -{ |
6856 | - unsigned int eax, ebx, ecx, edx; |
6857 | - |
6858 | - cpuid(op, &eax, &ebx, &ecx, &edx); |
6859 | - return eax; |
6860 | -} |
6861 | -static inline unsigned int cpuid_ebx(unsigned int op) |
6862 | -{ |
6863 | - unsigned int eax, ebx, ecx, edx; |
6864 | - |
6865 | - cpuid(op, &eax, &ebx, &ecx, &edx); |
6866 | - return ebx; |
6867 | -} |
6868 | -static inline unsigned int cpuid_ecx(unsigned int op) |
6869 | -{ |
6870 | - unsigned int eax, ebx, ecx, edx; |
6871 | - |
6872 | - cpuid(op, &eax, &ebx, &ecx, &edx); |
6873 | - return ecx; |
6874 | -} |
6875 | -static inline unsigned int cpuid_edx(unsigned int op) |
6876 | -{ |
6877 | - unsigned int eax, ebx, ecx, edx; |
6878 | - |
6879 | - cpuid(op, &eax, &ebx, &ecx, &edx); |
6880 | - return edx; |
6881 | -} |
6882 | - |
6883 | #define load_cr3(pgdir) write_cr3(__pa(pgdir)) |
6884 | |
6885 | /* |
6886 | @@ -480,9 +431,9 @@ |
6887 | .vm86_info = NULL, \ |
6888 | .sysenter_cs = __KERNEL_CS, \ |
6889 | .io_bitmap_ptr = NULL, \ |
6890 | + .gs = __KERNEL_PDA, \ |
6891 | } |
6892 | |
6893 | -#ifndef CONFIG_X86_NO_TSS |
6894 | /* |
6895 | * Note that the .io_bitmap member must be extra-big. This is because |
6896 | * the CPU will access an additional byte beyond the end of the IO |
6897 | @@ -497,26 +448,9 @@ |
6898 | .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ |
6899 | } |
6900 | |
6901 | -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) |
6902 | -{ |
6903 | - tss->esp0 = thread->esp0; |
6904 | - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
6905 | - if (unlikely(tss->ss1 != thread->sysenter_cs)) { |
6906 | - tss->ss1 = thread->sysenter_cs; |
6907 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
6908 | - } |
6909 | -} |
6910 | -#define load_esp0(tss, thread) \ |
6911 | - __load_esp0(tss, thread) |
6912 | -#else |
6913 | -#define load_esp0(tss, thread) do { \ |
6914 | - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ |
6915 | - BUG(); \ |
6916 | -} while (0) |
6917 | -#endif |
6918 | - |
6919 | #define start_thread(regs, new_eip, new_esp) do { \ |
6920 | - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ |
6921 | + __asm__("movl %0,%%fs": :"r" (0)); \ |
6922 | + regs->xgs = 0; \ |
6923 | set_fs(USER_DS); \ |
6924 | regs->xds = __USER_DS; \ |
6925 | regs->xes = __USER_DS; \ |
6926 | @@ -526,26 +460,6 @@ |
6927 | regs->esp = new_esp; \ |
6928 | } while (0) |
6929 | |
6930 | -/* |
6931 | - * These special macros can be used to get or set a debugging register |
6932 | - */ |
6933 | -#define get_debugreg(var, register) \ |
6934 | - (var) = HYPERVISOR_get_debugreg((register)) |
6935 | -#define set_debugreg(value, register) \ |
6936 | - WARN_ON(HYPERVISOR_set_debugreg((register), (value))) |
6937 | - |
6938 | -/* |
6939 | - * Set IOPL bits in EFLAGS from given mask |
6940 | - */ |
6941 | -static inline void set_iopl_mask(unsigned mask) |
6942 | -{ |
6943 | - struct physdev_set_iopl set_iopl; |
6944 | - |
6945 | - /* Force the change at ring 0. */ |
6946 | - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; |
6947 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); |
6948 | -} |
6949 | - |
6950 | /* Forward declaration, a strange C thing */ |
6951 | struct task_struct; |
6952 | struct mm_struct; |
6953 | @@ -637,6 +551,105 @@ |
6954 | |
6955 | #define cpu_relax() rep_nop() |
6956 | |
6957 | +#define paravirt_enabled() 0 |
6958 | +#define __cpuid xen_cpuid |
6959 | + |
6960 | +#ifndef CONFIG_X86_NO_TSS |
6961 | +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) |
6962 | +{ |
6963 | + tss->esp0 = thread->esp0; |
6964 | + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
6965 | + if (unlikely(tss->ss1 != thread->sysenter_cs)) { |
6966 | + tss->ss1 = thread->sysenter_cs; |
6967 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
6968 | + } |
6969 | +} |
6970 | +#define load_esp0(tss, thread) \ |
6971 | + __load_esp0(tss, thread) |
6972 | +#else |
6973 | +#define load_esp0(tss, thread) do { \ |
6974 | + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ |
6975 | + BUG(); \ |
6976 | +} while (0) |
6977 | +#endif |
6978 | + |
6979 | + |
6980 | +/* |
6981 | + * These special macros can be used to get or set a debugging register |
6982 | + */ |
6983 | +#define get_debugreg(var, register) \ |
6984 | + (var) = HYPERVISOR_get_debugreg(register) |
6985 | +#define set_debugreg(value, register) \ |
6986 | + WARN_ON(HYPERVISOR_set_debugreg(register, value)) |
6987 | + |
6988 | +#define set_iopl_mask xen_set_iopl_mask |
6989 | + |
6990 | +/* |
6991 | + * Set IOPL bits in EFLAGS from given mask |
6992 | + */ |
6993 | +static inline void xen_set_iopl_mask(unsigned mask) |
6994 | +{ |
6995 | + struct physdev_set_iopl set_iopl; |
6996 | + |
6997 | + /* Force the change at ring 0. */ |
6998 | + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; |
6999 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); |
7000 | +} |
7001 | + |
7002 | + |
7003 | +/* |
7004 | + * Generic CPUID function |
7005 | + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx |
7006 | + * resulting in stale register contents being returned. |
7007 | + */ |
7008 | +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) |
7009 | +{ |
7010 | + *eax = op; |
7011 | + *ecx = 0; |
7012 | + __cpuid(eax, ebx, ecx, edx); |
7013 | +} |
7014 | + |
7015 | +/* Some CPUID calls want 'count' to be placed in ecx */ |
7016 | +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, |
7017 | + int *edx) |
7018 | +{ |
7019 | + *eax = op; |
7020 | + *ecx = count; |
7021 | + __cpuid(eax, ebx, ecx, edx); |
7022 | +} |
7023 | + |
7024 | +/* |
7025 | + * CPUID functions returning a single datum |
7026 | + */ |
7027 | +static inline unsigned int cpuid_eax(unsigned int op) |
7028 | +{ |
7029 | + unsigned int eax, ebx, ecx, edx; |
7030 | + |
7031 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
7032 | + return eax; |
7033 | +} |
7034 | +static inline unsigned int cpuid_ebx(unsigned int op) |
7035 | +{ |
7036 | + unsigned int eax, ebx, ecx, edx; |
7037 | + |
7038 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
7039 | + return ebx; |
7040 | +} |
7041 | +static inline unsigned int cpuid_ecx(unsigned int op) |
7042 | +{ |
7043 | + unsigned int eax, ebx, ecx, edx; |
7044 | + |
7045 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
7046 | + return ecx; |
7047 | +} |
7048 | +static inline unsigned int cpuid_edx(unsigned int op) |
7049 | +{ |
7050 | + unsigned int eax, ebx, ecx, edx; |
7051 | + |
7052 | + cpuid(op, &eax, &ebx, &ecx, &edx); |
7053 | + return edx; |
7054 | +} |
7055 | + |
7056 | /* generic versions from gas */ |
7057 | #define GENERIC_NOP1 ".byte 0x90\n" |
7058 | #define GENERIC_NOP2 ".byte 0x89,0xf6\n" |
7059 | @@ -736,4 +749,8 @@ |
7060 | extern void enable_sep_cpu(void); |
7061 | extern int sysenter_setup(void); |
7062 | |
7063 | +extern int init_gdt(int cpu, struct task_struct *idle); |
7064 | +extern void cpu_set_gdt(int); |
7065 | +extern void secondary_cpu_init(void); |
7066 | + |
7067 | #endif /* __ASM_I386_PROCESSOR_H */ |
7068 | --- a/include/asm-x86/mach-xen/asm/processor_64.h |
7069 | +++ b/include/asm-x86/mach-xen/asm/processor_64.h |
7070 | @@ -484,6 +484,14 @@ |
7071 | : :"a" (eax), "c" (ecx)); |
7072 | } |
7073 | |
7074 | +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) |
7075 | +{ |
7076 | + /* "mwait %eax,%ecx;" */ |
7077 | + asm volatile( |
7078 | + "sti; .byte 0x0f,0x01,0xc9;" |
7079 | + : :"a" (eax), "c" (ecx)); |
7080 | +} |
7081 | + |
7082 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); |
7083 | |
7084 | #define stack_current() \ |
7085 | --- a/include/asm-x86/mach-xen/asm/segment_32.h |
7086 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h |
7087 | @@ -39,7 +39,7 @@ |
7088 | * 25 - APM BIOS support |
7089 | * |
7090 | * 26 - ESPFIX small SS |
7091 | - * 27 - unused |
7092 | + * 27 - PDA [ per-cpu private data area ] |
7093 | * 28 - unused |
7094 | * 29 - unused |
7095 | * 30 - unused |
7096 | @@ -74,6 +74,9 @@ |
7097 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) |
7098 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) |
7099 | |
7100 | +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) |
7101 | +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) |
7102 | + |
7103 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 |
7104 | |
7105 | /* |
7106 | --- a/include/asm-x86/mach-xen/asm/smp_32.h |
7107 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h |
7108 | @@ -8,6 +8,7 @@ |
7109 | #include <linux/kernel.h> |
7110 | #include <linux/threads.h> |
7111 | #include <linux/cpumask.h> |
7112 | +#include <asm/pda.h> |
7113 | #endif |
7114 | |
7115 | #ifdef CONFIG_X86_LOCAL_APIC |
7116 | @@ -56,7 +57,7 @@ |
7117 | * from the initial startup. We map APIC_BASE very early in page_setup(), |
7118 | * so this is correct in the x86 case. |
7119 | */ |
7120 | -#define raw_smp_processor_id() (current_thread_info()->cpu) |
7121 | +#define raw_smp_processor_id() (read_pda(cpu_number)) |
7122 | |
7123 | extern cpumask_t cpu_possible_map; |
7124 | #define cpu_callin_map cpu_possible_map |
7125 | --- a/include/asm-x86/mach-xen/asm/smp_64.h |
7126 | +++ b/include/asm-x86/mach-xen/asm/smp_64.h |
7127 | @@ -88,11 +88,6 @@ |
7128 | extern u8 bios_cpu_apicid[]; |
7129 | |
7130 | #ifdef CONFIG_X86_LOCAL_APIC |
7131 | -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) |
7132 | -{ |
7133 | - return cpus_addr(cpumask)[0]; |
7134 | -} |
7135 | - |
7136 | static inline int cpu_present_to_apicid(int mps_cpu) |
7137 | { |
7138 | if (mps_cpu < NR_CPUS) |
7139 | @@ -127,13 +122,6 @@ |
7140 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] |
7141 | #else |
7142 | #define cpu_physical_id(cpu) boot_cpu_id |
7143 | -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), |
7144 | - void *info, int retry, int wait) |
7145 | -{ |
7146 | - /* Disable interrupts here? */ |
7147 | - func(info); |
7148 | - return 0; |
7149 | -} |
7150 | #endif /* !CONFIG_SMP */ |
7151 | #endif |
7152 | |
7153 | --- a/include/asm-x86/mach-xen/asm/system_32.h |
7154 | +++ b/include/asm-x86/mach-xen/asm/system_32.h |
7155 | @@ -139,17 +139,17 @@ |
7156 | #define write_cr4(x) \ |
7157 | __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) |
7158 | |
7159 | -/* |
7160 | - * Clear and set 'TS' bit respectively |
7161 | - */ |
7162 | +#define wbinvd() \ |
7163 | + __asm__ __volatile__ ("wbinvd": : :"memory") |
7164 | + |
7165 | +/* Clear the 'TS' bit */ |
7166 | #define clts() (HYPERVISOR_fpu_taskswitch(0)) |
7167 | + |
7168 | +/* Set the 'TS' bit */ |
7169 | #define stts() (HYPERVISOR_fpu_taskswitch(1)) |
7170 | |
7171 | #endif /* __KERNEL__ */ |
7172 | |
7173 | -#define wbinvd() \ |
7174 | - __asm__ __volatile__ ("wbinvd": : :"memory") |
7175 | - |
7176 | static inline unsigned long get_limit(unsigned long segment) |
7177 | { |
7178 | unsigned long __limit; |
7179 | --- a/kernel/kexec.c |
7180 | +++ b/kernel/kexec.c |
7181 | @@ -353,7 +353,7 @@ |
7182 | if (limit == ~0UL) |
7183 | address_bits = BITS_PER_LONG; |
7184 | else |
7185 | - address_bits = long_log2(limit); |
7186 | + address_bits = ilog2(limit); |
7187 | |
7188 | if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { |
7189 | __free_pages(pages, order); |
7190 | --- a/net/core/dev.c |
7191 | +++ b/net/core/dev.c |
7192 | @@ -1597,10 +1597,10 @@ |
7193 | goto out; |
7194 | switch (skb->nh.iph->protocol) { |
7195 | case IPPROTO_TCP: |
7196 | - skb->csum = offsetof(struct tcphdr, check); |
7197 | + skb->csum_offset = offsetof(struct tcphdr, check); |
7198 | break; |
7199 | case IPPROTO_UDP: |
7200 | - skb->csum = offsetof(struct udphdr, check); |
7201 | + skb->csum_offset = offsetof(struct udphdr, check); |
7202 | break; |
7203 | default: |
7204 | if (net_ratelimit()) |
7205 | @@ -1609,7 +1609,7 @@ |
7206 | " %d packet", skb->nh.iph->protocol); |
7207 | goto out; |
7208 | } |
7209 | - if ((skb->h.raw + skb->csum + 2) > skb->tail) |
7210 | + if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) |
7211 | goto out; |
7212 | skb->ip_summed = CHECKSUM_PARTIAL; |
7213 | skb->proto_csum_blank = 0; |