Annotation of /trunk/kernel26-xen/patches-2.6.25-r1/1021-2.6.25-xen-patch-2.6.20.patch
Parent Directory | Revision Log
Revision 609 -
(hide annotations)
(download)
Fri May 23 17:35:37 2008 UTC (16 years, 4 months ago) by niro
File size: 199019 byte(s)
-using opensuse xen patchset, updated kernel configs
1 | niro | 609 | From: www.kernel.org |
2 | Subject: Linux 2.6.20 | ||
3 | Patch-mainline: 2.6.20 | ||
4 | |||
5 | Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py | ||
6 | |||
7 | Acked-by: jbeulich@novell.com | ||
8 | |||
9 | --- | ||
10 | arch/x86/Kconfig | 2 | ||
11 | arch/x86/kernel/asm-offsets_32.c | 6 | ||
12 | arch/x86/kernel/cpu/common-xen.c | 286 ++++--- | ||
13 | arch/x86/kernel/cpu/mtrr/main-xen.c | 5 | ||
14 | arch/x86/kernel/e820_32-xen.c | 1000 ++++++++++++++++++++++++++ | ||
15 | arch/x86/kernel/entry_32-xen.S | 387 ++++------ | ||
16 | arch/x86/kernel/entry_64-xen.S | 69 - | ||
17 | arch/x86/kernel/genapic_64-xen.c | 8 | ||
18 | arch/x86/kernel/head64-xen.c | 5 | ||
19 | arch/x86/kernel/head_32-xen.S | 63 + | ||
20 | arch/x86/kernel/io_apic_32-xen.c | 68 - | ||
21 | arch/x86/kernel/io_apic_64-xen.c | 133 ++- | ||
22 | arch/x86/kernel/irq_64-xen.c | 2 | ||
23 | arch/x86/kernel/ldt_32-xen.c | 4 | ||
24 | arch/x86/kernel/microcode-xen.c | 6 | ||
25 | arch/x86/kernel/mpparse_32-xen.c | 12 | ||
26 | arch/x86/kernel/mpparse_64-xen.c | 2 | ||
27 | arch/x86/kernel/pci-dma_32-xen.c | 10 | ||
28 | arch/x86/kernel/process_32-xen.c | 56 - | ||
29 | arch/x86/kernel/process_64-xen.c | 34 | ||
30 | arch/x86/kernel/quirks-xen.c | 61 + | ||
31 | arch/x86/kernel/setup_32-xen.c | 974 ------------------------- | ||
32 | arch/x86/kernel/setup_64-xen.c | 24 | ||
33 | arch/x86/kernel/smp_32-xen.c | 4 | ||
34 | arch/x86/kernel/smp_64-xen.c | 5 | ||
35 | arch/x86/kernel/time_32-xen.c | 17 | ||
36 | arch/x86/kernel/traps_32-xen.c | 204 +---- | ||
37 | arch/x86/kernel/traps_64-xen.c | 139 --- | ||
38 | arch/x86/kernel/vmlinux_32.lds.S | 6 | ||
39 | arch/x86/kernel/vsyscall_64-xen.c | 7 | ||
40 | arch/x86/kvm/Kconfig | 1 | ||
41 | arch/x86/mm/fault_32-xen.c | 12 | ||
42 | arch/x86/mm/fault_64-xen.c | 10 | ||
43 | arch/x86/mm/highmem_32-xen.c | 26 | ||
44 | arch/x86/mm/init_32-xen.c | 20 | ||
45 | arch/x86/mm/init_64-xen.c | 7 | ||
46 | arch/x86/mm/pageattr_64-xen.c | 58 - | ||
47 | arch/x86/mm/pgtable_32-xen.c | 6 | ||
48 | arch/x86/pci/irq-xen.c | 4 | ||
49 | drivers/xen/balloon/balloon.c | 6 | ||
50 | drivers/xen/blkback/blkback.c | 1 | ||
51 | drivers/xen/blkback/interface.c | 2 | ||
52 | drivers/xen/blkfront/blkfront.c | 8 | ||
53 | drivers/xen/blktap/blktap.c | 1 | ||
54 | drivers/xen/blktap/interface.c | 2 | ||
55 | drivers/xen/char/mem.c | 4 | ||
56 | drivers/xen/console/console.c | 13 | ||
57 | drivers/xen/core/reboot.c | 10 | ||
58 | drivers/xen/core/smpboot.c | 21 | ||
59 | drivers/xen/fbfront/xenfb.c | 1 | ||
60 | drivers/xen/netback/loopback.c | 1 | ||
61 | drivers/xen/pciback/conf_space_header.c | 4 | ||
62 | drivers/xen/pciback/pciback.h | 2 | ||
63 | drivers/xen/pciback/pciback_ops.c | 6 | ||
64 | drivers/xen/pciback/xenbus.c | 3 | ||
65 | drivers/xen/sfc_netfront/accel_vi.c | 4 | ||
66 | drivers/xen/tpmback/interface.c | 2 | ||
67 | drivers/xen/xenbus/xenbus_comms.c | 4 | ||
68 | drivers/xen/xenbus/xenbus_probe.c | 2 | ||
69 | include/asm-x86/mach-xen/asm/desc_32.h | 100 +- | ||
70 | include/asm-x86/mach-xen/asm/desc_64.h | 53 - | ||
71 | include/asm-x86/mach-xen/asm/dma-mapping_32.h | 4 | ||
72 | include/asm-x86/mach-xen/asm/dma-mapping_64.h | 8 | ||
73 | include/asm-x86/mach-xen/asm/fixmap_32.h | 5 | ||
74 | include/asm-x86/mach-xen/asm/hypervisor.h | 9 | ||
75 | include/asm-x86/mach-xen/asm/io_32.h | 4 | ||
76 | include/asm-x86/mach-xen/asm/irqflags_32.h | 68 + | ||
77 | include/asm-x86/mach-xen/asm/mmu_context_32.h | 19 | ||
78 | include/asm-x86/mach-xen/asm/pgtable-2level.h | 21 | ||
79 | include/asm-x86/mach-xen/asm/pgtable-3level.h | 67 - | ||
80 | include/asm-x86/mach-xen/asm/pgtable_32.h | 24 | ||
81 | include/asm-x86/mach-xen/asm/pgtable_64.h | 23 | ||
82 | include/asm-x86/mach-xen/asm/processor_32.h | 207 ++--- | ||
83 | include/asm-x86/mach-xen/asm/processor_64.h | 8 | ||
84 | include/asm-x86/mach-xen/asm/segment_32.h | 5 | ||
85 | include/asm-x86/mach-xen/asm/smp_32.h | 3 | ||
86 | include/asm-x86/mach-xen/asm/smp_64.h | 12 | ||
87 | include/asm-x86/mach-xen/asm/system_32.h | 12 | ||
88 | kernel/kexec.c | 2 | ||
89 | net/core/dev.c | 6 | ||
90 | 80 files changed, 2263 insertions(+), 2237 deletions(-) | ||
91 | |||
92 | --- a/arch/x86/Kconfig | ||
93 | +++ b/arch/x86/Kconfig | ||
94 | @@ -1220,7 +1220,7 @@ | ||
95 | |||
96 | config RELOCATABLE | ||
97 | bool "Build a relocatable kernel (EXPERIMENTAL)" | ||
98 | - depends on EXPERIMENTAL | ||
99 | + depends on EXPERIMENTAL && !X86_XEN | ||
100 | help | ||
101 | This builds a kernel image that retains relocation information | ||
102 | so it can be loaded someplace besides the default 1MB. | ||
103 | --- a/arch/x86/kernel/asm-offsets_32.c | ||
104 | +++ b/arch/x86/kernel/asm-offsets_32.c | ||
105 | @@ -61,6 +61,7 @@ | ||
106 | OFFSET(TI_exec_domain, thread_info, exec_domain); | ||
107 | OFFSET(TI_flags, thread_info, flags); | ||
108 | OFFSET(TI_status, thread_info, status); | ||
109 | + OFFSET(TI_cpu, thread_info, cpu); | ||
110 | OFFSET(TI_preempt_count, thread_info, preempt_count); | ||
111 | OFFSET(TI_addr_limit, thread_info, addr_limit); | ||
112 | OFFSET(TI_restart_block, thread_info, restart_block); | ||
113 | @@ -115,6 +116,11 @@ | ||
114 | |||
115 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | ||
116 | |||
117 | +#ifdef CONFIG_XEN | ||
118 | + BLANK(); | ||
119 | + OFFSET(XEN_START_mfn_list, start_info, mfn_list); | ||
120 | +#endif | ||
121 | + | ||
122 | #ifdef CONFIG_PARAVIRT | ||
123 | BLANK(); | ||
124 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
125 | --- a/arch/x86/kernel/cpu/common-xen.c | ||
126 | +++ b/arch/x86/kernel/cpu/common-xen.c | ||
127 | @@ -22,6 +22,7 @@ | ||
128 | #define phys_pkg_id(a,b) a | ||
129 | #endif | ||
130 | #endif | ||
131 | +#include <asm/pda.h> | ||
132 | #include <asm/hypervisor.h> | ||
133 | |||
134 | #include "cpu.h" | ||
135 | @@ -29,10 +30,8 @@ | ||
136 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | ||
137 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | ||
138 | |||
139 | -#ifndef CONFIG_XEN | ||
140 | -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | ||
141 | -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); | ||
142 | -#endif | ||
143 | +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; | ||
144 | +EXPORT_SYMBOL(_cpu_pda); | ||
145 | |||
146 | static int cachesize_override __cpuinitdata = -1; | ||
147 | static int disable_x86_fxsr __cpuinitdata; | ||
148 | @@ -60,7 +59,7 @@ | ||
149 | .c_init = default_init, | ||
150 | .c_vendor = "Unknown", | ||
151 | }; | ||
152 | -static struct cpu_dev * this_cpu = &default_cpu; | ||
153 | +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; | ||
154 | |||
155 | static int __init cachesize_setup(char *str) | ||
156 | { | ||
157 | @@ -242,29 +241,14 @@ | ||
158 | return flag_is_changeable_p(X86_EFLAGS_ID); | ||
159 | } | ||
160 | |||
161 | -/* Do minimum CPU detection early. | ||
162 | - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | ||
163 | - The others are not touched to avoid unwanted side effects. | ||
164 | - | ||
165 | - WARNING: this function is only called on the BP. Don't add code here | ||
166 | - that is supposed to run on all CPUs. */ | ||
167 | -static void __init early_cpu_detect(void) | ||
168 | +void __init cpu_detect(struct cpuinfo_x86 *c) | ||
169 | { | ||
170 | - struct cpuinfo_x86 *c = &boot_cpu_data; | ||
171 | - | ||
172 | - c->x86_cache_alignment = 32; | ||
173 | - | ||
174 | - if (!have_cpuid_p()) | ||
175 | - return; | ||
176 | - | ||
177 | /* Get vendor name */ | ||
178 | cpuid(0x00000000, &c->cpuid_level, | ||
179 | (int *)&c->x86_vendor_id[0], | ||
180 | (int *)&c->x86_vendor_id[8], | ||
181 | (int *)&c->x86_vendor_id[4]); | ||
182 | |||
183 | - get_cpu_vendor(c, 1); | ||
184 | - | ||
185 | c->x86 = 4; | ||
186 | if (c->cpuid_level >= 0x00000001) { | ||
187 | u32 junk, tfms, cap0, misc; | ||
188 | @@ -281,6 +265,26 @@ | ||
189 | } | ||
190 | } | ||
191 | |||
192 | +/* Do minimum CPU detection early. | ||
193 | + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. | ||
194 | + The others are not touched to avoid unwanted side effects. | ||
195 | + | ||
196 | + WARNING: this function is only called on the BP. Don't add code here | ||
197 | + that is supposed to run on all CPUs. */ | ||
198 | +static void __init early_cpu_detect(void) | ||
199 | +{ | ||
200 | + struct cpuinfo_x86 *c = &boot_cpu_data; | ||
201 | + | ||
202 | + c->x86_cache_alignment = 32; | ||
203 | + | ||
204 | + if (!have_cpuid_p()) | ||
205 | + return; | ||
206 | + | ||
207 | + cpu_detect(c); | ||
208 | + | ||
209 | + get_cpu_vendor(c, 1); | ||
210 | +} | ||
211 | + | ||
212 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) | ||
213 | { | ||
214 | u32 tfms, xlvl; | ||
215 | @@ -315,6 +319,8 @@ | ||
216 | #else | ||
217 | c->apicid = (ebx >> 24) & 0xFF; | ||
218 | #endif | ||
219 | + if (c->x86_capability[0] & (1<<19)) | ||
220 | + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; | ||
221 | } else { | ||
222 | /* Have CPUID level 0 only - unheard of */ | ||
223 | c->x86 = 4; | ||
224 | @@ -379,6 +385,7 @@ | ||
225 | c->x86_vendor_id[0] = '\0'; /* Unset */ | ||
226 | c->x86_model_id[0] = '\0'; /* Unset */ | ||
227 | c->x86_max_cores = 1; | ||
228 | + c->x86_clflush_size = 32; | ||
229 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | ||
230 | |||
231 | if (!have_cpuid_p()) { | ||
232 | @@ -599,61 +606,23 @@ | ||
233 | #endif | ||
234 | } | ||
235 | |||
236 | -static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr) | ||
237 | +/* Make sure %gs is initialized properly in idle threads */ | ||
238 | +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | ||
239 | { | ||
240 | - unsigned long frames[16]; | ||
241 | - unsigned long va; | ||
242 | - int f; | ||
243 | - | ||
244 | - for (va = gdt_descr->address, f = 0; | ||
245 | - va < gdt_descr->address + gdt_descr->size; | ||
246 | - va += PAGE_SIZE, f++) { | ||
247 | - frames[f] = virt_to_mfn(va); | ||
248 | - make_lowmem_page_readonly( | ||
249 | - (void *)va, XENFEAT_writable_descriptor_tables); | ||
250 | - } | ||
251 | - if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8)) | ||
252 | - BUG(); | ||
253 | + memset(regs, 0, sizeof(struct pt_regs)); | ||
254 | + regs->xgs = __KERNEL_PDA; | ||
255 | + return regs; | ||
256 | } | ||
257 | |||
258 | -/* | ||
259 | - * cpu_init() initializes state that is per-CPU. Some data is already | ||
260 | - * initialized (naturally) in the bootstrap process, such as the GDT | ||
261 | - * and IDT. We reload them nevertheless, this function acts as a | ||
262 | - * 'CPU state barrier', nothing should get across. | ||
263 | - */ | ||
264 | -void __cpuinit cpu_init(void) | ||
265 | +static __cpuinit int alloc_gdt(int cpu) | ||
266 | { | ||
267 | - int cpu = smp_processor_id(); | ||
268 | -#ifndef CONFIG_X86_NO_TSS | ||
269 | - struct tss_struct * t = &per_cpu(init_tss, cpu); | ||
270 | -#endif | ||
271 | - struct thread_struct *thread = ¤t->thread; | ||
272 | - struct desc_struct *gdt; | ||
273 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
274 | + struct desc_struct *gdt; | ||
275 | + struct i386_pda *pda; | ||
276 | |||
277 | - if (cpu_test_and_set(cpu, cpu_initialized)) { | ||
278 | - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | ||
279 | - for (;;) local_irq_enable(); | ||
280 | - } | ||
281 | - printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
282 | - | ||
283 | - if (cpu_has_vme || cpu_has_de) | ||
284 | - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
285 | - if (tsc_disable && cpu_has_tsc) { | ||
286 | - printk(KERN_NOTICE "Disabling TSC...\n"); | ||
287 | - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | ||
288 | - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
289 | - set_in_cr4(X86_CR4_TSD); | ||
290 | - } | ||
291 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
292 | + pda = cpu_pda(cpu); | ||
293 | |||
294 | -#ifndef CONFIG_XEN | ||
295 | - /* The CPU hotplug case */ | ||
296 | - if (cpu_gdt_descr->address) { | ||
297 | - gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
298 | - memset(gdt, 0, PAGE_SIZE); | ||
299 | - goto old_gdt; | ||
300 | - } | ||
301 | /* | ||
302 | * This is a horrible hack to allocate the GDT. The problem | ||
303 | * is that cpu_init() is called really early for the boot CPU | ||
304 | @@ -661,54 +630,141 @@ | ||
305 | * CPUs, when bootmem will have gone away | ||
306 | */ | ||
307 | if (NODE_DATA(0)->bdata->node_bootmem_map) { | ||
308 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); | ||
309 | - /* alloc_bootmem_pages panics on failure, so no check */ | ||
310 | + BUG_ON(gdt != NULL || pda != NULL); | ||
311 | + | ||
312 | + gdt = alloc_bootmem_pages(PAGE_SIZE); | ||
313 | + pda = alloc_bootmem(sizeof(*pda)); | ||
314 | + /* alloc_bootmem(_pages) panics on failure, so no check */ | ||
315 | + | ||
316 | memset(gdt, 0, PAGE_SIZE); | ||
317 | + memset(pda, 0, sizeof(*pda)); | ||
318 | } else { | ||
319 | - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | ||
320 | - if (unlikely(!gdt)) { | ||
321 | - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); | ||
322 | - for (;;) | ||
323 | - local_irq_enable(); | ||
324 | + /* GDT and PDA might already have been allocated if | ||
325 | + this is a CPU hotplug re-insertion. */ | ||
326 | + if (gdt == NULL) | ||
327 | + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | ||
328 | + | ||
329 | + if (pda == NULL) | ||
330 | + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); | ||
331 | + | ||
332 | + if (unlikely(!gdt || !pda)) { | ||
333 | + free_pages((unsigned long)gdt, 0); | ||
334 | + kfree(pda); | ||
335 | + return 0; | ||
336 | } | ||
337 | } | ||
338 | -old_gdt: | ||
339 | + | ||
340 | + cpu_gdt_descr->address = (unsigned long)gdt; | ||
341 | + cpu_pda(cpu) = pda; | ||
342 | + | ||
343 | + return 1; | ||
344 | +} | ||
345 | + | ||
346 | +/* Initial PDA used by boot CPU */ | ||
347 | +struct i386_pda boot_pda = { | ||
348 | + ._pda = &boot_pda, | ||
349 | + .cpu_number = 0, | ||
350 | + .pcurrent = &init_task, | ||
351 | +}; | ||
352 | + | ||
353 | +static inline void set_kernel_gs(void) | ||
354 | +{ | ||
355 | + /* Set %gs for this CPU's PDA. Memory clobber is to create a | ||
356 | + barrier with respect to any PDA operations, so the compiler | ||
357 | + doesn't move any before here. */ | ||
358 | + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); | ||
359 | +} | ||
360 | + | ||
361 | +/* Initialize the CPU's GDT and PDA. The boot CPU does this for | ||
362 | + itself, but secondaries find this done for them. */ | ||
363 | +__cpuinit int init_gdt(int cpu, struct task_struct *idle) | ||
364 | +{ | ||
365 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
366 | + struct desc_struct *gdt; | ||
367 | + struct i386_pda *pda; | ||
368 | + | ||
369 | + /* For non-boot CPUs, the GDT and PDA should already have been | ||
370 | + allocated. */ | ||
371 | + if (!alloc_gdt(cpu)) { | ||
372 | + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); | ||
373 | + return 0; | ||
374 | + } | ||
375 | + | ||
376 | + gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
377 | + pda = cpu_pda(cpu); | ||
378 | + | ||
379 | + BUG_ON(gdt == NULL || pda == NULL); | ||
380 | + | ||
381 | /* | ||
382 | * Initialize the per-CPU GDT with the boot GDT, | ||
383 | * and set up the GDT descriptor: | ||
384 | */ | ||
385 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); | ||
386 | + cpu_gdt_descr->size = GDT_SIZE - 1; | ||
387 | |||
388 | - /* Set up GDT entry for 16bit stack */ | ||
389 | - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= | ||
390 | - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | | ||
391 | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | | ||
392 | - (CPU_16BIT_STACK_SIZE - 1); | ||
393 | + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, | ||
394 | + (u32 *)&gdt[GDT_ENTRY_PDA].b, | ||
395 | + (unsigned long)pda, sizeof(*pda) - 1, | ||
396 | + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ | ||
397 | + | ||
398 | + memset(pda, 0, sizeof(*pda)); | ||
399 | + pda->_pda = pda; | ||
400 | + pda->cpu_number = cpu; | ||
401 | + pda->pcurrent = idle; | ||
402 | |||
403 | - cpu_gdt_descr->size = GDT_SIZE - 1; | ||
404 | - cpu_gdt_descr->address = (unsigned long)gdt; | ||
405 | -#else | ||
406 | - if (cpu == 0 && cpu_gdt_descr->address == 0) { | ||
407 | - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); | ||
408 | - /* alloc_bootmem_pages panics on failure, so no check */ | ||
409 | - memset(gdt, 0, PAGE_SIZE); | ||
410 | + return 1; | ||
411 | +} | ||
412 | |||
413 | - memcpy(gdt, cpu_gdt_table, GDT_SIZE); | ||
414 | - | ||
415 | - cpu_gdt_descr->size = GDT_SIZE; | ||
416 | - cpu_gdt_descr->address = (unsigned long)gdt; | ||
417 | +void __cpuinit cpu_set_gdt(int cpu) | ||
418 | +{ | ||
419 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
420 | + unsigned long va, frames[16]; | ||
421 | + int f; | ||
422 | + | ||
423 | + for (va = cpu_gdt_descr->address, f = 0; | ||
424 | + va < cpu_gdt_descr->address + cpu_gdt_descr->size; | ||
425 | + va += PAGE_SIZE, f++) { | ||
426 | + frames[f] = virt_to_mfn(va); | ||
427 | + make_lowmem_page_readonly( | ||
428 | + (void *)va, XENFEAT_writable_descriptor_tables); | ||
429 | } | ||
430 | + BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8)); | ||
431 | + | ||
432 | + set_kernel_gs(); | ||
433 | +} | ||
434 | + | ||
435 | +/* Common CPU init for both boot and secondary CPUs */ | ||
436 | +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | ||
437 | +{ | ||
438 | +#ifndef CONFIG_X86_NO_TSS | ||
439 | + struct tss_struct * t = &per_cpu(init_tss, cpu); | ||
440 | #endif | ||
441 | + struct thread_struct *thread = &curr->thread; | ||
442 | + | ||
443 | + if (cpu_test_and_set(cpu, cpu_initialized)) { | ||
444 | + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); | ||
445 | + for (;;) local_irq_enable(); | ||
446 | + } | ||
447 | |||
448 | - cpu_gdt_init(cpu_gdt_descr); | ||
449 | + printk(KERN_INFO "Initializing CPU#%d\n", cpu); | ||
450 | + | ||
451 | + if (cpu_has_vme || cpu_has_de) | ||
452 | + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | ||
453 | + if (tsc_disable && cpu_has_tsc) { | ||
454 | + printk(KERN_NOTICE "Disabling TSC...\n"); | ||
455 | + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ | ||
456 | + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
457 | + set_in_cr4(X86_CR4_TSD); | ||
458 | + } | ||
459 | |||
460 | /* | ||
461 | * Set up and load the per-CPU TSS and LDT | ||
462 | */ | ||
463 | atomic_inc(&init_mm.mm_count); | ||
464 | - current->active_mm = &init_mm; | ||
465 | - BUG_ON(current->mm); | ||
466 | - enter_lazy_tlb(&init_mm, current); | ||
467 | + curr->active_mm = &init_mm; | ||
468 | + if (curr->mm) | ||
469 | + BUG(); | ||
470 | + enter_lazy_tlb(&init_mm, curr); | ||
471 | |||
472 | load_esp0(t, thread); | ||
473 | |||
474 | @@ -719,8 +775,8 @@ | ||
475 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | ||
476 | #endif | ||
477 | |||
478 | - /* Clear %fs and %gs. */ | ||
479 | - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); | ||
480 | + /* Clear %fs. */ | ||
481 | + asm volatile ("mov %0, %%fs" : : "r" (0)); | ||
482 | |||
483 | /* Clear all 6 debug registers: */ | ||
484 | set_debugreg(0, 0); | ||
485 | @@ -738,6 +794,38 @@ | ||
486 | mxcsr_feature_mask_init(); | ||
487 | } | ||
488 | |||
489 | +/* Entrypoint to initialize secondary CPU */ | ||
490 | +void __cpuinit secondary_cpu_init(void) | ||
491 | +{ | ||
492 | + int cpu = smp_processor_id(); | ||
493 | + struct task_struct *curr = current; | ||
494 | + | ||
495 | + _cpu_init(cpu, curr); | ||
496 | +} | ||
497 | + | ||
498 | +/* | ||
499 | + * cpu_init() initializes state that is per-CPU. Some data is already | ||
500 | + * initialized (naturally) in the bootstrap process, such as the GDT | ||
501 | + * and IDT. We reload them nevertheless, this function acts as a | ||
502 | + * 'CPU state barrier', nothing should get across. | ||
503 | + */ | ||
504 | +void __cpuinit cpu_init(void) | ||
505 | +{ | ||
506 | + int cpu = smp_processor_id(); | ||
507 | + struct task_struct *curr = current; | ||
508 | + | ||
509 | + /* Set up the real GDT and PDA, so we can transition from the | ||
510 | + boot versions. */ | ||
511 | + if (!init_gdt(cpu, curr)) { | ||
512 | + /* failed to allocate something; not much we can do... */ | ||
513 | + for (;;) | ||
514 | + local_irq_enable(); | ||
515 | + } | ||
516 | + | ||
517 | + cpu_set_gdt(cpu); | ||
518 | + _cpu_init(cpu, curr); | ||
519 | +} | ||
520 | + | ||
521 | #ifdef CONFIG_HOTPLUG_CPU | ||
522 | void __cpuinit cpu_uninit(void) | ||
523 | { | ||
524 | --- a/arch/x86/kernel/cpu/mtrr/main-xen.c | ||
525 | +++ b/arch/x86/kernel/cpu/mtrr/main-xen.c | ||
526 | @@ -12,7 +12,7 @@ | ||
527 | static DEFINE_MUTEX(mtrr_mutex); | ||
528 | |||
529 | void generic_get_mtrr(unsigned int reg, unsigned long *base, | ||
530 | - unsigned int *size, mtrr_type * type) | ||
531 | + unsigned long *size, mtrr_type * type) | ||
532 | { | ||
533 | struct xen_platform_op op; | ||
534 | |||
535 | @@ -115,8 +115,7 @@ | ||
536 | { | ||
537 | unsigned i; | ||
538 | mtrr_type ltype; | ||
539 | - unsigned long lbase; | ||
540 | - unsigned int lsize; | ||
541 | + unsigned long lbase, lsize; | ||
542 | int error = -EINVAL; | ||
543 | struct xen_platform_op op; | ||
544 | |||
545 | --- /dev/null | ||
546 | +++ b/arch/x86/kernel/e820_32-xen.c | ||
547 | @@ -0,0 +1,1000 @@ | ||
548 | +#include <linux/kernel.h> | ||
549 | +#include <linux/types.h> | ||
550 | +#include <linux/init.h> | ||
551 | +#include <linux/bootmem.h> | ||
552 | +#include <linux/ioport.h> | ||
553 | +#include <linux/string.h> | ||
554 | +#include <linux/kexec.h> | ||
555 | +#include <linux/module.h> | ||
556 | +#include <linux/mm.h> | ||
557 | +#include <linux/efi.h> | ||
558 | +#include <linux/pfn.h> | ||
559 | +#include <linux/uaccess.h> | ||
560 | + | ||
561 | +#include <asm/pgtable.h> | ||
562 | +#include <asm/page.h> | ||
563 | +#include <asm/e820.h> | ||
564 | +#include <xen/interface/memory.h> | ||
565 | + | ||
566 | +#ifdef CONFIG_EFI | ||
567 | +int efi_enabled = 0; | ||
568 | +EXPORT_SYMBOL(efi_enabled); | ||
569 | +#endif | ||
570 | + | ||
571 | +struct e820map e820; | ||
572 | +struct change_member { | ||
573 | + struct e820entry *pbios; /* pointer to original bios entry */ | ||
574 | + unsigned long long addr; /* address for this change point */ | ||
575 | +}; | ||
576 | +static struct change_member change_point_list[2*E820MAX] __initdata; | ||
577 | +static struct change_member *change_point[2*E820MAX] __initdata; | ||
578 | +static struct e820entry *overlap_list[E820MAX] __initdata; | ||
579 | +static struct e820entry new_bios[E820MAX] __initdata; | ||
580 | +/* For PCI or other memory-mapped resources */ | ||
581 | +unsigned long pci_mem_start = 0x10000000; | ||
582 | +#ifdef CONFIG_PCI | ||
583 | +EXPORT_SYMBOL(pci_mem_start); | ||
584 | +#endif | ||
585 | +extern int user_defined_memmap; | ||
586 | +struct resource data_resource = { | ||
587 | + .name = "Kernel data", | ||
588 | + .start = 0, | ||
589 | + .end = 0, | ||
590 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
591 | +}; | ||
592 | + | ||
593 | +struct resource code_resource = { | ||
594 | + .name = "Kernel code", | ||
595 | + .start = 0, | ||
596 | + .end = 0, | ||
597 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
598 | +}; | ||
599 | + | ||
600 | +static struct resource system_rom_resource = { | ||
601 | + .name = "System ROM", | ||
602 | + .start = 0xf0000, | ||
603 | + .end = 0xfffff, | ||
604 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
605 | +}; | ||
606 | + | ||
607 | +static struct resource extension_rom_resource = { | ||
608 | + .name = "Extension ROM", | ||
609 | + .start = 0xe0000, | ||
610 | + .end = 0xeffff, | ||
611 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
612 | +}; | ||
613 | + | ||
614 | +static struct resource adapter_rom_resources[] = { { | ||
615 | + .name = "Adapter ROM", | ||
616 | + .start = 0xc8000, | ||
617 | + .end = 0, | ||
618 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
619 | +}, { | ||
620 | + .name = "Adapter ROM", | ||
621 | + .start = 0, | ||
622 | + .end = 0, | ||
623 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
624 | +}, { | ||
625 | + .name = "Adapter ROM", | ||
626 | + .start = 0, | ||
627 | + .end = 0, | ||
628 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
629 | +}, { | ||
630 | + .name = "Adapter ROM", | ||
631 | + .start = 0, | ||
632 | + .end = 0, | ||
633 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
634 | +}, { | ||
635 | + .name = "Adapter ROM", | ||
636 | + .start = 0, | ||
637 | + .end = 0, | ||
638 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
639 | +}, { | ||
640 | + .name = "Adapter ROM", | ||
641 | + .start = 0, | ||
642 | + .end = 0, | ||
643 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
644 | +} }; | ||
645 | + | ||
646 | +static struct resource video_rom_resource = { | ||
647 | + .name = "Video ROM", | ||
648 | + .start = 0xc0000, | ||
649 | + .end = 0xc7fff, | ||
650 | + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
651 | +}; | ||
652 | + | ||
653 | +static struct resource video_ram_resource = { | ||
654 | + .name = "Video RAM area", | ||
655 | + .start = 0xa0000, | ||
656 | + .end = 0xbffff, | ||
657 | + .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
658 | +}; | ||
659 | + | ||
660 | +static struct resource standard_io_resources[] = { { | ||
661 | + .name = "dma1", | ||
662 | + .start = 0x0000, | ||
663 | + .end = 0x001f, | ||
664 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
665 | +}, { | ||
666 | + .name = "pic1", | ||
667 | + .start = 0x0020, | ||
668 | + .end = 0x0021, | ||
669 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
670 | +}, { | ||
671 | + .name = "timer0", | ||
672 | + .start = 0x0040, | ||
673 | + .end = 0x0043, | ||
674 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
675 | +}, { | ||
676 | + .name = "timer1", | ||
677 | + .start = 0x0050, | ||
678 | + .end = 0x0053, | ||
679 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
680 | +}, { | ||
681 | + .name = "keyboard", | ||
682 | + .start = 0x0060, | ||
683 | + .end = 0x006f, | ||
684 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
685 | +}, { | ||
686 | + .name = "dma page reg", | ||
687 | + .start = 0x0080, | ||
688 | + .end = 0x008f, | ||
689 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
690 | +}, { | ||
691 | + .name = "pic2", | ||
692 | + .start = 0x00a0, | ||
693 | + .end = 0x00a1, | ||
694 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
695 | +}, { | ||
696 | + .name = "dma2", | ||
697 | + .start = 0x00c0, | ||
698 | + .end = 0x00df, | ||
699 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
700 | +}, { | ||
701 | + .name = "fpu", | ||
702 | + .start = 0x00f0, | ||
703 | + .end = 0x00ff, | ||
704 | + .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
705 | +} }; | ||
706 | + | ||
707 | +static int romsignature(const unsigned char *x) | ||
708 | +{ | ||
709 | + unsigned short sig; | ||
710 | + int ret = 0; | ||
711 | + if (probe_kernel_address((const unsigned short *)x, sig) == 0) | ||
712 | + ret = (sig == 0xaa55); | ||
713 | + return ret; | ||
714 | +} | ||
715 | + | ||
716 | +static int __init romchecksum(unsigned char *rom, unsigned long length) | ||
717 | +{ | ||
718 | + unsigned char *p, sum = 0; | ||
719 | + | ||
720 | + for (p = rom; p < rom + length; p++) | ||
721 | + sum += *p; | ||
722 | + return sum == 0; | ||
723 | +} | ||
724 | + | ||
725 | +static void __init probe_roms(void) | ||
726 | +{ | ||
727 | + unsigned long start, length, upper; | ||
728 | + unsigned char *rom; | ||
729 | + int i; | ||
730 | + | ||
731 | +#ifdef CONFIG_XEN | ||
732 | + /* Nothing to do if not running in dom0. */ | ||
733 | + if (!is_initial_xendomain()) | ||
734 | + return; | ||
735 | +#endif | ||
736 | + | ||
737 | + /* video rom */ | ||
738 | + upper = adapter_rom_resources[0].start; | ||
739 | + for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
740 | + rom = isa_bus_to_virt(start); | ||
741 | + if (!romsignature(rom)) | ||
742 | + continue; | ||
743 | + | ||
744 | + video_rom_resource.start = start; | ||
745 | + | ||
746 | + /* 0 < length <= 0x7f * 512, historically */ | ||
747 | + length = rom[2] * 512; | ||
748 | + | ||
749 | + /* if checksum okay, trust length byte */ | ||
750 | + if (length && romchecksum(rom, length)) | ||
751 | + video_rom_resource.end = start + length - 1; | ||
752 | + | ||
753 | + request_resource(&iomem_resource, &video_rom_resource); | ||
754 | + break; | ||
755 | + } | ||
756 | + | ||
757 | + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
758 | + if (start < upper) | ||
759 | + start = upper; | ||
760 | + | ||
761 | + /* system rom */ | ||
762 | + request_resource(&iomem_resource, &system_rom_resource); | ||
763 | + upper = system_rom_resource.start; | ||
764 | + | ||
765 | + /* check for extension rom (ignore length byte!) */ | ||
766 | + rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start); | ||
767 | + if (romsignature(rom)) { | ||
768 | + length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
769 | + if (romchecksum(rom, length)) { | ||
770 | + request_resource(&iomem_resource, &extension_rom_resource); | ||
771 | + upper = extension_rom_resource.start; | ||
772 | + } | ||
773 | + } | ||
774 | + | ||
775 | + /* check for adapter roms on 2k boundaries */ | ||
776 | + for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | ||
777 | + rom = isa_bus_to_virt(start); | ||
778 | + if (!romsignature(rom)) | ||
779 | + continue; | ||
780 | + | ||
781 | + /* 0 < length <= 0x7f * 512, historically */ | ||
782 | + length = rom[2] * 512; | ||
783 | + | ||
784 | + /* but accept any length that fits if checksum okay */ | ||
785 | + if (!length || start + length > upper || !romchecksum(rom, length)) | ||
786 | + continue; | ||
787 | + | ||
788 | + adapter_rom_resources[i].start = start; | ||
789 | + adapter_rom_resources[i].end = start + length - 1; | ||
790 | + request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
791 | + | ||
792 | + start = adapter_rom_resources[i++].end & ~2047UL; | ||
793 | + } | ||
794 | +} | ||
795 | + | ||
796 | +#ifdef CONFIG_XEN | ||
797 | +static struct e820map machine_e820 __initdata; | ||
798 | +#define e820 machine_e820 | ||
799 | +#endif | ||
800 | + | ||
801 | +/* | ||
802 | + * Request address space for all standard RAM and ROM resources | ||
803 | + * and also for regions reported as reserved by the e820. | ||
804 | + */ | ||
805 | +static void __init | ||
806 | +legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) | ||
807 | +{ | ||
808 | + int i; | ||
809 | + | ||
810 | + probe_roms(); | ||
811 | + for (i = 0; i < e820.nr_map; i++) { | ||
812 | + struct resource *res; | ||
813 | +#ifndef CONFIG_RESOURCES_64BIT | ||
814 | + if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) | ||
815 | + continue; | ||
816 | +#endif | ||
817 | + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | ||
818 | + switch (e820.map[i].type) { | ||
819 | + case E820_RAM: res->name = "System RAM"; break; | ||
820 | + case E820_ACPI: res->name = "ACPI Tables"; break; | ||
821 | + case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
822 | + default: res->name = "reserved"; | ||
823 | + } | ||
824 | + res->start = e820.map[i].addr; | ||
825 | + res->end = res->start + e820.map[i].size - 1; | ||
826 | + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
827 | + if (request_resource(&iomem_resource, res)) { | ||
828 | + kfree(res); | ||
829 | + continue; | ||
830 | + } | ||
831 | + if (e820.map[i].type == E820_RAM) { | ||
832 | + /* | ||
833 | + * We don't know which RAM region contains kernel data, | ||
834 | + * so we try it repeatedly and let the resource manager | ||
835 | + * test it. | ||
836 | + */ | ||
837 | +#ifndef CONFIG_XEN | ||
838 | + request_resource(res, code_resource); | ||
839 | + request_resource(res, data_resource); | ||
840 | +#endif | ||
841 | +#ifdef CONFIG_KEXEC | ||
842 | + request_resource(res, &crashk_res); | ||
843 | +#ifdef CONFIG_XEN | ||
844 | + xen_machine_kexec_register_resources(res); | ||
845 | +#endif | ||
846 | +#endif | ||
847 | + } | ||
848 | + } | ||
849 | +} | ||
850 | + | ||
851 | +#undef e820 | ||
852 | + | ||
853 | +/* | ||
854 | + * Request address space for all standard resources | ||
855 | + * | ||
856 | + * This is called just before pcibios_init(), which is also a | ||
857 | + * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | ||
858 | + */ | ||
859 | +static int __init request_standard_resources(void) | ||
860 | +{ | ||
861 | + int i; | ||
862 | + | ||
863 | + /* Nothing to do if not running in dom0. */ | ||
864 | + if (!is_initial_xendomain()) | ||
865 | + return 0; | ||
866 | + | ||
867 | + printk("Setting up standard PCI resources\n"); | ||
868 | + if (efi_enabled) | ||
869 | + efi_initialize_iomem_resources(&code_resource, &data_resource); | ||
870 | + else | ||
871 | + legacy_init_iomem_resources(&code_resource, &data_resource); | ||
872 | + | ||
873 | + /* EFI systems may still have VGA */ | ||
874 | + request_resource(&iomem_resource, &video_ram_resource); | ||
875 | + | ||
876 | + /* request I/O space for devices used on all i[345]86 PCs */ | ||
877 | + for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | ||
878 | + request_resource(&ioport_resource, &standard_io_resources[i]); | ||
879 | + return 0; | ||
880 | +} | ||
881 | + | ||
882 | +subsys_initcall(request_standard_resources); | ||
883 | + | ||
884 | +void __init add_memory_region(unsigned long long start, | ||
885 | + unsigned long long size, int type) | ||
886 | +{ | ||
887 | + int x; | ||
888 | + | ||
889 | + if (!efi_enabled) { | ||
890 | + x = e820.nr_map; | ||
891 | + | ||
892 | + if (x == E820MAX) { | ||
893 | + printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
894 | + return; | ||
895 | + } | ||
896 | + | ||
897 | + e820.map[x].addr = start; | ||
898 | + e820.map[x].size = size; | ||
899 | + e820.map[x].type = type; | ||
900 | + e820.nr_map++; | ||
901 | + } | ||
902 | +} /* add_memory_region */ | ||
903 | + | ||
904 | +/* | ||
905 | + * Sanitize the BIOS e820 map. | ||
906 | + * | ||
907 | + * Some e820 responses include overlapping entries. The following | ||
908 | + * replaces the original e820 map with a new one, removing overlaps. | ||
909 | + * | ||
910 | + */ | ||
911 | +int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | ||
912 | +{ | ||
913 | + struct change_member *change_tmp; | ||
914 | + unsigned long current_type, last_type; | ||
915 | + unsigned long long last_addr; | ||
916 | + int chgidx, still_changing; | ||
917 | + int overlap_entries; | ||
918 | + int new_bios_entry; | ||
919 | + int old_nr, new_nr, chg_nr; | ||
920 | + int i; | ||
921 | + | ||
922 | + /* | ||
923 | + Visually we're performing the following (1,2,3,4 = memory types)... | ||
924 | + | ||
925 | + Sample memory map (w/overlaps): | ||
926 | + ____22__________________ | ||
927 | + ______________________4_ | ||
928 | + ____1111________________ | ||
929 | + _44_____________________ | ||
930 | + 11111111________________ | ||
931 | + ____________________33__ | ||
932 | + ___________44___________ | ||
933 | + __________33333_________ | ||
934 | + ______________22________ | ||
935 | + ___________________2222_ | ||
936 | + _________111111111______ | ||
937 | + _____________________11_ | ||
938 | + _________________4______ | ||
939 | + | ||
940 | + Sanitized equivalent (no overlap): | ||
941 | + 1_______________________ | ||
942 | + _44_____________________ | ||
943 | + ___1____________________ | ||
944 | + ____22__________________ | ||
945 | + ______11________________ | ||
946 | + _________1______________ | ||
947 | + __________3_____________ | ||
948 | + ___________44___________ | ||
949 | + _____________33_________ | ||
950 | + _______________2________ | ||
951 | + ________________1_______ | ||
952 | + _________________4______ | ||
953 | + ___________________2____ | ||
954 | + ____________________33__ | ||
955 | + ______________________4_ | ||
956 | + */ | ||
957 | + printk("sanitize start\n"); | ||
958 | + /* if there's only one memory region, don't bother */ | ||
959 | + if (*pnr_map < 2) { | ||
960 | + printk("sanitize bail 0\n"); | ||
961 | + return -1; | ||
962 | + } | ||
963 | + | ||
964 | + old_nr = *pnr_map; | ||
965 | + | ||
966 | + /* bail out if we find any unreasonable addresses in bios map */ | ||
967 | + for (i=0; i<old_nr; i++) | ||
968 | + if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | ||
969 | + printk("sanitize bail 1\n"); | ||
970 | + return -1; | ||
971 | + } | ||
972 | + | ||
973 | + /* create pointers for initial change-point information (for sorting) */ | ||
974 | + for (i=0; i < 2*old_nr; i++) | ||
975 | + change_point[i] = &change_point_list[i]; | ||
976 | + | ||
977 | + /* record all known change-points (starting and ending addresses), | ||
978 | + omitting those that are for empty memory regions */ | ||
979 | + chgidx = 0; | ||
980 | + for (i=0; i < old_nr; i++) { | ||
981 | + if (biosmap[i].size != 0) { | ||
982 | + change_point[chgidx]->addr = biosmap[i].addr; | ||
983 | + change_point[chgidx++]->pbios = &biosmap[i]; | ||
984 | + change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | ||
985 | + change_point[chgidx++]->pbios = &biosmap[i]; | ||
986 | + } | ||
987 | + } | ||
988 | + chg_nr = chgidx; /* true number of change-points */ | ||
989 | + | ||
990 | + /* sort change-point list by memory addresses (low -> high) */ | ||
991 | + still_changing = 1; | ||
992 | + while (still_changing) { | ||
993 | + still_changing = 0; | ||
994 | + for (i=1; i < chg_nr; i++) { | ||
995 | + /* if <current_addr> > <last_addr>, swap */ | ||
996 | + /* or, if current=<start_addr> & last=<end_addr>, swap */ | ||
997 | + if ((change_point[i]->addr < change_point[i-1]->addr) || | ||
998 | + ((change_point[i]->addr == change_point[i-1]->addr) && | ||
999 | + (change_point[i]->addr == change_point[i]->pbios->addr) && | ||
1000 | + (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | ||
1001 | + ) | ||
1002 | + { | ||
1003 | + change_tmp = change_point[i]; | ||
1004 | + change_point[i] = change_point[i-1]; | ||
1005 | + change_point[i-1] = change_tmp; | ||
1006 | + still_changing=1; | ||
1007 | + } | ||
1008 | + } | ||
1009 | + } | ||
1010 | + | ||
1011 | + /* create a new bios memory map, removing overlaps */ | ||
1012 | + overlap_entries=0; /* number of entries in the overlap table */ | ||
1013 | + new_bios_entry=0; /* index for creating new bios map entries */ | ||
1014 | + last_type = 0; /* start with undefined memory type */ | ||
1015 | + last_addr = 0; /* start with 0 as last starting address */ | ||
1016 | + /* loop through change-points, determining affect on the new bios map */ | ||
1017 | + for (chgidx=0; chgidx < chg_nr; chgidx++) | ||
1018 | + { | ||
1019 | + /* keep track of all overlapping bios entries */ | ||
1020 | + if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | ||
1021 | + { | ||
1022 | + /* add map entry to overlap list (> 1 entry implies an overlap) */ | ||
1023 | + overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | ||
1024 | + } | ||
1025 | + else | ||
1026 | + { | ||
1027 | + /* remove entry from list (order independent, so swap with last) */ | ||
1028 | + for (i=0; i<overlap_entries; i++) | ||
1029 | + { | ||
1030 | + if (overlap_list[i] == change_point[chgidx]->pbios) | ||
1031 | + overlap_list[i] = overlap_list[overlap_entries-1]; | ||
1032 | + } | ||
1033 | + overlap_entries--; | ||
1034 | + } | ||
1035 | + /* if there are overlapping entries, decide which "type" to use */ | ||
1036 | + /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | ||
1037 | + current_type = 0; | ||
1038 | + for (i=0; i<overlap_entries; i++) | ||
1039 | + if (overlap_list[i]->type > current_type) | ||
1040 | + current_type = overlap_list[i]->type; | ||
1041 | + /* continue building up new bios map based on this information */ | ||
1042 | + if (current_type != last_type) { | ||
1043 | + if (last_type != 0) { | ||
1044 | + new_bios[new_bios_entry].size = | ||
1045 | + change_point[chgidx]->addr - last_addr; | ||
1046 | + /* move forward only if the new size was non-zero */ | ||
1047 | + if (new_bios[new_bios_entry].size != 0) | ||
1048 | + if (++new_bios_entry >= E820MAX) | ||
1049 | + break; /* no more space left for new bios entries */ | ||
1050 | + } | ||
1051 | + if (current_type != 0) { | ||
1052 | + new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | ||
1053 | + new_bios[new_bios_entry].type = current_type; | ||
1054 | + last_addr=change_point[chgidx]->addr; | ||
1055 | + } | ||
1056 | + last_type = current_type; | ||
1057 | + } | ||
1058 | + } | ||
1059 | + new_nr = new_bios_entry; /* retain count for new bios entries */ | ||
1060 | + | ||
1061 | + /* copy new bios mapping into original location */ | ||
1062 | + memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
1063 | + *pnr_map = new_nr; | ||
1064 | + | ||
1065 | + printk("sanitize end\n"); | ||
1066 | + return 0; | ||
1067 | +} | ||
1068 | + | ||
1069 | +/* | ||
1070 | + * Copy the BIOS e820 map into a safe place. | ||
1071 | + * | ||
1072 | + * Sanity-check it while we're at it.. | ||
1073 | + * | ||
1074 | + * If we're lucky and live on a modern system, the setup code | ||
1075 | + * will have given us a memory map that we can use to properly | ||
1076 | + * set up memory. If we aren't, we'll fake a memory map. | ||
1077 | + * | ||
1078 | + * We check to see that the memory map contains at least 2 elements | ||
1079 | + * before we'll use it, because the detection code in setup.S may | ||
1080 | + * not be perfect and most every PC known to man has two memory | ||
1081 | + * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
1082 | + * thinkpad 560x, for example, does not cooperate with the memory | ||
1083 | + * detection code.) | ||
1084 | + */ | ||
1085 | +int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | ||
1086 | +{ | ||
1087 | +#ifndef CONFIG_XEN | ||
1088 | + /* Only one memory region (or negative)? Ignore it */ | ||
1089 | + if (nr_map < 2) | ||
1090 | + return -1; | ||
1091 | +#else | ||
1092 | + BUG_ON(nr_map < 1); | ||
1093 | +#endif | ||
1094 | + | ||
1095 | + do { | ||
1096 | + unsigned long long start = biosmap->addr; | ||
1097 | + unsigned long long size = biosmap->size; | ||
1098 | + unsigned long long end = start + size; | ||
1099 | + unsigned long type = biosmap->type; | ||
1100 | + printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); | ||
1101 | + | ||
1102 | + /* Overflow in 64 bits? Ignore the memory map. */ | ||
1103 | + if (start > end) | ||
1104 | + return -1; | ||
1105 | + | ||
1106 | +#ifndef CONFIG_XEN | ||
1107 | + /* | ||
1108 | + * Some BIOSes claim RAM in the 640k - 1M region. | ||
1109 | + * Not right. Fix it up. | ||
1110 | + */ | ||
1111 | + if (type == E820_RAM) { | ||
1112 | + printk("copy_e820_map() type is E820_RAM\n"); | ||
1113 | + if (start < 0x100000ULL && end > 0xA0000ULL) { | ||
1114 | + printk("copy_e820_map() lies in range...\n"); | ||
1115 | + if (start < 0xA0000ULL) { | ||
1116 | + printk("copy_e820_map() start < 0xA0000ULL\n"); | ||
1117 | + add_memory_region(start, 0xA0000ULL-start, type); | ||
1118 | + } | ||
1119 | + if (end <= 0x100000ULL) { | ||
1120 | + printk("copy_e820_map() end <= 0x100000ULL\n"); | ||
1121 | + continue; | ||
1122 | + } | ||
1123 | + start = 0x100000ULL; | ||
1124 | + size = end - start; | ||
1125 | + } | ||
1126 | + } | ||
1127 | +#endif | ||
1128 | + add_memory_region(start, size, type); | ||
1129 | + } while (biosmap++,--nr_map); | ||
1130 | + return 0; | ||
1131 | +} | ||
1132 | + | ||
1133 | +/* | ||
1134 | + * Callback for efi_memory_walk. | ||
1135 | + */ | ||
1136 | +static int __init | ||
1137 | +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | ||
1138 | +{ | ||
1139 | + unsigned long *max_pfn = arg, pfn; | ||
1140 | + | ||
1141 | + if (start < end) { | ||
1142 | + pfn = PFN_UP(end -1); | ||
1143 | + if (pfn > *max_pfn) | ||
1144 | + *max_pfn = pfn; | ||
1145 | + } | ||
1146 | + return 0; | ||
1147 | +} | ||
1148 | + | ||
1149 | +static int __init | ||
1150 | +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | ||
1151 | +{ | ||
1152 | + memory_present(0, PFN_UP(start), PFN_DOWN(end)); | ||
1153 | + return 0; | ||
1154 | +} | ||
1155 | + | ||
1156 | +/* | ||
1157 | + * Find the highest page frame number we have available | ||
1158 | + */ | ||
1159 | +void __init find_max_pfn(void) | ||
1160 | +{ | ||
1161 | + int i; | ||
1162 | + | ||
1163 | + max_pfn = 0; | ||
1164 | + if (efi_enabled) { | ||
1165 | + efi_memmap_walk(efi_find_max_pfn, &max_pfn); | ||
1166 | + efi_memmap_walk(efi_memory_present_wrapper, NULL); | ||
1167 | + return; | ||
1168 | + } | ||
1169 | + | ||
1170 | + for (i = 0; i < e820.nr_map; i++) { | ||
1171 | + unsigned long start, end; | ||
1172 | + /* RAM? */ | ||
1173 | + if (e820.map[i].type != E820_RAM) | ||
1174 | + continue; | ||
1175 | + start = PFN_UP(e820.map[i].addr); | ||
1176 | + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
1177 | + if (start >= end) | ||
1178 | + continue; | ||
1179 | + if (end > max_pfn) | ||
1180 | + max_pfn = end; | ||
1181 | + memory_present(0, start, end); | ||
1182 | + } | ||
1183 | +} | ||
1184 | + | ||
1185 | +/* | ||
1186 | + * Free all available memory for boot time allocation. Used | ||
1187 | + * as a callback function by efi_memory_walk() | ||
1188 | + */ | ||
1189 | + | ||
1190 | +static int __init | ||
1191 | +free_available_memory(unsigned long start, unsigned long end, void *arg) | ||
1192 | +{ | ||
1193 | + /* check max_low_pfn */ | ||
1194 | + if (start >= (max_low_pfn << PAGE_SHIFT)) | ||
1195 | + return 0; | ||
1196 | + if (end >= (max_low_pfn << PAGE_SHIFT)) | ||
1197 | + end = max_low_pfn << PAGE_SHIFT; | ||
1198 | + if (start < end) | ||
1199 | + free_bootmem(start, end - start); | ||
1200 | + | ||
1201 | + return 0; | ||
1202 | +} | ||
1203 | +/* | ||
1204 | + * Register fully available low RAM pages with the bootmem allocator. | ||
1205 | + */ | ||
1206 | +void __init register_bootmem_low_pages(unsigned long max_low_pfn) | ||
1207 | +{ | ||
1208 | + int i; | ||
1209 | + | ||
1210 | + if (efi_enabled) { | ||
1211 | + efi_memmap_walk(free_available_memory, NULL); | ||
1212 | + return; | ||
1213 | + } | ||
1214 | + for (i = 0; i < e820.nr_map; i++) { | ||
1215 | + unsigned long curr_pfn, last_pfn, size; | ||
1216 | + /* | ||
1217 | + * Reserve usable low memory | ||
1218 | + */ | ||
1219 | + if (e820.map[i].type != E820_RAM) | ||
1220 | + continue; | ||
1221 | + /* | ||
1222 | + * We are rounding up the start address of usable memory: | ||
1223 | + */ | ||
1224 | + curr_pfn = PFN_UP(e820.map[i].addr); | ||
1225 | + if (curr_pfn >= max_low_pfn) | ||
1226 | + continue; | ||
1227 | + /* | ||
1228 | + * ... and at the end of the usable range downwards: | ||
1229 | + */ | ||
1230 | + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
1231 | + | ||
1232 | +#ifdef CONFIG_XEN | ||
1233 | + /* | ||
1234 | + * Truncate to the number of actual pages currently | ||
1235 | + * present. | ||
1236 | + */ | ||
1237 | + if (last_pfn > xen_start_info->nr_pages) | ||
1238 | + last_pfn = xen_start_info->nr_pages; | ||
1239 | +#endif | ||
1240 | + | ||
1241 | + if (last_pfn > max_low_pfn) | ||
1242 | + last_pfn = max_low_pfn; | ||
1243 | + | ||
1244 | + /* | ||
1245 | + * .. finally, did all the rounding and playing | ||
1246 | + * around just make the area go away? | ||
1247 | + */ | ||
1248 | + if (last_pfn <= curr_pfn) | ||
1249 | + continue; | ||
1250 | + | ||
1251 | + size = last_pfn - curr_pfn; | ||
1252 | + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | ||
1253 | + } | ||
1254 | +} | ||
1255 | + | ||
1256 | +void __init e820_register_memory(void) | ||
1257 | +{ | ||
1258 | + unsigned long gapstart, gapsize, round; | ||
1259 | + unsigned long long last; | ||
1260 | + int i; | ||
1261 | + | ||
1262 | +#ifdef CONFIG_XEN | ||
1263 | + if (is_initial_xendomain()) { | ||
1264 | + struct xen_memory_map memmap; | ||
1265 | + | ||
1266 | + memmap.nr_entries = E820MAX; | ||
1267 | + set_xen_guest_handle(memmap.buffer, machine_e820.map); | ||
1268 | + | ||
1269 | + if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | ||
1270 | + BUG(); | ||
1271 | + machine_e820.nr_map = memmap.nr_entries; | ||
1272 | + } | ||
1273 | + else | ||
1274 | + machine_e820 = e820; | ||
1275 | +#define e820 machine_e820 | ||
1276 | +#endif | ||
1277 | + | ||
1278 | + /* | ||
1279 | + * Search for the bigest gap in the low 32 bits of the e820 | ||
1280 | + * memory space. | ||
1281 | + */ | ||
1282 | + last = 0x100000000ull; | ||
1283 | + gapstart = 0x10000000; | ||
1284 | + gapsize = 0x400000; | ||
1285 | + i = e820.nr_map; | ||
1286 | + while (--i >= 0) { | ||
1287 | + unsigned long long start = e820.map[i].addr; | ||
1288 | + unsigned long long end = start + e820.map[i].size; | ||
1289 | + | ||
1290 | + /* | ||
1291 | + * Since "last" is at most 4GB, we know we'll | ||
1292 | + * fit in 32 bits if this condition is true | ||
1293 | + */ | ||
1294 | + if (last > end) { | ||
1295 | + unsigned long gap = last - end; | ||
1296 | + | ||
1297 | + if (gap > gapsize) { | ||
1298 | + gapsize = gap; | ||
1299 | + gapstart = end; | ||
1300 | + } | ||
1301 | + } | ||
1302 | + if (start < last) | ||
1303 | + last = start; | ||
1304 | + } | ||
1305 | +#undef e820 | ||
1306 | + | ||
1307 | + /* | ||
1308 | + * See how much we want to round up: start off with | ||
1309 | + * rounding to the next 1MB area. | ||
1310 | + */ | ||
1311 | + round = 0x100000; | ||
1312 | + while ((gapsize >> 4) > round) | ||
1313 | + round += round; | ||
1314 | + /* Fun with two's complement */ | ||
1315 | + pci_mem_start = (gapstart + round) & -round; | ||
1316 | + | ||
1317 | + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | ||
1318 | + pci_mem_start, gapstart, gapsize); | ||
1319 | +} | ||
1320 | + | ||
1321 | +void __init print_memory_map(char *who) | ||
1322 | +{ | ||
1323 | + int i; | ||
1324 | + | ||
1325 | + for (i = 0; i < e820.nr_map; i++) { | ||
1326 | + printk(" %s: %016Lx - %016Lx ", who, | ||
1327 | + e820.map[i].addr, | ||
1328 | + e820.map[i].addr + e820.map[i].size); | ||
1329 | + switch (e820.map[i].type) { | ||
1330 | + case E820_RAM: printk("(usable)\n"); | ||
1331 | + break; | ||
1332 | + case E820_RESERVED: | ||
1333 | + printk("(reserved)\n"); | ||
1334 | + break; | ||
1335 | + case E820_ACPI: | ||
1336 | + printk("(ACPI data)\n"); | ||
1337 | + break; | ||
1338 | + case E820_NVS: | ||
1339 | + printk("(ACPI NVS)\n"); | ||
1340 | + break; | ||
1341 | + default: printk("type %lu\n", e820.map[i].type); | ||
1342 | + break; | ||
1343 | + } | ||
1344 | + } | ||
1345 | +} | ||
1346 | + | ||
1347 | +static __init __always_inline void efi_limit_regions(unsigned long long size) | ||
1348 | +{ | ||
1349 | + unsigned long long current_addr = 0; | ||
1350 | + efi_memory_desc_t *md, *next_md; | ||
1351 | + void *p, *p1; | ||
1352 | + int i, j; | ||
1353 | + | ||
1354 | + j = 0; | ||
1355 | + p1 = memmap.map; | ||
1356 | + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { | ||
1357 | + md = p; | ||
1358 | + next_md = p1; | ||
1359 | + current_addr = md->phys_addr + | ||
1360 | + PFN_PHYS(md->num_pages); | ||
1361 | + if (is_available_memory(md)) { | ||
1362 | + if (md->phys_addr >= size) continue; | ||
1363 | + memcpy(next_md, md, memmap.desc_size); | ||
1364 | + if (current_addr >= size) { | ||
1365 | + next_md->num_pages -= | ||
1366 | + PFN_UP(current_addr-size); | ||
1367 | + } | ||
1368 | + p1 += memmap.desc_size; | ||
1369 | + next_md = p1; | ||
1370 | + j++; | ||
1371 | + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == | ||
1372 | + EFI_MEMORY_RUNTIME) { | ||
1373 | + /* In order to make runtime services | ||
1374 | + * available we have to include runtime | ||
1375 | + * memory regions in memory map */ | ||
1376 | + memcpy(next_md, md, memmap.desc_size); | ||
1377 | + p1 += memmap.desc_size; | ||
1378 | + next_md = p1; | ||
1379 | + j++; | ||
1380 | + } | ||
1381 | + } | ||
1382 | + memmap.nr_map = j; | ||
1383 | + memmap.map_end = memmap.map + | ||
1384 | + (memmap.nr_map * memmap.desc_size); | ||
1385 | +} | ||
1386 | + | ||
1387 | +void __init limit_regions(unsigned long long size) | ||
1388 | +{ | ||
1389 | + unsigned long long current_addr = 0; | ||
1390 | + int i; | ||
1391 | + | ||
1392 | + print_memory_map("limit_regions start"); | ||
1393 | + if (efi_enabled) { | ||
1394 | + efi_limit_regions(size); | ||
1395 | + return; | ||
1396 | + } | ||
1397 | + for (i = 0; i < e820.nr_map; i++) { | ||
1398 | + current_addr = e820.map[i].addr + e820.map[i].size; | ||
1399 | + if (current_addr < size) | ||
1400 | + continue; | ||
1401 | + | ||
1402 | + if (e820.map[i].type != E820_RAM) | ||
1403 | + continue; | ||
1404 | + | ||
1405 | + if (e820.map[i].addr >= size) { | ||
1406 | + /* | ||
1407 | + * This region starts past the end of the | ||
1408 | + * requested size, skip it completely. | ||
1409 | + */ | ||
1410 | + e820.nr_map = i; | ||
1411 | + } else { | ||
1412 | + e820.nr_map = i + 1; | ||
1413 | + e820.map[i].size -= current_addr - size; | ||
1414 | + } | ||
1415 | + print_memory_map("limit_regions endfor"); | ||
1416 | + return; | ||
1417 | + } | ||
1418 | +#ifdef CONFIG_XEN | ||
1419 | + if (current_addr < size) { | ||
1420 | + /* | ||
1421 | + * The e820 map finished before our requested size so | ||
1422 | + * extend the final entry to the requested address. | ||
1423 | + */ | ||
1424 | + --i; | ||
1425 | + if (e820.map[i].type == E820_RAM) | ||
1426 | + e820.map[i].size -= current_addr - size; | ||
1427 | + else | ||
1428 | + add_memory_region(current_addr, size - current_addr, E820_RAM); | ||
1429 | + } | ||
1430 | +#endif | ||
1431 | + print_memory_map("limit_regions endfunc"); | ||
1432 | +} | ||
1433 | + | ||
1434 | +/* | ||
1435 | + * This function checks if any part of the range <start,end> is mapped | ||
1436 | + * with type. | ||
1437 | + */ | ||
1438 | +int | ||
1439 | +e820_any_mapped(u64 start, u64 end, unsigned type) | ||
1440 | +{ | ||
1441 | + int i; | ||
1442 | + | ||
1443 | +#ifndef CONFIG_XEN | ||
1444 | + for (i = 0; i < e820.nr_map; i++) { | ||
1445 | + const struct e820entry *ei = &e820.map[i]; | ||
1446 | +#else | ||
1447 | + if (!is_initial_xendomain()) | ||
1448 | + return 0; | ||
1449 | + for (i = 0; i < machine_e820.nr_map; ++i) { | ||
1450 | + const struct e820entry *ei = &machine_e820.map[i]; | ||
1451 | +#endif | ||
1452 | + | ||
1453 | + if (type && ei->type != type) | ||
1454 | + continue; | ||
1455 | + if (ei->addr >= end || ei->addr + ei->size <= start) | ||
1456 | + continue; | ||
1457 | + return 1; | ||
1458 | + } | ||
1459 | + return 0; | ||
1460 | +} | ||
1461 | +EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
1462 | + | ||
1463 | + /* | ||
1464 | + * This function checks if the entire range <start,end> is mapped with type. | ||
1465 | + * | ||
1466 | + * Note: this function only works correct if the e820 table is sorted and | ||
1467 | + * not-overlapping, which is the case | ||
1468 | + */ | ||
1469 | +int __init | ||
1470 | +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | ||
1471 | +{ | ||
1472 | + u64 start = s; | ||
1473 | + u64 end = e; | ||
1474 | + int i; | ||
1475 | + | ||
1476 | +#ifndef CONFIG_XEN | ||
1477 | + for (i = 0; i < e820.nr_map; i++) { | ||
1478 | + struct e820entry *ei = &e820.map[i]; | ||
1479 | +#else | ||
1480 | + if (!is_initial_xendomain()) | ||
1481 | + return 0; | ||
1482 | + for (i = 0; i < machine_e820.nr_map; ++i) { | ||
1483 | + const struct e820entry *ei = &machine_e820.map[i]; | ||
1484 | +#endif | ||
1485 | + | ||
1486 | + if (type && ei->type != type) | ||
1487 | + continue; | ||
1488 | + /* is the region (part) in overlap with the current region ?*/ | ||
1489 | + if (ei->addr >= end || ei->addr + ei->size <= start) | ||
1490 | + continue; | ||
1491 | + /* if the region is at the beginning of <start,end> we move | ||
1492 | + * start to the end of the region since it's ok until there | ||
1493 | + */ | ||
1494 | + if (ei->addr <= start) | ||
1495 | + start = ei->addr + ei->size; | ||
1496 | + /* if start is now at or beyond end, we're done, full | ||
1497 | + * coverage */ | ||
1498 | + if (start >= end) | ||
1499 | + return 1; /* we're done */ | ||
1500 | + } | ||
1501 | + return 0; | ||
1502 | +} | ||
1503 | + | ||
1504 | +static int __init parse_memmap(char *arg) | ||
1505 | +{ | ||
1506 | + if (!arg) | ||
1507 | + return -EINVAL; | ||
1508 | + | ||
1509 | + if (strcmp(arg, "exactmap") == 0) { | ||
1510 | +#ifdef CONFIG_CRASH_DUMP | ||
1511 | + /* If we are doing a crash dump, we | ||
1512 | + * still need to know the real mem | ||
1513 | + * size before original memory map is | ||
1514 | + * reset. | ||
1515 | + */ | ||
1516 | + find_max_pfn(); | ||
1517 | + saved_max_pfn = max_pfn; | ||
1518 | +#endif | ||
1519 | + e820.nr_map = 0; | ||
1520 | + user_defined_memmap = 1; | ||
1521 | + } else { | ||
1522 | + /* If the user specifies memory size, we | ||
1523 | + * limit the BIOS-provided memory map to | ||
1524 | + * that size. exactmap can be used to specify | ||
1525 | + * the exact map. mem=number can be used to | ||
1526 | + * trim the existing memory map. | ||
1527 | + */ | ||
1528 | + unsigned long long start_at, mem_size; | ||
1529 | + | ||
1530 | + mem_size = memparse(arg, &arg); | ||
1531 | + if (*arg == '@') { | ||
1532 | + start_at = memparse(arg+1, &arg); | ||
1533 | + add_memory_region(start_at, mem_size, E820_RAM); | ||
1534 | + } else if (*arg == '#') { | ||
1535 | + start_at = memparse(arg+1, &arg); | ||
1536 | + add_memory_region(start_at, mem_size, E820_ACPI); | ||
1537 | + } else if (*arg == '$') { | ||
1538 | + start_at = memparse(arg+1, &arg); | ||
1539 | + add_memory_region(start_at, mem_size, E820_RESERVED); | ||
1540 | + } else { | ||
1541 | + limit_regions(mem_size); | ||
1542 | + user_defined_memmap = 1; | ||
1543 | + } | ||
1544 | + } | ||
1545 | + return 0; | ||
1546 | +} | ||
1547 | +early_param("memmap", parse_memmap); | ||
1548 | --- a/arch/x86/kernel/entry_32-xen.S | ||
1549 | +++ b/arch/x86/kernel/entry_32-xen.S | ||
1550 | @@ -30,12 +30,13 @@ | ||
1551 | * 18(%esp) - %eax | ||
1552 | * 1C(%esp) - %ds | ||
1553 | * 20(%esp) - %es | ||
1554 | - * 24(%esp) - orig_eax | ||
1555 | - * 28(%esp) - %eip | ||
1556 | - * 2C(%esp) - %cs | ||
1557 | - * 30(%esp) - %eflags | ||
1558 | - * 34(%esp) - %oldesp | ||
1559 | - * 38(%esp) - %oldss | ||
1560 | + * 24(%esp) - %gs | ||
1561 | + * 28(%esp) - orig_eax | ||
1562 | + * 2C(%esp) - %eip | ||
1563 | + * 30(%esp) - %cs | ||
1564 | + * 34(%esp) - %eflags | ||
1565 | + * 38(%esp) - %oldesp | ||
1566 | + * 3C(%esp) - %oldss | ||
1567 | * | ||
1568 | * "current" is in register %ebx during any slow entries. | ||
1569 | */ | ||
1570 | @@ -48,27 +49,25 @@ | ||
1571 | #include <asm/smp.h> | ||
1572 | #include <asm/page.h> | ||
1573 | #include <asm/desc.h> | ||
1574 | +#include <asm/percpu.h> | ||
1575 | #include <asm/dwarf2.h> | ||
1576 | #include "irq_vectors.h" | ||
1577 | #include <xen/interface/xen.h> | ||
1578 | |||
1579 | -#define nr_syscalls ((syscall_table_size)/4) | ||
1580 | +/* | ||
1581 | + * We use macros for low-level operations which need to be overridden | ||
1582 | + * for paravirtualization. The following will never clobber any registers: | ||
1583 | + * INTERRUPT_RETURN (aka. "iret") | ||
1584 | + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") | ||
1585 | + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). | ||
1586 | + * | ||
1587 | + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must | ||
1588 | + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). | ||
1589 | + * Allowing a register to be clobbered can shrink the paravirt replacement | ||
1590 | + * enough to patch inline, increasing performance. | ||
1591 | + */ | ||
1592 | |||
1593 | -EBX = 0x00 | ||
1594 | -ECX = 0x04 | ||
1595 | -EDX = 0x08 | ||
1596 | -ESI = 0x0C | ||
1597 | -EDI = 0x10 | ||
1598 | -EBP = 0x14 | ||
1599 | -EAX = 0x18 | ||
1600 | -DS = 0x1C | ||
1601 | -ES = 0x20 | ||
1602 | -ORIG_EAX = 0x24 | ||
1603 | -EIP = 0x28 | ||
1604 | -CS = 0x2C | ||
1605 | -EFLAGS = 0x30 | ||
1606 | -OLDESP = 0x34 | ||
1607 | -OLDSS = 0x38 | ||
1608 | +#define nr_syscalls ((syscall_table_size)/4) | ||
1609 | |||
1610 | CF_MASK = 0x00000001 | ||
1611 | TF_MASK = 0x00000100 | ||
1612 | @@ -79,61 +78,16 @@ | ||
1613 | /* Pseudo-eflags. */ | ||
1614 | NMI_MASK = 0x80000000 | ||
1615 | |||
1616 | -#ifndef CONFIG_XEN | ||
1617 | -/* These are replaces for paravirtualization */ | ||
1618 | -#define DISABLE_INTERRUPTS cli | ||
1619 | -#define ENABLE_INTERRUPTS sti | ||
1620 | -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit | ||
1621 | -#define INTERRUPT_RETURN iret | ||
1622 | -#define GET_CR0_INTO_EAX movl %cr0, %eax | ||
1623 | -#else | ||
1624 | -/* Offsets into shared_info_t. */ | ||
1625 | -#define evtchn_upcall_pending /* 0 */ | ||
1626 | -#define evtchn_upcall_mask 1 | ||
1627 | - | ||
1628 | -#define sizeof_vcpu_shift 6 | ||
1629 | - | ||
1630 | -#ifdef CONFIG_SMP | ||
1631 | -#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ | ||
1632 | - shl $sizeof_vcpu_shift,%esi ; \ | ||
1633 | - addl HYPERVISOR_shared_info,%esi | ||
1634 | -#else | ||
1635 | -#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi | ||
1636 | -#endif | ||
1637 | - | ||
1638 | -#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) | ||
1639 | -#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) | ||
1640 | -#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) | ||
1641 | -#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ | ||
1642 | - __DISABLE_INTERRUPTS | ||
1643 | -#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ | ||
1644 | - __ENABLE_INTERRUPTS | ||
1645 | -#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | ||
1646 | -sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | ||
1647 | - __TEST_PENDING ; \ | ||
1648 | - jnz 14f # process more events if necessary... ; \ | ||
1649 | - movl ESI(%esp), %esi ; \ | ||
1650 | - sysexit ; \ | ||
1651 | -14: __DISABLE_INTERRUPTS ; \ | ||
1652 | - TRACE_IRQS_OFF ; \ | ||
1653 | -sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | ||
1654 | - push %esp ; \ | ||
1655 | - call evtchn_do_upcall ; \ | ||
1656 | - add $4,%esp ; \ | ||
1657 | - jmp ret_from_intr | ||
1658 | -#define INTERRUPT_RETURN iret | ||
1659 | -#endif | ||
1660 | - | ||
1661 | #ifdef CONFIG_PREEMPT | ||
1662 | -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF | ||
1663 | +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | ||
1664 | #else | ||
1665 | -#define preempt_stop | ||
1666 | +#define preempt_stop(clobbers) | ||
1667 | #define resume_kernel restore_nocheck | ||
1668 | #endif | ||
1669 | |||
1670 | .macro TRACE_IRQS_IRET | ||
1671 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1672 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off? | ||
1673 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? | ||
1674 | jz 1f | ||
1675 | TRACE_IRQS_ON | ||
1676 | 1: | ||
1677 | @@ -148,6 +102,9 @@ | ||
1678 | |||
1679 | #define SAVE_ALL \ | ||
1680 | cld; \ | ||
1681 | + pushl %gs; \ | ||
1682 | + CFI_ADJUST_CFA_OFFSET 4;\ | ||
1683 | + /*CFI_REL_OFFSET gs, 0;*/\ | ||
1684 | pushl %es; \ | ||
1685 | CFI_ADJUST_CFA_OFFSET 4;\ | ||
1686 | /*CFI_REL_OFFSET es, 0;*/\ | ||
1687 | @@ -177,7 +134,9 @@ | ||
1688 | CFI_REL_OFFSET ebx, 0;\ | ||
1689 | movl $(__USER_DS), %edx; \ | ||
1690 | movl %edx, %ds; \ | ||
1691 | - movl %edx, %es; | ||
1692 | + movl %edx, %es; \ | ||
1693 | + movl $(__KERNEL_PDA), %edx; \ | ||
1694 | + movl %edx, %gs | ||
1695 | |||
1696 | #define RESTORE_INT_REGS \ | ||
1697 | popl %ebx; \ | ||
1698 | @@ -210,17 +169,22 @@ | ||
1699 | 2: popl %es; \ | ||
1700 | CFI_ADJUST_CFA_OFFSET -4;\ | ||
1701 | /*CFI_RESTORE es;*/\ | ||
1702 | -.section .fixup,"ax"; \ | ||
1703 | -3: movl $0,(%esp); \ | ||
1704 | - jmp 1b; \ | ||
1705 | +3: popl %gs; \ | ||
1706 | + CFI_ADJUST_CFA_OFFSET -4;\ | ||
1707 | + /*CFI_RESTORE gs;*/\ | ||
1708 | +.pushsection .fixup,"ax"; \ | ||
1709 | 4: movl $0,(%esp); \ | ||
1710 | + jmp 1b; \ | ||
1711 | +5: movl $0,(%esp); \ | ||
1712 | jmp 2b; \ | ||
1713 | -.previous; \ | ||
1714 | +6: movl $0,(%esp); \ | ||
1715 | + jmp 3b; \ | ||
1716 | .section __ex_table,"a";\ | ||
1717 | .align 4; \ | ||
1718 | - .long 1b,3b; \ | ||
1719 | - .long 2b,4b; \ | ||
1720 | -.previous | ||
1721 | + .long 1b,4b; \ | ||
1722 | + .long 2b,5b; \ | ||
1723 | + .long 3b,6b; \ | ||
1724 | +.popsection | ||
1725 | |||
1726 | #define RING0_INT_FRAME \ | ||
1727 | CFI_STARTPROC simple;\ | ||
1728 | @@ -239,18 +203,18 @@ | ||
1729 | #define RING0_PTREGS_FRAME \ | ||
1730 | CFI_STARTPROC simple;\ | ||
1731 | CFI_SIGNAL_FRAME;\ | ||
1732 | - CFI_DEF_CFA esp, OLDESP-EBX;\ | ||
1733 | - /*CFI_OFFSET cs, CS-OLDESP;*/\ | ||
1734 | - CFI_OFFSET eip, EIP-OLDESP;\ | ||
1735 | - /*CFI_OFFSET es, ES-OLDESP;*/\ | ||
1736 | - /*CFI_OFFSET ds, DS-OLDESP;*/\ | ||
1737 | - CFI_OFFSET eax, EAX-OLDESP;\ | ||
1738 | - CFI_OFFSET ebp, EBP-OLDESP;\ | ||
1739 | - CFI_OFFSET edi, EDI-OLDESP;\ | ||
1740 | - CFI_OFFSET esi, ESI-OLDESP;\ | ||
1741 | - CFI_OFFSET edx, EDX-OLDESP;\ | ||
1742 | - CFI_OFFSET ecx, ECX-OLDESP;\ | ||
1743 | - CFI_OFFSET ebx, EBX-OLDESP | ||
1744 | + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ | ||
1745 | + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ | ||
1746 | + CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ | ||
1747 | + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ | ||
1748 | + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ | ||
1749 | + CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ | ||
1750 | + CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ | ||
1751 | + CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ | ||
1752 | + CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ | ||
1753 | + CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ | ||
1754 | + CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ | ||
1755 | + CFI_OFFSET ebx, PT_EBX-PT_OLDESP | ||
1756 | |||
1757 | ENTRY(ret_from_fork) | ||
1758 | CFI_STARTPROC | ||
1759 | @@ -278,17 +242,18 @@ | ||
1760 | ALIGN | ||
1761 | RING0_PTREGS_FRAME | ||
1762 | ret_from_exception: | ||
1763 | - preempt_stop | ||
1764 | + preempt_stop(CLBR_ANY) | ||
1765 | ret_from_intr: | ||
1766 | GET_THREAD_INFO(%ebp) | ||
1767 | check_userspace: | ||
1768 | - movl EFLAGS(%esp), %eax # mix EFLAGS and CS | ||
1769 | - movb CS(%esp), %al | ||
1770 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS | ||
1771 | + movb PT_CS(%esp), %al | ||
1772 | andl $(VM_MASK | SEGMENT_RPL_MASK), %eax | ||
1773 | cmpl $USER_RPL, %eax | ||
1774 | jb resume_kernel # not returning to v8086 or userspace | ||
1775 | + | ||
1776 | ENTRY(resume_userspace) | ||
1777 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | ||
1778 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | ||
1779 | # setting need_resched or sigpending | ||
1780 | # between sampling and the iret | ||
1781 | movl TI_flags(%ebp), %ecx | ||
1782 | @@ -299,14 +264,14 @@ | ||
1783 | |||
1784 | #ifdef CONFIG_PREEMPT | ||
1785 | ENTRY(resume_kernel) | ||
1786 | - DISABLE_INTERRUPTS | ||
1787 | + DISABLE_INTERRUPTS(CLBR_ANY) | ||
1788 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | ||
1789 | jnz restore_nocheck | ||
1790 | need_resched: | ||
1791 | movl TI_flags(%ebp), %ecx # need_resched set ? | ||
1792 | testb $_TIF_NEED_RESCHED, %cl | ||
1793 | jz restore_all | ||
1794 | - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? | ||
1795 | + testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? | ||
1796 | jz restore_all | ||
1797 | call preempt_schedule_irq | ||
1798 | jmp need_resched | ||
1799 | @@ -328,7 +293,7 @@ | ||
1800 | * No need to follow this irqs on/off section: the syscall | ||
1801 | * disabled irqs and here we enable it straight after entry: | ||
1802 | */ | ||
1803 | - ENABLE_INTERRUPTS | ||
1804 | + ENABLE_INTERRUPTS(CLBR_NONE) | ||
1805 | pushl $(__USER_DS) | ||
1806 | CFI_ADJUST_CFA_OFFSET 4 | ||
1807 | /*CFI_REL_OFFSET ss, 0*/ | ||
1808 | @@ -340,12 +305,16 @@ | ||
1809 | pushl $(__USER_CS) | ||
1810 | CFI_ADJUST_CFA_OFFSET 4 | ||
1811 | /*CFI_REL_OFFSET cs, 0*/ | ||
1812 | +#ifndef CONFIG_COMPAT_VDSO | ||
1813 | /* | ||
1814 | * Push current_thread_info()->sysenter_return to the stack. | ||
1815 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | ||
1816 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | ||
1817 | */ | ||
1818 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | ||
1819 | +#else | ||
1820 | + pushl $SYSENTER_RETURN | ||
1821 | +#endif | ||
1822 | CFI_ADJUST_CFA_OFFSET 4 | ||
1823 | CFI_REL_OFFSET eip, 0 | ||
1824 | |||
1825 | @@ -372,19 +341,27 @@ | ||
1826 | cmpl $(nr_syscalls), %eax | ||
1827 | jae syscall_badsys | ||
1828 | call *sys_call_table(,%eax,4) | ||
1829 | - movl %eax,EAX(%esp) | ||
1830 | - DISABLE_INTERRUPTS | ||
1831 | + movl %eax,PT_EAX(%esp) | ||
1832 | + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) | ||
1833 | TRACE_IRQS_OFF | ||
1834 | movl TI_flags(%ebp), %ecx | ||
1835 | testw $_TIF_ALLWORK_MASK, %cx | ||
1836 | jne syscall_exit_work | ||
1837 | /* if something modifies registers it must also disable sysexit */ | ||
1838 | - movl EIP(%esp), %edx | ||
1839 | - movl OLDESP(%esp), %ecx | ||
1840 | + movl PT_EIP(%esp), %edx | ||
1841 | + movl PT_OLDESP(%esp), %ecx | ||
1842 | xorl %ebp,%ebp | ||
1843 | TRACE_IRQS_ON | ||
1844 | +1: mov PT_GS(%esp), %gs | ||
1845 | ENABLE_INTERRUPTS_SYSEXIT | ||
1846 | CFI_ENDPROC | ||
1847 | +.pushsection .fixup,"ax" | ||
1848 | +2: movl $0,PT_GS(%esp) | ||
1849 | + jmp 1b | ||
1850 | +.section __ex_table,"a" | ||
1851 | + .align 4 | ||
1852 | + .long 1b,2b | ||
1853 | +.popsection | ||
1854 | |||
1855 | # pv sysenter call handler stub | ||
1856 | ENTRY(sysenter_entry_pv) | ||
1857 | @@ -419,7 +396,7 @@ | ||
1858 | CFI_ADJUST_CFA_OFFSET 4 | ||
1859 | SAVE_ALL | ||
1860 | GET_THREAD_INFO(%ebp) | ||
1861 | - testl $TF_MASK,EFLAGS(%esp) | ||
1862 | + testl $TF_MASK,PT_EFLAGS(%esp) | ||
1863 | jz no_singlestep | ||
1864 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
1865 | no_singlestep: | ||
1866 | @@ -431,9 +408,9 @@ | ||
1867 | jae syscall_badsys | ||
1868 | syscall_call: | ||
1869 | call *sys_call_table(,%eax,4) | ||
1870 | - movl %eax,EAX(%esp) # store the return value | ||
1871 | + movl %eax,PT_EAX(%esp) # store the return value | ||
1872 | syscall_exit: | ||
1873 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | ||
1874 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | ||
1875 | # setting need_resched or sigpending | ||
1876 | # between sampling and the iret | ||
1877 | TRACE_IRQS_OFF | ||
1878 | @@ -443,12 +420,12 @@ | ||
1879 | |||
1880 | restore_all: | ||
1881 | #ifndef CONFIG_XEN | ||
1882 | - movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | ||
1883 | - # Warning: OLDSS(%esp) contains the wrong/random values if we | ||
1884 | + movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | ||
1885 | + # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | ||
1886 | # are returning to the kernel. | ||
1887 | # See comments in process.c:copy_thread() for details. | ||
1888 | - movb OLDSS(%esp), %ah | ||
1889 | - movb CS(%esp), %al | ||
1890 | + movb PT_OLDSS(%esp), %ah | ||
1891 | + movb PT_CS(%esp), %al | ||
1892 | andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax | ||
1893 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax | ||
1894 | CFI_REMEMBER_STATE | ||
1895 | @@ -456,7 +433,7 @@ | ||
1896 | restore_nocheck: | ||
1897 | #else | ||
1898 | restore_nocheck: | ||
1899 | - movl EFLAGS(%esp), %eax | ||
1900 | + movl PT_EFLAGS(%esp), %eax | ||
1901 | testl $(VM_MASK|NMI_MASK), %eax | ||
1902 | CFI_REMEMBER_STATE | ||
1903 | jnz hypervisor_iret | ||
1904 | @@ -470,13 +447,13 @@ | ||
1905 | TRACE_IRQS_IRET | ||
1906 | restore_nocheck_notrace: | ||
1907 | RESTORE_REGS | ||
1908 | - addl $4, %esp | ||
1909 | + addl $4, %esp # skip orig_eax/error_code | ||
1910 | CFI_ADJUST_CFA_OFFSET -4 | ||
1911 | 1: INTERRUPT_RETURN | ||
1912 | .section .fixup,"ax" | ||
1913 | iret_exc: | ||
1914 | #ifndef CONFIG_XEN | ||
1915 | - ENABLE_INTERRUPTS | ||
1916 | + ENABLE_INTERRUPTS(CLBR_NONE) | ||
1917 | #endif | ||
1918 | pushl $0 # no error code | ||
1919 | pushl $do_iret_error | ||
1920 | @@ -490,33 +467,42 @@ | ||
1921 | CFI_RESTORE_STATE | ||
1922 | #ifndef CONFIG_XEN | ||
1923 | ldt_ss: | ||
1924 | - larl OLDSS(%esp), %eax | ||
1925 | + larl PT_OLDSS(%esp), %eax | ||
1926 | jnz restore_nocheck | ||
1927 | testl $0x00400000, %eax # returning to 32bit stack? | ||
1928 | jnz restore_nocheck # allright, normal return | ||
1929 | + | ||
1930 | +#ifdef CONFIG_PARAVIRT | ||
1931 | + /* | ||
1932 | + * The kernel can't run on a non-flat stack if paravirt mode | ||
1933 | + * is active. Rather than try to fixup the high bits of | ||
1934 | + * ESP, bypass this code entirely. This may break DOSemu | ||
1935 | + * and/or Wine support in a paravirt VM, although the option | ||
1936 | + * is still available to implement the setting of the high | ||
1937 | + * 16-bits in the INTERRUPT_RETURN paravirt-op. | ||
1938 | + */ | ||
1939 | + cmpl $0, paravirt_ops+PARAVIRT_enabled | ||
1940 | + jne restore_nocheck | ||
1941 | +#endif | ||
1942 | + | ||
1943 | /* If returning to userspace with 16bit stack, | ||
1944 | * try to fix the higher word of ESP, as the CPU | ||
1945 | * won't restore it. | ||
1946 | * This is an "official" bug of all the x86-compatible | ||
1947 | * CPUs, which we can try to work around to make | ||
1948 | * dosemu and wine happy. */ | ||
1949 | - subl $8, %esp # reserve space for switch16 pointer | ||
1950 | - CFI_ADJUST_CFA_OFFSET 8 | ||
1951 | - DISABLE_INTERRUPTS | ||
1952 | + movl PT_OLDESP(%esp), %eax | ||
1953 | + movl %esp, %edx | ||
1954 | + call patch_espfix_desc | ||
1955 | + pushl $__ESPFIX_SS | ||
1956 | + CFI_ADJUST_CFA_OFFSET 4 | ||
1957 | + pushl %eax | ||
1958 | + CFI_ADJUST_CFA_OFFSET 4 | ||
1959 | + DISABLE_INTERRUPTS(CLBR_EAX) | ||
1960 | TRACE_IRQS_OFF | ||
1961 | - movl %esp, %eax | ||
1962 | - /* Set up the 16bit stack frame with switch32 pointer on top, | ||
1963 | - * and a switch16 pointer on top of the current frame. */ | ||
1964 | - call setup_x86_bogus_stack | ||
1965 | - CFI_ADJUST_CFA_OFFSET -8 # frame has moved | ||
1966 | - TRACE_IRQS_IRET | ||
1967 | - RESTORE_REGS | ||
1968 | - lss 20+4(%esp), %esp # switch to 16bit stack | ||
1969 | -1: INTERRUPT_RETURN | ||
1970 | -.section __ex_table,"a" | ||
1971 | - .align 4 | ||
1972 | - .long 1b,iret_exc | ||
1973 | -.previous | ||
1974 | + lss (%esp), %esp | ||
1975 | + CFI_ADJUST_CFA_OFFSET -8 | ||
1976 | + jmp restore_nocheck | ||
1977 | #else | ||
1978 | ALIGN | ||
1979 | restore_all_enable_events: | ||
1980 | @@ -540,7 +526,7 @@ | ||
1981 | |||
1982 | CFI_RESTORE_STATE | ||
1983 | hypervisor_iret: | ||
1984 | - andl $~NMI_MASK, EFLAGS(%esp) | ||
1985 | + andl $~NMI_MASK, PT_EFLAGS(%esp) | ||
1986 | RESTORE_REGS | ||
1987 | addl $4, %esp | ||
1988 | CFI_ADJUST_CFA_OFFSET -4 | ||
1989 | @@ -556,7 +542,7 @@ | ||
1990 | jz work_notifysig | ||
1991 | work_resched: | ||
1992 | call schedule | ||
1993 | - DISABLE_INTERRUPTS # make sure we don't miss an interrupt | ||
1994 | + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt | ||
1995 | # setting need_resched or sigpending | ||
1996 | # between sampling and the iret | ||
1997 | TRACE_IRQS_OFF | ||
1998 | @@ -569,7 +555,8 @@ | ||
1999 | |||
2000 | work_notifysig: # deal with pending signals and | ||
2001 | # notify-resume requests | ||
2002 | - testl $VM_MASK, EFLAGS(%esp) | ||
2003 | +#ifdef CONFIG_VM86 | ||
2004 | + testl $VM_MASK, PT_EFLAGS(%esp) | ||
2005 | movl %esp, %eax | ||
2006 | jne work_notifysig_v86 # returning to kernel-space or | ||
2007 | # vm86-space | ||
2008 | @@ -579,29 +566,30 @@ | ||
2009 | |||
2010 | ALIGN | ||
2011 | work_notifysig_v86: | ||
2012 | -#ifdef CONFIG_VM86 | ||
2013 | pushl %ecx # save ti_flags for do_notify_resume | ||
2014 | CFI_ADJUST_CFA_OFFSET 4 | ||
2015 | call save_v86_state # %eax contains pt_regs pointer | ||
2016 | popl %ecx | ||
2017 | CFI_ADJUST_CFA_OFFSET -4 | ||
2018 | movl %eax, %esp | ||
2019 | +#else | ||
2020 | + movl %esp, %eax | ||
2021 | +#endif | ||
2022 | xorl %edx, %edx | ||
2023 | call do_notify_resume | ||
2024 | jmp resume_userspace_sig | ||
2025 | -#endif | ||
2026 | |||
2027 | # perform syscall exit tracing | ||
2028 | ALIGN | ||
2029 | syscall_trace_entry: | ||
2030 | - movl $-ENOSYS,EAX(%esp) | ||
2031 | + movl $-ENOSYS,PT_EAX(%esp) | ||
2032 | movl %esp, %eax | ||
2033 | xorl %edx,%edx | ||
2034 | call do_syscall_trace | ||
2035 | cmpl $0, %eax | ||
2036 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | ||
2037 | # so must skip actual syscall | ||
2038 | - movl ORIG_EAX(%esp), %eax | ||
2039 | + movl PT_ORIG_EAX(%esp), %eax | ||
2040 | cmpl $(nr_syscalls), %eax | ||
2041 | jnae syscall_call | ||
2042 | jmp syscall_exit | ||
2043 | @@ -612,7 +600,7 @@ | ||
2044 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | ||
2045 | jz work_pending | ||
2046 | TRACE_IRQS_ON | ||
2047 | - ENABLE_INTERRUPTS # could let do_syscall_trace() call | ||
2048 | + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | ||
2049 | # schedule() instead | ||
2050 | movl %esp, %eax | ||
2051 | movl $1, %edx | ||
2052 | @@ -626,40 +614,39 @@ | ||
2053 | CFI_ADJUST_CFA_OFFSET 4 | ||
2054 | SAVE_ALL | ||
2055 | GET_THREAD_INFO(%ebp) | ||
2056 | - movl $-EFAULT,EAX(%esp) | ||
2057 | + movl $-EFAULT,PT_EAX(%esp) | ||
2058 | jmp resume_userspace | ||
2059 | |||
2060 | syscall_badsys: | ||
2061 | - movl $-ENOSYS,EAX(%esp) | ||
2062 | + movl $-ENOSYS,PT_EAX(%esp) | ||
2063 | jmp resume_userspace | ||
2064 | CFI_ENDPROC | ||
2065 | |||
2066 | #ifndef CONFIG_XEN | ||
2067 | #define FIXUP_ESPFIX_STACK \ | ||
2068 | - movl %esp, %eax; \ | ||
2069 | - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ | ||
2070 | - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ | ||
2071 | - /* copy data from 16bit stack to 32bit stack */ \ | ||
2072 | - call fixup_x86_bogus_stack; \ | ||
2073 | - /* put ESP to the proper location */ \ | ||
2074 | - movl %eax, %esp; | ||
2075 | -#define UNWIND_ESPFIX_STACK \ | ||
2076 | + /* since we are on a wrong stack, we cant make it a C code :( */ \ | ||
2077 | + movl %gs:PDA_cpu, %ebx; \ | ||
2078 | + PER_CPU(cpu_gdt_descr, %ebx); \ | ||
2079 | + movl GDS_address(%ebx), %ebx; \ | ||
2080 | + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | ||
2081 | + addl %esp, %eax; \ | ||
2082 | + pushl $__KERNEL_DS; \ | ||
2083 | + CFI_ADJUST_CFA_OFFSET 4; \ | ||
2084 | pushl %eax; \ | ||
2085 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
2086 | + lss (%esp), %esp; \ | ||
2087 | + CFI_ADJUST_CFA_OFFSET -8; | ||
2088 | +#define UNWIND_ESPFIX_STACK \ | ||
2089 | movl %ss, %eax; \ | ||
2090 | - /* see if on 16bit stack */ \ | ||
2091 | + /* see if on espfix stack */ \ | ||
2092 | cmpw $__ESPFIX_SS, %ax; \ | ||
2093 | - je 28f; \ | ||
2094 | -27: popl %eax; \ | ||
2095 | - CFI_ADJUST_CFA_OFFSET -4; \ | ||
2096 | -.section .fixup,"ax"; \ | ||
2097 | -28: movl $__KERNEL_DS, %eax; \ | ||
2098 | + jne 27f; \ | ||
2099 | + movl $__KERNEL_DS, %eax; \ | ||
2100 | movl %eax, %ds; \ | ||
2101 | movl %eax, %es; \ | ||
2102 | - /* switch to 32bit stack */ \ | ||
2103 | + /* switch to normal stack */ \ | ||
2104 | FIXUP_ESPFIX_STACK; \ | ||
2105 | - jmp 27b; \ | ||
2106 | -.previous | ||
2107 | +27:; | ||
2108 | |||
2109 | /* | ||
2110 | * Build the entry stubs and pointer table with | ||
2111 | @@ -723,13 +710,16 @@ | ||
2112 | CFI_ADJUST_CFA_OFFSET 4 | ||
2113 | ALIGN | ||
2114 | error_code: | ||
2115 | + /* the function address is in %gs's slot on the stack */ | ||
2116 | + pushl %es | ||
2117 | + CFI_ADJUST_CFA_OFFSET 4 | ||
2118 | + /*CFI_REL_OFFSET es, 0*/ | ||
2119 | pushl %ds | ||
2120 | CFI_ADJUST_CFA_OFFSET 4 | ||
2121 | /*CFI_REL_OFFSET ds, 0*/ | ||
2122 | pushl %eax | ||
2123 | CFI_ADJUST_CFA_OFFSET 4 | ||
2124 | CFI_REL_OFFSET eax, 0 | ||
2125 | - xorl %eax, %eax | ||
2126 | pushl %ebp | ||
2127 | CFI_ADJUST_CFA_OFFSET 4 | ||
2128 | CFI_REL_OFFSET ebp, 0 | ||
2129 | @@ -742,7 +732,6 @@ | ||
2130 | pushl %edx | ||
2131 | CFI_ADJUST_CFA_OFFSET 4 | ||
2132 | CFI_REL_OFFSET edx, 0 | ||
2133 | - decl %eax # eax = -1 | ||
2134 | pushl %ecx | ||
2135 | CFI_ADJUST_CFA_OFFSET 4 | ||
2136 | CFI_REL_OFFSET ecx, 0 | ||
2137 | @@ -750,18 +739,20 @@ | ||
2138 | CFI_ADJUST_CFA_OFFSET 4 | ||
2139 | CFI_REL_OFFSET ebx, 0 | ||
2140 | cld | ||
2141 | - pushl %es | ||
2142 | + pushl %gs | ||
2143 | CFI_ADJUST_CFA_OFFSET 4 | ||
2144 | - /*CFI_REL_OFFSET es, 0*/ | ||
2145 | + /*CFI_REL_OFFSET gs, 0*/ | ||
2146 | + movl $(__KERNEL_PDA), %ecx | ||
2147 | + movl %ecx, %gs | ||
2148 | UNWIND_ESPFIX_STACK | ||
2149 | popl %ecx | ||
2150 | CFI_ADJUST_CFA_OFFSET -4 | ||
2151 | /*CFI_REGISTER es, ecx*/ | ||
2152 | - movl ES(%esp), %edi # get the function address | ||
2153 | - movl ORIG_EAX(%esp), %edx # get the error code | ||
2154 | - movl %eax, ORIG_EAX(%esp) | ||
2155 | - movl %ecx, ES(%esp) | ||
2156 | - /*CFI_REL_OFFSET es, ES*/ | ||
2157 | + movl PT_GS(%esp), %edi # get the function address | ||
2158 | + movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
2159 | + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
2160 | + mov %ecx, PT_GS(%esp) | ||
2161 | + /*CFI_REL_OFFSET gs, ES*/ | ||
2162 | movl $(__USER_DS), %ecx | ||
2163 | movl %ecx, %ds | ||
2164 | movl %ecx, %es | ||
2165 | @@ -793,7 +784,7 @@ | ||
2166 | pushl %eax | ||
2167 | CFI_ADJUST_CFA_OFFSET 4 | ||
2168 | SAVE_ALL | ||
2169 | - movl EIP(%esp),%eax | ||
2170 | + movl PT_EIP(%esp),%eax | ||
2171 | cmpl $scrit,%eax | ||
2172 | jb 11f | ||
2173 | cmpl $ecrit,%eax | ||
2174 | @@ -802,7 +793,7 @@ | ||
2175 | jb 11f | ||
2176 | cmpl $sysexit_ecrit,%eax | ||
2177 | ja 11f | ||
2178 | - addl $OLDESP,%esp # Remove eflags...ebx from stack frame. | ||
2179 | + addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame. | ||
2180 | 11: push %esp | ||
2181 | CFI_ADJUST_CFA_OFFSET 4 | ||
2182 | call evtchn_do_upcall | ||
2183 | @@ -824,7 +815,7 @@ | ||
2184 | jne 15f | ||
2185 | xorl %ecx,%ecx | ||
2186 | 15: leal (%esp,%ecx),%esi # %esi points at end of src region | ||
2187 | - leal OLDESP(%esp),%edi # %edi points at end of dst region | ||
2188 | + leal PT_OLDESP(%esp),%edi # %edi points at end of dst region | ||
2189 | shrl $2,%ecx # convert words to bytes | ||
2190 | je 17f # skip loop if nothing to copy | ||
2191 | 16: subl $4,%esi # pre-decrementing copy loop | ||
2192 | @@ -848,8 +839,9 @@ | ||
2193 | .byte 0x18 # pop %eax | ||
2194 | .byte 0x1c # pop %ds | ||
2195 | .byte 0x20 # pop %es | ||
2196 | - .byte 0x24,0x24,0x24 # add $4,%esp | ||
2197 | - .byte 0x28 # iret | ||
2198 | + .byte 0x24,0x24 # pop %gs | ||
2199 | + .byte 0x28,0x28,0x28 # add $4,%esp | ||
2200 | + .byte 0x2c # iret | ||
2201 | .byte 0xff,0xff,0xff,0xff # movb $1,1(%esi) | ||
2202 | .byte 0x00,0x00 # jmp 11b | ||
2203 | .previous | ||
2204 | @@ -940,7 +932,7 @@ | ||
2205 | jmp ret_from_exception | ||
2206 | device_available_emulate: | ||
2207 | #endif | ||
2208 | - preempt_stop | ||
2209 | + preempt_stop(CLBR_ANY) | ||
2210 | call math_state_restore | ||
2211 | jmp ret_from_exception | ||
2212 | CFI_ENDPROC | ||
2213 | @@ -1010,7 +1002,7 @@ | ||
2214 | cmpw $__ESPFIX_SS, %ax | ||
2215 | popl %eax | ||
2216 | CFI_ADJUST_CFA_OFFSET -4 | ||
2217 | - je nmi_16bit_stack | ||
2218 | + je nmi_espfix_stack | ||
2219 | cmpl $sysenter_entry,(%esp) | ||
2220 | je nmi_stack_fixup | ||
2221 | pushl %eax | ||
2222 | @@ -1053,7 +1045,7 @@ | ||
2223 | FIX_STACK(24,nmi_stack_correct, 1) | ||
2224 | jmp nmi_stack_correct | ||
2225 | |||
2226 | -nmi_16bit_stack: | ||
2227 | +nmi_espfix_stack: | ||
2228 | /* We have a RING0_INT_FRAME here. | ||
2229 | * | ||
2230 | * create the pointer to lss back | ||
2231 | @@ -1062,7 +1054,6 @@ | ||
2232 | CFI_ADJUST_CFA_OFFSET 4 | ||
2233 | pushl %esp | ||
2234 | CFI_ADJUST_CFA_OFFSET 4 | ||
2235 | - movzwl %sp, %esp | ||
2236 | addw $4, (%esp) | ||
2237 | /* copy the iret frame of 12 bytes */ | ||
2238 | .rept 3 | ||
2239 | @@ -1073,11 +1064,11 @@ | ||
2240 | CFI_ADJUST_CFA_OFFSET 4 | ||
2241 | SAVE_ALL | ||
2242 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
2243 | - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved | ||
2244 | xorl %edx,%edx # zero error code | ||
2245 | call do_nmi | ||
2246 | RESTORE_REGS | ||
2247 | - lss 12+4(%esp), %esp # back to 16bit stack | ||
2248 | + lss 12+4(%esp), %esp # back to espfix stack | ||
2249 | + CFI_ADJUST_CFA_OFFSET -24 | ||
2250 | 1: INTERRUPT_RETURN | ||
2251 | CFI_ENDPROC | ||
2252 | .section __ex_table,"a" | ||
2253 | @@ -1093,12 +1084,25 @@ | ||
2254 | xorl %edx,%edx # zero error code | ||
2255 | movl %esp,%eax # pt_regs pointer | ||
2256 | call do_nmi | ||
2257 | - orl $NMI_MASK, EFLAGS(%esp) | ||
2258 | + orl $NMI_MASK, PT_EFLAGS(%esp) | ||
2259 | jmp restore_all | ||
2260 | CFI_ENDPROC | ||
2261 | #endif | ||
2262 | KPROBE_END(nmi) | ||
2263 | |||
2264 | +#ifdef CONFIG_PARAVIRT | ||
2265 | +ENTRY(native_iret) | ||
2266 | +1: iret | ||
2267 | +.section __ex_table,"a" | ||
2268 | + .align 4 | ||
2269 | + .long 1b,iret_exc | ||
2270 | +.previous | ||
2271 | + | ||
2272 | +ENTRY(native_irq_enable_sysexit) | ||
2273 | + sti | ||
2274 | + sysexit | ||
2275 | +#endif | ||
2276 | + | ||
2277 | KPROBE_ENTRY(int3) | ||
2278 | RING0_INT_FRAME | ||
2279 | pushl $-1 # mark this as an int | ||
2280 | @@ -1214,37 +1218,6 @@ | ||
2281 | CFI_ENDPROC | ||
2282 | #endif /* !CONFIG_XEN */ | ||
2283 | |||
2284 | -#ifdef CONFIG_STACK_UNWIND | ||
2285 | -ENTRY(arch_unwind_init_running) | ||
2286 | - CFI_STARTPROC | ||
2287 | - movl 4(%esp), %edx | ||
2288 | - movl (%esp), %ecx | ||
2289 | - leal 4(%esp), %eax | ||
2290 | - movl %ebx, EBX(%edx) | ||
2291 | - xorl %ebx, %ebx | ||
2292 | - movl %ebx, ECX(%edx) | ||
2293 | - movl %ebx, EDX(%edx) | ||
2294 | - movl %esi, ESI(%edx) | ||
2295 | - movl %edi, EDI(%edx) | ||
2296 | - movl %ebp, EBP(%edx) | ||
2297 | - movl %ebx, EAX(%edx) | ||
2298 | - movl $__USER_DS, DS(%edx) | ||
2299 | - movl $__USER_DS, ES(%edx) | ||
2300 | - movl %ebx, ORIG_EAX(%edx) | ||
2301 | - movl %ecx, EIP(%edx) | ||
2302 | - movl 12(%esp), %ecx | ||
2303 | - movl $__KERNEL_CS, CS(%edx) | ||
2304 | - movl %ebx, EFLAGS(%edx) | ||
2305 | - movl %eax, OLDESP(%edx) | ||
2306 | - movl 8(%esp), %eax | ||
2307 | - movl %ecx, 8(%esp) | ||
2308 | - movl EBX(%edx), %ebx | ||
2309 | - movl $__KERNEL_DS, OLDSS(%edx) | ||
2310 | - jmpl *%eax | ||
2311 | - CFI_ENDPROC | ||
2312 | -ENDPROC(arch_unwind_init_running) | ||
2313 | -#endif | ||
2314 | - | ||
2315 | ENTRY(fixup_4gb_segment) | ||
2316 | RING0_EC_FRAME | ||
2317 | pushl $do_fixup_4gb_segment | ||
2318 | --- a/arch/x86/kernel/entry_64-xen.S | ||
2319 | +++ b/arch/x86/kernel/entry_64-xen.S | ||
2320 | @@ -261,7 +261,6 @@ | ||
2321 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | ||
2322 | GET_THREAD_INFO(%rcx) | ||
2323 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | ||
2324 | - CFI_REMEMBER_STATE | ||
2325 | jnz tracesys | ||
2326 | cmpq $__NR_syscall_max,%rax | ||
2327 | ja badsys | ||
2328 | @@ -272,7 +271,6 @@ | ||
2329 | * Syscall return path ending with SYSRET (fast path) | ||
2330 | * Has incomplete stack frame and undefined top of stack. | ||
2331 | */ | ||
2332 | - .globl ret_from_sys_call | ||
2333 | ret_from_sys_call: | ||
2334 | movl $_TIF_ALLWORK_MASK,%edi | ||
2335 | /* edi: flagmask */ | ||
2336 | @@ -282,8 +280,8 @@ | ||
2337 | TRACE_IRQS_OFF | ||
2338 | movl threadinfo_flags(%rcx),%edx | ||
2339 | andl %edi,%edx | ||
2340 | - CFI_REMEMBER_STATE | ||
2341 | jnz sysret_careful | ||
2342 | + CFI_REMEMBER_STATE | ||
2343 | /* | ||
2344 | * sysretq will re-enable interrupts: | ||
2345 | */ | ||
2346 | @@ -292,10 +290,10 @@ | ||
2347 | RESTORE_ARGS 0,8,0 | ||
2348 | HYPERVISOR_IRET VGCF_IN_SYSCALL | ||
2349 | |||
2350 | + CFI_RESTORE_STATE | ||
2351 | /* Handle reschedules */ | ||
2352 | /* edx: work, edi: workmask */ | ||
2353 | sysret_careful: | ||
2354 | - CFI_RESTORE_STATE | ||
2355 | bt $TIF_NEED_RESCHED,%edx | ||
2356 | jnc sysret_signal | ||
2357 | TRACE_IRQS_ON | ||
2358 | @@ -334,7 +332,6 @@ | ||
2359 | |||
2360 | /* Do syscall tracing */ | ||
2361 | tracesys: | ||
2362 | - CFI_RESTORE_STATE | ||
2363 | SAVE_REST | ||
2364 | movq $-ENOSYS,RAX(%rsp) | ||
2365 | FIXUP_TOP_OF_STACK %rdi | ||
2366 | @@ -350,32 +347,13 @@ | ||
2367 | call *sys_call_table(,%rax,8) | ||
2368 | 1: movq %rax,RAX-ARGOFFSET(%rsp) | ||
2369 | /* Use IRET because user could have changed frame */ | ||
2370 | - jmp int_ret_from_sys_call | ||
2371 | - CFI_ENDPROC | ||
2372 | -END(system_call) | ||
2373 | |||
2374 | /* | ||
2375 | * Syscall return path ending with IRET. | ||
2376 | * Has correct top of stack, but partial stack frame. | ||
2377 | - */ | ||
2378 | -ENTRY(int_ret_from_sys_call) | ||
2379 | - CFI_STARTPROC simple | ||
2380 | - CFI_SIGNAL_FRAME | ||
2381 | - CFI_DEF_CFA rsp,SS+8-ARGOFFSET | ||
2382 | - /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | ||
2383 | - CFI_REL_OFFSET rsp,RSP-ARGOFFSET | ||
2384 | - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | ||
2385 | - /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | ||
2386 | - CFI_REL_OFFSET rip,RIP-ARGOFFSET | ||
2387 | - CFI_REL_OFFSET rdx,RDX-ARGOFFSET | ||
2388 | - CFI_REL_OFFSET rcx,RCX-ARGOFFSET | ||
2389 | - CFI_REL_OFFSET rax,RAX-ARGOFFSET | ||
2390 | - CFI_REL_OFFSET rdi,RDI-ARGOFFSET | ||
2391 | - CFI_REL_OFFSET rsi,RSI-ARGOFFSET | ||
2392 | - CFI_REL_OFFSET r8,R8-ARGOFFSET | ||
2393 | - CFI_REL_OFFSET r9,R9-ARGOFFSET | ||
2394 | - CFI_REL_OFFSET r10,R10-ARGOFFSET | ||
2395 | - CFI_REL_OFFSET r11,R11-ARGOFFSET | ||
2396 | + */ | ||
2397 | + .globl int_ret_from_sys_call | ||
2398 | +int_ret_from_sys_call: | ||
2399 | XEN_BLOCK_EVENTS(%rsi) | ||
2400 | TRACE_IRQS_OFF | ||
2401 | testb $3,CS-ARGOFFSET(%rsp) | ||
2402 | @@ -428,8 +406,6 @@ | ||
2403 | popq %rdi | ||
2404 | CFI_ADJUST_CFA_OFFSET -8 | ||
2405 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | ||
2406 | - XEN_BLOCK_EVENTS(%rsi) | ||
2407 | - TRACE_IRQS_OFF | ||
2408 | jmp int_restore_rest | ||
2409 | |||
2410 | int_signal: | ||
2411 | @@ -445,7 +421,7 @@ | ||
2412 | TRACE_IRQS_OFF | ||
2413 | jmp int_with_check | ||
2414 | CFI_ENDPROC | ||
2415 | -END(int_ret_from_sys_call) | ||
2416 | +END(system_call) | ||
2417 | |||
2418 | /* | ||
2419 | * Certain special system calls that need to save a complete full stack frame. | ||
2420 | @@ -1275,36 +1251,3 @@ | ||
2421 | ret | ||
2422 | CFI_ENDPROC | ||
2423 | ENDPROC(call_softirq) | ||
2424 | - | ||
2425 | -#ifdef CONFIG_STACK_UNWIND | ||
2426 | -ENTRY(arch_unwind_init_running) | ||
2427 | - CFI_STARTPROC | ||
2428 | - movq %r15, R15(%rdi) | ||
2429 | - movq %r14, R14(%rdi) | ||
2430 | - xchgq %rsi, %rdx | ||
2431 | - movq %r13, R13(%rdi) | ||
2432 | - movq %r12, R12(%rdi) | ||
2433 | - xorl %eax, %eax | ||
2434 | - movq %rbp, RBP(%rdi) | ||
2435 | - movq %rbx, RBX(%rdi) | ||
2436 | - movq (%rsp), %rcx | ||
2437 | - movq %rax, R11(%rdi) | ||
2438 | - movq %rax, R10(%rdi) | ||
2439 | - movq %rax, R9(%rdi) | ||
2440 | - movq %rax, R8(%rdi) | ||
2441 | - movq %rax, RAX(%rdi) | ||
2442 | - movq %rax, RCX(%rdi) | ||
2443 | - movq %rax, RDX(%rdi) | ||
2444 | - movq %rax, RSI(%rdi) | ||
2445 | - movq %rax, RDI(%rdi) | ||
2446 | - movq %rax, ORIG_RAX(%rdi) | ||
2447 | - movq %rcx, RIP(%rdi) | ||
2448 | - leaq 8(%rsp), %rcx | ||
2449 | - movq $__KERNEL_CS, CS(%rdi) | ||
2450 | - movq %rax, EFLAGS(%rdi) | ||
2451 | - movq %rcx, RSP(%rdi) | ||
2452 | - movq $__KERNEL_DS, SS(%rdi) | ||
2453 | - jmpq *%rdx | ||
2454 | - CFI_ENDPROC | ||
2455 | -ENDPROC(arch_unwind_init_running) | ||
2456 | -#endif | ||
2457 | --- a/arch/x86/kernel/genapic_64-xen.c | ||
2458 | +++ b/arch/x86/kernel/genapic_64-xen.c | ||
2459 | @@ -34,6 +34,7 @@ | ||
2460 | |||
2461 | #ifndef CONFIG_XEN | ||
2462 | struct genapic *genapic = &apic_flat; | ||
2463 | +struct genapic *genapic_force; | ||
2464 | #else | ||
2465 | extern struct genapic apic_xen; | ||
2466 | struct genapic *genapic = &apic_xen; | ||
2467 | @@ -52,6 +53,13 @@ | ||
2468 | u8 cluster_cnt[NUM_APIC_CLUSTERS]; | ||
2469 | int max_apic = 0; | ||
2470 | |||
2471 | + /* genapic selection can be forced because of certain quirks. | ||
2472 | + */ | ||
2473 | + if (genapic_force) { | ||
2474 | + genapic = genapic_force; | ||
2475 | + goto print; | ||
2476 | + } | ||
2477 | + | ||
2478 | #if defined(CONFIG_ACPI) | ||
2479 | /* | ||
2480 | * Some x86_64 machines use physical APIC mode regardless of how many | ||
2481 | --- a/arch/x86/kernel/head64-xen.c | ||
2482 | +++ b/arch/x86/kernel/head64-xen.c | ||
2483 | @@ -101,7 +101,10 @@ | ||
2484 | machine_to_phys_order++; | ||
2485 | |||
2486 | #if 0 | ||
2487 | - for (i = 0; i < 256; i++) | ||
2488 | + /* clear bss before set_intr_gate with early_idt_handler */ | ||
2489 | + clear_bss(); | ||
2490 | + | ||
2491 | + for (i = 0; i < IDT_ENTRIES; i++) | ||
2492 | set_intr_gate(i, early_idt_handler); | ||
2493 | asm volatile("lidt %0" :: "m" (idt_descr)); | ||
2494 | #endif | ||
2495 | --- a/arch/x86/kernel/head_32-xen.S | ||
2496 | +++ b/arch/x86/kernel/head_32-xen.S | ||
2497 | @@ -9,6 +9,7 @@ | ||
2498 | #include <asm/cache.h> | ||
2499 | #include <asm/thread_info.h> | ||
2500 | #include <asm/asm-offsets.h> | ||
2501 | +#include <asm/boot.h> | ||
2502 | #include <asm/dwarf2.h> | ||
2503 | #include <xen/interface/xen.h> | ||
2504 | #include <xen/interface/elfnote.h> | ||
2505 | @@ -35,6 +36,8 @@ | ||
2506 | /* Set up the stack pointer */ | ||
2507 | movl $(init_thread_union+THREAD_SIZE),%esp | ||
2508 | |||
2509 | + call setup_pda | ||
2510 | + | ||
2511 | /* get vendor info */ | ||
2512 | xorl %eax,%eax # call CPUID with 0 -> return vendor ID | ||
2513 | XEN_CPUID | ||
2514 | @@ -57,14 +60,58 @@ | ||
2515 | |||
2516 | movb $1,X86_HARD_MATH | ||
2517 | |||
2518 | - xorl %eax,%eax # Clear FS/GS and LDT | ||
2519 | + xorl %eax,%eax # Clear FS | ||
2520 | movl %eax,%fs | ||
2521 | - movl %eax,%gs | ||
2522 | + | ||
2523 | + movl $(__KERNEL_PDA),%eax | ||
2524 | + mov %eax,%gs | ||
2525 | + | ||
2526 | cld # gcc2 wants the direction flag cleared at all times | ||
2527 | |||
2528 | pushl $0 # fake return address for unwinder | ||
2529 | jmp start_kernel | ||
2530 | |||
2531 | +/* | ||
2532 | + * Point the GDT at this CPU's PDA. This will be | ||
2533 | + * cpu_gdt_table and boot_pda. | ||
2534 | + */ | ||
2535 | +setup_pda: | ||
2536 | + /* get the PDA pointer */ | ||
2537 | + movl $boot_pda, %eax | ||
2538 | + | ||
2539 | + /* slot the PDA address into the GDT */ | ||
2540 | + mov $cpu_gdt_table, %ecx | ||
2541 | + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | ||
2542 | + shr $16, %eax | ||
2543 | + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | ||
2544 | + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ | ||
2545 | + | ||
2546 | + # %esi still points to start_info, and no registers | ||
2547 | + # need to be preserved. | ||
2548 | + | ||
2549 | + movl XEN_START_mfn_list(%esi), %ebx | ||
2550 | + movl $(cpu_gdt_table - __PAGE_OFFSET), %eax | ||
2551 | + shrl $PAGE_SHIFT, %eax | ||
2552 | + movl (%ebx,%eax,4), %ecx | ||
2553 | + pushl %ecx # frame number for set_gdt below | ||
2554 | + | ||
2555 | + xorl %esi, %esi | ||
2556 | + xorl %edx, %edx | ||
2557 | + shldl $PAGE_SHIFT, %ecx, %edx | ||
2558 | + shll $PAGE_SHIFT, %ecx | ||
2559 | + orl $0x61, %ecx | ||
2560 | + movl $cpu_gdt_table, %ebx | ||
2561 | + movl $__HYPERVISOR_update_va_mapping, %eax | ||
2562 | + int $0x82 | ||
2563 | + | ||
2564 | + movl $(PAGE_SIZE_asm / 8), %ecx | ||
2565 | + movl %esp, %ebx | ||
2566 | + movl $__HYPERVISOR_set_gdt, %eax | ||
2567 | + int $0x82 | ||
2568 | + | ||
2569 | + popl %ecx | ||
2570 | + ret | ||
2571 | + | ||
2572 | #define HYPERCALL_PAGE_OFFSET 0x1000 | ||
2573 | .org HYPERCALL_PAGE_OFFSET | ||
2574 | ENTRY(hypercall_page) | ||
2575 | @@ -93,7 +140,8 @@ | ||
2576 | /* | ||
2577 | * The Global Descriptor Table contains 28 quadwords, per-CPU. | ||
2578 | */ | ||
2579 | - .align L1_CACHE_BYTES | ||
2580 | + .section .data.page_aligned, "aw" | ||
2581 | + .align PAGE_SIZE_asm | ||
2582 | ENTRY(cpu_gdt_table) | ||
2583 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
2584 | .quad 0x0000000000000000 /* 0x0b reserved */ | ||
2585 | @@ -135,12 +183,13 @@ | ||
2586 | .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */ | ||
2587 | .quad 0x0000000000000000 /* 0xc8 APM DS data */ | ||
2588 | |||
2589 | - .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */ | ||
2590 | - .quad 0x0000000000000000 /* 0xd8 - unused */ | ||
2591 | + .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */ | ||
2592 | + .quad 0x00cf92000000ffff /* 0xd8 - PDA */ | ||
2593 | .quad 0x0000000000000000 /* 0xe0 - unused */ | ||
2594 | .quad 0x0000000000000000 /* 0xe8 - unused */ | ||
2595 | .quad 0x0000000000000000 /* 0xf0 - unused */ | ||
2596 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | ||
2597 | + .align PAGE_SIZE_asm | ||
2598 | |||
2599 | #if CONFIG_XEN_COMPAT <= 0x030002 | ||
2600 | /* | ||
2601 | @@ -165,9 +214,9 @@ | ||
2602 | .ascii ",ELF_PADDR_OFFSET=0x" | ||
2603 | utoa __PAGE_OFFSET | ||
2604 | .ascii ",VIRT_ENTRY=0x" | ||
2605 | - utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET) | ||
2606 | + utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET) | ||
2607 | .ascii ",HYPERCALL_PAGE=0x" | ||
2608 | - utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) | ||
2609 | + utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT) | ||
2610 | .ascii ",FEATURES=writable_page_tables" | ||
2611 | .ascii "|writable_descriptor_tables" | ||
2612 | .ascii "|auto_translated_physmap" | ||
2613 | --- a/arch/x86/kernel/io_apic_32-xen.c | ||
2614 | +++ b/arch/x86/kernel/io_apic_32-xen.c | ||
2615 | @@ -34,6 +34,7 @@ | ||
2616 | #include <linux/pci.h> | ||
2617 | #include <linux/msi.h> | ||
2618 | #include <linux/htirq.h> | ||
2619 | +#include <linux/freezer.h> | ||
2620 | |||
2621 | #include <asm/io.h> | ||
2622 | #include <asm/smp.h> | ||
2623 | @@ -194,14 +195,20 @@ | ||
2624 | * the interrupt, and we need to make sure the entry is fully populated | ||
2625 | * before that happens. | ||
2626 | */ | ||
2627 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2628 | +static void | ||
2629 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2630 | { | ||
2631 | - unsigned long flags; | ||
2632 | union entry_union eu; | ||
2633 | eu.entry = e; | ||
2634 | - spin_lock_irqsave(&ioapic_lock, flags); | ||
2635 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
2636 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
2637 | +} | ||
2638 | + | ||
2639 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2640 | +{ | ||
2641 | + unsigned long flags; | ||
2642 | + spin_lock_irqsave(&ioapic_lock, flags); | ||
2643 | + __ioapic_write_entry(apic, pin, e); | ||
2644 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2645 | } | ||
2646 | |||
2647 | @@ -883,8 +890,7 @@ | ||
2648 | |||
2649 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | ||
2650 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
2651 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || | ||
2652 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 | ||
2653 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA | ||
2654 | ) && | ||
2655 | (mp_irqs[i].mpc_irqtype == type) && | ||
2656 | (mp_irqs[i].mpc_srcbusirq == irq)) | ||
2657 | @@ -903,8 +909,7 @@ | ||
2658 | |||
2659 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | ||
2660 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
2661 | - mp_bus_id_to_type[lbus] == MP_BUS_MCA || | ||
2662 | - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 | ||
2663 | + mp_bus_id_to_type[lbus] == MP_BUS_MCA | ||
2664 | ) && | ||
2665 | (mp_irqs[i].mpc_irqtype == type) && | ||
2666 | (mp_irqs[i].mpc_srcbusirq == irq)) | ||
2667 | @@ -1036,12 +1041,6 @@ | ||
2668 | #define default_MCA_trigger(idx) (1) | ||
2669 | #define default_MCA_polarity(idx) (0) | ||
2670 | |||
2671 | -/* NEC98 interrupts are always polarity zero edge triggered, | ||
2672 | - * when listed as conforming in the MP table. */ | ||
2673 | - | ||
2674 | -#define default_NEC98_trigger(idx) (0) | ||
2675 | -#define default_NEC98_polarity(idx) (0) | ||
2676 | - | ||
2677 | static int __init MPBIOS_polarity(int idx) | ||
2678 | { | ||
2679 | int bus = mp_irqs[idx].mpc_srcbus; | ||
2680 | @@ -1076,11 +1075,6 @@ | ||
2681 | polarity = default_MCA_polarity(idx); | ||
2682 | break; | ||
2683 | } | ||
2684 | - case MP_BUS_NEC98: /* NEC 98 pin */ | ||
2685 | - { | ||
2686 | - polarity = default_NEC98_polarity(idx); | ||
2687 | - break; | ||
2688 | - } | ||
2689 | default: | ||
2690 | { | ||
2691 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
2692 | @@ -1150,11 +1144,6 @@ | ||
2693 | trigger = default_MCA_trigger(idx); | ||
2694 | break; | ||
2695 | } | ||
2696 | - case MP_BUS_NEC98: /* NEC 98 pin */ | ||
2697 | - { | ||
2698 | - trigger = default_NEC98_trigger(idx); | ||
2699 | - break; | ||
2700 | - } | ||
2701 | default: | ||
2702 | { | ||
2703 | printk(KERN_WARNING "broken BIOS!!\n"); | ||
2704 | @@ -1216,7 +1205,6 @@ | ||
2705 | case MP_BUS_ISA: /* ISA pin */ | ||
2706 | case MP_BUS_EISA: | ||
2707 | case MP_BUS_MCA: | ||
2708 | - case MP_BUS_NEC98: | ||
2709 | { | ||
2710 | irq = mp_irqs[idx].mpc_srcbusirq; | ||
2711 | break; | ||
2712 | @@ -1284,7 +1272,7 @@ | ||
2713 | } | ||
2714 | |||
2715 | /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ | ||
2716 | -u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ | ||
2717 | +static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ | ||
2718 | |||
2719 | static int __assign_irq_vector(int irq) | ||
2720 | { | ||
2721 | @@ -1407,8 +1395,8 @@ | ||
2722 | if (!apic && (irq < 16)) | ||
2723 | disable_8259A_irq(irq); | ||
2724 | } | ||
2725 | - ioapic_write_entry(apic, pin, entry); | ||
2726 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2727 | + __ioapic_write_entry(apic, pin, entry); | ||
2728 | set_native_irq_info(irq, TARGET_CPUS); | ||
2729 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2730 | } | ||
2731 | @@ -1974,6 +1962,15 @@ | ||
2732 | #endif | ||
2733 | |||
2734 | #ifndef CONFIG_XEN | ||
2735 | +static int no_timer_check __initdata; | ||
2736 | + | ||
2737 | +static int __init notimercheck(char *s) | ||
2738 | +{ | ||
2739 | + no_timer_check = 1; | ||
2740 | + return 1; | ||
2741 | +} | ||
2742 | +__setup("no_timer_check", notimercheck); | ||
2743 | + | ||
2744 | /* | ||
2745 | * There is a nasty bug in some older SMP boards, their mptable lies | ||
2746 | * about the timer IRQ. We do the following to work around the situation: | ||
2747 | @@ -1982,10 +1979,13 @@ | ||
2748 | * - if this function detects that timer IRQs are defunct, then we fall | ||
2749 | * back to ISA timer IRQs | ||
2750 | */ | ||
2751 | -static int __init timer_irq_works(void) | ||
2752 | +int __init timer_irq_works(void) | ||
2753 | { | ||
2754 | unsigned long t1 = jiffies; | ||
2755 | |||
2756 | + if (no_timer_check) | ||
2757 | + return 1; | ||
2758 | + | ||
2759 | local_irq_enable(); | ||
2760 | /* Let ten ticks pass... */ | ||
2761 | mdelay((10 * 1000) / HZ); | ||
2762 | @@ -2212,9 +2212,15 @@ | ||
2763 | unsigned char save_control, save_freq_select; | ||
2764 | |||
2765 | pin = find_isa_irq_pin(8, mp_INT); | ||
2766 | + if (pin == -1) { | ||
2767 | + WARN_ON_ONCE(1); | ||
2768 | + return; | ||
2769 | + } | ||
2770 | apic = find_isa_irq_apic(8, mp_INT); | ||
2771 | - if (pin == -1) | ||
2772 | + if (apic == -1) { | ||
2773 | + WARN_ON_ONCE(1); | ||
2774 | return; | ||
2775 | + } | ||
2776 | |||
2777 | entry0 = ioapic_read_entry(apic, pin); | ||
2778 | clear_IO_APIC_pin(apic, pin); | ||
2779 | @@ -2259,7 +2265,7 @@ | ||
2780 | * is so screwy. Thanks to Brian Perkins for testing/hacking this beast | ||
2781 | * fanatically on his truly buggy board. | ||
2782 | */ | ||
2783 | -static inline void check_timer(void) | ||
2784 | +static inline void __init check_timer(void) | ||
2785 | { | ||
2786 | int apic1, pin1, apic2, pin2; | ||
2787 | int vector; | ||
2788 | @@ -2543,7 +2549,7 @@ | ||
2789 | int create_irq(void) | ||
2790 | { | ||
2791 | /* Allocate an unused irq */ | ||
2792 | - int irq, new, vector; | ||
2793 | + int irq, new, vector = 0; | ||
2794 | unsigned long flags; | ||
2795 | |||
2796 | irq = -ENOSPC; | ||
2797 | @@ -2923,8 +2929,8 @@ | ||
2798 | if (!ioapic && (irq < 16)) | ||
2799 | disable_8259A_irq(irq); | ||
2800 | |||
2801 | - ioapic_write_entry(ioapic, pin, entry); | ||
2802 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2803 | + __ioapic_write_entry(ioapic, pin, entry); | ||
2804 | set_native_irq_info(irq, TARGET_CPUS); | ||
2805 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2806 | |||
2807 | --- a/arch/x86/kernel/io_apic_64-xen.c | ||
2808 | +++ b/arch/x86/kernel/io_apic_64-xen.c | ||
2809 | @@ -199,14 +199,20 @@ | ||
2810 | * the interrupt, and we need to make sure the entry is fully populated | ||
2811 | * before that happens. | ||
2812 | */ | ||
2813 | -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2814 | +static void | ||
2815 | +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2816 | { | ||
2817 | - unsigned long flags; | ||
2818 | union entry_union eu; | ||
2819 | eu.entry = e; | ||
2820 | - spin_lock_irqsave(&ioapic_lock, flags); | ||
2821 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
2822 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
2823 | +} | ||
2824 | + | ||
2825 | +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
2826 | +{ | ||
2827 | + unsigned long flags; | ||
2828 | + spin_lock_irqsave(&ioapic_lock, flags); | ||
2829 | + __ioapic_write_entry(apic, pin, e); | ||
2830 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2831 | } | ||
2832 | |||
2833 | @@ -714,6 +720,22 @@ | ||
2834 | } | ||
2835 | |||
2836 | #ifndef CONFIG_XEN | ||
2837 | +static void __clear_irq_vector(int irq) | ||
2838 | +{ | ||
2839 | + cpumask_t mask; | ||
2840 | + int cpu, vector; | ||
2841 | + | ||
2842 | + BUG_ON(!irq_vector[irq]); | ||
2843 | + | ||
2844 | + vector = irq_vector[irq]; | ||
2845 | + cpus_and(mask, irq_domain[irq], cpu_online_map); | ||
2846 | + for_each_cpu_mask(cpu, mask) | ||
2847 | + per_cpu(vector_irq, cpu)[vector] = -1; | ||
2848 | + | ||
2849 | + irq_vector[irq] = 0; | ||
2850 | + irq_domain[irq] = CPU_MASK_NONE; | ||
2851 | +} | ||
2852 | + | ||
2853 | void __setup_vector_irq(int cpu) | ||
2854 | { | ||
2855 | /* Initialize vector_irq on a new cpu */ | ||
2856 | @@ -761,26 +783,65 @@ | ||
2857 | #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) | ||
2858 | #endif /* !CONFIG_XEN */ | ||
2859 | |||
2860 | -static void __init setup_IO_APIC_irqs(void) | ||
2861 | +static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | ||
2862 | { | ||
2863 | struct IO_APIC_route_entry entry; | ||
2864 | - int apic, pin, idx, irq, first_notcon = 1, vector; | ||
2865 | + int vector; | ||
2866 | unsigned long flags; | ||
2867 | |||
2868 | - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | ||
2869 | |||
2870 | - for (apic = 0; apic < nr_ioapics; apic++) { | ||
2871 | - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
2872 | + /* | ||
2873 | + * add it to the IO-APIC irq-routing table: | ||
2874 | + */ | ||
2875 | + memset(&entry,0,sizeof(entry)); | ||
2876 | |||
2877 | - /* | ||
2878 | - * add it to the IO-APIC irq-routing table: | ||
2879 | - */ | ||
2880 | - memset(&entry,0,sizeof(entry)); | ||
2881 | + entry.delivery_mode = INT_DELIVERY_MODE; | ||
2882 | + entry.dest_mode = INT_DEST_MODE; | ||
2883 | + entry.mask = 0; /* enable IRQ */ | ||
2884 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
2885 | |||
2886 | - entry.delivery_mode = INT_DELIVERY_MODE; | ||
2887 | - entry.dest_mode = INT_DEST_MODE; | ||
2888 | - entry.mask = 0; /* enable IRQ */ | ||
2889 | + entry.trigger = irq_trigger(idx); | ||
2890 | + entry.polarity = irq_polarity(idx); | ||
2891 | + | ||
2892 | + if (irq_trigger(idx)) { | ||
2893 | + entry.trigger = 1; | ||
2894 | + entry.mask = 1; | ||
2895 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
2896 | + } | ||
2897 | + | ||
2898 | + if (/* !apic && */ !IO_APIC_IRQ(irq)) | ||
2899 | + return; | ||
2900 | + | ||
2901 | + if (IO_APIC_IRQ(irq)) { | ||
2902 | + cpumask_t mask; | ||
2903 | + vector = assign_irq_vector(irq, TARGET_CPUS, &mask); | ||
2904 | + if (vector < 0) | ||
2905 | + return; | ||
2906 | + | ||
2907 | + entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | ||
2908 | + entry.vector = vector; | ||
2909 | + | ||
2910 | + ioapic_register_intr(irq, vector, IOAPIC_AUTO); | ||
2911 | + if (!apic && (irq < 16)) | ||
2912 | + disable_8259A_irq(irq); | ||
2913 | + } | ||
2914 | + | ||
2915 | + ioapic_write_entry(apic, pin, entry); | ||
2916 | + | ||
2917 | + spin_lock_irqsave(&ioapic_lock, flags); | ||
2918 | + set_native_irq_info(irq, TARGET_CPUS); | ||
2919 | + spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2920 | + | ||
2921 | +} | ||
2922 | + | ||
2923 | +static void __init setup_IO_APIC_irqs(void) | ||
2924 | +{ | ||
2925 | + int apic, pin, idx, irq, first_notcon = 1; | ||
2926 | + | ||
2927 | + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | ||
2928 | + | ||
2929 | + for (apic = 0; apic < nr_ioapics; apic++) { | ||
2930 | + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
2931 | |||
2932 | idx = find_irq_entry(apic,pin,mp_INT); | ||
2933 | if (idx == -1) { | ||
2934 | @@ -792,39 +853,11 @@ | ||
2935 | continue; | ||
2936 | } | ||
2937 | |||
2938 | - entry.trigger = irq_trigger(idx); | ||
2939 | - entry.polarity = irq_polarity(idx); | ||
2940 | - | ||
2941 | - if (irq_trigger(idx)) { | ||
2942 | - entry.trigger = 1; | ||
2943 | - entry.mask = 1; | ||
2944 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | ||
2945 | - } | ||
2946 | - | ||
2947 | irq = pin_2_irq(idx, apic, pin); | ||
2948 | add_pin_to_irq(irq, apic, pin); | ||
2949 | |||
2950 | - if (/* !apic && */ !IO_APIC_IRQ(irq)) | ||
2951 | - continue; | ||
2952 | - | ||
2953 | - if (IO_APIC_IRQ(irq)) { | ||
2954 | - cpumask_t mask; | ||
2955 | - vector = assign_irq_vector(irq, TARGET_CPUS, &mask); | ||
2956 | - if (vector < 0) | ||
2957 | - continue; | ||
2958 | - | ||
2959 | - entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | ||
2960 | - entry.vector = vector; | ||
2961 | + setup_IO_APIC_irq(apic, pin, idx, irq); | ||
2962 | |||
2963 | - ioapic_register_intr(irq, vector, IOAPIC_AUTO); | ||
2964 | - if (!apic && (irq < 16)) | ||
2965 | - disable_8259A_irq(irq); | ||
2966 | - } | ||
2967 | - ioapic_write_entry(apic, pin, entry); | ||
2968 | - | ||
2969 | - spin_lock_irqsave(&ioapic_lock, flags); | ||
2970 | - set_native_irq_info(irq, TARGET_CPUS); | ||
2971 | - spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2972 | } | ||
2973 | } | ||
2974 | |||
2975 | @@ -1819,7 +1852,7 @@ | ||
2976 | dynamic_irq_cleanup(irq); | ||
2977 | |||
2978 | spin_lock_irqsave(&vector_lock, flags); | ||
2979 | - irq_vector[irq] = 0; | ||
2980 | + __clear_irq_vector(irq); | ||
2981 | spin_unlock_irqrestore(&vector_lock, flags); | ||
2982 | } | ||
2983 | #endif | ||
2984 | @@ -2123,7 +2156,15 @@ | ||
2985 | if (irq_entry == -1) | ||
2986 | continue; | ||
2987 | irq = pin_2_irq(irq_entry, ioapic, pin); | ||
2988 | - set_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
2989 | + | ||
2990 | + /* setup_IO_APIC_irqs could fail to get vector for some device | ||
2991 | + * when you have too many devices, because at that time only boot | ||
2992 | + * cpu is online. | ||
2993 | + */ | ||
2994 | + if(!irq_vector[irq]) | ||
2995 | + setup_IO_APIC_irq(ioapic, pin, irq_entry, irq); | ||
2996 | + else | ||
2997 | + set_ioapic_affinity_irq(irq, TARGET_CPUS); | ||
2998 | } | ||
2999 | |||
3000 | } | ||
3001 | --- a/arch/x86/kernel/irq_64-xen.c | ||
3002 | +++ b/arch/x86/kernel/irq_64-xen.c | ||
3003 | @@ -120,7 +120,7 @@ | ||
3004 | |||
3005 | if (likely(irq < NR_IRQS)) | ||
3006 | generic_handle_irq(irq); | ||
3007 | - else | ||
3008 | + else if (printk_ratelimit()) | ||
3009 | printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", | ||
3010 | __func__, smp_processor_id(), irq); | ||
3011 | |||
3012 | --- a/arch/x86/kernel/ldt_32-xen.c | ||
3013 | +++ b/arch/x86/kernel/ldt_32-xen.c | ||
3014 | @@ -177,16 +177,14 @@ | ||
3015 | { | ||
3016 | int err; | ||
3017 | unsigned long size; | ||
3018 | - void *address; | ||
3019 | |||
3020 | err = 0; | ||
3021 | - address = &default_ldt[0]; | ||
3022 | size = 5*sizeof(struct desc_struct); | ||
3023 | if (size > bytecount) | ||
3024 | size = bytecount; | ||
3025 | |||
3026 | err = size; | ||
3027 | - if (copy_to_user(ptr, address, size)) | ||
3028 | + if (clear_user(ptr, size)) | ||
3029 | err = -EFAULT; | ||
3030 | |||
3031 | return err; | ||
3032 | --- a/arch/x86/kernel/microcode-xen.c | ||
3033 | +++ b/arch/x86/kernel/microcode-xen.c | ||
3034 | @@ -1,7 +1,7 @@ | ||
3035 | /* | ||
3036 | * Intel CPU Microcode Update Driver for Linux | ||
3037 | * | ||
3038 | - * Copyright (C) 2000-2004 Tigran Aivazian | ||
3039 | + * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk> | ||
3040 | * 2006 Shaohua Li <shaohua.li@intel.com> | ||
3041 | * | ||
3042 | * This driver allows to upgrade microcode on Intel processors | ||
3043 | @@ -43,7 +43,7 @@ | ||
3044 | #include <asm/processor.h> | ||
3045 | |||
3046 | MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); | ||
3047 | -MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>"); | ||
3048 | +MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | ||
3049 | MODULE_LICENSE("GPL"); | ||
3050 | |||
3051 | static int verbose; | ||
3052 | @@ -195,7 +195,7 @@ | ||
3053 | request_microcode(); | ||
3054 | |||
3055 | printk(KERN_INFO | ||
3056 | - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); | ||
3057 | + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n"); | ||
3058 | return 0; | ||
3059 | } | ||
3060 | |||
3061 | --- a/arch/x86/kernel/mpparse_32-xen.c | ||
3062 | +++ b/arch/x86/kernel/mpparse_32-xen.c | ||
3063 | @@ -36,7 +36,7 @@ | ||
3064 | |||
3065 | /* Have we found an MP table */ | ||
3066 | int smp_found_config; | ||
3067 | -unsigned int __initdata maxcpus = NR_CPUS; | ||
3068 | +unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
3069 | |||
3070 | /* | ||
3071 | * Various Linux-internal data structures created from the | ||
3072 | @@ -102,10 +102,10 @@ | ||
3073 | */ | ||
3074 | |||
3075 | static int mpc_record; | ||
3076 | -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata; | ||
3077 | +static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; | ||
3078 | |||
3079 | #ifndef CONFIG_XEN | ||
3080 | -static void __devinit MP_processor_info (struct mpc_config_processor *m) | ||
3081 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | ||
3082 | { | ||
3083 | int ver, apicid; | ||
3084 | physid_mask_t phys_cpu; | ||
3085 | @@ -221,7 +221,7 @@ | ||
3086 | bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; | ||
3087 | } | ||
3088 | #else | ||
3089 | -void __init MP_processor_info (struct mpc_config_processor *m) | ||
3090 | +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) | ||
3091 | { | ||
3092 | num_processors++; | ||
3093 | } | ||
3094 | @@ -256,8 +256,6 @@ | ||
3095 | mp_current_pci_id++; | ||
3096 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { | ||
3097 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; | ||
3098 | - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { | ||
3099 | - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; | ||
3100 | } else { | ||
3101 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | ||
3102 | } | ||
3103 | @@ -842,7 +840,7 @@ | ||
3104 | #endif | ||
3105 | } | ||
3106 | |||
3107 | -void __devinit mp_register_lapic (u8 id, u8 enabled) | ||
3108 | +void __cpuinit mp_register_lapic (u8 id, u8 enabled) | ||
3109 | { | ||
3110 | struct mpc_config_processor processor; | ||
3111 | int boot_cpu = 0; | ||
3112 | --- a/arch/x86/kernel/mpparse_64-xen.c | ||
3113 | +++ b/arch/x86/kernel/mpparse_64-xen.c | ||
3114 | @@ -35,8 +35,6 @@ | ||
3115 | int smp_found_config; | ||
3116 | unsigned int __initdata maxcpus = NR_CPUS; | ||
3117 | |||
3118 | -int acpi_found_madt; | ||
3119 | - | ||
3120 | /* | ||
3121 | * Various Linux-internal data structures created from the | ||
3122 | * MP-table. | ||
3123 | --- a/arch/x86/kernel/pci-dma_32-xen.c | ||
3124 | +++ b/arch/x86/kernel/pci-dma_32-xen.c | ||
3125 | @@ -282,7 +282,7 @@ | ||
3126 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
3127 | dma_addr_t device_addr, size_t size, int flags) | ||
3128 | { | ||
3129 | - void __iomem *mem_base; | ||
3130 | + void __iomem *mem_base = NULL; | ||
3131 | int pages = size >> PAGE_SHIFT; | ||
3132 | int bitmap_size = (pages + 31)/32; | ||
3133 | |||
3134 | @@ -299,14 +299,12 @@ | ||
3135 | if (!mem_base) | ||
3136 | goto out; | ||
3137 | |||
3138 | - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
3139 | + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
3140 | if (!dev->dma_mem) | ||
3141 | goto out; | ||
3142 | - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); | ||
3143 | - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); | ||
3144 | + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
3145 | if (!dev->dma_mem->bitmap) | ||
3146 | goto free1_out; | ||
3147 | - memset(dev->dma_mem->bitmap, 0, bitmap_size); | ||
3148 | |||
3149 | dev->dma_mem->virt_base = mem_base; | ||
3150 | dev->dma_mem->device_base = device_addr; | ||
3151 | @@ -321,6 +319,8 @@ | ||
3152 | free1_out: | ||
3153 | kfree(dev->dma_mem->bitmap); | ||
3154 | out: | ||
3155 | + if (mem_base) | ||
3156 | + iounmap(mem_base); | ||
3157 | return 0; | ||
3158 | } | ||
3159 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
3160 | --- a/arch/x86/kernel/process_32-xen.c | ||
3161 | +++ b/arch/x86/kernel/process_32-xen.c | ||
3162 | @@ -60,6 +60,7 @@ | ||
3163 | |||
3164 | #include <asm/tlbflush.h> | ||
3165 | #include <asm/cpu.h> | ||
3166 | +#include <asm/pda.h> | ||
3167 | |||
3168 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | ||
3169 | |||
3170 | @@ -104,28 +105,24 @@ | ||
3171 | */ | ||
3172 | static void poll_idle (void) | ||
3173 | { | ||
3174 | - local_irq_enable(); | ||
3175 | - | ||
3176 | - asm volatile( | ||
3177 | - "2:" | ||
3178 | - "testl %0, %1;" | ||
3179 | - "rep; nop;" | ||
3180 | - "je 2b;" | ||
3181 | - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); | ||
3182 | + cpu_relax(); | ||
3183 | } | ||
3184 | |||
3185 | static void xen_idle(void) | ||
3186 | { | ||
3187 | - local_irq_disable(); | ||
3188 | + current_thread_info()->status &= ~TS_POLLING; | ||
3189 | + /* | ||
3190 | + * TS_POLLING-cleared state must be visible before we | ||
3191 | + * test NEED_RESCHED: | ||
3192 | + */ | ||
3193 | + smp_mb(); | ||
3194 | |||
3195 | - if (need_resched()) | ||
3196 | + local_irq_disable(); | ||
3197 | + if (!need_resched()) | ||
3198 | + safe_halt(); /* enables interrupts racelessly */ | ||
3199 | + else | ||
3200 | local_irq_enable(); | ||
3201 | - else { | ||
3202 | - current_thread_info()->status &= ~TS_POLLING; | ||
3203 | - smp_mb__after_clear_bit(); | ||
3204 | - safe_halt(); | ||
3205 | - current_thread_info()->status |= TS_POLLING; | ||
3206 | - } | ||
3207 | + current_thread_info()->status |= TS_POLLING; | ||
3208 | } | ||
3209 | #ifdef CONFIG_APM_MODULE | ||
3210 | EXPORT_SYMBOL(default_idle); | ||
3211 | @@ -250,8 +247,8 @@ | ||
3212 | regs->eax,regs->ebx,regs->ecx,regs->edx); | ||
3213 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | ||
3214 | regs->esi, regs->edi, regs->ebp); | ||
3215 | - printk(" DS: %04x ES: %04x\n", | ||
3216 | - 0xffff & regs->xds,0xffff & regs->xes); | ||
3217 | + printk(" DS: %04x ES: %04x GS: %04x\n", | ||
3218 | + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | ||
3219 | |||
3220 | cr0 = read_cr0(); | ||
3221 | cr2 = read_cr2(); | ||
3222 | @@ -282,6 +279,7 @@ | ||
3223 | |||
3224 | regs.xds = __USER_DS; | ||
3225 | regs.xes = __USER_DS; | ||
3226 | + regs.xgs = __KERNEL_PDA; | ||
3227 | regs.orig_eax = -1; | ||
3228 | regs.eip = (unsigned long) kernel_thread_helper; | ||
3229 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | ||
3230 | @@ -359,7 +357,6 @@ | ||
3231 | p->thread.eip = (unsigned long) ret_from_fork; | ||
3232 | |||
3233 | savesegment(fs,p->thread.fs); | ||
3234 | - savesegment(gs,p->thread.gs); | ||
3235 | |||
3236 | tsk = current; | ||
3237 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | ||
3238 | @@ -438,7 +435,7 @@ | ||
3239 | dump->regs.ds = regs->xds; | ||
3240 | dump->regs.es = regs->xes; | ||
3241 | savesegment(fs,dump->regs.fs); | ||
3242 | - savesegment(gs,dump->regs.gs); | ||
3243 | + dump->regs.gs = regs->xgs; | ||
3244 | dump->regs.orig_eax = regs->orig_eax; | ||
3245 | dump->regs.eip = regs->eip; | ||
3246 | dump->regs.cs = regs->xcs; | ||
3247 | @@ -614,17 +611,19 @@ | ||
3248 | if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL))) | ||
3249 | BUG(); | ||
3250 | |||
3251 | + /* we're going to use this soon, after a few expensive things */ | ||
3252 | + if (next_p->fpu_counter > 5) | ||
3253 | + prefetch(&next->i387.fxsave); | ||
3254 | + | ||
3255 | /* | ||
3256 | - * Restore %fs and %gs if needed. | ||
3257 | + * Restore %fs if needed. | ||
3258 | * | ||
3259 | - * Glibc normally makes %fs be zero, and %gs is one of | ||
3260 | - * the TLS segments. | ||
3261 | + * Glibc normally makes %fs be zero. | ||
3262 | */ | ||
3263 | if (unlikely(next->fs)) | ||
3264 | loadsegment(fs, next->fs); | ||
3265 | |||
3266 | - if (next->gs) | ||
3267 | - loadsegment(gs, next->gs); | ||
3268 | + write_pda(pcurrent, next_p); | ||
3269 | |||
3270 | /* | ||
3271 | * Now maybe handle debug registers | ||
3272 | @@ -634,6 +633,13 @@ | ||
3273 | |||
3274 | disable_tsc(prev_p, next_p); | ||
3275 | |||
3276 | + /* If the task has used fpu the last 5 timeslices, just do a full | ||
3277 | + * restore of the math state immediately to avoid the trap; the | ||
3278 | + * chances of needing FPU soon are obviously high now | ||
3279 | + */ | ||
3280 | + if (next_p->fpu_counter > 5) | ||
3281 | + math_state_restore(); | ||
3282 | + | ||
3283 | return prev_p; | ||
3284 | } | ||
3285 | |||
3286 | --- a/arch/x86/kernel/process_64-xen.c | ||
3287 | +++ b/arch/x86/kernel/process_64-xen.c | ||
3288 | @@ -119,29 +119,23 @@ | ||
3289 | static void poll_idle (void) | ||
3290 | { | ||
3291 | local_irq_enable(); | ||
3292 | - | ||
3293 | - asm volatile( | ||
3294 | - "2:" | ||
3295 | - "testl %0,%1;" | ||
3296 | - "rep; nop;" | ||
3297 | - "je 2b;" | ||
3298 | - : : | ||
3299 | - "i" (_TIF_NEED_RESCHED), | ||
3300 | - "m" (current_thread_info()->flags)); | ||
3301 | + cpu_relax(); | ||
3302 | } | ||
3303 | |||
3304 | static void xen_idle(void) | ||
3305 | { | ||
3306 | + current_thread_info()->status &= ~TS_POLLING; | ||
3307 | + /* | ||
3308 | + * TS_POLLING-cleared state must be visible before we | ||
3309 | + * test NEED_RESCHED: | ||
3310 | + */ | ||
3311 | + smp_mb(); | ||
3312 | local_irq_disable(); | ||
3313 | - | ||
3314 | - if (need_resched()) | ||
3315 | - local_irq_enable(); | ||
3316 | - else { | ||
3317 | - current_thread_info()->status &= ~TS_POLLING; | ||
3318 | - smp_mb__after_clear_bit(); | ||
3319 | + if (!need_resched()) | ||
3320 | safe_halt(); | ||
3321 | - current_thread_info()->status |= TS_POLLING; | ||
3322 | - } | ||
3323 | + else | ||
3324 | + local_irq_enable(); | ||
3325 | + current_thread_info()->status |= TS_POLLING; | ||
3326 | } | ||
3327 | |||
3328 | #ifdef CONFIG_HOTPLUG_CPU | ||
3329 | @@ -181,6 +175,12 @@ | ||
3330 | idle = xen_idle; /* no alternatives */ | ||
3331 | if (cpu_is_offline(smp_processor_id())) | ||
3332 | play_dead(); | ||
3333 | + /* | ||
3334 | + * Idle routines should keep interrupts disabled | ||
3335 | + * from here on, until they go to idle. | ||
3336 | + * Otherwise, idle callbacks can misfire. | ||
3337 | + */ | ||
3338 | + local_irq_disable(); | ||
3339 | enter_idle(); | ||
3340 | idle(); | ||
3341 | /* In many cases the interrupt that ended idle | ||
3342 | --- a/arch/x86/kernel/quirks-xen.c | ||
3343 | +++ b/arch/x86/kernel/quirks-xen.c | ||
3344 | @@ -3,10 +3,12 @@ | ||
3345 | */ | ||
3346 | #include <linux/pci.h> | ||
3347 | #include <linux/irq.h> | ||
3348 | +#include <asm/pci-direct.h> | ||
3349 | +#include <asm/genapic.h> | ||
3350 | +#include <asm/cpu.h> | ||
3351 | |||
3352 | #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) | ||
3353 | - | ||
3354 | -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | ||
3355 | +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | ||
3356 | { | ||
3357 | u8 config, rev; | ||
3358 | u32 word; | ||
3359 | @@ -14,14 +16,12 @@ | ||
3360 | /* BIOS may enable hardware IRQ balancing for | ||
3361 | * E7520/E7320/E7525(revision ID 0x9 and below) | ||
3362 | * based platforms. | ||
3363 | - * Disable SW irqbalance/affinity on those platforms. | ||
3364 | + * For those platforms, make sure that the genapic is set to 'flat' | ||
3365 | */ | ||
3366 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | ||
3367 | if (rev > 0x9) | ||
3368 | return; | ||
3369 | |||
3370 | - printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
3371 | - | ||
3372 | /* enable access to config space*/ | ||
3373 | pci_read_config_byte(dev, 0xf4, &config); | ||
3374 | pci_write_config_byte(dev, 0xf4, config|0x2); | ||
3375 | @@ -30,6 +30,46 @@ | ||
3376 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | ||
3377 | |||
3378 | if (!(word & (1 << 13))) { | ||
3379 | +#ifndef CONFIG_XEN | ||
3380 | +#ifdef CONFIG_X86_64 | ||
3381 | + if (genapic != &apic_flat) | ||
3382 | + panic("APIC mode must be flat on this system\n"); | ||
3383 | +#elif defined(CONFIG_X86_GENERICARCH) | ||
3384 | + if (genapic != &apic_default) | ||
3385 | + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); | ||
3386 | +#endif | ||
3387 | +#endif | ||
3388 | + } | ||
3389 | + | ||
3390 | + /* put back the original value for config space*/ | ||
3391 | + if (!(config & 0x2)) | ||
3392 | + pci_write_config_byte(dev, 0xf4, config); | ||
3393 | +} | ||
3394 | + | ||
3395 | +void __init quirk_intel_irqbalance(void) | ||
3396 | +{ | ||
3397 | + u8 config, rev; | ||
3398 | + u32 word; | ||
3399 | + | ||
3400 | + /* BIOS may enable hardware IRQ balancing for | ||
3401 | + * E7520/E7320/E7525(revision ID 0x9 and below) | ||
3402 | + * based platforms. | ||
3403 | + * Disable SW irqbalance/affinity on those platforms. | ||
3404 | + */ | ||
3405 | + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); | ||
3406 | + if (rev > 0x9) | ||
3407 | + return; | ||
3408 | + | ||
3409 | + printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
3410 | + | ||
3411 | + /* enable access to config space */ | ||
3412 | + config = read_pci_config_byte(0, 0, 0, 0xf4); | ||
3413 | + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); | ||
3414 | + | ||
3415 | + /* read xTPR register */ | ||
3416 | + word = read_pci_config_16(0, 0, 0x40, 0x4c); | ||
3417 | + | ||
3418 | + if (!(word & (1 << 13))) { | ||
3419 | struct xen_platform_op op; | ||
3420 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); | ||
3421 | op.cmd = XENPF_platform_quirk; | ||
3422 | @@ -37,11 +77,12 @@ | ||
3423 | WARN_ON(HYPERVISOR_platform_op(&op)); | ||
3424 | } | ||
3425 | |||
3426 | - /* put back the original value for config space*/ | ||
3427 | + /* put back the original value for config space */ | ||
3428 | if (!(config & 0x2)) | ||
3429 | - pci_write_config_byte(dev, 0xf4, config); | ||
3430 | + write_pci_config_byte(0, 0, 0, 0xf4, config); | ||
3431 | } | ||
3432 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); | ||
3433 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); | ||
3434 | -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); | ||
3435 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); | ||
3436 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); | ||
3437 | +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); | ||
3438 | + | ||
3439 | #endif | ||
3440 | --- a/arch/x86/kernel/setup_32-xen.c | ||
3441 | +++ b/arch/x86/kernel/setup_32-xen.c | ||
3442 | @@ -76,9 +76,6 @@ | ||
3443 | #include <xen/interface/kexec.h> | ||
3444 | #endif | ||
3445 | |||
3446 | -/* Forward Declaration. */ | ||
3447 | -void __init find_max_pfn(void); | ||
3448 | - | ||
3449 | static int xen_panic_event(struct notifier_block *, unsigned long, void *); | ||
3450 | static struct notifier_block xen_panic_block = { | ||
3451 | xen_panic_event, NULL, 0 /* try to go last */ | ||
3452 | @@ -92,14 +89,11 @@ | ||
3453 | /* | ||
3454 | * Machine setup.. | ||
3455 | */ | ||
3456 | - | ||
3457 | -#ifdef CONFIG_EFI | ||
3458 | -int efi_enabled = 0; | ||
3459 | -EXPORT_SYMBOL(efi_enabled); | ||
3460 | -#endif | ||
3461 | +extern struct resource code_resource; | ||
3462 | +extern struct resource data_resource; | ||
3463 | |||
3464 | /* cpu data as detected by the assembly code in head.S */ | ||
3465 | -struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | ||
3466 | +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | ||
3467 | /* common cpu data for all cpus */ | ||
3468 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; | ||
3469 | EXPORT_SYMBOL(boot_cpu_data); | ||
3470 | @@ -115,12 +109,6 @@ | ||
3471 | unsigned int BIOS_revision; | ||
3472 | unsigned int mca_pentium_flag; | ||
3473 | |||
3474 | -/* For PCI or other memory-mapped resources */ | ||
3475 | -unsigned long pci_mem_start = 0x10000000; | ||
3476 | -#ifdef CONFIG_PCI | ||
3477 | -EXPORT_SYMBOL(pci_mem_start); | ||
3478 | -#endif | ||
3479 | - | ||
3480 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | ||
3481 | int bootloader_type; | ||
3482 | |||
3483 | @@ -153,10 +141,6 @@ | ||
3484 | defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) | ||
3485 | EXPORT_SYMBOL(ist_info); | ||
3486 | #endif | ||
3487 | -struct e820map e820; | ||
3488 | -#ifdef CONFIG_XEN | ||
3489 | -struct e820map machine_e820; | ||
3490 | -#endif | ||
3491 | |||
3492 | extern void early_cpu_init(void); | ||
3493 | extern int root_mountflags; | ||
3494 | @@ -171,209 +155,6 @@ | ||
3495 | |||
3496 | unsigned char __initdata boot_params[PARAM_SIZE]; | ||
3497 | |||
3498 | -static struct resource data_resource = { | ||
3499 | - .name = "Kernel data", | ||
3500 | - .start = 0, | ||
3501 | - .end = 0, | ||
3502 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
3503 | -}; | ||
3504 | - | ||
3505 | -static struct resource code_resource = { | ||
3506 | - .name = "Kernel code", | ||
3507 | - .start = 0, | ||
3508 | - .end = 0, | ||
3509 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
3510 | -}; | ||
3511 | - | ||
3512 | -static struct resource system_rom_resource = { | ||
3513 | - .name = "System ROM", | ||
3514 | - .start = 0xf0000, | ||
3515 | - .end = 0xfffff, | ||
3516 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3517 | -}; | ||
3518 | - | ||
3519 | -static struct resource extension_rom_resource = { | ||
3520 | - .name = "Extension ROM", | ||
3521 | - .start = 0xe0000, | ||
3522 | - .end = 0xeffff, | ||
3523 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3524 | -}; | ||
3525 | - | ||
3526 | -static struct resource adapter_rom_resources[] = { { | ||
3527 | - .name = "Adapter ROM", | ||
3528 | - .start = 0xc8000, | ||
3529 | - .end = 0, | ||
3530 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3531 | -}, { | ||
3532 | - .name = "Adapter ROM", | ||
3533 | - .start = 0, | ||
3534 | - .end = 0, | ||
3535 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3536 | -}, { | ||
3537 | - .name = "Adapter ROM", | ||
3538 | - .start = 0, | ||
3539 | - .end = 0, | ||
3540 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3541 | -}, { | ||
3542 | - .name = "Adapter ROM", | ||
3543 | - .start = 0, | ||
3544 | - .end = 0, | ||
3545 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3546 | -}, { | ||
3547 | - .name = "Adapter ROM", | ||
3548 | - .start = 0, | ||
3549 | - .end = 0, | ||
3550 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3551 | -}, { | ||
3552 | - .name = "Adapter ROM", | ||
3553 | - .start = 0, | ||
3554 | - .end = 0, | ||
3555 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3556 | -} }; | ||
3557 | - | ||
3558 | -static struct resource video_rom_resource = { | ||
3559 | - .name = "Video ROM", | ||
3560 | - .start = 0xc0000, | ||
3561 | - .end = 0xc7fff, | ||
3562 | - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM | ||
3563 | -}; | ||
3564 | - | ||
3565 | -static struct resource video_ram_resource = { | ||
3566 | - .name = "Video RAM area", | ||
3567 | - .start = 0xa0000, | ||
3568 | - .end = 0xbffff, | ||
3569 | - .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
3570 | -}; | ||
3571 | - | ||
3572 | -static struct resource standard_io_resources[] = { { | ||
3573 | - .name = "dma1", | ||
3574 | - .start = 0x0000, | ||
3575 | - .end = 0x001f, | ||
3576 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3577 | -}, { | ||
3578 | - .name = "pic1", | ||
3579 | - .start = 0x0020, | ||
3580 | - .end = 0x0021, | ||
3581 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3582 | -}, { | ||
3583 | - .name = "timer0", | ||
3584 | - .start = 0x0040, | ||
3585 | - .end = 0x0043, | ||
3586 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3587 | -}, { | ||
3588 | - .name = "timer1", | ||
3589 | - .start = 0x0050, | ||
3590 | - .end = 0x0053, | ||
3591 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3592 | -}, { | ||
3593 | - .name = "keyboard", | ||
3594 | - .start = 0x0060, | ||
3595 | - .end = 0x006f, | ||
3596 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3597 | -}, { | ||
3598 | - .name = "dma page reg", | ||
3599 | - .start = 0x0080, | ||
3600 | - .end = 0x008f, | ||
3601 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3602 | -}, { | ||
3603 | - .name = "pic2", | ||
3604 | - .start = 0x00a0, | ||
3605 | - .end = 0x00a1, | ||
3606 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3607 | -}, { | ||
3608 | - .name = "dma2", | ||
3609 | - .start = 0x00c0, | ||
3610 | - .end = 0x00df, | ||
3611 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3612 | -}, { | ||
3613 | - .name = "fpu", | ||
3614 | - .start = 0x00f0, | ||
3615 | - .end = 0x00ff, | ||
3616 | - .flags = IORESOURCE_BUSY | IORESOURCE_IO | ||
3617 | -} }; | ||
3618 | - | ||
3619 | -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) | ||
3620 | - | ||
3621 | -static int __init romchecksum(unsigned char *rom, unsigned long length) | ||
3622 | -{ | ||
3623 | - unsigned char *p, sum = 0; | ||
3624 | - | ||
3625 | - for (p = rom; p < rom + length; p++) | ||
3626 | - sum += *p; | ||
3627 | - return sum == 0; | ||
3628 | -} | ||
3629 | - | ||
3630 | -static void __init probe_roms(void) | ||
3631 | -{ | ||
3632 | - unsigned long start, length, upper; | ||
3633 | - unsigned char *rom; | ||
3634 | - int i; | ||
3635 | - | ||
3636 | -#ifdef CONFIG_XEN | ||
3637 | - /* Nothing to do if not running in dom0. */ | ||
3638 | - if (!is_initial_xendomain()) | ||
3639 | - return; | ||
3640 | -#endif | ||
3641 | - | ||
3642 | - /* video rom */ | ||
3643 | - upper = adapter_rom_resources[0].start; | ||
3644 | - for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
3645 | - rom = isa_bus_to_virt(start); | ||
3646 | - if (!romsignature(rom)) | ||
3647 | - continue; | ||
3648 | - | ||
3649 | - video_rom_resource.start = start; | ||
3650 | - | ||
3651 | - /* 0 < length <= 0x7f * 512, historically */ | ||
3652 | - length = rom[2] * 512; | ||
3653 | - | ||
3654 | - /* if checksum okay, trust length byte */ | ||
3655 | - if (length && romchecksum(rom, length)) | ||
3656 | - video_rom_resource.end = start + length - 1; | ||
3657 | - | ||
3658 | - request_resource(&iomem_resource, &video_rom_resource); | ||
3659 | - break; | ||
3660 | - } | ||
3661 | - | ||
3662 | - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
3663 | - if (start < upper) | ||
3664 | - start = upper; | ||
3665 | - | ||
3666 | - /* system rom */ | ||
3667 | - request_resource(&iomem_resource, &system_rom_resource); | ||
3668 | - upper = system_rom_resource.start; | ||
3669 | - | ||
3670 | - /* check for extension rom (ignore length byte!) */ | ||
3671 | - rom = isa_bus_to_virt(extension_rom_resource.start); | ||
3672 | - if (romsignature(rom)) { | ||
3673 | - length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
3674 | - if (romchecksum(rom, length)) { | ||
3675 | - request_resource(&iomem_resource, &extension_rom_resource); | ||
3676 | - upper = extension_rom_resource.start; | ||
3677 | - } | ||
3678 | - } | ||
3679 | - | ||
3680 | - /* check for adapter roms on 2k boundaries */ | ||
3681 | - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { | ||
3682 | - rom = isa_bus_to_virt(start); | ||
3683 | - if (!romsignature(rom)) | ||
3684 | - continue; | ||
3685 | - | ||
3686 | - /* 0 < length <= 0x7f * 512, historically */ | ||
3687 | - length = rom[2] * 512; | ||
3688 | - | ||
3689 | - /* but accept any length that fits if checksum okay */ | ||
3690 | - if (!length || start + length > upper || !romchecksum(rom, length)) | ||
3691 | - continue; | ||
3692 | - | ||
3693 | - adapter_rom_resources[i].start = start; | ||
3694 | - adapter_rom_resources[i].end = start + length - 1; | ||
3695 | - request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
3696 | - | ||
3697 | - start = adapter_rom_resources[i++].end & ~2047UL; | ||
3698 | - } | ||
3699 | -} | ||
3700 | - | ||
3701 | /* | ||
3702 | * Point at the empty zero page to start with. We map the real shared_info | ||
3703 | * page as soon as fixmap is up and running. | ||
3704 | @@ -389,338 +170,6 @@ | ||
3705 | start_info_t *xen_start_info; | ||
3706 | EXPORT_SYMBOL(xen_start_info); | ||
3707 | |||
3708 | -void __init add_memory_region(unsigned long long start, | ||
3709 | - unsigned long long size, int type) | ||
3710 | -{ | ||
3711 | - int x; | ||
3712 | - | ||
3713 | - if (!efi_enabled) { | ||
3714 | - x = e820.nr_map; | ||
3715 | - | ||
3716 | - if (x == E820MAX) { | ||
3717 | - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | ||
3718 | - return; | ||
3719 | - } | ||
3720 | - | ||
3721 | - e820.map[x].addr = start; | ||
3722 | - e820.map[x].size = size; | ||
3723 | - e820.map[x].type = type; | ||
3724 | - e820.nr_map++; | ||
3725 | - } | ||
3726 | -} /* add_memory_region */ | ||
3727 | - | ||
3728 | -static void __init limit_regions(unsigned long long size) | ||
3729 | -{ | ||
3730 | - unsigned long long current_addr = 0; | ||
3731 | - int i; | ||
3732 | - | ||
3733 | - if (efi_enabled) { | ||
3734 | - efi_memory_desc_t *md; | ||
3735 | - void *p; | ||
3736 | - | ||
3737 | - for (p = memmap.map, i = 0; p < memmap.map_end; | ||
3738 | - p += memmap.desc_size, i++) { | ||
3739 | - md = p; | ||
3740 | - current_addr = md->phys_addr + (md->num_pages << 12); | ||
3741 | - if (md->type == EFI_CONVENTIONAL_MEMORY) { | ||
3742 | - if (current_addr >= size) { | ||
3743 | - md->num_pages -= | ||
3744 | - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); | ||
3745 | - memmap.nr_map = i + 1; | ||
3746 | - return; | ||
3747 | - } | ||
3748 | - } | ||
3749 | - } | ||
3750 | - } | ||
3751 | - for (i = 0; i < e820.nr_map; i++) { | ||
3752 | - current_addr = e820.map[i].addr + e820.map[i].size; | ||
3753 | - if (current_addr < size) | ||
3754 | - continue; | ||
3755 | - | ||
3756 | - if (e820.map[i].type != E820_RAM) | ||
3757 | - continue; | ||
3758 | - | ||
3759 | - if (e820.map[i].addr >= size) { | ||
3760 | - /* | ||
3761 | - * This region starts past the end of the | ||
3762 | - * requested size, skip it completely. | ||
3763 | - */ | ||
3764 | - e820.nr_map = i; | ||
3765 | - } else { | ||
3766 | - e820.nr_map = i + 1; | ||
3767 | - e820.map[i].size -= current_addr - size; | ||
3768 | - } | ||
3769 | - return; | ||
3770 | - } | ||
3771 | -#ifdef CONFIG_XEN | ||
3772 | - if (i==e820.nr_map && current_addr < size) { | ||
3773 | - /* | ||
3774 | - * The e820 map finished before our requested size so | ||
3775 | - * extend the final entry to the requested address. | ||
3776 | - */ | ||
3777 | - --i; | ||
3778 | - if (e820.map[i].type == E820_RAM) | ||
3779 | - e820.map[i].size -= current_addr - size; | ||
3780 | - else | ||
3781 | - add_memory_region(current_addr, size - current_addr, E820_RAM); | ||
3782 | - } | ||
3783 | -#endif | ||
3784 | -} | ||
3785 | - | ||
3786 | -#define E820_DEBUG 1 | ||
3787 | - | ||
3788 | -static void __init print_memory_map(char *who) | ||
3789 | -{ | ||
3790 | - int i; | ||
3791 | - | ||
3792 | - for (i = 0; i < e820.nr_map; i++) { | ||
3793 | - printk(" %s: %016Lx - %016Lx ", who, | ||
3794 | - e820.map[i].addr, | ||
3795 | - e820.map[i].addr + e820.map[i].size); | ||
3796 | - switch (e820.map[i].type) { | ||
3797 | - case E820_RAM: printk("(usable)\n"); | ||
3798 | - break; | ||
3799 | - case E820_RESERVED: | ||
3800 | - printk("(reserved)\n"); | ||
3801 | - break; | ||
3802 | - case E820_ACPI: | ||
3803 | - printk("(ACPI data)\n"); | ||
3804 | - break; | ||
3805 | - case E820_NVS: | ||
3806 | - printk("(ACPI NVS)\n"); | ||
3807 | - break; | ||
3808 | - default: printk("type %lu\n", e820.map[i].type); | ||
3809 | - break; | ||
3810 | - } | ||
3811 | - } | ||
3812 | -} | ||
3813 | - | ||
3814 | -/* | ||
3815 | - * Sanitize the BIOS e820 map. | ||
3816 | - * | ||
3817 | - * Some e820 responses include overlapping entries. The following | ||
3818 | - * replaces the original e820 map with a new one, removing overlaps. | ||
3819 | - * | ||
3820 | - */ | ||
3821 | -struct change_member { | ||
3822 | - struct e820entry *pbios; /* pointer to original bios entry */ | ||
3823 | - unsigned long long addr; /* address for this change point */ | ||
3824 | -}; | ||
3825 | -static struct change_member change_point_list[2*E820MAX] __initdata; | ||
3826 | -static struct change_member *change_point[2*E820MAX] __initdata; | ||
3827 | -static struct e820entry *overlap_list[E820MAX] __initdata; | ||
3828 | -static struct e820entry new_bios[E820MAX] __initdata; | ||
3829 | - | ||
3830 | -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | ||
3831 | -{ | ||
3832 | - struct change_member *change_tmp; | ||
3833 | - unsigned long current_type, last_type; | ||
3834 | - unsigned long long last_addr; | ||
3835 | - int chgidx, still_changing; | ||
3836 | - int overlap_entries; | ||
3837 | - int new_bios_entry; | ||
3838 | - int old_nr, new_nr, chg_nr; | ||
3839 | - int i; | ||
3840 | - | ||
3841 | - /* | ||
3842 | - Visually we're performing the following (1,2,3,4 = memory types)... | ||
3843 | - | ||
3844 | - Sample memory map (w/overlaps): | ||
3845 | - ____22__________________ | ||
3846 | - ______________________4_ | ||
3847 | - ____1111________________ | ||
3848 | - _44_____________________ | ||
3849 | - 11111111________________ | ||
3850 | - ____________________33__ | ||
3851 | - ___________44___________ | ||
3852 | - __________33333_________ | ||
3853 | - ______________22________ | ||
3854 | - ___________________2222_ | ||
3855 | - _________111111111______ | ||
3856 | - _____________________11_ | ||
3857 | - _________________4______ | ||
3858 | - | ||
3859 | - Sanitized equivalent (no overlap): | ||
3860 | - 1_______________________ | ||
3861 | - _44_____________________ | ||
3862 | - ___1____________________ | ||
3863 | - ____22__________________ | ||
3864 | - ______11________________ | ||
3865 | - _________1______________ | ||
3866 | - __________3_____________ | ||
3867 | - ___________44___________ | ||
3868 | - _____________33_________ | ||
3869 | - _______________2________ | ||
3870 | - ________________1_______ | ||
3871 | - _________________4______ | ||
3872 | - ___________________2____ | ||
3873 | - ____________________33__ | ||
3874 | - ______________________4_ | ||
3875 | - */ | ||
3876 | - | ||
3877 | - /* if there's only one memory region, don't bother */ | ||
3878 | - if (*pnr_map < 2) | ||
3879 | - return -1; | ||
3880 | - | ||
3881 | - old_nr = *pnr_map; | ||
3882 | - | ||
3883 | - /* bail out if we find any unreasonable addresses in bios map */ | ||
3884 | - for (i=0; i<old_nr; i++) | ||
3885 | - if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) | ||
3886 | - return -1; | ||
3887 | - | ||
3888 | - /* create pointers for initial change-point information (for sorting) */ | ||
3889 | - for (i=0; i < 2*old_nr; i++) | ||
3890 | - change_point[i] = &change_point_list[i]; | ||
3891 | - | ||
3892 | - /* record all known change-points (starting and ending addresses), | ||
3893 | - omitting those that are for empty memory regions */ | ||
3894 | - chgidx = 0; | ||
3895 | - for (i=0; i < old_nr; i++) { | ||
3896 | - if (biosmap[i].size != 0) { | ||
3897 | - change_point[chgidx]->addr = biosmap[i].addr; | ||
3898 | - change_point[chgidx++]->pbios = &biosmap[i]; | ||
3899 | - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; | ||
3900 | - change_point[chgidx++]->pbios = &biosmap[i]; | ||
3901 | - } | ||
3902 | - } | ||
3903 | - chg_nr = chgidx; /* true number of change-points */ | ||
3904 | - | ||
3905 | - /* sort change-point list by memory addresses (low -> high) */ | ||
3906 | - still_changing = 1; | ||
3907 | - while (still_changing) { | ||
3908 | - still_changing = 0; | ||
3909 | - for (i=1; i < chg_nr; i++) { | ||
3910 | - /* if <current_addr> > <last_addr>, swap */ | ||
3911 | - /* or, if current=<start_addr> & last=<end_addr>, swap */ | ||
3912 | - if ((change_point[i]->addr < change_point[i-1]->addr) || | ||
3913 | - ((change_point[i]->addr == change_point[i-1]->addr) && | ||
3914 | - (change_point[i]->addr == change_point[i]->pbios->addr) && | ||
3915 | - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) | ||
3916 | - ) | ||
3917 | - { | ||
3918 | - change_tmp = change_point[i]; | ||
3919 | - change_point[i] = change_point[i-1]; | ||
3920 | - change_point[i-1] = change_tmp; | ||
3921 | - still_changing=1; | ||
3922 | - } | ||
3923 | - } | ||
3924 | - } | ||
3925 | - | ||
3926 | - /* create a new bios memory map, removing overlaps */ | ||
3927 | - overlap_entries=0; /* number of entries in the overlap table */ | ||
3928 | - new_bios_entry=0; /* index for creating new bios map entries */ | ||
3929 | - last_type = 0; /* start with undefined memory type */ | ||
3930 | - last_addr = 0; /* start with 0 as last starting address */ | ||
3931 | - /* loop through change-points, determining affect on the new bios map */ | ||
3932 | - for (chgidx=0; chgidx < chg_nr; chgidx++) | ||
3933 | - { | ||
3934 | - /* keep track of all overlapping bios entries */ | ||
3935 | - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) | ||
3936 | - { | ||
3937 | - /* add map entry to overlap list (> 1 entry implies an overlap) */ | ||
3938 | - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; | ||
3939 | - } | ||
3940 | - else | ||
3941 | - { | ||
3942 | - /* remove entry from list (order independent, so swap with last) */ | ||
3943 | - for (i=0; i<overlap_entries; i++) | ||
3944 | - { | ||
3945 | - if (overlap_list[i] == change_point[chgidx]->pbios) | ||
3946 | - overlap_list[i] = overlap_list[overlap_entries-1]; | ||
3947 | - } | ||
3948 | - overlap_entries--; | ||
3949 | - } | ||
3950 | - /* if there are overlapping entries, decide which "type" to use */ | ||
3951 | - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ | ||
3952 | - current_type = 0; | ||
3953 | - for (i=0; i<overlap_entries; i++) | ||
3954 | - if (overlap_list[i]->type > current_type) | ||
3955 | - current_type = overlap_list[i]->type; | ||
3956 | - /* continue building up new bios map based on this information */ | ||
3957 | - if (current_type != last_type) { | ||
3958 | - if (last_type != 0) { | ||
3959 | - new_bios[new_bios_entry].size = | ||
3960 | - change_point[chgidx]->addr - last_addr; | ||
3961 | - /* move forward only if the new size was non-zero */ | ||
3962 | - if (new_bios[new_bios_entry].size != 0) | ||
3963 | - if (++new_bios_entry >= E820MAX) | ||
3964 | - break; /* no more space left for new bios entries */ | ||
3965 | - } | ||
3966 | - if (current_type != 0) { | ||
3967 | - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; | ||
3968 | - new_bios[new_bios_entry].type = current_type; | ||
3969 | - last_addr=change_point[chgidx]->addr; | ||
3970 | - } | ||
3971 | - last_type = current_type; | ||
3972 | - } | ||
3973 | - } | ||
3974 | - new_nr = new_bios_entry; /* retain count for new bios entries */ | ||
3975 | - | ||
3976 | - /* copy new bios mapping into original location */ | ||
3977 | - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | ||
3978 | - *pnr_map = new_nr; | ||
3979 | - | ||
3980 | - return 0; | ||
3981 | -} | ||
3982 | - | ||
3983 | -/* | ||
3984 | - * Copy the BIOS e820 map into a safe place. | ||
3985 | - * | ||
3986 | - * Sanity-check it while we're at it.. | ||
3987 | - * | ||
3988 | - * If we're lucky and live on a modern system, the setup code | ||
3989 | - * will have given us a memory map that we can use to properly | ||
3990 | - * set up memory. If we aren't, we'll fake a memory map. | ||
3991 | - * | ||
3992 | - * We check to see that the memory map contains at least 2 elements | ||
3993 | - * before we'll use it, because the detection code in setup.S may | ||
3994 | - * not be perfect and most every PC known to man has two memory | ||
3995 | - * regions: one from 0 to 640k, and one from 1mb up. (The IBM | ||
3996 | - * thinkpad 560x, for example, does not cooperate with the memory | ||
3997 | - * detection code.) | ||
3998 | - */ | ||
3999 | -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | ||
4000 | -{ | ||
4001 | -#ifndef CONFIG_XEN | ||
4002 | - /* Only one memory region (or negative)? Ignore it */ | ||
4003 | - if (nr_map < 2) | ||
4004 | - return -1; | ||
4005 | -#else | ||
4006 | - BUG_ON(nr_map < 1); | ||
4007 | -#endif | ||
4008 | - | ||
4009 | - do { | ||
4010 | - unsigned long long start = biosmap->addr; | ||
4011 | - unsigned long long size = biosmap->size; | ||
4012 | - unsigned long long end = start + size; | ||
4013 | - unsigned long type = biosmap->type; | ||
4014 | - | ||
4015 | - /* Overflow in 64 bits? Ignore the memory map. */ | ||
4016 | - if (start > end) | ||
4017 | - return -1; | ||
4018 | - | ||
4019 | -#ifndef CONFIG_XEN | ||
4020 | - /* | ||
4021 | - * Some BIOSes claim RAM in the 640k - 1M region. | ||
4022 | - * Not right. Fix it up. | ||
4023 | - */ | ||
4024 | - if (type == E820_RAM) { | ||
4025 | - if (start < 0x100000ULL && end > 0xA0000ULL) { | ||
4026 | - if (start < 0xA0000ULL) | ||
4027 | - add_memory_region(start, 0xA0000ULL-start, type); | ||
4028 | - if (end <= 0x100000ULL) | ||
4029 | - continue; | ||
4030 | - start = 0x100000ULL; | ||
4031 | - size = end - start; | ||
4032 | - } | ||
4033 | - } | ||
4034 | -#endif | ||
4035 | - add_memory_region(start, size, type); | ||
4036 | - } while (biosmap++,--nr_map); | ||
4037 | - return 0; | ||
4038 | -} | ||
4039 | - | ||
4040 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | ||
4041 | struct edd edd; | ||
4042 | #ifdef CONFIG_EDD_MODULE | ||
4043 | @@ -746,7 +195,7 @@ | ||
4044 | } | ||
4045 | #endif | ||
4046 | |||
4047 | -static int __initdata user_defined_memmap = 0; | ||
4048 | +int __initdata user_defined_memmap = 0; | ||
4049 | |||
4050 | /* | ||
4051 | * "mem=nopentium" disables the 4MB page tables. | ||
4052 | @@ -783,51 +232,6 @@ | ||
4053 | } | ||
4054 | early_param("mem", parse_mem); | ||
4055 | |||
4056 | -static int __init parse_memmap(char *arg) | ||
4057 | -{ | ||
4058 | - if (!arg) | ||
4059 | - return -EINVAL; | ||
4060 | - | ||
4061 | - if (strcmp(arg, "exactmap") == 0) { | ||
4062 | -#ifdef CONFIG_CRASH_DUMP | ||
4063 | - /* If we are doing a crash dump, we | ||
4064 | - * still need to know the real mem | ||
4065 | - * size before original memory map is | ||
4066 | - * reset. | ||
4067 | - */ | ||
4068 | - find_max_pfn(); | ||
4069 | - saved_max_pfn = max_pfn; | ||
4070 | -#endif | ||
4071 | - e820.nr_map = 0; | ||
4072 | - user_defined_memmap = 1; | ||
4073 | - } else { | ||
4074 | - /* If the user specifies memory size, we | ||
4075 | - * limit the BIOS-provided memory map to | ||
4076 | - * that size. exactmap can be used to specify | ||
4077 | - * the exact map. mem=number can be used to | ||
4078 | - * trim the existing memory map. | ||
4079 | - */ | ||
4080 | - unsigned long long start_at, mem_size; | ||
4081 | - | ||
4082 | - mem_size = memparse(arg, &arg); | ||
4083 | - if (*arg == '@') { | ||
4084 | - start_at = memparse(arg+1, &arg); | ||
4085 | - add_memory_region(start_at, mem_size, E820_RAM); | ||
4086 | - } else if (*arg == '#') { | ||
4087 | - start_at = memparse(arg+1, &arg); | ||
4088 | - add_memory_region(start_at, mem_size, E820_ACPI); | ||
4089 | - } else if (*arg == '$') { | ||
4090 | - start_at = memparse(arg+1, &arg); | ||
4091 | - add_memory_region(start_at, mem_size, E820_RESERVED); | ||
4092 | - } else { | ||
4093 | - limit_regions(mem_size); | ||
4094 | - user_defined_memmap = 1; | ||
4095 | - } | ||
4096 | - } | ||
4097 | - return 0; | ||
4098 | -} | ||
4099 | -early_param("memmap", parse_memmap); | ||
4100 | - | ||
4101 | #ifdef CONFIG_PROC_VMCORE | ||
4102 | /* elfcorehdr= specifies the location of elf core header | ||
4103 | * stored by the crashed kernel. | ||
4104 | @@ -894,127 +298,6 @@ | ||
4105 | #endif | ||
4106 | |||
4107 | /* | ||
4108 | - * Callback for efi_memory_walk. | ||
4109 | - */ | ||
4110 | -static int __init | ||
4111 | -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) | ||
4112 | -{ | ||
4113 | - unsigned long *max_pfn = arg, pfn; | ||
4114 | - | ||
4115 | - if (start < end) { | ||
4116 | - pfn = PFN_UP(end -1); | ||
4117 | - if (pfn > *max_pfn) | ||
4118 | - *max_pfn = pfn; | ||
4119 | - } | ||
4120 | - return 0; | ||
4121 | -} | ||
4122 | - | ||
4123 | -static int __init | ||
4124 | -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) | ||
4125 | -{ | ||
4126 | - memory_present(0, PFN_UP(start), PFN_DOWN(end)); | ||
4127 | - return 0; | ||
4128 | -} | ||
4129 | - | ||
4130 | -/* | ||
4131 | - * This function checks if any part of the range <start,end> is mapped | ||
4132 | - * with type. | ||
4133 | - */ | ||
4134 | -int | ||
4135 | -e820_any_mapped(u64 start, u64 end, unsigned type) | ||
4136 | -{ | ||
4137 | - int i; | ||
4138 | - | ||
4139 | -#ifndef CONFIG_XEN | ||
4140 | - for (i = 0; i < e820.nr_map; i++) { | ||
4141 | - const struct e820entry *ei = &e820.map[i]; | ||
4142 | -#else | ||
4143 | - if (!is_initial_xendomain()) | ||
4144 | - return 0; | ||
4145 | - for (i = 0; i < machine_e820.nr_map; ++i) { | ||
4146 | - const struct e820entry *ei = &machine_e820.map[i]; | ||
4147 | -#endif | ||
4148 | - | ||
4149 | - if (type && ei->type != type) | ||
4150 | - continue; | ||
4151 | - if (ei->addr >= end || ei->addr + ei->size <= start) | ||
4152 | - continue; | ||
4153 | - return 1; | ||
4154 | - } | ||
4155 | - return 0; | ||
4156 | -} | ||
4157 | -EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
4158 | - | ||
4159 | - /* | ||
4160 | - * This function checks if the entire range <start,end> is mapped with type. | ||
4161 | - * | ||
4162 | - * Note: this function only works correct if the e820 table is sorted and | ||
4163 | - * not-overlapping, which is the case | ||
4164 | - */ | ||
4165 | -int __init | ||
4166 | -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) | ||
4167 | -{ | ||
4168 | - u64 start = s; | ||
4169 | - u64 end = e; | ||
4170 | - int i; | ||
4171 | - | ||
4172 | -#ifndef CONFIG_XEN | ||
4173 | - for (i = 0; i < e820.nr_map; i++) { | ||
4174 | - struct e820entry *ei = &e820.map[i]; | ||
4175 | -#else | ||
4176 | - if (!is_initial_xendomain()) | ||
4177 | - return 0; | ||
4178 | - for (i = 0; i < machine_e820.nr_map; ++i) { | ||
4179 | - const struct e820entry *ei = &machine_e820.map[i]; | ||
4180 | -#endif | ||
4181 | - if (type && ei->type != type) | ||
4182 | - continue; | ||
4183 | - /* is the region (part) in overlap with the current region ?*/ | ||
4184 | - if (ei->addr >= end || ei->addr + ei->size <= start) | ||
4185 | - continue; | ||
4186 | - /* if the region is at the beginning of <start,end> we move | ||
4187 | - * start to the end of the region since it's ok until there | ||
4188 | - */ | ||
4189 | - if (ei->addr <= start) | ||
4190 | - start = ei->addr + ei->size; | ||
4191 | - /* if start is now at or beyond end, we're done, full | ||
4192 | - * coverage */ | ||
4193 | - if (start >= end) | ||
4194 | - return 1; /* we're done */ | ||
4195 | - } | ||
4196 | - return 0; | ||
4197 | -} | ||
4198 | - | ||
4199 | -/* | ||
4200 | - * Find the highest page frame number we have available | ||
4201 | - */ | ||
4202 | -void __init find_max_pfn(void) | ||
4203 | -{ | ||
4204 | - int i; | ||
4205 | - | ||
4206 | - max_pfn = 0; | ||
4207 | - if (efi_enabled) { | ||
4208 | - efi_memmap_walk(efi_find_max_pfn, &max_pfn); | ||
4209 | - efi_memmap_walk(efi_memory_present_wrapper, NULL); | ||
4210 | - return; | ||
4211 | - } | ||
4212 | - | ||
4213 | - for (i = 0; i < e820.nr_map; i++) { | ||
4214 | - unsigned long start, end; | ||
4215 | - /* RAM? */ | ||
4216 | - if (e820.map[i].type != E820_RAM) | ||
4217 | - continue; | ||
4218 | - start = PFN_UP(e820.map[i].addr); | ||
4219 | - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
4220 | - if (start >= end) | ||
4221 | - continue; | ||
4222 | - if (end > max_pfn) | ||
4223 | - max_pfn = end; | ||
4224 | - memory_present(0, start, end); | ||
4225 | - } | ||
4226 | -} | ||
4227 | - | ||
4228 | -/* | ||
4229 | * Determine low and high memory ranges: | ||
4230 | */ | ||
4231 | unsigned long __init find_max_low_pfn(void) | ||
4232 | @@ -1073,77 +356,6 @@ | ||
4233 | return max_low_pfn; | ||
4234 | } | ||
4235 | |||
4236 | -/* | ||
4237 | - * Free all available memory for boot time allocation. Used | ||
4238 | - * as a callback function by efi_memory_walk() | ||
4239 | - */ | ||
4240 | - | ||
4241 | -static int __init | ||
4242 | -free_available_memory(unsigned long start, unsigned long end, void *arg) | ||
4243 | -{ | ||
4244 | - /* check max_low_pfn */ | ||
4245 | - if (start >= (max_low_pfn << PAGE_SHIFT)) | ||
4246 | - return 0; | ||
4247 | - if (end >= (max_low_pfn << PAGE_SHIFT)) | ||
4248 | - end = max_low_pfn << PAGE_SHIFT; | ||
4249 | - if (start < end) | ||
4250 | - free_bootmem(start, end - start); | ||
4251 | - | ||
4252 | - return 0; | ||
4253 | -} | ||
4254 | -/* | ||
4255 | - * Register fully available low RAM pages with the bootmem allocator. | ||
4256 | - */ | ||
4257 | -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) | ||
4258 | -{ | ||
4259 | - int i; | ||
4260 | - | ||
4261 | - if (efi_enabled) { | ||
4262 | - efi_memmap_walk(free_available_memory, NULL); | ||
4263 | - return; | ||
4264 | - } | ||
4265 | - for (i = 0; i < e820.nr_map; i++) { | ||
4266 | - unsigned long curr_pfn, last_pfn, size; | ||
4267 | - /* | ||
4268 | - * Reserve usable low memory | ||
4269 | - */ | ||
4270 | - if (e820.map[i].type != E820_RAM) | ||
4271 | - continue; | ||
4272 | - /* | ||
4273 | - * We are rounding up the start address of usable memory: | ||
4274 | - */ | ||
4275 | - curr_pfn = PFN_UP(e820.map[i].addr); | ||
4276 | - if (curr_pfn >= max_low_pfn) | ||
4277 | - continue; | ||
4278 | - /* | ||
4279 | - * ... and at the end of the usable range downwards: | ||
4280 | - */ | ||
4281 | - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); | ||
4282 | - | ||
4283 | -#ifdef CONFIG_XEN | ||
4284 | - /* | ||
4285 | - * Truncate to the number of actual pages currently | ||
4286 | - * present. | ||
4287 | - */ | ||
4288 | - if (last_pfn > xen_start_info->nr_pages) | ||
4289 | - last_pfn = xen_start_info->nr_pages; | ||
4290 | -#endif | ||
4291 | - | ||
4292 | - if (last_pfn > max_low_pfn) | ||
4293 | - last_pfn = max_low_pfn; | ||
4294 | - | ||
4295 | - /* | ||
4296 | - * .. finally, did all the rounding and playing | ||
4297 | - * around just make the area go away? | ||
4298 | - */ | ||
4299 | - if (last_pfn <= curr_pfn) | ||
4300 | - continue; | ||
4301 | - | ||
4302 | - size = last_pfn - curr_pfn; | ||
4303 | - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); | ||
4304 | - } | ||
4305 | -} | ||
4306 | - | ||
4307 | #ifndef CONFIG_XEN | ||
4308 | /* | ||
4309 | * workaround for Dell systems that neglect to reserve EBDA | ||
4310 | @@ -1233,8 +445,8 @@ | ||
4311 | * the (very unlikely) case of us accidentally initializing the | ||
4312 | * bootmem allocator with an invalid RAM area. | ||
4313 | */ | ||
4314 | - reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) + | ||
4315 | - bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START)); | ||
4316 | + reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + | ||
4317 | + bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); | ||
4318 | |||
4319 | #ifndef CONFIG_XEN | ||
4320 | /* | ||
4321 | @@ -1316,170 +528,6 @@ | ||
4322 | } | ||
4323 | } | ||
4324 | |||
4325 | -/* | ||
4326 | - * Request address space for all standard RAM and ROM resources | ||
4327 | - * and also for regions reported as reserved by the e820. | ||
4328 | - */ | ||
4329 | -static void __init | ||
4330 | -legacy_init_iomem_resources(struct e820entry *e820, int nr_map, | ||
4331 | - struct resource *code_resource, | ||
4332 | - struct resource *data_resource) | ||
4333 | -{ | ||
4334 | - int i; | ||
4335 | - | ||
4336 | - probe_roms(); | ||
4337 | - | ||
4338 | - for (i = 0; i < nr_map; i++) { | ||
4339 | - struct resource *res; | ||
4340 | -#ifndef CONFIG_RESOURCES_64BIT | ||
4341 | - if (e820[i].addr + e820[i].size > 0x100000000ULL) | ||
4342 | - continue; | ||
4343 | -#endif | ||
4344 | - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); | ||
4345 | - switch (e820[i].type) { | ||
4346 | - case E820_RAM: res->name = "System RAM"; break; | ||
4347 | - case E820_ACPI: res->name = "ACPI Tables"; break; | ||
4348 | - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
4349 | - default: res->name = "reserved"; | ||
4350 | - } | ||
4351 | - res->start = e820[i].addr; | ||
4352 | - res->end = res->start + e820[i].size - 1; | ||
4353 | - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
4354 | - if (request_resource(&iomem_resource, res)) { | ||
4355 | - kfree(res); | ||
4356 | - continue; | ||
4357 | - } | ||
4358 | - if (e820[i].type == E820_RAM) { | ||
4359 | - /* | ||
4360 | - * We don't know which RAM region contains kernel data, | ||
4361 | - * so we try it repeatedly and let the resource manager | ||
4362 | - * test it. | ||
4363 | - */ | ||
4364 | -#ifndef CONFIG_XEN | ||
4365 | - request_resource(res, code_resource); | ||
4366 | - request_resource(res, data_resource); | ||
4367 | -#endif | ||
4368 | -#ifdef CONFIG_KEXEC | ||
4369 | - if (crashk_res.start != crashk_res.end) | ||
4370 | - request_resource(res, &crashk_res); | ||
4371 | -#ifdef CONFIG_XEN | ||
4372 | - xen_machine_kexec_register_resources(res); | ||
4373 | -#endif | ||
4374 | -#endif | ||
4375 | - } | ||
4376 | - } | ||
4377 | -} | ||
4378 | - | ||
4379 | -/* | ||
4380 | - * Locate a unused range of the physical address space below 4G which | ||
4381 | - * can be used for PCI mappings. | ||
4382 | - */ | ||
4383 | -static void __init | ||
4384 | -e820_setup_gap(struct e820entry *e820, int nr_map) | ||
4385 | -{ | ||
4386 | - unsigned long gapstart, gapsize, round; | ||
4387 | - unsigned long long last; | ||
4388 | - int i; | ||
4389 | - | ||
4390 | - /* | ||
4391 | - * Search for the bigest gap in the low 32 bits of the e820 | ||
4392 | - * memory space. | ||
4393 | - */ | ||
4394 | - last = 0x100000000ull; | ||
4395 | - gapstart = 0x10000000; | ||
4396 | - gapsize = 0x400000; | ||
4397 | - i = nr_map; | ||
4398 | - while (--i >= 0) { | ||
4399 | - unsigned long long start = e820[i].addr; | ||
4400 | - unsigned long long end = start + e820[i].size; | ||
4401 | - | ||
4402 | - /* | ||
4403 | - * Since "last" is at most 4GB, we know we'll | ||
4404 | - * fit in 32 bits if this condition is true | ||
4405 | - */ | ||
4406 | - if (last > end) { | ||
4407 | - unsigned long gap = last - end; | ||
4408 | - | ||
4409 | - if (gap > gapsize) { | ||
4410 | - gapsize = gap; | ||
4411 | - gapstart = end; | ||
4412 | - } | ||
4413 | - } | ||
4414 | - if (start < last) | ||
4415 | - last = start; | ||
4416 | - } | ||
4417 | - | ||
4418 | - /* | ||
4419 | - * See how much we want to round up: start off with | ||
4420 | - * rounding to the next 1MB area. | ||
4421 | - */ | ||
4422 | - round = 0x100000; | ||
4423 | - while ((gapsize >> 4) > round) | ||
4424 | - round += round; | ||
4425 | - /* Fun with two's complement */ | ||
4426 | - pci_mem_start = (gapstart + round) & -round; | ||
4427 | - | ||
4428 | - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", | ||
4429 | - pci_mem_start, gapstart, gapsize); | ||
4430 | -} | ||
4431 | - | ||
4432 | -/* | ||
4433 | - * Request address space for all standard resources | ||
4434 | - * | ||
4435 | - * This is called just before pcibios_init(), which is also a | ||
4436 | - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). | ||
4437 | - */ | ||
4438 | -static int __init request_standard_resources(void) | ||
4439 | -{ | ||
4440 | - int i; | ||
4441 | - | ||
4442 | - /* Nothing to do if not running in dom0. */ | ||
4443 | - if (!is_initial_xendomain()) | ||
4444 | - return 0; | ||
4445 | - | ||
4446 | - printk("Setting up standard PCI resources\n"); | ||
4447 | -#ifdef CONFIG_XEN | ||
4448 | - legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map, | ||
4449 | - &code_resource, &data_resource); | ||
4450 | -#else | ||
4451 | - if (efi_enabled) | ||
4452 | - efi_initialize_iomem_resources(&code_resource, &data_resource); | ||
4453 | - else | ||
4454 | - legacy_init_iomem_resources(e820.map, e820.nr_map, | ||
4455 | - &code_resource, &data_resource); | ||
4456 | -#endif | ||
4457 | - | ||
4458 | - /* EFI systems may still have VGA */ | ||
4459 | - request_resource(&iomem_resource, &video_ram_resource); | ||
4460 | - | ||
4461 | - /* request I/O space for devices used on all i[345]86 PCs */ | ||
4462 | - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) | ||
4463 | - request_resource(&ioport_resource, &standard_io_resources[i]); | ||
4464 | - return 0; | ||
4465 | -} | ||
4466 | - | ||
4467 | -subsys_initcall(request_standard_resources); | ||
4468 | - | ||
4469 | -static void __init register_memory(void) | ||
4470 | -{ | ||
4471 | -#ifdef CONFIG_XEN | ||
4472 | - if (is_initial_xendomain()) { | ||
4473 | - struct xen_memory_map memmap; | ||
4474 | - | ||
4475 | - memmap.nr_entries = E820MAX; | ||
4476 | - set_xen_guest_handle(memmap.buffer, machine_e820.map); | ||
4477 | - | ||
4478 | - if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap)) | ||
4479 | - BUG(); | ||
4480 | - | ||
4481 | - machine_e820.nr_map = memmap.nr_entries; | ||
4482 | - e820_setup_gap(machine_e820.map, machine_e820.nr_map); | ||
4483 | - } | ||
4484 | - else | ||
4485 | -#endif | ||
4486 | - e820_setup_gap(e820.map, e820.nr_map); | ||
4487 | -} | ||
4488 | - | ||
4489 | #ifdef CONFIG_MCA | ||
4490 | static void set_mca_bus(int x) | ||
4491 | { | ||
4492 | @@ -1489,6 +537,12 @@ | ||
4493 | static void set_mca_bus(int x) { } | ||
4494 | #endif | ||
4495 | |||
4496 | +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
4497 | +char * __init __attribute__((weak)) memory_setup(void) | ||
4498 | +{ | ||
4499 | + return machine_specific_memory_setup(); | ||
4500 | +} | ||
4501 | + | ||
4502 | /* | ||
4503 | * Determine if we were loaded by an EFI loader. If so, then we have also been | ||
4504 | * passed the efi memmap, systab, etc., so we should use these data structures | ||
4505 | @@ -1576,7 +630,7 @@ | ||
4506 | efi_init(); | ||
4507 | else { | ||
4508 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | ||
4509 | - print_memory_map(machine_specific_memory_setup()); | ||
4510 | + print_memory_map(memory_setup()); | ||
4511 | } | ||
4512 | |||
4513 | copy_edd(); | ||
4514 | @@ -1755,7 +809,7 @@ | ||
4515 | get_smp_config(); | ||
4516 | #endif | ||
4517 | |||
4518 | - register_memory(); | ||
4519 | + e820_register_memory(); | ||
4520 | |||
4521 | if (is_initial_xendomain()) { | ||
4522 | #ifdef CONFIG_VT | ||
4523 | --- a/arch/x86/kernel/setup_64-xen.c | ||
4524 | +++ b/arch/x86/kernel/setup_64-xen.c | ||
4525 | @@ -576,8 +576,7 @@ | ||
4526 | if (LOADER_TYPE && INITRD_START) { | ||
4527 | if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { | ||
4528 | reserve_bootmem_generic(INITRD_START, INITRD_SIZE); | ||
4529 | - initrd_start = | ||
4530 | - INITRD_START ? INITRD_START + PAGE_OFFSET : 0; | ||
4531 | + initrd_start = INITRD_START + PAGE_OFFSET; | ||
4532 | initrd_end = initrd_start+INITRD_SIZE; | ||
4533 | } | ||
4534 | else { | ||
4535 | @@ -1003,11 +1002,8 @@ | ||
4536 | /* Fix cpuid4 emulation for more */ | ||
4537 | num_cache_leaves = 3; | ||
4538 | |||
4539 | - /* When there is only one core no need to synchronize RDTSC */ | ||
4540 | - if (num_possible_cpus() == 1) | ||
4541 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4542 | - else | ||
4543 | - clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4544 | + /* RDTSC can be speculated around */ | ||
4545 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4546 | } | ||
4547 | |||
4548 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | ||
4549 | @@ -1106,6 +1102,15 @@ | ||
4550 | set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); | ||
4551 | } | ||
4552 | |||
4553 | + if (cpu_has_ds) { | ||
4554 | + unsigned int l1, l2; | ||
4555 | + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); | ||
4556 | + if (!(l1 & (1<<11))) | ||
4557 | + set_bit(X86_FEATURE_BTS, c->x86_capability); | ||
4558 | + if (!(l1 & (1<<12))) | ||
4559 | + set_bit(X86_FEATURE_PEBS, c->x86_capability); | ||
4560 | + } | ||
4561 | + | ||
4562 | n = c->extended_cpuid_level; | ||
4563 | if (n >= 0x80000008) { | ||
4564 | unsigned eax = cpuid_eax(0x80000008); | ||
4565 | @@ -1125,7 +1130,10 @@ | ||
4566 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | ||
4567 | if (c->x86 == 6) | ||
4568 | set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); | ||
4569 | - set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4570 | + if (c->x86 == 15) | ||
4571 | + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4572 | + else | ||
4573 | + clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | ||
4574 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
4575 | |||
4576 | srat_detect_node(); | ||
4577 | --- a/arch/x86/kernel/smp_32-xen.c | ||
4578 | +++ b/arch/x86/kernel/smp_32-xen.c | ||
4579 | @@ -659,6 +659,10 @@ | ||
4580 | put_cpu(); | ||
4581 | return -EBUSY; | ||
4582 | } | ||
4583 | + | ||
4584 | + /* Can deadlock when called with interrupts disabled */ | ||
4585 | + WARN_ON(irqs_disabled()); | ||
4586 | + | ||
4587 | spin_lock_bh(&call_lock); | ||
4588 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
4589 | spin_unlock_bh(&call_lock); | ||
4590 | --- a/arch/x86/kernel/smp_64-xen.c | ||
4591 | +++ b/arch/x86/kernel/smp_64-xen.c | ||
4592 | @@ -384,12 +384,17 @@ | ||
4593 | put_cpu(); | ||
4594 | return 0; | ||
4595 | } | ||
4596 | + | ||
4597 | + /* Can deadlock when called with interrupts disabled */ | ||
4598 | + WARN_ON(irqs_disabled()); | ||
4599 | + | ||
4600 | spin_lock_bh(&call_lock); | ||
4601 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
4602 | spin_unlock_bh(&call_lock); | ||
4603 | put_cpu(); | ||
4604 | return 0; | ||
4605 | } | ||
4606 | +EXPORT_SYMBOL(smp_call_function_single); | ||
4607 | |||
4608 | /* | ||
4609 | * this function sends a 'generic call function' IPI to all other CPUs | ||
4610 | --- a/arch/x86/kernel/time_32-xen.c | ||
4611 | +++ b/arch/x86/kernel/time_32-xen.c | ||
4612 | @@ -61,6 +61,7 @@ | ||
4613 | #include <asm/uaccess.h> | ||
4614 | #include <asm/processor.h> | ||
4615 | #include <asm/timer.h> | ||
4616 | +#include <asm/time.h> | ||
4617 | #include <asm/sections.h> | ||
4618 | |||
4619 | #include "mach_time.h" | ||
4620 | @@ -129,11 +130,11 @@ | ||
4621 | /* Must be signed, as it's compared with s64 quantities which can be -ve. */ | ||
4622 | #define NS_PER_TICK (1000000000LL/HZ) | ||
4623 | |||
4624 | -static void __clock_was_set(void *unused) | ||
4625 | +static void __clock_was_set(struct work_struct *unused) | ||
4626 | { | ||
4627 | clock_was_set(); | ||
4628 | } | ||
4629 | -static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL); | ||
4630 | +static DECLARE_WORK(clock_was_set_work, __clock_was_set); | ||
4631 | |||
4632 | static inline void __normalize_time(time_t *sec, s64 *nsec) | ||
4633 | { | ||
4634 | @@ -537,10 +538,7 @@ | ||
4635 | /* gets recalled with irq locally disabled */ | ||
4636 | /* XXX - does irqsave resolve this? -johnstul */ | ||
4637 | spin_lock_irqsave(&rtc_lock, flags); | ||
4638 | - if (efi_enabled) | ||
4639 | - retval = efi_set_rtc_mmss(nowtime); | ||
4640 | - else | ||
4641 | - retval = mach_set_rtc_mmss(nowtime); | ||
4642 | + retval = set_wallclock(nowtime); | ||
4643 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
4644 | |||
4645 | return retval; | ||
4646 | @@ -865,10 +863,7 @@ | ||
4647 | |||
4648 | spin_lock_irqsave(&rtc_lock, flags); | ||
4649 | |||
4650 | - if (efi_enabled) | ||
4651 | - retval = efi_get_time(); | ||
4652 | - else | ||
4653 | - retval = mach_get_cmos_time(); | ||
4654 | + retval = get_wallclock(); | ||
4655 | |||
4656 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
4657 | |||
4658 | @@ -970,7 +965,7 @@ | ||
4659 | printk("Using HPET for base-timer\n"); | ||
4660 | } | ||
4661 | |||
4662 | - time_init_hook(); | ||
4663 | + do_time_init(); | ||
4664 | } | ||
4665 | #endif | ||
4666 | |||
4667 | --- a/arch/x86/kernel/traps_32-xen.c | ||
4668 | +++ b/arch/x86/kernel/traps_32-xen.c | ||
4669 | @@ -29,6 +29,8 @@ | ||
4670 | #include <linux/kexec.h> | ||
4671 | #include <linux/unwind.h> | ||
4672 | #include <linux/uaccess.h> | ||
4673 | +#include <linux/nmi.h> | ||
4674 | +#include <linux/bug.h> | ||
4675 | |||
4676 | #ifdef CONFIG_EISA | ||
4677 | #include <linux/ioport.h> | ||
4678 | @@ -61,9 +63,6 @@ | ||
4679 | |||
4680 | asmlinkage int system_call(void); | ||
4681 | |||
4682 | -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, | ||
4683 | - { 0, 0 }, { 0, 0 } }; | ||
4684 | - | ||
4685 | /* Do we ignore FPU interrupts ? */ | ||
4686 | char ignore_fpu_irq = 0; | ||
4687 | |||
4688 | @@ -100,12 +99,7 @@ | ||
4689 | #endif | ||
4690 | asmlinkage void machine_check(void); | ||
4691 | |||
4692 | -static int kstack_depth_to_print = 24; | ||
4693 | -#ifdef CONFIG_STACK_UNWIND | ||
4694 | -static int call_trace = 1; | ||
4695 | -#else | ||
4696 | -#define call_trace (-1) | ||
4697 | -#endif | ||
4698 | +int kstack_depth_to_print = 24; | ||
4699 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | ||
4700 | |||
4701 | int register_die_notifier(struct notifier_block *nb) | ||
4702 | @@ -159,25 +153,7 @@ | ||
4703 | return ebp; | ||
4704 | } | ||
4705 | |||
4706 | -struct ops_and_data { | ||
4707 | - struct stacktrace_ops *ops; | ||
4708 | - void *data; | ||
4709 | -}; | ||
4710 | - | ||
4711 | -static asmlinkage int | ||
4712 | -dump_trace_unwind(struct unwind_frame_info *info, void *data) | ||
4713 | -{ | ||
4714 | - struct ops_and_data *oad = (struct ops_and_data *)data; | ||
4715 | - int n = 0; | ||
4716 | - | ||
4717 | - while (unwind(info) == 0 && UNW_PC(info)) { | ||
4718 | - n++; | ||
4719 | - oad->ops->address(oad->data, UNW_PC(info)); | ||
4720 | - if (arch_unw_user_mode(info)) | ||
4721 | - break; | ||
4722 | - } | ||
4723 | - return n; | ||
4724 | -} | ||
4725 | +#define MSG(msg) ops->warning(data, msg) | ||
4726 | |||
4727 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | ||
4728 | unsigned long *stack, | ||
4729 | @@ -188,39 +164,6 @@ | ||
4730 | if (!task) | ||
4731 | task = current; | ||
4732 | |||
4733 | - if (call_trace >= 0) { | ||
4734 | - int unw_ret = 0; | ||
4735 | - struct unwind_frame_info info; | ||
4736 | - struct ops_and_data oad = { .ops = ops, .data = data }; | ||
4737 | - | ||
4738 | - if (regs) { | ||
4739 | - if (unwind_init_frame_info(&info, task, regs) == 0) | ||
4740 | - unw_ret = dump_trace_unwind(&info, &oad); | ||
4741 | - } else if (task == current) | ||
4742 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); | ||
4743 | - else { | ||
4744 | - if (unwind_init_blocked(&info, task) == 0) | ||
4745 | - unw_ret = dump_trace_unwind(&info, &oad); | ||
4746 | - } | ||
4747 | - if (unw_ret > 0) { | ||
4748 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { | ||
4749 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", | ||
4750 | - UNW_PC(&info)); | ||
4751 | - if (UNW_SP(&info) >= PAGE_OFFSET) { | ||
4752 | - ops->warning(data, "Leftover inexact backtrace:\n"); | ||
4753 | - stack = (void *)UNW_SP(&info); | ||
4754 | - if (!stack) | ||
4755 | - return; | ||
4756 | - ebp = UNW_FP(&info); | ||
4757 | - } else | ||
4758 | - ops->warning(data, "Full inexact backtrace again:\n"); | ||
4759 | - } else if (call_trace >= 1) | ||
4760 | - return; | ||
4761 | - else | ||
4762 | - ops->warning(data, "Full inexact backtrace again:\n"); | ||
4763 | - } else | ||
4764 | - ops->warning(data, "Inexact backtrace:\n"); | ||
4765 | - } | ||
4766 | if (!stack) { | ||
4767 | unsigned long dummy; | ||
4768 | stack = &dummy; | ||
4769 | @@ -253,6 +196,7 @@ | ||
4770 | stack = (unsigned long*)context->previous_esp; | ||
4771 | if (!stack) | ||
4772 | break; | ||
4773 | + touch_nmi_watchdog(); | ||
4774 | } | ||
4775 | } | ||
4776 | EXPORT_SYMBOL(dump_trace); | ||
4777 | @@ -385,7 +329,7 @@ | ||
4778 | * time of the fault.. | ||
4779 | */ | ||
4780 | if (in_kernel) { | ||
4781 | - u8 __user *eip; | ||
4782 | + u8 *eip; | ||
4783 | int code_bytes = 64; | ||
4784 | unsigned char c; | ||
4785 | |||
4786 | @@ -394,18 +338,20 @@ | ||
4787 | |||
4788 | printk(KERN_EMERG "Code: "); | ||
4789 | |||
4790 | - eip = (u8 __user *)regs->eip - 43; | ||
4791 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { | ||
4792 | + eip = (u8 *)regs->eip - 43; | ||
4793 | + if (eip < (u8 *)PAGE_OFFSET || | ||
4794 | + probe_kernel_address(eip, c)) { | ||
4795 | /* try starting at EIP */ | ||
4796 | - eip = (u8 __user *)regs->eip; | ||
4797 | + eip = (u8 *)regs->eip; | ||
4798 | code_bytes = 32; | ||
4799 | } | ||
4800 | for (i = 0; i < code_bytes; i++, eip++) { | ||
4801 | - if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { | ||
4802 | + if (eip < (u8 *)PAGE_OFFSET || | ||
4803 | + probe_kernel_address(eip, c)) { | ||
4804 | printk(" Bad EIP value."); | ||
4805 | break; | ||
4806 | } | ||
4807 | - if (eip == (u8 __user *)regs->eip) | ||
4808 | + if (eip == (u8 *)regs->eip) | ||
4809 | printk("<%02x> ", c); | ||
4810 | else | ||
4811 | printk("%02x ", c); | ||
4812 | @@ -414,43 +360,22 @@ | ||
4813 | printk("\n"); | ||
4814 | } | ||
4815 | |||
4816 | -static void handle_BUG(struct pt_regs *regs) | ||
4817 | +int is_valid_bugaddr(unsigned long eip) | ||
4818 | { | ||
4819 | - unsigned long eip = regs->eip; | ||
4820 | unsigned short ud2; | ||
4821 | |||
4822 | if (eip < PAGE_OFFSET) | ||
4823 | - return; | ||
4824 | - if (probe_kernel_address((unsigned short __user *)eip, ud2)) | ||
4825 | - return; | ||
4826 | - if (ud2 != 0x0b0f) | ||
4827 | - return; | ||
4828 | + return 0; | ||
4829 | + if (probe_kernel_address((unsigned short *)eip, ud2)) | ||
4830 | + return 0; | ||
4831 | |||
4832 | - printk(KERN_EMERG "------------[ cut here ]------------\n"); | ||
4833 | - | ||
4834 | -#ifdef CONFIG_DEBUG_BUGVERBOSE | ||
4835 | - do { | ||
4836 | - unsigned short line; | ||
4837 | - char *file; | ||
4838 | - char c; | ||
4839 | - | ||
4840 | - if (probe_kernel_address((unsigned short __user *)(eip + 2), | ||
4841 | - line)) | ||
4842 | - break; | ||
4843 | - if (__get_user(file, (char * __user *)(eip + 4)) || | ||
4844 | - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) | ||
4845 | - file = "<bad filename>"; | ||
4846 | - | ||
4847 | - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); | ||
4848 | - return; | ||
4849 | - } while (0); | ||
4850 | -#endif | ||
4851 | - printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); | ||
4852 | + return ud2 == 0x0b0f; | ||
4853 | } | ||
4854 | |||
4855 | -/* This is gone through when something in the kernel | ||
4856 | - * has done something bad and is about to be terminated. | ||
4857 | -*/ | ||
4858 | +/* | ||
4859 | + * This is gone through when something in the kernel has done something bad and | ||
4860 | + * is about to be terminated. | ||
4861 | + */ | ||
4862 | void die(const char * str, struct pt_regs * regs, long err) | ||
4863 | { | ||
4864 | static struct { | ||
4865 | @@ -458,7 +383,7 @@ | ||
4866 | u32 lock_owner; | ||
4867 | int lock_owner_depth; | ||
4868 | } die = { | ||
4869 | - .lock = SPIN_LOCK_UNLOCKED, | ||
4870 | + .lock = __SPIN_LOCK_UNLOCKED(die.lock), | ||
4871 | .lock_owner = -1, | ||
4872 | .lock_owner_depth = 0 | ||
4873 | }; | ||
4874 | @@ -482,7 +407,8 @@ | ||
4875 | unsigned long esp; | ||
4876 | unsigned short ss; | ||
4877 | |||
4878 | - handle_BUG(regs); | ||
4879 | + report_bug(regs->eip); | ||
4880 | + | ||
4881 | printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); | ||
4882 | #ifdef CONFIG_PREEMPT | ||
4883 | printk(KERN_EMERG "PREEMPT "); | ||
4884 | @@ -682,8 +608,7 @@ | ||
4885 | { | ||
4886 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " | ||
4887 | "CPU %d.\n", reason, smp_processor_id()); | ||
4888 | - printk(KERN_EMERG "You probably have a hardware problem with your RAM " | ||
4889 | - "chips\n"); | ||
4890 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | ||
4891 | if (panic_on_unrecovered_nmi) | ||
4892 | panic("NMI: Not continuing"); | ||
4893 | |||
4894 | @@ -741,7 +666,6 @@ | ||
4895 | printk(" on CPU%d, eip %08lx, registers:\n", | ||
4896 | smp_processor_id(), regs->eip); | ||
4897 | show_registers(regs); | ||
4898 | - printk(KERN_EMERG "console shuts up ...\n"); | ||
4899 | console_silent(); | ||
4900 | spin_unlock(&nmi_print_lock); | ||
4901 | bust_spinlocks(0); | ||
4902 | @@ -1057,49 +981,24 @@ | ||
4903 | #endif | ||
4904 | } | ||
4905 | |||
4906 | -fastcall void setup_x86_bogus_stack(unsigned char * stk) | ||
4907 | +fastcall unsigned long patch_espfix_desc(unsigned long uesp, | ||
4908 | + unsigned long kesp) | ||
4909 | { | ||
4910 | - unsigned long *switch16_ptr, *switch32_ptr; | ||
4911 | - struct pt_regs *regs; | ||
4912 | - unsigned long stack_top, stack_bot; | ||
4913 | - unsigned short iret_frame16_off; | ||
4914 | - int cpu = smp_processor_id(); | ||
4915 | - /* reserve the space on 32bit stack for the magic switch16 pointer */ | ||
4916 | - memmove(stk, stk + 8, sizeof(struct pt_regs)); | ||
4917 | - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); | ||
4918 | - regs = (struct pt_regs *)stk; | ||
4919 | - /* now the switch32 on 16bit stack */ | ||
4920 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | ||
4921 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | ||
4922 | - switch32_ptr = (unsigned long *)(stack_top - 8); | ||
4923 | - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; | ||
4924 | - /* copy iret frame on 16bit stack */ | ||
4925 | - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); | ||
4926 | - /* fill in the switch pointers */ | ||
4927 | - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; | ||
4928 | - switch16_ptr[1] = __ESPFIX_SS; | ||
4929 | - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + | ||
4930 | - 8 - CPU_16BIT_STACK_SIZE; | ||
4931 | - switch32_ptr[1] = __KERNEL_DS; | ||
4932 | -} | ||
4933 | - | ||
4934 | -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) | ||
4935 | -{ | ||
4936 | - unsigned long *switch32_ptr; | ||
4937 | - unsigned char *stack16, *stack32; | ||
4938 | - unsigned long stack_top, stack_bot; | ||
4939 | - int len; | ||
4940 | int cpu = smp_processor_id(); | ||
4941 | - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | ||
4942 | - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | ||
4943 | - switch32_ptr = (unsigned long *)(stack_top - 8); | ||
4944 | - /* copy the data from 16bit stack to 32bit stack */ | ||
4945 | - len = CPU_16BIT_STACK_SIZE - 8 - sp; | ||
4946 | - stack16 = (unsigned char *)(stack_bot + sp); | ||
4947 | - stack32 = (unsigned char *) | ||
4948 | - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); | ||
4949 | - memcpy(stack32, stack16, len); | ||
4950 | - return stack32; | ||
4951 | + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
4952 | + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
4953 | + unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
4954 | + unsigned long new_kesp = kesp - base; | ||
4955 | + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
4956 | + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | ||
4957 | + /* Set up base for espfix segment */ | ||
4958 | + desc &= 0x00f0ff0000000000ULL; | ||
4959 | + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | ||
4960 | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | | ||
4961 | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | ||
4962 | + (lim_pages & 0xffff); | ||
4963 | + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | ||
4964 | + return new_kesp; | ||
4965 | } | ||
4966 | #endif | ||
4967 | |||
4968 | @@ -1113,7 +1012,7 @@ | ||
4969 | * Must be called with kernel preemption disabled (in this case, | ||
4970 | * local interrupts are disabled at the call-site in entry.S). | ||
4971 | */ | ||
4972 | -asmlinkage void math_state_restore(struct pt_regs regs) | ||
4973 | +asmlinkage void math_state_restore(void) | ||
4974 | { | ||
4975 | struct thread_info *thread = current_thread_info(); | ||
4976 | struct task_struct *tsk = thread->task; | ||
4977 | @@ -1123,6 +1022,7 @@ | ||
4978 | init_fpu(tsk); | ||
4979 | restore_fpu(tsk); | ||
4980 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
4981 | + tsk->fpu_counter++; | ||
4982 | } | ||
4983 | |||
4984 | #ifndef CONFIG_MATH_EMULATION | ||
4985 | @@ -1234,19 +1134,3 @@ | ||
4986 | return 1; | ||
4987 | } | ||
4988 | __setup("kstack=", kstack_setup); | ||
4989 | - | ||
4990 | -#ifdef CONFIG_STACK_UNWIND | ||
4991 | -static int __init call_trace_setup(char *s) | ||
4992 | -{ | ||
4993 | - if (strcmp(s, "old") == 0) | ||
4994 | - call_trace = -1; | ||
4995 | - else if (strcmp(s, "both") == 0) | ||
4996 | - call_trace = 0; | ||
4997 | - else if (strcmp(s, "newfallback") == 0) | ||
4998 | - call_trace = 1; | ||
4999 | - else if (strcmp(s, "new") == 2) | ||
5000 | - call_trace = 2; | ||
5001 | - return 1; | ||
5002 | -} | ||
5003 | -__setup("call_trace=", call_trace_setup); | ||
5004 | -#endif | ||
5005 | --- a/arch/x86/kernel/traps_64-xen.c | ||
5006 | +++ b/arch/x86/kernel/traps_64-xen.c | ||
5007 | @@ -30,9 +30,10 @@ | ||
5008 | #include <linux/kprobes.h> | ||
5009 | #include <linux/kexec.h> | ||
5010 | #include <linux/unwind.h> | ||
5011 | +#include <linux/uaccess.h> | ||
5012 | +#include <linux/bug.h> | ||
5013 | |||
5014 | #include <asm/system.h> | ||
5015 | -#include <asm/uaccess.h> | ||
5016 | #include <asm/io.h> | ||
5017 | #include <asm/atomic.h> | ||
5018 | #include <asm/debugreg.h> | ||
5019 | @@ -108,12 +109,7 @@ | ||
5020 | preempt_enable_no_resched(); | ||
5021 | } | ||
5022 | |||
5023 | -static int kstack_depth_to_print = 12; | ||
5024 | -#ifdef CONFIG_STACK_UNWIND | ||
5025 | -static int call_trace = 1; | ||
5026 | -#else | ||
5027 | -#define call_trace (-1) | ||
5028 | -#endif | ||
5029 | +int kstack_depth_to_print = 12; | ||
5030 | |||
5031 | #ifdef CONFIG_KALLSYMS | ||
5032 | void printk_address(unsigned long address) | ||
5033 | @@ -218,24 +214,7 @@ | ||
5034 | return NULL; | ||
5035 | } | ||
5036 | |||
5037 | -struct ops_and_data { | ||
5038 | - struct stacktrace_ops *ops; | ||
5039 | - void *data; | ||
5040 | -}; | ||
5041 | - | ||
5042 | -static int dump_trace_unwind(struct unwind_frame_info *info, void *context) | ||
5043 | -{ | ||
5044 | - struct ops_and_data *oad = (struct ops_and_data *)context; | ||
5045 | - int n = 0; | ||
5046 | - | ||
5047 | - while (unwind(info) == 0 && UNW_PC(info)) { | ||
5048 | - n++; | ||
5049 | - oad->ops->address(oad->data, UNW_PC(info)); | ||
5050 | - if (arch_unw_user_mode(info)) | ||
5051 | - break; | ||
5052 | - } | ||
5053 | - return n; | ||
5054 | -} | ||
5055 | +#define MSG(txt) ops->warning(data, txt) | ||
5056 | |||
5057 | /* | ||
5058 | * x86-64 can have upto three kernel stacks: | ||
5059 | @@ -250,61 +229,24 @@ | ||
5060 | return p > t && p < t + THREAD_SIZE - 3; | ||
5061 | } | ||
5062 | |||
5063 | -void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, | ||
5064 | +void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | ||
5065 | + unsigned long *stack, | ||
5066 | struct stacktrace_ops *ops, void *data) | ||
5067 | { | ||
5068 | - const unsigned cpu = smp_processor_id(); | ||
5069 | - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | ||
5070 | + const unsigned cpu = get_cpu(); | ||
5071 | + unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; | ||
5072 | unsigned used = 0; | ||
5073 | struct thread_info *tinfo; | ||
5074 | |||
5075 | if (!tsk) | ||
5076 | tsk = current; | ||
5077 | |||
5078 | - if (call_trace >= 0) { | ||
5079 | - int unw_ret = 0; | ||
5080 | - struct unwind_frame_info info; | ||
5081 | - struct ops_and_data oad = { .ops = ops, .data = data }; | ||
5082 | - | ||
5083 | - if (regs) { | ||
5084 | - if (unwind_init_frame_info(&info, tsk, regs) == 0) | ||
5085 | - unw_ret = dump_trace_unwind(&info, &oad); | ||
5086 | - } else if (tsk == current) | ||
5087 | - unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); | ||
5088 | - else { | ||
5089 | - if (unwind_init_blocked(&info, tsk) == 0) | ||
5090 | - unw_ret = dump_trace_unwind(&info, &oad); | ||
5091 | - } | ||
5092 | - if (unw_ret > 0) { | ||
5093 | - if (call_trace == 1 && !arch_unw_user_mode(&info)) { | ||
5094 | - ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", | ||
5095 | - UNW_PC(&info)); | ||
5096 | - if ((long)UNW_SP(&info) < 0) { | ||
5097 | - ops->warning(data, "Leftover inexact backtrace:\n"); | ||
5098 | - stack = (unsigned long *)UNW_SP(&info); | ||
5099 | - if (!stack) | ||
5100 | - return; | ||
5101 | - } else | ||
5102 | - ops->warning(data, "Full inexact backtrace again:\n"); | ||
5103 | - } else if (call_trace >= 1) | ||
5104 | - return; | ||
5105 | - else | ||
5106 | - ops->warning(data, "Full inexact backtrace again:\n"); | ||
5107 | - } else | ||
5108 | - ops->warning(data, "Inexact backtrace:\n"); | ||
5109 | - } | ||
5110 | if (!stack) { | ||
5111 | unsigned long dummy; | ||
5112 | stack = &dummy; | ||
5113 | if (tsk && tsk != current) | ||
5114 | stack = (unsigned long *)tsk->thread.rsp; | ||
5115 | } | ||
5116 | - /* | ||
5117 | - * Align the stack pointer on word boundary, later loops | ||
5118 | - * rely on that (and corruption / debug info bugs can cause | ||
5119 | - * unaligned values here): | ||
5120 | - */ | ||
5121 | - stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); | ||
5122 | |||
5123 | /* | ||
5124 | * Print function call entries within a stack. 'cond' is the | ||
5125 | @@ -314,9 +256,9 @@ | ||
5126 | #define HANDLE_STACK(cond) \ | ||
5127 | do while (cond) { \ | ||
5128 | unsigned long addr = *stack++; \ | ||
5129 | - if (oops_in_progress ? \ | ||
5130 | - __kernel_text_address(addr) : \ | ||
5131 | - kernel_text_address(addr)) { \ | ||
5132 | + /* Use unlocked access here because except for NMIs \ | ||
5133 | + we should be already protected against module unloads */ \ | ||
5134 | + if (__kernel_text_address(addr)) { \ | ||
5135 | /* \ | ||
5136 | * If the address is either in the text segment of the \ | ||
5137 | * kernel, or in the region which contains vmalloc'ed \ | ||
5138 | @@ -379,9 +321,10 @@ | ||
5139 | /* | ||
5140 | * This handles the process stack: | ||
5141 | */ | ||
5142 | - tinfo = current_thread_info(); | ||
5143 | + tinfo = task_thread_info(tsk); | ||
5144 | HANDLE_STACK (valid_stack_ptr(tinfo, stack)); | ||
5145 | #undef HANDLE_STACK | ||
5146 | + put_cpu(); | ||
5147 | } | ||
5148 | EXPORT_SYMBOL(dump_trace); | ||
5149 | |||
5150 | @@ -518,30 +461,15 @@ | ||
5151 | printk("\n"); | ||
5152 | } | ||
5153 | |||
5154 | -void handle_BUG(struct pt_regs *regs) | ||
5155 | -{ | ||
5156 | - struct bug_frame f; | ||
5157 | - long len; | ||
5158 | - const char *prefix = ""; | ||
5159 | +int is_valid_bugaddr(unsigned long rip) | ||
5160 | +{ | ||
5161 | + unsigned short ud2; | ||
5162 | |||
5163 | - if (user_mode(regs)) | ||
5164 | - return; | ||
5165 | - if (__copy_from_user(&f, (const void __user *) regs->rip, | ||
5166 | - sizeof(struct bug_frame))) | ||
5167 | - return; | ||
5168 | - if (f.filename >= 0 || | ||
5169 | - f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) | ||
5170 | - return; | ||
5171 | - len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; | ||
5172 | - if (len < 0 || len >= PATH_MAX) | ||
5173 | - f.filename = (int)(long)"unmapped filename"; | ||
5174 | - else if (len > 50) { | ||
5175 | - f.filename += len - 50; | ||
5176 | - prefix = "..."; | ||
5177 | - } | ||
5178 | - printk("----------- [cut here ] --------- [please bite here ] ---------\n"); | ||
5179 | - printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); | ||
5180 | -} | ||
5181 | + if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2))) | ||
5182 | + return 0; | ||
5183 | + | ||
5184 | + return ud2 == 0x0b0f; | ||
5185 | +} | ||
5186 | |||
5187 | #ifdef CONFIG_BUG | ||
5188 | void out_of_line_bug(void) | ||
5189 | @@ -621,7 +549,9 @@ | ||
5190 | { | ||
5191 | unsigned long flags = oops_begin(); | ||
5192 | |||
5193 | - handle_BUG(regs); | ||
5194 | + if (!user_mode(regs)) | ||
5195 | + report_bug(regs->rip); | ||
5196 | + | ||
5197 | __die(str, regs, err); | ||
5198 | oops_end(flags); | ||
5199 | do_exit(SIGSEGV); | ||
5200 | @@ -790,8 +720,7 @@ | ||
5201 | { | ||
5202 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | ||
5203 | reason); | ||
5204 | - printk(KERN_EMERG "You probably have a hardware problem with your " | ||
5205 | - "RAM chips\n"); | ||
5206 | + printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | ||
5207 | |||
5208 | if (panic_on_unrecovered_nmi) | ||
5209 | panic("NMI: Not continuing"); | ||
5210 | @@ -1227,21 +1156,3 @@ | ||
5211 | return 0; | ||
5212 | } | ||
5213 | early_param("kstack", kstack_setup); | ||
5214 | - | ||
5215 | -#ifdef CONFIG_STACK_UNWIND | ||
5216 | -static int __init call_trace_setup(char *s) | ||
5217 | -{ | ||
5218 | - if (!s) | ||
5219 | - return -EINVAL; | ||
5220 | - if (strcmp(s, "old") == 0) | ||
5221 | - call_trace = -1; | ||
5222 | - else if (strcmp(s, "both") == 0) | ||
5223 | - call_trace = 0; | ||
5224 | - else if (strcmp(s, "newfallback") == 0) | ||
5225 | - call_trace = 1; | ||
5226 | - else if (strcmp(s, "new") == 0) | ||
5227 | - call_trace = 2; | ||
5228 | - return 0; | ||
5229 | -} | ||
5230 | -early_param("call_trace", call_trace_setup); | ||
5231 | -#endif | ||
5232 | --- a/arch/x86/kernel/vmlinux_32.lds.S | ||
5233 | +++ b/arch/x86/kernel/vmlinux_32.lds.S | ||
5234 | @@ -29,6 +29,12 @@ | ||
5235 | SECTIONS | ||
5236 | { | ||
5237 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | ||
5238 | + | ||
5239 | +#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002 | ||
5240 | +#undef LOAD_OFFSET | ||
5241 | +#define LOAD_OFFSET 0 | ||
5242 | +#endif | ||
5243 | + | ||
5244 | phys_startup_32 = startup_32 - LOAD_OFFSET; | ||
5245 | |||
5246 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
5247 | --- a/arch/x86/kernel/vsyscall_64-xen.c | ||
5248 | +++ b/arch/x86/kernel/vsyscall_64-xen.c | ||
5249 | @@ -42,6 +42,7 @@ | ||
5250 | #include <asm/topology.h> | ||
5251 | |||
5252 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | ||
5253 | +#define __syscall_clobber "r11","rcx","memory" | ||
5254 | |||
5255 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | ||
5256 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | ||
5257 | @@ -224,8 +225,7 @@ | ||
5258 | |||
5259 | static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, | ||
5260 | void __user *oldval, size_t __user *oldlenp, | ||
5261 | - void __user *newval, size_t newlen, | ||
5262 | - void **context) | ||
5263 | + void __user *newval, size_t newlen) | ||
5264 | { | ||
5265 | return -ENOSYS; | ||
5266 | } | ||
5267 | @@ -277,7 +277,6 @@ | ||
5268 | vsyscall_set_cpu(raw_smp_processor_id()); | ||
5269 | } | ||
5270 | |||
5271 | -#ifdef CONFIG_HOTPLUG_CPU | ||
5272 | static int __cpuinit | ||
5273 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | ||
5274 | { | ||
5275 | @@ -286,13 +285,13 @@ | ||
5276 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | ||
5277 | return NOTIFY_DONE; | ||
5278 | } | ||
5279 | -#endif | ||
5280 | |||
5281 | static void __init map_vsyscall(void) | ||
5282 | { | ||
5283 | extern char __vsyscall_0; | ||
5284 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | ||
5285 | |||
5286 | + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | ||
5287 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | ||
5288 | } | ||
5289 | |||
5290 | --- a/arch/x86/kvm/Kconfig | ||
5291 | +++ b/arch/x86/kvm/Kconfig | ||
5292 | @@ -7,6 +7,7 @@ | ||
5293 | menuconfig VIRTUALIZATION | ||
5294 | bool "Virtualization" | ||
5295 | depends on HAVE_KVM || X86 | ||
5296 | + depends on !XEN | ||
5297 | default y | ||
5298 | ---help--- | ||
5299 | Say Y here to get to see options for using your Linux host to run other | ||
5300 | --- a/arch/x86/mm/fault_32-xen.c | ||
5301 | +++ b/arch/x86/mm/fault_32-xen.c | ||
5302 | @@ -22,9 +22,9 @@ | ||
5303 | #include <linux/highmem.h> | ||
5304 | #include <linux/module.h> | ||
5305 | #include <linux/kprobes.h> | ||
5306 | +#include <linux/uaccess.h> | ||
5307 | |||
5308 | #include <asm/system.h> | ||
5309 | -#include <asm/uaccess.h> | ||
5310 | #include <asm/desc.h> | ||
5311 | #include <asm/kdebug.h> | ||
5312 | #include <asm/segment.h> | ||
5313 | @@ -167,7 +167,7 @@ | ||
5314 | static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | ||
5315 | { | ||
5316 | unsigned long limit; | ||
5317 | - unsigned long instr = get_segment_eip (regs, &limit); | ||
5318 | + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); | ||
5319 | int scan_more = 1; | ||
5320 | int prefetch = 0; | ||
5321 | int i; | ||
5322 | @@ -177,9 +177,9 @@ | ||
5323 | unsigned char instr_hi; | ||
5324 | unsigned char instr_lo; | ||
5325 | |||
5326 | - if (instr > limit) | ||
5327 | + if (instr > (unsigned char *)limit) | ||
5328 | break; | ||
5329 | - if (__get_user(opcode, (unsigned char __user *) instr)) | ||
5330 | + if (probe_kernel_address(instr, opcode)) | ||
5331 | break; | ||
5332 | |||
5333 | instr_hi = opcode & 0xf0; | ||
5334 | @@ -204,9 +204,9 @@ | ||
5335 | case 0x00: | ||
5336 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | ||
5337 | scan_more = 0; | ||
5338 | - if (instr > limit) | ||
5339 | + if (instr > (unsigned char *)limit) | ||
5340 | break; | ||
5341 | - if (__get_user(opcode, (unsigned char __user *) instr)) | ||
5342 | + if (probe_kernel_address(instr, opcode)) | ||
5343 | break; | ||
5344 | prefetch = (instr_lo == 0xF) && | ||
5345 | (opcode == 0x0D || opcode == 0x18); | ||
5346 | --- a/arch/x86/mm/fault_64-xen.c | ||
5347 | +++ b/arch/x86/mm/fault_64-xen.c | ||
5348 | @@ -23,9 +23,9 @@ | ||
5349 | #include <linux/compiler.h> | ||
5350 | #include <linux/module.h> | ||
5351 | #include <linux/kprobes.h> | ||
5352 | +#include <linux/uaccess.h> | ||
5353 | |||
5354 | #include <asm/system.h> | ||
5355 | -#include <asm/uaccess.h> | ||
5356 | #include <asm/pgalloc.h> | ||
5357 | #include <asm/smp.h> | ||
5358 | #include <asm/tlbflush.h> | ||
5359 | @@ -96,7 +96,7 @@ | ||
5360 | static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | ||
5361 | unsigned long error_code) | ||
5362 | { | ||
5363 | - unsigned char __user *instr; | ||
5364 | + unsigned char *instr; | ||
5365 | int scan_more = 1; | ||
5366 | int prefetch = 0; | ||
5367 | unsigned char *max_instr; | ||
5368 | @@ -116,7 +116,7 @@ | ||
5369 | unsigned char instr_hi; | ||
5370 | unsigned char instr_lo; | ||
5371 | |||
5372 | - if (__get_user(opcode, (char __user *)instr)) | ||
5373 | + if (probe_kernel_address(instr, opcode)) | ||
5374 | break; | ||
5375 | |||
5376 | instr_hi = opcode & 0xf0; | ||
5377 | @@ -154,7 +154,7 @@ | ||
5378 | case 0x00: | ||
5379 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | ||
5380 | scan_more = 0; | ||
5381 | - if (__get_user(opcode, (char __user *)instr)) | ||
5382 | + if (probe_kernel_address(instr, opcode)) | ||
5383 | break; | ||
5384 | prefetch = (instr_lo == 0xF) && | ||
5385 | (opcode == 0x0D || opcode == 0x18); | ||
5386 | @@ -170,7 +170,7 @@ | ||
5387 | static int bad_address(void *p) | ||
5388 | { | ||
5389 | unsigned long dummy; | ||
5390 | - return __get_user(dummy, (unsigned long __user *)p); | ||
5391 | + return probe_kernel_address((unsigned long *)p, dummy); | ||
5392 | } | ||
5393 | |||
5394 | void dump_pagetable(unsigned long address) | ||
5395 | --- a/arch/x86/mm/highmem_32-xen.c | ||
5396 | +++ b/arch/x86/mm/highmem_32-xen.c | ||
5397 | @@ -32,7 +32,7 @@ | ||
5398 | unsigned long vaddr; | ||
5399 | |||
5400 | /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ | ||
5401 | - inc_preempt_count(); | ||
5402 | + pagefault_disable(); | ||
5403 | if (!PageHighMem(page)) | ||
5404 | return page_address(page); | ||
5405 | |||
5406 | @@ -63,26 +63,22 @@ | ||
5407 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; | ||
5408 | enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); | ||
5409 | |||
5410 | -#ifdef CONFIG_DEBUG_HIGHMEM | ||
5411 | - if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { | ||
5412 | - dec_preempt_count(); | ||
5413 | - preempt_check_resched(); | ||
5414 | - return; | ||
5415 | - } | ||
5416 | - | ||
5417 | - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) | ||
5418 | - BUG(); | ||
5419 | -#endif | ||
5420 | /* | ||
5421 | * Force other mappings to Oops if they'll try to access this pte | ||
5422 | * without first remap it. Keeping stale mappings around is a bad idea | ||
5423 | * also, in case the page changes cacheability attributes or becomes | ||
5424 | * a protected page in a hypervisor. | ||
5425 | */ | ||
5426 | - kpte_clear_flush(kmap_pte-idx, vaddr); | ||
5427 | + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | ||
5428 | + kpte_clear_flush(kmap_pte-idx, vaddr); | ||
5429 | + else { | ||
5430 | +#ifdef CONFIG_DEBUG_HIGHMEM | ||
5431 | + BUG_ON(vaddr < PAGE_OFFSET); | ||
5432 | + BUG_ON(vaddr >= (unsigned long)high_memory); | ||
5433 | +#endif | ||
5434 | + } | ||
5435 | |||
5436 | - dec_preempt_count(); | ||
5437 | - preempt_check_resched(); | ||
5438 | + pagefault_enable(); | ||
5439 | } | ||
5440 | |||
5441 | /* This is the same as kmap_atomic() but can map memory that doesn't | ||
5442 | @@ -93,7 +89,7 @@ | ||
5443 | enum fixed_addresses idx; | ||
5444 | unsigned long vaddr; | ||
5445 | |||
5446 | - inc_preempt_count(); | ||
5447 | + pagefault_disable(); | ||
5448 | |||
5449 | idx = type + KM_TYPE_NR*smp_processor_id(); | ||
5450 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | ||
5451 | --- a/arch/x86/mm/init_32-xen.c | ||
5452 | +++ b/arch/x86/mm/init_32-xen.c | ||
5453 | @@ -235,8 +235,6 @@ | ||
5454 | |||
5455 | #endif | ||
5456 | |||
5457 | -extern int is_available_memory(efi_memory_desc_t *); | ||
5458 | - | ||
5459 | int page_is_ram(unsigned long pagenr) | ||
5460 | { | ||
5461 | int i; | ||
5462 | @@ -329,7 +327,7 @@ | ||
5463 | SetPageReserved(page); | ||
5464 | } | ||
5465 | |||
5466 | -static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) | ||
5467 | +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) | ||
5468 | { | ||
5469 | free_new_highpage(page, pfn); | ||
5470 | totalram_pages++; | ||
5471 | @@ -346,7 +344,7 @@ | ||
5472 | * has been added dynamically that would be | ||
5473 | * onlined here is in HIGHMEM | ||
5474 | */ | ||
5475 | -void online_page(struct page *page) | ||
5476 | +void __meminit online_page(struct page *page) | ||
5477 | { | ||
5478 | ClearPageReserved(page); | ||
5479 | add_one_highpage_hotplug(page, page_to_pfn(page)); | ||
5480 | @@ -739,16 +737,10 @@ | ||
5481 | set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags); | ||
5482 | } | ||
5483 | |||
5484 | -/* | ||
5485 | - * this is for the non-NUMA, single node SMP system case. | ||
5486 | - * Specifically, in the case of x86, we will always add | ||
5487 | - * memory to the highmem for now. | ||
5488 | - */ | ||
5489 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
5490 | -#ifndef CONFIG_NEED_MULTIPLE_NODES | ||
5491 | int arch_add_memory(int nid, u64 start, u64 size) | ||
5492 | { | ||
5493 | - struct pglist_data *pgdata = &contig_page_data; | ||
5494 | + struct pglist_data *pgdata = NODE_DATA(nid); | ||
5495 | struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; | ||
5496 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
5497 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
5498 | @@ -760,11 +752,11 @@ | ||
5499 | { | ||
5500 | return -EINVAL; | ||
5501 | } | ||
5502 | -#endif | ||
5503 | +EXPORT_SYMBOL_GPL(remove_memory); | ||
5504 | #endif | ||
5505 | |||
5506 | -kmem_cache_t *pgd_cache; | ||
5507 | -kmem_cache_t *pmd_cache; | ||
5508 | +struct kmem_cache *pgd_cache; | ||
5509 | +struct kmem_cache *pmd_cache; | ||
5510 | |||
5511 | void __init pgtable_cache_init(void) | ||
5512 | { | ||
5513 | --- a/arch/x86/mm/init_64-xen.c | ||
5514 | +++ b/arch/x86/mm/init_64-xen.c | ||
5515 | @@ -1130,14 +1130,15 @@ | ||
5516 | __initcall(x8664_sysctl_init); | ||
5517 | #endif | ||
5518 | |||
5519 | -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only | ||
5520 | +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only | ||
5521 | covers the 64bit vsyscall page now. 32bit has a real VMA now and does | ||
5522 | not need special handling anymore. */ | ||
5523 | |||
5524 | static struct vm_area_struct gate_vma = { | ||
5525 | .vm_start = VSYSCALL_START, | ||
5526 | - .vm_end = VSYSCALL_END, | ||
5527 | - .vm_page_prot = PAGE_READONLY | ||
5528 | + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), | ||
5529 | + .vm_page_prot = PAGE_READONLY_EXEC, | ||
5530 | + .vm_flags = VM_READ | VM_EXEC | ||
5531 | }; | ||
5532 | |||
5533 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | ||
5534 | --- a/arch/x86/mm/pageattr_64-xen.c | ||
5535 | +++ b/arch/x86/mm/pageattr_64-xen.c | ||
5536 | @@ -324,34 +324,40 @@ | ||
5537 | return base; | ||
5538 | } | ||
5539 | |||
5540 | - | ||
5541 | -static void flush_kernel_map(void *address) | ||
5542 | +static void cache_flush_page(void *adr) | ||
5543 | { | ||
5544 | - if (0 && address && cpu_has_clflush) { | ||
5545 | - /* is this worth it? */ | ||
5546 | - int i; | ||
5547 | - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | ||
5548 | - asm volatile("clflush (%0)" :: "r" (address + i)); | ||
5549 | - } else | ||
5550 | - asm volatile("wbinvd":::"memory"); | ||
5551 | - if (address) | ||
5552 | - __flush_tlb_one(address); | ||
5553 | - else | ||
5554 | - __flush_tlb_all(); | ||
5555 | + int i; | ||
5556 | + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | ||
5557 | + asm volatile("clflush (%0)" :: "r" (adr + i)); | ||
5558 | } | ||
5559 | |||
5560 | +static void flush_kernel_map(void *arg) | ||
5561 | +{ | ||
5562 | + struct list_head *l = (struct list_head *)arg; | ||
5563 | + struct page *pg; | ||
5564 | |||
5565 | -static inline void flush_map(unsigned long address) | ||
5566 | + /* When clflush is available always use it because it is | ||
5567 | + much cheaper than WBINVD */ | ||
5568 | + if (!cpu_has_clflush) | ||
5569 | + asm volatile("wbinvd" ::: "memory"); | ||
5570 | + list_for_each_entry(pg, l, lru) { | ||
5571 | + void *adr = page_address(pg); | ||
5572 | + if (cpu_has_clflush) | ||
5573 | + cache_flush_page(adr); | ||
5574 | + __flush_tlb_one(adr); | ||
5575 | + } | ||
5576 | +} | ||
5577 | + | ||
5578 | +static inline void flush_map(struct list_head *l) | ||
5579 | { | ||
5580 | - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); | ||
5581 | + on_each_cpu(flush_kernel_map, l, 1, 1); | ||
5582 | } | ||
5583 | |||
5584 | -static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ | ||
5585 | +static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */ | ||
5586 | |||
5587 | static inline void save_page(struct page *fpage) | ||
5588 | { | ||
5589 | - fpage->lru.next = (struct list_head *)deferred_pages; | ||
5590 | - deferred_pages = fpage; | ||
5591 | + list_add(&fpage->lru, &deferred_pages); | ||
5592 | } | ||
5593 | |||
5594 | /* | ||
5595 | @@ -481,18 +487,18 @@ | ||
5596 | |||
5597 | void global_flush_tlb(void) | ||
5598 | { | ||
5599 | - struct page *dpage; | ||
5600 | + struct page *pg, *next; | ||
5601 | + struct list_head l; | ||
5602 | |||
5603 | down_read(&init_mm.mmap_sem); | ||
5604 | - dpage = xchg(&deferred_pages, NULL); | ||
5605 | + list_replace_init(&deferred_pages, &l); | ||
5606 | up_read(&init_mm.mmap_sem); | ||
5607 | |||
5608 | - flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0); | ||
5609 | - while (dpage) { | ||
5610 | - struct page *tmp = dpage; | ||
5611 | - dpage = (struct page *)dpage->lru.next; | ||
5612 | - ClearPagePrivate(tmp); | ||
5613 | - __free_page(tmp); | ||
5614 | + flush_map(&l); | ||
5615 | + | ||
5616 | + list_for_each_entry_safe(pg, next, &l, lru) { | ||
5617 | + ClearPagePrivate(pg); | ||
5618 | + __free_page(pg); | ||
5619 | } | ||
5620 | } | ||
5621 | |||
5622 | --- a/arch/x86/mm/pgtable_32-xen.c | ||
5623 | +++ b/arch/x86/mm/pgtable_32-xen.c | ||
5624 | @@ -197,7 +197,7 @@ | ||
5625 | __free_page(pte); | ||
5626 | } | ||
5627 | |||
5628 | -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) | ||
5629 | +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) | ||
5630 | { | ||
5631 | memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); | ||
5632 | } | ||
5633 | @@ -237,7 +237,7 @@ | ||
5634 | set_page_private(next, (unsigned long)pprev); | ||
5635 | } | ||
5636 | |||
5637 | -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) | ||
5638 | +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
5639 | { | ||
5640 | unsigned long flags; | ||
5641 | |||
5642 | @@ -258,7 +258,7 @@ | ||
5643 | } | ||
5644 | |||
5645 | /* never called when PTRS_PER_PMD > 1 */ | ||
5646 | -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) | ||
5647 | +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
5648 | { | ||
5649 | unsigned long flags; /* can be called from interrupt context */ | ||
5650 | |||
5651 | --- a/arch/x86/pci/irq-xen.c | ||
5652 | +++ b/arch/x86/pci/irq-xen.c | ||
5653 | @@ -768,7 +768,7 @@ | ||
5654 | DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", | ||
5655 | rt->rtr_vendor, rt->rtr_device); | ||
5656 | |||
5657 | - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); | ||
5658 | + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); | ||
5659 | if (!pirq_router_dev) { | ||
5660 | DBG(KERN_DEBUG "PCI: Interrupt router not found at " | ||
5661 | "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); | ||
5662 | @@ -788,6 +788,8 @@ | ||
5663 | pirq_router_dev->vendor, | ||
5664 | pirq_router_dev->device, | ||
5665 | pci_name(pirq_router_dev)); | ||
5666 | + | ||
5667 | + /* The device remains referenced for the kernel lifetime */ | ||
5668 | } | ||
5669 | |||
5670 | static struct irq_info *pirq_get_info(struct pci_dev *dev) | ||
5671 | --- a/drivers/xen/balloon/balloon.c | ||
5672 | +++ b/drivers/xen/balloon/balloon.c | ||
5673 | @@ -97,8 +97,8 @@ | ||
5674 | static LIST_HEAD(ballooned_pages); | ||
5675 | |||
5676 | /* Main work function, always executed in process context. */ | ||
5677 | -static void balloon_process(void *unused); | ||
5678 | -static DECLARE_WORK(balloon_worker, balloon_process, NULL); | ||
5679 | +static void balloon_process(struct work_struct *unused); | ||
5680 | +static DECLARE_WORK(balloon_worker, balloon_process); | ||
5681 | static struct timer_list balloon_timer; | ||
5682 | |||
5683 | /* When ballooning out (allocating memory to return to Xen) we don't really | ||
5684 | @@ -387,7 +387,7 @@ | ||
5685 | * by the balloon lock), or with changes to the Xen hard limit, but we will | ||
5686 | * recover from these in time. | ||
5687 | */ | ||
5688 | -static void balloon_process(void *unused) | ||
5689 | +static void balloon_process(struct work_struct *unused) | ||
5690 | { | ||
5691 | int need_sleep = 0; | ||
5692 | long credit; | ||
5693 | --- a/drivers/xen/blkback/blkback.c | ||
5694 | +++ b/drivers/xen/blkback/blkback.c | ||
5695 | @@ -37,6 +37,7 @@ | ||
5696 | |||
5697 | #include <linux/spinlock.h> | ||
5698 | #include <linux/kthread.h> | ||
5699 | +#include <linux/freezer.h> | ||
5700 | #include <linux/list.h> | ||
5701 | #include <linux/delay.h> | ||
5702 | #include <xen/balloon.h> | ||
5703 | --- a/drivers/xen/blkback/interface.c | ||
5704 | +++ b/drivers/xen/blkback/interface.c | ||
5705 | @@ -34,7 +34,7 @@ | ||
5706 | #include <xen/evtchn.h> | ||
5707 | #include <linux/kthread.h> | ||
5708 | |||
5709 | -static kmem_cache_t *blkif_cachep; | ||
5710 | +static struct kmem_cache *blkif_cachep; | ||
5711 | |||
5712 | blkif_t *blkif_alloc(domid_t domid) | ||
5713 | { | ||
5714 | --- a/drivers/xen/blkfront/blkfront.c | ||
5715 | +++ b/drivers/xen/blkfront/blkfront.c | ||
5716 | @@ -70,7 +70,7 @@ | ||
5717 | static void kick_pending_request_queues(struct blkfront_info *); | ||
5718 | |||
5719 | static irqreturn_t blkif_int(int irq, void *dev_id); | ||
5720 | -static void blkif_restart_queue(void *arg); | ||
5721 | +static void blkif_restart_queue(struct work_struct *arg); | ||
5722 | static void blkif_recover(struct blkfront_info *); | ||
5723 | static void blkif_completion(struct blk_shadow *); | ||
5724 | static void blkif_free(struct blkfront_info *, int); | ||
5725 | @@ -105,7 +105,7 @@ | ||
5726 | info->xbdev = dev; | ||
5727 | info->vdevice = vdevice; | ||
5728 | info->connected = BLKIF_STATE_DISCONNECTED; | ||
5729 | - INIT_WORK(&info->work, blkif_restart_queue, (void *)info); | ||
5730 | + INIT_WORK(&info->work, blkif_restart_queue); | ||
5731 | |||
5732 | for (i = 0; i < BLK_RING_SIZE; i++) | ||
5733 | info->shadow[i].req.id = i+1; | ||
5734 | @@ -445,9 +445,9 @@ | ||
5735 | } | ||
5736 | } | ||
5737 | |||
5738 | -static void blkif_restart_queue(void *arg) | ||
5739 | +static void blkif_restart_queue(struct work_struct *arg) | ||
5740 | { | ||
5741 | - struct blkfront_info *info = (struct blkfront_info *)arg; | ||
5742 | + struct blkfront_info *info = container_of(arg, struct blkfront_info, work); | ||
5743 | spin_lock_irq(&blkif_io_lock); | ||
5744 | if (info->connected == BLKIF_STATE_CONNECTED) | ||
5745 | kick_pending_request_queues(info); | ||
5746 | --- a/drivers/xen/blktap/blktap.c | ||
5747 | +++ b/drivers/xen/blktap/blktap.c | ||
5748 | @@ -40,6 +40,7 @@ | ||
5749 | |||
5750 | #include <linux/spinlock.h> | ||
5751 | #include <linux/kthread.h> | ||
5752 | +#include <linux/freezer.h> | ||
5753 | #include <linux/list.h> | ||
5754 | #include <asm/hypervisor.h> | ||
5755 | #include "common.h" | ||
5756 | --- a/drivers/xen/blktap/interface.c | ||
5757 | +++ b/drivers/xen/blktap/interface.c | ||
5758 | @@ -34,7 +34,7 @@ | ||
5759 | #include "common.h" | ||
5760 | #include <xen/evtchn.h> | ||
5761 | |||
5762 | -static kmem_cache_t *blkif_cachep; | ||
5763 | +static struct kmem_cache *blkif_cachep; | ||
5764 | |||
5765 | blkif_t *tap_alloc_blkif(domid_t domid) | ||
5766 | { | ||
5767 | --- a/drivers/xen/char/mem.c | ||
5768 | +++ b/drivers/xen/char/mem.c | ||
5769 | @@ -157,7 +157,7 @@ | ||
5770 | { | ||
5771 | loff_t ret; | ||
5772 | |||
5773 | - mutex_lock(&file->f_dentry->d_inode->i_mutex); | ||
5774 | + mutex_lock(&file->f_path.dentry->d_inode->i_mutex); | ||
5775 | switch (orig) { | ||
5776 | case 0: | ||
5777 | file->f_pos = offset; | ||
5778 | @@ -172,7 +172,7 @@ | ||
5779 | default: | ||
5780 | ret = -EINVAL; | ||
5781 | } | ||
5782 | - mutex_unlock(&file->f_dentry->d_inode->i_mutex); | ||
5783 | + mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); | ||
5784 | return ret; | ||
5785 | } | ||
5786 | |||
5787 | --- a/drivers/xen/console/console.c | ||
5788 | +++ b/drivers/xen/console/console.c | ||
5789 | @@ -80,11 +80,6 @@ | ||
5790 | #define XEN_XVC_MAJOR 204 | ||
5791 | #define XEN_XVC_MINOR 191 | ||
5792 | |||
5793 | -#ifdef CONFIG_MAGIC_SYSRQ | ||
5794 | -static unsigned long sysrq_requested; | ||
5795 | -extern int sysrq_enabled; | ||
5796 | -#endif | ||
5797 | - | ||
5798 | static int __init xencons_setup(char *str) | ||
5799 | { | ||
5800 | char *q; | ||
5801 | @@ -339,8 +334,8 @@ | ||
5802 | #define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \ | ||
5803 | ((_tty)->index != (xc_num - 1))) | ||
5804 | |||
5805 | -static struct termios *xencons_termios[MAX_NR_CONSOLES]; | ||
5806 | -static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; | ||
5807 | +static struct ktermios *xencons_termios[MAX_NR_CONSOLES]; | ||
5808 | +static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES]; | ||
5809 | static struct tty_struct *xencons_tty; | ||
5810 | static int xencons_priv_irq; | ||
5811 | static char x_char; | ||
5812 | @@ -356,7 +351,9 @@ | ||
5813 | |||
5814 | for (i = 0; i < len; i++) { | ||
5815 | #ifdef CONFIG_MAGIC_SYSRQ | ||
5816 | - if (sysrq_enabled) { | ||
5817 | + if (sysrq_on()) { | ||
5818 | + static unsigned long sysrq_requested; | ||
5819 | + | ||
5820 | if (buf[i] == '\x0f') { /* ^O */ | ||
5821 | if (!sysrq_requested) { | ||
5822 | sysrq_requested = jiffies; | ||
5823 | --- a/drivers/xen/core/reboot.c | ||
5824 | +++ b/drivers/xen/core/reboot.c | ||
5825 | @@ -30,8 +30,8 @@ | ||
5826 | /* Can we leave APs online when we suspend? */ | ||
5827 | static int fast_suspend; | ||
5828 | |||
5829 | -static void __shutdown_handler(void *unused); | ||
5830 | -static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); | ||
5831 | +static void __shutdown_handler(struct work_struct *unused); | ||
5832 | +static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler); | ||
5833 | |||
5834 | int __xen_suspend(int fast_suspend, void (*resume_notifier)(void)); | ||
5835 | |||
5836 | @@ -96,7 +96,7 @@ | ||
5837 | case SHUTDOWN_RESUMING: | ||
5838 | break; | ||
5839 | default: | ||
5840 | - schedule_work(&shutdown_work); | ||
5841 | + schedule_delayed_work(&shutdown_work, 0); | ||
5842 | break; | ||
5843 | } | ||
5844 | |||
5845 | @@ -108,7 +108,7 @@ | ||
5846 | return 0; | ||
5847 | } | ||
5848 | |||
5849 | -static void __shutdown_handler(void *unused) | ||
5850 | +static void __shutdown_handler(struct work_struct *unused) | ||
5851 | { | ||
5852 | int err; | ||
5853 | |||
5854 | @@ -169,7 +169,7 @@ | ||
5855 | if (new_state != SHUTDOWN_INVALID) { | ||
5856 | old_state = xchg(&shutting_down, new_state); | ||
5857 | if (old_state == SHUTDOWN_INVALID) | ||
5858 | - schedule_work(&shutdown_work); | ||
5859 | + schedule_delayed_work(&shutdown_work, 0); | ||
5860 | else | ||
5861 | BUG_ON(old_state != SHUTDOWN_RESUMING); | ||
5862 | } | ||
5863 | --- a/drivers/xen/core/smpboot.c | ||
5864 | +++ b/drivers/xen/core/smpboot.c | ||
5865 | @@ -165,7 +165,12 @@ | ||
5866 | |||
5867 | void __cpuinit cpu_bringup(void) | ||
5868 | { | ||
5869 | +#ifdef __i386__ | ||
5870 | + cpu_set_gdt(current_thread_info()->cpu); | ||
5871 | + secondary_cpu_init(); | ||
5872 | +#else | ||
5873 | cpu_init(); | ||
5874 | +#endif | ||
5875 | identify_cpu(cpu_data + smp_processor_id()); | ||
5876 | touch_softlockup_watchdog(); | ||
5877 | preempt_disable(); | ||
5878 | @@ -304,11 +309,12 @@ | ||
5879 | if (cpu == 0) | ||
5880 | continue; | ||
5881 | |||
5882 | + idle = fork_idle(cpu); | ||
5883 | + if (IS_ERR(idle)) | ||
5884 | + panic("failed fork for CPU %d", cpu); | ||
5885 | + | ||
5886 | #ifdef __x86_64__ | ||
5887 | gdt_descr = &cpu_gdt_descr[cpu]; | ||
5888 | -#else | ||
5889 | - gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
5890 | -#endif | ||
5891 | gdt_descr->address = get_zeroed_page(GFP_KERNEL); | ||
5892 | if (unlikely(!gdt_descr->address)) { | ||
5893 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", | ||
5894 | @@ -317,6 +323,11 @@ | ||
5895 | } | ||
5896 | gdt_descr->size = GDT_SIZE; | ||
5897 | memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE); | ||
5898 | +#else | ||
5899 | + if (unlikely(!init_gdt(cpu, idle))) | ||
5900 | + continue; | ||
5901 | + gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
5902 | +#endif | ||
5903 | make_page_readonly( | ||
5904 | (void *)gdt_descr->address, | ||
5905 | XENFEAT_writable_descriptor_tables); | ||
5906 | @@ -336,10 +347,6 @@ | ||
5907 | cpu_2_logical_apicid[cpu] = apicid; | ||
5908 | x86_cpu_to_apicid[cpu] = apicid; | ||
5909 | |||
5910 | - idle = fork_idle(cpu); | ||
5911 | - if (IS_ERR(idle)) | ||
5912 | - panic("failed fork for CPU %d", cpu); | ||
5913 | - | ||
5914 | #ifdef __x86_64__ | ||
5915 | cpu_pda(cpu)->pcurrent = idle; | ||
5916 | cpu_pda(cpu)->cpunumber = cpu; | ||
5917 | --- a/drivers/xen/fbfront/xenfb.c | ||
5918 | +++ b/drivers/xen/fbfront/xenfb.c | ||
5919 | @@ -25,6 +25,7 @@ | ||
5920 | #include <linux/vmalloc.h> | ||
5921 | #include <linux/mm.h> | ||
5922 | #include <linux/mutex.h> | ||
5923 | +#include <linux/freezer.h> | ||
5924 | #include <asm/hypervisor.h> | ||
5925 | #include <xen/evtchn.h> | ||
5926 | #include <xen/interface/io/fbif.h> | ||
5927 | --- a/drivers/xen/netback/loopback.c | ||
5928 | +++ b/drivers/xen/netback/loopback.c | ||
5929 | @@ -54,6 +54,7 @@ | ||
5930 | #include <net/dst.h> | ||
5931 | #include <net/xfrm.h> /* secpath_reset() */ | ||
5932 | #include <asm/hypervisor.h> /* is_initial_xendomain() */ | ||
5933 | +#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */ | ||
5934 | |||
5935 | static int nloopbacks = -1; | ||
5936 | module_param(nloopbacks, int, 0); | ||
5937 | --- a/drivers/xen/pciback/conf_space_header.c | ||
5938 | +++ b/drivers/xen/pciback/conf_space_header.c | ||
5939 | @@ -22,14 +22,14 @@ | ||
5940 | { | ||
5941 | int err; | ||
5942 | |||
5943 | - if (!dev->is_enabled && is_enable_cmd(value)) { | ||
5944 | + if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) { | ||
5945 | if (unlikely(verbose_request)) | ||
5946 | printk(KERN_DEBUG "pciback: %s: enable\n", | ||
5947 | pci_name(dev)); | ||
5948 | err = pci_enable_device(dev); | ||
5949 | if (err) | ||
5950 | return err; | ||
5951 | - } else if (dev->is_enabled && !is_enable_cmd(value)) { | ||
5952 | + } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) { | ||
5953 | if (unlikely(verbose_request)) | ||
5954 | printk(KERN_DEBUG "pciback: %s: disable\n", | ||
5955 | pci_name(dev)); | ||
5956 | --- a/drivers/xen/pciback/pciback.h | ||
5957 | +++ b/drivers/xen/pciback/pciback.h | ||
5958 | @@ -88,7 +88,7 @@ | ||
5959 | |||
5960 | /* Handles events from front-end */ | ||
5961 | irqreturn_t pciback_handle_event(int irq, void *dev_id); | ||
5962 | -void pciback_do_op(void *data); | ||
5963 | +void pciback_do_op(struct work_struct *work); | ||
5964 | |||
5965 | int pciback_xenbus_register(void); | ||
5966 | void pciback_xenbus_unregister(void); | ||
5967 | --- a/drivers/xen/pciback/pciback_ops.c | ||
5968 | +++ b/drivers/xen/pciback/pciback_ops.c | ||
5969 | @@ -25,7 +25,7 @@ | ||
5970 | |||
5971 | pci_write_config_word(dev, PCI_COMMAND, 0); | ||
5972 | |||
5973 | - dev->is_enabled = 0; | ||
5974 | + atomic_set(&dev->enable_cnt, 0); | ||
5975 | dev->is_busmaster = 0; | ||
5976 | } else { | ||
5977 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | ||
5978 | @@ -51,9 +51,9 @@ | ||
5979 | * context because some of the pci_* functions can sleep (mostly due to ACPI | ||
5980 | * use of semaphores). This function is intended to be called from a work | ||
5981 | * queue in process context taking a struct pciback_device as a parameter */ | ||
5982 | -void pciback_do_op(void *data) | ||
5983 | +void pciback_do_op(struct work_struct *work) | ||
5984 | { | ||
5985 | - struct pciback_device *pdev = data; | ||
5986 | + struct pciback_device *pdev = container_of(work, struct pciback_device, op_work); | ||
5987 | struct pci_dev *dev; | ||
5988 | struct xen_pci_op *op = &pdev->sh_info->op; | ||
5989 | |||
5990 | --- a/drivers/xen/pciback/xenbus.c | ||
5991 | +++ b/drivers/xen/pciback/xenbus.c | ||
5992 | @@ -32,7 +32,7 @@ | ||
5993 | pdev->evtchn_irq = INVALID_EVTCHN_IRQ; | ||
5994 | pdev->be_watching = 0; | ||
5995 | |||
5996 | - INIT_WORK(&pdev->op_work, pciback_do_op, pdev); | ||
5997 | + INIT_WORK(&pdev->op_work, pciback_do_op); | ||
5998 | |||
5999 | if (pciback_init_devices(pdev)) { | ||
6000 | kfree(pdev); | ||
6001 | @@ -53,7 +53,6 @@ | ||
6002 | |||
6003 | /* If the driver domain started an op, make sure we complete it or | ||
6004 | * delete it before releasing the shared memory */ | ||
6005 | - cancel_delayed_work(&pdev->op_work); | ||
6006 | flush_scheduled_work(); | ||
6007 | |||
6008 | if (pdev->sh_info) | ||
6009 | --- a/drivers/xen/sfc_netfront/accel_vi.c | ||
6010 | +++ b/drivers/xen/sfc_netfront/accel_vi.c | ||
6011 | @@ -463,7 +463,7 @@ | ||
6012 | |||
6013 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
6014 | /* Set to zero to encourage falcon to work it out for us */ | ||
6015 | - *(u16*)(skb->h.raw + skb->csum) = 0; | ||
6016 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; | ||
6017 | } | ||
6018 | |||
6019 | if (multi_post_start_new_buffer(vnic, &state)) { | ||
6020 | @@ -582,7 +582,7 @@ | ||
6021 | |||
6022 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
6023 | /* Set to zero to encourage falcon to work it out for us */ | ||
6024 | - *(u16*)(skb->h.raw + skb->csum) = 0; | ||
6025 | + *(u16*)(skb->h.raw + skb->csum_offset) = 0; | ||
6026 | } | ||
6027 | NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT | ||
6028 | (skb, idx, frag_data, frag_len, { | ||
6029 | --- a/drivers/xen/tpmback/interface.c | ||
6030 | +++ b/drivers/xen/tpmback/interface.c | ||
6031 | @@ -15,7 +15,7 @@ | ||
6032 | #include <xen/balloon.h> | ||
6033 | #include <xen/gnttab.h> | ||
6034 | |||
6035 | -static kmem_cache_t *tpmif_cachep; | ||
6036 | +static struct kmem_cache *tpmif_cachep; | ||
6037 | int num_frontends = 0; | ||
6038 | |||
6039 | LIST_HEAD(tpmif_list); | ||
6040 | --- a/drivers/xen/xenbus/xenbus_comms.c | ||
6041 | +++ b/drivers/xen/xenbus/xenbus_comms.c | ||
6042 | @@ -49,9 +49,9 @@ | ||
6043 | |||
6044 | static int xenbus_irq; | ||
6045 | |||
6046 | -extern void xenbus_probe(void *); | ||
6047 | +extern void xenbus_probe(struct work_struct *); | ||
6048 | extern int xenstored_ready; | ||
6049 | -static DECLARE_WORK(probe_work, xenbus_probe, NULL); | ||
6050 | +static DECLARE_WORK(probe_work, xenbus_probe); | ||
6051 | |||
6052 | static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); | ||
6053 | |||
6054 | --- a/drivers/xen/xenbus/xenbus_probe.c | ||
6055 | +++ b/drivers/xen/xenbus/xenbus_probe.c | ||
6056 | @@ -840,7 +840,7 @@ | ||
6057 | EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); | ||
6058 | |||
6059 | |||
6060 | -void xenbus_probe(void *unused) | ||
6061 | +void xenbus_probe(struct work_struct *unused) | ||
6062 | { | ||
6063 | BUG_ON((xenstored_ready <= 0)); | ||
6064 | |||
6065 | --- a/include/asm-x86/mach-xen/asm/desc_32.h | ||
6066 | +++ b/include/asm-x86/mach-xen/asm/desc_32.h | ||
6067 | @@ -4,8 +4,6 @@ | ||
6068 | #include <asm/ldt.h> | ||
6069 | #include <asm/segment.h> | ||
6070 | |||
6071 | -#define CPU_16BIT_STACK_SIZE 1024 | ||
6072 | - | ||
6073 | #ifndef __ASSEMBLY__ | ||
6074 | |||
6075 | #include <linux/preempt.h> | ||
6076 | @@ -15,8 +13,6 @@ | ||
6077 | |||
6078 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | ||
6079 | |||
6080 | -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | ||
6081 | - | ||
6082 | struct Xgt_desc_struct { | ||
6083 | unsigned short size; | ||
6084 | unsigned long address __attribute__((packed)); | ||
6085 | @@ -32,11 +28,6 @@ | ||
6086 | return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; | ||
6087 | } | ||
6088 | |||
6089 | -/* | ||
6090 | - * This is the ldt that every process will get unless we need | ||
6091 | - * something other than this. | ||
6092 | - */ | ||
6093 | -extern struct desc_struct default_ldt[]; | ||
6094 | extern struct desc_struct idt_table[]; | ||
6095 | extern void set_intr_gate(unsigned int irq, void * addr); | ||
6096 | |||
6097 | @@ -63,8 +54,8 @@ | ||
6098 | #define DESCTYPE_DPL3 0x60 /* DPL-3 */ | ||
6099 | #define DESCTYPE_S 0x10 /* !system */ | ||
6100 | |||
6101 | +#ifndef CONFIG_XEN | ||
6102 | #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) | ||
6103 | -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) | ||
6104 | |||
6105 | #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) | ||
6106 | #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) | ||
6107 | @@ -75,6 +66,7 @@ | ||
6108 | #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr)) | ||
6109 | #define store_tr(tr) __asm__ ("str %0":"=m" (tr)) | ||
6110 | #define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) | ||
6111 | +#endif | ||
6112 | |||
6113 | #if TLS_SIZE != 24 | ||
6114 | # error update this code. | ||
6115 | @@ -90,22 +82,43 @@ | ||
6116 | } | ||
6117 | |||
6118 | #ifndef CONFIG_XEN | ||
6119 | +#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6120 | +#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6121 | +#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6122 | + | ||
6123 | static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) | ||
6124 | { | ||
6125 | __u32 *lp = (__u32 *)((char *)dt + entry*8); | ||
6126 | *lp = entry_a; | ||
6127 | *(lp+1) = entry_b; | ||
6128 | } | ||
6129 | - | ||
6130 | -#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6131 | -#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6132 | +#define set_ldt native_set_ldt | ||
6133 | #else | ||
6134 | extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); | ||
6135 | extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); | ||
6136 | +#define set_ldt xen_set_ldt | ||
6137 | +#endif | ||
6138 | + | ||
6139 | +#ifndef CONFIG_XEN | ||
6140 | +static inline fastcall void native_set_ldt(const void *addr, | ||
6141 | + unsigned int entries) | ||
6142 | +{ | ||
6143 | + if (likely(entries == 0)) | ||
6144 | + __asm__ __volatile__("lldt %w0"::"q" (0)); | ||
6145 | + else { | ||
6146 | + unsigned cpu = smp_processor_id(); | ||
6147 | + __u32 a, b; | ||
6148 | + | ||
6149 | + pack_descriptor(&a, &b, (unsigned long)addr, | ||
6150 | + entries * sizeof(struct desc_struct) - 1, | ||
6151 | + DESCTYPE_LDT, 0); | ||
6152 | + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); | ||
6153 | + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); | ||
6154 | + } | ||
6155 | +} | ||
6156 | #endif | ||
6157 | -#ifndef CONFIG_X86_NO_IDT | ||
6158 | -#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) | ||
6159 | |||
6160 | +#ifndef CONFIG_X86_NO_IDT | ||
6161 | static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) | ||
6162 | { | ||
6163 | __u32 a, b; | ||
6164 | @@ -125,14 +138,6 @@ | ||
6165 | } | ||
6166 | #endif | ||
6167 | |||
6168 | -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) | ||
6169 | -{ | ||
6170 | - __u32 a, b; | ||
6171 | - pack_descriptor(&a, &b, (unsigned long)addr, | ||
6172 | - entries * sizeof(struct desc_struct) - 1, | ||
6173 | - DESCTYPE_LDT, 0); | ||
6174 | - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); | ||
6175 | -} | ||
6176 | |||
6177 | #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) | ||
6178 | |||
6179 | @@ -163,36 +168,22 @@ | ||
6180 | |||
6181 | static inline void clear_LDT(void) | ||
6182 | { | ||
6183 | - int cpu = get_cpu(); | ||
6184 | - | ||
6185 | - /* | ||
6186 | - * NB. We load the default_ldt for lcall7/27 handling on demand, as | ||
6187 | - * it slows down context switching. Noone uses it anyway. | ||
6188 | - */ | ||
6189 | - cpu = cpu; /* XXX avoid compiler warning */ | ||
6190 | - xen_set_ldt(NULL, 0); | ||
6191 | - put_cpu(); | ||
6192 | + set_ldt(NULL, 0); | ||
6193 | } | ||
6194 | |||
6195 | /* | ||
6196 | * load one particular LDT into the current CPU | ||
6197 | */ | ||
6198 | -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) | ||
6199 | +static inline void load_LDT_nolock(mm_context_t *pc) | ||
6200 | { | ||
6201 | - void *segments = pc->ldt; | ||
6202 | - int count = pc->size; | ||
6203 | - | ||
6204 | - if (likely(!count)) | ||
6205 | - segments = NULL; | ||
6206 | - | ||
6207 | - xen_set_ldt(segments, count); | ||
6208 | + set_ldt(pc->ldt, pc->size); | ||
6209 | } | ||
6210 | |||
6211 | static inline void load_LDT(mm_context_t *pc) | ||
6212 | { | ||
6213 | - int cpu = get_cpu(); | ||
6214 | - load_LDT_nolock(pc, cpu); | ||
6215 | - put_cpu(); | ||
6216 | + preempt_disable(); | ||
6217 | + load_LDT_nolock(pc); | ||
6218 | + preempt_enable(); | ||
6219 | } | ||
6220 | |||
6221 | static inline unsigned long get_desc_base(unsigned long *desc) | ||
6222 | @@ -204,6 +195,29 @@ | ||
6223 | return base; | ||
6224 | } | ||
6225 | |||
6226 | +#else /* __ASSEMBLY__ */ | ||
6227 | + | ||
6228 | +/* | ||
6229 | + * GET_DESC_BASE reads the descriptor base of the specified segment. | ||
6230 | + * | ||
6231 | + * Args: | ||
6232 | + * idx - descriptor index | ||
6233 | + * gdt - GDT pointer | ||
6234 | + * base - 32bit register to which the base will be written | ||
6235 | + * lo_w - lo word of the "base" register | ||
6236 | + * lo_b - lo byte of the "base" register | ||
6237 | + * hi_b - hi byte of the low word of the "base" register | ||
6238 | + * | ||
6239 | + * Example: | ||
6240 | + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
6241 | + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | ||
6242 | + */ | ||
6243 | +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | ||
6244 | + movb idx*8+4(gdt), lo_b; \ | ||
6245 | + movb idx*8+7(gdt), hi_b; \ | ||
6246 | + shll $16, base; \ | ||
6247 | + movw idx*8+2(gdt), lo_w; | ||
6248 | + | ||
6249 | #endif /* !__ASSEMBLY__ */ | ||
6250 | |||
6251 | #endif | ||
6252 | --- a/include/asm-x86/mach-xen/asm/desc_64.h | ||
6253 | +++ b/include/asm-x86/mach-xen/asm/desc_64.h | ||
6254 | @@ -9,62 +9,11 @@ | ||
6255 | |||
6256 | #include <linux/string.h> | ||
6257 | #include <linux/smp.h> | ||
6258 | +#include <asm/desc_defs.h> | ||
6259 | |||
6260 | #include <asm/segment.h> | ||
6261 | #include <asm/mmu.h> | ||
6262 | |||
6263 | -// 8 byte segment descriptor | ||
6264 | -struct desc_struct { | ||
6265 | - u16 limit0; | ||
6266 | - u16 base0; | ||
6267 | - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; | ||
6268 | - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; | ||
6269 | -} __attribute__((packed)); | ||
6270 | - | ||
6271 | -struct n_desc_struct { | ||
6272 | - unsigned int a,b; | ||
6273 | -}; | ||
6274 | - | ||
6275 | -enum { | ||
6276 | - GATE_INTERRUPT = 0xE, | ||
6277 | - GATE_TRAP = 0xF, | ||
6278 | - GATE_CALL = 0xC, | ||
6279 | -}; | ||
6280 | - | ||
6281 | -// 16byte gate | ||
6282 | -struct gate_struct { | ||
6283 | - u16 offset_low; | ||
6284 | - u16 segment; | ||
6285 | - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; | ||
6286 | - u16 offset_middle; | ||
6287 | - u32 offset_high; | ||
6288 | - u32 zero1; | ||
6289 | -} __attribute__((packed)); | ||
6290 | - | ||
6291 | -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) | ||
6292 | -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) | ||
6293 | -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) | ||
6294 | - | ||
6295 | -enum { | ||
6296 | - DESC_TSS = 0x9, | ||
6297 | - DESC_LDT = 0x2, | ||
6298 | -}; | ||
6299 | - | ||
6300 | -// LDT or TSS descriptor in the GDT. 16 bytes. | ||
6301 | -struct ldttss_desc { | ||
6302 | - u16 limit0; | ||
6303 | - u16 base0; | ||
6304 | - unsigned base1 : 8, type : 5, dpl : 2, p : 1; | ||
6305 | - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; | ||
6306 | - u32 base3; | ||
6307 | - u32 zero1; | ||
6308 | -} __attribute__((packed)); | ||
6309 | - | ||
6310 | -struct desc_ptr { | ||
6311 | - unsigned short size; | ||
6312 | - unsigned long address; | ||
6313 | -} __attribute__((packed)) ; | ||
6314 | - | ||
6315 | extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS]; | ||
6316 | |||
6317 | extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; | ||
6318 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_32.h | ||
6319 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_32.h | ||
6320 | @@ -127,10 +127,10 @@ | ||
6321 | return (1 << INTERNODE_CACHE_SHIFT); | ||
6322 | } | ||
6323 | |||
6324 | -#define dma_is_consistent(d) (1) | ||
6325 | +#define dma_is_consistent(d, h) (1) | ||
6326 | |||
6327 | static inline void | ||
6328 | -dma_cache_sync(void *vaddr, size_t size, | ||
6329 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, | ||
6330 | enum dma_data_direction direction) | ||
6331 | { | ||
6332 | flush_write_buffers(); | ||
6333 | --- a/include/asm-x86/mach-xen/asm/dma-mapping_64.h | ||
6334 | +++ b/include/asm-x86/mach-xen/asm/dma-mapping_64.h | ||
6335 | @@ -64,6 +64,9 @@ | ||
6336 | return (dma_addr == bad_dma_address); | ||
6337 | } | ||
6338 | |||
6339 | +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
6340 | +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
6341 | + | ||
6342 | extern void *dma_alloc_coherent(struct device *dev, size_t size, | ||
6343 | dma_addr_t *dma_handle, gfp_t gfp); | ||
6344 | extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, | ||
6345 | @@ -181,12 +184,13 @@ | ||
6346 | return boot_cpu_data.x86_clflush_size; | ||
6347 | } | ||
6348 | |||
6349 | -#define dma_is_consistent(h) 1 | ||
6350 | +#define dma_is_consistent(d, h) 1 | ||
6351 | |||
6352 | extern int dma_set_mask(struct device *dev, u64 mask); | ||
6353 | |||
6354 | static inline void | ||
6355 | -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) | ||
6356 | +dma_cache_sync(struct device *dev, void *vaddr, size_t size, | ||
6357 | + enum dma_data_direction dir) | ||
6358 | { | ||
6359 | flush_write_buffers(); | ||
6360 | } | ||
6361 | --- a/include/asm-x86/mach-xen/asm/fixmap_32.h | ||
6362 | +++ b/include/asm-x86/mach-xen/asm/fixmap_32.h | ||
6363 | @@ -13,13 +13,16 @@ | ||
6364 | #ifndef _ASM_FIXMAP_H | ||
6365 | #define _ASM_FIXMAP_H | ||
6366 | |||
6367 | - | ||
6368 | /* used by vmalloc.c, vsyscall.lds.S. | ||
6369 | * | ||
6370 | * Leave one empty page between vmalloc'ed areas and | ||
6371 | * the start of the fixmap. | ||
6372 | */ | ||
6373 | extern unsigned long __FIXADDR_TOP; | ||
6374 | +#ifdef CONFIG_COMPAT_VDSO | ||
6375 | +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) | ||
6376 | +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) | ||
6377 | +#endif | ||
6378 | |||
6379 | #ifndef __ASSEMBLY__ | ||
6380 | #include <linux/kernel.h> | ||
6381 | --- a/include/asm-x86/mach-xen/asm/hypervisor.h | ||
6382 | +++ b/include/asm-x86/mach-xen/asm/hypervisor.h | ||
6383 | @@ -45,15 +45,6 @@ | ||
6384 | #include <xen/interface/nmi.h> | ||
6385 | #include <asm/ptrace.h> | ||
6386 | #include <asm/page.h> | ||
6387 | -#if defined(__i386__) | ||
6388 | -# ifdef CONFIG_X86_PAE | ||
6389 | -# include <asm-generic/pgtable-nopud.h> | ||
6390 | -# else | ||
6391 | -# include <asm-generic/pgtable-nopmd.h> | ||
6392 | -# endif | ||
6393 | -#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) | ||
6394 | -# include <asm-generic/pgtable-nopud.h> | ||
6395 | -#endif | ||
6396 | |||
6397 | extern shared_info_t *HYPERVISOR_shared_info; | ||
6398 | |||
6399 | --- a/include/asm-x86/mach-xen/asm/io_32.h | ||
6400 | +++ b/include/asm-x86/mach-xen/asm/io_32.h | ||
6401 | @@ -269,11 +269,7 @@ | ||
6402 | |||
6403 | #endif /* __KERNEL__ */ | ||
6404 | |||
6405 | -#ifdef SLOW_IO_BY_JUMPING | ||
6406 | -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" | ||
6407 | -#else | ||
6408 | #define __SLOW_DOWN_IO "outb %%al,$0x80;" | ||
6409 | -#endif | ||
6410 | |||
6411 | static inline void slow_down_io(void) { | ||
6412 | __asm__ __volatile__( | ||
6413 | --- a/include/asm-x86/mach-xen/asm/irqflags_32.h | ||
6414 | +++ b/include/asm-x86/mach-xen/asm/irqflags_32.h | ||
6415 | @@ -22,9 +22,6 @@ | ||
6416 | |||
6417 | #define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask) | ||
6418 | |||
6419 | -#define raw_local_save_flags(flags) \ | ||
6420 | - do { (flags) = __raw_local_save_flags(); } while (0) | ||
6421 | - | ||
6422 | #define raw_local_irq_restore(x) \ | ||
6423 | do { \ | ||
6424 | vcpu_info_t *_vcpu; \ | ||
6425 | @@ -66,18 +63,6 @@ | ||
6426 | */ | ||
6427 | void halt(void); | ||
6428 | |||
6429 | -static inline int raw_irqs_disabled_flags(unsigned long flags) | ||
6430 | -{ | ||
6431 | - return (flags != 0); | ||
6432 | -} | ||
6433 | - | ||
6434 | -#define raw_irqs_disabled() \ | ||
6435 | -({ \ | ||
6436 | - unsigned long flags = __raw_local_save_flags(); \ | ||
6437 | - \ | ||
6438 | - raw_irqs_disabled_flags(flags); \ | ||
6439 | -}) | ||
6440 | - | ||
6441 | /* | ||
6442 | * For spinlocks, etc: | ||
6443 | */ | ||
6444 | @@ -90,9 +75,62 @@ | ||
6445 | flags; \ | ||
6446 | }) | ||
6447 | |||
6448 | +#else | ||
6449 | +/* Offsets into shared_info_t. */ | ||
6450 | +#define evtchn_upcall_pending /* 0 */ | ||
6451 | +#define evtchn_upcall_mask 1 | ||
6452 | + | ||
6453 | +#define sizeof_vcpu_shift 6 | ||
6454 | + | ||
6455 | +#ifdef CONFIG_SMP | ||
6456 | +#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \ | ||
6457 | + shl $sizeof_vcpu_shift,%esi ; \ | ||
6458 | + addl HYPERVISOR_shared_info,%esi | ||
6459 | +#else | ||
6460 | +#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi | ||
6461 | +#endif | ||
6462 | + | ||
6463 | +#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) | ||
6464 | +#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) | ||
6465 | +#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) | ||
6466 | +#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | ||
6467 | + __DISABLE_INTERRUPTS | ||
6468 | +#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \ | ||
6469 | + __ENABLE_INTERRUPTS | ||
6470 | +#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ | ||
6471 | +sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ | ||
6472 | + __TEST_PENDING ; \ | ||
6473 | + jnz 14f /* process more events if necessary... */ ; \ | ||
6474 | + movl PT_ESI(%esp), %esi ; \ | ||
6475 | + sysexit ; \ | ||
6476 | +14: __DISABLE_INTERRUPTS ; \ | ||
6477 | + TRACE_IRQS_OFF ; \ | ||
6478 | +sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ | ||
6479 | + push %esp ; \ | ||
6480 | + call evtchn_do_upcall ; \ | ||
6481 | + add $4,%esp ; \ | ||
6482 | + jmp ret_from_intr | ||
6483 | +#define INTERRUPT_RETURN iret | ||
6484 | +#endif /* __ASSEMBLY__ */ | ||
6485 | + | ||
6486 | +#ifndef __ASSEMBLY__ | ||
6487 | +#define raw_local_save_flags(flags) \ | ||
6488 | + do { (flags) = __raw_local_save_flags(); } while (0) | ||
6489 | + | ||
6490 | #define raw_local_irq_save(flags) \ | ||
6491 | do { (flags) = __raw_local_irq_save(); } while (0) | ||
6492 | |||
6493 | +static inline int raw_irqs_disabled_flags(unsigned long flags) | ||
6494 | +{ | ||
6495 | + return (flags != 0); | ||
6496 | +} | ||
6497 | + | ||
6498 | +#define raw_irqs_disabled() \ | ||
6499 | +({ \ | ||
6500 | + unsigned long flags = __raw_local_save_flags(); \ | ||
6501 | + \ | ||
6502 | + raw_irqs_disabled_flags(flags); \ | ||
6503 | +}) | ||
6504 | #endif /* __ASSEMBLY__ */ | ||
6505 | |||
6506 | /* | ||
6507 | --- a/include/asm-x86/mach-xen/asm/mmu_context_32.h | ||
6508 | +++ b/include/asm-x86/mach-xen/asm/mmu_context_32.h | ||
6509 | @@ -27,14 +27,13 @@ | ||
6510 | static inline void __prepare_arch_switch(void) | ||
6511 | { | ||
6512 | /* | ||
6513 | - * Save away %fs and %gs. No need to save %es and %ds, as those | ||
6514 | - * are always kernel segments while inside the kernel. Must | ||
6515 | - * happen before reload of cr3/ldt (i.e., not in __switch_to). | ||
6516 | + * Save away %fs. No need to save %gs, as it was saved on the | ||
6517 | + * stack on entry. No need to save %es and %ds, as those are | ||
6518 | + * always kernel segments while inside the kernel. | ||
6519 | */ | ||
6520 | - asm volatile ( "mov %%fs,%0 ; mov %%gs,%1" | ||
6521 | - : "=m" (current->thread.fs), | ||
6522 | - "=m" (current->thread.gs)); | ||
6523 | - asm volatile ( "movl %0,%%fs ; movl %0,%%gs" | ||
6524 | + asm volatile ( "mov %%fs,%0" | ||
6525 | + : "=m" (current->thread.fs)); | ||
6526 | + asm volatile ( "movl %0,%%fs" | ||
6527 | : : "r" (0) ); | ||
6528 | } | ||
6529 | |||
6530 | @@ -89,14 +88,14 @@ | ||
6531 | * tlb flush IPI delivery. We must reload %cr3. | ||
6532 | */ | ||
6533 | load_cr3(next->pgd); | ||
6534 | - load_LDT_nolock(&next->context, cpu); | ||
6535 | + load_LDT_nolock(&next->context); | ||
6536 | } | ||
6537 | } | ||
6538 | #endif | ||
6539 | } | ||
6540 | |||
6541 | -#define deactivate_mm(tsk, mm) \ | ||
6542 | - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) | ||
6543 | +#define deactivate_mm(tsk, mm) \ | ||
6544 | + asm("movl %0,%%fs": :"r" (0)); | ||
6545 | |||
6546 | static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) | ||
6547 | { | ||
6548 | --- a/include/asm-x86/mach-xen/asm/pgtable-2level.h | ||
6549 | +++ b/include/asm-x86/mach-xen/asm/pgtable-2level.h | ||
6550 | @@ -1,8 +1,6 @@ | ||
6551 | #ifndef _I386_PGTABLE_2LEVEL_H | ||
6552 | #define _I386_PGTABLE_2LEVEL_H | ||
6553 | |||
6554 | -#include <asm-generic/pgtable-nopmd.h> | ||
6555 | - | ||
6556 | #define pte_ERROR(e) \ | ||
6557 | printk("%s:%d: bad pte %08lx (pfn %05lx).\n", __FILE__, __LINE__, \ | ||
6558 | __pte_val(e), pte_pfn(e)) | ||
6559 | @@ -23,26 +21,14 @@ | ||
6560 | set_pte((ptep), (pteval)); \ | ||
6561 | } while (0) | ||
6562 | |||
6563 | -#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) | ||
6564 | - | ||
6565 | #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) | ||
6566 | |||
6567 | +#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) | ||
6568 | + | ||
6569 | #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) | ||
6570 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
6571 | |||
6572 | -#define pte_none(x) (!(x).pte_low) | ||
6573 | - | ||
6574 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | ||
6575 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6576 | -{ | ||
6577 | - pte_t pte = *ptep; | ||
6578 | - if (!pte_none(pte)) { | ||
6579 | - if ((mm != &init_mm) || | ||
6580 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) | ||
6581 | - pte = __pte_ma(xchg(&ptep->pte_low, 0)); | ||
6582 | - } | ||
6583 | - return pte; | ||
6584 | -} | ||
6585 | +#define raw_ptep_get_and_clear(xp, pte) __pte_ma(xchg(&(xp)->pte_low, 0)) | ||
6586 | |||
6587 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | ||
6588 | #define ptep_clear_flush(vma, addr, ptep) \ | ||
6589 | @@ -69,6 +55,7 @@ | ||
6590 | __pte_mfn(_pte)) | ||
6591 | |||
6592 | #define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) | ||
6593 | +#define pte_none(x) (!(x).pte_low) | ||
6594 | |||
6595 | #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) | ||
6596 | #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) | ||
6597 | --- a/include/asm-x86/mach-xen/asm/pgtable-3level.h | ||
6598 | +++ b/include/asm-x86/mach-xen/asm/pgtable-3level.h | ||
6599 | @@ -1,8 +1,6 @@ | ||
6600 | #ifndef _I386_PGTABLE_3LEVEL_H | ||
6601 | #define _I386_PGTABLE_3LEVEL_H | ||
6602 | |||
6603 | -#include <asm-generic/pgtable-nopud.h> | ||
6604 | - | ||
6605 | /* | ||
6606 | * Intel Physical Address Extension (PAE) Mode - three-level page | ||
6607 | * tables on PPro+ CPUs. | ||
6608 | @@ -75,6 +73,23 @@ | ||
6609 | xen_l3_entry_update((pudptr), (pudval)) | ||
6610 | |||
6611 | /* | ||
6612 | + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table | ||
6613 | + * entry, so clear the bottom half first and enforce ordering with a compiler | ||
6614 | + * barrier. | ||
6615 | + */ | ||
6616 | +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6617 | +{ | ||
6618 | + if ((mm != current->mm && mm != &init_mm) | ||
6619 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | ||
6620 | + ptep->pte_low = 0; | ||
6621 | + smp_wmb(); | ||
6622 | + ptep->pte_high = 0; | ||
6623 | + } | ||
6624 | +} | ||
6625 | + | ||
6626 | +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
6627 | + | ||
6628 | +/* | ||
6629 | * Pentium-II erratum A13: in PAE mode we explicitly have to flush | ||
6630 | * the TLB via cr3 if the top-level pgd is changed... | ||
6631 | * We do not let the generic code free and clear pgd entries due to | ||
6632 | @@ -93,45 +108,16 @@ | ||
6633 | #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ | ||
6634 | pmd_index(address)) | ||
6635 | |||
6636 | -static inline int pte_none(pte_t pte) | ||
6637 | -{ | ||
6638 | - return !(pte.pte_low | pte.pte_high); | ||
6639 | -} | ||
6640 | - | ||
6641 | -/* | ||
6642 | - * For PTEs and PDEs, we must clear the P-bit first when clearing a page table | ||
6643 | - * entry, so clear the bottom half first and enforce ordering with a compiler | ||
6644 | - * barrier. | ||
6645 | - */ | ||
6646 | -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6647 | +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res) | ||
6648 | { | ||
6649 | - if ((mm != current->mm && mm != &init_mm) | ||
6650 | - || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | ||
6651 | - ptep->pte_low = 0; | ||
6652 | - smp_wmb(); | ||
6653 | + uint64_t val = __pte_val(res); | ||
6654 | + if (__cmpxchg64(ptep, val, 0) != val) { | ||
6655 | + /* xchg acts as a barrier before the setting of the high bits */ | ||
6656 | + res.pte_low = xchg(&ptep->pte_low, 0); | ||
6657 | + res.pte_high = ptep->pte_high; | ||
6658 | ptep->pte_high = 0; | ||
6659 | } | ||
6660 | -} | ||
6661 | - | ||
6662 | -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
6663 | - | ||
6664 | -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | ||
6665 | -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6666 | -{ | ||
6667 | - pte_t pte = *ptep; | ||
6668 | - if (!pte_none(pte)) { | ||
6669 | - if ((mm != &init_mm) || | ||
6670 | - HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { | ||
6671 | - uint64_t val = __pte_val(pte); | ||
6672 | - if (__cmpxchg64(ptep, val, 0) != val) { | ||
6673 | - /* xchg acts as a barrier before the setting of the high bits */ | ||
6674 | - pte.pte_low = xchg(&ptep->pte_low, 0); | ||
6675 | - pte.pte_high = ptep->pte_high; | ||
6676 | - ptep->pte_high = 0; | ||
6677 | - } | ||
6678 | - } | ||
6679 | - } | ||
6680 | - return pte; | ||
6681 | + return res; | ||
6682 | } | ||
6683 | |||
6684 | #define __HAVE_ARCH_PTEP_CLEAR_FLUSH | ||
6685 | @@ -160,6 +146,11 @@ | ||
6686 | |||
6687 | #define pte_page(x) pfn_to_page(pte_pfn(x)) | ||
6688 | |||
6689 | +static inline int pte_none(pte_t pte) | ||
6690 | +{ | ||
6691 | + return !(pte.pte_low | pte.pte_high); | ||
6692 | +} | ||
6693 | + | ||
6694 | #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ | ||
6695 | ((_pte).pte_high << (32-PAGE_SHIFT))) | ||
6696 | #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \ | ||
6697 | --- a/include/asm-x86/mach-xen/asm/pgtable_32.h | ||
6698 | +++ b/include/asm-x86/mach-xen/asm/pgtable_32.h | ||
6699 | @@ -38,14 +38,14 @@ | ||
6700 | #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) | ||
6701 | extern unsigned long empty_zero_page[1024]; | ||
6702 | extern pgd_t *swapper_pg_dir; | ||
6703 | -extern kmem_cache_t *pgd_cache; | ||
6704 | -extern kmem_cache_t *pmd_cache; | ||
6705 | +extern struct kmem_cache *pgd_cache; | ||
6706 | +extern struct kmem_cache *pmd_cache; | ||
6707 | extern spinlock_t pgd_lock; | ||
6708 | extern struct page *pgd_list; | ||
6709 | |||
6710 | -void pmd_ctor(void *, kmem_cache_t *, unsigned long); | ||
6711 | -void pgd_ctor(void *, kmem_cache_t *, unsigned long); | ||
6712 | -void pgd_dtor(void *, kmem_cache_t *, unsigned long); | ||
6713 | +void pmd_ctor(void *, struct kmem_cache *, unsigned long); | ||
6714 | +void pgd_ctor(void *, struct kmem_cache *, unsigned long); | ||
6715 | +void pgd_dtor(void *, struct kmem_cache *, unsigned long); | ||
6716 | void pgtable_cache_init(void); | ||
6717 | void paging_init(void); | ||
6718 | |||
6719 | @@ -276,7 +276,6 @@ | ||
6720 | #define pte_update(mm, addr, ptep) do { } while (0) | ||
6721 | #define pte_update_defer(mm, addr, ptep) do { } while (0) | ||
6722 | |||
6723 | - | ||
6724 | /* | ||
6725 | * We only update the dirty/accessed state if we set | ||
6726 | * the dirty bit by hand in the kernel, since the hardware | ||
6727 | @@ -342,6 +341,19 @@ | ||
6728 | __young; \ | ||
6729 | }) | ||
6730 | |||
6731 | +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR | ||
6732 | +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
6733 | +{ | ||
6734 | + pte_t pte = *ptep; | ||
6735 | + if (!pte_none(pte) | ||
6736 | + && (mm != &init_mm | ||
6737 | + || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) { | ||
6738 | + pte = raw_ptep_get_and_clear(ptep, pte); | ||
6739 | + pte_update(mm, addr, ptep); | ||
6740 | + } | ||
6741 | + return pte; | ||
6742 | +} | ||
6743 | + | ||
6744 | #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL | ||
6745 | #define ptep_get_and_clear_full(mm, addr, ptep, full) \ | ||
6746 | ((full) ? ({ \ | ||
6747 | --- a/include/asm-x86/mach-xen/asm/pgtable_64.h | ||
6748 | +++ b/include/asm-x86/mach-xen/asm/pgtable_64.h | ||
6749 | @@ -236,19 +236,18 @@ | ||
6750 | |||
6751 | static inline unsigned long pgd_bad(pgd_t pgd) | ||
6752 | { | ||
6753 | - unsigned long val = __pgd_val(pgd); | ||
6754 | - val &= ~PTE_MASK; | ||
6755 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); | ||
6756 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); | ||
6757 | + return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | ||
6758 | } | ||
6759 | |||
6760 | -static inline unsigned long pud_bad(pud_t pud) | ||
6761 | -{ | ||
6762 | - unsigned long val = __pud_val(pud); | ||
6763 | - val &= ~PTE_MASK; | ||
6764 | - val &= ~(_PAGE_USER | _PAGE_DIRTY); | ||
6765 | - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); | ||
6766 | -} | ||
6767 | +static inline unsigned long pud_bad(pud_t pud) | ||
6768 | +{ | ||
6769 | + return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | ||
6770 | +} | ||
6771 | + | ||
6772 | +static inline unsigned long pmd_bad(pmd_t pmd) | ||
6773 | +{ | ||
6774 | + return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); | ||
6775 | +} | ||
6776 | |||
6777 | #define set_pte_at(_mm,addr,ptep,pteval) do { \ | ||
6778 | if (((_mm) != current->mm && (_mm) != &init_mm) || \ | ||
6779 | @@ -404,8 +403,6 @@ | ||
6780 | #define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT) | ||
6781 | #endif | ||
6782 | #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) | ||
6783 | -#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \ | ||
6784 | - != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT))) | ||
6785 | #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) | ||
6786 | #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) | ||
6787 | |||
6788 | --- a/include/asm-x86/mach-xen/asm/processor_32.h | ||
6789 | +++ b/include/asm-x86/mach-xen/asm/processor_32.h | ||
6790 | @@ -20,6 +20,7 @@ | ||
6791 | #include <linux/threads.h> | ||
6792 | #include <asm/percpu.h> | ||
6793 | #include <linux/cpumask.h> | ||
6794 | +#include <linux/init.h> | ||
6795 | #include <xen/interface/physdev.h> | ||
6796 | |||
6797 | /* flag for disabling the tsc */ | ||
6798 | @@ -73,6 +74,7 @@ | ||
6799 | #endif | ||
6800 | unsigned char x86_max_cores; /* cpuid returned max cores value */ | ||
6801 | unsigned char apicid; | ||
6802 | + unsigned short x86_clflush_size; | ||
6803 | #ifdef CONFIG_SMP | ||
6804 | unsigned char booted_cores; /* number of cores as seen by OS */ | ||
6805 | __u8 phys_proc_id; /* Physical processor id. */ | ||
6806 | @@ -114,6 +116,8 @@ | ||
6807 | extern int cpu_llc_id[NR_CPUS]; | ||
6808 | extern char ignore_fpu_irq; | ||
6809 | |||
6810 | +void __init cpu_detect(struct cpuinfo_x86 *c); | ||
6811 | + | ||
6812 | extern void identify_cpu(struct cpuinfo_x86 *); | ||
6813 | extern void print_cpu_info(struct cpuinfo_x86 *); | ||
6814 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | ||
6815 | @@ -146,8 +150,8 @@ | ||
6816 | #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ | ||
6817 | #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ | ||
6818 | |||
6819 | -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, | ||
6820 | - unsigned int *ecx, unsigned int *edx) | ||
6821 | +static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx, | ||
6822 | + unsigned int *ecx, unsigned int *edx) | ||
6823 | { | ||
6824 | /* ecx is often an input as well as an output. */ | ||
6825 | __asm__(XEN_CPUID | ||
6826 | @@ -158,59 +162,6 @@ | ||
6827 | : "0" (*eax), "2" (*ecx)); | ||
6828 | } | ||
6829 | |||
6830 | -/* | ||
6831 | - * Generic CPUID function | ||
6832 | - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | ||
6833 | - * resulting in stale register contents being returned. | ||
6834 | - */ | ||
6835 | -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) | ||
6836 | -{ | ||
6837 | - *eax = op; | ||
6838 | - *ecx = 0; | ||
6839 | - __cpuid(eax, ebx, ecx, edx); | ||
6840 | -} | ||
6841 | - | ||
6842 | -/* Some CPUID calls want 'count' to be placed in ecx */ | ||
6843 | -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, | ||
6844 | - int *edx) | ||
6845 | -{ | ||
6846 | - *eax = op; | ||
6847 | - *ecx = count; | ||
6848 | - __cpuid(eax, ebx, ecx, edx); | ||
6849 | -} | ||
6850 | - | ||
6851 | -/* | ||
6852 | - * CPUID functions returning a single datum | ||
6853 | - */ | ||
6854 | -static inline unsigned int cpuid_eax(unsigned int op) | ||
6855 | -{ | ||
6856 | - unsigned int eax, ebx, ecx, edx; | ||
6857 | - | ||
6858 | - cpuid(op, &eax, &ebx, &ecx, &edx); | ||
6859 | - return eax; | ||
6860 | -} | ||
6861 | -static inline unsigned int cpuid_ebx(unsigned int op) | ||
6862 | -{ | ||
6863 | - unsigned int eax, ebx, ecx, edx; | ||
6864 | - | ||
6865 | - cpuid(op, &eax, &ebx, &ecx, &edx); | ||
6866 | - return ebx; | ||
6867 | -} | ||
6868 | -static inline unsigned int cpuid_ecx(unsigned int op) | ||
6869 | -{ | ||
6870 | - unsigned int eax, ebx, ecx, edx; | ||
6871 | - | ||
6872 | - cpuid(op, &eax, &ebx, &ecx, &edx); | ||
6873 | - return ecx; | ||
6874 | -} | ||
6875 | -static inline unsigned int cpuid_edx(unsigned int op) | ||
6876 | -{ | ||
6877 | - unsigned int eax, ebx, ecx, edx; | ||
6878 | - | ||
6879 | - cpuid(op, &eax, &ebx, &ecx, &edx); | ||
6880 | - return edx; | ||
6881 | -} | ||
6882 | - | ||
6883 | #define load_cr3(pgdir) write_cr3(__pa(pgdir)) | ||
6884 | |||
6885 | /* | ||
6886 | @@ -480,9 +431,9 @@ | ||
6887 | .vm86_info = NULL, \ | ||
6888 | .sysenter_cs = __KERNEL_CS, \ | ||
6889 | .io_bitmap_ptr = NULL, \ | ||
6890 | + .gs = __KERNEL_PDA, \ | ||
6891 | } | ||
6892 | |||
6893 | -#ifndef CONFIG_X86_NO_TSS | ||
6894 | /* | ||
6895 | * Note that the .io_bitmap member must be extra-big. This is because | ||
6896 | * the CPU will access an additional byte beyond the end of the IO | ||
6897 | @@ -497,26 +448,9 @@ | ||
6898 | .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ | ||
6899 | } | ||
6900 | |||
6901 | -static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) | ||
6902 | -{ | ||
6903 | - tss->esp0 = thread->esp0; | ||
6904 | - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
6905 | - if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
6906 | - tss->ss1 = thread->sysenter_cs; | ||
6907 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
6908 | - } | ||
6909 | -} | ||
6910 | -#define load_esp0(tss, thread) \ | ||
6911 | - __load_esp0(tss, thread) | ||
6912 | -#else | ||
6913 | -#define load_esp0(tss, thread) do { \ | ||
6914 | - if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | ||
6915 | - BUG(); \ | ||
6916 | -} while (0) | ||
6917 | -#endif | ||
6918 | - | ||
6919 | #define start_thread(regs, new_eip, new_esp) do { \ | ||
6920 | - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ | ||
6921 | + __asm__("movl %0,%%fs": :"r" (0)); \ | ||
6922 | + regs->xgs = 0; \ | ||
6923 | set_fs(USER_DS); \ | ||
6924 | regs->xds = __USER_DS; \ | ||
6925 | regs->xes = __USER_DS; \ | ||
6926 | @@ -526,26 +460,6 @@ | ||
6927 | regs->esp = new_esp; \ | ||
6928 | } while (0) | ||
6929 | |||
6930 | -/* | ||
6931 | - * These special macros can be used to get or set a debugging register | ||
6932 | - */ | ||
6933 | -#define get_debugreg(var, register) \ | ||
6934 | - (var) = HYPERVISOR_get_debugreg((register)) | ||
6935 | -#define set_debugreg(value, register) \ | ||
6936 | - WARN_ON(HYPERVISOR_set_debugreg((register), (value))) | ||
6937 | - | ||
6938 | -/* | ||
6939 | - * Set IOPL bits in EFLAGS from given mask | ||
6940 | - */ | ||
6941 | -static inline void set_iopl_mask(unsigned mask) | ||
6942 | -{ | ||
6943 | - struct physdev_set_iopl set_iopl; | ||
6944 | - | ||
6945 | - /* Force the change at ring 0. */ | ||
6946 | - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | ||
6947 | - WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | ||
6948 | -} | ||
6949 | - | ||
6950 | /* Forward declaration, a strange C thing */ | ||
6951 | struct task_struct; | ||
6952 | struct mm_struct; | ||
6953 | @@ -637,6 +551,105 @@ | ||
6954 | |||
6955 | #define cpu_relax() rep_nop() | ||
6956 | |||
6957 | +#define paravirt_enabled() 0 | ||
6958 | +#define __cpuid xen_cpuid | ||
6959 | + | ||
6960 | +#ifndef CONFIG_X86_NO_TSS | ||
6961 | +static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread) | ||
6962 | +{ | ||
6963 | + tss->esp0 = thread->esp0; | ||
6964 | + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
6965 | + if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
6966 | + tss->ss1 = thread->sysenter_cs; | ||
6967 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
6968 | + } | ||
6969 | +} | ||
6970 | +#define load_esp0(tss, thread) \ | ||
6971 | + __load_esp0(tss, thread) | ||
6972 | +#else | ||
6973 | +#define load_esp0(tss, thread) do { \ | ||
6974 | + if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \ | ||
6975 | + BUG(); \ | ||
6976 | +} while (0) | ||
6977 | +#endif | ||
6978 | + | ||
6979 | + | ||
6980 | +/* | ||
6981 | + * These special macros can be used to get or set a debugging register | ||
6982 | + */ | ||
6983 | +#define get_debugreg(var, register) \ | ||
6984 | + (var) = HYPERVISOR_get_debugreg(register) | ||
6985 | +#define set_debugreg(value, register) \ | ||
6986 | + WARN_ON(HYPERVISOR_set_debugreg(register, value)) | ||
6987 | + | ||
6988 | +#define set_iopl_mask xen_set_iopl_mask | ||
6989 | + | ||
6990 | +/* | ||
6991 | + * Set IOPL bits in EFLAGS from given mask | ||
6992 | + */ | ||
6993 | +static inline void xen_set_iopl_mask(unsigned mask) | ||
6994 | +{ | ||
6995 | + struct physdev_set_iopl set_iopl; | ||
6996 | + | ||
6997 | + /* Force the change at ring 0. */ | ||
6998 | + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; | ||
6999 | + WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl)); | ||
7000 | +} | ||
7001 | + | ||
7002 | + | ||
7003 | +/* | ||
7004 | + * Generic CPUID function | ||
7005 | + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx | ||
7006 | + * resulting in stale register contents being returned. | ||
7007 | + */ | ||
7008 | +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) | ||
7009 | +{ | ||
7010 | + *eax = op; | ||
7011 | + *ecx = 0; | ||
7012 | + __cpuid(eax, ebx, ecx, edx); | ||
7013 | +} | ||
7014 | + | ||
7015 | +/* Some CPUID calls want 'count' to be placed in ecx */ | ||
7016 | +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, | ||
7017 | + int *edx) | ||
7018 | +{ | ||
7019 | + *eax = op; | ||
7020 | + *ecx = count; | ||
7021 | + __cpuid(eax, ebx, ecx, edx); | ||
7022 | +} | ||
7023 | + | ||
7024 | +/* | ||
7025 | + * CPUID functions returning a single datum | ||
7026 | + */ | ||
7027 | +static inline unsigned int cpuid_eax(unsigned int op) | ||
7028 | +{ | ||
7029 | + unsigned int eax, ebx, ecx, edx; | ||
7030 | + | ||
7031 | + cpuid(op, &eax, &ebx, &ecx, &edx); | ||
7032 | + return eax; | ||
7033 | +} | ||
7034 | +static inline unsigned int cpuid_ebx(unsigned int op) | ||
7035 | +{ | ||
7036 | + unsigned int eax, ebx, ecx, edx; | ||
7037 | + | ||
7038 | + cpuid(op, &eax, &ebx, &ecx, &edx); | ||
7039 | + return ebx; | ||
7040 | +} | ||
7041 | +static inline unsigned int cpuid_ecx(unsigned int op) | ||
7042 | +{ | ||
7043 | + unsigned int eax, ebx, ecx, edx; | ||
7044 | + | ||
7045 | + cpuid(op, &eax, &ebx, &ecx, &edx); | ||
7046 | + return ecx; | ||
7047 | +} | ||
7048 | +static inline unsigned int cpuid_edx(unsigned int op) | ||
7049 | +{ | ||
7050 | + unsigned int eax, ebx, ecx, edx; | ||
7051 | + | ||
7052 | + cpuid(op, &eax, &ebx, &ecx, &edx); | ||
7053 | + return edx; | ||
7054 | +} | ||
7055 | + | ||
7056 | /* generic versions from gas */ | ||
7057 | #define GENERIC_NOP1 ".byte 0x90\n" | ||
7058 | #define GENERIC_NOP2 ".byte 0x89,0xf6\n" | ||
7059 | @@ -736,4 +749,8 @@ | ||
7060 | extern void enable_sep_cpu(void); | ||
7061 | extern int sysenter_setup(void); | ||
7062 | |||
7063 | +extern int init_gdt(int cpu, struct task_struct *idle); | ||
7064 | +extern void cpu_set_gdt(int); | ||
7065 | +extern void secondary_cpu_init(void); | ||
7066 | + | ||
7067 | #endif /* __ASM_I386_PROCESSOR_H */ | ||
7068 | --- a/include/asm-x86/mach-xen/asm/processor_64.h | ||
7069 | +++ b/include/asm-x86/mach-xen/asm/processor_64.h | ||
7070 | @@ -484,6 +484,14 @@ | ||
7071 | : :"a" (eax), "c" (ecx)); | ||
7072 | } | ||
7073 | |||
7074 | +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
7075 | +{ | ||
7076 | + /* "mwait %eax,%ecx;" */ | ||
7077 | + asm volatile( | ||
7078 | + "sti; .byte 0x0f,0x01,0xc9;" | ||
7079 | + : :"a" (eax), "c" (ecx)); | ||
7080 | +} | ||
7081 | + | ||
7082 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | ||
7083 | |||
7084 | #define stack_current() \ | ||
7085 | --- a/include/asm-x86/mach-xen/asm/segment_32.h | ||
7086 | +++ b/include/asm-x86/mach-xen/asm/segment_32.h | ||
7087 | @@ -39,7 +39,7 @@ | ||
7088 | * 25 - APM BIOS support | ||
7089 | * | ||
7090 | * 26 - ESPFIX small SS | ||
7091 | - * 27 - unused | ||
7092 | + * 27 - PDA [ per-cpu private data area ] | ||
7093 | * 28 - unused | ||
7094 | * 29 - unused | ||
7095 | * 30 - unused | ||
7096 | @@ -74,6 +74,9 @@ | ||
7097 | #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) | ||
7098 | #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) | ||
7099 | |||
7100 | +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) | ||
7101 | +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) | ||
7102 | + | ||
7103 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | ||
7104 | |||
7105 | /* | ||
7106 | --- a/include/asm-x86/mach-xen/asm/smp_32.h | ||
7107 | +++ b/include/asm-x86/mach-xen/asm/smp_32.h | ||
7108 | @@ -8,6 +8,7 @@ | ||
7109 | #include <linux/kernel.h> | ||
7110 | #include <linux/threads.h> | ||
7111 | #include <linux/cpumask.h> | ||
7112 | +#include <asm/pda.h> | ||
7113 | #endif | ||
7114 | |||
7115 | #ifdef CONFIG_X86_LOCAL_APIC | ||
7116 | @@ -56,7 +57,7 @@ | ||
7117 | * from the initial startup. We map APIC_BASE very early in page_setup(), | ||
7118 | * so this is correct in the x86 case. | ||
7119 | */ | ||
7120 | -#define raw_smp_processor_id() (current_thread_info()->cpu) | ||
7121 | +#define raw_smp_processor_id() (read_pda(cpu_number)) | ||
7122 | |||
7123 | extern cpumask_t cpu_possible_map; | ||
7124 | #define cpu_callin_map cpu_possible_map | ||
7125 | --- a/include/asm-x86/mach-xen/asm/smp_64.h | ||
7126 | +++ b/include/asm-x86/mach-xen/asm/smp_64.h | ||
7127 | @@ -88,11 +88,6 @@ | ||
7128 | extern u8 bios_cpu_apicid[]; | ||
7129 | |||
7130 | #ifdef CONFIG_X86_LOCAL_APIC | ||
7131 | -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | ||
7132 | -{ | ||
7133 | - return cpus_addr(cpumask)[0]; | ||
7134 | -} | ||
7135 | - | ||
7136 | static inline int cpu_present_to_apicid(int mps_cpu) | ||
7137 | { | ||
7138 | if (mps_cpu < NR_CPUS) | ||
7139 | @@ -127,13 +122,6 @@ | ||
7140 | #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] | ||
7141 | #else | ||
7142 | #define cpu_physical_id(cpu) boot_cpu_id | ||
7143 | -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), | ||
7144 | - void *info, int retry, int wait) | ||
7145 | -{ | ||
7146 | - /* Disable interrupts here? */ | ||
7147 | - func(info); | ||
7148 | - return 0; | ||
7149 | -} | ||
7150 | #endif /* !CONFIG_SMP */ | ||
7151 | #endif | ||
7152 | |||
7153 | --- a/include/asm-x86/mach-xen/asm/system_32.h | ||
7154 | +++ b/include/asm-x86/mach-xen/asm/system_32.h | ||
7155 | @@ -139,17 +139,17 @@ | ||
7156 | #define write_cr4(x) \ | ||
7157 | __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) | ||
7158 | |||
7159 | -/* | ||
7160 | - * Clear and set 'TS' bit respectively | ||
7161 | - */ | ||
7162 | +#define wbinvd() \ | ||
7163 | + __asm__ __volatile__ ("wbinvd": : :"memory") | ||
7164 | + | ||
7165 | +/* Clear the 'TS' bit */ | ||
7166 | #define clts() (HYPERVISOR_fpu_taskswitch(0)) | ||
7167 | + | ||
7168 | +/* Set the 'TS' bit */ | ||
7169 | #define stts() (HYPERVISOR_fpu_taskswitch(1)) | ||
7170 | |||
7171 | #endif /* __KERNEL__ */ | ||
7172 | |||
7173 | -#define wbinvd() \ | ||
7174 | - __asm__ __volatile__ ("wbinvd": : :"memory") | ||
7175 | - | ||
7176 | static inline unsigned long get_limit(unsigned long segment) | ||
7177 | { | ||
7178 | unsigned long __limit; | ||
7179 | --- a/kernel/kexec.c | ||
7180 | +++ b/kernel/kexec.c | ||
7181 | @@ -353,7 +353,7 @@ | ||
7182 | if (limit == ~0UL) | ||
7183 | address_bits = BITS_PER_LONG; | ||
7184 | else | ||
7185 | - address_bits = long_log2(limit); | ||
7186 | + address_bits = ilog2(limit); | ||
7187 | |||
7188 | if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) { | ||
7189 | __free_pages(pages, order); | ||
7190 | --- a/net/core/dev.c | ||
7191 | +++ b/net/core/dev.c | ||
7192 | @@ -1597,10 +1597,10 @@ | ||
7193 | goto out; | ||
7194 | switch (skb->nh.iph->protocol) { | ||
7195 | case IPPROTO_TCP: | ||
7196 | - skb->csum = offsetof(struct tcphdr, check); | ||
7197 | + skb->csum_offset = offsetof(struct tcphdr, check); | ||
7198 | break; | ||
7199 | case IPPROTO_UDP: | ||
7200 | - skb->csum = offsetof(struct udphdr, check); | ||
7201 | + skb->csum_offset = offsetof(struct udphdr, check); | ||
7202 | break; | ||
7203 | default: | ||
7204 | if (net_ratelimit()) | ||
7205 | @@ -1609,7 +1609,7 @@ | ||
7206 | " %d packet", skb->nh.iph->protocol); | ||
7207 | goto out; | ||
7208 | } | ||
7209 | - if ((skb->h.raw + skb->csum + 2) > skb->tail) | ||
7210 | + if ((skb->h.raw + skb->csum_offset + 2) > skb->tail) | ||
7211 | goto out; | ||
7212 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
7213 | skb->proto_csum_blank = 0; |