Annotation of /trunk/kernel-magellan/patches-3.2/0107-3.2.8-all-fixes.patch
Revision 1663 - Fri Mar 2 10:03:33 2012 UTC (12 years, 6 months ago) by niro
File size: 19328 byte(s)
-added more upstream patches
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index c9e09ea..a850b4d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
+extern void __math_state_restore(struct task_struct *);
+extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

 extern user_regset_active_fn fpregs_active, xfpregs_active;
@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu)

 #endif /* CONFIG_X86_64 */

-/* We need a safe address that is cheap to find and that is already
-   in L1 during context switch. The best choices are unfortunately
-   different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (kstat_cpu(0).cpustat.user)
-#endif
-
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}

-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"		/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
+		return 0;
+	}
+	return 1;
 }

-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
+	return fpu_save_init(&tsk->thread.fpu);
 }

 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
 }

 /*
- * Signal frame handlers...
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+	return tsk->thread.has_fpu;
+}

-static inline void __unlazy_fpu(struct task_struct *tsk)
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		__save_init_fpu(tsk);
-		stts();
-	} else
-		tsk->fpu_counter = 0;
+	tsk->thread.has_fpu = 0;
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+	tsk->thread.has_fpu = 1;
 }

+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+	__thread_clear_has_fpu(tsk);
+	stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+	clts();
+	__thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (__save_init_fpu(old))
+			fpu_lazy_state_intact(old);
+		__thread_clear_has_fpu(old);
+		old->fpu_counter++;
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		if (fpu.preload) {
+			if (fpu_lazy_restore(new))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+	if (fpu.preload)
+		__math_state_restore(new);
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		__thread_fpu_end(tsk);
 	}
 }

+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+	return !__thread_has_fpu(current) &&
+		(read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+	return !in_interrupt() ||
+		interrupted_user_mode() ||
+		interrupted_kernel_fpu_idle();
+}
+
 static inline void kernel_fpu_begin(void)
 {
-	struct thread_info *me = current_thread_info();
+	struct task_struct *me = current;
+
+	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
-	if (me->status & TS_USEDFPU)
-		__save_init_fpu(me->task);
-	else
+	if (__thread_has_fpu(me)) {
+		__save_init_fpu(me);
+		__thread_clear_has_fpu(me);
+		/* We do 'stts()' in kernel_fpu_end() */
+	} else
 		clts();
 }

@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }

-static inline bool irq_fpu_usable(void)
-{
-	struct pt_regs *regs;
-
-	return !in_interrupt() || !(regs = get_irq_regs()) || \
-		user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state)
 }

 /*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+	return __thread_has_fpu(current);
+}
+
+static inline void user_fpu_end(void)
+{
+	preempt_disable();
+	__thread_fpu_end(current);
+	preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+	preempt_disable();
+	if (!user_has_fpu())
+		__thread_fpu_begin(current);
+	preempt_enable();
+}
+
+/*
  * These disable preemption on their own and are safe
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
+	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	stts();
+	__thread_fpu_end(tsk);
 	preempt_enable();
 }

 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b650435..bb3ee36 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -456,6 +456,7 @@ struct thread_struct {
 	unsigned long		trap_no;
 	unsigned long		error_code;
 	/* floating point and extended processor state */
+	unsigned long		has_fpu;
 	struct fpu		fpu;
 #ifdef CONFIG_X86_32
 	/* Virtual 86 mode info */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c1..d7ef849 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -242,8 +242,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 795b79f..8598296 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -297,22 +297,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			     *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
-	bool preload_fpu;
+	fpu_switch_t fpu;

 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	__unlazy_fpu(prev_p);
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
+	fpu = switch_fpu_prepare(prev_p, next_p);

 	/*
 	 * Reload esp0.
@@ -352,11 +341,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);

-	/* If we're going to preload the fpu context, make sure clts
-	   is run while we're batching the cpu state updates. */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -366,15 +350,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);

-	if (preload_fpu)
-		__math_state_restore();
-
 	/*
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);

+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);

 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3bd7e6e..6a364a6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -381,18 +381,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
-	bool preload_fpu;
+	fpu_switch_t fpu;

-	/*
-	 * If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 */
-	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
-
-	/* we're going to use this soon, after a few expensive things */
-	if (preload_fpu)
-		prefetch(next->fpu.state);
+	fpu = switch_fpu_prepare(prev_p, next_p);

 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -422,13 +413,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)

 	load_TLS(next, cpu);

-	/* Must be after DS reload */
-	__unlazy_fpu(prev_p);
-
-	/* Make sure cpu is ready for new context */
-	if (preload_fpu)
-		clts();
-
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -469,6 +453,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;

+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -487,13 +473,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);

-	/*
-	 * Preload the FPU context, now that we've determined that the
-	 * task is likely to be using it.
-	 */
-	if (preload_fpu)
-		__math_state_restore();
-
 	return prev_p;
 }

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a8e3eb8..31d9d0f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -562,25 +562,34 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }

 /*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use. Used during context switch.
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
  */
-void __math_state_restore(void)
+void __math_state_restore(struct task_struct *tsk)
 {
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending. Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));

 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
+		__thread_fpu_end(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
 }

 /*
@@ -590,13 +599,12 @@ void __math_state_restore(void)
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
  *
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
  */
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
 {
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
+	struct task_struct *tsk = current;

 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
@@ -613,9 +621,10 @@ asmlinkage void math_state_restore(void)
 		local_irq_disable();
 	}

-	clts();				/* Allow maths ops (or we recurse) */
+	__thread_fpu_begin(tsk);
+	__math_state_restore(tsk);

-	__math_state_restore();
+	tsk->fpu_counter++;
 }
 EXPORT_SYMBOL_GPL(math_state_restore);

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a391134..7110911 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
 	if (!fx)
 		return;

-	BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+	BUG_ON(__thread_has_fpu(tsk));

 	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;

@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
 	if (!used_math())
 		return 0;

-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (user_has_fpu()) {
 		if (use_xsave())
 			err = xsave_user(buf);
 		else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)

 		if (err)
 			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
 		if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
 			return err;
 	}

-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
+	user_fpu_begin();
 	if (use_xsave())
 		err = restore_user_xstate(buf);
 	else
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 579a0b5..4ea7678 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1456,7 +1456,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
 	wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-	if (current_thread_info()->status & TS_USEDFPU)
+	if (__thread_has_fpu(current))
 		clts();
 	load_gdt(&__get_cpu_var(host_gdt));
 }
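
The hunks above replace the TS_USEDFPU flag and __unlazy_fpu() with the __thread_has_fpu()/irq_fpu_usable() helpers. For reference only, and not part of the patch itself, here is a minimal, hypothetical sketch of how kernel code typically consumes that API: check irq_fpu_usable() before touching FPU/SSE state, then bracket the work with kernel_fpu_begin()/kernel_fpu_end(). The function example_clear_buffer and its memset fallback are illustrative assumptions, not code from this patch.

/* Hypothetical usage sketch -- not part of the patch above. */
#include <linux/string.h>
#include <linux/types.h>
#include <asm/i387.h>

static void example_clear_buffer(void *dst, size_t len)
{
	if (!irq_fpu_usable()) {
		memset(dst, 0, len);	/* plain C fallback when FPU use is not allowed here */
		return;
	}

	kernel_fpu_begin();	/* saves the current owner's FPU state if it is live */
	memset(dst, 0, len);	/* stand-in for an SSE/AVX-accelerated clear */
	kernel_fpu_end();	/* sets CR0.TS again so user FPU state is restored lazily */
}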