Contents of /trunk/kernel-magellan/patches-4.15/0101-4.15.2-all-fixes.patch
Revision 3085
Wed Mar 21 14:52:15 2018 UTC (6 years, 6 months ago) by niro
File size: 122489 byte(s)
-linux-4.15.2
1 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt |
2 | index 46b26bfee27b..1e762c210f1b 100644 |
3 | --- a/Documentation/admin-guide/kernel-parameters.txt |
4 | +++ b/Documentation/admin-guide/kernel-parameters.txt |
5 | @@ -2742,8 +2742,6 @@ |
6 | norandmaps Don't use address space randomization. Equivalent to |
7 | echo 0 > /proc/sys/kernel/randomize_va_space |
8 | |
9 | - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops |
10 | - |
11 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions |
12 | with UP alternatives |
13 | |
14 | diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt |
15 | new file mode 100644 |
16 | index 000000000000..e9e6cbae2841 |
17 | --- /dev/null |
18 | +++ b/Documentation/speculation.txt |
19 | @@ -0,0 +1,90 @@ |
20 | +This document explains potential effects of speculation, and how undesirable |
21 | +effects can be mitigated portably using common APIs. |
22 | + |
23 | +=========== |
24 | +Speculation |
25 | +=========== |
26 | + |
27 | +To improve performance and minimize average latencies, many contemporary CPUs |
28 | +employ speculative execution techniques such as branch prediction, performing |
29 | +work which may be discarded at a later stage. |
30 | + |
31 | +Typically speculative execution cannot be observed from architectural state, |
32 | +such as the contents of registers. However, in some cases it is possible to |
33 | +observe its impact on microarchitectural state, such as the presence or |
34 | +absence of data in caches. Such state may form side-channels which can be |
35 | +observed to extract secret information. |
36 | + |
37 | +For example, in the presence of branch prediction, it is possible for bounds |
38 | +checks to be ignored by code which is speculatively executed. Consider the |
39 | +following code: |
40 | + |
41 | + int load_array(int *array, unsigned int index) |
42 | + { |
43 | + if (index >= MAX_ARRAY_ELEMS) |
44 | + return 0; |
45 | + else |
46 | + return array[index]; |
47 | + } |
48 | + |
49 | +Which, on arm64, may be compiled to an assembly sequence such as: |
50 | + |
51 | + CMP <index>, #MAX_ARRAY_ELEMS |
52 | + B.LT less |
53 | + MOV <returnval>, #0 |
54 | + RET |
55 | + less: |
56 | + LDR <returnval>, [<array>, <index>] |
57 | + RET |
58 | + |
59 | +It is possible that a CPU mis-predicts the conditional branch, and |
60 | +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This |
61 | +value will subsequently be discarded, but the speculated load may affect |
62 | +microarchitectural state which can be subsequently measured. |
63 | + |
64 | +More complex sequences involving multiple dependent memory accesses may |
65 | +result in sensitive information being leaked. Consider the following |
66 | +code, building on the prior example: |
67 | + |
68 | + int load_dependent_arrays(int *arr1, int *arr2, int index) |
69 | + { |
70 | + int val1, val2, |
71 | + |
72 | + val1 = load_array(arr1, index); |
73 | + val2 = load_array(arr2, val1); |
74 | + |
75 | + return val2; |
76 | + } |
77 | + |
78 | +Under speculation, the first call to load_array() may return the value |
79 | +of an out-of-bounds address, while the second call will influence |
80 | +microarchitectural state dependent on this value. This may provide an |
81 | +arbitrary read primitive. |
82 | + |
83 | +==================================== |
84 | +Mitigating speculation side-channels |
85 | +==================================== |
86 | + |
87 | +The kernel provides a generic API to ensure that bounds checks are |
88 | +respected even under speculation. Architectures which are affected by |
89 | +speculation-based side-channels are expected to implement these |
90 | +primitives. |
91 | + |
92 | +The array_index_nospec() helper in <linux/nospec.h> can be used to |
93 | +prevent information from being leaked via side-channels. |
94 | + |
95 | +A call to array_index_nospec(index, size) returns a sanitized index |
96 | +value that is bounded to [0, size) even under cpu speculation |
97 | +conditions. |
98 | + |
99 | +This can be used to protect the earlier load_array() example: |
100 | + |
101 | + int load_array(int *array, unsigned int index) |
102 | + { |
103 | + if (index >= MAX_ARRAY_ELEMS) |
104 | + return 0; |
105 | + else { |
106 | + index = array_index_nospec(index, MAX_ARRAY_ELEMS); |
107 | + return array[index]; |
108 | + } |
109 | + } |
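The array_index_nospec() behaviour documented above amounts to AND-ing the index with a branch-free mask that is all-ones inside the bounds and zero outside them. A rough, self-contained C sketch of that masking idea follows (an illustration only, not the kernel's exact generic implementation; the x86 variant added later in this patch computes the same mask with a CMP/SBB sequence):

    /* Illustration of the nospec bounds-mask idea; assumes an arithmetic
     * right shift for signed longs, as the kernel relies on with its
     * supported compilers. Not the kernel's exact code. */
    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    static unsigned long index_mask(unsigned long index, unsigned long size)
    {
            /* (size - 1 - index) underflows when index >= size, setting the
             * top bit; the complement and arithmetic shift turn that top bit
             * into an all-zeroes or all-ones mask without any branch. */
            return ~(long)(index | (size - 1 - index)) >> (BITS_PER_LONG - 1);
    }

    int main(void)
    {
            unsigned long size = 8;

            for (unsigned long idx = 0; idx < 12; idx++)
                    printf("index %2lu -> clamped %2lu\n",
                           idx, idx & index_mask(idx, size));
            return 0;
    }

Even if the CPU speculates past the bounds check, the clamped index can only reach element 0, so no attacker-controlled out-of-bounds address is dereferenced.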
110 | diff --git a/Makefile b/Makefile |
111 | index af101b556ba0..54f1bc10b531 100644 |
112 | --- a/Makefile |
113 | +++ b/Makefile |
114 | @@ -1,7 +1,7 @@ |
115 | # SPDX-License-Identifier: GPL-2.0 |
116 | VERSION = 4 |
117 | PATCHLEVEL = 15 |
118 | -SUBLEVEL = 1 |
119 | +SUBLEVEL = 2 |
120 | EXTRAVERSION = |
121 | NAME = Fearless Coyote |
122 | |
123 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c |
124 | index d7d3cc24baf4..21dbdf0e476b 100644 |
125 | --- a/arch/x86/entry/common.c |
126 | +++ b/arch/x86/entry/common.c |
127 | @@ -21,6 +21,7 @@ |
128 | #include <linux/export.h> |
129 | #include <linux/context_tracking.h> |
130 | #include <linux/user-return-notifier.h> |
131 | +#include <linux/nospec.h> |
132 | #include <linux/uprobes.h> |
133 | #include <linux/livepatch.h> |
134 | #include <linux/syscalls.h> |
135 | @@ -206,7 +207,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
136 | * special case only applies after poking regs and before the |
137 | * very next return to user mode. |
138 | */ |
139 | - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
140 | + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
141 | #endif |
142 | |
143 | user_enter_irqoff(); |
144 | @@ -282,7 +283,8 @@ __visible void do_syscall_64(struct pt_regs *regs) |
145 | * regs->orig_ax, which changes the behavior of some syscalls. |
146 | */ |
147 | if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { |
148 | - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( |
149 | + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); |
150 | + regs->ax = sys_call_table[nr]( |
151 | regs->di, regs->si, regs->dx, |
152 | regs->r10, regs->r8, regs->r9); |
153 | } |
154 | @@ -304,7 +306,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
155 | unsigned int nr = (unsigned int)regs->orig_ax; |
156 | |
157 | #ifdef CONFIG_IA32_EMULATION |
158 | - current->thread.status |= TS_COMPAT; |
159 | + ti->status |= TS_COMPAT; |
160 | #endif |
161 | |
162 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { |
163 | @@ -318,6 +320,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
164 | } |
165 | |
166 | if (likely(nr < IA32_NR_syscalls)) { |
167 | + nr = array_index_nospec(nr, IA32_NR_syscalls); |
168 | /* |
169 | * It's possible that a 32-bit syscall implementation |
170 | * takes a 64-bit parameter but nonetheless assumes that |
171 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
172 | index 60c4c342316c..2a35b1e0fb90 100644 |
173 | --- a/arch/x86/entry/entry_32.S |
174 | +++ b/arch/x86/entry/entry_32.S |
175 | @@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) |
176 | * exist, overwrite the RSB with entries which capture |
177 | * speculative execution to prevent attack. |
178 | */ |
179 | - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
180 | + /* Clobbers %ebx */ |
181 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
182 | #endif |
183 | |
184 | /* restore callee-saved registers */ |
185 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
186 | index ff6f8022612c..c752abe89d80 100644 |
187 | --- a/arch/x86/entry/entry_64.S |
188 | +++ b/arch/x86/entry/entry_64.S |
189 | @@ -236,91 +236,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) |
190 | pushq %r9 /* pt_regs->r9 */ |
191 | pushq %r10 /* pt_regs->r10 */ |
192 | pushq %r11 /* pt_regs->r11 */ |
193 | - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ |
194 | - UNWIND_HINT_REGS extra=0 |
195 | - |
196 | - TRACE_IRQS_OFF |
197 | - |
198 | - /* |
199 | - * If we need to do entry work or if we guess we'll need to do |
200 | - * exit work, go straight to the slow path. |
201 | - */ |
202 | - movq PER_CPU_VAR(current_task), %r11 |
203 | - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
204 | - jnz entry_SYSCALL64_slow_path |
205 | - |
206 | -entry_SYSCALL_64_fastpath: |
207 | - /* |
208 | - * Easy case: enable interrupts and issue the syscall. If the syscall |
209 | - * needs pt_regs, we'll call a stub that disables interrupts again |
210 | - * and jumps to the slow path. |
211 | - */ |
212 | - TRACE_IRQS_ON |
213 | - ENABLE_INTERRUPTS(CLBR_NONE) |
214 | -#if __SYSCALL_MASK == ~0 |
215 | - cmpq $__NR_syscall_max, %rax |
216 | -#else |
217 | - andl $__SYSCALL_MASK, %eax |
218 | - cmpl $__NR_syscall_max, %eax |
219 | -#endif |
220 | - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
221 | - movq %r10, %rcx |
222 | - |
223 | - /* |
224 | - * This call instruction is handled specially in stub_ptregs_64. |
225 | - * It might end up jumping to the slow path. If it jumps, RAX |
226 | - * and all argument registers are clobbered. |
227 | - */ |
228 | -#ifdef CONFIG_RETPOLINE |
229 | - movq sys_call_table(, %rax, 8), %rax |
230 | - call __x86_indirect_thunk_rax |
231 | -#else |
232 | - call *sys_call_table(, %rax, 8) |
233 | -#endif |
234 | -.Lentry_SYSCALL_64_after_fastpath_call: |
235 | - |
236 | - movq %rax, RAX(%rsp) |
237 | -1: |
238 | + pushq %rbx /* pt_regs->rbx */ |
239 | + pushq %rbp /* pt_regs->rbp */ |
240 | + pushq %r12 /* pt_regs->r12 */ |
241 | + pushq %r13 /* pt_regs->r13 */ |
242 | + pushq %r14 /* pt_regs->r14 */ |
243 | + pushq %r15 /* pt_regs->r15 */ |
244 | + UNWIND_HINT_REGS |
245 | |
246 | - /* |
247 | - * If we get here, then we know that pt_regs is clean for SYSRET64. |
248 | - * If we see that no exit work is required (which we are required |
249 | - * to check with IRQs off), then we can go straight to SYSRET64. |
250 | - */ |
251 | - DISABLE_INTERRUPTS(CLBR_ANY) |
252 | TRACE_IRQS_OFF |
253 | - movq PER_CPU_VAR(current_task), %r11 |
254 | - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
255 | - jnz 1f |
256 | - |
257 | - LOCKDEP_SYS_EXIT |
258 | - TRACE_IRQS_ON /* user mode is traced as IRQs on */ |
259 | - movq RIP(%rsp), %rcx |
260 | - movq EFLAGS(%rsp), %r11 |
261 | - addq $6*8, %rsp /* skip extra regs -- they were preserved */ |
262 | - UNWIND_HINT_EMPTY |
263 | - jmp .Lpop_c_regs_except_rcx_r11_and_sysret |
264 | |
265 | -1: |
266 | - /* |
267 | - * The fast path looked good when we started, but something changed |
268 | - * along the way and we need to switch to the slow path. Calling |
269 | - * raise(3) will trigger this, for example. IRQs are off. |
270 | - */ |
271 | - TRACE_IRQS_ON |
272 | - ENABLE_INTERRUPTS(CLBR_ANY) |
273 | - SAVE_EXTRA_REGS |
274 | - movq %rsp, %rdi |
275 | - call syscall_return_slowpath /* returns with IRQs disabled */ |
276 | - jmp return_from_SYSCALL_64 |
277 | - |
278 | -entry_SYSCALL64_slow_path: |
279 | /* IRQs are off. */ |
280 | - SAVE_EXTRA_REGS |
281 | movq %rsp, %rdi |
282 | call do_syscall_64 /* returns with IRQs disabled */ |
283 | |
284 | -return_from_SYSCALL_64: |
285 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
286 | |
287 | /* |
288 | @@ -393,7 +322,6 @@ syscall_return_via_sysret: |
289 | /* rcx and r11 are already restored (see code above) */ |
290 | UNWIND_HINT_EMPTY |
291 | POP_EXTRA_REGS |
292 | -.Lpop_c_regs_except_rcx_r11_and_sysret: |
293 | popq %rsi /* skip r11 */ |
294 | popq %r10 |
295 | popq %r9 |
296 | @@ -424,47 +352,6 @@ syscall_return_via_sysret: |
297 | USERGS_SYSRET64 |
298 | END(entry_SYSCALL_64) |
299 | |
300 | -ENTRY(stub_ptregs_64) |
301 | - /* |
302 | - * Syscalls marked as needing ptregs land here. |
303 | - * If we are on the fast path, we need to save the extra regs, |
304 | - * which we achieve by trying again on the slow path. If we are on |
305 | - * the slow path, the extra regs are already saved. |
306 | - * |
307 | - * RAX stores a pointer to the C function implementing the syscall. |
308 | - * IRQs are on. |
309 | - */ |
310 | - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) |
311 | - jne 1f |
312 | - |
313 | - /* |
314 | - * Called from fast path -- disable IRQs again, pop return address |
315 | - * and jump to slow path |
316 | - */ |
317 | - DISABLE_INTERRUPTS(CLBR_ANY) |
318 | - TRACE_IRQS_OFF |
319 | - popq %rax |
320 | - UNWIND_HINT_REGS extra=0 |
321 | - jmp entry_SYSCALL64_slow_path |
322 | - |
323 | -1: |
324 | - JMP_NOSPEC %rax /* Called from C */ |
325 | -END(stub_ptregs_64) |
326 | - |
327 | -.macro ptregs_stub func |
328 | -ENTRY(ptregs_\func) |
329 | - UNWIND_HINT_FUNC |
330 | - leaq \func(%rip), %rax |
331 | - jmp stub_ptregs_64 |
332 | -END(ptregs_\func) |
333 | -.endm |
334 | - |
335 | -/* Instantiate ptregs_stub for each ptregs-using syscall */ |
336 | -#define __SYSCALL_64_QUAL_(sym) |
337 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym |
338 | -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) |
339 | -#include <asm/syscalls_64.h> |
340 | - |
341 | /* |
342 | * %rdi: prev task |
343 | * %rsi: next task |
344 | @@ -499,7 +386,8 @@ ENTRY(__switch_to_asm) |
345 | * exist, overwrite the RSB with entries which capture |
346 | * speculative execution to prevent attack. |
347 | */ |
348 | - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
349 | + /* Clobbers %rbx */ |
350 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
351 | #endif |
352 | |
353 | /* restore callee-saved registers */ |
354 | diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c |
355 | index 9c09775e589d..c176d2fab1da 100644 |
356 | --- a/arch/x86/entry/syscall_64.c |
357 | +++ b/arch/x86/entry/syscall_64.c |
358 | @@ -7,14 +7,11 @@ |
359 | #include <asm/asm-offsets.h> |
360 | #include <asm/syscall.h> |
361 | |
362 | -#define __SYSCALL_64_QUAL_(sym) sym |
363 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym |
364 | - |
365 | -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
366 | +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
367 | #include <asm/syscalls_64.h> |
368 | #undef __SYSCALL_64 |
369 | |
370 | -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), |
371 | +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, |
372 | |
373 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
374 | |
375 | diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h |
376 | index 1908214b9125..4d111616524b 100644 |
377 | --- a/arch/x86/include/asm/asm-prototypes.h |
378 | +++ b/arch/x86/include/asm/asm-prototypes.h |
379 | @@ -38,4 +38,7 @@ INDIRECT_THUNK(dx) |
380 | INDIRECT_THUNK(si) |
381 | INDIRECT_THUNK(di) |
382 | INDIRECT_THUNK(bp) |
383 | +asmlinkage void __fill_rsb(void); |
384 | +asmlinkage void __clear_rsb(void); |
385 | + |
386 | #endif /* CONFIG_RETPOLINE */ |
387 | diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h |
388 | index 7fb336210e1b..30d406146016 100644 |
389 | --- a/arch/x86/include/asm/barrier.h |
390 | +++ b/arch/x86/include/asm/barrier.h |
391 | @@ -24,6 +24,34 @@ |
392 | #define wmb() asm volatile("sfence" ::: "memory") |
393 | #endif |
394 | |
395 | +/** |
396 | + * array_index_mask_nospec() - generate a mask that is ~0UL when the |
397 | + * bounds check succeeds and 0 otherwise |
398 | + * @index: array element index |
399 | + * @size: number of elements in array |
400 | + * |
401 | + * Returns: |
402 | + * 0 - (index < size) |
403 | + */ |
404 | +static inline unsigned long array_index_mask_nospec(unsigned long index, |
405 | + unsigned long size) |
406 | +{ |
407 | + unsigned long mask; |
408 | + |
409 | + asm ("cmp %1,%2; sbb %0,%0;" |
410 | + :"=r" (mask) |
411 | + :"r"(size),"r" (index) |
412 | + :"cc"); |
413 | + return mask; |
414 | +} |
415 | + |
416 | +/* Override the default implementation from linux/nospec.h. */ |
417 | +#define array_index_mask_nospec array_index_mask_nospec |
418 | + |
419 | +/* Prevent speculative execution past this barrier. */ |
420 | +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ |
421 | + "lfence", X86_FEATURE_LFENCE_RDTSC) |
422 | + |
423 | #ifdef CONFIG_X86_PPRO_FENCE |
424 | #define dma_rmb() rmb() |
425 | #else |
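The CMP/SBB pair in the array_index_mask_nospec() added above is the x86 way of producing that same bounds mask without a conditional branch: CMP sets the carry flag exactly when index < size, and SBB of a register with itself evaluates to 0 - CF, i.e. all-ones when the carry is set and zero otherwise. A small C check of the value it computes (illustrative only; the ternary below may compile to a branch, which is precisely what the asm avoids):

    #include <assert.h>

    /* Value computed by "cmp size,index; sbb mask,mask":
     * mask = mask - mask - CF = -CF, with CF set iff index < size. */
    static unsigned long cmp_sbb_mask(unsigned long index, unsigned long size)
    {
            return index < size ? ~0UL : 0UL;
    }

    int main(void)
    {
            assert(cmp_sbb_mask(3, 8) == ~0UL); /* in bounds: index kept */
            assert(cmp_sbb_mask(8, 8) == 0UL);  /* out of bounds: clamped to 0 */
            assert(cmp_sbb_mask(9, 8) == 0UL);
            return 0;
    }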
426 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h |
427 | index ea9a7dde62e5..70eddb3922ff 100644 |
428 | --- a/arch/x86/include/asm/cpufeature.h |
429 | +++ b/arch/x86/include/asm/cpufeature.h |
430 | @@ -29,6 +29,7 @@ enum cpuid_leafs |
431 | CPUID_8000_000A_EDX, |
432 | CPUID_7_ECX, |
433 | CPUID_8000_0007_EBX, |
434 | + CPUID_7_EDX, |
435 | }; |
436 | |
437 | #ifdef CONFIG_X86_FEATURE_NAMES |
438 | @@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
439 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ |
440 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ |
441 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ |
442 | + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ |
443 | REQUIRED_MASK_CHECK || \ |
444 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
445 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
446 | |
447 | #define DISABLED_MASK_BIT_SET(feature_bit) \ |
448 | ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ |
449 | @@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
450 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ |
451 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ |
452 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ |
453 | + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ |
454 | DISABLED_MASK_CHECK || \ |
455 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
456 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
457 | |
458 | #define cpu_has(c, bit) \ |
459 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ |
460 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
461 | index 25b9375c1484..73b5fff159a4 100644 |
462 | --- a/arch/x86/include/asm/cpufeatures.h |
463 | +++ b/arch/x86/include/asm/cpufeatures.h |
464 | @@ -13,7 +13,7 @@ |
465 | /* |
466 | * Defines x86 CPU feature bits |
467 | */ |
468 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ |
469 | +#define NCAPINTS 19 /* N 32-bit words worth of info */ |
470 | #define NBUGINTS 1 /* N 32-bit bug flags */ |
471 | |
472 | /* |
473 | @@ -203,14 +203,14 @@ |
474 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
475 | #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ |
476 | #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ |
477 | -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ |
478 | -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ |
479 | +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ |
480 | +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ |
481 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
482 | -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ |
483 | -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
484 | |
485 | #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ |
486 | -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ |
487 | +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ |
488 | + |
489 | +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ |
490 | |
491 | /* Virtualization flags: Linux defined, word 8 */ |
492 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
493 | @@ -271,6 +271,9 @@ |
494 | #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ |
495 | #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ |
496 | #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ |
497 | +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ |
498 | +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ |
499 | +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ |
500 | |
501 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ |
502 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
503 | @@ -319,6 +322,13 @@ |
504 | #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ |
505 | #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ |
506 | |
507 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ |
508 | +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ |
509 | +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ |
510 | +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ |
511 | +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ |
512 | +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ |
513 | + |
514 | /* |
515 | * BUG word(s) |
516 | */ |
517 | diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h |
518 | index b027633e7300..33833d1909af 100644 |
519 | --- a/arch/x86/include/asm/disabled-features.h |
520 | +++ b/arch/x86/include/asm/disabled-features.h |
521 | @@ -77,6 +77,7 @@ |
522 | #define DISABLED_MASK15 0 |
523 | #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) |
524 | #define DISABLED_MASK17 0 |
525 | -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
526 | +#define DISABLED_MASK18 0 |
527 | +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
528 | |
529 | #endif /* _ASM_X86_DISABLED_FEATURES_H */ |
530 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h |
531 | index 64c4a30e0d39..e203169931c7 100644 |
532 | --- a/arch/x86/include/asm/fixmap.h |
533 | +++ b/arch/x86/include/asm/fixmap.h |
534 | @@ -137,8 +137,10 @@ enum fixed_addresses { |
535 | |
536 | extern void reserve_top_address(unsigned long reserve); |
537 | |
538 | -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) |
539 | -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) |
540 | +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) |
541 | +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) |
542 | +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) |
543 | +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) |
544 | |
545 | extern int fixmaps_set; |
546 | |
547 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h |
548 | index e7b983a35506..e520a1e6fc11 100644 |
549 | --- a/arch/x86/include/asm/msr-index.h |
550 | +++ b/arch/x86/include/asm/msr-index.h |
551 | @@ -39,6 +39,13 @@ |
552 | |
553 | /* Intel MSRs. Some also available on other CPUs */ |
554 | |
555 | +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ |
556 | +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ |
557 | +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ |
558 | + |
559 | +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ |
560 | +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ |
561 | + |
562 | #define MSR_PPIN_CTL 0x0000004e |
563 | #define MSR_PPIN 0x0000004f |
564 | |
565 | @@ -57,6 +64,11 @@ |
566 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
567 | |
568 | #define MSR_MTRRcap 0x000000fe |
569 | + |
570 | +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a |
571 | +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ |
572 | +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ |
573 | + |
574 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
575 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
576 | |
577 | diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h |
578 | index 07962f5f6fba..30df295f6d94 100644 |
579 | --- a/arch/x86/include/asm/msr.h |
580 | +++ b/arch/x86/include/asm/msr.h |
581 | @@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) |
582 | * that some other imaginary CPU is updating continuously with a |
583 | * time stamp. |
584 | */ |
585 | - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
586 | - "lfence", X86_FEATURE_LFENCE_RDTSC); |
587 | + barrier_nospec(); |
588 | return rdtsc(); |
589 | } |
590 | |
591 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
592 | index 4ad41087ce0e..4d57894635f2 100644 |
593 | --- a/arch/x86/include/asm/nospec-branch.h |
594 | +++ b/arch/x86/include/asm/nospec-branch.h |
595 | @@ -1,56 +1,12 @@ |
596 | /* SPDX-License-Identifier: GPL-2.0 */ |
597 | |
598 | -#ifndef __NOSPEC_BRANCH_H__ |
599 | -#define __NOSPEC_BRANCH_H__ |
600 | +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ |
601 | +#define _ASM_X86_NOSPEC_BRANCH_H_ |
602 | |
603 | #include <asm/alternative.h> |
604 | #include <asm/alternative-asm.h> |
605 | #include <asm/cpufeatures.h> |
606 | |
607 | -/* |
608 | - * Fill the CPU return stack buffer. |
609 | - * |
610 | - * Each entry in the RSB, if used for a speculative 'ret', contains an |
611 | - * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
612 | - * |
613 | - * This is required in various cases for retpoline and IBRS-based |
614 | - * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
615 | - * eliminate potentially bogus entries from the RSB, and sometimes |
616 | - * purely to ensure that it doesn't get empty, which on some CPUs would |
617 | - * allow predictions from other (unwanted!) sources to be used. |
618 | - * |
619 | - * We define a CPP macro such that it can be used from both .S files and |
620 | - * inline assembly. It's possible to do a .macro and then include that |
621 | - * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. |
622 | - */ |
623 | - |
624 | -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
625 | -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
626 | - |
627 | -/* |
628 | - * Google experimented with loop-unrolling and this turned out to be |
629 | - * the optimal version — two calls, each with their own speculation |
630 | - * trap should their return address end up getting used, in a loop. |
631 | - */ |
632 | -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ |
633 | - mov $(nr/2), reg; \ |
634 | -771: \ |
635 | - call 772f; \ |
636 | -773: /* speculation trap */ \ |
637 | - pause; \ |
638 | - lfence; \ |
639 | - jmp 773b; \ |
640 | -772: \ |
641 | - call 774f; \ |
642 | -775: /* speculation trap */ \ |
643 | - pause; \ |
644 | - lfence; \ |
645 | - jmp 775b; \ |
646 | -774: \ |
647 | - dec reg; \ |
648 | - jnz 771b; \ |
649 | - add $(BITS_PER_LONG/8) * nr, sp; |
650 | - |
651 | #ifdef __ASSEMBLY__ |
652 | |
653 | /* |
654 | @@ -121,17 +77,10 @@ |
655 | #endif |
656 | .endm |
657 | |
658 | - /* |
659 | - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP |
660 | - * monstrosity above, manually. |
661 | - */ |
662 | -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req |
663 | +/* This clobbers the BX register */ |
664 | +.macro FILL_RETURN_BUFFER nr:req ftr:req |
665 | #ifdef CONFIG_RETPOLINE |
666 | - ANNOTATE_NOSPEC_ALTERNATIVE |
667 | - ALTERNATIVE "jmp .Lskip_rsb_\@", \ |
668 | - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ |
669 | - \ftr |
670 | -.Lskip_rsb_\@: |
671 | + ALTERNATIVE "", "call __clear_rsb", \ftr |
672 | #endif |
673 | .endm |
674 | |
675 | @@ -201,22 +150,25 @@ extern char __indirect_thunk_end[]; |
676 | * On VMEXIT we must ensure that no RSB predictions learned in the guest |
677 | * can be followed in the host, by overwriting the RSB completely. Both |
678 | * retpoline and IBRS mitigations for Spectre v2 need this; only on future |
679 | - * CPUs with IBRS_ATT *might* it be avoided. |
680 | + * CPUs with IBRS_ALL *might* it be avoided. |
681 | */ |
682 | static inline void vmexit_fill_RSB(void) |
683 | { |
684 | #ifdef CONFIG_RETPOLINE |
685 | - unsigned long loops; |
686 | - |
687 | - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE |
688 | - ALTERNATIVE("jmp 910f", |
689 | - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), |
690 | - X86_FEATURE_RETPOLINE) |
691 | - "910:" |
692 | - : "=r" (loops), ASM_CALL_CONSTRAINT |
693 | - : : "memory" ); |
694 | + alternative_input("", |
695 | + "call __fill_rsb", |
696 | + X86_FEATURE_RETPOLINE, |
697 | + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); |
698 | #endif |
699 | } |
700 | |
701 | +static inline void indirect_branch_prediction_barrier(void) |
702 | +{ |
703 | + alternative_input("", |
704 | + "call __ibp_barrier", |
705 | + X86_FEATURE_USE_IBPB, |
706 | + ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); |
707 | +} |
708 | + |
709 | #endif /* __ASSEMBLY__ */ |
710 | -#endif /* __NOSPEC_BRANCH_H__ */ |
711 | +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
712 | diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h |
713 | index ce245b0cdfca..0777e18a1d23 100644 |
714 | --- a/arch/x86/include/asm/pgtable_32_types.h |
715 | +++ b/arch/x86/include/asm/pgtable_32_types.h |
716 | @@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ |
717 | */ |
718 | #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) |
719 | |
720 | -#define CPU_ENTRY_AREA_BASE \ |
721 | - ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) |
722 | +#define CPU_ENTRY_AREA_BASE \ |
723 | + ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ |
724 | + & PMD_MASK) |
725 | |
726 | #define PKMAP_BASE \ |
727 | ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) |
728 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
729 | index d3a67fba200a..513f9604c192 100644 |
730 | --- a/arch/x86/include/asm/processor.h |
731 | +++ b/arch/x86/include/asm/processor.h |
732 | @@ -460,8 +460,6 @@ struct thread_struct { |
733 | unsigned short gsindex; |
734 | #endif |
735 | |
736 | - u32 status; /* thread synchronous flags */ |
737 | - |
738 | #ifdef CONFIG_X86_64 |
739 | unsigned long fsbase; |
740 | unsigned long gsbase; |
741 | @@ -971,4 +969,7 @@ bool xen_set_default_idle(void); |
742 | |
743 | void stop_this_cpu(void *dummy); |
744 | void df_debug(struct pt_regs *regs, long error_code); |
745 | + |
746 | +void __ibp_barrier(void); |
747 | + |
748 | #endif /* _ASM_X86_PROCESSOR_H */ |
749 | diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h |
750 | index d91ba04dd007..fb3a6de7440b 100644 |
751 | --- a/arch/x86/include/asm/required-features.h |
752 | +++ b/arch/x86/include/asm/required-features.h |
753 | @@ -106,6 +106,7 @@ |
754 | #define REQUIRED_MASK15 0 |
755 | #define REQUIRED_MASK16 (NEED_LA57) |
756 | #define REQUIRED_MASK17 0 |
757 | -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
758 | +#define REQUIRED_MASK18 0 |
759 | +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
760 | |
761 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ |
762 | diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h |
763 | index e3c95e8e61c5..03eedc21246d 100644 |
764 | --- a/arch/x86/include/asm/syscall.h |
765 | +++ b/arch/x86/include/asm/syscall.h |
766 | @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, |
767 | * TS_COMPAT is set for 32-bit syscall entries and then |
768 | * remains set until we return to user mode. |
769 | */ |
770 | - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
771 | + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
772 | /* |
773 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO |
774 | * and will match correctly in comparisons. |
775 | @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, |
776 | unsigned long *args) |
777 | { |
778 | # ifdef CONFIG_IA32_EMULATION |
779 | - if (task->thread.status & TS_COMPAT) |
780 | + if (task->thread_info.status & TS_COMPAT) |
781 | switch (i) { |
782 | case 0: |
783 | if (!n--) break; |
784 | @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, |
785 | const unsigned long *args) |
786 | { |
787 | # ifdef CONFIG_IA32_EMULATION |
788 | - if (task->thread.status & TS_COMPAT) |
789 | + if (task->thread_info.status & TS_COMPAT) |
790 | switch (i) { |
791 | case 0: |
792 | if (!n--) break; |
793 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h |
794 | index 00223333821a..eda3b6823ca4 100644 |
795 | --- a/arch/x86/include/asm/thread_info.h |
796 | +++ b/arch/x86/include/asm/thread_info.h |
797 | @@ -55,6 +55,7 @@ struct task_struct; |
798 | |
799 | struct thread_info { |
800 | unsigned long flags; /* low level flags */ |
801 | + u32 status; /* thread synchronous flags */ |
802 | }; |
803 | |
804 | #define INIT_THREAD_INFO(tsk) \ |
805 | @@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack, |
806 | #define in_ia32_syscall() true |
807 | #else |
808 | #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ |
809 | - current->thread.status & TS_COMPAT) |
810 | + current_thread_info()->status & TS_COMPAT) |
811 | #endif |
812 | |
813 | /* |
814 | diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h |
815 | index d33e4a26dc7e..2b8f18ca5874 100644 |
816 | --- a/arch/x86/include/asm/tlbflush.h |
817 | +++ b/arch/x86/include/asm/tlbflush.h |
818 | @@ -174,6 +174,8 @@ struct tlb_state { |
819 | struct mm_struct *loaded_mm; |
820 | u16 loaded_mm_asid; |
821 | u16 next_asid; |
822 | + /* last user mm's ctx id */ |
823 | + u64 last_ctx_id; |
824 | |
825 | /* |
826 | * We can be in one of several states: |
827 | diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h |
828 | index 574dff4d2913..aae77eb8491c 100644 |
829 | --- a/arch/x86/include/asm/uaccess.h |
830 | +++ b/arch/x86/include/asm/uaccess.h |
831 | @@ -124,6 +124,11 @@ extern int __get_user_bad(void); |
832 | |
833 | #define __uaccess_begin() stac() |
834 | #define __uaccess_end() clac() |
835 | +#define __uaccess_begin_nospec() \ |
836 | +({ \ |
837 | + stac(); \ |
838 | + barrier_nospec(); \ |
839 | +}) |
840 | |
841 | /* |
842 | * This is a type: either unsigned long, if the argument fits into |
843 | @@ -445,7 +450,7 @@ do { \ |
844 | ({ \ |
845 | int __gu_err; \ |
846 | __inttype(*(ptr)) __gu_val; \ |
847 | - __uaccess_begin(); \ |
848 | + __uaccess_begin_nospec(); \ |
849 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
850 | __uaccess_end(); \ |
851 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
852 | @@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; |
853 | __uaccess_begin(); \ |
854 | barrier(); |
855 | |
856 | +#define uaccess_try_nospec do { \ |
857 | + current->thread.uaccess_err = 0; \ |
858 | + __uaccess_begin_nospec(); \ |
859 | + |
860 | #define uaccess_catch(err) \ |
861 | __uaccess_end(); \ |
862 | (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ |
863 | @@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; |
864 | * get_user_ex(...); |
865 | * } get_user_catch(err) |
866 | */ |
867 | -#define get_user_try uaccess_try |
868 | +#define get_user_try uaccess_try_nospec |
869 | #define get_user_catch(err) uaccess_catch(err) |
870 | |
871 | #define get_user_ex(x, ptr) do { \ |
872 | @@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) |
873 | __typeof__(ptr) __uval = (uval); \ |
874 | __typeof__(*(ptr)) __old = (old); \ |
875 | __typeof__(*(ptr)) __new = (new); \ |
876 | - __uaccess_begin(); \ |
877 | + __uaccess_begin_nospec(); \ |
878 | switch (size) { \ |
879 | case 1: \ |
880 | { \ |
881 | diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h |
882 | index 72950401b223..ba2dc1930630 100644 |
883 | --- a/arch/x86/include/asm/uaccess_32.h |
884 | +++ b/arch/x86/include/asm/uaccess_32.h |
885 | @@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) |
886 | switch (n) { |
887 | case 1: |
888 | ret = 0; |
889 | - __uaccess_begin(); |
890 | + __uaccess_begin_nospec(); |
891 | __get_user_asm_nozero(*(u8 *)to, from, ret, |
892 | "b", "b", "=q", 1); |
893 | __uaccess_end(); |
894 | return ret; |
895 | case 2: |
896 | ret = 0; |
897 | - __uaccess_begin(); |
898 | + __uaccess_begin_nospec(); |
899 | __get_user_asm_nozero(*(u16 *)to, from, ret, |
900 | "w", "w", "=r", 2); |
901 | __uaccess_end(); |
902 | return ret; |
903 | case 4: |
904 | ret = 0; |
905 | - __uaccess_begin(); |
906 | + __uaccess_begin_nospec(); |
907 | __get_user_asm_nozero(*(u32 *)to, from, ret, |
908 | "l", "k", "=r", 4); |
909 | __uaccess_end(); |
910 | diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h |
911 | index f07ef3c575db..62546b3a398e 100644 |
912 | --- a/arch/x86/include/asm/uaccess_64.h |
913 | +++ b/arch/x86/include/asm/uaccess_64.h |
914 | @@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) |
915 | return copy_user_generic(dst, (__force void *)src, size); |
916 | switch (size) { |
917 | case 1: |
918 | - __uaccess_begin(); |
919 | + __uaccess_begin_nospec(); |
920 | __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, |
921 | ret, "b", "b", "=q", 1); |
922 | __uaccess_end(); |
923 | return ret; |
924 | case 2: |
925 | - __uaccess_begin(); |
926 | + __uaccess_begin_nospec(); |
927 | __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, |
928 | ret, "w", "w", "=r", 2); |
929 | __uaccess_end(); |
930 | return ret; |
931 | case 4: |
932 | - __uaccess_begin(); |
933 | + __uaccess_begin_nospec(); |
934 | __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, |
935 | ret, "l", "k", "=r", 4); |
936 | __uaccess_end(); |
937 | return ret; |
938 | case 8: |
939 | - __uaccess_begin(); |
940 | + __uaccess_begin_nospec(); |
941 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
942 | ret, "q", "", "=r", 8); |
943 | __uaccess_end(); |
944 | return ret; |
945 | case 10: |
946 | - __uaccess_begin(); |
947 | + __uaccess_begin_nospec(); |
948 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
949 | ret, "q", "", "=r", 10); |
950 | if (likely(!ret)) |
951 | @@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) |
952 | __uaccess_end(); |
953 | return ret; |
954 | case 16: |
955 | - __uaccess_begin(); |
956 | + __uaccess_begin_nospec(); |
957 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, |
958 | ret, "q", "", "=r", 16); |
959 | if (likely(!ret)) |
960 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c |
961 | index 4817d743c263..a481763a3776 100644 |
962 | --- a/arch/x86/kernel/alternative.c |
963 | +++ b/arch/x86/kernel/alternative.c |
964 | @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) |
965 | } |
966 | __setup("noreplace-smp", setup_noreplace_smp); |
967 | |
968 | -#ifdef CONFIG_PARAVIRT |
969 | -static int __initdata_or_module noreplace_paravirt = 0; |
970 | - |
971 | -static int __init setup_noreplace_paravirt(char *str) |
972 | -{ |
973 | - noreplace_paravirt = 1; |
974 | - return 1; |
975 | -} |
976 | -__setup("noreplace-paravirt", setup_noreplace_paravirt); |
977 | -#endif |
978 | - |
979 | #define DPRINTK(fmt, args...) \ |
980 | do { \ |
981 | if (debug_alternative) \ |
982 | @@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) |
983 | tgt_rip = next_rip + o_dspl; |
984 | n_dspl = tgt_rip - orig_insn; |
985 | |
986 | - DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); |
987 | + DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); |
988 | |
989 | if (tgt_rip - orig_insn >= 0) { |
990 | if (n_dspl - 2 <= 127) |
991 | @@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins |
992 | add_nops(instr + (a->instrlen - a->padlen), a->padlen); |
993 | local_irq_restore(flags); |
994 | |
995 | - DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", |
996 | + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", |
997 | instr, a->instrlen - a->padlen, a->padlen); |
998 | } |
999 | |
1000 | @@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1001 | u8 *instr, *replacement; |
1002 | u8 insnbuf[MAX_PATCH_LEN]; |
1003 | |
1004 | - DPRINTK("alt table %p -> %p", start, end); |
1005 | + DPRINTK("alt table %px, -> %px", start, end); |
1006 | /* |
1007 | * The scan order should be from start to end. A later scanned |
1008 | * alternative code can overwrite previously scanned alternative code. |
1009 | @@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1010 | continue; |
1011 | } |
1012 | |
1013 | - DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", |
1014 | + DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", |
1015 | a->cpuid >> 5, |
1016 | a->cpuid & 0x1f, |
1017 | instr, a->instrlen, |
1018 | replacement, a->replacementlen, a->padlen); |
1019 | |
1020 | - DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); |
1021 | - DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); |
1022 | + DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); |
1023 | + DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); |
1024 | |
1025 | memcpy(insnbuf, replacement, a->replacementlen); |
1026 | insnbuf_sz = a->replacementlen; |
1027 | @@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, |
1028 | a->instrlen - a->replacementlen); |
1029 | insnbuf_sz += a->instrlen - a->replacementlen; |
1030 | } |
1031 | - DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); |
1032 | + DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); |
1033 | |
1034 | text_poke_early(instr, insnbuf, insnbuf_sz); |
1035 | } |
1036 | @@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
1037 | struct paravirt_patch_site *p; |
1038 | char insnbuf[MAX_PATCH_LEN]; |
1039 | |
1040 | - if (noreplace_paravirt) |
1041 | - return; |
1042 | - |
1043 | for (p = start; p < end; p++) { |
1044 | unsigned int used; |
1045 | |
1046 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c |
1047 | index 390b3dc3d438..71949bf2de5a 100644 |
1048 | --- a/arch/x86/kernel/cpu/bugs.c |
1049 | +++ b/arch/x86/kernel/cpu/bugs.c |
1050 | @@ -11,6 +11,7 @@ |
1051 | #include <linux/init.h> |
1052 | #include <linux/utsname.h> |
1053 | #include <linux/cpu.h> |
1054 | +#include <linux/module.h> |
1055 | |
1056 | #include <asm/nospec-branch.h> |
1057 | #include <asm/cmdline.h> |
1058 | @@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = { |
1059 | }; |
1060 | |
1061 | #undef pr_fmt |
1062 | -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt |
1063 | +#define pr_fmt(fmt) "Spectre V2 : " fmt |
1064 | |
1065 | static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; |
1066 | |
1067 | +#ifdef RETPOLINE |
1068 | +static bool spectre_v2_bad_module; |
1069 | + |
1070 | +bool retpoline_module_ok(bool has_retpoline) |
1071 | +{ |
1072 | + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) |
1073 | + return true; |
1074 | + |
1075 | + pr_err("System may be vulnerable to spectre v2\n"); |
1076 | + spectre_v2_bad_module = true; |
1077 | + return false; |
1078 | +} |
1079 | + |
1080 | +static inline const char *spectre_v2_module_string(void) |
1081 | +{ |
1082 | + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; |
1083 | +} |
1084 | +#else |
1085 | +static inline const char *spectre_v2_module_string(void) { return ""; } |
1086 | +#endif |
1087 | + |
1088 | static void __init spec2_print_if_insecure(const char *reason) |
1089 | { |
1090 | if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1091 | - pr_info("%s\n", reason); |
1092 | + pr_info("%s selected on command line.\n", reason); |
1093 | } |
1094 | |
1095 | static void __init spec2_print_if_secure(const char *reason) |
1096 | { |
1097 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1098 | - pr_info("%s\n", reason); |
1099 | + pr_info("%s selected on command line.\n", reason); |
1100 | } |
1101 | |
1102 | static inline bool retp_compiler(void) |
1103 | @@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) |
1104 | return len == arglen && !strncmp(arg, opt, len); |
1105 | } |
1106 | |
1107 | +static const struct { |
1108 | + const char *option; |
1109 | + enum spectre_v2_mitigation_cmd cmd; |
1110 | + bool secure; |
1111 | +} mitigation_options[] = { |
1112 | + { "off", SPECTRE_V2_CMD_NONE, false }, |
1113 | + { "on", SPECTRE_V2_CMD_FORCE, true }, |
1114 | + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, |
1115 | + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, |
1116 | + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, |
1117 | + { "auto", SPECTRE_V2_CMD_AUTO, false }, |
1118 | +}; |
1119 | + |
1120 | static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) |
1121 | { |
1122 | char arg[20]; |
1123 | - int ret; |
1124 | - |
1125 | - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
1126 | - sizeof(arg)); |
1127 | - if (ret > 0) { |
1128 | - if (match_option(arg, ret, "off")) { |
1129 | - goto disable; |
1130 | - } else if (match_option(arg, ret, "on")) { |
1131 | - spec2_print_if_secure("force enabled on command line."); |
1132 | - return SPECTRE_V2_CMD_FORCE; |
1133 | - } else if (match_option(arg, ret, "retpoline")) { |
1134 | - spec2_print_if_insecure("retpoline selected on command line."); |
1135 | - return SPECTRE_V2_CMD_RETPOLINE; |
1136 | - } else if (match_option(arg, ret, "retpoline,amd")) { |
1137 | - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
1138 | - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
1139 | - return SPECTRE_V2_CMD_AUTO; |
1140 | - } |
1141 | - spec2_print_if_insecure("AMD retpoline selected on command line."); |
1142 | - return SPECTRE_V2_CMD_RETPOLINE_AMD; |
1143 | - } else if (match_option(arg, ret, "retpoline,generic")) { |
1144 | - spec2_print_if_insecure("generic retpoline selected on command line."); |
1145 | - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; |
1146 | - } else if (match_option(arg, ret, "auto")) { |
1147 | + int ret, i; |
1148 | + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; |
1149 | + |
1150 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
1151 | + return SPECTRE_V2_CMD_NONE; |
1152 | + else { |
1153 | + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
1154 | + sizeof(arg)); |
1155 | + if (ret < 0) |
1156 | + return SPECTRE_V2_CMD_AUTO; |
1157 | + |
1158 | + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { |
1159 | + if (!match_option(arg, ret, mitigation_options[i].option)) |
1160 | + continue; |
1161 | + cmd = mitigation_options[i].cmd; |
1162 | + break; |
1163 | + } |
1164 | + |
1165 | + if (i >= ARRAY_SIZE(mitigation_options)) { |
1166 | + pr_err("unknown option (%s). Switching to AUTO select\n", |
1167 | + mitigation_options[i].option); |
1168 | return SPECTRE_V2_CMD_AUTO; |
1169 | } |
1170 | } |
1171 | |
1172 | - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
1173 | + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || |
1174 | + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || |
1175 | + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && |
1176 | + !IS_ENABLED(CONFIG_RETPOLINE)) { |
1177 | + pr_err("%s selected but not compiled in. Switching to AUTO select\n", |
1178 | + mitigation_options[i].option); |
1179 | return SPECTRE_V2_CMD_AUTO; |
1180 | -disable: |
1181 | - spec2_print_if_insecure("disabled on command line."); |
1182 | - return SPECTRE_V2_CMD_NONE; |
1183 | + } |
1184 | + |
1185 | + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && |
1186 | + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
1187 | + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
1188 | + return SPECTRE_V2_CMD_AUTO; |
1189 | + } |
1190 | + |
1191 | + if (mitigation_options[i].secure) |
1192 | + spec2_print_if_secure(mitigation_options[i].option); |
1193 | + else |
1194 | + spec2_print_if_insecure(mitigation_options[i].option); |
1195 | + |
1196 | + return cmd; |
1197 | } |
1198 | |
1199 | /* Check for Skylake-like CPUs (for RSB handling) */ |
1200 | @@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void) |
1201 | return; |
1202 | |
1203 | case SPECTRE_V2_CMD_FORCE: |
1204 | - /* FALLTRHU */ |
1205 | case SPECTRE_V2_CMD_AUTO: |
1206 | - goto retpoline_auto; |
1207 | - |
1208 | + if (IS_ENABLED(CONFIG_RETPOLINE)) |
1209 | + goto retpoline_auto; |
1210 | + break; |
1211 | case SPECTRE_V2_CMD_RETPOLINE_AMD: |
1212 | if (IS_ENABLED(CONFIG_RETPOLINE)) |
1213 | goto retpoline_amd; |
1214 | @@ -249,6 +297,12 @@ static void __init spectre_v2_select_mitigation(void) |
1215 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); |
1216 | pr_info("Filling RSB on context switch\n"); |
1217 | } |
1218 | + |
1219 | + /* Initialize Indirect Branch Prediction Barrier if supported */ |
1220 | + if (boot_cpu_has(X86_FEATURE_IBPB)) { |
1221 | + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); |
1222 | + pr_info("Enabling Indirect Branch Prediction Barrier\n"); |
1223 | + } |
1224 | } |
1225 | |
1226 | #undef pr_fmt |
1227 | @@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, |
1228 | { |
1229 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) |
1230 | return sprintf(buf, "Not affected\n"); |
1231 | - return sprintf(buf, "Vulnerable\n"); |
1232 | + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); |
1233 | } |
1234 | |
1235 | ssize_t cpu_show_spectre_v2(struct device *dev, |
1236 | @@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev, |
1237 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1238 | return sprintf(buf, "Not affected\n"); |
1239 | |
1240 | - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); |
1241 | + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], |
1242 | + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", |
1243 | + spectre_v2_module_string()); |
1244 | } |
1245 | #endif |
1246 | + |
1247 | +void __ibp_barrier(void) |
1248 | +{ |
1249 | + __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); |
1250 | +} |
1251 | +EXPORT_SYMBOL_GPL(__ibp_barrier); |
1252 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
1253 | index ef29ad001991..d63f4b5706e4 100644 |
1254 | --- a/arch/x86/kernel/cpu/common.c |
1255 | +++ b/arch/x86/kernel/cpu/common.c |
1256 | @@ -47,6 +47,8 @@ |
1257 | #include <asm/pat.h> |
1258 | #include <asm/microcode.h> |
1259 | #include <asm/microcode_intel.h> |
1260 | +#include <asm/intel-family.h> |
1261 | +#include <asm/cpu_device_id.h> |
1262 | |
1263 | #ifdef CONFIG_X86_LOCAL_APIC |
1264 | #include <asm/uv/uv.h> |
1265 | @@ -748,6 +750,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) |
1266 | } |
1267 | } |
1268 | |
1269 | +static void init_speculation_control(struct cpuinfo_x86 *c) |
1270 | +{ |
1271 | + /* |
1272 | + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, |
1273 | + * and they also have a different bit for STIBP support. Also, |
1274 | + * a hypervisor might have set the individual AMD bits even on |
1275 | + * Intel CPUs, for finer-grained selection of what's available. |
1276 | + * |
1277 | + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware |
1278 | + * features, which are visible in /proc/cpuinfo and used by the |
1279 | + * kernel. So set those accordingly from the Intel bits. |
1280 | + */ |
1281 | + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { |
1282 | + set_cpu_cap(c, X86_FEATURE_IBRS); |
1283 | + set_cpu_cap(c, X86_FEATURE_IBPB); |
1284 | + } |
1285 | + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) |
1286 | + set_cpu_cap(c, X86_FEATURE_STIBP); |
1287 | +} |
1288 | + |
1289 | void get_cpu_cap(struct cpuinfo_x86 *c) |
1290 | { |
1291 | u32 eax, ebx, ecx, edx; |
1292 | @@ -769,6 +791,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1293 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
1294 | c->x86_capability[CPUID_7_0_EBX] = ebx; |
1295 | c->x86_capability[CPUID_7_ECX] = ecx; |
1296 | + c->x86_capability[CPUID_7_EDX] = edx; |
1297 | } |
1298 | |
1299 | /* Extended state features: level 0x0000000d */ |
1300 | @@ -841,6 +864,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
1301 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); |
1302 | |
1303 | init_scattered_cpuid_features(c); |
1304 | + init_speculation_control(c); |
1305 | |
1306 | /* |
1307 | * Clear/Set all flags overridden by options, after probe. |
1308 | @@ -876,6 +900,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
1309 | #endif |
1310 | } |
1311 | |
1312 | +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { |
1313 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, |
1314 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, |
1315 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, |
1316 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, |
1317 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, |
1318 | + { X86_VENDOR_CENTAUR, 5 }, |
1319 | + { X86_VENDOR_INTEL, 5 }, |
1320 | + { X86_VENDOR_NSC, 5 }, |
1321 | + { X86_VENDOR_ANY, 4 }, |
1322 | + {} |
1323 | +}; |
1324 | + |
1325 | +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { |
1326 | + { X86_VENDOR_AMD }, |
1327 | + {} |
1328 | +}; |
1329 | + |
1330 | +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) |
1331 | +{ |
1332 | + u64 ia32_cap = 0; |
1333 | + |
1334 | + if (x86_match_cpu(cpu_no_meltdown)) |
1335 | + return false; |
1336 | + |
1337 | + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) |
1338 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
1339 | + |
1340 | + /* Rogue Data Cache Load? No! */ |
1341 | + if (ia32_cap & ARCH_CAP_RDCL_NO) |
1342 | + return false; |
1343 | + |
1344 | + return true; |
1345 | +} |
1346 | + |
1347 | /* |
1348 | * Do minimum CPU detection early. |
1349 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
1350 | @@ -923,11 +982,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
1351 | |
1352 | setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
1353 | |
1354 | - if (c->x86_vendor != X86_VENDOR_AMD) |
1355 | - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1356 | - |
1357 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1358 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1359 | + if (!x86_match_cpu(cpu_no_speculation)) { |
1360 | + if (cpu_vulnerable_to_meltdown(c)) |
1361 | + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
1362 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
1363 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
1364 | + } |
1365 | |
1366 | fpu__init_system(c); |
1367 | |
1368 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c |
1369 | index b1af22073e28..319bf989fad1 100644 |
1370 | --- a/arch/x86/kernel/cpu/intel.c |
1371 | +++ b/arch/x86/kernel/cpu/intel.c |
1372 | @@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) |
1373 | ELF_HWCAP2 |= HWCAP2_RING3MWAIT; |
1374 | } |
1375 | |
1376 | +/* |
1377 | + * Early microcode releases for the Spectre v2 mitigation were broken. |
1378 | + * Information taken from; |
1379 | + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf |
1380 | + * - https://kb.vmware.com/s/article/52345 |
1381 | + * - Microcode revisions observed in the wild |
1382 | + * - Release note from 20180108 microcode release |
1383 | + */ |
1384 | +struct sku_microcode { |
1385 | + u8 model; |
1386 | + u8 stepping; |
1387 | + u32 microcode; |
1388 | +}; |
1389 | +static const struct sku_microcode spectre_bad_microcodes[] = { |
1390 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, |
1391 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, |
1392 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, |
1393 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, |
1394 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, |
1395 | + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, |
1396 | + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, |
1397 | + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, |
1398 | + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, |
1399 | + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, |
1400 | + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, |
1401 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, |
1402 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, |
1403 | + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, |
1404 | + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, |
1405 | + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, |
1406 | + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, |
1407 | + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, |
1408 | + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, |
1409 | + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, |
1410 | + /* Updated in the 20180108 release; blacklist until we know otherwise */ |
1411 | + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, |
1412 | + /* Observed in the wild */ |
1413 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, |
1414 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, |
1415 | +}; |
1416 | + |
1417 | +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) |
1418 | +{ |
1419 | + int i; |
1420 | + |
1421 | + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { |
1422 | + if (c->x86_model == spectre_bad_microcodes[i].model && |
1423 | + c->x86_mask == spectre_bad_microcodes[i].stepping) |
1424 | + return (c->microcode <= spectre_bad_microcodes[i].microcode); |
1425 | + } |
1426 | + return false; |
1427 | +} |
1428 | + |
1429 | static void early_init_intel(struct cpuinfo_x86 *c) |
1430 | { |
1431 | u64 misc_enable; |
1432 | @@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) |
1433 | if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) |
1434 | c->microcode = intel_get_microcode_revision(); |
1435 | |
1436 | + /* Now if any of them are set, check the blacklist and clear the lot */ |
1437 | + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || |
1438 | + cpu_has(c, X86_FEATURE_INTEL_STIBP) || |
1439 | + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || |
1440 | + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { |
1441 | + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); |
1442 | + setup_clear_cpu_cap(X86_FEATURE_IBRS); |
1443 | + setup_clear_cpu_cap(X86_FEATURE_IBPB); |
1444 | + setup_clear_cpu_cap(X86_FEATURE_STIBP); |
1445 | + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); |
1446 | + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); |
1447 | + } |
1448 | + |
1449 | /* |
1450 | * Atom erratum AAE44/AAF40/AAG38/AAH41: |
1451 | * |
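The blacklist lookup added to intel.c compares the family-6 model, the stepping and the running microcode revision; anything at or below the listed revision is treated as still carrying the broken IBRS/IBPB microcode, so the speculation-control feature bits are cleared. A minimal stand-alone model using the Skylake-X row from the table; model 0x55 is assumed here as the numeric value of INTEL_FAM6_SKYLAKE_X, and the newer revision in the second call is made up for the example.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct sku_microcode { uint8_t model; uint8_t stepping; uint32_t microcode; };

    /* One row of spectre_bad_microcodes: Skylake-X, stepping 4, last bad revision. */
    static const struct sku_microcode entry = { 0x55, 0x04, 0x0200003c };

    static bool bad_spectre_microcode(uint8_t model, uint8_t stepping, uint32_t rev)
    {
        /* "<=": a revision at or below the listed one is still broken. */
        return model == entry.model && stepping == entry.stepping &&
               rev <= entry.microcode;
    }

    int main(void)
    {
        printf("%d\n", bad_spectre_microcode(0x55, 0x04, 0x0200003c)); /* 1: blacklisted */
        printf("%d\n", bad_spectre_microcode(0x55, 0x04, 0x02000050)); /* 0: newer revision */
        return 0;
    }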
1452 | diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c |
1453 | index d0e69769abfd..df11f5d604be 100644 |
1454 | --- a/arch/x86/kernel/cpu/scattered.c |
1455 | +++ b/arch/x86/kernel/cpu/scattered.c |
1456 | @@ -21,8 +21,6 @@ struct cpuid_bit { |
1457 | static const struct cpuid_bit cpuid_bits[] = { |
1458 | { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, |
1459 | { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, |
1460 | - { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, |
1461 | - { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, |
1462 | { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, |
1463 | { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, |
1464 | { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, |
1465 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c |
1466 | index c75466232016..9eb448c7859d 100644 |
1467 | --- a/arch/x86/kernel/process_64.c |
1468 | +++ b/arch/x86/kernel/process_64.c |
1469 | @@ -557,7 +557,7 @@ static void __set_personality_x32(void) |
1470 | * Pretend to come from a x32 execve. |
1471 | */ |
1472 | task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; |
1473 | - current->thread.status &= ~TS_COMPAT; |
1474 | + current_thread_info()->status &= ~TS_COMPAT; |
1475 | #endif |
1476 | } |
1477 | |
1478 | @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) |
1479 | current->personality |= force_personality32; |
1480 | /* Prepare the first "return" to user space */ |
1481 | task_pt_regs(current)->orig_ax = __NR_ia32_execve; |
1482 | - current->thread.status |= TS_COMPAT; |
1483 | + current_thread_info()->status |= TS_COMPAT; |
1484 | #endif |
1485 | } |
1486 | |
1487 | diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c |
1488 | index f37d18124648..ed5c4cdf0a34 100644 |
1489 | --- a/arch/x86/kernel/ptrace.c |
1490 | +++ b/arch/x86/kernel/ptrace.c |
1491 | @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) |
1492 | */ |
1493 | regs->orig_ax = value; |
1494 | if (syscall_get_nr(child, regs) >= 0) |
1495 | - child->thread.status |= TS_I386_REGS_POKED; |
1496 | + child->thread_info.status |= TS_I386_REGS_POKED; |
1497 | break; |
1498 | |
1499 | case offsetof(struct user32, regs.eflags): |
1500 | diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c |
1501 | index b9e00e8f1c9b..4cdc0b27ec82 100644 |
1502 | --- a/arch/x86/kernel/signal.c |
1503 | +++ b/arch/x86/kernel/signal.c |
1504 | @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
1505 | * than the tracee. |
1506 | */ |
1507 | #ifdef CONFIG_IA32_EMULATION |
1508 | - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1509 | + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1510 | return __NR_ia32_restart_syscall; |
1511 | #endif |
1512 | #ifdef CONFIG_X86_X32_ABI |
1513 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
1514 | index 0099e10eb045..13f5d4217e4f 100644 |
1515 | --- a/arch/x86/kvm/cpuid.c |
1516 | +++ b/arch/x86/kvm/cpuid.c |
1517 | @@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void) |
1518 | |
1519 | #define F(x) bit(X86_FEATURE_##x) |
1520 | |
1521 | -/* These are scattered features in cpufeatures.h. */ |
1522 | -#define KVM_CPUID_BIT_AVX512_4VNNIW 2 |
1523 | -#define KVM_CPUID_BIT_AVX512_4FMAPS 3 |
1524 | +/* For scattered features from cpufeatures.h; we currently expose none */ |
1525 | #define KF(x) bit(KVM_CPUID_BIT_##x) |
1526 | |
1527 | int kvm_update_cpuid(struct kvm_vcpu *vcpu) |
1528 | @@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1529 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
1530 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
1531 | |
1532 | + /* cpuid 0x80000008.ebx */ |
1533 | + const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
1534 | + F(IBPB) | F(IBRS); |
1535 | + |
1536 | /* cpuid 0xC0000001.edx */ |
1537 | const u32 kvm_cpuid_C000_0001_edx_x86_features = |
1538 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | |
1539 | @@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1540 | |
1541 | /* cpuid 7.0.edx*/ |
1542 | const u32 kvm_cpuid_7_0_edx_x86_features = |
1543 | - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); |
1544 | + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | |
1545 | + F(ARCH_CAPABILITIES); |
1546 | |
1547 | /* all calls to cpuid_count() should be made on the same cpu */ |
1548 | get_cpu(); |
1549 | @@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1550 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) |
1551 | entry->ecx &= ~F(PKU); |
1552 | entry->edx &= kvm_cpuid_7_0_edx_x86_features; |
1553 | - entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); |
1554 | + cpuid_mask(&entry->edx, CPUID_7_EDX); |
1555 | } else { |
1556 | entry->ebx = 0; |
1557 | entry->ecx = 0; |
1558 | @@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1559 | if (!g_phys_as) |
1560 | g_phys_as = phys_as; |
1561 | entry->eax = g_phys_as | (virt_as << 8); |
1562 | - entry->ebx = entry->edx = 0; |
1563 | + entry->edx = 0; |
1564 | + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ |
1565 | + if (boot_cpu_has(X86_FEATURE_IBPB)) |
1566 | + entry->ebx |= F(IBPB); |
1567 | + if (boot_cpu_has(X86_FEATURE_IBRS)) |
1568 | + entry->ebx |= F(IBRS); |
1569 | + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; |
1570 | + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); |
1571 | break; |
1572 | } |
1573 | case 0x80000019: |
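The cpuid.c change above lets a guest see IBPB/IBRS in leaf 0x80000008 EBX even when the host CPU only enumerates the Intel-style SPEC_CTRL bit, because the kernel has already synthesized X86_FEATURE_IBPB/IBRS from it. A rough model of how the guest-visible EBX is assembled; the bit positions (IBPB = bit 12, IBRS = bit 14, per AMD's definition) and the helper are assumptions of this sketch, only the OR-then-mask flow mirrors the patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EBX_IBPB (1u << 12)
    #define EBX_IBRS (1u << 14)

    static uint32_t guest_8000_0008_ebx(uint32_t hw_ebx, bool host_ibpb, bool host_ibrs)
    {
        uint32_t supported = EBX_IBPB | EBX_IBRS;  /* kvm_cpuid_8000_0008_ebx_x86_features */
        uint32_t ebx = hw_ebx;

        /* IBRS and IBPB aren't necessarily present in hardware cpuid, so OR
         * them in from the host feature flags instead of trusting raw CPUID. */
        if (host_ibpb)
            ebx |= EBX_IBPB;
        if (host_ibrs)
            ebx |= EBX_IBRS;

        return ebx & supported;   /* never advertise more than KVM supports */
    }

    int main(void)
    {
        /* Intel host: the raw leaf reports neither bit, but the kernel set both flags. */
        printf("0x%x\n", guest_8000_0008_ebx(0, true, true));  /* 0x5000 */
        return 0;
    }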
1574 | diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h |
1575 | index c2cea6651279..9a327d5b6d1f 100644 |
1576 | --- a/arch/x86/kvm/cpuid.h |
1577 | +++ b/arch/x86/kvm/cpuid.h |
1578 | @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { |
1579 | [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, |
1580 | [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, |
1581 | [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, |
1582 | + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, |
1583 | }; |
1584 | |
1585 | static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) |
1586 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c |
1587 | index b514b2b2845a..290ecf711aec 100644 |
1588 | --- a/arch/x86/kvm/emulate.c |
1589 | +++ b/arch/x86/kvm/emulate.c |
1590 | @@ -25,6 +25,7 @@ |
1591 | #include <asm/kvm_emulate.h> |
1592 | #include <linux/stringify.h> |
1593 | #include <asm/debugreg.h> |
1594 | +#include <asm/nospec-branch.h> |
1595 | |
1596 | #include "x86.h" |
1597 | #include "tss.h" |
1598 | @@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) |
1599 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); |
1600 | |
1601 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; |
1602 | - asm("push %[flags]; popf; call *%[fastop]" |
1603 | - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); |
1604 | + asm("push %[flags]; popf; " CALL_NOSPEC |
1605 | + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); |
1606 | return rc; |
1607 | } |
1608 | |
1609 | @@ -5335,9 +5336,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) |
1610 | if (!(ctxt->d & ByteOp)) |
1611 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; |
1612 | |
1613 | - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" |
1614 | + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" |
1615 | : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), |
1616 | - [fastop]"+S"(fop), ASM_CALL_CONSTRAINT |
1617 | + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT |
1618 | : "c"(ctxt->src2.val)); |
1619 | |
1620 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); |
1621 | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c |
1622 | index f40d0da1f1d3..4e3c79530526 100644 |
1623 | --- a/arch/x86/kvm/svm.c |
1624 | +++ b/arch/x86/kvm/svm.c |
1625 | @@ -184,6 +184,8 @@ struct vcpu_svm { |
1626 | u64 gs_base; |
1627 | } host; |
1628 | |
1629 | + u64 spec_ctrl; |
1630 | + |
1631 | u32 *msrpm; |
1632 | |
1633 | ulong nmi_iret_rip; |
1634 | @@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs { |
1635 | { .index = MSR_CSTAR, .always = true }, |
1636 | { .index = MSR_SYSCALL_MASK, .always = true }, |
1637 | #endif |
1638 | + { .index = MSR_IA32_SPEC_CTRL, .always = false }, |
1639 | + { .index = MSR_IA32_PRED_CMD, .always = false }, |
1640 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, |
1641 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, |
1642 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, |
1643 | @@ -529,6 +533,7 @@ struct svm_cpu_data { |
1644 | struct kvm_ldttss_desc *tss_desc; |
1645 | |
1646 | struct page *save_area; |
1647 | + struct vmcb *current_vmcb; |
1648 | }; |
1649 | |
1650 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
1651 | @@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index) |
1652 | return false; |
1653 | } |
1654 | |
1655 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) |
1656 | +{ |
1657 | + u8 bit_write; |
1658 | + unsigned long tmp; |
1659 | + u32 offset; |
1660 | + u32 *msrpm; |
1661 | + |
1662 | + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: |
1663 | + to_svm(vcpu)->msrpm; |
1664 | + |
1665 | + offset = svm_msrpm_offset(msr); |
1666 | + bit_write = 2 * (msr & 0x0f) + 1; |
1667 | + tmp = msrpm[offset]; |
1668 | + |
1669 | + BUG_ON(offset == MSR_INVALID); |
1670 | + |
1671 | + return !!test_bit(bit_write, &tmp); |
1672 | +} |
1673 | + |
1674 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
1675 | int read, int write) |
1676 | { |
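For reference when reading msr_write_intercepted() above: each 32-bit word of the SVM MSR permission map carries two bits per MSR, bit 2*n for "intercept reads" and bit 2*n+1 for "intercept writes" of the n-th MSR covered by that word; svm_msrpm_offset() (not part of this patch) selects the word. Worked through below for IA32_SPEC_CTRL, taking 0x48 as its MSR number.

    #include <stdio.h>

    int main(void)
    {
        unsigned msr = 0x48;                        /* IA32_SPEC_CTRL */
        unsigned bit_read  = 2 * (msr & 0x0f);      /* bit 16 of the selected word */
        unsigned bit_write = 2 * (msr & 0x0f) + 1;  /* bit 17: the bit tested above */

        printf("read bit %u, write bit %u\n", bit_read, bit_write);
        return 0;
    }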
1677 | @@ -1582,6 +1606,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
1678 | u32 dummy; |
1679 | u32 eax = 1; |
1680 | |
1681 | + svm->spec_ctrl = 0; |
1682 | + |
1683 | if (!init_event) { |
1684 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | |
1685 | MSR_IA32_APICBASE_ENABLE; |
1686 | @@ -1703,11 +1729,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) |
1687 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); |
1688 | kvm_vcpu_uninit(vcpu); |
1689 | kmem_cache_free(kvm_vcpu_cache, svm); |
1690 | + /* |
1691 | + * The vmcb page can be recycled, causing a false negative in |
1692 | + * svm_vcpu_load(). So do a full IBPB now. |
1693 | + */ |
1694 | + indirect_branch_prediction_barrier(); |
1695 | } |
1696 | |
1697 | static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1698 | { |
1699 | struct vcpu_svm *svm = to_svm(vcpu); |
1700 | + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
1701 | int i; |
1702 | |
1703 | if (unlikely(cpu != vcpu->cpu)) { |
1704 | @@ -1736,6 +1768,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
1705 | if (static_cpu_has(X86_FEATURE_RDTSCP)) |
1706 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); |
1707 | |
1708 | + if (sd->current_vmcb != svm->vmcb) { |
1709 | + sd->current_vmcb = svm->vmcb; |
1710 | + indirect_branch_prediction_barrier(); |
1711 | + } |
1712 | avic_vcpu_load(vcpu, cpu); |
1713 | } |
1714 | |
1715 | @@ -3593,6 +3629,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
1716 | case MSR_VM_CR: |
1717 | msr_info->data = svm->nested.vm_cr_msr; |
1718 | break; |
1719 | + case MSR_IA32_SPEC_CTRL: |
1720 | + if (!msr_info->host_initiated && |
1721 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) |
1722 | + return 1; |
1723 | + |
1724 | + msr_info->data = svm->spec_ctrl; |
1725 | + break; |
1726 | case MSR_IA32_UCODE_REV: |
1727 | msr_info->data = 0x01000065; |
1728 | break; |
1729 | @@ -3684,6 +3727,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) |
1730 | case MSR_IA32_TSC: |
1731 | kvm_write_tsc(vcpu, msr); |
1732 | break; |
1733 | + case MSR_IA32_SPEC_CTRL: |
1734 | + if (!msr->host_initiated && |
1735 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) |
1736 | + return 1; |
1737 | + |
1738 | + /* The STIBP bit doesn't fault even if it's not advertised */ |
1739 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
1740 | + return 1; |
1741 | + |
1742 | + svm->spec_ctrl = data; |
1743 | + |
1744 | + if (!data) |
1745 | + break; |
1746 | + |
1747 | + /* |
1748 | + * For non-nested: |
1749 | + * When it's written (to non-zero) for the first time, pass |
1750 | + * it through. |
1751 | + * |
1752 | + * For nested: |
1753 | + * The handling of the MSR bitmap for L2 guests is done in |
1754 | + * nested_svm_vmrun_msrpm. |
1755 | + * We update the L1 MSR bit as well since it will end up |
1756 | + * touching the MSR anyway now. |
1757 | + */ |
1758 | + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); |
1759 | + break; |
1760 | + case MSR_IA32_PRED_CMD: |
1761 | + if (!msr->host_initiated && |
1762 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) |
1763 | + return 1; |
1764 | + |
1765 | + if (data & ~PRED_CMD_IBPB) |
1766 | + return 1; |
1767 | + |
1768 | + if (!data) |
1769 | + break; |
1770 | + |
1771 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
1772 | + if (is_guest_mode(vcpu)) |
1773 | + break; |
1774 | + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); |
1775 | + break; |
1776 | case MSR_STAR: |
1777 | svm->vmcb->save.star = data; |
1778 | break; |
1779 | @@ -4936,6 +5022,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
1780 | |
1781 | local_irq_enable(); |
1782 | |
1783 | + /* |
1784 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
1785 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there |
1786 | + * is no need to worry about the conditional branch over the wrmsr |
1787 | + * being speculatively taken. |
1788 | + */ |
1789 | + if (svm->spec_ctrl) |
1790 | + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
1791 | + |
1792 | asm volatile ( |
1793 | "push %%" _ASM_BP "; \n\t" |
1794 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" |
1795 | @@ -5028,6 +5123,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
1796 | #endif |
1797 | ); |
1798 | |
1799 | + /* |
1800 | + * We do not use IBRS in the kernel. If this vCPU has used the |
1801 | + * SPEC_CTRL MSR it may have left it on; save the value and |
1802 | + * turn it off. This is much more efficient than blindly adding |
1803 | + * it to the atomic save/restore list. Especially as the former |
1804 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
1805 | + * |
1806 | + * For non-nested case: |
1807 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to |
1808 | + * save it. |
1809 | + * |
1810 | + * For nested case: |
1811 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to |
1812 | + * save it. |
1813 | + */ |
1814 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
1815 | + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
1816 | + |
1817 | + if (svm->spec_ctrl) |
1818 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
1819 | + |
1820 | /* Eliminate branch target predictions from guest mode */ |
1821 | vmexit_fill_RSB(); |
1822 | |
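Taken together, the svm_vcpu_run() changes implement a lazy save/restore of the guest's SPEC_CTRL around VMRUN. The sequence below is a condensed C-style view of that ordering only; struct vcpu, guest_run() and the rdmsr/wrmsr/intercept helpers are stand-ins, not kernel API.

    #include <stdint.h>

    #define MSR_IA32_SPEC_CTRL 0x48

    struct vcpu { uint64_t spec_ctrl; };

    /* Stand-ins for the real primitives; bodies elided. */
    void wrmsr(uint32_t msr, uint64_t val);
    uint64_t rdmsr(uint32_t msr);
    void guest_run(struct vcpu *v);
    int msr_write_intercepted(struct vcpu *v, uint32_t msr);

    void run_vcpu_once(struct vcpu *v)
    {
        /* 1. Restore the guest value only if the guest ever set one; an
         *    unconditional MSR write would cost every entry. */
        if (v->spec_ctrl)
            wrmsr(MSR_IA32_SPEC_CTRL, v->spec_ctrl);

        guest_run(v);                 /* VMRUN */

        /* 2. Read the MSR back only if guest writes are passed through,
         *    i.e. the guest could actually have changed it. */
        if (!msr_write_intercepted(v, MSR_IA32_SPEC_CTRL))
            v->spec_ctrl = rdmsr(MSR_IA32_SPEC_CTRL);

        /* 3. The host does not use IBRS, so switch it off again before
         *    running host code. */
        if (v->spec_ctrl)
            wrmsr(MSR_IA32_SPEC_CTRL, 0);
    }

The same three-step pattern reappears around vmx_vcpu_run() in the vmx.c hunks further down.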
1823 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
1824 | index c829d89e2e63..bee4c49f6dd0 100644 |
1825 | --- a/arch/x86/kvm/vmx.c |
1826 | +++ b/arch/x86/kvm/vmx.c |
1827 | @@ -34,6 +34,7 @@ |
1828 | #include <linux/tboot.h> |
1829 | #include <linux/hrtimer.h> |
1830 | #include <linux/frame.h> |
1831 | +#include <linux/nospec.h> |
1832 | #include "kvm_cache_regs.h" |
1833 | #include "x86.h" |
1834 | |
1835 | @@ -111,6 +112,14 @@ static u64 __read_mostly host_xss; |
1836 | static bool __read_mostly enable_pml = 1; |
1837 | module_param_named(pml, enable_pml, bool, S_IRUGO); |
1838 | |
1839 | +#define MSR_TYPE_R 1 |
1840 | +#define MSR_TYPE_W 2 |
1841 | +#define MSR_TYPE_RW 3 |
1842 | + |
1843 | +#define MSR_BITMAP_MODE_X2APIC 1 |
1844 | +#define MSR_BITMAP_MODE_X2APIC_APICV 2 |
1845 | +#define MSR_BITMAP_MODE_LM 4 |
1846 | + |
1847 | #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL |
1848 | |
1849 | /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ |
1850 | @@ -185,7 +194,6 @@ module_param(ple_window_max, int, S_IRUGO); |
1851 | extern const ulong vmx_return; |
1852 | |
1853 | #define NR_AUTOLOAD_MSRS 8 |
1854 | -#define VMCS02_POOL_SIZE 1 |
1855 | |
1856 | struct vmcs { |
1857 | u32 revision_id; |
1858 | @@ -210,6 +218,7 @@ struct loaded_vmcs { |
1859 | int soft_vnmi_blocked; |
1860 | ktime_t entry_time; |
1861 | s64 vnmi_blocked_time; |
1862 | + unsigned long *msr_bitmap; |
1863 | struct list_head loaded_vmcss_on_cpu_link; |
1864 | }; |
1865 | |
1866 | @@ -226,7 +235,7 @@ struct shared_msr_entry { |
1867 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, |
1868 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. |
1869 | * More than one of these structures may exist, if L1 runs multiple L2 guests. |
1870 | - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the |
1871 | + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the |
1872 | * underlying hardware which will be used to run L2. |
1873 | * This structure is packed to ensure that its layout is identical across |
1874 | * machines (necessary for live migration). |
1875 | @@ -409,13 +418,6 @@ struct __packed vmcs12 { |
1876 | */ |
1877 | #define VMCS12_SIZE 0x1000 |
1878 | |
1879 | -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ |
1880 | -struct vmcs02_list { |
1881 | - struct list_head list; |
1882 | - gpa_t vmptr; |
1883 | - struct loaded_vmcs vmcs02; |
1884 | -}; |
1885 | - |
1886 | /* |
1887 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need |
1888 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. |
1889 | @@ -440,15 +442,15 @@ struct nested_vmx { |
1890 | */ |
1891 | bool sync_shadow_vmcs; |
1892 | |
1893 | - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
1894 | - struct list_head vmcs02_pool; |
1895 | - int vmcs02_num; |
1896 | bool change_vmcs01_virtual_x2apic_mode; |
1897 | /* L2 must run next, and mustn't decide to exit to L1. */ |
1898 | bool nested_run_pending; |
1899 | + |
1900 | + struct loaded_vmcs vmcs02; |
1901 | + |
1902 | /* |
1903 | - * Guest pages referred to in vmcs02 with host-physical pointers, so |
1904 | - * we must keep them pinned while L2 runs. |
1905 | + * Guest pages referred to in the vmcs02 with host-physical |
1906 | + * pointers, so we must keep them pinned while L2 runs. |
1907 | */ |
1908 | struct page *apic_access_page; |
1909 | struct page *virtual_apic_page; |
1910 | @@ -457,8 +459,6 @@ struct nested_vmx { |
1911 | bool pi_pending; |
1912 | u16 posted_intr_nv; |
1913 | |
1914 | - unsigned long *msr_bitmap; |
1915 | - |
1916 | struct hrtimer preemption_timer; |
1917 | bool preemption_timer_expired; |
1918 | |
1919 | @@ -581,6 +581,7 @@ struct vcpu_vmx { |
1920 | struct kvm_vcpu vcpu; |
1921 | unsigned long host_rsp; |
1922 | u8 fail; |
1923 | + u8 msr_bitmap_mode; |
1924 | u32 exit_intr_info; |
1925 | u32 idt_vectoring_info; |
1926 | ulong rflags; |
1927 | @@ -592,6 +593,10 @@ struct vcpu_vmx { |
1928 | u64 msr_host_kernel_gs_base; |
1929 | u64 msr_guest_kernel_gs_base; |
1930 | #endif |
1931 | + |
1932 | + u64 arch_capabilities; |
1933 | + u64 spec_ctrl; |
1934 | + |
1935 | u32 vm_entry_controls_shadow; |
1936 | u32 vm_exit_controls_shadow; |
1937 | u32 secondary_exec_control; |
1938 | @@ -898,21 +903,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { |
1939 | |
1940 | static inline short vmcs_field_to_offset(unsigned long field) |
1941 | { |
1942 | - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); |
1943 | + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); |
1944 | + unsigned short offset; |
1945 | |
1946 | - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) |
1947 | + BUILD_BUG_ON(size > SHRT_MAX); |
1948 | + if (field >= size) |
1949 | return -ENOENT; |
1950 | |
1951 | - /* |
1952 | - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a |
1953 | - * generic mechanism. |
1954 | - */ |
1955 | - asm("lfence"); |
1956 | - |
1957 | - if (vmcs_field_to_offset_table[field] == 0) |
1958 | + field = array_index_nospec(field, size); |
1959 | + offset = vmcs_field_to_offset_table[field]; |
1960 | + if (offset == 0) |
1961 | return -ENOENT; |
1962 | - |
1963 | - return vmcs_field_to_offset_table[field]; |
1964 | + return offset; |
1965 | } |
1966 | |
1967 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) |
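vmcs_field_to_offset() now relies on array_index_nospec() from <linux/nospec.h> instead of an open-coded lfence: the index is clamped with a branch-free mask, so a mis-speculated out-of-bounds field reads element 0 rather than arbitrary memory. The generic fallback is roughly the following; the kernel's exact implementation (and the CMP/SBB variant used on x86) may differ, so treat this as an illustration of the masking idea only.

    #include <stdint.h>
    #include <stdio.h>

    /* All-ones when index < size, all-zeroes otherwise, with no conditional
     * branch the CPU could speculate past. */
    static inline uint64_t index_mask_nospec(uint64_t index, uint64_t size)
    {
        return ~(int64_t)(index | (size - 1 - index)) >> 63;
    }

    int main(void)
    {
        uint64_t size = 16;

        printf("%016llx\n", (unsigned long long)index_mask_nospec(3, size));  /* ffffffffffffffff */
        printf("%016llx\n", (unsigned long long)index_mask_nospec(42, size)); /* 0000000000000000 */
        return 0;
    }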
1968 | @@ -935,6 +937,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); |
1969 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); |
1970 | static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, |
1971 | u16 error_code); |
1972 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); |
1973 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
1974 | + u32 msr, int type); |
1975 | |
1976 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
1977 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
1978 | @@ -954,12 +959,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); |
1979 | enum { |
1980 | VMX_IO_BITMAP_A, |
1981 | VMX_IO_BITMAP_B, |
1982 | - VMX_MSR_BITMAP_LEGACY, |
1983 | - VMX_MSR_BITMAP_LONGMODE, |
1984 | - VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, |
1985 | - VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, |
1986 | - VMX_MSR_BITMAP_LEGACY_X2APIC, |
1987 | - VMX_MSR_BITMAP_LONGMODE_X2APIC, |
1988 | VMX_VMREAD_BITMAP, |
1989 | VMX_VMWRITE_BITMAP, |
1990 | VMX_BITMAP_NR |
1991 | @@ -969,12 +968,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; |
1992 | |
1993 | #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) |
1994 | #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) |
1995 | -#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) |
1996 | -#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) |
1997 | -#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) |
1998 | -#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) |
1999 | -#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) |
2000 | -#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) |
2001 | #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) |
2002 | #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) |
2003 | |
2004 | @@ -1918,6 +1911,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) |
2005 | vmcs_write32(EXCEPTION_BITMAP, eb); |
2006 | } |
2007 | |
2008 | +/* |
2009 | + * Check if MSR is intercepted for currently loaded MSR bitmap. |
2010 | + */ |
2011 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) |
2012 | +{ |
2013 | + unsigned long *msr_bitmap; |
2014 | + int f = sizeof(unsigned long); |
2015 | + |
2016 | + if (!cpu_has_vmx_msr_bitmap()) |
2017 | + return true; |
2018 | + |
2019 | + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; |
2020 | + |
2021 | + if (msr <= 0x1fff) { |
2022 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); |
2023 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2024 | + msr &= 0x1fff; |
2025 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
2026 | + } |
2027 | + |
2028 | + return true; |
2029 | +} |
2030 | + |
2031 | +/* |
2032 | + * Check if MSR is intercepted for L01 MSR bitmap. |
2033 | + */ |
2034 | +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) |
2035 | +{ |
2036 | + unsigned long *msr_bitmap; |
2037 | + int f = sizeof(unsigned long); |
2038 | + |
2039 | + if (!cpu_has_vmx_msr_bitmap()) |
2040 | + return true; |
2041 | + |
2042 | + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; |
2043 | + |
2044 | + if (msr <= 0x1fff) { |
2045 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); |
2046 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2047 | + msr &= 0x1fff; |
2048 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
2049 | + } |
2050 | + |
2051 | + return true; |
2052 | +} |
2053 | + |
2054 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
2055 | unsigned long entry, unsigned long exit) |
2056 | { |
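Both msr_write_intercepted() helpers above index the 4 KiB VMX MSR bitmap directly; as the later vmx_enable_intercept_for_msr() hunk spells out, the layout is read-low at byte 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00, one bit per MSR. A worked example for IA32_SPEC_CTRL, again taking 0x48 as its MSR number.

    #include <stdio.h>

    int main(void)
    {
        unsigned msr  = 0x48;               /* IA32_SPEC_CTRL, in the low range */
        unsigned byte = 0x800 + msr / 8;    /* write-low region of the bitmap */
        unsigned bit  = msr % 8;

        printf("write-intercept flag: byte 0x%x, bit %u\n", byte, bit);  /* 0x809, bit 0 */
        return 0;
    }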
2057 | @@ -2296,6 +2335,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
2058 | if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { |
2059 | per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; |
2060 | vmcs_load(vmx->loaded_vmcs->vmcs); |
2061 | + indirect_branch_prediction_barrier(); |
2062 | } |
2063 | |
2064 | if (!already_loaded) { |
2065 | @@ -2572,36 +2612,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
2066 | vmx->guest_msrs[from] = tmp; |
2067 | } |
2068 | |
2069 | -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) |
2070 | -{ |
2071 | - unsigned long *msr_bitmap; |
2072 | - |
2073 | - if (is_guest_mode(vcpu)) |
2074 | - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; |
2075 | - else if (cpu_has_secondary_exec_ctrls() && |
2076 | - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
2077 | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
2078 | - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { |
2079 | - if (is_long_mode(vcpu)) |
2080 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; |
2081 | - else |
2082 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; |
2083 | - } else { |
2084 | - if (is_long_mode(vcpu)) |
2085 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; |
2086 | - else |
2087 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; |
2088 | - } |
2089 | - } else { |
2090 | - if (is_long_mode(vcpu)) |
2091 | - msr_bitmap = vmx_msr_bitmap_longmode; |
2092 | - else |
2093 | - msr_bitmap = vmx_msr_bitmap_legacy; |
2094 | - } |
2095 | - |
2096 | - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); |
2097 | -} |
2098 | - |
2099 | /* |
2100 | * Set up the vmcs to automatically save and restore system |
2101 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
2102 | @@ -2642,7 +2652,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) |
2103 | vmx->save_nmsrs = save_nmsrs; |
2104 | |
2105 | if (cpu_has_vmx_msr_bitmap()) |
2106 | - vmx_set_msr_bitmap(&vmx->vcpu); |
2107 | + vmx_update_msr_bitmap(&vmx->vcpu); |
2108 | } |
2109 | |
2110 | /* |
2111 | @@ -3276,6 +3286,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2112 | case MSR_IA32_TSC: |
2113 | msr_info->data = guest_read_tsc(vcpu); |
2114 | break; |
2115 | + case MSR_IA32_SPEC_CTRL: |
2116 | + if (!msr_info->host_initiated && |
2117 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && |
2118 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2119 | + return 1; |
2120 | + |
2121 | + msr_info->data = to_vmx(vcpu)->spec_ctrl; |
2122 | + break; |
2123 | + case MSR_IA32_ARCH_CAPABILITIES: |
2124 | + if (!msr_info->host_initiated && |
2125 | + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) |
2126 | + return 1; |
2127 | + msr_info->data = to_vmx(vcpu)->arch_capabilities; |
2128 | + break; |
2129 | case MSR_IA32_SYSENTER_CS: |
2130 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); |
2131 | break; |
2132 | @@ -3383,6 +3407,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2133 | case MSR_IA32_TSC: |
2134 | kvm_write_tsc(vcpu, msr_info); |
2135 | break; |
2136 | + case MSR_IA32_SPEC_CTRL: |
2137 | + if (!msr_info->host_initiated && |
2138 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && |
2139 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2140 | + return 1; |
2141 | + |
2142 | + /* The STIBP bit doesn't fault even if it's not advertised */ |
2143 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
2144 | + return 1; |
2145 | + |
2146 | + vmx->spec_ctrl = data; |
2147 | + |
2148 | + if (!data) |
2149 | + break; |
2150 | + |
2151 | + /* |
2152 | + * For non-nested: |
2153 | + * When it's written (to non-zero) for the first time, pass |
2154 | + * it through. |
2155 | + * |
2156 | + * For nested: |
2157 | + * The handling of the MSR bitmap for L2 guests is done in |
2158 | + * nested_vmx_merge_msr_bitmap. We should not touch the |
2159 | + * vmcs02.msr_bitmap here since it gets completely overwritten |
2160 | + * in the merging. We update the vmcs01 here for L1 as well |
2161 | + * since it will end up touching the MSR anyway now. |
2162 | + */ |
2163 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, |
2164 | + MSR_IA32_SPEC_CTRL, |
2165 | + MSR_TYPE_RW); |
2166 | + break; |
2167 | + case MSR_IA32_PRED_CMD: |
2168 | + if (!msr_info->host_initiated && |
2169 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && |
2170 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) |
2171 | + return 1; |
2172 | + |
2173 | + if (data & ~PRED_CMD_IBPB) |
2174 | + return 1; |
2175 | + |
2176 | + if (!data) |
2177 | + break; |
2178 | + |
2179 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
2180 | + |
2181 | + /* |
2182 | + * For non-nested: |
2183 | + * When it's written (to non-zero) for the first time, pass |
2184 | + * it through. |
2185 | + * |
2186 | + * For nested: |
2187 | + * The handling of the MSR bitmap for L2 guests is done in |
2188 | + * nested_vmx_merge_msr_bitmap. We should not touch the |
2189 | + * vmcs02.msr_bitmap here since it gets completely overwritten |
2190 | + * in the merging. |
2191 | + */ |
2192 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, |
2193 | + MSR_TYPE_W); |
2194 | + break; |
2195 | + case MSR_IA32_ARCH_CAPABILITIES: |
2196 | + if (!msr_info->host_initiated) |
2197 | + return 1; |
2198 | + vmx->arch_capabilities = data; |
2199 | + break; |
2200 | case MSR_IA32_CR_PAT: |
2201 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
2202 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) |
2203 | @@ -3837,11 +3925,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) |
2204 | return vmcs; |
2205 | } |
2206 | |
2207 | -static struct vmcs *alloc_vmcs(void) |
2208 | -{ |
2209 | - return alloc_vmcs_cpu(raw_smp_processor_id()); |
2210 | -} |
2211 | - |
2212 | static void free_vmcs(struct vmcs *vmcs) |
2213 | { |
2214 | free_pages((unsigned long)vmcs, vmcs_config.order); |
2215 | @@ -3857,9 +3940,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
2216 | loaded_vmcs_clear(loaded_vmcs); |
2217 | free_vmcs(loaded_vmcs->vmcs); |
2218 | loaded_vmcs->vmcs = NULL; |
2219 | + if (loaded_vmcs->msr_bitmap) |
2220 | + free_page((unsigned long)loaded_vmcs->msr_bitmap); |
2221 | WARN_ON(loaded_vmcs->shadow_vmcs != NULL); |
2222 | } |
2223 | |
2224 | +static struct vmcs *alloc_vmcs(void) |
2225 | +{ |
2226 | + return alloc_vmcs_cpu(raw_smp_processor_id()); |
2227 | +} |
2228 | + |
2229 | +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
2230 | +{ |
2231 | + loaded_vmcs->vmcs = alloc_vmcs(); |
2232 | + if (!loaded_vmcs->vmcs) |
2233 | + return -ENOMEM; |
2234 | + |
2235 | + loaded_vmcs->shadow_vmcs = NULL; |
2236 | + loaded_vmcs_init(loaded_vmcs); |
2237 | + |
2238 | + if (cpu_has_vmx_msr_bitmap()) { |
2239 | + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
2240 | + if (!loaded_vmcs->msr_bitmap) |
2241 | + goto out_vmcs; |
2242 | + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); |
2243 | + } |
2244 | + return 0; |
2245 | + |
2246 | +out_vmcs: |
2247 | + free_loaded_vmcs(loaded_vmcs); |
2248 | + return -ENOMEM; |
2249 | +} |
2250 | + |
2251 | static void free_kvm_area(void) |
2252 | { |
2253 | int cpu; |
2254 | @@ -4918,10 +5030,8 @@ static void free_vpid(int vpid) |
2255 | spin_unlock(&vmx_vpid_lock); |
2256 | } |
2257 | |
2258 | -#define MSR_TYPE_R 1 |
2259 | -#define MSR_TYPE_W 2 |
2260 | -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2261 | - u32 msr, int type) |
2262 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2263 | + u32 msr, int type) |
2264 | { |
2265 | int f = sizeof(unsigned long); |
2266 | |
2267 | @@ -4955,6 +5065,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
2268 | } |
2269 | } |
2270 | |
2271 | +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, |
2272 | + u32 msr, int type) |
2273 | +{ |
2274 | + int f = sizeof(unsigned long); |
2275 | + |
2276 | + if (!cpu_has_vmx_msr_bitmap()) |
2277 | + return; |
2278 | + |
2279 | + /* |
2280 | + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals |
2281 | + * have the write-low and read-high bitmap offsets the wrong way round. |
2282 | + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
2283 | + */ |
2284 | + if (msr <= 0x1fff) { |
2285 | + if (type & MSR_TYPE_R) |
2286 | + /* read-low */ |
2287 | + __set_bit(msr, msr_bitmap + 0x000 / f); |
2288 | + |
2289 | + if (type & MSR_TYPE_W) |
2290 | + /* write-low */ |
2291 | + __set_bit(msr, msr_bitmap + 0x800 / f); |
2292 | + |
2293 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2294 | + msr &= 0x1fff; |
2295 | + if (type & MSR_TYPE_R) |
2296 | + /* read-high */ |
2297 | + __set_bit(msr, msr_bitmap + 0x400 / f); |
2298 | + |
2299 | + if (type & MSR_TYPE_W) |
2300 | + /* write-high */ |
2301 | + __set_bit(msr, msr_bitmap + 0xc00 / f); |
2302 | + |
2303 | + } |
2304 | +} |
2305 | + |
2306 | +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, |
2307 | + u32 msr, int type, bool value) |
2308 | +{ |
2309 | + if (value) |
2310 | + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); |
2311 | + else |
2312 | + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); |
2313 | +} |
2314 | + |
2315 | /* |
2316 | * If a msr is allowed by L0, we should check whether it is allowed by L1. |
2317 | * The corresponding bit will be cleared unless both of L0 and L1 allow it. |
2318 | @@ -5001,30 +5155,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, |
2319 | } |
2320 | } |
2321 | |
2322 | -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
2323 | +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) |
2324 | { |
2325 | - if (!longmode_only) |
2326 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
2327 | - msr, MSR_TYPE_R | MSR_TYPE_W); |
2328 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, |
2329 | - msr, MSR_TYPE_R | MSR_TYPE_W); |
2330 | + u8 mode = 0; |
2331 | + |
2332 | + if (cpu_has_secondary_exec_ctrls() && |
2333 | + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
2334 | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
2335 | + mode |= MSR_BITMAP_MODE_X2APIC; |
2336 | + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) |
2337 | + mode |= MSR_BITMAP_MODE_X2APIC_APICV; |
2338 | + } |
2339 | + |
2340 | + if (is_long_mode(vcpu)) |
2341 | + mode |= MSR_BITMAP_MODE_LM; |
2342 | + |
2343 | + return mode; |
2344 | } |
2345 | |
2346 | -static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) |
2347 | +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) |
2348 | + |
2349 | +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, |
2350 | + u8 mode) |
2351 | { |
2352 | - if (apicv_active) { |
2353 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, |
2354 | - msr, type); |
2355 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, |
2356 | - msr, type); |
2357 | - } else { |
2358 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, |
2359 | - msr, type); |
2360 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, |
2361 | - msr, type); |
2362 | + int msr; |
2363 | + |
2364 | + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { |
2365 | + unsigned word = msr / BITS_PER_LONG; |
2366 | + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; |
2367 | + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; |
2368 | + } |
2369 | + |
2370 | + if (mode & MSR_BITMAP_MODE_X2APIC) { |
2371 | + /* |
2372 | + * TPR reads and writes can be virtualized even if virtual interrupt |
2373 | + * delivery is not in use. |
2374 | + */ |
2375 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); |
2376 | + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { |
2377 | + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); |
2378 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); |
2379 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); |
2380 | + } |
2381 | } |
2382 | } |
2383 | |
2384 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) |
2385 | +{ |
2386 | + struct vcpu_vmx *vmx = to_vmx(vcpu); |
2387 | + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; |
2388 | + u8 mode = vmx_msr_bitmap_mode(vcpu); |
2389 | + u8 changed = mode ^ vmx->msr_bitmap_mode; |
2390 | + |
2391 | + if (!changed) |
2392 | + return; |
2393 | + |
2394 | + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, |
2395 | + !(mode & MSR_BITMAP_MODE_LM)); |
2396 | + |
2397 | + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) |
2398 | + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); |
2399 | + |
2400 | + vmx->msr_bitmap_mode = mode; |
2401 | +} |
2402 | + |
2403 | static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) |
2404 | { |
2405 | return enable_apicv; |
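The per-vCPU bitmap update above also replaces the bare MSR numbers that the old hardware_setup() code (removed further down) used for the x2APIC registers with the X2APIC_MSR() macro. The correspondence can be checked by hand; APIC_BASE_MSR (0x800) and the APIC_* register offsets below are the conventional values and are assumptions of this sketch rather than part of the patch.

    #include <stdio.h>

    #define APIC_BASE_MSR  0x800
    #define APIC_TASKPRI   0x80    /* TPR */
    #define APIC_EOI       0xB0
    #define APIC_TMCCT     0x390   /* current timer count */
    #define APIC_SELF_IPI  0x3F0

    #define X2APIC_MSR(r)  (APIC_BASE_MSR + ((r) >> 4))

    int main(void)
    {
        printf("TPR      -> 0x%x\n", X2APIC_MSR(APIC_TASKPRI));  /* 0x808 */
        printf("EOI      -> 0x%x\n", X2APIC_MSR(APIC_EOI));      /* 0x80b */
        printf("TMCCT    -> 0x%x\n", X2APIC_MSR(APIC_TMCCT));    /* 0x839 */
        printf("SELF_IPI -> 0x%x\n", X2APIC_MSR(APIC_SELF_IPI)); /* 0x83f */
        return 0;
    }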
2406 | @@ -5274,7 +5468,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) |
2407 | } |
2408 | |
2409 | if (cpu_has_vmx_msr_bitmap()) |
2410 | - vmx_set_msr_bitmap(vcpu); |
2411 | + vmx_update_msr_bitmap(vcpu); |
2412 | } |
2413 | |
2414 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
2415 | @@ -5461,7 +5655,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2416 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); |
2417 | } |
2418 | if (cpu_has_vmx_msr_bitmap()) |
2419 | - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
2420 | + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); |
2421 | |
2422 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
2423 | |
2424 | @@ -5539,6 +5733,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2425 | ++vmx->nmsrs; |
2426 | } |
2427 | |
2428 | + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) |
2429 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); |
2430 | |
2431 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); |
2432 | |
2433 | @@ -5567,6 +5763,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
2434 | u64 cr0; |
2435 | |
2436 | vmx->rmode.vm86_active = 0; |
2437 | + vmx->spec_ctrl = 0; |
2438 | |
2439 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
2440 | kvm_set_cr8(vcpu, 0); |
2441 | @@ -6744,7 +6941,7 @@ void vmx_enable_tdp(void) |
2442 | |
2443 | static __init int hardware_setup(void) |
2444 | { |
2445 | - int r = -ENOMEM, i, msr; |
2446 | + int r = -ENOMEM, i; |
2447 | |
2448 | rdmsrl_safe(MSR_EFER, &host_efer); |
2449 | |
2450 | @@ -6764,9 +6961,6 @@ static __init int hardware_setup(void) |
2451 | |
2452 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); |
2453 | |
2454 | - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
2455 | - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
2456 | - |
2457 | if (setup_vmcs_config(&vmcs_config) < 0) { |
2458 | r = -EIO; |
2459 | goto out; |
2460 | @@ -6835,42 +7029,8 @@ static __init int hardware_setup(void) |
2461 | kvm_tsc_scaling_ratio_frac_bits = 48; |
2462 | } |
2463 | |
2464 | - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
2465 | - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
2466 | - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
2467 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
2468 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
2469 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
2470 | - |
2471 | - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, |
2472 | - vmx_msr_bitmap_legacy, PAGE_SIZE); |
2473 | - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, |
2474 | - vmx_msr_bitmap_longmode, PAGE_SIZE); |
2475 | - memcpy(vmx_msr_bitmap_legacy_x2apic, |
2476 | - vmx_msr_bitmap_legacy, PAGE_SIZE); |
2477 | - memcpy(vmx_msr_bitmap_longmode_x2apic, |
2478 | - vmx_msr_bitmap_longmode, PAGE_SIZE); |
2479 | - |
2480 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
2481 | |
2482 | - for (msr = 0x800; msr <= 0x8ff; msr++) { |
2483 | - if (msr == 0x839 /* TMCCT */) |
2484 | - continue; |
2485 | - vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); |
2486 | - } |
2487 | - |
2488 | - /* |
2489 | - * TPR reads and writes can be virtualized even if virtual interrupt |
2490 | - * delivery is not in use. |
2491 | - */ |
2492 | - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); |
2493 | - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); |
2494 | - |
2495 | - /* EOI */ |
2496 | - vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); |
2497 | - /* SELF-IPI */ |
2498 | - vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); |
2499 | - |
2500 | if (enable_ept) |
2501 | vmx_enable_tdp(); |
2502 | else |
2503 | @@ -6973,94 +7133,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) |
2504 | return handle_nop(vcpu); |
2505 | } |
2506 | |
2507 | -/* |
2508 | - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. |
2509 | - * We could reuse a single VMCS for all the L2 guests, but we also want the |
2510 | - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this |
2511 | - * allows keeping them loaded on the processor, and in the future will allow |
2512 | - * optimizations where prepare_vmcs02 doesn't need to set all the fields on |
2513 | - * every entry if they never change. |
2514 | - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE |
2515 | - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. |
2516 | - * |
2517 | - * The following functions allocate and free a vmcs02 in this pool. |
2518 | - */ |
2519 | - |
2520 | -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ |
2521 | -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) |
2522 | -{ |
2523 | - struct vmcs02_list *item; |
2524 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
2525 | - if (item->vmptr == vmx->nested.current_vmptr) { |
2526 | - list_move(&item->list, &vmx->nested.vmcs02_pool); |
2527 | - return &item->vmcs02; |
2528 | - } |
2529 | - |
2530 | - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { |
2531 | - /* Recycle the least recently used VMCS. */ |
2532 | - item = list_last_entry(&vmx->nested.vmcs02_pool, |
2533 | - struct vmcs02_list, list); |
2534 | - item->vmptr = vmx->nested.current_vmptr; |
2535 | - list_move(&item->list, &vmx->nested.vmcs02_pool); |
2536 | - return &item->vmcs02; |
2537 | - } |
2538 | - |
2539 | - /* Create a new VMCS */ |
2540 | - item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
2541 | - if (!item) |
2542 | - return NULL; |
2543 | - item->vmcs02.vmcs = alloc_vmcs(); |
2544 | - item->vmcs02.shadow_vmcs = NULL; |
2545 | - if (!item->vmcs02.vmcs) { |
2546 | - kfree(item); |
2547 | - return NULL; |
2548 | - } |
2549 | - loaded_vmcs_init(&item->vmcs02); |
2550 | - item->vmptr = vmx->nested.current_vmptr; |
2551 | - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); |
2552 | - vmx->nested.vmcs02_num++; |
2553 | - return &item->vmcs02; |
2554 | -} |
2555 | - |
2556 | -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ |
2557 | -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) |
2558 | -{ |
2559 | - struct vmcs02_list *item; |
2560 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
2561 | - if (item->vmptr == vmptr) { |
2562 | - free_loaded_vmcs(&item->vmcs02); |
2563 | - list_del(&item->list); |
2564 | - kfree(item); |
2565 | - vmx->nested.vmcs02_num--; |
2566 | - return; |
2567 | - } |
2568 | -} |
2569 | - |
2570 | -/* |
2571 | - * Free all VMCSs saved for this vcpu, except the one pointed by |
2572 | - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs |
2573 | - * must be &vmx->vmcs01. |
2574 | - */ |
2575 | -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) |
2576 | -{ |
2577 | - struct vmcs02_list *item, *n; |
2578 | - |
2579 | - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); |
2580 | - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { |
2581 | - /* |
2582 | - * Something will leak if the above WARN triggers. Better than |
2583 | - * a use-after-free. |
2584 | - */ |
2585 | - if (vmx->loaded_vmcs == &item->vmcs02) |
2586 | - continue; |
2587 | - |
2588 | - free_loaded_vmcs(&item->vmcs02); |
2589 | - list_del(&item->list); |
2590 | - kfree(item); |
2591 | - vmx->nested.vmcs02_num--; |
2592 | - } |
2593 | -} |
2594 | - |
2595 | /* |
2596 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), |
2597 | * set the success or error code of an emulated VMX instruction, as specified |
2598 | @@ -7241,13 +7313,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2599 | { |
2600 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2601 | struct vmcs *shadow_vmcs; |
2602 | + int r; |
2603 | |
2604 | - if (cpu_has_vmx_msr_bitmap()) { |
2605 | - vmx->nested.msr_bitmap = |
2606 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
2607 | - if (!vmx->nested.msr_bitmap) |
2608 | - goto out_msr_bitmap; |
2609 | - } |
2610 | + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); |
2611 | + if (r < 0) |
2612 | + goto out_vmcs02; |
2613 | |
2614 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); |
2615 | if (!vmx->nested.cached_vmcs12) |
2616 | @@ -7264,9 +7334,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2617 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; |
2618 | } |
2619 | |
2620 | - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
2621 | - vmx->nested.vmcs02_num = 0; |
2622 | - |
2623 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, |
2624 | HRTIMER_MODE_REL_PINNED); |
2625 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; |
2626 | @@ -7278,9 +7345,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) |
2627 | kfree(vmx->nested.cached_vmcs12); |
2628 | |
2629 | out_cached_vmcs12: |
2630 | - free_page((unsigned long)vmx->nested.msr_bitmap); |
2631 | + free_loaded_vmcs(&vmx->nested.vmcs02); |
2632 | |
2633 | -out_msr_bitmap: |
2634 | +out_vmcs02: |
2635 | return -ENOMEM; |
2636 | } |
2637 | |
2638 | @@ -7423,10 +7490,6 @@ static void free_nested(struct vcpu_vmx *vmx) |
2639 | free_vpid(vmx->nested.vpid02); |
2640 | vmx->nested.posted_intr_nv = -1; |
2641 | vmx->nested.current_vmptr = -1ull; |
2642 | - if (vmx->nested.msr_bitmap) { |
2643 | - free_page((unsigned long)vmx->nested.msr_bitmap); |
2644 | - vmx->nested.msr_bitmap = NULL; |
2645 | - } |
2646 | if (enable_shadow_vmcs) { |
2647 | vmx_disable_shadow_vmcs(vmx); |
2648 | vmcs_clear(vmx->vmcs01.shadow_vmcs); |
2649 | @@ -7434,7 +7497,7 @@ static void free_nested(struct vcpu_vmx *vmx) |
2650 | vmx->vmcs01.shadow_vmcs = NULL; |
2651 | } |
2652 | kfree(vmx->nested.cached_vmcs12); |
2653 | - /* Unpin physical memory we referred to in current vmcs02 */ |
2654 | + /* Unpin physical memory we referred to in the vmcs02 */ |
2655 | if (vmx->nested.apic_access_page) { |
2656 | kvm_release_page_dirty(vmx->nested.apic_access_page); |
2657 | vmx->nested.apic_access_page = NULL; |
2658 | @@ -7450,7 +7513,7 @@ static void free_nested(struct vcpu_vmx *vmx) |
2659 | vmx->nested.pi_desc = NULL; |
2660 | } |
2661 | |
2662 | - nested_free_all_saved_vmcss(vmx); |
2663 | + free_loaded_vmcs(&vmx->nested.vmcs02); |
2664 | } |
2665 | |
2666 | /* Emulate the VMXOFF instruction */ |
2667 | @@ -7493,8 +7556,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) |
2668 | vmptr + offsetof(struct vmcs12, launch_state), |
2669 | &zero, sizeof(zero)); |
2670 | |
2671 | - nested_free_vmcs02(vmx, vmptr); |
2672 | - |
2673 | nested_vmx_succeed(vcpu); |
2674 | return kvm_skip_emulated_instruction(vcpu); |
2675 | } |
2676 | @@ -8406,10 +8467,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) |
2677 | |
2678 | /* |
2679 | * The host physical addresses of some pages of guest memory |
2680 | - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU |
2681 | - * may write to these pages via their host physical address while |
2682 | - * L2 is running, bypassing any address-translation-based dirty |
2683 | - * tracking (e.g. EPT write protection). |
2684 | + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC |
2685 | + * Page). The CPU may write to these pages via their host |
2686 | + * physical address while L2 is running, bypassing any |
2687 | + * address-translation-based dirty tracking (e.g. EPT write |
2688 | + * protection). |
2689 | * |
2690 | * Mark them dirty on every exit from L2 to prevent them from |
2691 | * getting out of sync with dirty tracking. |
2692 | @@ -8943,7 +9005,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) |
2693 | } |
2694 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); |
2695 | |
2696 | - vmx_set_msr_bitmap(vcpu); |
2697 | + vmx_update_msr_bitmap(vcpu); |
2698 | } |
2699 | |
2700 | static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) |
2701 | @@ -9129,14 +9191,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) |
2702 | #endif |
2703 | "pushf\n\t" |
2704 | __ASM_SIZE(push) " $%c[cs]\n\t" |
2705 | - "call *%[entry]\n\t" |
2706 | + CALL_NOSPEC |
2707 | : |
2708 | #ifdef CONFIG_X86_64 |
2709 | [sp]"=&r"(tmp), |
2710 | #endif |
2711 | ASM_CALL_CONSTRAINT |
2712 | : |
2713 | - [entry]"r"(entry), |
2714 | + THUNK_TARGET(entry), |
2715 | [ss]"i"(__KERNEL_DS), |
2716 | [cs]"i"(__KERNEL_CS) |
2717 | ); |
2718 | @@ -9373,6 +9435,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
2719 | |
2720 | vmx_arm_hv_timer(vcpu); |
2721 | |
2722 | + /* |
2723 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
2724 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there |
2725 | + * is no need to worry about the conditional branch over the wrmsr |
2726 | + * being speculatively taken. |
2727 | + */ |
2728 | + if (vmx->spec_ctrl) |
2729 | + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
2730 | + |
2731 | vmx->__launched = vmx->loaded_vmcs->launched; |
2732 | asm( |
2733 | /* Store host registers */ |
2734 | @@ -9491,6 +9562,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
2735 | #endif |
2736 | ); |
2737 | |
2738 | + /* |
2739 | + * We do not use IBRS in the kernel. If this vCPU has used the |
2740 | + * SPEC_CTRL MSR it may have left it on; save the value and |
2741 | + * turn it off. This is much more efficient than blindly adding |
2742 | + * it to the atomic save/restore list. Especially as the former |
2743 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
2744 | + * |
2745 | + * For non-nested case: |
2746 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to |
2747 | + * save it. |
2748 | + * |
2749 | + * For nested case: |
2750 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to |
2751 | + * save it. |
2752 | + */ |
2753 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
2754 | + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
2755 | + |
2756 | + if (vmx->spec_ctrl) |
2757 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
2758 | + |
2759 | /* Eliminate branch target predictions from guest mode */ |
2760 | vmexit_fill_RSB(); |
2761 | |
2762 | @@ -9604,6 +9696,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
2763 | { |
2764 | int err; |
2765 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
2766 | + unsigned long *msr_bitmap; |
2767 | int cpu; |
2768 | |
2769 | if (!vmx) |
2770 | @@ -9636,13 +9729,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
2771 | if (!vmx->guest_msrs) |
2772 | goto free_pml; |
2773 | |
2774 | - vmx->loaded_vmcs = &vmx->vmcs01; |
2775 | - vmx->loaded_vmcs->vmcs = alloc_vmcs(); |
2776 | - vmx->loaded_vmcs->shadow_vmcs = NULL; |
2777 | - if (!vmx->loaded_vmcs->vmcs) |
2778 | + err = alloc_loaded_vmcs(&vmx->vmcs01); |
2779 | + if (err < 0) |
2780 | goto free_msrs; |
2781 | - loaded_vmcs_init(vmx->loaded_vmcs); |
2782 | |
2783 | + msr_bitmap = vmx->vmcs01.msr_bitmap; |
2784 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); |
2785 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); |
2786 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); |
2787 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); |
2788 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); |
2789 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); |
2790 | + vmx->msr_bitmap_mode = 0; |
2791 | + |
2792 | + vmx->loaded_vmcs = &vmx->vmcs01; |
2793 | cpu = get_cpu(); |
2794 | vmx_vcpu_load(&vmx->vcpu, cpu); |
2795 | vmx->vcpu.cpu = cpu; |
2796 | @@ -10105,10 +10205,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
2797 | int msr; |
2798 | struct page *page; |
2799 | unsigned long *msr_bitmap_l1; |
2800 | - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; |
2801 | + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; |
2802 | + /* |
2803 | + * pred_cmd & spec_ctrl are trying to verify two things: |
2804 | + * |
2805 | + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This |
2806 | + * ensures that we do not accidentally generate an L02 MSR bitmap |
2807 | + * from the L12 MSR bitmap that is too permissive. |
2808 | + * 2. That L1 or L2s have actually used the MSR. This avoids |
2809 | + * unnecessary merging of the bitmap if the MSR is unused. This |
2810 | + * works properly because we only update the L01 MSR bitmap lazily. |
2811 | + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only |
2812 | + * updated to reflect this when L1 (or its L2s) actually write to |
2813 | + * the MSR. |
2814 | + */ |
2815 | + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); |
2816 | + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); |
2817 | |
2818 | - /* This shortcut is ok because we support only x2APIC MSRs so far. */ |
2819 | - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) |
2820 | + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && |
2821 | + !pred_cmd && !spec_ctrl) |
2822 | return false; |
2823 | |
2824 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); |
2825 | @@ -10141,6 +10256,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
2826 | MSR_TYPE_W); |
2827 | } |
2828 | } |
2829 | + |
2830 | + if (spec_ctrl) |
2831 | + nested_vmx_disable_intercept_for_msr( |
2832 | + msr_bitmap_l1, msr_bitmap_l0, |
2833 | + MSR_IA32_SPEC_CTRL, |
2834 | + MSR_TYPE_R | MSR_TYPE_W); |
2835 | + |
2836 | + if (pred_cmd) |
2837 | + nested_vmx_disable_intercept_for_msr( |
2838 | + msr_bitmap_l1, msr_bitmap_l0, |
2839 | + MSR_IA32_PRED_CMD, |
2840 | + MSR_TYPE_W); |
2841 | + |
2842 | kunmap(page); |
2843 | kvm_release_page_clean(page); |
2844 | |
2845 | @@ -10682,6 +10810,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
2846 | if (kvm_has_tsc_control) |
2847 | decache_tsc_multiplier(vmx); |
2848 | |
2849 | + if (cpu_has_vmx_msr_bitmap()) |
2850 | + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); |
2851 | + |
2852 | if (enable_vpid) { |
2853 | /* |
2854 | * There is no direct mapping between vpid02 and vpid12, the |
2855 | @@ -10903,20 +11034,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) |
2856 | { |
2857 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2858 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
2859 | - struct loaded_vmcs *vmcs02; |
2860 | u32 msr_entry_idx; |
2861 | u32 exit_qual; |
2862 | |
2863 | - vmcs02 = nested_get_current_vmcs02(vmx); |
2864 | - if (!vmcs02) |
2865 | - return -ENOMEM; |
2866 | - |
2867 | enter_guest_mode(vcpu); |
2868 | |
2869 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
2870 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
2871 | |
2872 | - vmx_switch_vmcs(vcpu, vmcs02); |
2873 | + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); |
2874 | vmx_segment_cache_clear(vmx); |
2875 | |
2876 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { |
2877 | @@ -11485,7 +11611,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
2878 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
2879 | |
2880 | if (cpu_has_vmx_msr_bitmap()) |
2881 | - vmx_set_msr_bitmap(vcpu); |
2882 | + vmx_update_msr_bitmap(vcpu); |
2883 | |
2884 | if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, |
2885 | vmcs12->vm_exit_msr_load_count)) |
2886 | @@ -11534,10 +11660,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
2887 | vm_exit_controls_reset_shadow(vmx); |
2888 | vmx_segment_cache_clear(vmx); |
2889 | |
2890 | - /* if no vmcs02 cache requested, remove the one we used */ |
2891 | - if (VMCS02_POOL_SIZE == 0) |
2892 | - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
2893 | - |
2894 | /* Update any VMCS fields that might have changed while L2 ran */ |
2895 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |
2896 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); |
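
The SPEC_CTRL handling added to vmx_vcpu_run() above boils down to: restore the guest's value just before vmentry (only if the guest ever wrote a non-zero value), and after vmexit re-read the MSR only when guest writes are passed through, then clear it so the host continues with SPEC_CTRL at zero. Below is a minimal userspace sketch of that control flow, not the kernel code itself: wrmsrl()/rdmsrl() are replaced by stand-in macros over a plain variable, and msr_write_intercepted() is reduced to a flag.

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_IA32_SPEC_CTRL 0x00000048

    static uint64_t hw_spec_ctrl;                   /* stands in for the real MSR */
    #define wrmsrl(msr, val)  ((void)(msr), hw_spec_ctrl = (val))
    #define rdmsrl(msr, val)  ((void)(msr), (val) = hw_spec_ctrl)

    struct vcpu_vmx {
        uint64_t spec_ctrl;             /* last value the guest wrote */
        bool     spec_ctrl_passthrough; /* is the MSR in the passthrough bitmap? */
    };

    static bool msr_write_intercepted(struct vcpu_vmx *vmx, uint32_t msr)
    {
        (void)msr;
        return !vmx->spec_ctrl_passthrough;
    }

    static void vcpu_run_model(struct vcpu_vmx *vmx)
    {
        /* Before vmentry: hand the guest its SPEC_CTRL value if non-zero. */
        if (vmx->spec_ctrl)
            wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

        /* ... VMLAUNCH/VMRESUME and the guest would run here ... */

        /* After vmexit: if guest writes are not intercepted, the guest may
         * have changed the MSR behind our back, so re-read it before
         * turning it off for the host. */
        if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
            rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

        if (vmx->spec_ctrl)
            wrmsrl(MSR_IA32_SPEC_CTRL, 0);
    }

    int main(void)
    {
        struct vcpu_vmx vmx = { .spec_ctrl = 1, .spec_ctrl_passthrough = true };

        vcpu_run_model(&vmx);
        printf("host SPEC_CTRL after vmexit: %llu\n",
               (unsigned long long)hw_spec_ctrl);   /* expect 0 */
        return 0;
    }

The point of the two "if non-zero" checks is that a guest which never touches SPEC_CTRL pays nothing on either side of the world switch.
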
2897 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
2898 | index c53298dfbf50..ac381437c291 100644 |
2899 | --- a/arch/x86/kvm/x86.c |
2900 | +++ b/arch/x86/kvm/x86.c |
2901 | @@ -1009,6 +1009,7 @@ static u32 msrs_to_save[] = { |
2902 | #endif |
2903 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
2904 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, |
2905 | + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES |
2906 | }; |
2907 | |
2908 | static unsigned num_msrs_to_save; |
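
Adding MSR_IA32_SPEC_CTRL and MSR_IA32_ARCH_CAPABILITIES to msrs_to_save[] is what makes them migratable: entries in that table (subject to a host-support check) are reported through KVM_GET_MSR_INDEX_LIST, which VMMs use to decide which MSRs to read from a vCPU and transfer. A rough userspace probe of that list, with minimal error handling; the 0x48 constant is MSR_IA32_SPEC_CTRL:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);
        struct kvm_msr_list probe = { .nmsrs = 0 };
        struct kvm_msr_list *list;
        unsigned int i;

        if (kvm < 0)
            return 1;

        /* First call fails with E2BIG but fills in the required count. */
        ioctl(kvm, KVM_GET_MSR_INDEX_LIST, &probe);

        list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
        if (!list)
            return 1;
        list->nmsrs = probe.nmsrs;

        if (ioctl(kvm, KVM_GET_MSR_INDEX_LIST, list) == 0)
            for (i = 0; i < list->nmsrs; i++)
                if (list->indices[i] == 0x48)   /* MSR_IA32_SPEC_CTRL */
                    printf("SPEC_CTRL is in the migratable MSR list\n");

        free(list);
        return 0;
    }
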
2909 | diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile |
2910 | index f23934bbaf4e..69a473919260 100644 |
2911 | --- a/arch/x86/lib/Makefile |
2912 | +++ b/arch/x86/lib/Makefile |
2913 | @@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
2914 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o |
2915 | lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o |
2916 | lib-$(CONFIG_RETPOLINE) += retpoline.o |
2917 | +OBJECT_FILES_NON_STANDARD_retpoline.o :=y |
2918 | |
2919 | obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o |
2920 | |
2921 | diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S |
2922 | index c97d935a29e8..49b167f73215 100644 |
2923 | --- a/arch/x86/lib/getuser.S |
2924 | +++ b/arch/x86/lib/getuser.S |
2925 | @@ -40,6 +40,8 @@ ENTRY(__get_user_1) |
2926 | mov PER_CPU_VAR(current_task), %_ASM_DX |
2927 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
2928 | jae bad_get_user |
2929 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
2930 | + and %_ASM_DX, %_ASM_AX |
2931 | ASM_STAC |
2932 | 1: movzbl (%_ASM_AX),%edx |
2933 | xor %eax,%eax |
2934 | @@ -54,6 +56,8 @@ ENTRY(__get_user_2) |
2935 | mov PER_CPU_VAR(current_task), %_ASM_DX |
2936 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
2937 | jae bad_get_user |
2938 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
2939 | + and %_ASM_DX, %_ASM_AX |
2940 | ASM_STAC |
2941 | 2: movzwl -1(%_ASM_AX),%edx |
2942 | xor %eax,%eax |
2943 | @@ -68,6 +72,8 @@ ENTRY(__get_user_4) |
2944 | mov PER_CPU_VAR(current_task), %_ASM_DX |
2945 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
2946 | jae bad_get_user |
2947 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
2948 | + and %_ASM_DX, %_ASM_AX |
2949 | ASM_STAC |
2950 | 3: movl -3(%_ASM_AX),%edx |
2951 | xor %eax,%eax |
2952 | @@ -83,6 +89,8 @@ ENTRY(__get_user_8) |
2953 | mov PER_CPU_VAR(current_task), %_ASM_DX |
2954 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
2955 | jae bad_get_user |
2956 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
2957 | + and %_ASM_DX, %_ASM_AX |
2958 | ASM_STAC |
2959 | 4: movq -7(%_ASM_AX),%rdx |
2960 | xor %eax,%eax |
2961 | @@ -94,6 +102,8 @@ ENTRY(__get_user_8) |
2962 | mov PER_CPU_VAR(current_task), %_ASM_DX |
2963 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
2964 | jae bad_get_user_8 |
2965 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
2966 | + and %_ASM_DX, %_ASM_AX |
2967 | ASM_STAC |
2968 | 4: movl -7(%_ASM_AX),%edx |
2969 | 5: movl -3(%_ASM_AX),%ecx |
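
The sbb/and pair inserted after each bounds check above is the assembly form of array_index_mask_nospec(): the preceding cmp leaves the carry flag set exactly when the user pointer is below the address limit, sbb turns that carry into an all-ones or all-zeroes mask without any branch, and the and either leaves the pointer intact or forces it to NULL under mis-speculation. A standalone C model of the effect follows; the real code has to stay branchless, which C cannot guarantee, hence the asm. The limit value is just an illustrative 64-bit number.

    #include <stdio.h>
    #include <stdint.h>

    /* Model of: cmp limit, addr ; jae bad_get_user ; sbb mask, mask ;
     * and mask, addr.  mask is ~0 when addr < limit (access allowed),
     * 0 otherwise, so a speculatively executed out-of-bounds access
     * dereferences address 0 instead of attacker-chosen memory.
     */
    static uintptr_t mask_user_ptr(uintptr_t addr, uintptr_t limit)
    {
        uintptr_t mask = 0UL - (uintptr_t)(addr < limit);

        return addr & mask;
    }

    int main(void)
    {
        uintptr_t limit = 0x00007ffffffff000UL;   /* 64-bit example value */

        printf("%#lx\n", (unsigned long)mask_user_ptr(0x1000, limit));
        printf("%#lx\n", (unsigned long)mask_user_ptr(0xffff880000000000UL, limit));
        return 0;
    }
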
2970 | diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S |
2971 | index c909961e678a..480edc3a5e03 100644 |
2972 | --- a/arch/x86/lib/retpoline.S |
2973 | +++ b/arch/x86/lib/retpoline.S |
2974 | @@ -7,6 +7,7 @@ |
2975 | #include <asm/alternative-asm.h> |
2976 | #include <asm/export.h> |
2977 | #include <asm/nospec-branch.h> |
2978 | +#include <asm/bitsperlong.h> |
2979 | |
2980 | .macro THUNK reg |
2981 | .section .text.__x86.indirect_thunk |
2982 | @@ -46,3 +47,58 @@ GENERATE_THUNK(r13) |
2983 | GENERATE_THUNK(r14) |
2984 | GENERATE_THUNK(r15) |
2985 | #endif |
2986 | + |
2987 | +/* |
2988 | + * Fill the CPU return stack buffer. |
2989 | + * |
2990 | + * Each entry in the RSB, if used for a speculative 'ret', contains an |
2991 | + * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
2992 | + * |
2993 | + * This is required in various cases for retpoline and IBRS-based |
2994 | + * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
2995 | + * eliminate potentially bogus entries from the RSB, and sometimes |
2996 | + * purely to ensure that it doesn't get empty, which on some CPUs would |
2997 | + * allow predictions from other (unwanted!) sources to be used. |
2998 | + * |
2999 | + * Google experimented with loop-unrolling and this turned out to be |
3000 | + * the optimal version - two calls, each with their own speculation |
3001 | + * trap should their return address end up getting used, in a loop. |
3002 | + */ |
3003 | +.macro STUFF_RSB nr:req sp:req |
3004 | + mov $(\nr / 2), %_ASM_BX |
3005 | + .align 16 |
3006 | +771: |
3007 | + call 772f |
3008 | +773: /* speculation trap */ |
3009 | + pause |
3010 | + lfence |
3011 | + jmp 773b |
3012 | + .align 16 |
3013 | +772: |
3014 | + call 774f |
3015 | +775: /* speculation trap */ |
3016 | + pause |
3017 | + lfence |
3018 | + jmp 775b |
3019 | + .align 16 |
3020 | +774: |
3021 | + dec %_ASM_BX |
3022 | + jnz 771b |
3023 | + add $((BITS_PER_LONG/8) * \nr), \sp |
3024 | +.endm |
3025 | + |
3026 | +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
3027 | + |
3028 | +ENTRY(__fill_rsb) |
3029 | + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP |
3030 | + ret |
3031 | +END(__fill_rsb) |
3032 | +EXPORT_SYMBOL_GPL(__fill_rsb) |
3033 | + |
3034 | +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
3035 | + |
3036 | +ENTRY(__clear_rsb) |
3037 | + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP |
3038 | + ret |
3039 | +END(__clear_rsb) |
3040 | +EXPORT_SYMBOL_GPL(__clear_rsb) |
3041 | diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c |
3042 | index 1b377f734e64..7add8ba06887 100644 |
3043 | --- a/arch/x86/lib/usercopy_32.c |
3044 | +++ b/arch/x86/lib/usercopy_32.c |
3045 | @@ -331,12 +331,12 @@ do { \ |
3046 | |
3047 | unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) |
3048 | { |
3049 | - stac(); |
3050 | + __uaccess_begin_nospec(); |
3051 | if (movsl_is_ok(to, from, n)) |
3052 | __copy_user(to, from, n); |
3053 | else |
3054 | n = __copy_user_intel(to, from, n); |
3055 | - clac(); |
3056 | + __uaccess_end(); |
3057 | return n; |
3058 | } |
3059 | EXPORT_SYMBOL(__copy_user_ll); |
3060 | @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); |
3061 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, |
3062 | unsigned long n) |
3063 | { |
3064 | - stac(); |
3065 | + __uaccess_begin_nospec(); |
3066 | #ifdef CONFIG_X86_INTEL_USERCOPY |
3067 | if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) |
3068 | n = __copy_user_intel_nocache(to, from, n); |
3069 | @@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr |
3070 | #else |
3071 | __copy_user(to, from, n); |
3072 | #endif |
3073 | - clac(); |
3074 | + __uaccess_end(); |
3075 | return n; |
3076 | } |
3077 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); |
3078 | diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c |
3079 | index 5bfe61a5e8e3..012d02624848 100644 |
3080 | --- a/arch/x86/mm/tlb.c |
3081 | +++ b/arch/x86/mm/tlb.c |
3082 | @@ -6,13 +6,14 @@ |
3083 | #include <linux/interrupt.h> |
3084 | #include <linux/export.h> |
3085 | #include <linux/cpu.h> |
3086 | +#include <linux/debugfs.h> |
3087 | |
3088 | #include <asm/tlbflush.h> |
3089 | #include <asm/mmu_context.h> |
3090 | +#include <asm/nospec-branch.h> |
3091 | #include <asm/cache.h> |
3092 | #include <asm/apic.h> |
3093 | #include <asm/uv/uv.h> |
3094 | -#include <linux/debugfs.h> |
3095 | |
3096 | /* |
3097 | * TLB flushing, formerly SMP-only |
3098 | @@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
3099 | } else { |
3100 | u16 new_asid; |
3101 | bool need_flush; |
3102 | + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); |
3103 | + |
3104 | + /* |
3105 | + * Avoid user/user BTB poisoning by flushing the branch |
3106 | + * predictor when switching between processes. This stops |
3107 | + * one process from doing Spectre-v2 attacks on another. |
3108 | + * |
3109 | + * As an optimization, flush indirect branches only when |
3110 | + * switching into processes that disable dumping. This |
3111 | + * protects high value processes like gpg, without having |
3112 | + * too high performance overhead. IBPB is *expensive*! |
3113 | + * |
3114 | + * This will not flush branches when switching into kernel |
3115 | + * threads. It will also not flush if we switch to idle |
3116 | + * thread and back to the same process. It will flush if we |
3117 | + * switch to a different non-dumpable process. |
3118 | + */ |
3119 | + if (tsk && tsk->mm && |
3120 | + tsk->mm->context.ctx_id != last_ctx_id && |
3121 | + get_dumpable(tsk->mm) != SUID_DUMP_USER) |
3122 | + indirect_branch_prediction_barrier(); |
3123 | |
3124 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { |
3125 | /* |
3126 | @@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, |
3127 | trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); |
3128 | } |
3129 | |
3130 | + /* |
3131 | + * Record last user mm's context id, so we can avoid |
3132 | + * flushing branch buffer with IBPB if we switch back |
3133 | + * to the same user. |
3134 | + */ |
3135 | + if (next != &init_mm) |
3136 | + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); |
3137 | + |
3138 | this_cpu_write(cpu_tlbstate.loaded_mm, next); |
3139 | this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); |
3140 | } |
3141 | @@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void) |
3142 | write_cr3(build_cr3(mm->pgd, 0)); |
3143 | |
3144 | /* Reinitialize tlbstate. */ |
3145 | + this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); |
3146 | this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); |
3147 | this_cpu_write(cpu_tlbstate.next_asid, 1); |
3148 | this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); |
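
The tlb.c hunks above implement "IBPB on context switch, but only when it is likely to matter": the barrier is issued when switching to a different user mm (tracked via last_ctx_id) whose owner has opted out of core dumps, on the theory that secret-handling processes such as gpg mark themselves non-dumpable. A small userspace model of just that decision, with made-up mm structures standing in for the real ones:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct mm_model {
        uint64_t ctx_id;     /* stands in for mm->context.ctx_id */
        bool     dumpable;   /* stands in for get_dumpable(mm) == SUID_DUMP_USER */
    };

    /* Mirror of the condition added to switch_mm_irqs_off(): no barrier
     * for kernel threads, for a switch back to the same mm, or for
     * dumpable targets; otherwise pay for an IBPB.
     */
    static bool ibpb_needed(const struct mm_model *next_mm, uint64_t last_ctx_id)
    {
        if (!next_mm)
            return false;
        if (next_mm->ctx_id == last_ctx_id)
            return false;
        return !next_mm->dumpable;
    }

    int main(void)
    {
        struct mm_model gpg   = { .ctx_id = 42, .dumpable = false };
        struct mm_model shell = { .ctx_id =  7, .dumpable = true  };

        printf("shell -> gpg : IBPB=%d\n", ibpb_needed(&gpg, shell.ctx_id));   /* 1 */
        printf("gpg -> shell : IBPB=%d\n", ibpb_needed(&shell, gpg.ctx_id));   /* 0 */
        printf("gpg -> gpg   : IBPB=%d\n", ibpb_needed(&gpg, gpg.ctx_id));     /* 0 */
        return 0;
    }

Note that last_ctx_id is only updated when the next mm is a real user mm, which is why a detour through a kernel thread and back to the same process does not trigger a second barrier.
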
3149 | diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c |
3150 | index db040b378224..9180b9bd5821 100644 |
3151 | --- a/drivers/auxdisplay/img-ascii-lcd.c |
3152 | +++ b/drivers/auxdisplay/img-ascii-lcd.c |
3153 | @@ -441,3 +441,7 @@ static struct platform_driver img_ascii_lcd_driver = { |
3154 | .remove = img_ascii_lcd_remove, |
3155 | }; |
3156 | module_platform_driver(img_ascii_lcd_driver); |
3157 | + |
3158 | +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); |
3159 | +MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>"); |
3160 | +MODULE_LICENSE("GPL"); |
3161 | diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c |
3162 | index d9ab7c75b14f..e0c73ceba2ed 100644 |
3163 | --- a/drivers/fpga/fpga-region.c |
3164 | +++ b/drivers/fpga/fpga-region.c |
3165 | @@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region) |
3166 | mgr_node = of_parse_phandle(np, "fpga-mgr", 0); |
3167 | if (mgr_node) { |
3168 | mgr = of_fpga_mgr_get(mgr_node); |
3169 | + of_node_put(mgr_node); |
3170 | of_node_put(np); |
3171 | return mgr; |
3172 | } |
3173 | @@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region, |
3174 | parent_br = region_np->parent; |
3175 | |
3176 | /* If overlay has a list of bridges, use it. */ |
3177 | - if (of_parse_phandle(overlay, "fpga-bridges", 0)) |
3178 | + br = of_parse_phandle(overlay, "fpga-bridges", 0); |
3179 | + if (br) { |
3180 | + of_node_put(br); |
3181 | np = overlay; |
3182 | - else |
3183 | + } else { |
3184 | np = region_np; |
3185 | + } |
3186 | |
3187 | for (i = 0; ; i++) { |
3188 | br = of_parse_phandle(np, "fpga-bridges", i); |
3189 | @@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region, |
3190 | break; |
3191 | |
3192 | /* If parent bridge is in list, skip it. */ |
3193 | - if (br == parent_br) |
3194 | + if (br == parent_br) { |
3195 | + of_node_put(br); |
3196 | continue; |
3197 | + } |
3198 | |
3199 | /* If node is a bridge, get it and add to list */ |
3200 | ret = fpga_bridge_get_to_list(br, region->info, |
3201 | ®ion->bridge_list); |
3202 | + of_node_put(br); |
3203 | |
3204 | /* If any of the bridges are in use, give up */ |
3205 | if (ret == -EBUSY) { |
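
The fpga-region changes above are all one leak fix: of_parse_phandle() returns a device node with its refcount raised, so every path that obtains one, including the "does this property exist?" probe and the "skip the parent bridge" branch, must drop it with of_node_put(). A toy userspace model of the rule, with the OF calls replaced by a counter; the names here are stand-ins, not the real API:

    #include <stdio.h>

    static int live_refs;   /* how many node references are currently held */

    static void *get_node(void)        { live_refs++; return &live_refs; }
    static void put_node(void *node)   { if (node) live_refs--; }

    int main(void)
    {
        /* Existence check: the old code took the equivalent of this
         * reference and never dropped it. */
        void *br = get_node();
        if (br) {
            put_node(br);              /* fixed: drop the peeked-at reference */
            /* ... use the overlay's own bridge list ... */
        }

        /* Bridge loop: each iteration now drops its reference whether the
         * node is skipped or handed off to the bridge list. */
        for (int i = 0; i < 3; i++) {
            void *node = get_node();
            /* ... fpga_bridge_get_to_list(node, ...) would run here ... */
            put_node(node);
        }

        printf("leaked references: %d\n", live_refs);   /* expect 0 */
        return 0;
    }
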
3206 | diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c |
3207 | index 98fbb628d5bd..38411e1c155b 100644 |
3208 | --- a/drivers/iio/accel/kxsd9-i2c.c |
3209 | +++ b/drivers/iio/accel/kxsd9-i2c.c |
3210 | @@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = { |
3211 | .id_table = kxsd9_i2c_id, |
3212 | }; |
3213 | module_i2c_driver(kxsd9_i2c_driver); |
3214 | + |
3215 | +MODULE_LICENSE("GPL v2"); |
3216 | +MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface"); |
3217 | diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c |
3218 | index 47d24ae5462f..fe3d7826783c 100644 |
3219 | --- a/drivers/iio/adc/qcom-vadc-common.c |
3220 | +++ b/drivers/iio/adc/qcom-vadc-common.c |
3221 | @@ -5,6 +5,7 @@ |
3222 | #include <linux/math64.h> |
3223 | #include <linux/log2.h> |
3224 | #include <linux/err.h> |
3225 | +#include <linux/module.h> |
3226 | |
3227 | #include "qcom-vadc-common.h" |
3228 | |
3229 | @@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value) |
3230 | return __ffs64(value / VADC_DECIMATION_MIN); |
3231 | } |
3232 | EXPORT_SYMBOL(qcom_vadc_decimation_from_dt); |
3233 | + |
3234 | +MODULE_LICENSE("GPL v2"); |
3235 | +MODULE_DESCRIPTION("Qualcomm ADC common functionality"); |
3236 | diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3237 | index 866aa3ce1ac9..6cf0006d4c8d 100644 |
3238 | --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3239 | +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
3240 | @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) |
3241 | return 0; |
3242 | } |
3243 | EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); |
3244 | + |
3245 | +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); |
3246 | +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); |
3247 | +MODULE_LICENSE("GPL v2"); |
3248 | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c |
3249 | index 854995e1cae7..7e7e6eb95b0a 100644 |
3250 | --- a/drivers/tty/serial/serial_core.c |
3251 | +++ b/drivers/tty/serial/serial_core.c |
3252 | @@ -974,6 +974,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, |
3253 | } |
3254 | } else { |
3255 | retval = uart_startup(tty, state, 1); |
3256 | + if (retval == 0) |
3257 | + tty_port_set_initialized(port, true); |
3258 | if (retval > 0) |
3259 | retval = 0; |
3260 | } |
3261 | diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h |
3262 | index 1c65817673db..41615f38bcff 100644 |
3263 | --- a/include/linux/fdtable.h |
3264 | +++ b/include/linux/fdtable.h |
3265 | @@ -10,6 +10,7 @@ |
3266 | #include <linux/compiler.h> |
3267 | #include <linux/spinlock.h> |
3268 | #include <linux/rcupdate.h> |
3269 | +#include <linux/nospec.h> |
3270 | #include <linux/types.h> |
3271 | #include <linux/init.h> |
3272 | #include <linux/fs.h> |
3273 | @@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i |
3274 | { |
3275 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); |
3276 | |
3277 | - if (fd < fdt->max_fds) |
3278 | + if (fd < fdt->max_fds) { |
3279 | + fd = array_index_nospec(fd, fdt->max_fds); |
3280 | return rcu_dereference_raw(fdt->fd[fd]); |
3281 | + } |
3282 | return NULL; |
3283 | } |
3284 | |
3285 | diff --git a/include/linux/init.h b/include/linux/init.h |
3286 | index ea1b31101d9e..506a98151131 100644 |
3287 | --- a/include/linux/init.h |
3288 | +++ b/include/linux/init.h |
3289 | @@ -5,6 +5,13 @@ |
3290 | #include <linux/compiler.h> |
3291 | #include <linux/types.h> |
3292 | |
3293 | +/* Built-in __init functions needn't be compiled with retpoline */ |
3294 | +#if defined(RETPOLINE) && !defined(MODULE) |
3295 | +#define __noretpoline __attribute__((indirect_branch("keep"))) |
3296 | +#else |
3297 | +#define __noretpoline |
3298 | +#endif |
3299 | + |
3300 | /* These macros are used to mark some functions or |
3301 | * initialized data (doesn't apply to uninitialized data) |
3302 | * as `initialization' functions. The kernel can take this |
3303 | @@ -40,7 +47,7 @@ |
3304 | |
3305 | /* These are for everybody (although not all archs will actually |
3306 | discard it in modules) */ |
3307 | -#define __init __section(.init.text) __cold __latent_entropy |
3308 | +#define __init __section(.init.text) __cold __latent_entropy __noretpoline |
3309 | #define __initdata __section(.init.data) |
3310 | #define __initconst __section(.init.rodata) |
3311 | #define __exitdata __section(.exit.data) |
3312 | diff --git a/include/linux/module.h b/include/linux/module.h |
3313 | index c69b49abe877..1d8f245967be 100644 |
3314 | --- a/include/linux/module.h |
3315 | +++ b/include/linux/module.h |
3316 | @@ -801,6 +801,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, |
3317 | static inline void module_bug_cleanup(struct module *mod) {} |
3318 | #endif /* CONFIG_GENERIC_BUG */ |
3319 | |
3320 | +#ifdef RETPOLINE |
3321 | +extern bool retpoline_module_ok(bool has_retpoline); |
3322 | +#else |
3323 | +static inline bool retpoline_module_ok(bool has_retpoline) |
3324 | +{ |
3325 | + return true; |
3326 | +} |
3327 | +#endif |
3328 | + |
3329 | #ifdef CONFIG_MODULE_SIG |
3330 | static inline bool module_sig_ok(struct module *module) |
3331 | { |
3332 | diff --git a/include/linux/nospec.h b/include/linux/nospec.h |
3333 | new file mode 100644 |
3334 | index 000000000000..b99bced39ac2 |
3335 | --- /dev/null |
3336 | +++ b/include/linux/nospec.h |
3337 | @@ -0,0 +1,72 @@ |
3338 | +// SPDX-License-Identifier: GPL-2.0 |
3339 | +// Copyright(c) 2018 Linus Torvalds. All rights reserved. |
3340 | +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. |
3341 | +// Copyright(c) 2018 Intel Corporation. All rights reserved. |
3342 | + |
3343 | +#ifndef _LINUX_NOSPEC_H |
3344 | +#define _LINUX_NOSPEC_H |
3345 | + |
3346 | +/** |
3347 | + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise |
3348 | + * @index: array element index |
3349 | + * @size: number of elements in array |
3350 | + * |
3351 | + * When @index is out of bounds (@index >= @size), the sign bit will be |
3352 | + * set. Extend the sign bit to all bits and invert, giving a result of |
3353 | + * zero for an out of bounds index, or ~0 if within bounds [0, @size). |
3354 | + */ |
3355 | +#ifndef array_index_mask_nospec |
3356 | +static inline unsigned long array_index_mask_nospec(unsigned long index, |
3357 | + unsigned long size) |
3358 | +{ |
3359 | + /* |
3360 | + * Warn developers about inappropriate array_index_nospec() usage. |
3361 | + * |
3362 | + * Even if the CPU speculates past the WARN_ONCE branch, the |
3363 | + * sign bit of @index is taken into account when generating the |
3364 | + * mask. |
3365 | + * |
3366 | + * This warning is compiled out when the compiler can infer that |
3367 | + * @index and @size are less than LONG_MAX. |
3368 | + */ |
3369 | + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, |
3370 | + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) |
3371 | + return 0; |
3372 | + |
3373 | + /* |
3374 | + * Always calculate and emit the mask even if the compiler |
3375 | + * thinks the mask is not needed. The compiler does not take |
3376 | + * into account the value of @index under speculation. |
3377 | + */ |
3378 | + OPTIMIZER_HIDE_VAR(index); |
3379 | + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); |
3380 | +} |
3381 | +#endif |
3382 | + |
3383 | +/* |
3384 | + * array_index_nospec - sanitize an array index after a bounds check |
3385 | + * |
3386 | + * For a code sequence like: |
3387 | + * |
3388 | + * if (index < size) { |
3389 | + * index = array_index_nospec(index, size); |
3390 | + * val = array[index]; |
3391 | + * } |
3392 | + * |
3393 | + * ...if the CPU speculates past the bounds check then |
3394 | + * array_index_nospec() will clamp the index within the range of [0, |
3395 | + * size). |
3396 | + */ |
3397 | +#define array_index_nospec(index, size) \ |
3398 | +({ \ |
3399 | + typeof(index) _i = (index); \ |
3400 | + typeof(size) _s = (size); \ |
3401 | + unsigned long _mask = array_index_mask_nospec(_i, _s); \ |
3402 | + \ |
3403 | + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ |
3404 | + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ |
3405 | + \ |
3406 | + _i &= _mask; \ |
3407 | + _i; \ |
3408 | +}) |
3409 | +#endif /* _LINUX_NOSPEC_H */ |
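
Taken together, the header above gives callers a two-line recipe: keep the ordinary bounds check for correctness, then pass the index through array_index_nospec() so that a mis-predicted branch cannot steer a dependent load with an out-of-range value. A standalone illustration follows, with the mask helper copied from the header (minus the WARN_ONCE/BUILD_BUG_ON plumbing) and a made-up lookup table:

    #include <stdio.h>
    #include <stdint.h>

    #define BITS_PER_LONG (8 * sizeof(long))

    static inline unsigned long array_index_mask_nospec(unsigned long index,
                                                        unsigned long size)
    {
        /* ~0 for index in [0, size), 0 otherwise, computed without a branch */
        return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
    }

    #define array_index_nospec(index, size)                 \
    ({                                                      \
        unsigned long _i = (index);                         \
        unsigned long _s = (size);                          \
        _i & array_index_mask_nospec(_i, _s);               \
    })

    static const int table[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };

    static int lookup(unsigned int idx)
    {
        if (idx >= 8)
            return -1;
        /* Even if the branch above is speculated past, idx is clamped to
         * [0, 8) before it can feed the dependent load. */
        idx = array_index_nospec(idx, 8);
        return table[idx];
    }

    int main(void)
    {
        printf("%d %d\n", lookup(3), lookup(100));
        return 0;
    }

The __fcheck_files() and parse_txq_params() hunks elsewhere in this patch are straight applications of the same pattern.
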
3410 | diff --git a/kernel/module.c b/kernel/module.c |
3411 | index dea01ac9cb74..09e48eee4d55 100644 |
3412 | --- a/kernel/module.c |
3413 | +++ b/kernel/module.c |
3414 | @@ -2863,6 +2863,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) |
3415 | } |
3416 | #endif /* CONFIG_LIVEPATCH */ |
3417 | |
3418 | +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) |
3419 | +{ |
3420 | + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) |
3421 | + return; |
3422 | + |
3423 | + pr_warn("%s: loading module not compiled with retpoline compiler.\n", |
3424 | + mod->name); |
3425 | +} |
3426 | + |
3427 | /* Sets info->hdr and info->len. */ |
3428 | static int copy_module_from_user(const void __user *umod, unsigned long len, |
3429 | struct load_info *info) |
3430 | @@ -3029,6 +3038,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) |
3431 | add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); |
3432 | } |
3433 | |
3434 | + check_modinfo_retpoline(mod, info); |
3435 | + |
3436 | if (get_modinfo(info, "staging")) { |
3437 | add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); |
3438 | pr_warn("%s: module is from the staging directory, the quality " |
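
check_modinfo_retpoline() only consumes the flag; the policy lives in retpoline_module_ok(), which each architecture provides when RETPOLINE is defined (on x86 it sits with the Spectre v2 mitigation code). The userspace sketch below shows the shape such an implementation can take: allow the load, warn, and remember that a possibly unprotected module is now present (the kernel would additionally set a taint flag). Names and the mitigation-state variable are simplified stand-ins, not the exact kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    static bool spectre_v2_retpoline_enabled = true;  /* assumed mitigation state */
    static bool spectre_v2_bad_module;

    static bool retpoline_module_ok(bool has_retpoline)
    {
        if (!spectre_v2_retpoline_enabled || has_retpoline)
            return true;

        fprintf(stderr, "system may be vulnerable to spectre v2\n");
        spectre_v2_bad_module = true;      /* the kernel would taint here */
        return false;
    }

    int main(void)
    {
        printf("retpoline module ok: %d\n", retpoline_module_ok(true));   /* 1 */
        printf("legacy module ok:    %d\n", retpoline_module_ok(false));  /* 0 */
        return 0;
    }

On the producer side, modpost's add_retpoline() (further down in this patch) emits MODULE_INFO(retpoline, "Y") into each module's generated .mod.c when it was built with a retpoline compiler, which is what get_modinfo(info, "retpoline") picks up here.
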
3439 | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c |
3440 | index 542a4fc0a8d7..4bbcfc1e2d43 100644 |
3441 | --- a/net/wireless/nl80211.c |
3442 | +++ b/net/wireless/nl80211.c |
3443 | @@ -16,6 +16,7 @@ |
3444 | #include <linux/nl80211.h> |
3445 | #include <linux/rtnetlink.h> |
3446 | #include <linux/netlink.h> |
3447 | +#include <linux/nospec.h> |
3448 | #include <linux/etherdevice.h> |
3449 | #include <net/net_namespace.h> |
3450 | #include <net/genetlink.h> |
3451 | @@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { |
3452 | static int parse_txq_params(struct nlattr *tb[], |
3453 | struct ieee80211_txq_params *txq_params) |
3454 | { |
3455 | + u8 ac; |
3456 | + |
3457 | if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || |
3458 | !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || |
3459 | !tb[NL80211_TXQ_ATTR_AIFS]) |
3460 | return -EINVAL; |
3461 | |
3462 | - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
3463 | + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
3464 | txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); |
3465 | txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); |
3466 | txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); |
3467 | txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); |
3468 | |
3469 | - if (txq_params->ac >= NL80211_NUM_ACS) |
3470 | + if (ac >= NL80211_NUM_ACS) |
3471 | return -EINVAL; |
3472 | - |
3473 | + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); |
3474 | return 0; |
3475 | } |
3476 | |
3477 | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c |
3478 | index f51cf977c65b..6510536c06df 100644 |
3479 | --- a/scripts/mod/modpost.c |
3480 | +++ b/scripts/mod/modpost.c |
3481 | @@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) |
3482 | buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); |
3483 | } |
3484 | |
3485 | +/* Cannot check for assembler */ |
3486 | +static void add_retpoline(struct buffer *b) |
3487 | +{ |
3488 | + buf_printf(b, "\n#ifdef RETPOLINE\n"); |
3489 | + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); |
3490 | + buf_printf(b, "#endif\n"); |
3491 | +} |
3492 | + |
3493 | static void add_staging_flag(struct buffer *b, const char *name) |
3494 | { |
3495 | static const char *staging_dir = "drivers/staging"; |
3496 | @@ -2506,6 +2514,7 @@ int main(int argc, char **argv) |
3497 | err |= check_modname_len(mod); |
3498 | add_header(&buf, mod); |
3499 | add_intree_flag(&buf, !external_module); |
3500 | + add_retpoline(&buf); |
3501 | add_staging_flag(&buf, mod->name); |
3502 | err |= add_versions(&buf, mod); |
3503 | add_depends(&buf, mod, modules); |
3504 | diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c |
3505 | index 25c63510ae15..7cdd2dc4fd79 100644 |
3506 | --- a/sound/soc/codecs/pcm512x-spi.c |
3507 | +++ b/sound/soc/codecs/pcm512x-spi.c |
3508 | @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { |
3509 | }; |
3510 | |
3511 | module_spi_driver(pcm512x_spi_driver); |
3512 | + |
3513 | +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); |
3514 | +MODULE_AUTHOR("Mark Brown <broonie@kernel.org>"); |
3515 | +MODULE_LICENSE("GPL v2"); |
3516 | diff --git a/tools/objtool/check.c b/tools/objtool/check.c |
3517 | index f40d46e24bcc..9cd028aa1509 100644 |
3518 | --- a/tools/objtool/check.c |
3519 | +++ b/tools/objtool/check.c |
3520 | @@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file) |
3521 | dest_off = insn->offset + insn->len + insn->immediate; |
3522 | insn->call_dest = find_symbol_by_offset(insn->sec, |
3523 | dest_off); |
3524 | - /* |
3525 | - * FIXME: Thanks to retpolines, it's now considered |
3526 | - * normal for a function to call within itself. So |
3527 | - * disable this warning for now. |
3528 | - */ |
3529 | -#if 0 |
3530 | - if (!insn->call_dest) { |
3531 | - WARN_FUNC("can't find call dest symbol at offset 0x%lx", |
3532 | - insn->sec, insn->offset, dest_off); |
3533 | + |
3534 | + if (!insn->call_dest && !insn->ignore) { |
3535 | + WARN_FUNC("unsupported intra-function call", |
3536 | + insn->sec, insn->offset); |
3537 | + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); |
3538 | return -1; |
3539 | } |
3540 | -#endif |
3541 | + |
3542 | } else if (rela->sym->type == STT_SECTION) { |
3543 | insn->call_dest = find_symbol_by_offset(rela->sym->sec, |
3544 | rela->addend+4); |
3545 | @@ -598,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file, |
3546 | struct instruction *orig_insn, |
3547 | struct instruction **new_insn) |
3548 | { |
3549 | - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; |
3550 | + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; |
3551 | unsigned long dest_off; |
3552 | |
3553 | last_orig_insn = NULL; |
3554 | @@ -614,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file, |
3555 | last_orig_insn = insn; |
3556 | } |
3557 | |
3558 | - if (!next_insn_same_sec(file, last_orig_insn)) { |
3559 | - WARN("%s: don't know how to handle alternatives at end of section", |
3560 | - special_alt->orig_sec->name); |
3561 | - return -1; |
3562 | - } |
3563 | - |
3564 | - fake_jump = malloc(sizeof(*fake_jump)); |
3565 | - if (!fake_jump) { |
3566 | - WARN("malloc failed"); |
3567 | - return -1; |
3568 | + if (next_insn_same_sec(file, last_orig_insn)) { |
3569 | + fake_jump = malloc(sizeof(*fake_jump)); |
3570 | + if (!fake_jump) { |
3571 | + WARN("malloc failed"); |
3572 | + return -1; |
3573 | + } |
3574 | + memset(fake_jump, 0, sizeof(*fake_jump)); |
3575 | + INIT_LIST_HEAD(&fake_jump->alts); |
3576 | + clear_insn_state(&fake_jump->state); |
3577 | + |
3578 | + fake_jump->sec = special_alt->new_sec; |
3579 | + fake_jump->offset = -1; |
3580 | + fake_jump->type = INSN_JUMP_UNCONDITIONAL; |
3581 | + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); |
3582 | + fake_jump->ignore = true; |
3583 | } |
3584 | - memset(fake_jump, 0, sizeof(*fake_jump)); |
3585 | - INIT_LIST_HEAD(&fake_jump->alts); |
3586 | - clear_insn_state(&fake_jump->state); |
3587 | - |
3588 | - fake_jump->sec = special_alt->new_sec; |
3589 | - fake_jump->offset = -1; |
3590 | - fake_jump->type = INSN_JUMP_UNCONDITIONAL; |
3591 | - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); |
3592 | - fake_jump->ignore = true; |
3593 | |
3594 | if (!special_alt->new_len) { |
3595 | + if (!fake_jump) { |
3596 | + WARN("%s: empty alternative at end of section", |
3597 | + special_alt->orig_sec->name); |
3598 | + return -1; |
3599 | + } |
3600 | + |
3601 | *new_insn = fake_jump; |
3602 | return 0; |
3603 | } |
3604 | @@ -648,6 +646,8 @@ static int handle_group_alt(struct objtool_file *file, |
3605 | |
3606 | last_new_insn = insn; |
3607 | |
3608 | + insn->ignore = orig_insn->ignore_alts; |
3609 | + |
3610 | if (insn->type != INSN_JUMP_CONDITIONAL && |
3611 | insn->type != INSN_JUMP_UNCONDITIONAL) |
3612 | continue; |
3613 | @@ -656,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file, |
3614 | continue; |
3615 | |
3616 | dest_off = insn->offset + insn->len + insn->immediate; |
3617 | - if (dest_off == special_alt->new_off + special_alt->new_len) |
3618 | + if (dest_off == special_alt->new_off + special_alt->new_len) { |
3619 | + if (!fake_jump) { |
3620 | + WARN("%s: alternative jump to end of section", |
3621 | + special_alt->orig_sec->name); |
3622 | + return -1; |
3623 | + } |
3624 | insn->jump_dest = fake_jump; |
3625 | + } |
3626 | |
3627 | if (!insn->jump_dest) { |
3628 | WARN_FUNC("can't find alternative jump destination", |
3629 | @@ -672,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file, |
3630 | return -1; |
3631 | } |
3632 | |
3633 | - list_add(&fake_jump->list, &last_new_insn->list); |
3634 | + if (fake_jump) |
3635 | + list_add(&fake_jump->list, &last_new_insn->list); |
3636 | |
3637 | return 0; |
3638 | } |
3639 | @@ -729,10 +736,6 @@ static int add_special_section_alts(struct objtool_file *file) |
3640 | goto out; |
3641 | } |
3642 | |
3643 | - /* Ignore retpoline alternatives. */ |
3644 | - if (orig_insn->ignore_alts) |
3645 | - continue; |
3646 | - |
3647 | new_insn = NULL; |
3648 | if (!special_alt->group || special_alt->new_len) { |
3649 | new_insn = find_insn(file, special_alt->new_sec, |
3650 | @@ -1089,11 +1092,11 @@ static int decode_sections(struct objtool_file *file) |
3651 | if (ret) |
3652 | return ret; |
3653 | |
3654 | - ret = add_call_destinations(file); |
3655 | + ret = add_special_section_alts(file); |
3656 | if (ret) |
3657 | return ret; |
3658 | |
3659 | - ret = add_special_section_alts(file); |
3660 | + ret = add_call_destinations(file); |
3661 | if (ret) |
3662 | return ret; |
3663 | |
3664 | @@ -1720,10 +1723,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, |
3665 | |
3666 | insn->visited = true; |
3667 | |
3668 | - list_for_each_entry(alt, &insn->alts, list) { |
3669 | - ret = validate_branch(file, alt->insn, state); |
3670 | - if (ret) |
3671 | - return 1; |
3672 | + if (!insn->ignore_alts) { |
3673 | + list_for_each_entry(alt, &insn->alts, list) { |
3674 | + ret = validate_branch(file, alt->insn, state); |
3675 | + if (ret) |
3676 | + return 1; |
3677 | + } |
3678 | } |
3679 | |
3680 | switch (insn->type) { |
3681 | diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c |
3682 | index e61fe703197b..18384d9be4e1 100644 |
3683 | --- a/tools/objtool/orc_gen.c |
3684 | +++ b/tools/objtool/orc_gen.c |
3685 | @@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, |
3686 | struct orc_entry *orc; |
3687 | struct rela *rela; |
3688 | |
3689 | + if (!insn_sec->sym) { |
3690 | + WARN("missing symbol for section %s", insn_sec->name); |
3691 | + return -1; |
3692 | + } |
3693 | + |
3694 | /* populate ORC data */ |
3695 | orc = (struct orc_entry *)u_sec->data->d_buf + idx; |
3696 | memcpy(orc, o, sizeof(*orc)); |