Annotation of /trunk/kernel-alx/patches-4.14/0117-4.14.18-all-fixes.patch
Parent Directory | Revision Log
Revision 3238 -
(hide annotations)
(download)
Fri Nov 9 12:14:58 2018 UTC (5 years, 10 months ago) by niro
File size: 125071 byte(s)
-added up to patches-4.14.79
1 | niro | 3238 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt |
2 | index 8122b5f98ea1..c76afdcafbef 100644 | ||
3 | --- a/Documentation/admin-guide/kernel-parameters.txt | ||
4 | +++ b/Documentation/admin-guide/kernel-parameters.txt | ||
5 | @@ -2718,8 +2718,6 @@ | ||
6 | norandmaps Don't use address space randomization. Equivalent to | ||
7 | echo 0 > /proc/sys/kernel/randomize_va_space | ||
8 | |||
9 | - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops | ||
10 | - | ||
11 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions | ||
12 | with UP alternatives | ||
13 | |||
14 | diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt | ||
15 | new file mode 100644 | ||
16 | index 000000000000..e9e6cbae2841 | ||
17 | --- /dev/null | ||
18 | +++ b/Documentation/speculation.txt | ||
19 | @@ -0,0 +1,90 @@ | ||
20 | +This document explains potential effects of speculation, and how undesirable | ||
21 | +effects can be mitigated portably using common APIs. | ||
22 | + | ||
23 | +=========== | ||
24 | +Speculation | ||
25 | +=========== | ||
26 | + | ||
27 | +To improve performance and minimize average latencies, many contemporary CPUs | ||
28 | +employ speculative execution techniques such as branch prediction, performing | ||
29 | +work which may be discarded at a later stage. | ||
30 | + | ||
31 | +Typically speculative execution cannot be observed from architectural state, | ||
32 | +such as the contents of registers. However, in some cases it is possible to | ||
33 | +observe its impact on microarchitectural state, such as the presence or | ||
34 | +absence of data in caches. Such state may form side-channels which can be | ||
35 | +observed to extract secret information. | ||
36 | + | ||
37 | +For example, in the presence of branch prediction, it is possible for bounds | ||
38 | +checks to be ignored by code which is speculatively executed. Consider the | ||
39 | +following code: | ||
40 | + | ||
41 | + int load_array(int *array, unsigned int index) | ||
42 | + { | ||
43 | + if (index >= MAX_ARRAY_ELEMS) | ||
44 | + return 0; | ||
45 | + else | ||
46 | + return array[index]; | ||
47 | + } | ||
48 | + | ||
49 | +Which, on arm64, may be compiled to an assembly sequence such as: | ||
50 | + | ||
51 | + CMP <index>, #MAX_ARRAY_ELEMS | ||
52 | + B.LT less | ||
53 | + MOV <returnval>, #0 | ||
54 | + RET | ||
55 | + less: | ||
56 | + LDR <returnval>, [<array>, <index>] | ||
57 | + RET | ||
58 | + | ||
59 | +It is possible that a CPU mis-predicts the conditional branch, and | ||
60 | +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This | ||
61 | +value will subsequently be discarded, but the speculated load may affect | ||
62 | +microarchitectural state which can be subsequently measured. | ||
63 | + | ||
64 | +More complex sequences involving multiple dependent memory accesses may | ||
65 | +result in sensitive information being leaked. Consider the following | ||
66 | +code, building on the prior example: | ||
67 | + | ||
68 | + int load_dependent_arrays(int *arr1, int *arr2, int index) | ||
69 | + { | ||
70 | + int val1, val2; | |
71 | + | ||
72 | + val1 = load_array(arr1, index); | ||
73 | + val2 = load_array(arr2, val1); | ||
74 | + | ||
75 | + return val2; | ||
76 | + } | ||
77 | + | ||
78 | +Under speculation, the first call to load_array() may return the value | ||
79 | +of an out-of-bounds address, while the second call will influence | ||
80 | +microarchitectural state dependent on this value. This may provide an | ||
81 | +arbitrary read primitive. | ||
82 | + | ||
83 | +==================================== | ||
84 | +Mitigating speculation side-channels | ||
85 | +==================================== | ||
86 | + | ||
87 | +The kernel provides a generic API to ensure that bounds checks are | ||
88 | +respected even under speculation. Architectures which are affected by | ||
89 | +speculation-based side-channels are expected to implement these | ||
90 | +primitives. | ||
91 | + | ||
92 | +The array_index_nospec() helper in <linux/nospec.h> can be used to | ||
93 | +prevent information from being leaked via side-channels. | ||
94 | + | ||
95 | +A call to array_index_nospec(index, size) returns a sanitized index | ||
96 | +value that is bounded to [0, size) even under CPU speculation | |
97 | +conditions. | ||
98 | + | ||
99 | +This can be used to protect the earlier load_array() example: | ||
100 | + | ||
101 | + int load_array(int *array, unsigned int index) | ||
102 | + { | ||
103 | + if (index >= MAX_ARRAY_ELEMS) | ||
104 | + return 0; | ||
105 | + else { | ||
106 | + index = array_index_nospec(index, MAX_ARRAY_ELEMS); | ||
107 | + return array[index]; | ||
108 | + } | ||
109 | + } | ||
110 | diff --git a/Makefile b/Makefile | ||
111 | index 7ed993896dd5..a69e5da9ed86 100644 | ||
112 | --- a/Makefile | ||
113 | +++ b/Makefile | ||
114 | @@ -1,7 +1,7 @@ | ||
115 | # SPDX-License-Identifier: GPL-2.0 | ||
116 | VERSION = 4 | ||
117 | PATCHLEVEL = 14 | ||
118 | -SUBLEVEL = 17 | ||
119 | +SUBLEVEL = 18 | ||
120 | EXTRAVERSION = | ||
121 | NAME = Petit Gorille | ||
122 | |||
123 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | ||
124 | index cb782ac1c35d..fe418226df7f 100644 | ||
125 | --- a/arch/powerpc/Kconfig | ||
126 | +++ b/arch/powerpc/Kconfig | ||
127 | @@ -164,6 +164,7 @@ config PPC | ||
128 | select GENERIC_CLOCKEVENTS_BROADCAST if SMP | ||
129 | select GENERIC_CMOS_UPDATE | ||
130 | select GENERIC_CPU_AUTOPROBE | ||
131 | + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 | ||
132 | select GENERIC_IRQ_SHOW | ||
133 | select GENERIC_IRQ_SHOW_LEVEL | ||
134 | select GENERIC_SMP_IDLE_THREAD | ||
135 | diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c | ||
136 | index 935059cb9e40..9527a4c6cbc2 100644 | ||
137 | --- a/arch/powerpc/kernel/setup_64.c | ||
138 | +++ b/arch/powerpc/kernel/setup_64.c | ||
139 | @@ -38,6 +38,7 @@ | ||
140 | #include <linux/memory.h> | ||
141 | #include <linux/nmi.h> | ||
142 | |||
143 | +#include <asm/debugfs.h> | ||
144 | #include <asm/io.h> | ||
145 | #include <asm/kdump.h> | ||
146 | #include <asm/prom.h> | ||
147 | @@ -884,4 +885,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) | ||
148 | if (!no_rfi_flush) | ||
149 | rfi_flush_enable(enable); | ||
150 | } | ||
151 | + | ||
152 | +#ifdef CONFIG_DEBUG_FS | ||
153 | +static int rfi_flush_set(void *data, u64 val) | ||
154 | +{ | ||
155 | + if (val == 1) | ||
156 | + rfi_flush_enable(true); | ||
157 | + else if (val == 0) | ||
158 | + rfi_flush_enable(false); | ||
159 | + else | ||
160 | + return -EINVAL; | ||
161 | + | ||
162 | + return 0; | ||
163 | +} | ||
164 | + | ||
165 | +static int rfi_flush_get(void *data, u64 *val) | ||
166 | +{ | ||
167 | + *val = rfi_flush ? 1 : 0; | ||
168 | + return 0; | ||
169 | +} | ||
170 | + | ||
171 | +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); | ||
172 | + | ||
173 | +static __init int rfi_flush_debugfs_init(void) | ||
174 | +{ | ||
175 | + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); | ||
176 | + return 0; | ||
177 | +} | ||
178 | +device_initcall(rfi_flush_debugfs_init); | ||
179 | +#endif | ||
180 | + | ||
181 | +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) | ||
182 | +{ | ||
183 | + if (rfi_flush) | ||
184 | + return sprintf(buf, "Mitigation: RFI Flush\n"); | ||
185 | + | ||
186 | + return sprintf(buf, "Vulnerable\n"); | ||
187 | +} | ||
188 | #endif /* CONFIG_PPC_BOOK3S_64 */ | ||
189 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | ||
190 | index 03505ffbe1b6..60e21ccfb6d6 100644 | ||
191 | --- a/arch/x86/entry/common.c | ||
192 | +++ b/arch/x86/entry/common.c | ||
193 | @@ -21,6 +21,7 @@ | ||
194 | #include <linux/export.h> | ||
195 | #include <linux/context_tracking.h> | ||
196 | #include <linux/user-return-notifier.h> | ||
197 | +#include <linux/nospec.h> | ||
198 | #include <linux/uprobes.h> | ||
199 | #include <linux/livepatch.h> | ||
200 | #include <linux/syscalls.h> | ||
201 | @@ -208,7 +209,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | ||
202 | * special case only applies after poking regs and before the | ||
203 | * very next return to user mode. | ||
204 | */ | ||
205 | - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); | ||
206 | + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); | ||
207 | #endif | ||
208 | |||
209 | user_enter_irqoff(); | ||
210 | @@ -284,7 +285,8 @@ __visible void do_syscall_64(struct pt_regs *regs) | ||
211 | * regs->orig_ax, which changes the behavior of some syscalls. | ||
212 | */ | ||
213 | if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { | ||
214 | - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( | ||
215 | + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); | ||
216 | + regs->ax = sys_call_table[nr]( | ||
217 | regs->di, regs->si, regs->dx, | ||
218 | regs->r10, regs->r8, regs->r9); | ||
219 | } | ||
220 | @@ -306,7 +308,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | ||
221 | unsigned int nr = (unsigned int)regs->orig_ax; | ||
222 | |||
223 | #ifdef CONFIG_IA32_EMULATION | ||
224 | - current->thread.status |= TS_COMPAT; | ||
225 | + ti->status |= TS_COMPAT; | ||
226 | #endif | ||
227 | |||
228 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { | ||
229 | @@ -320,6 +322,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | ||
230 | } | ||
231 | |||
232 | if (likely(nr < IA32_NR_syscalls)) { | ||
233 | + nr = array_index_nospec(nr, IA32_NR_syscalls); | ||
234 | /* | ||
235 | * It's possible that a 32-bit syscall implementation | ||
236 | * takes a 64-bit parameter but nonetheless assumes that | ||
237 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | ||
238 | index 60c4c342316c..2a35b1e0fb90 100644 | ||
239 | --- a/arch/x86/entry/entry_32.S | ||
240 | +++ b/arch/x86/entry/entry_32.S | ||
241 | @@ -252,7 +252,8 @@ ENTRY(__switch_to_asm) | ||
242 | * exist, overwrite the RSB with entries which capture | ||
243 | * speculative execution to prevent attack. | ||
244 | */ | ||
245 | - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
246 | + /* Clobbers %ebx */ | ||
247 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
248 | #endif | ||
249 | |||
250 | /* restore callee-saved registers */ | ||
251 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | ||
252 | index be6b66464f6a..16e2d72e79a0 100644 | ||
253 | --- a/arch/x86/entry/entry_64.S | ||
254 | +++ b/arch/x86/entry/entry_64.S | ||
255 | @@ -232,91 +232,20 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) | ||
256 | pushq %r9 /* pt_regs->r9 */ | ||
257 | pushq %r10 /* pt_regs->r10 */ | ||
258 | pushq %r11 /* pt_regs->r11 */ | ||
259 | - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ | ||
260 | - UNWIND_HINT_REGS extra=0 | ||
261 | - | ||
262 | - TRACE_IRQS_OFF | ||
263 | - | ||
264 | - /* | ||
265 | - * If we need to do entry work or if we guess we'll need to do | ||
266 | - * exit work, go straight to the slow path. | ||
267 | - */ | ||
268 | - movq PER_CPU_VAR(current_task), %r11 | ||
269 | - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
270 | - jnz entry_SYSCALL64_slow_path | ||
271 | - | ||
272 | -entry_SYSCALL_64_fastpath: | ||
273 | - /* | ||
274 | - * Easy case: enable interrupts and issue the syscall. If the syscall | ||
275 | - * needs pt_regs, we'll call a stub that disables interrupts again | ||
276 | - * and jumps to the slow path. | ||
277 | - */ | ||
278 | - TRACE_IRQS_ON | ||
279 | - ENABLE_INTERRUPTS(CLBR_NONE) | ||
280 | -#if __SYSCALL_MASK == ~0 | ||
281 | - cmpq $__NR_syscall_max, %rax | ||
282 | -#else | ||
283 | - andl $__SYSCALL_MASK, %eax | ||
284 | - cmpl $__NR_syscall_max, %eax | ||
285 | -#endif | ||
286 | - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ | ||
287 | - movq %r10, %rcx | ||
288 | - | ||
289 | - /* | ||
290 | - * This call instruction is handled specially in stub_ptregs_64. | ||
291 | - * It might end up jumping to the slow path. If it jumps, RAX | ||
292 | - * and all argument registers are clobbered. | ||
293 | - */ | ||
294 | -#ifdef CONFIG_RETPOLINE | ||
295 | - movq sys_call_table(, %rax, 8), %rax | ||
296 | - call __x86_indirect_thunk_rax | ||
297 | -#else | ||
298 | - call *sys_call_table(, %rax, 8) | ||
299 | -#endif | ||
300 | -.Lentry_SYSCALL_64_after_fastpath_call: | ||
301 | - | ||
302 | - movq %rax, RAX(%rsp) | ||
303 | -1: | ||
304 | + pushq %rbx /* pt_regs->rbx */ | ||
305 | + pushq %rbp /* pt_regs->rbp */ | ||
306 | + pushq %r12 /* pt_regs->r12 */ | ||
307 | + pushq %r13 /* pt_regs->r13 */ | ||
308 | + pushq %r14 /* pt_regs->r14 */ | ||
309 | + pushq %r15 /* pt_regs->r15 */ | ||
310 | + UNWIND_HINT_REGS | ||
311 | |||
312 | - /* | ||
313 | - * If we get here, then we know that pt_regs is clean for SYSRET64. | ||
314 | - * If we see that no exit work is required (which we are required | ||
315 | - * to check with IRQs off), then we can go straight to SYSRET64. | ||
316 | - */ | ||
317 | - DISABLE_INTERRUPTS(CLBR_ANY) | ||
318 | TRACE_IRQS_OFF | ||
319 | - movq PER_CPU_VAR(current_task), %r11 | ||
320 | - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
321 | - jnz 1f | ||
322 | - | ||
323 | - LOCKDEP_SYS_EXIT | ||
324 | - TRACE_IRQS_ON /* user mode is traced as IRQs on */ | ||
325 | - movq RIP(%rsp), %rcx | ||
326 | - movq EFLAGS(%rsp), %r11 | ||
327 | - addq $6*8, %rsp /* skip extra regs -- they were preserved */ | ||
328 | - UNWIND_HINT_EMPTY | ||
329 | - jmp .Lpop_c_regs_except_rcx_r11_and_sysret | ||
330 | |||
331 | -1: | ||
332 | - /* | ||
333 | - * The fast path looked good when we started, but something changed | ||
334 | - * along the way and we need to switch to the slow path. Calling | ||
335 | - * raise(3) will trigger this, for example. IRQs are off. | ||
336 | - */ | ||
337 | - TRACE_IRQS_ON | ||
338 | - ENABLE_INTERRUPTS(CLBR_ANY) | ||
339 | - SAVE_EXTRA_REGS | ||
340 | - movq %rsp, %rdi | ||
341 | - call syscall_return_slowpath /* returns with IRQs disabled */ | ||
342 | - jmp return_from_SYSCALL_64 | ||
343 | - | ||
344 | -entry_SYSCALL64_slow_path: | ||
345 | /* IRQs are off. */ | ||
346 | - SAVE_EXTRA_REGS | ||
347 | movq %rsp, %rdi | ||
348 | call do_syscall_64 /* returns with IRQs disabled */ | ||
349 | |||
350 | -return_from_SYSCALL_64: | ||
351 | TRACE_IRQS_IRETQ /* we're about to change IF */ | ||
352 | |||
353 | /* | ||
354 | @@ -389,7 +318,6 @@ syscall_return_via_sysret: | ||
355 | /* rcx and r11 are already restored (see code above) */ | ||
356 | UNWIND_HINT_EMPTY | ||
357 | POP_EXTRA_REGS | ||
358 | -.Lpop_c_regs_except_rcx_r11_and_sysret: | ||
359 | popq %rsi /* skip r11 */ | ||
360 | popq %r10 | ||
361 | popq %r9 | ||
362 | @@ -420,47 +348,6 @@ syscall_return_via_sysret: | ||
363 | USERGS_SYSRET64 | ||
364 | END(entry_SYSCALL_64) | ||
365 | |||
366 | -ENTRY(stub_ptregs_64) | ||
367 | - /* | ||
368 | - * Syscalls marked as needing ptregs land here. | ||
369 | - * If we are on the fast path, we need to save the extra regs, | ||
370 | - * which we achieve by trying again on the slow path. If we are on | ||
371 | - * the slow path, the extra regs are already saved. | ||
372 | - * | ||
373 | - * RAX stores a pointer to the C function implementing the syscall. | ||
374 | - * IRQs are on. | ||
375 | - */ | ||
376 | - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) | ||
377 | - jne 1f | ||
378 | - | ||
379 | - /* | ||
380 | - * Called from fast path -- disable IRQs again, pop return address | ||
381 | - * and jump to slow path | ||
382 | - */ | ||
383 | - DISABLE_INTERRUPTS(CLBR_ANY) | ||
384 | - TRACE_IRQS_OFF | ||
385 | - popq %rax | ||
386 | - UNWIND_HINT_REGS extra=0 | ||
387 | - jmp entry_SYSCALL64_slow_path | ||
388 | - | ||
389 | -1: | ||
390 | - JMP_NOSPEC %rax /* Called from C */ | ||
391 | -END(stub_ptregs_64) | ||
392 | - | ||
393 | -.macro ptregs_stub func | ||
394 | -ENTRY(ptregs_\func) | ||
395 | - UNWIND_HINT_FUNC | ||
396 | - leaq \func(%rip), %rax | ||
397 | - jmp stub_ptregs_64 | ||
398 | -END(ptregs_\func) | ||
399 | -.endm | ||
400 | - | ||
401 | -/* Instantiate ptregs_stub for each ptregs-using syscall */ | ||
402 | -#define __SYSCALL_64_QUAL_(sym) | ||
403 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym | ||
404 | -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) | ||
405 | -#include <asm/syscalls_64.h> | ||
406 | - | ||
407 | /* | ||
408 | * %rdi: prev task | ||
409 | * %rsi: next task | ||
410 | @@ -495,7 +382,8 @@ ENTRY(__switch_to_asm) | ||
411 | * exist, overwrite the RSB with entries which capture | ||
412 | * speculative execution to prevent attack. | ||
413 | */ | ||
414 | - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
415 | + /* Clobbers %rbx */ | ||
416 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
417 | #endif | ||
418 | |||
419 | /* restore callee-saved registers */ | ||
420 | diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c | ||
421 | index 9c09775e589d..c176d2fab1da 100644 | ||
422 | --- a/arch/x86/entry/syscall_64.c | ||
423 | +++ b/arch/x86/entry/syscall_64.c | ||
424 | @@ -7,14 +7,11 @@ | ||
425 | #include <asm/asm-offsets.h> | ||
426 | #include <asm/syscall.h> | ||
427 | |||
428 | -#define __SYSCALL_64_QUAL_(sym) sym | ||
429 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym | ||
430 | - | ||
431 | -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
432 | +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
433 | #include <asm/syscalls_64.h> | ||
434 | #undef __SYSCALL_64 | ||
435 | |||
436 | -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), | ||
437 | +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, | ||
438 | |||
439 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
440 | |||
441 | diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h | ||
442 | index 0927cdc4f946..4d111616524b 100644 | ||
443 | --- a/arch/x86/include/asm/asm-prototypes.h | ||
444 | +++ b/arch/x86/include/asm/asm-prototypes.h | ||
445 | @@ -38,5 +38,7 @@ INDIRECT_THUNK(dx) | ||
446 | INDIRECT_THUNK(si) | ||
447 | INDIRECT_THUNK(di) | ||
448 | INDIRECT_THUNK(bp) | ||
449 | -INDIRECT_THUNK(sp) | ||
450 | +asmlinkage void __fill_rsb(void); | ||
451 | +asmlinkage void __clear_rsb(void); | ||
452 | + | ||
453 | #endif /* CONFIG_RETPOLINE */ | ||
454 | diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h | ||
455 | index 01727dbc294a..1e7c955b6303 100644 | ||
456 | --- a/arch/x86/include/asm/barrier.h | ||
457 | +++ b/arch/x86/include/asm/barrier.h | ||
458 | @@ -24,6 +24,34 @@ | ||
459 | #define wmb() asm volatile("sfence" ::: "memory") | ||
460 | #endif | ||
461 | |||
462 | +/** | ||
463 | + * array_index_mask_nospec() - generate a mask that is ~0UL when the | ||
464 | + * bounds check succeeds and 0 otherwise | ||
465 | + * @index: array element index | ||
466 | + * @size: number of elements in array | ||
467 | + * | ||
468 | + * Returns: | ||
469 | + * 0 - (index < size) | ||
470 | + */ | ||
471 | +static inline unsigned long array_index_mask_nospec(unsigned long index, | ||
472 | + unsigned long size) | ||
473 | +{ | ||
474 | + unsigned long mask; | ||
475 | + | ||
476 | + asm ("cmp %1,%2; sbb %0,%0;" | ||
477 | + :"=r" (mask) | ||
478 | + :"r"(size),"r" (index) | ||
479 | + :"cc"); | ||
480 | + return mask; | ||
481 | +} | ||
482 | + | ||
483 | +/* Override the default implementation from linux/nospec.h. */ | ||
484 | +#define array_index_mask_nospec array_index_mask_nospec | ||
485 | + | ||
486 | +/* Prevent speculative execution past this barrier. */ | ||
487 | +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ | ||
488 | + "lfence", X86_FEATURE_LFENCE_RDTSC) | ||
489 | + | ||
490 | #ifdef CONFIG_X86_PPRO_FENCE | ||
491 | #define dma_rmb() rmb() | ||
492 | #else | ||
493 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h | ||
494 | index ea9a7dde62e5..70eddb3922ff 100644 | ||
495 | --- a/arch/x86/include/asm/cpufeature.h | ||
496 | +++ b/arch/x86/include/asm/cpufeature.h | ||
497 | @@ -29,6 +29,7 @@ enum cpuid_leafs | ||
498 | CPUID_8000_000A_EDX, | ||
499 | CPUID_7_ECX, | ||
500 | CPUID_8000_0007_EBX, | ||
501 | + CPUID_7_EDX, | ||
502 | }; | ||
503 | |||
504 | #ifdef CONFIG_X86_FEATURE_NAMES | ||
505 | @@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | ||
506 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ | ||
507 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ | ||
508 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ | ||
509 | + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ | ||
510 | REQUIRED_MASK_CHECK || \ | ||
511 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) | ||
512 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) | ||
513 | |||
514 | #define DISABLED_MASK_BIT_SET(feature_bit) \ | ||
515 | ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ | ||
516 | @@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | ||
517 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ | ||
518 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ | ||
519 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ | ||
520 | + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ | ||
521 | DISABLED_MASK_CHECK || \ | ||
522 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) | ||
523 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) | ||
524 | |||
525 | #define cpu_has(c, bit) \ | ||
526 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ | ||
527 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h | ||
528 | index 25b9375c1484..73b5fff159a4 100644 | ||
529 | --- a/arch/x86/include/asm/cpufeatures.h | ||
530 | +++ b/arch/x86/include/asm/cpufeatures.h | ||
531 | @@ -13,7 +13,7 @@ | ||
532 | /* | ||
533 | * Defines x86 CPU feature bits | ||
534 | */ | ||
535 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ | ||
536 | +#define NCAPINTS 19 /* N 32-bit words worth of info */ | ||
537 | #define NBUGINTS 1 /* N 32-bit bug flags */ | ||
538 | |||
539 | /* | ||
540 | @@ -203,14 +203,14 @@ | ||
541 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
542 | #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ | ||
543 | #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ | ||
544 | -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ | ||
545 | -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ | ||
546 | +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ | ||
547 | +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ | ||
548 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ | ||
549 | -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ | ||
550 | -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ | ||
551 | |||
552 | #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ | ||
553 | -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ | ||
554 | +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ | ||
555 | + | ||
556 | +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ | ||
557 | |||
558 | /* Virtualization flags: Linux defined, word 8 */ | ||
559 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
560 | @@ -271,6 +271,9 @@ | ||
561 | #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ | ||
562 | #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ | ||
563 | #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ | ||
564 | +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ | ||
565 | +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ | ||
566 | +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ | ||
567 | |||
568 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ | ||
569 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
570 | @@ -319,6 +322,13 @@ | ||
571 | #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ | ||
572 | #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ | ||
573 | |||
574 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ | ||
575 | +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ | ||
576 | +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ | ||
577 | +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ | ||
578 | +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ | ||
579 | +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ | ||
580 | + | ||
581 | /* | ||
582 | * BUG word(s) | ||
583 | */ | ||
584 | diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h | ||
585 | index e428e16dd822..c6a3af198294 100644 | ||
586 | --- a/arch/x86/include/asm/disabled-features.h | ||
587 | +++ b/arch/x86/include/asm/disabled-features.h | ||
588 | @@ -71,6 +71,7 @@ | ||
589 | #define DISABLED_MASK15 0 | ||
590 | #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57) | ||
591 | #define DISABLED_MASK17 0 | ||
592 | -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) | ||
593 | +#define DISABLED_MASK18 0 | ||
594 | +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) | ||
595 | |||
596 | #endif /* _ASM_X86_DISABLED_FEATURES_H */ | ||
597 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h | ||
598 | index 64c4a30e0d39..e203169931c7 100644 | ||
599 | --- a/arch/x86/include/asm/fixmap.h | ||
600 | +++ b/arch/x86/include/asm/fixmap.h | ||
601 | @@ -137,8 +137,10 @@ enum fixed_addresses { | ||
602 | |||
603 | extern void reserve_top_address(unsigned long reserve); | ||
604 | |||
605 | -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) | ||
606 | -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) | ||
607 | +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) | ||
608 | +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) | ||
609 | +#define FIXADDR_TOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) | ||
610 | +#define FIXADDR_TOT_START (FIXADDR_TOP - FIXADDR_TOT_SIZE) | ||
611 | |||
612 | extern int fixmaps_set; | ||
613 | |||
614 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h | ||
615 | index fa11fb1fa570..eb83ff1bae8f 100644 | ||
616 | --- a/arch/x86/include/asm/msr-index.h | ||
617 | +++ b/arch/x86/include/asm/msr-index.h | ||
618 | @@ -39,6 +39,13 @@ | ||
619 | |||
620 | /* Intel MSRs. Some also available on other CPUs */ | ||
621 | |||
622 | +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ | ||
623 | +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ | ||
624 | +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ | ||
625 | + | ||
626 | +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ | ||
627 | +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ | ||
628 | + | ||
629 | #define MSR_PPIN_CTL 0x0000004e | ||
630 | #define MSR_PPIN 0x0000004f | ||
631 | |||
632 | @@ -57,6 +64,11 @@ | ||
633 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
634 | |||
635 | #define MSR_MTRRcap 0x000000fe | ||
636 | + | ||
637 | +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a | ||
638 | +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ | ||
639 | +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ | ||
640 | + | ||
641 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | ||
642 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
643 | |||
644 | diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h | ||
645 | index 07962f5f6fba..30df295f6d94 100644 | ||
646 | --- a/arch/x86/include/asm/msr.h | ||
647 | +++ b/arch/x86/include/asm/msr.h | ||
648 | @@ -214,8 +214,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) | ||
649 | * that some other imaginary CPU is updating continuously with a | ||
650 | * time stamp. | ||
651 | */ | ||
652 | - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, | ||
653 | - "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
654 | + barrier_nospec(); | ||
655 | return rdtsc(); | ||
656 | } | ||
657 | |||
658 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h | ||
659 | index 4ad41087ce0e..4d57894635f2 100644 | ||
660 | --- a/arch/x86/include/asm/nospec-branch.h | ||
661 | +++ b/arch/x86/include/asm/nospec-branch.h | ||
662 | @@ -1,56 +1,12 @@ | ||
663 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
664 | |||
665 | -#ifndef __NOSPEC_BRANCH_H__ | ||
666 | -#define __NOSPEC_BRANCH_H__ | ||
667 | +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ | ||
668 | +#define _ASM_X86_NOSPEC_BRANCH_H_ | ||
669 | |||
670 | #include <asm/alternative.h> | ||
671 | #include <asm/alternative-asm.h> | ||
672 | #include <asm/cpufeatures.h> | ||
673 | |||
674 | -/* | ||
675 | - * Fill the CPU return stack buffer. | ||
676 | - * | ||
677 | - * Each entry in the RSB, if used for a speculative 'ret', contains an | ||
678 | - * infinite 'pause; lfence; jmp' loop to capture speculative execution. | ||
679 | - * | ||
680 | - * This is required in various cases for retpoline and IBRS-based | ||
681 | - * mitigations for the Spectre variant 2 vulnerability. Sometimes to | ||
682 | - * eliminate potentially bogus entries from the RSB, and sometimes | ||
683 | - * purely to ensure that it doesn't get empty, which on some CPUs would | ||
684 | - * allow predictions from other (unwanted!) sources to be used. | ||
685 | - * | ||
686 | - * We define a CPP macro such that it can be used from both .S files and | ||
687 | - * inline assembly. It's possible to do a .macro and then include that | ||
688 | - * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. | ||
689 | - */ | ||
690 | - | ||
691 | -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ | ||
692 | -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ | ||
693 | - | ||
694 | -/* | ||
695 | - * Google experimented with loop-unrolling and this turned out to be | ||
696 | - * the optimal version — two calls, each with their own speculation | ||
697 | - * trap should their return address end up getting used, in a loop. | ||
698 | - */ | ||
699 | -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ | ||
700 | - mov $(nr/2), reg; \ | ||
701 | -771: \ | ||
702 | - call 772f; \ | ||
703 | -773: /* speculation trap */ \ | ||
704 | - pause; \ | ||
705 | - lfence; \ | ||
706 | - jmp 773b; \ | ||
707 | -772: \ | ||
708 | - call 774f; \ | ||
709 | -775: /* speculation trap */ \ | ||
710 | - pause; \ | ||
711 | - lfence; \ | ||
712 | - jmp 775b; \ | ||
713 | -774: \ | ||
714 | - dec reg; \ | ||
715 | - jnz 771b; \ | ||
716 | - add $(BITS_PER_LONG/8) * nr, sp; | ||
717 | - | ||
718 | #ifdef __ASSEMBLY__ | ||
719 | |||
720 | /* | ||
721 | @@ -121,17 +77,10 @@ | ||
722 | #endif | ||
723 | .endm | ||
724 | |||
725 | - /* | ||
726 | - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP | ||
727 | - * monstrosity above, manually. | ||
728 | - */ | ||
729 | -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req | ||
730 | +/* This clobbers the BX register */ | ||
731 | +.macro FILL_RETURN_BUFFER nr:req ftr:req | ||
732 | #ifdef CONFIG_RETPOLINE | ||
733 | - ANNOTATE_NOSPEC_ALTERNATIVE | ||
734 | - ALTERNATIVE "jmp .Lskip_rsb_\@", \ | ||
735 | - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ | ||
736 | - \ftr | ||
737 | -.Lskip_rsb_\@: | ||
738 | + ALTERNATIVE "", "call __clear_rsb", \ftr | ||
739 | #endif | ||
740 | .endm | ||
741 | |||
742 | @@ -201,22 +150,25 @@ extern char __indirect_thunk_end[]; | ||
743 | * On VMEXIT we must ensure that no RSB predictions learned in the guest | ||
744 | * can be followed in the host, by overwriting the RSB completely. Both | ||
745 | * retpoline and IBRS mitigations for Spectre v2 need this; only on future | ||
746 | - * CPUs with IBRS_ATT *might* it be avoided. | ||
747 | + * CPUs with IBRS_ALL *might* it be avoided. | ||
748 | */ | ||
749 | static inline void vmexit_fill_RSB(void) | ||
750 | { | ||
751 | #ifdef CONFIG_RETPOLINE | ||
752 | - unsigned long loops; | ||
753 | - | ||
754 | - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE | ||
755 | - ALTERNATIVE("jmp 910f", | ||
756 | - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), | ||
757 | - X86_FEATURE_RETPOLINE) | ||
758 | - "910:" | ||
759 | - : "=r" (loops), ASM_CALL_CONSTRAINT | ||
760 | - : : "memory" ); | ||
761 | + alternative_input("", | ||
762 | + "call __fill_rsb", | ||
763 | + X86_FEATURE_RETPOLINE, | ||
764 | + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); | ||
765 | #endif | ||
766 | } | ||
767 | |||
768 | +static inline void indirect_branch_prediction_barrier(void) | ||
769 | +{ | ||
770 | + alternative_input("", | ||
771 | + "call __ibp_barrier", | ||
772 | + X86_FEATURE_USE_IBPB, | ||
773 | + ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory")); | ||
774 | +} | ||
775 | + | ||
776 | #endif /* __ASSEMBLY__ */ | ||
777 | -#endif /* __NOSPEC_BRANCH_H__ */ | ||
778 | +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | ||
779 | diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h | ||
780 | index ce245b0cdfca..0777e18a1d23 100644 | ||
781 | --- a/arch/x86/include/asm/pgtable_32_types.h | ||
782 | +++ b/arch/x86/include/asm/pgtable_32_types.h | ||
783 | @@ -44,8 +44,9 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ | ||
784 | */ | ||
785 | #define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) | ||
786 | |||
787 | -#define CPU_ENTRY_AREA_BASE \ | ||
788 | - ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK) | ||
789 | +#define CPU_ENTRY_AREA_BASE \ | ||
790 | + ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ | ||
791 | + & PMD_MASK) | ||
792 | |||
793 | #define PKMAP_BASE \ | ||
794 | ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) | ||
795 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h | ||
796 | index 9c18da64daa9..c57c6e77c29f 100644 | ||
797 | --- a/arch/x86/include/asm/processor.h | ||
798 | +++ b/arch/x86/include/asm/processor.h | ||
799 | @@ -459,8 +459,6 @@ struct thread_struct { | ||
800 | unsigned short gsindex; | ||
801 | #endif | ||
802 | |||
803 | - u32 status; /* thread synchronous flags */ | ||
804 | - | ||
805 | #ifdef CONFIG_X86_64 | ||
806 | unsigned long fsbase; | ||
807 | unsigned long gsbase; | ||
808 | @@ -970,4 +968,7 @@ bool xen_set_default_idle(void); | ||
809 | |||
810 | void stop_this_cpu(void *dummy); | ||
811 | void df_debug(struct pt_regs *regs, long error_code); | ||
812 | + | ||
813 | +void __ibp_barrier(void); | ||
814 | + | ||
815 | #endif /* _ASM_X86_PROCESSOR_H */ | ||
816 | diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h | ||
817 | index d91ba04dd007..fb3a6de7440b 100644 | ||
818 | --- a/arch/x86/include/asm/required-features.h | ||
819 | +++ b/arch/x86/include/asm/required-features.h | ||
820 | @@ -106,6 +106,7 @@ | ||
821 | #define REQUIRED_MASK15 0 | ||
822 | #define REQUIRED_MASK16 (NEED_LA57) | ||
823 | #define REQUIRED_MASK17 0 | ||
824 | -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) | ||
825 | +#define REQUIRED_MASK18 0 | ||
826 | +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) | ||
827 | |||
828 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ | ||
829 | diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h | ||
830 | index e3c95e8e61c5..03eedc21246d 100644 | ||
831 | --- a/arch/x86/include/asm/syscall.h | ||
832 | +++ b/arch/x86/include/asm/syscall.h | ||
833 | @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, | ||
834 | * TS_COMPAT is set for 32-bit syscall entries and then | ||
835 | * remains set until we return to user mode. | ||
836 | */ | ||
837 | - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
838 | + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
839 | /* | ||
840 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | ||
841 | * and will match correctly in comparisons. | ||
842 | @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, | ||
843 | unsigned long *args) | ||
844 | { | ||
845 | # ifdef CONFIG_IA32_EMULATION | ||
846 | - if (task->thread.status & TS_COMPAT) | ||
847 | + if (task->thread_info.status & TS_COMPAT) | ||
848 | switch (i) { | ||
849 | case 0: | ||
850 | if (!n--) break; | ||
851 | @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, | ||
852 | const unsigned long *args) | ||
853 | { | ||
854 | # ifdef CONFIG_IA32_EMULATION | ||
855 | - if (task->thread.status & TS_COMPAT) | ||
856 | + if (task->thread_info.status & TS_COMPAT) | ||
857 | switch (i) { | ||
858 | case 0: | ||
859 | if (!n--) break; | ||
860 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | ||
861 | index 00223333821a..eda3b6823ca4 100644 | ||
862 | --- a/arch/x86/include/asm/thread_info.h | ||
863 | +++ b/arch/x86/include/asm/thread_info.h | ||
864 | @@ -55,6 +55,7 @@ struct task_struct; | ||
865 | |||
866 | struct thread_info { | ||
867 | unsigned long flags; /* low level flags */ | ||
868 | + u32 status; /* thread synchronous flags */ | ||
869 | }; | ||
870 | |||
871 | #define INIT_THREAD_INFO(tsk) \ | ||
872 | @@ -221,7 +222,7 @@ static inline int arch_within_stack_frames(const void * const stack, | ||
873 | #define in_ia32_syscall() true | ||
874 | #else | ||
875 | #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ | ||
876 | - current->thread.status & TS_COMPAT) | ||
877 | + current_thread_info()->status & TS_COMPAT) | ||
878 | #endif | ||
879 | |||
880 | /* | ||
881 | diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h | ||
882 | index 3effd3c994af..4405c4b308e8 100644 | ||
883 | --- a/arch/x86/include/asm/tlbflush.h | ||
884 | +++ b/arch/x86/include/asm/tlbflush.h | ||
885 | @@ -174,6 +174,8 @@ struct tlb_state { | ||
886 | struct mm_struct *loaded_mm; | ||
887 | u16 loaded_mm_asid; | ||
888 | u16 next_asid; | ||
889 | + /* last user mm's ctx id */ | ||
890 | + u64 last_ctx_id; | ||
891 | |||
892 | /* | ||
893 | * We can be in one of several states: | ||
894 | diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h | ||
895 | index 574dff4d2913..aae77eb8491c 100644 | ||
896 | --- a/arch/x86/include/asm/uaccess.h | ||
897 | +++ b/arch/x86/include/asm/uaccess.h | ||
898 | @@ -124,6 +124,11 @@ extern int __get_user_bad(void); | ||
899 | |||
900 | #define __uaccess_begin() stac() | ||
901 | #define __uaccess_end() clac() | ||
902 | +#define __uaccess_begin_nospec() \ | ||
903 | +({ \ | ||
904 | + stac(); \ | ||
905 | + barrier_nospec(); \ | ||
906 | +}) | ||
907 | |||
908 | /* | ||
909 | * This is a type: either unsigned long, if the argument fits into | ||
910 | @@ -445,7 +450,7 @@ do { \ | ||
911 | ({ \ | ||
912 | int __gu_err; \ | ||
913 | __inttype(*(ptr)) __gu_val; \ | ||
914 | - __uaccess_begin(); \ | ||
915 | + __uaccess_begin_nospec(); \ | ||
916 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ | ||
917 | __uaccess_end(); \ | ||
918 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ | ||
919 | @@ -487,6 +492,10 @@ struct __large_struct { unsigned long buf[100]; }; | ||
920 | __uaccess_begin(); \ | ||
921 | barrier(); | ||
922 | |||
923 | +#define uaccess_try_nospec do { \ | ||
924 | + current->thread.uaccess_err = 0; \ | ||
925 | + __uaccess_begin_nospec(); \ | ||
926 | + | ||
927 | #define uaccess_catch(err) \ | ||
928 | __uaccess_end(); \ | ||
929 | (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ | ||
930 | @@ -548,7 +557,7 @@ struct __large_struct { unsigned long buf[100]; }; | ||
931 | * get_user_ex(...); | ||
932 | * } get_user_catch(err) | ||
933 | */ | ||
934 | -#define get_user_try uaccess_try | ||
935 | +#define get_user_try uaccess_try_nospec | ||
936 | #define get_user_catch(err) uaccess_catch(err) | ||
937 | |||
938 | #define get_user_ex(x, ptr) do { \ | ||
939 | @@ -582,7 +591,7 @@ extern void __cmpxchg_wrong_size(void) | ||
940 | __typeof__(ptr) __uval = (uval); \ | ||
941 | __typeof__(*(ptr)) __old = (old); \ | ||
942 | __typeof__(*(ptr)) __new = (new); \ | ||
943 | - __uaccess_begin(); \ | ||
944 | + __uaccess_begin_nospec(); \ | ||
945 | switch (size) { \ | ||
946 | case 1: \ | ||
947 | { \ | ||
948 | diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h | ||
949 | index 72950401b223..ba2dc1930630 100644 | ||
950 | --- a/arch/x86/include/asm/uaccess_32.h | ||
951 | +++ b/arch/x86/include/asm/uaccess_32.h | ||
952 | @@ -29,21 +29,21 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n) | ||
953 | switch (n) { | ||
954 | case 1: | ||
955 | ret = 0; | ||
956 | - __uaccess_begin(); | ||
957 | + __uaccess_begin_nospec(); | ||
958 | __get_user_asm_nozero(*(u8 *)to, from, ret, | ||
959 | "b", "b", "=q", 1); | ||
960 | __uaccess_end(); | ||
961 | return ret; | ||
962 | case 2: | ||
963 | ret = 0; | ||
964 | - __uaccess_begin(); | ||
965 | + __uaccess_begin_nospec(); | ||
966 | __get_user_asm_nozero(*(u16 *)to, from, ret, | ||
967 | "w", "w", "=r", 2); | ||
968 | __uaccess_end(); | ||
969 | return ret; | ||
970 | case 4: | ||
971 | ret = 0; | ||
972 | - __uaccess_begin(); | ||
973 | + __uaccess_begin_nospec(); | ||
974 | __get_user_asm_nozero(*(u32 *)to, from, ret, | ||
975 | "l", "k", "=r", 4); | ||
976 | __uaccess_end(); | ||
977 | diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h | ||
978 | index f07ef3c575db..62546b3a398e 100644 | ||
979 | --- a/arch/x86/include/asm/uaccess_64.h | ||
980 | +++ b/arch/x86/include/asm/uaccess_64.h | ||
981 | @@ -55,31 +55,31 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) | ||
982 | return copy_user_generic(dst, (__force void *)src, size); | ||
983 | switch (size) { | ||
984 | case 1: | ||
985 | - __uaccess_begin(); | ||
986 | + __uaccess_begin_nospec(); | ||
987 | __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, | ||
988 | ret, "b", "b", "=q", 1); | ||
989 | __uaccess_end(); | ||
990 | return ret; | ||
991 | case 2: | ||
992 | - __uaccess_begin(); | ||
993 | + __uaccess_begin_nospec(); | ||
994 | __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, | ||
995 | ret, "w", "w", "=r", 2); | ||
996 | __uaccess_end(); | ||
997 | return ret; | ||
998 | case 4: | ||
999 | - __uaccess_begin(); | ||
1000 | + __uaccess_begin_nospec(); | ||
1001 | __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, | ||
1002 | ret, "l", "k", "=r", 4); | ||
1003 | __uaccess_end(); | ||
1004 | return ret; | ||
1005 | case 8: | ||
1006 | - __uaccess_begin(); | ||
1007 | + __uaccess_begin_nospec(); | ||
1008 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, | ||
1009 | ret, "q", "", "=r", 8); | ||
1010 | __uaccess_end(); | ||
1011 | return ret; | ||
1012 | case 10: | ||
1013 | - __uaccess_begin(); | ||
1014 | + __uaccess_begin_nospec(); | ||
1015 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, | ||
1016 | ret, "q", "", "=r", 10); | ||
1017 | if (likely(!ret)) | ||
1018 | @@ -89,7 +89,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size) | ||
1019 | __uaccess_end(); | ||
1020 | return ret; | ||
1021 | case 16: | ||
1022 | - __uaccess_begin(); | ||
1023 | + __uaccess_begin_nospec(); | ||
1024 | __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, | ||
1025 | ret, "q", "", "=r", 16); | ||
1026 | if (likely(!ret)) | ||
1027 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c | ||
1028 | index e0b97e4d1db5..21be0193d9dc 100644 | ||
1029 | --- a/arch/x86/kernel/alternative.c | ||
1030 | +++ b/arch/x86/kernel/alternative.c | ||
1031 | @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) | ||
1032 | } | ||
1033 | __setup("noreplace-smp", setup_noreplace_smp); | ||
1034 | |||
1035 | -#ifdef CONFIG_PARAVIRT | ||
1036 | -static int __initdata_or_module noreplace_paravirt = 0; | ||
1037 | - | ||
1038 | -static int __init setup_noreplace_paravirt(char *str) | ||
1039 | -{ | ||
1040 | - noreplace_paravirt = 1; | ||
1041 | - return 1; | ||
1042 | -} | ||
1043 | -__setup("noreplace-paravirt", setup_noreplace_paravirt); | ||
1044 | -#endif | ||
1045 | - | ||
1046 | #define DPRINTK(fmt, args...) \ | ||
1047 | do { \ | ||
1048 | if (debug_alternative) \ | ||
1049 | @@ -298,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) | ||
1050 | tgt_rip = next_rip + o_dspl; | ||
1051 | n_dspl = tgt_rip - orig_insn; | ||
1052 | |||
1053 | - DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); | ||
1054 | + DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl); | ||
1055 | |||
1056 | if (tgt_rip - orig_insn >= 0) { | ||
1057 | if (n_dspl - 2 <= 127) | ||
1058 | @@ -355,7 +344,7 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins | ||
1059 | add_nops(instr + (a->instrlen - a->padlen), a->padlen); | ||
1060 | local_irq_restore(flags); | ||
1061 | |||
1062 | - DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", | ||
1063 | + DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ", | ||
1064 | instr, a->instrlen - a->padlen, a->padlen); | ||
1065 | } | ||
1066 | |||
1067 | @@ -376,7 +365,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, | ||
1068 | u8 *instr, *replacement; | ||
1069 | u8 insnbuf[MAX_PATCH_LEN]; | ||
1070 | |||
1071 | - DPRINTK("alt table %p -> %p", start, end); | ||
1072 | + DPRINTK("alt table %px, -> %px", start, end); | ||
1073 | /* | ||
1074 | * The scan order should be from start to end. A later scanned | ||
1075 | * alternative code can overwrite previously scanned alternative code. | ||
1076 | @@ -400,14 +389,14 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, | ||
1077 | continue; | ||
1078 | } | ||
1079 | |||
1080 | - DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", | ||
1081 | + DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", | ||
1082 | a->cpuid >> 5, | ||
1083 | a->cpuid & 0x1f, | ||
1084 | instr, a->instrlen, | ||
1085 | replacement, a->replacementlen, a->padlen); | ||
1086 | |||
1087 | - DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); | ||
1088 | - DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); | ||
1089 | + DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); | ||
1090 | + DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); | ||
1091 | |||
1092 | memcpy(insnbuf, replacement, a->replacementlen); | ||
1093 | insnbuf_sz = a->replacementlen; | ||
1094 | @@ -433,7 +422,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, | ||
1095 | a->instrlen - a->replacementlen); | ||
1096 | insnbuf_sz += a->instrlen - a->replacementlen; | ||
1097 | } | ||
1098 | - DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); | ||
1099 | + DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); | ||
1100 | |||
1101 | text_poke_early(instr, insnbuf, insnbuf_sz); | ||
1102 | } | ||
1103 | @@ -599,9 +588,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, | ||
1104 | struct paravirt_patch_site *p; | ||
1105 | char insnbuf[MAX_PATCH_LEN]; | ||
1106 | |||
1107 | - if (noreplace_paravirt) | ||
1108 | - return; | ||
1109 | - | ||
1110 | for (p = start; p < end; p++) { | ||
1111 | unsigned int used; | ||
1112 | |||
1113 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c | ||
1114 | index 390b3dc3d438..71949bf2de5a 100644 | ||
1115 | --- a/arch/x86/kernel/cpu/bugs.c | ||
1116 | +++ b/arch/x86/kernel/cpu/bugs.c | ||
1117 | @@ -11,6 +11,7 @@ | ||
1118 | #include <linux/init.h> | ||
1119 | #include <linux/utsname.h> | ||
1120 | #include <linux/cpu.h> | ||
1121 | +#include <linux/module.h> | ||
1122 | |||
1123 | #include <asm/nospec-branch.h> | ||
1124 | #include <asm/cmdline.h> | ||
1125 | @@ -90,20 +91,41 @@ static const char *spectre_v2_strings[] = { | ||
1126 | }; | ||
1127 | |||
1128 | #undef pr_fmt | ||
1129 | -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt | ||
1130 | +#define pr_fmt(fmt) "Spectre V2 : " fmt | ||
1131 | |||
1132 | static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; | ||
1133 | |||
1134 | +#ifdef RETPOLINE | ||
1135 | +static bool spectre_v2_bad_module; | ||
1136 | + | ||
1137 | +bool retpoline_module_ok(bool has_retpoline) | ||
1138 | +{ | ||
1139 | + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) | ||
1140 | + return true; | ||
1141 | + | ||
1142 | + pr_err("System may be vulnerable to spectre v2\n"); | ||
1143 | + spectre_v2_bad_module = true; | ||
1144 | + return false; | ||
1145 | +} | ||
1146 | + | ||
1147 | +static inline const char *spectre_v2_module_string(void) | ||
1148 | +{ | ||
1149 | + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; | ||
1150 | +} | ||
1151 | +#else | ||
1152 | +static inline const char *spectre_v2_module_string(void) { return ""; } | ||
1153 | +#endif | ||
1154 | + | ||
1155 | static void __init spec2_print_if_insecure(const char *reason) | ||
1156 | { | ||
1157 | if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
1158 | - pr_info("%s\n", reason); | ||
1159 | + pr_info("%s selected on command line.\n", reason); | ||
1160 | } | ||
1161 | |||
1162 | static void __init spec2_print_if_secure(const char *reason) | ||
1163 | { | ||
1164 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
1165 | - pr_info("%s\n", reason); | ||
1166 | + pr_info("%s selected on command line.\n", reason); | ||
1167 | } | ||
1168 | |||
1169 | static inline bool retp_compiler(void) | ||
1170 | @@ -118,42 +140,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) | ||
1171 | return len == arglen && !strncmp(arg, opt, len); | ||
1172 | } | ||
1173 | |||
1174 | +static const struct { | ||
1175 | + const char *option; | ||
1176 | + enum spectre_v2_mitigation_cmd cmd; | ||
1177 | + bool secure; | ||
1178 | +} mitigation_options[] = { | ||
1179 | + { "off", SPECTRE_V2_CMD_NONE, false }, | ||
1180 | + { "on", SPECTRE_V2_CMD_FORCE, true }, | ||
1181 | + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, | ||
1182 | + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, | ||
1183 | + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, | ||
1184 | + { "auto", SPECTRE_V2_CMD_AUTO, false }, | ||
1185 | +}; | ||
1186 | + | ||
1187 | static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) | ||
1188 | { | ||
1189 | char arg[20]; | ||
1190 | - int ret; | ||
1191 | - | ||
1192 | - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, | ||
1193 | - sizeof(arg)); | ||
1194 | - if (ret > 0) { | ||
1195 | - if (match_option(arg, ret, "off")) { | ||
1196 | - goto disable; | ||
1197 | - } else if (match_option(arg, ret, "on")) { | ||
1198 | - spec2_print_if_secure("force enabled on command line."); | ||
1199 | - return SPECTRE_V2_CMD_FORCE; | ||
1200 | - } else if (match_option(arg, ret, "retpoline")) { | ||
1201 | - spec2_print_if_insecure("retpoline selected on command line."); | ||
1202 | - return SPECTRE_V2_CMD_RETPOLINE; | ||
1203 | - } else if (match_option(arg, ret, "retpoline,amd")) { | ||
1204 | - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
1205 | - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); | ||
1206 | - return SPECTRE_V2_CMD_AUTO; | ||
1207 | - } | ||
1208 | - spec2_print_if_insecure("AMD retpoline selected on command line."); | ||
1209 | - return SPECTRE_V2_CMD_RETPOLINE_AMD; | ||
1210 | - } else if (match_option(arg, ret, "retpoline,generic")) { | ||
1211 | - spec2_print_if_insecure("generic retpoline selected on command line."); | ||
1212 | - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; | ||
1213 | - } else if (match_option(arg, ret, "auto")) { | ||
1214 | + int ret, i; | ||
1215 | + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; | ||
1216 | + | ||
1217 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) | ||
1218 | + return SPECTRE_V2_CMD_NONE; | ||
1219 | + else { | ||
1220 | + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, | ||
1221 | + sizeof(arg)); | ||
1222 | + if (ret < 0) | ||
1223 | + return SPECTRE_V2_CMD_AUTO; | ||
1224 | + | ||
1225 | + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { | ||
1226 | + if (!match_option(arg, ret, mitigation_options[i].option)) | ||
1227 | + continue; | ||
1228 | + cmd = mitigation_options[i].cmd; | ||
1229 | + break; | ||
1230 | + } | ||
1231 | + | ||
1232 | + if (i >= ARRAY_SIZE(mitigation_options)) { | ||
1233 | + pr_err("unknown option (%s). Switching to AUTO select\n", | ||
1234 | + mitigation_options[i].option); | ||
1235 | return SPECTRE_V2_CMD_AUTO; | ||
1236 | } | ||
1237 | } | ||
1238 | |||
1239 | - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) | ||
1240 | + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || | ||
1241 | + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || | ||
1242 | + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && | ||
1243 | + !IS_ENABLED(CONFIG_RETPOLINE)) { | ||
1244 | + pr_err("%s selected but not compiled in. Switching to AUTO select\n", | ||
1245 | + mitigation_options[i].option); | ||
1246 | return SPECTRE_V2_CMD_AUTO; | ||
1247 | -disable: | ||
1248 | - spec2_print_if_insecure("disabled on command line."); | ||
1249 | - return SPECTRE_V2_CMD_NONE; | ||
1250 | + } | ||
1251 | + | ||
1252 | + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && | ||
1253 | + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
1254 | + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); | ||
1255 | + return SPECTRE_V2_CMD_AUTO; | ||
1256 | + } | ||
1257 | + | ||
1258 | + if (mitigation_options[i].secure) | ||
1259 | + spec2_print_if_secure(mitigation_options[i].option); | ||
1260 | + else | ||
1261 | + spec2_print_if_insecure(mitigation_options[i].option); | ||
1262 | + | ||
1263 | + return cmd; | ||
1264 | } | ||
1265 | |||
1266 | /* Check for Skylake-like CPUs (for RSB handling) */ | ||
1267 | @@ -191,10 +239,10 @@ static void __init spectre_v2_select_mitigation(void) | ||
1268 | return; | ||
1269 | |||
1270 | case SPECTRE_V2_CMD_FORCE: | ||
1271 | - /* FALLTRHU */ | ||
1272 | case SPECTRE_V2_CMD_AUTO: | ||
1273 | - goto retpoline_auto; | ||
1274 | - | ||
1275 | + if (IS_ENABLED(CONFIG_RETPOLINE)) | ||
1276 | + goto retpoline_auto; | ||
1277 | + break; | ||
1278 | case SPECTRE_V2_CMD_RETPOLINE_AMD: | ||
1279 | if (IS_ENABLED(CONFIG_RETPOLINE)) | ||
1280 | goto retpoline_amd; | ||
1281 | @@ -249,6 +297,12 @@ static void __init spectre_v2_select_mitigation(void) | ||
1282 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); | ||
1283 | pr_info("Filling RSB on context switch\n"); | ||
1284 | } | ||
1285 | + | ||
1286 | + /* Initialize Indirect Branch Prediction Barrier if supported */ | ||
1287 | + if (boot_cpu_has(X86_FEATURE_IBPB)) { | ||
1288 | + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); | ||
1289 | + pr_info("Enabling Indirect Branch Prediction Barrier\n"); | ||
1290 | + } | ||
1291 | } | ||
1292 | |||
1293 | #undef pr_fmt | ||
1294 | @@ -269,7 +323,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, | ||
1295 | { | ||
1296 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) | ||
1297 | return sprintf(buf, "Not affected\n"); | ||
1298 | - return sprintf(buf, "Vulnerable\n"); | ||
1299 | + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); | ||
1300 | } | ||
1301 | |||
1302 | ssize_t cpu_show_spectre_v2(struct device *dev, | ||
1303 | @@ -278,6 +332,14 @@ ssize_t cpu_show_spectre_v2(struct device *dev, | ||
1304 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
1305 | return sprintf(buf, "Not affected\n"); | ||
1306 | |||
1307 | - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); | ||
1308 | + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], | ||
1309 | + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", | ||
1310 | + spectre_v2_module_string()); | ||
1311 | } | ||
1312 | #endif | ||
1313 | + | ||
1314 | +void __ibp_barrier(void) | ||
1315 | +{ | ||
1316 | + __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0); | ||
1317 | +} | ||
1318 | +EXPORT_SYMBOL_GPL(__ibp_barrier); | ||
1319 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | ||
1320 | index 372ba3fb400f..92b66e21bae5 100644 | ||
1321 | --- a/arch/x86/kernel/cpu/common.c | ||
1322 | +++ b/arch/x86/kernel/cpu/common.c | ||
1323 | @@ -47,6 +47,8 @@ | ||
1324 | #include <asm/pat.h> | ||
1325 | #include <asm/microcode.h> | ||
1326 | #include <asm/microcode_intel.h> | ||
1327 | +#include <asm/intel-family.h> | ||
1328 | +#include <asm/cpu_device_id.h> | ||
1329 | |||
1330 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1331 | #include <asm/uv/uv.h> | ||
1332 | @@ -724,6 +726,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) | ||
1333 | } | ||
1334 | } | ||
1335 | |||
1336 | +static void init_speculation_control(struct cpuinfo_x86 *c) | ||
1337 | +{ | ||
1338 | + /* | ||
1339 | + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, | ||
1340 | + * and they also have a different bit for STIBP support. Also, | ||
1341 | + * a hypervisor might have set the individual AMD bits even on | ||
1342 | + * Intel CPUs, for finer-grained selection of what's available. | ||
1343 | + * | ||
1344 | + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware | ||
1345 | + * features, which are visible in /proc/cpuinfo and used by the | ||
1346 | + * kernel. So set those accordingly from the Intel bits. | ||
1347 | + */ | ||
1348 | + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { | ||
1349 | + set_cpu_cap(c, X86_FEATURE_IBRS); | ||
1350 | + set_cpu_cap(c, X86_FEATURE_IBPB); | ||
1351 | + } | ||
1352 | + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) | ||
1353 | + set_cpu_cap(c, X86_FEATURE_STIBP); | ||
1354 | +} | ||
1355 | + | ||
1356 | void get_cpu_cap(struct cpuinfo_x86 *c) | ||
1357 | { | ||
1358 | u32 eax, ebx, ecx, edx; | ||
1359 | @@ -745,6 +767,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | ||
1360 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
1361 | c->x86_capability[CPUID_7_0_EBX] = ebx; | ||
1362 | c->x86_capability[CPUID_7_ECX] = ecx; | ||
1363 | + c->x86_capability[CPUID_7_EDX] = edx; | ||
1364 | } | ||
1365 | |||
1366 | /* Extended state features: level 0x0000000d */ | ||
1367 | @@ -817,6 +840,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | ||
1368 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); | ||
1369 | |||
1370 | init_scattered_cpuid_features(c); | ||
1371 | + init_speculation_control(c); | ||
1372 | |||
1373 | /* | ||
1374 | * Clear/Set all flags overridden by options, after probe. | ||
1375 | @@ -852,6 +876,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
1376 | #endif | ||
1377 | } | ||
1378 | |||
1379 | +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { | ||
1380 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, | ||
1381 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, | ||
1382 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, | ||
1383 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, | ||
1384 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, | ||
1385 | + { X86_VENDOR_CENTAUR, 5 }, | ||
1386 | + { X86_VENDOR_INTEL, 5 }, | ||
1387 | + { X86_VENDOR_NSC, 5 }, | ||
1388 | + { X86_VENDOR_ANY, 4 }, | ||
1389 | + {} | ||
1390 | +}; | ||
1391 | + | ||
1392 | +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { | ||
1393 | + { X86_VENDOR_AMD }, | ||
1394 | + {} | ||
1395 | +}; | ||
1396 | + | ||
1397 | +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) | ||
1398 | +{ | ||
1399 | + u64 ia32_cap = 0; | ||
1400 | + | ||
1401 | + if (x86_match_cpu(cpu_no_meltdown)) | ||
1402 | + return false; | ||
1403 | + | ||
1404 | + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) | ||
1405 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); | ||
1406 | + | ||
1407 | + /* Rogue Data Cache Load? No! */ | ||
1408 | + if (ia32_cap & ARCH_CAP_RDCL_NO) | ||
1409 | + return false; | ||
1410 | + | ||
1411 | + return true; | ||
1412 | +} | ||
1413 | + | ||
1414 | /* | ||
1415 | * Do minimum CPU detection early. | ||
1416 | * Fields really needed: vendor, cpuid_level, family, model, mask, | ||
1417 | @@ -899,11 +958,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | ||
1418 | |||
1419 | setup_force_cpu_cap(X86_FEATURE_ALWAYS); | ||
1420 | |||
1421 | - if (c->x86_vendor != X86_VENDOR_AMD) | ||
1422 | - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); | ||
1423 | - | ||
1424 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | ||
1425 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); | ||
1426 | + if (!x86_match_cpu(cpu_no_speculation)) { | ||
1427 | + if (cpu_vulnerable_to_meltdown(c)) | ||
1428 | + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); | ||
1429 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | ||
1430 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); | ||
1431 | + } | ||
1432 | |||
1433 | fpu__init_system(c); | ||
1434 | |||
1435 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c | ||
1436 | index b720dacac051..4cf4f8cbc69d 100644 | ||
1437 | --- a/arch/x86/kernel/cpu/intel.c | ||
1438 | +++ b/arch/x86/kernel/cpu/intel.c | ||
1439 | @@ -102,6 +102,59 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) | ||
1440 | ELF_HWCAP2 |= HWCAP2_RING3MWAIT; | ||
1441 | } | ||
1442 | |||
1443 | +/* | ||
1444 | + * Early microcode releases for the Spectre v2 mitigation were broken. | ||
1445 | + * Information taken from; | ||
1446 | + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf | ||
1447 | + * - https://kb.vmware.com/s/article/52345 | ||
1448 | + * - Microcode revisions observed in the wild | ||
1449 | + * - Release note from 20180108 microcode release | ||
1450 | + */ | ||
1451 | +struct sku_microcode { | ||
1452 | + u8 model; | ||
1453 | + u8 stepping; | ||
1454 | + u32 microcode; | ||
1455 | +}; | ||
1456 | +static const struct sku_microcode spectre_bad_microcodes[] = { | ||
1457 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, | ||
1458 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, | ||
1459 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, | ||
1460 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, | ||
1461 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, | ||
1462 | + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, | ||
1463 | + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, | ||
1464 | + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, | ||
1465 | + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, | ||
1466 | + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, | ||
1467 | + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, | ||
1468 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, | ||
1469 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, | ||
1470 | + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, | ||
1471 | + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, | ||
1472 | + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, | ||
1473 | + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, | ||
1474 | + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, | ||
1475 | + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, | ||
1476 | + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, | ||
1477 | + /* Updated in the 20180108 release; blacklist until we know otherwise */ | ||
1478 | + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, | ||
1479 | + /* Observed in the wild */ | ||
1480 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, | ||
1481 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, | ||
1482 | +}; | ||
1483 | + | ||
1484 | +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) | ||
1485 | +{ | ||
1486 | + int i; | ||
1487 | + | ||
1488 | + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { | ||
1489 | + if (c->x86_model == spectre_bad_microcodes[i].model && | ||
1490 | + c->x86_mask == spectre_bad_microcodes[i].stepping) | ||
1491 | + return (c->microcode <= spectre_bad_microcodes[i].microcode); | ||
1492 | + } | ||
1493 | + return false; | ||
1494 | +} | ||
1495 | + | ||
1496 | static void early_init_intel(struct cpuinfo_x86 *c) | ||
1497 | { | ||
1498 | u64 misc_enable; | ||
1499 | @@ -122,6 +175,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) | ||
1500 | if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) | ||
1501 | c->microcode = intel_get_microcode_revision(); | ||
1502 | |||
1503 | + /* Now if any of them are set, check the blacklist and clear the lot */ | ||
1504 | + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || | ||
1505 | + cpu_has(c, X86_FEATURE_INTEL_STIBP) || | ||
1506 | + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || | ||
1507 | + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { | ||
1508 | + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); | ||
1509 | + setup_clear_cpu_cap(X86_FEATURE_IBRS); | ||
1510 | + setup_clear_cpu_cap(X86_FEATURE_IBPB); | ||
1511 | + setup_clear_cpu_cap(X86_FEATURE_STIBP); | ||
1512 | + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); | ||
1513 | + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); | ||
1514 | + } | ||
1515 | + | ||
1516 | /* | ||
1517 | * Atom erratum AAE44/AAF40/AAG38/AAH41: | ||
1518 | * | ||
1519 | diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c | ||
1520 | index d0e69769abfd..df11f5d604be 100644 | ||
1521 | --- a/arch/x86/kernel/cpu/scattered.c | ||
1522 | +++ b/arch/x86/kernel/cpu/scattered.c | ||
1523 | @@ -21,8 +21,6 @@ struct cpuid_bit { | ||
1524 | static const struct cpuid_bit cpuid_bits[] = { | ||
1525 | { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, | ||
1526 | { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, | ||
1527 | - { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, | ||
1528 | - { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, | ||
1529 | { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, | ||
1530 | { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, | ||
1531 | { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, | ||
1532 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c | ||
1533 | index c75466232016..9eb448c7859d 100644 | ||
1534 | --- a/arch/x86/kernel/process_64.c | ||
1535 | +++ b/arch/x86/kernel/process_64.c | ||
1536 | @@ -557,7 +557,7 @@ static void __set_personality_x32(void) | ||
1537 | * Pretend to come from a x32 execve. | ||
1538 | */ | ||
1539 | task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT; | ||
1540 | - current->thread.status &= ~TS_COMPAT; | ||
1541 | + current_thread_info()->status &= ~TS_COMPAT; | ||
1542 | #endif | ||
1543 | } | ||
1544 | |||
1545 | @@ -571,7 +571,7 @@ static void __set_personality_ia32(void) | ||
1546 | current->personality |= force_personality32; | ||
1547 | /* Prepare the first "return" to user space */ | ||
1548 | task_pt_regs(current)->orig_ax = __NR_ia32_execve; | ||
1549 | - current->thread.status |= TS_COMPAT; | ||
1550 | + current_thread_info()->status |= TS_COMPAT; | ||
1551 | #endif | ||
1552 | } | ||
1553 | |||
1554 | diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c | ||
1555 | index f37d18124648..ed5c4cdf0a34 100644 | ||
1556 | --- a/arch/x86/kernel/ptrace.c | ||
1557 | +++ b/arch/x86/kernel/ptrace.c | ||
1558 | @@ -935,7 +935,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | ||
1559 | */ | ||
1560 | regs->orig_ax = value; | ||
1561 | if (syscall_get_nr(child, regs) >= 0) | ||
1562 | - child->thread.status |= TS_I386_REGS_POKED; | ||
1563 | + child->thread_info.status |= TS_I386_REGS_POKED; | ||
1564 | break; | ||
1565 | |||
1566 | case offsetof(struct user32, regs.eflags): | ||
1567 | diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c | ||
1568 | index b9e00e8f1c9b..4cdc0b27ec82 100644 | ||
1569 | --- a/arch/x86/kernel/signal.c | ||
1570 | +++ b/arch/x86/kernel/signal.c | ||
1571 | @@ -787,7 +787,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | ||
1572 | * than the tracee. | ||
1573 | */ | ||
1574 | #ifdef CONFIG_IA32_EMULATION | ||
1575 | - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
1576 | + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
1577 | return __NR_ia32_restart_syscall; | ||
1578 | #endif | ||
1579 | #ifdef CONFIG_X86_X32_ABI | ||
1580 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c | ||
1581 | index 0099e10eb045..13f5d4217e4f 100644 | ||
1582 | --- a/arch/x86/kvm/cpuid.c | ||
1583 | +++ b/arch/x86/kvm/cpuid.c | ||
1584 | @@ -67,9 +67,7 @@ u64 kvm_supported_xcr0(void) | ||
1585 | |||
1586 | #define F(x) bit(X86_FEATURE_##x) | ||
1587 | |||
1588 | -/* These are scattered features in cpufeatures.h. */ | ||
1589 | -#define KVM_CPUID_BIT_AVX512_4VNNIW 2 | ||
1590 | -#define KVM_CPUID_BIT_AVX512_4FMAPS 3 | ||
1591 | +/* For scattered features from cpufeatures.h; we currently expose none */ | ||
1592 | #define KF(x) bit(KVM_CPUID_BIT_##x) | ||
1593 | |||
1594 | int kvm_update_cpuid(struct kvm_vcpu *vcpu) | ||
1595 | @@ -367,6 +365,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
1596 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | | ||
1597 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
1598 | |||
1599 | + /* cpuid 0x80000008.ebx */ | ||
1600 | + const u32 kvm_cpuid_8000_0008_ebx_x86_features = | ||
1601 | + F(IBPB) | F(IBRS); | ||
1602 | + | ||
1603 | /* cpuid 0xC0000001.edx */ | ||
1604 | const u32 kvm_cpuid_C000_0001_edx_x86_features = | ||
1605 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
1606 | @@ -392,7 +394,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
1607 | |||
1608 | /* cpuid 7.0.edx*/ | ||
1609 | const u32 kvm_cpuid_7_0_edx_x86_features = | ||
1610 | - KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS); | ||
1611 | + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | | ||
1612 | + F(ARCH_CAPABILITIES); | ||
1613 | |||
1614 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
1615 | get_cpu(); | ||
1616 | @@ -477,7 +480,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
1617 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) | ||
1618 | entry->ecx &= ~F(PKU); | ||
1619 | entry->edx &= kvm_cpuid_7_0_edx_x86_features; | ||
1620 | - entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); | ||
1621 | + cpuid_mask(&entry->edx, CPUID_7_EDX); | ||
1622 | } else { | ||
1623 | entry->ebx = 0; | ||
1624 | entry->ecx = 0; | ||
1625 | @@ -627,7 +630,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
1626 | if (!g_phys_as) | ||
1627 | g_phys_as = phys_as; | ||
1628 | entry->eax = g_phys_as | (virt_as << 8); | ||
1629 | - entry->ebx = entry->edx = 0; | ||
1630 | + entry->edx = 0; | ||
1631 | + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ | ||
1632 | + if (boot_cpu_has(X86_FEATURE_IBPB)) | ||
1633 | + entry->ebx |= F(IBPB); | ||
1634 | + if (boot_cpu_has(X86_FEATURE_IBRS)) | ||
1635 | + entry->ebx |= F(IBRS); | ||
1636 | + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; | ||
1637 | + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); | ||
1638 | break; | ||
1639 | } | ||
1640 | case 0x80000019: | ||
1641 | diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h | ||
1642 | index c2cea6651279..9a327d5b6d1f 100644 | ||
1643 | --- a/arch/x86/kvm/cpuid.h | ||
1644 | +++ b/arch/x86/kvm/cpuid.h | ||
1645 | @@ -54,6 +54,7 @@ static const struct cpuid_reg reverse_cpuid[] = { | ||
1646 | [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, | ||
1647 | [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, | ||
1648 | [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, | ||
1649 | + [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, | ||
1650 | }; | ||
1651 | |||
1652 | static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature) | ||
1653 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c | ||
1654 | index eca6a89f2326..fab073b19528 100644 | ||
1655 | --- a/arch/x86/kvm/emulate.c | ||
1656 | +++ b/arch/x86/kvm/emulate.c | ||
1657 | @@ -25,6 +25,7 @@ | ||
1658 | #include <asm/kvm_emulate.h> | ||
1659 | #include <linux/stringify.h> | ||
1660 | #include <asm/debugreg.h> | ||
1661 | +#include <asm/nospec-branch.h> | ||
1662 | |||
1663 | #include "x86.h" | ||
1664 | #include "tss.h" | ||
1665 | @@ -1021,8 +1022,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) | ||
1666 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); | ||
1667 | |||
1668 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
1669 | - asm("push %[flags]; popf; call *%[fastop]" | ||
1670 | - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); | ||
1671 | + asm("push %[flags]; popf; " CALL_NOSPEC | ||
1672 | + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); | ||
1673 | return rc; | ||
1674 | } | ||
1675 | |||
1676 | @@ -5350,9 +5351,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) | ||
1677 | if (!(ctxt->d & ByteOp)) | ||
1678 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | ||
1679 | |||
1680 | - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" | ||
1681 | + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" | ||
1682 | : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), | ||
1683 | - [fastop]"+S"(fop), ASM_CALL_CONSTRAINT | ||
1684 | + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT | ||
1685 | : "c"(ctxt->src2.val)); | ||
1686 | |||
1687 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); | ||
1688 | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c | ||
1689 | index 6a8284f72328..e0bc3ad0f6cd 100644 | ||
1690 | --- a/arch/x86/kvm/svm.c | ||
1691 | +++ b/arch/x86/kvm/svm.c | ||
1692 | @@ -184,6 +184,8 @@ struct vcpu_svm { | ||
1693 | u64 gs_base; | ||
1694 | } host; | ||
1695 | |||
1696 | + u64 spec_ctrl; | ||
1697 | + | ||
1698 | u32 *msrpm; | ||
1699 | |||
1700 | ulong nmi_iret_rip; | ||
1701 | @@ -249,6 +251,8 @@ static const struct svm_direct_access_msrs { | ||
1702 | { .index = MSR_CSTAR, .always = true }, | ||
1703 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
1704 | #endif | ||
1705 | + { .index = MSR_IA32_SPEC_CTRL, .always = false }, | ||
1706 | + { .index = MSR_IA32_PRED_CMD, .always = false }, | ||
1707 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
1708 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
1709 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
1710 | @@ -529,6 +533,7 @@ struct svm_cpu_data { | ||
1711 | struct kvm_ldttss_desc *tss_desc; | ||
1712 | |||
1713 | struct page *save_area; | ||
1714 | + struct vmcb *current_vmcb; | ||
1715 | }; | ||
1716 | |||
1717 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); | ||
1718 | @@ -880,6 +885,25 @@ static bool valid_msr_intercept(u32 index) | ||
1719 | return false; | ||
1720 | } | ||
1721 | |||
1722 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) | ||
1723 | +{ | ||
1724 | + u8 bit_write; | ||
1725 | + unsigned long tmp; | ||
1726 | + u32 offset; | ||
1727 | + u32 *msrpm; | ||
1728 | + | ||
1729 | + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: | ||
1730 | + to_svm(vcpu)->msrpm; | ||
1731 | + | ||
1732 | + offset = svm_msrpm_offset(msr); | ||
1733 | + bit_write = 2 * (msr & 0x0f) + 1; | ||
1734 | + tmp = msrpm[offset]; | ||
1735 | + | ||
1736 | + BUG_ON(offset == MSR_INVALID); | ||
1737 | + | ||
1738 | + return !!test_bit(bit_write, &tmp); | ||
1739 | +} | ||
1740 | + | ||
1741 | static void set_msr_interception(u32 *msrpm, unsigned msr, | ||
1742 | int read, int write) | ||
1743 | { | ||
1744 | @@ -1585,6 +1609,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | ||
1745 | u32 dummy; | ||
1746 | u32 eax = 1; | ||
1747 | |||
1748 | + svm->spec_ctrl = 0; | ||
1749 | + | ||
1750 | if (!init_event) { | ||
1751 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | | ||
1752 | MSR_IA32_APICBASE_ENABLE; | ||
1753 | @@ -1706,11 +1732,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) | ||
1754 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); | ||
1755 | kvm_vcpu_uninit(vcpu); | ||
1756 | kmem_cache_free(kvm_vcpu_cache, svm); | ||
1757 | + /* | ||
1758 | + * The vmcb page can be recycled, causing a false negative in | ||
1759 | + * svm_vcpu_load(). So do a full IBPB now. | ||
1760 | + */ | ||
1761 | + indirect_branch_prediction_barrier(); | ||
1762 | } | ||
1763 | |||
1764 | static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1765 | { | ||
1766 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1767 | + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); | ||
1768 | int i; | ||
1769 | |||
1770 | if (unlikely(cpu != vcpu->cpu)) { | ||
1771 | @@ -1739,6 +1771,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
1772 | if (static_cpu_has(X86_FEATURE_RDTSCP)) | ||
1773 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); | ||
1774 | |||
1775 | + if (sd->current_vmcb != svm->vmcb) { | ||
1776 | + sd->current_vmcb = svm->vmcb; | ||
1777 | + indirect_branch_prediction_barrier(); | ||
1778 | + } | ||
1779 | avic_vcpu_load(vcpu, cpu); | ||
1780 | } | ||
1781 | |||
1782 | @@ -3579,6 +3615,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
1783 | case MSR_VM_CR: | ||
1784 | msr_info->data = svm->nested.vm_cr_msr; | ||
1785 | break; | ||
1786 | + case MSR_IA32_SPEC_CTRL: | ||
1787 | + if (!msr_info->host_initiated && | ||
1788 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) | ||
1789 | + return 1; | ||
1790 | + | ||
1791 | + msr_info->data = svm->spec_ctrl; | ||
1792 | + break; | ||
1793 | case MSR_IA32_UCODE_REV: | ||
1794 | msr_info->data = 0x01000065; | ||
1795 | break; | ||
1796 | @@ -3670,6 +3713,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | ||
1797 | case MSR_IA32_TSC: | ||
1798 | kvm_write_tsc(vcpu, msr); | ||
1799 | break; | ||
1800 | + case MSR_IA32_SPEC_CTRL: | ||
1801 | + if (!msr->host_initiated && | ||
1802 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) | ||
1803 | + return 1; | ||
1804 | + | ||
1805 | + /* The STIBP bit doesn't fault even if it's not advertised */ | ||
1806 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) | ||
1807 | + return 1; | ||
1808 | + | ||
1809 | + svm->spec_ctrl = data; | ||
1810 | + | ||
1811 | + if (!data) | ||
1812 | + break; | ||
1813 | + | ||
1814 | + /* | ||
1815 | + * For non-nested: | ||
1816 | + * When it's written (to non-zero) for the first time, pass | ||
1817 | + * it through. | ||
1818 | + * | ||
1819 | + * For nested: | ||
1820 | + * The handling of the MSR bitmap for L2 guests is done in | ||
1821 | + * nested_svm_vmrun_msrpm. | ||
1822 | + * We update the L1 MSR bit as well since it will end up | ||
1823 | + * touching the MSR anyway now. | ||
1824 | + */ | ||
1825 | + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); | ||
1826 | + break; | ||
1827 | + case MSR_IA32_PRED_CMD: | ||
1828 | + if (!msr->host_initiated && | ||
1829 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) | ||
1830 | + return 1; | ||
1831 | + | ||
1832 | + if (data & ~PRED_CMD_IBPB) | ||
1833 | + return 1; | ||
1834 | + | ||
1835 | + if (!data) | ||
1836 | + break; | ||
1837 | + | ||
1838 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); | ||
1839 | + if (is_guest_mode(vcpu)) | ||
1840 | + break; | ||
1841 | + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); | ||
1842 | + break; | ||
1843 | case MSR_STAR: | ||
1844 | svm->vmcb->save.star = data; | ||
1845 | break; | ||
1846 | @@ -4922,6 +5008,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||
1847 | |||
1848 | local_irq_enable(); | ||
1849 | |||
1850 | + /* | ||
1851 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if | ||
1852 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there | ||
1853 | + * is no need to worry about the conditional branch over the wrmsr | ||
1854 | + * being speculatively taken. | ||
1855 | + */ | ||
1856 | + if (svm->spec_ctrl) | ||
1857 | + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); | ||
1858 | + | ||
1859 | asm volatile ( | ||
1860 | "push %%" _ASM_BP "; \n\t" | ||
1861 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" | ||
1862 | @@ -5014,6 +5109,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||
1863 | #endif | ||
1864 | ); | ||
1865 | |||
1866 | + /* | ||
1867 | + * We do not use IBRS in the kernel. If this vCPU has used the | ||
1868 | + * SPEC_CTRL MSR it may have left it on; save the value and | ||
1869 | + * turn it off. This is much more efficient than blindly adding | ||
1870 | + * it to the atomic save/restore list. Especially as the former | ||
1871 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. | ||
1872 | + * | ||
1873 | + * For non-nested case: | ||
1874 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to | ||
1875 | + * save it. | ||
1876 | + * | ||
1877 | + * For nested case: | ||
1878 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to | ||
1879 | + * save it. | ||
1880 | + */ | ||
1881 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) | ||
1882 | + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); | ||
1883 | + | ||
1884 | + if (svm->spec_ctrl) | ||
1885 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); | ||
1886 | + | ||
1887 | /* Eliminate branch target predictions from guest mode */ | ||
1888 | vmexit_fill_RSB(); | ||
1889 | |||
1890 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c | ||
1891 | index a45063a9219c..0ae4b1a86168 100644 | ||
1892 | --- a/arch/x86/kvm/vmx.c | ||
1893 | +++ b/arch/x86/kvm/vmx.c | ||
1894 | @@ -34,6 +34,7 @@ | ||
1895 | #include <linux/tboot.h> | ||
1896 | #include <linux/hrtimer.h> | ||
1897 | #include <linux/frame.h> | ||
1898 | +#include <linux/nospec.h> | ||
1899 | #include "kvm_cache_regs.h" | ||
1900 | #include "x86.h" | ||
1901 | |||
1902 | @@ -108,6 +109,14 @@ static u64 __read_mostly host_xss; | ||
1903 | static bool __read_mostly enable_pml = 1; | ||
1904 | module_param_named(pml, enable_pml, bool, S_IRUGO); | ||
1905 | |||
1906 | +#define MSR_TYPE_R 1 | ||
1907 | +#define MSR_TYPE_W 2 | ||
1908 | +#define MSR_TYPE_RW 3 | ||
1909 | + | ||
1910 | +#define MSR_BITMAP_MODE_X2APIC 1 | ||
1911 | +#define MSR_BITMAP_MODE_X2APIC_APICV 2 | ||
1912 | +#define MSR_BITMAP_MODE_LM 4 | ||
1913 | + | ||
1914 | #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL | ||
1915 | |||
1916 | /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ | ||
1917 | @@ -182,7 +191,6 @@ module_param(ple_window_max, int, S_IRUGO); | ||
1918 | extern const ulong vmx_return; | ||
1919 | |||
1920 | #define NR_AUTOLOAD_MSRS 8 | ||
1921 | -#define VMCS02_POOL_SIZE 1 | ||
1922 | |||
1923 | struct vmcs { | ||
1924 | u32 revision_id; | ||
1925 | @@ -207,6 +215,7 @@ struct loaded_vmcs { | ||
1926 | int soft_vnmi_blocked; | ||
1927 | ktime_t entry_time; | ||
1928 | s64 vnmi_blocked_time; | ||
1929 | + unsigned long *msr_bitmap; | ||
1930 | struct list_head loaded_vmcss_on_cpu_link; | ||
1931 | }; | ||
1932 | |||
1933 | @@ -223,7 +232,7 @@ struct shared_msr_entry { | ||
1934 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, | ||
1935 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. | ||
1936 | * More than one of these structures may exist, if L1 runs multiple L2 guests. | ||
1937 | - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the | ||
1938 | + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the | ||
1939 | * underlying hardware which will be used to run L2. | ||
1940 | * This structure is packed to ensure that its layout is identical across | ||
1941 | * machines (necessary for live migration). | ||
1942 | @@ -406,13 +415,6 @@ struct __packed vmcs12 { | ||
1943 | */ | ||
1944 | #define VMCS12_SIZE 0x1000 | ||
1945 | |||
1946 | -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ | ||
1947 | -struct vmcs02_list { | ||
1948 | - struct list_head list; | ||
1949 | - gpa_t vmptr; | ||
1950 | - struct loaded_vmcs vmcs02; | ||
1951 | -}; | ||
1952 | - | ||
1953 | /* | ||
1954 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need | ||
1955 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. | ||
1956 | @@ -437,15 +439,15 @@ struct nested_vmx { | ||
1957 | */ | ||
1958 | bool sync_shadow_vmcs; | ||
1959 | |||
1960 | - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | ||
1961 | - struct list_head vmcs02_pool; | ||
1962 | - int vmcs02_num; | ||
1963 | bool change_vmcs01_virtual_x2apic_mode; | ||
1964 | /* L2 must run next, and mustn't decide to exit to L1. */ | ||
1965 | bool nested_run_pending; | ||
1966 | + | ||
1967 | + struct loaded_vmcs vmcs02; | ||
1968 | + | ||
1969 | /* | ||
1970 | - * Guest pages referred to in vmcs02 with host-physical pointers, so | ||
1971 | - * we must keep them pinned while L2 runs. | ||
1972 | + * Guest pages referred to in the vmcs02 with host-physical | ||
1973 | + * pointers, so we must keep them pinned while L2 runs. | ||
1974 | */ | ||
1975 | struct page *apic_access_page; | ||
1976 | struct page *virtual_apic_page; | ||
1977 | @@ -454,8 +456,6 @@ struct nested_vmx { | ||
1978 | bool pi_pending; | ||
1979 | u16 posted_intr_nv; | ||
1980 | |||
1981 | - unsigned long *msr_bitmap; | ||
1982 | - | ||
1983 | struct hrtimer preemption_timer; | ||
1984 | bool preemption_timer_expired; | ||
1985 | |||
1986 | @@ -570,6 +570,7 @@ struct vcpu_vmx { | ||
1987 | struct kvm_vcpu vcpu; | ||
1988 | unsigned long host_rsp; | ||
1989 | u8 fail; | ||
1990 | + u8 msr_bitmap_mode; | ||
1991 | u32 exit_intr_info; | ||
1992 | u32 idt_vectoring_info; | ||
1993 | ulong rflags; | ||
1994 | @@ -581,6 +582,10 @@ struct vcpu_vmx { | ||
1995 | u64 msr_host_kernel_gs_base; | ||
1996 | u64 msr_guest_kernel_gs_base; | ||
1997 | #endif | ||
1998 | + | ||
1999 | + u64 arch_capabilities; | ||
2000 | + u64 spec_ctrl; | ||
2001 | + | ||
2002 | u32 vm_entry_controls_shadow; | ||
2003 | u32 vm_exit_controls_shadow; | ||
2004 | u32 secondary_exec_control; | ||
2005 | @@ -887,21 +892,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { | ||
2006 | |||
2007 | static inline short vmcs_field_to_offset(unsigned long field) | ||
2008 | { | ||
2009 | - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); | ||
2010 | + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); | ||
2011 | + unsigned short offset; | ||
2012 | |||
2013 | - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) | ||
2014 | + BUILD_BUG_ON(size > SHRT_MAX); | ||
2015 | + if (field >= size) | ||
2016 | return -ENOENT; | ||
2017 | |||
2018 | - /* | ||
2019 | - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a | ||
2020 | - * generic mechanism. | ||
2021 | - */ | ||
2022 | - asm("lfence"); | ||
2023 | - | ||
2024 | - if (vmcs_field_to_offset_table[field] == 0) | ||
2025 | + field = array_index_nospec(field, size); | ||
2026 | + offset = vmcs_field_to_offset_table[field]; | ||
2027 | + if (offset == 0) | ||
2028 | return -ENOENT; | ||
2029 | - | ||
2030 | - return vmcs_field_to_offset_table[field]; | ||
2031 | + return offset; | ||
2032 | } | ||
2033 | |||
2034 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | ||
2035 | @@ -927,6 +929,9 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); | ||
2036 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); | ||
2037 | static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, | ||
2038 | u16 error_code); | ||
2039 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); | ||
2040 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
2041 | + u32 msr, int type); | ||
2042 | |||
2043 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | ||
2044 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | ||
2045 | @@ -946,12 +951,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); | ||
2046 | enum { | ||
2047 | VMX_IO_BITMAP_A, | ||
2048 | VMX_IO_BITMAP_B, | ||
2049 | - VMX_MSR_BITMAP_LEGACY, | ||
2050 | - VMX_MSR_BITMAP_LONGMODE, | ||
2051 | - VMX_MSR_BITMAP_LEGACY_X2APIC_APICV, | ||
2052 | - VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV, | ||
2053 | - VMX_MSR_BITMAP_LEGACY_X2APIC, | ||
2054 | - VMX_MSR_BITMAP_LONGMODE_X2APIC, | ||
2055 | VMX_VMREAD_BITMAP, | ||
2056 | VMX_VMWRITE_BITMAP, | ||
2057 | VMX_BITMAP_NR | ||
2058 | @@ -961,12 +960,6 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; | ||
2059 | |||
2060 | #define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A]) | ||
2061 | #define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B]) | ||
2062 | -#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY]) | ||
2063 | -#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE]) | ||
2064 | -#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV]) | ||
2065 | -#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV]) | ||
2066 | -#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC]) | ||
2067 | -#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC]) | ||
2068 | #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) | ||
2069 | #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) | ||
2070 | |||
2071 | @@ -1913,6 +1906,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | ||
2072 | vmcs_write32(EXCEPTION_BITMAP, eb); | ||
2073 | } | ||
2074 | |||
2075 | +/* | ||
2076 | + * Check if MSR is intercepted for currently loaded MSR bitmap. | ||
2077 | + */ | ||
2078 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) | ||
2079 | +{ | ||
2080 | + unsigned long *msr_bitmap; | ||
2081 | + int f = sizeof(unsigned long); | ||
2082 | + | ||
2083 | + if (!cpu_has_vmx_msr_bitmap()) | ||
2084 | + return true; | ||
2085 | + | ||
2086 | + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; | ||
2087 | + | ||
2088 | + if (msr <= 0x1fff) { | ||
2089 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); | ||
2090 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
2091 | + msr &= 0x1fff; | ||
2092 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); | ||
2093 | + } | ||
2094 | + | ||
2095 | + return true; | ||
2096 | +} | ||
2097 | + | ||
2098 | +/* | ||
2099 | + * Check if MSR is intercepted for L01 MSR bitmap. | ||
2100 | + */ | ||
2101 | +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) | ||
2102 | +{ | ||
2103 | + unsigned long *msr_bitmap; | ||
2104 | + int f = sizeof(unsigned long); | ||
2105 | + | ||
2106 | + if (!cpu_has_vmx_msr_bitmap()) | ||
2107 | + return true; | ||
2108 | + | ||
2109 | + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; | ||
2110 | + | ||
2111 | + if (msr <= 0x1fff) { | ||
2112 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); | ||
2113 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
2114 | + msr &= 0x1fff; | ||
2115 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); | ||
2116 | + } | ||
2117 | + | ||
2118 | + return true; | ||
2119 | +} | ||
2120 | + | ||
2121 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, | ||
2122 | unsigned long entry, unsigned long exit) | ||
2123 | { | ||
2124 | @@ -2291,6 +2330,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
2125 | if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { | ||
2126 | per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; | ||
2127 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
2128 | + indirect_branch_prediction_barrier(); | ||
2129 | } | ||
2130 | |||
2131 | if (!already_loaded) { | ||
2132 | @@ -2567,36 +2607,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | ||
2133 | vmx->guest_msrs[from] = tmp; | ||
2134 | } | ||
2135 | |||
2136 | -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | ||
2137 | -{ | ||
2138 | - unsigned long *msr_bitmap; | ||
2139 | - | ||
2140 | - if (is_guest_mode(vcpu)) | ||
2141 | - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; | ||
2142 | - else if (cpu_has_secondary_exec_ctrls() && | ||
2143 | - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | ||
2144 | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | ||
2145 | - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { | ||
2146 | - if (is_long_mode(vcpu)) | ||
2147 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv; | ||
2148 | - else | ||
2149 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv; | ||
2150 | - } else { | ||
2151 | - if (is_long_mode(vcpu)) | ||
2152 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | ||
2153 | - else | ||
2154 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; | ||
2155 | - } | ||
2156 | - } else { | ||
2157 | - if (is_long_mode(vcpu)) | ||
2158 | - msr_bitmap = vmx_msr_bitmap_longmode; | ||
2159 | - else | ||
2160 | - msr_bitmap = vmx_msr_bitmap_legacy; | ||
2161 | - } | ||
2162 | - | ||
2163 | - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
2164 | -} | ||
2165 | - | ||
2166 | /* | ||
2167 | * Set up the vmcs to automatically save and restore system | ||
2168 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | ||
2169 | @@ -2637,7 +2647,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | ||
2170 | vmx->save_nmsrs = save_nmsrs; | ||
2171 | |||
2172 | if (cpu_has_vmx_msr_bitmap()) | ||
2173 | - vmx_set_msr_bitmap(&vmx->vcpu); | ||
2174 | + vmx_update_msr_bitmap(&vmx->vcpu); | ||
2175 | } | ||
2176 | |||
2177 | /* | ||
2178 | @@ -3273,6 +3283,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2179 | case MSR_IA32_TSC: | ||
2180 | msr_info->data = guest_read_tsc(vcpu); | ||
2181 | break; | ||
2182 | + case MSR_IA32_SPEC_CTRL: | ||
2183 | + if (!msr_info->host_initiated && | ||
2184 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && | ||
2185 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) | ||
2186 | + return 1; | ||
2187 | + | ||
2188 | + msr_info->data = to_vmx(vcpu)->spec_ctrl; | ||
2189 | + break; | ||
2190 | + case MSR_IA32_ARCH_CAPABILITIES: | ||
2191 | + if (!msr_info->host_initiated && | ||
2192 | + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) | ||
2193 | + return 1; | ||
2194 | + msr_info->data = to_vmx(vcpu)->arch_capabilities; | ||
2195 | + break; | ||
2196 | case MSR_IA32_SYSENTER_CS: | ||
2197 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); | ||
2198 | break; | ||
2199 | @@ -3380,6 +3404,70 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2200 | case MSR_IA32_TSC: | ||
2201 | kvm_write_tsc(vcpu, msr_info); | ||
2202 | break; | ||
2203 | + case MSR_IA32_SPEC_CTRL: | ||
2204 | + if (!msr_info->host_initiated && | ||
2205 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && | ||
2206 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) | ||
2207 | + return 1; | ||
2208 | + | ||
2209 | + /* The STIBP bit doesn't fault even if it's not advertised */ | ||
2210 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) | ||
2211 | + return 1; | ||
2212 | + | ||
2213 | + vmx->spec_ctrl = data; | ||
2214 | + | ||
2215 | + if (!data) | ||
2216 | + break; | ||
2217 | + | ||
2218 | + /* | ||
2219 | + * For non-nested: | ||
2220 | + * When it's written (to non-zero) for the first time, pass | ||
2221 | + * it through. | ||
2222 | + * | ||
2223 | + * For nested: | ||
2224 | + * The handling of the MSR bitmap for L2 guests is done in | ||
2225 | + * nested_vmx_merge_msr_bitmap. We should not touch the | ||
2226 | + * vmcs02.msr_bitmap here since it gets completely overwritten | ||
2227 | + * in the merging. We update the vmcs01 here for L1 as well | ||
2228 | + * since it will end up touching the MSR anyway now. | ||
2229 | + */ | ||
2230 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, | ||
2231 | + MSR_IA32_SPEC_CTRL, | ||
2232 | + MSR_TYPE_RW); | ||
2233 | + break; | ||
2234 | + case MSR_IA32_PRED_CMD: | ||
2235 | + if (!msr_info->host_initiated && | ||
2236 | + !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && | ||
2237 | + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) | ||
2238 | + return 1; | ||
2239 | + | ||
2240 | + if (data & ~PRED_CMD_IBPB) | ||
2241 | + return 1; | ||
2242 | + | ||
2243 | + if (!data) | ||
2244 | + break; | ||
2245 | + | ||
2246 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); | ||
2247 | + | ||
2248 | + /* | ||
2249 | + * For non-nested: | ||
2250 | + * When it's written (to non-zero) for the first time, pass | ||
2251 | + * it through. | ||
2252 | + * | ||
2253 | + * For nested: | ||
2254 | + * The handling of the MSR bitmap for L2 guests is done in | ||
2255 | + * nested_vmx_merge_msr_bitmap. We should not touch the | ||
2256 | + * vmcs02.msr_bitmap here since it gets completely overwritten | ||
2257 | + * in the merging. | ||
2258 | + */ | ||
2259 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, | ||
2260 | + MSR_TYPE_W); | ||
2261 | + break; | ||
2262 | + case MSR_IA32_ARCH_CAPABILITIES: | ||
2263 | + if (!msr_info->host_initiated) | ||
2264 | + return 1; | ||
2265 | + vmx->arch_capabilities = data; | ||
2266 | + break; | ||
2267 | case MSR_IA32_CR_PAT: | ||
2268 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
2269 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) | ||
2270 | @@ -3822,11 +3910,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) | ||
2271 | return vmcs; | ||
2272 | } | ||
2273 | |||
2274 | -static struct vmcs *alloc_vmcs(void) | ||
2275 | -{ | ||
2276 | - return alloc_vmcs_cpu(raw_smp_processor_id()); | ||
2277 | -} | ||
2278 | - | ||
2279 | static void free_vmcs(struct vmcs *vmcs) | ||
2280 | { | ||
2281 | free_pages((unsigned long)vmcs, vmcs_config.order); | ||
2282 | @@ -3842,9 +3925,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | ||
2283 | loaded_vmcs_clear(loaded_vmcs); | ||
2284 | free_vmcs(loaded_vmcs->vmcs); | ||
2285 | loaded_vmcs->vmcs = NULL; | ||
2286 | + if (loaded_vmcs->msr_bitmap) | ||
2287 | + free_page((unsigned long)loaded_vmcs->msr_bitmap); | ||
2288 | WARN_ON(loaded_vmcs->shadow_vmcs != NULL); | ||
2289 | } | ||
2290 | |||
2291 | +static struct vmcs *alloc_vmcs(void) | ||
2292 | +{ | ||
2293 | + return alloc_vmcs_cpu(raw_smp_processor_id()); | ||
2294 | +} | ||
2295 | + | ||
2296 | +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | ||
2297 | +{ | ||
2298 | + loaded_vmcs->vmcs = alloc_vmcs(); | ||
2299 | + if (!loaded_vmcs->vmcs) | ||
2300 | + return -ENOMEM; | ||
2301 | + | ||
2302 | + loaded_vmcs->shadow_vmcs = NULL; | ||
2303 | + loaded_vmcs_init(loaded_vmcs); | ||
2304 | + | ||
2305 | + if (cpu_has_vmx_msr_bitmap()) { | ||
2306 | + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
2307 | + if (!loaded_vmcs->msr_bitmap) | ||
2308 | + goto out_vmcs; | ||
2309 | + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); | ||
2310 | + } | ||
2311 | + return 0; | ||
2312 | + | ||
2313 | +out_vmcs: | ||
2314 | + free_loaded_vmcs(loaded_vmcs); | ||
2315 | + return -ENOMEM; | ||
2316 | +} | ||
2317 | + | ||
2318 | static void free_kvm_area(void) | ||
2319 | { | ||
2320 | int cpu; | ||
2321 | @@ -4917,10 +5029,8 @@ static void free_vpid(int vpid) | ||
2322 | spin_unlock(&vmx_vpid_lock); | ||
2323 | } | ||
2324 | |||
2325 | -#define MSR_TYPE_R 1 | ||
2326 | -#define MSR_TYPE_W 2 | ||
2327 | -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
2328 | - u32 msr, int type) | ||
2329 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
2330 | + u32 msr, int type) | ||
2331 | { | ||
2332 | int f = sizeof(unsigned long); | ||
2333 | |||
2334 | @@ -4954,6 +5064,50 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
2335 | } | ||
2336 | } | ||
2337 | |||
2338 | +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
2339 | + u32 msr, int type) | ||
2340 | +{ | ||
2341 | + int f = sizeof(unsigned long); | ||
2342 | + | ||
2343 | + if (!cpu_has_vmx_msr_bitmap()) | ||
2344 | + return; | ||
2345 | + | ||
2346 | + /* | ||
2347 | + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals | ||
2348 | + * have the write-low and read-high bitmap offsets the wrong way round. | ||
2349 | + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | ||
2350 | + */ | ||
2351 | + if (msr <= 0x1fff) { | ||
2352 | + if (type & MSR_TYPE_R) | ||
2353 | + /* read-low */ | ||
2354 | + __set_bit(msr, msr_bitmap + 0x000 / f); | ||
2355 | + | ||
2356 | + if (type & MSR_TYPE_W) | ||
2357 | + /* write-low */ | ||
2358 | + __set_bit(msr, msr_bitmap + 0x800 / f); | ||
2359 | + | ||
2360 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
2361 | + msr &= 0x1fff; | ||
2362 | + if (type & MSR_TYPE_R) | ||
2363 | + /* read-high */ | ||
2364 | + __set_bit(msr, msr_bitmap + 0x400 / f); | ||
2365 | + | ||
2366 | + if (type & MSR_TYPE_W) | ||
2367 | + /* write-high */ | ||
2368 | + __set_bit(msr, msr_bitmap + 0xc00 / f); | ||
2369 | + | ||
2370 | + } | ||
2371 | +} | ||
2372 | + | ||
2373 | +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, | ||
2374 | + u32 msr, int type, bool value) | ||
2375 | +{ | ||
2376 | + if (value) | ||
2377 | + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); | ||
2378 | + else | ||
2379 | + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); | ||
2380 | +} | ||
2381 | + | ||
2382 | /* | ||
2383 | * If a msr is allowed by L0, we should check whether it is allowed by L1. | ||
2384 | * The corresponding bit will be cleared unless both of L0 and L1 allow it. | ||
2385 | @@ -5000,30 +5154,70 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, | ||
2386 | } | ||
2387 | } | ||
2388 | |||
2389 | -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | ||
2390 | +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) | ||
2391 | { | ||
2392 | - if (!longmode_only) | ||
2393 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, | ||
2394 | - msr, MSR_TYPE_R | MSR_TYPE_W); | ||
2395 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, | ||
2396 | - msr, MSR_TYPE_R | MSR_TYPE_W); | ||
2397 | + u8 mode = 0; | ||
2398 | + | ||
2399 | + if (cpu_has_secondary_exec_ctrls() && | ||
2400 | + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | ||
2401 | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | ||
2402 | + mode |= MSR_BITMAP_MODE_X2APIC; | ||
2403 | + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) | ||
2404 | + mode |= MSR_BITMAP_MODE_X2APIC_APICV; | ||
2405 | + } | ||
2406 | + | ||
2407 | + if (is_long_mode(vcpu)) | ||
2408 | + mode |= MSR_BITMAP_MODE_LM; | ||
2409 | + | ||
2410 | + return mode; | ||
2411 | } | ||
2412 | |||
2413 | -static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active) | ||
2414 | +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) | ||
2415 | + | ||
2416 | +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, | ||
2417 | + u8 mode) | ||
2418 | { | ||
2419 | - if (apicv_active) { | ||
2420 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv, | ||
2421 | - msr, type); | ||
2422 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv, | ||
2423 | - msr, type); | ||
2424 | - } else { | ||
2425 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
2426 | - msr, type); | ||
2427 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
2428 | - msr, type); | ||
2429 | + int msr; | ||
2430 | + | ||
2431 | + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { | ||
2432 | + unsigned word = msr / BITS_PER_LONG; | ||
2433 | + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; | ||
2434 | + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; | ||
2435 | + } | ||
2436 | + | ||
2437 | + if (mode & MSR_BITMAP_MODE_X2APIC) { | ||
2438 | + /* | ||
2439 | + * TPR reads and writes can be virtualized even if virtual interrupt | ||
2440 | + * delivery is not in use. | ||
2441 | + */ | ||
2442 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); | ||
2443 | + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { | ||
2444 | + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); | ||
2445 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); | ||
2446 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); | ||
2447 | + } | ||
2448 | } | ||
2449 | } | ||
2450 | |||
2451 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) | ||
2452 | +{ | ||
2453 | + struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2454 | + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; | ||
2455 | + u8 mode = vmx_msr_bitmap_mode(vcpu); | ||
2456 | + u8 changed = mode ^ vmx->msr_bitmap_mode; | ||
2457 | + | ||
2458 | + if (!changed) | ||
2459 | + return; | ||
2460 | + | ||
2461 | + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, | ||
2462 | + !(mode & MSR_BITMAP_MODE_LM)); | ||
2463 | + | ||
2464 | + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) | ||
2465 | + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); | ||
2466 | + | ||
2467 | + vmx->msr_bitmap_mode = mode; | ||
2468 | +} | ||
2469 | + | ||
2470 | static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu) | ||
2471 | { | ||
2472 | return enable_apicv; | ||
2473 | @@ -5269,7 +5463,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | ||
2474 | } | ||
2475 | |||
2476 | if (cpu_has_vmx_msr_bitmap()) | ||
2477 | - vmx_set_msr_bitmap(vcpu); | ||
2478 | + vmx_update_msr_bitmap(vcpu); | ||
2479 | } | ||
2480 | |||
2481 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | ||
2482 | @@ -5456,7 +5650,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||
2483 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
2484 | } | ||
2485 | if (cpu_has_vmx_msr_bitmap()) | ||
2486 | - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); | ||
2487 | + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); | ||
2488 | |||
2489 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | ||
2490 | |||
2491 | @@ -5534,6 +5728,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||
2492 | ++vmx->nmsrs; | ||
2493 | } | ||
2494 | |||
2495 | + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) | ||
2496 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); | ||
2497 | |||
2498 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); | ||
2499 | |||
2500 | @@ -5564,6 +5760,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | ||
2501 | u64 cr0; | ||
2502 | |||
2503 | vmx->rmode.vm86_active = 0; | ||
2504 | + vmx->spec_ctrl = 0; | ||
2505 | |||
2506 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | ||
2507 | kvm_set_cr8(vcpu, 0); | ||
2508 | @@ -6739,7 +6936,7 @@ void vmx_enable_tdp(void) | ||
2509 | |||
2510 | static __init int hardware_setup(void) | ||
2511 | { | ||
2512 | - int r = -ENOMEM, i, msr; | ||
2513 | + int r = -ENOMEM, i; | ||
2514 | |||
2515 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
2516 | |||
2517 | @@ -6760,9 +6957,6 @@ static __init int hardware_setup(void) | ||
2518 | |||
2519 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); | ||
2520 | |||
2521 | - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); | ||
2522 | - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); | ||
2523 | - | ||
2524 | if (setup_vmcs_config(&vmcs_config) < 0) { | ||
2525 | r = -EIO; | ||
2526 | goto out; | ||
2527 | @@ -6825,42 +7019,8 @@ static __init int hardware_setup(void) | ||
2528 | kvm_tsc_scaling_ratio_frac_bits = 48; | ||
2529 | } | ||
2530 | |||
2531 | - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); | ||
2532 | - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); | ||
2533 | - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); | ||
2534 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | ||
2535 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | ||
2536 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | ||
2537 | - | ||
2538 | - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv, | ||
2539 | - vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
2540 | - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv, | ||
2541 | - vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
2542 | - memcpy(vmx_msr_bitmap_legacy_x2apic, | ||
2543 | - vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
2544 | - memcpy(vmx_msr_bitmap_longmode_x2apic, | ||
2545 | - vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
2546 | - | ||
2547 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | ||
2548 | |||
2549 | - for (msr = 0x800; msr <= 0x8ff; msr++) { | ||
2550 | - if (msr == 0x839 /* TMCCT */) | ||
2551 | - continue; | ||
2552 | - vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true); | ||
2553 | - } | ||
2554 | - | ||
2555 | - /* | ||
2556 | - * TPR reads and writes can be virtualized even if virtual interrupt | ||
2557 | - * delivery is not in use. | ||
2558 | - */ | ||
2559 | - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true); | ||
2560 | - vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false); | ||
2561 | - | ||
2562 | - /* EOI */ | ||
2563 | - vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true); | ||
2564 | - /* SELF-IPI */ | ||
2565 | - vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); | ||
2566 | - | ||
2567 | if (enable_ept) | ||
2568 | vmx_enable_tdp(); | ||
2569 | else | ||
2570 | @@ -6963,94 +7123,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) | ||
2571 | return handle_nop(vcpu); | ||
2572 | } | ||
2573 | |||
2574 | -/* | ||
2575 | - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. | ||
2576 | - * We could reuse a single VMCS for all the L2 guests, but we also want the | ||
2577 | - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this | ||
2578 | - * allows keeping them loaded on the processor, and in the future will allow | ||
2579 | - * optimizations where prepare_vmcs02 doesn't need to set all the fields on | ||
2580 | - * every entry if they never change. | ||
2581 | - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE | ||
2582 | - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. | ||
2583 | - * | ||
2584 | - * The following functions allocate and free a vmcs02 in this pool. | ||
2585 | - */ | ||
2586 | - | ||
2587 | -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ | ||
2588 | -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | ||
2589 | -{ | ||
2590 | - struct vmcs02_list *item; | ||
2591 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
2592 | - if (item->vmptr == vmx->nested.current_vmptr) { | ||
2593 | - list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
2594 | - return &item->vmcs02; | ||
2595 | - } | ||
2596 | - | ||
2597 | - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { | ||
2598 | - /* Recycle the least recently used VMCS. */ | ||
2599 | - item = list_last_entry(&vmx->nested.vmcs02_pool, | ||
2600 | - struct vmcs02_list, list); | ||
2601 | - item->vmptr = vmx->nested.current_vmptr; | ||
2602 | - list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
2603 | - return &item->vmcs02; | ||
2604 | - } | ||
2605 | - | ||
2606 | - /* Create a new VMCS */ | ||
2607 | - item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
2608 | - if (!item) | ||
2609 | - return NULL; | ||
2610 | - item->vmcs02.vmcs = alloc_vmcs(); | ||
2611 | - item->vmcs02.shadow_vmcs = NULL; | ||
2612 | - if (!item->vmcs02.vmcs) { | ||
2613 | - kfree(item); | ||
2614 | - return NULL; | ||
2615 | - } | ||
2616 | - loaded_vmcs_init(&item->vmcs02); | ||
2617 | - item->vmptr = vmx->nested.current_vmptr; | ||
2618 | - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); | ||
2619 | - vmx->nested.vmcs02_num++; | ||
2620 | - return &item->vmcs02; | ||
2621 | -} | ||
2622 | - | ||
2623 | -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ | ||
2624 | -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
2625 | -{ | ||
2626 | - struct vmcs02_list *item; | ||
2627 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
2628 | - if (item->vmptr == vmptr) { | ||
2629 | - free_loaded_vmcs(&item->vmcs02); | ||
2630 | - list_del(&item->list); | ||
2631 | - kfree(item); | ||
2632 | - vmx->nested.vmcs02_num--; | ||
2633 | - return; | ||
2634 | - } | ||
2635 | -} | ||
2636 | - | ||
2637 | -/* | ||
2638 | - * Free all VMCSs saved for this vcpu, except the one pointed by | ||
2639 | - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs | ||
2640 | - * must be &vmx->vmcs01. | ||
2641 | - */ | ||
2642 | -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | ||
2643 | -{ | ||
2644 | - struct vmcs02_list *item, *n; | ||
2645 | - | ||
2646 | - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); | ||
2647 | - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { | ||
2648 | - /* | ||
2649 | - * Something will leak if the above WARN triggers. Better than | ||
2650 | - * a use-after-free. | ||
2651 | - */ | ||
2652 | - if (vmx->loaded_vmcs == &item->vmcs02) | ||
2653 | - continue; | ||
2654 | - | ||
2655 | - free_loaded_vmcs(&item->vmcs02); | ||
2656 | - list_del(&item->list); | ||
2657 | - kfree(item); | ||
2658 | - vmx->nested.vmcs02_num--; | ||
2659 | - } | ||
2660 | -} | ||
2661 | - | ||
2662 | /* | ||
2663 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
2664 | * set the success or error code of an emulated VMX instruction, as specified | ||
2665 | @@ -7231,13 +7303,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | ||
2666 | { | ||
2667 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2668 | struct vmcs *shadow_vmcs; | ||
2669 | + int r; | ||
2670 | |||
2671 | - if (cpu_has_vmx_msr_bitmap()) { | ||
2672 | - vmx->nested.msr_bitmap = | ||
2673 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
2674 | - if (!vmx->nested.msr_bitmap) | ||
2675 | - goto out_msr_bitmap; | ||
2676 | - } | ||
2677 | + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); | ||
2678 | + if (r < 0) | ||
2679 | + goto out_vmcs02; | ||
2680 | |||
2681 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
2682 | if (!vmx->nested.cached_vmcs12) | ||
2683 | @@ -7254,9 +7324,6 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | ||
2684 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
2685 | } | ||
2686 | |||
2687 | - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
2688 | - vmx->nested.vmcs02_num = 0; | ||
2689 | - | ||
2690 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
2691 | HRTIMER_MODE_REL_PINNED); | ||
2692 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
2693 | @@ -7268,9 +7335,9 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) | ||
2694 | kfree(vmx->nested.cached_vmcs12); | ||
2695 | |||
2696 | out_cached_vmcs12: | ||
2697 | - free_page((unsigned long)vmx->nested.msr_bitmap); | ||
2698 | + free_loaded_vmcs(&vmx->nested.vmcs02); | ||
2699 | |||
2700 | -out_msr_bitmap: | ||
2701 | +out_vmcs02: | ||
2702 | return -ENOMEM; | ||
2703 | } | ||
2704 | |||
2705 | @@ -7412,10 +7479,6 @@ static void free_nested(struct vcpu_vmx *vmx) | ||
2706 | free_vpid(vmx->nested.vpid02); | ||
2707 | vmx->nested.posted_intr_nv = -1; | ||
2708 | vmx->nested.current_vmptr = -1ull; | ||
2709 | - if (vmx->nested.msr_bitmap) { | ||
2710 | - free_page((unsigned long)vmx->nested.msr_bitmap); | ||
2711 | - vmx->nested.msr_bitmap = NULL; | ||
2712 | - } | ||
2713 | if (enable_shadow_vmcs) { | ||
2714 | vmx_disable_shadow_vmcs(vmx); | ||
2715 | vmcs_clear(vmx->vmcs01.shadow_vmcs); | ||
2716 | @@ -7423,7 +7486,7 @@ static void free_nested(struct vcpu_vmx *vmx) | ||
2717 | vmx->vmcs01.shadow_vmcs = NULL; | ||
2718 | } | ||
2719 | kfree(vmx->nested.cached_vmcs12); | ||
2720 | - /* Unpin physical memory we referred to in current vmcs02 */ | ||
2721 | + /* Unpin physical memory we referred to in the vmcs02 */ | ||
2722 | if (vmx->nested.apic_access_page) { | ||
2723 | kvm_release_page_dirty(vmx->nested.apic_access_page); | ||
2724 | vmx->nested.apic_access_page = NULL; | ||
2725 | @@ -7439,7 +7502,7 @@ static void free_nested(struct vcpu_vmx *vmx) | ||
2726 | vmx->nested.pi_desc = NULL; | ||
2727 | } | ||
2728 | |||
2729 | - nested_free_all_saved_vmcss(vmx); | ||
2730 | + free_loaded_vmcs(&vmx->nested.vmcs02); | ||
2731 | } | ||
2732 | |||
2733 | /* Emulate the VMXOFF instruction */ | ||
2734 | @@ -7482,8 +7545,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | ||
2735 | vmptr + offsetof(struct vmcs12, launch_state), | ||
2736 | &zero, sizeof(zero)); | ||
2737 | |||
2738 | - nested_free_vmcs02(vmx, vmptr); | ||
2739 | - | ||
2740 | nested_vmx_succeed(vcpu); | ||
2741 | return kvm_skip_emulated_instruction(vcpu); | ||
2742 | } | ||
2743 | @@ -8395,10 +8456,11 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) | ||
2744 | |||
2745 | /* | ||
2746 | * The host physical addresses of some pages of guest memory | ||
2747 | - * are loaded into VMCS02 (e.g. L1's Virtual APIC Page). The CPU | ||
2748 | - * may write to these pages via their host physical address while | ||
2749 | - * L2 is running, bypassing any address-translation-based dirty | ||
2750 | - * tracking (e.g. EPT write protection). | ||
2751 | + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC | ||
2752 | + * Page). The CPU may write to these pages via their host | ||
2753 | + * physical address while L2 is running, bypassing any | ||
2754 | + * address-translation-based dirty tracking (e.g. EPT write | ||
2755 | + * protection). | ||
2756 | * | ||
2757 | * Mark them dirty on every exit from L2 to prevent them from | ||
2758 | * getting out of sync with dirty tracking. | ||
2759 | @@ -8932,7 +8994,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
2760 | } | ||
2761 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | ||
2762 | |||
2763 | - vmx_set_msr_bitmap(vcpu); | ||
2764 | + vmx_update_msr_bitmap(vcpu); | ||
2765 | } | ||
2766 | |||
2767 | static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) | ||
2768 | @@ -9118,14 +9180,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | ||
2769 | #endif | ||
2770 | "pushf\n\t" | ||
2771 | __ASM_SIZE(push) " $%c[cs]\n\t" | ||
2772 | - "call *%[entry]\n\t" | ||
2773 | + CALL_NOSPEC | ||
2774 | : | ||
2775 | #ifdef CONFIG_X86_64 | ||
2776 | [sp]"=&r"(tmp), | ||
2777 | #endif | ||
2778 | ASM_CALL_CONSTRAINT | ||
2779 | : | ||
2780 | - [entry]"r"(entry), | ||
2781 | + THUNK_TARGET(entry), | ||
2782 | [ss]"i"(__KERNEL_DS), | ||
2783 | [cs]"i"(__KERNEL_CS) | ||
2784 | ); | ||
2785 | @@ -9362,6 +9424,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||
2786 | |||
2787 | vmx_arm_hv_timer(vcpu); | ||
2788 | |||
2789 | + /* | ||
2790 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if | ||
2791 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there | ||
2792 | + * is no need to worry about the conditional branch over the wrmsr | ||
2793 | + * being speculatively taken. | ||
2794 | + */ | ||
2795 | + if (vmx->spec_ctrl) | ||
2796 | + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); | ||
2797 | + | ||
2798 | vmx->__launched = vmx->loaded_vmcs->launched; | ||
2799 | asm( | ||
2800 | /* Store host registers */ | ||
2801 | @@ -9480,6 +9551,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||
2802 | #endif | ||
2803 | ); | ||
2804 | |||
2805 | + /* | ||
2806 | + * We do not use IBRS in the kernel. If this vCPU has used the | ||
2807 | + * SPEC_CTRL MSR it may have left it on; save the value and | ||
2808 | + * turn it off. This is much more efficient than blindly adding | ||
2809 | + * it to the atomic save/restore list. Especially as the former | ||
2810 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. | ||
2811 | + * | ||
2812 | + * For non-nested case: | ||
2813 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to | ||
2814 | + * save it. | ||
2815 | + * | ||
2816 | + * For nested case: | ||
2817 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to | ||
2818 | + * save it. | ||
2819 | + */ | ||
2820 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) | ||
2821 | + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); | ||
2822 | + | ||
2823 | + if (vmx->spec_ctrl) | ||
2824 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); | ||
2825 | + | ||
2826 | /* Eliminate branch target predictions from guest mode */ | ||
2827 | vmexit_fill_RSB(); | ||
2828 | |||
2829 | @@ -9594,6 +9686,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||
2830 | { | ||
2831 | int err; | ||
2832 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | ||
2833 | + unsigned long *msr_bitmap; | ||
2834 | int cpu; | ||
2835 | |||
2836 | if (!vmx) | ||
2837 | @@ -9626,13 +9719,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||
2838 | if (!vmx->guest_msrs) | ||
2839 | goto free_pml; | ||
2840 | |||
2841 | - vmx->loaded_vmcs = &vmx->vmcs01; | ||
2842 | - vmx->loaded_vmcs->vmcs = alloc_vmcs(); | ||
2843 | - vmx->loaded_vmcs->shadow_vmcs = NULL; | ||
2844 | - if (!vmx->loaded_vmcs->vmcs) | ||
2845 | + err = alloc_loaded_vmcs(&vmx->vmcs01); | ||
2846 | + if (err < 0) | ||
2847 | goto free_msrs; | ||
2848 | - loaded_vmcs_init(vmx->loaded_vmcs); | ||
2849 | |||
2850 | + msr_bitmap = vmx->vmcs01.msr_bitmap; | ||
2851 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); | ||
2852 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); | ||
2853 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); | ||
2854 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); | ||
2855 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); | ||
2856 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); | ||
2857 | + vmx->msr_bitmap_mode = 0; | ||
2858 | + | ||
2859 | + vmx->loaded_vmcs = &vmx->vmcs01; | ||
2860 | cpu = get_cpu(); | ||
2861 | vmx_vcpu_load(&vmx->vcpu, cpu); | ||
2862 | vmx->vcpu.cpu = cpu; | ||
2863 | @@ -10101,10 +10201,25 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | ||
2864 | int msr; | ||
2865 | struct page *page; | ||
2866 | unsigned long *msr_bitmap_l1; | ||
2867 | - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; | ||
2868 | + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; | ||
2869 | + /* | ||
2870 | + * pred_cmd & spec_ctrl are trying to verify two things: | ||
2871 | + * | ||
2872 | + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This | ||
2873 | + * ensures that we do not accidentally generate an L02 MSR bitmap | ||
2874 | + * from the L12 MSR bitmap that is too permissive. | ||
2875 | + * 2. That L1 or L2s have actually used the MSR. This avoids | ||
2876 | + * unnecessarily merging of the bitmap if the MSR is unused. This | ||
2877 | + * works properly because we only update the L01 MSR bitmap lazily. | ||
2878 | + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only | ||
2879 | + * updated to reflect this when L1 (or its L2s) actually write to | ||
2880 | + * the MSR. | ||
2881 | + */ | ||
2882 | + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); | ||
2883 | + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); | ||
2884 | |||
2885 | - /* This shortcut is ok because we support only x2APIC MSRs so far. */ | ||
2886 | - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) | ||
2887 | + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && | ||
2888 | + !pred_cmd && !spec_ctrl) | ||
2889 | return false; | ||
2890 | |||
2891 | page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->msr_bitmap); | ||
2892 | @@ -10137,6 +10252,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | ||
2893 | MSR_TYPE_W); | ||
2894 | } | ||
2895 | } | ||
2896 | + | ||
2897 | + if (spec_ctrl) | ||
2898 | + nested_vmx_disable_intercept_for_msr( | ||
2899 | + msr_bitmap_l1, msr_bitmap_l0, | ||
2900 | + MSR_IA32_SPEC_CTRL, | ||
2901 | + MSR_TYPE_R | MSR_TYPE_W); | ||
2902 | + | ||
2903 | + if (pred_cmd) | ||
2904 | + nested_vmx_disable_intercept_for_msr( | ||
2905 | + msr_bitmap_l1, msr_bitmap_l0, | ||
2906 | + MSR_IA32_PRED_CMD, | ||
2907 | + MSR_TYPE_W); | ||
2908 | + | ||
2909 | kunmap(page); | ||
2910 | kvm_release_page_clean(page); | ||
2911 | |||
2912 | @@ -10678,6 +10806,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | ||
2913 | if (kvm_has_tsc_control) | ||
2914 | decache_tsc_multiplier(vmx); | ||
2915 | |||
2916 | + if (cpu_has_vmx_msr_bitmap()) | ||
2917 | + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); | ||
2918 | + | ||
2919 | if (enable_vpid) { | ||
2920 | /* | ||
2921 | * There is no direct mapping between vpid02 and vpid12, the | ||
2922 | @@ -10894,20 +11025,15 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) | ||
2923 | { | ||
2924 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2925 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
2926 | - struct loaded_vmcs *vmcs02; | ||
2927 | u32 msr_entry_idx; | ||
2928 | u32 exit_qual; | ||
2929 | |||
2930 | - vmcs02 = nested_get_current_vmcs02(vmx); | ||
2931 | - if (!vmcs02) | ||
2932 | - return -ENOMEM; | ||
2933 | - | ||
2934 | enter_guest_mode(vcpu); | ||
2935 | |||
2936 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | ||
2937 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
2938 | |||
2939 | - vmx_switch_vmcs(vcpu, vmcs02); | ||
2940 | + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); | ||
2941 | vmx_segment_cache_clear(vmx); | ||
2942 | |||
2943 | if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { | ||
2944 | @@ -11476,7 +11602,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | ||
2945 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
2946 | |||
2947 | if (cpu_has_vmx_msr_bitmap()) | ||
2948 | - vmx_set_msr_bitmap(vcpu); | ||
2949 | + vmx_update_msr_bitmap(vcpu); | ||
2950 | |||
2951 | if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, | ||
2952 | vmcs12->vm_exit_msr_load_count)) | ||
2953 | @@ -11522,10 +11648,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | ||
2954 | vm_exit_controls_reset_shadow(vmx); | ||
2955 | vmx_segment_cache_clear(vmx); | ||
2956 | |||
2957 | - /* if no vmcs02 cache requested, remove the one we used */ | ||
2958 | - if (VMCS02_POOL_SIZE == 0) | ||
2959 | - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | ||
2960 | - | ||
2961 | /* Update any VMCS fields that might have changed while L2 ran */ | ||
2962 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr); | ||
2963 | vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr); | ||
2964 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
2965 | index 8c28023a43b1..f97358423f9c 100644 | ||
2966 | --- a/arch/x86/kvm/x86.c | ||
2967 | +++ b/arch/x86/kvm/x86.c | ||
2968 | @@ -1006,6 +1006,7 @@ static u32 msrs_to_save[] = { | ||
2969 | #endif | ||
2970 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, | ||
2971 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, | ||
2972 | + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES | ||
2973 | }; | ||
2974 | |||
2975 | static unsigned num_msrs_to_save; | ||
2976 | diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile | ||
2977 | index d435c89875c1..d0a3170e6804 100644 | ||
2978 | --- a/arch/x86/lib/Makefile | ||
2979 | +++ b/arch/x86/lib/Makefile | ||
2980 | @@ -27,6 +27,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | ||
2981 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o | ||
2982 | lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o | ||
2983 | lib-$(CONFIG_RETPOLINE) += retpoline.o | ||
2984 | +OBJECT_FILES_NON_STANDARD_retpoline.o :=y | ||
2985 | |||
2986 | obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o | ||
2987 | |||
2988 | diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S | ||
2989 | index c97d935a29e8..49b167f73215 100644 | ||
2990 | --- a/arch/x86/lib/getuser.S | ||
2991 | +++ b/arch/x86/lib/getuser.S | ||
2992 | @@ -40,6 +40,8 @@ ENTRY(__get_user_1) | ||
2993 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
2994 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
2995 | jae bad_get_user | ||
2996 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
2997 | + and %_ASM_DX, %_ASM_AX | ||
2998 | ASM_STAC | ||
2999 | 1: movzbl (%_ASM_AX),%edx | ||
3000 | xor %eax,%eax | ||
3001 | @@ -54,6 +56,8 @@ ENTRY(__get_user_2) | ||
3002 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
3003 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
3004 | jae bad_get_user | ||
3005 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
3006 | + and %_ASM_DX, %_ASM_AX | ||
3007 | ASM_STAC | ||
3008 | 2: movzwl -1(%_ASM_AX),%edx | ||
3009 | xor %eax,%eax | ||
3010 | @@ -68,6 +72,8 @@ ENTRY(__get_user_4) | ||
3011 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
3012 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
3013 | jae bad_get_user | ||
3014 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
3015 | + and %_ASM_DX, %_ASM_AX | ||
3016 | ASM_STAC | ||
3017 | 3: movl -3(%_ASM_AX),%edx | ||
3018 | xor %eax,%eax | ||
3019 | @@ -83,6 +89,8 @@ ENTRY(__get_user_8) | ||
3020 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
3021 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
3022 | jae bad_get_user | ||
3023 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
3024 | + and %_ASM_DX, %_ASM_AX | ||
3025 | ASM_STAC | ||
3026 | 4: movq -7(%_ASM_AX),%rdx | ||
3027 | xor %eax,%eax | ||
3028 | @@ -94,6 +102,8 @@ ENTRY(__get_user_8) | ||
3029 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
3030 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
3031 | jae bad_get_user_8 | ||
3032 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
3033 | + and %_ASM_DX, %_ASM_AX | ||
3034 | ASM_STAC | ||
3035 | 4: movl -7(%_ASM_AX),%edx | ||
3036 | 5: movl -3(%_ASM_AX),%ecx | ||
3037 | diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S | ||
3038 | index dfb2ba91b670..480edc3a5e03 100644 | ||
3039 | --- a/arch/x86/lib/retpoline.S | ||
3040 | +++ b/arch/x86/lib/retpoline.S | ||
3041 | @@ -7,6 +7,7 @@ | ||
3042 | #include <asm/alternative-asm.h> | ||
3043 | #include <asm/export.h> | ||
3044 | #include <asm/nospec-branch.h> | ||
3045 | +#include <asm/bitsperlong.h> | ||
3046 | |||
3047 | .macro THUNK reg | ||
3048 | .section .text.__x86.indirect_thunk | ||
3049 | @@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX) | ||
3050 | GENERATE_THUNK(_ASM_SI) | ||
3051 | GENERATE_THUNK(_ASM_DI) | ||
3052 | GENERATE_THUNK(_ASM_BP) | ||
3053 | -GENERATE_THUNK(_ASM_SP) | ||
3054 | #ifdef CONFIG_64BIT | ||
3055 | GENERATE_THUNK(r8) | ||
3056 | GENERATE_THUNK(r9) | ||
3057 | @@ -47,3 +47,58 @@ GENERATE_THUNK(r13) | ||
3058 | GENERATE_THUNK(r14) | ||
3059 | GENERATE_THUNK(r15) | ||
3060 | #endif | ||
3061 | + | ||
3062 | +/* | ||
3063 | + * Fill the CPU return stack buffer. | ||
3064 | + * | ||
3065 | + * Each entry in the RSB, if used for a speculative 'ret', contains an | ||
3066 | + * infinite 'pause; lfence; jmp' loop to capture speculative execution. | ||
3067 | + * | ||
3068 | + * This is required in various cases for retpoline and IBRS-based | ||
3069 | + * mitigations for the Spectre variant 2 vulnerability. Sometimes to | ||
3070 | + * eliminate potentially bogus entries from the RSB, and sometimes | ||
3071 | + * purely to ensure that it doesn't get empty, which on some CPUs would | ||
3072 | + * allow predictions from other (unwanted!) sources to be used. | ||
3073 | + * | ||
3074 | + * Google experimented with loop-unrolling and this turned out to be | ||
3075 | + * the optimal version - two calls, each with their own speculation | ||
3076 | + * trap should their return address end up getting used, in a loop. | ||
3077 | + */ | ||
3078 | +.macro STUFF_RSB nr:req sp:req | ||
3079 | + mov $(\nr / 2), %_ASM_BX | ||
3080 | + .align 16 | ||
3081 | +771: | ||
3082 | + call 772f | ||
3083 | +773: /* speculation trap */ | ||
3084 | + pause | ||
3085 | + lfence | ||
3086 | + jmp 773b | ||
3087 | + .align 16 | ||
3088 | +772: | ||
3089 | + call 774f | ||
3090 | +775: /* speculation trap */ | ||
3091 | + pause | ||
3092 | + lfence | ||
3093 | + jmp 775b | ||
3094 | + .align 16 | ||
3095 | +774: | ||
3096 | + dec %_ASM_BX | ||
3097 | + jnz 771b | ||
3098 | + add $((BITS_PER_LONG/8) * \nr), \sp | ||
3099 | +.endm | ||
3100 | + | ||
3101 | +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ | ||
3102 | + | ||
3103 | +ENTRY(__fill_rsb) | ||
3104 | + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP | ||
3105 | + ret | ||
3106 | +END(__fill_rsb) | ||
3107 | +EXPORT_SYMBOL_GPL(__fill_rsb) | ||
3108 | + | ||
3109 | +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ | ||
3110 | + | ||
3111 | +ENTRY(__clear_rsb) | ||
3112 | + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP | ||
3113 | + ret | ||
3114 | +END(__clear_rsb) | ||
3115 | +EXPORT_SYMBOL_GPL(__clear_rsb) | ||
3116 | diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c | ||
3117 | index 1b377f734e64..7add8ba06887 100644 | ||
3118 | --- a/arch/x86/lib/usercopy_32.c | ||
3119 | +++ b/arch/x86/lib/usercopy_32.c | ||
3120 | @@ -331,12 +331,12 @@ do { \ | ||
3121 | |||
3122 | unsigned long __copy_user_ll(void *to, const void *from, unsigned long n) | ||
3123 | { | ||
3124 | - stac(); | ||
3125 | + __uaccess_begin_nospec(); | ||
3126 | if (movsl_is_ok(to, from, n)) | ||
3127 | __copy_user(to, from, n); | ||
3128 | else | ||
3129 | n = __copy_user_intel(to, from, n); | ||
3130 | - clac(); | ||
3131 | + __uaccess_end(); | ||
3132 | return n; | ||
3133 | } | ||
3134 | EXPORT_SYMBOL(__copy_user_ll); | ||
3135 | @@ -344,7 +344,7 @@ EXPORT_SYMBOL(__copy_user_ll); | ||
3136 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, | ||
3137 | unsigned long n) | ||
3138 | { | ||
3139 | - stac(); | ||
3140 | + __uaccess_begin_nospec(); | ||
3141 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
3142 | if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) | ||
3143 | n = __copy_user_intel_nocache(to, from, n); | ||
3144 | @@ -353,7 +353,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr | ||
3145 | #else | ||
3146 | __copy_user(to, from, n); | ||
3147 | #endif | ||
3148 | - clac(); | ||
3149 | + __uaccess_end(); | ||
3150 | return n; | ||
3151 | } | ||
3152 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); | ||
3153 | diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c | ||
3154 | index 5bfe61a5e8e3..012d02624848 100644 | ||
3155 | --- a/arch/x86/mm/tlb.c | ||
3156 | +++ b/arch/x86/mm/tlb.c | ||
3157 | @@ -6,13 +6,14 @@ | ||
3158 | #include <linux/interrupt.h> | ||
3159 | #include <linux/export.h> | ||
3160 | #include <linux/cpu.h> | ||
3161 | +#include <linux/debugfs.h> | ||
3162 | |||
3163 | #include <asm/tlbflush.h> | ||
3164 | #include <asm/mmu_context.h> | ||
3165 | +#include <asm/nospec-branch.h> | ||
3166 | #include <asm/cache.h> | ||
3167 | #include <asm/apic.h> | ||
3168 | #include <asm/uv/uv.h> | ||
3169 | -#include <linux/debugfs.h> | ||
3170 | |||
3171 | /* | ||
3172 | * TLB flushing, formerly SMP-only | ||
3173 | @@ -247,6 +248,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, | ||
3174 | } else { | ||
3175 | u16 new_asid; | ||
3176 | bool need_flush; | ||
3177 | + u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); | ||
3178 | + | ||
3179 | + /* | ||
3180 | + * Avoid user/user BTB poisoning by flushing the branch | ||
3181 | + * predictor when switching between processes. This stops | ||
3182 | + * one process from doing Spectre-v2 attacks on another. | ||
3183 | + * | ||
3184 | + * As an optimization, flush indirect branches only when | ||
3185 | + * switching into processes that disable dumping. This | ||
3186 | + * protects high value processes like gpg, without having | ||
3187 | + * too high performance overhead. IBPB is *expensive*! | ||
3188 | + * | ||
3189 | + * This will not flush branches when switching into kernel | ||
3190 | + * threads. It will also not flush if we switch to idle | ||
3191 | + * thread and back to the same process. It will flush if we | ||
3192 | + * switch to a different non-dumpable process. | ||
3193 | + */ | ||
3194 | + if (tsk && tsk->mm && | ||
3195 | + tsk->mm->context.ctx_id != last_ctx_id && | ||
3196 | + get_dumpable(tsk->mm) != SUID_DUMP_USER) | ||
3197 | + indirect_branch_prediction_barrier(); | ||
3198 | |||
3199 | if (IS_ENABLED(CONFIG_VMAP_STACK)) { | ||
3200 | /* | ||
3201 | @@ -292,6 +314,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, | ||
3202 | trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); | ||
3203 | } | ||
3204 | |||
3205 | + /* | ||
3206 | + * Record last user mm's context id, so we can avoid | ||
3207 | + * flushing branch buffer with IBPB if we switch back | ||
3208 | + * to the same user. | ||
3209 | + */ | ||
3210 | + if (next != &init_mm) | ||
3211 | + this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); | ||
3212 | + | ||
3213 | this_cpu_write(cpu_tlbstate.loaded_mm, next); | ||
3214 | this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid); | ||
3215 | } | ||
3216 | @@ -369,6 +399,7 @@ void initialize_tlbstate_and_flush(void) | ||
3217 | write_cr3(build_cr3(mm->pgd, 0)); | ||
3218 | |||
3219 | /* Reinitialize tlbstate. */ | ||
3220 | + this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); | ||
3221 | this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); | ||
3222 | this_cpu_write(cpu_tlbstate.next_asid, 1); | ||
3223 | this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); | ||
3224 | diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c | ||
3225 | index a9020f82eea7..58403052514f 100644 | ||
3226 | --- a/drivers/auxdisplay/img-ascii-lcd.c | ||
3227 | +++ b/drivers/auxdisplay/img-ascii-lcd.c | ||
3228 | @@ -443,3 +443,7 @@ static struct platform_driver img_ascii_lcd_driver = { | ||
3229 | .remove = img_ascii_lcd_remove, | ||
3230 | }; | ||
3231 | module_platform_driver(img_ascii_lcd_driver); | ||
3232 | + | ||
3233 | +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); | ||
3234 | +MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>"); | ||
3235 | +MODULE_LICENSE("GPL"); | ||
3236 | diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c | ||
3237 | index d9ab7c75b14f..e0c73ceba2ed 100644 | ||
3238 | --- a/drivers/fpga/fpga-region.c | ||
3239 | +++ b/drivers/fpga/fpga-region.c | ||
3240 | @@ -147,6 +147,7 @@ static struct fpga_manager *fpga_region_get_manager(struct fpga_region *region) | ||
3241 | mgr_node = of_parse_phandle(np, "fpga-mgr", 0); | ||
3242 | if (mgr_node) { | ||
3243 | mgr = of_fpga_mgr_get(mgr_node); | ||
3244 | + of_node_put(mgr_node); | ||
3245 | of_node_put(np); | ||
3246 | return mgr; | ||
3247 | } | ||
3248 | @@ -192,10 +193,13 @@ static int fpga_region_get_bridges(struct fpga_region *region, | ||
3249 | parent_br = region_np->parent; | ||
3250 | |||
3251 | /* If overlay has a list of bridges, use it. */ | ||
3252 | - if (of_parse_phandle(overlay, "fpga-bridges", 0)) | ||
3253 | + br = of_parse_phandle(overlay, "fpga-bridges", 0); | ||
3254 | + if (br) { | ||
3255 | + of_node_put(br); | ||
3256 | np = overlay; | ||
3257 | - else | ||
3258 | + } else { | ||
3259 | np = region_np; | ||
3260 | + } | ||
3261 | |||
3262 | for (i = 0; ; i++) { | ||
3263 | br = of_parse_phandle(np, "fpga-bridges", i); | ||
3264 | @@ -203,12 +207,15 @@ static int fpga_region_get_bridges(struct fpga_region *region, | ||
3265 | break; | ||
3266 | |||
3267 | /* If parent bridge is in list, skip it. */ | ||
3268 | - if (br == parent_br) | ||
3269 | + if (br == parent_br) { | ||
3270 | + of_node_put(br); | ||
3271 | continue; | ||
3272 | + } | ||
3273 | |||
3274 | /* If node is a bridge, get it and add to list */ | ||
3275 | ret = fpga_bridge_get_to_list(br, region->info, | ||
3276 | &region->bridge_list); | |
3277 | + of_node_put(br); | ||
3278 | |||
3279 | /* If any of the bridges are in use, give up */ | ||
3280 | if (ret == -EBUSY) { | ||
3281 | diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c | ||
3282 | index 98fbb628d5bd..38411e1c155b 100644 | ||
3283 | --- a/drivers/iio/accel/kxsd9-i2c.c | ||
3284 | +++ b/drivers/iio/accel/kxsd9-i2c.c | ||
3285 | @@ -63,3 +63,6 @@ static struct i2c_driver kxsd9_i2c_driver = { | ||
3286 | .id_table = kxsd9_i2c_id, | ||
3287 | }; | ||
3288 | module_i2c_driver(kxsd9_i2c_driver); | ||
3289 | + | ||
3290 | +MODULE_LICENSE("GPL v2"); | ||
3291 | +MODULE_DESCRIPTION("KXSD9 accelerometer I2C interface"); | ||
3292 | diff --git a/drivers/iio/adc/qcom-vadc-common.c b/drivers/iio/adc/qcom-vadc-common.c | ||
3293 | index 47d24ae5462f..fe3d7826783c 100644 | ||
3294 | --- a/drivers/iio/adc/qcom-vadc-common.c | ||
3295 | +++ b/drivers/iio/adc/qcom-vadc-common.c | ||
3296 | @@ -5,6 +5,7 @@ | ||
3297 | #include <linux/math64.h> | ||
3298 | #include <linux/log2.h> | ||
3299 | #include <linux/err.h> | ||
3300 | +#include <linux/module.h> | ||
3301 | |||
3302 | #include "qcom-vadc-common.h" | ||
3303 | |||
3304 | @@ -229,3 +230,6 @@ int qcom_vadc_decimation_from_dt(u32 value) | ||
3305 | return __ffs64(value / VADC_DECIMATION_MIN); | ||
3306 | } | ||
3307 | EXPORT_SYMBOL(qcom_vadc_decimation_from_dt); | ||
3308 | + | ||
3309 | +MODULE_LICENSE("GPL v2"); | ||
3310 | +MODULE_DESCRIPTION("Qualcomm ADC common functionality"); | ||
3311 | diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
3312 | index 866aa3ce1ac9..6cf0006d4c8d 100644 | ||
3313 | --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
3314 | +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
3315 | @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) | ||
3316 | return 0; | ||
3317 | } | ||
3318 | EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); | ||
3319 | + | ||
3320 | +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); | ||
3321 | +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); | ||
3322 | +MODULE_LICENSE("GPL v2"); | ||
3323 | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c | ||
3324 | index 3a14cccbd7ff..7948acf14601 100644 | ||
3325 | --- a/drivers/tty/serial/serial_core.c | ||
3326 | +++ b/drivers/tty/serial/serial_core.c | ||
3327 | @@ -987,6 +987,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, | ||
3328 | } | ||
3329 | } else { | ||
3330 | retval = uart_startup(tty, state, 1); | ||
3331 | + if (retval == 0) | ||
3332 | + tty_port_set_initialized(port, true); | ||
3333 | if (retval > 0) | ||
3334 | retval = 0; | ||
3335 | } | ||
3336 | diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h | ||
3337 | index 1c65817673db..41615f38bcff 100644 | ||
3338 | --- a/include/linux/fdtable.h | ||
3339 | +++ b/include/linux/fdtable.h | ||
3340 | @@ -10,6 +10,7 @@ | ||
3341 | #include <linux/compiler.h> | ||
3342 | #include <linux/spinlock.h> | ||
3343 | #include <linux/rcupdate.h> | ||
3344 | +#include <linux/nospec.h> | ||
3345 | #include <linux/types.h> | ||
3346 | #include <linux/init.h> | ||
3347 | #include <linux/fs.h> | ||
3348 | @@ -82,8 +83,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i | ||
3349 | { | ||
3350 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); | ||
3351 | |||
3352 | - if (fd < fdt->max_fds) | ||
3353 | + if (fd < fdt->max_fds) { | ||
3354 | + fd = array_index_nospec(fd, fdt->max_fds); | ||
3355 | return rcu_dereference_raw(fdt->fd[fd]); | ||
3356 | + } | ||
3357 | return NULL; | ||
3358 | } | ||
3359 | |||
3360 | diff --git a/include/linux/init.h b/include/linux/init.h | ||
3361 | index f38b993edacb..943139a563e3 100644 | ||
3362 | --- a/include/linux/init.h | ||
3363 | +++ b/include/linux/init.h | ||
3364 | @@ -5,6 +5,13 @@ | ||
3365 | #include <linux/compiler.h> | ||
3366 | #include <linux/types.h> | ||
3367 | |||
3368 | +/* Built-in __init functions needn't be compiled with retpoline */ | ||
3369 | +#if defined(RETPOLINE) && !defined(MODULE) | ||
3370 | +#define __noretpoline __attribute__((indirect_branch("keep"))) | ||
3371 | +#else | ||
3372 | +#define __noretpoline | ||
3373 | +#endif | ||
3374 | + | ||
3375 | /* These macros are used to mark some functions or | ||
3376 | * initialized data (doesn't apply to uninitialized data) | ||
3377 | * as `initialization' functions. The kernel can take this | ||
3378 | @@ -40,7 +47,7 @@ | ||
3379 | |||
3380 | /* These are for everybody (although not all archs will actually | ||
3381 | discard it in modules) */ | ||
3382 | -#define __init __section(.init.text) __cold __inittrace __latent_entropy | ||
3383 | +#define __init __section(.init.text) __cold __inittrace __latent_entropy __noretpoline | ||
3384 | #define __initdata __section(.init.data) | ||
3385 | #define __initconst __section(.init.rodata) | ||
3386 | #define __exitdata __section(.exit.data) | ||
3387 | diff --git a/include/linux/module.h b/include/linux/module.h | ||
3388 | index fe5aa3736707..b1cc541f2ddf 100644 | ||
3389 | --- a/include/linux/module.h | ||
3390 | +++ b/include/linux/module.h | ||
3391 | @@ -794,6 +794,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, | ||
3392 | static inline void module_bug_cleanup(struct module *mod) {} | ||
3393 | #endif /* CONFIG_GENERIC_BUG */ | ||
3394 | |||
3395 | +#ifdef RETPOLINE | ||
3396 | +extern bool retpoline_module_ok(bool has_retpoline); | ||
3397 | +#else | ||
3398 | +static inline bool retpoline_module_ok(bool has_retpoline) | ||
3399 | +{ | ||
3400 | + return true; | ||
3401 | +} | ||
3402 | +#endif | ||
3403 | + | ||
3404 | #ifdef CONFIG_MODULE_SIG | ||
3405 | static inline bool module_sig_ok(struct module *module) | ||
3406 | { | ||
3407 | diff --git a/include/linux/nospec.h b/include/linux/nospec.h | ||
3408 | new file mode 100644 | ||
3409 | index 000000000000..b99bced39ac2 | ||
3410 | --- /dev/null | ||
3411 | +++ b/include/linux/nospec.h | ||
3412 | @@ -0,0 +1,72 @@ | ||
3413 | +// SPDX-License-Identifier: GPL-2.0 | ||
3414 | +// Copyright(c) 2018 Linus Torvalds. All rights reserved. | ||
3415 | +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. | ||
3416 | +// Copyright(c) 2018 Intel Corporation. All rights reserved. | ||
3417 | + | ||
3418 | +#ifndef _LINUX_NOSPEC_H | ||
3419 | +#define _LINUX_NOSPEC_H | ||
3420 | + | ||
3421 | +/** | ||
3422 | + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise | ||
3423 | + * @index: array element index | ||
3424 | + * @size: number of elements in array | ||
3425 | + * | ||
3426 | + * When @index is out of bounds (@index >= @size), the sign bit will be | ||
3427 | + * set. Extend the sign bit to all bits and invert, giving a result of | ||
3428 | + * zero for an out of bounds index, or ~0 if within bounds [0, @size). | ||
3429 | + */ | ||
3430 | +#ifndef array_index_mask_nospec | ||
3431 | +static inline unsigned long array_index_mask_nospec(unsigned long index, | ||
3432 | + unsigned long size) | ||
3433 | +{ | ||
3434 | + /* | ||
3435 | + * Warn developers about inappropriate array_index_nospec() usage. | ||
3436 | + * | ||
3437 | + * Even if the CPU speculates past the WARN_ONCE branch, the | ||
3438 | + * sign bit of @index is taken into account when generating the | ||
3439 | + * mask. | ||
3440 | + * | ||
3441 | + * This warning is compiled out when the compiler can infer that | ||
3442 | + * @index and @size are less than LONG_MAX. | ||
3443 | + */ | ||
3444 | + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, | ||
3445 | + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) | ||
3446 | + return 0; | ||
3447 | + | ||
3448 | + /* | ||
3449 | + * Always calculate and emit the mask even if the compiler | ||
3450 | + * thinks the mask is not needed. The compiler does not take | ||
3451 | + * into account the value of @index under speculation. | ||
3452 | + */ | ||
3453 | + OPTIMIZER_HIDE_VAR(index); | ||
3454 | + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); | ||
3455 | +} | ||
3456 | +#endif | ||
3457 | + | ||
3458 | +/* | ||
3459 | + * array_index_nospec - sanitize an array index after a bounds check | ||
3460 | + * | ||
3461 | + * For a code sequence like: | ||
3462 | + * | ||
3463 | + * if (index < size) { | ||
3464 | + * index = array_index_nospec(index, size); | ||
3465 | + * val = array[index]; | ||
3466 | + * } | ||
3467 | + * | ||
3468 | + * ...if the CPU speculates past the bounds check then | ||
3469 | + * array_index_nospec() will clamp the index within the range of [0, | ||
3470 | + * size). | ||
3471 | + */ | ||
3472 | +#define array_index_nospec(index, size) \ | ||
3473 | +({ \ | ||
3474 | + typeof(index) _i = (index); \ | ||
3475 | + typeof(size) _s = (size); \ | ||
3476 | + unsigned long _mask = array_index_mask_nospec(_i, _s); \ | ||
3477 | + \ | ||
3478 | + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ | ||
3479 | + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ | ||
3480 | + \ | ||
3481 | + _i &= _mask; \ | ||
3482 | + _i; \ | ||
3483 | +}) | ||
3484 | +#endif /* _LINUX_NOSPEC_H */ | ||
3485 | diff --git a/kernel/module.c b/kernel/module.c | ||
3486 | index de66ec825992..690c0651c40f 100644 | ||
3487 | --- a/kernel/module.c | ||
3488 | +++ b/kernel/module.c | ||
3489 | @@ -2855,6 +2855,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) | ||
3490 | } | ||
3491 | #endif /* CONFIG_LIVEPATCH */ | ||
3492 | |||
3493 | +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) | ||
3494 | +{ | ||
3495 | + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) | ||
3496 | + return; | ||
3497 | + | ||
3498 | + pr_warn("%s: loading module not compiled with retpoline compiler.\n", | ||
3499 | + mod->name); | ||
3500 | +} | ||
3501 | + | ||
3502 | /* Sets info->hdr and info->len. */ | ||
3503 | static int copy_module_from_user(const void __user *umod, unsigned long len, | ||
3504 | struct load_info *info) | ||
3505 | @@ -3021,6 +3030,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) | ||
3506 | add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); | ||
3507 | } | ||
3508 | |||
3509 | + check_modinfo_retpoline(mod, info); | ||
3510 | + | ||
3511 | if (get_modinfo(info, "staging")) { | ||
3512 | add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); | ||
3513 | pr_warn("%s: module is from the staging directory, the quality " | ||
3514 | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c | ||
3515 | index d396cb61a280..81bef0676e1d 100644 | ||
3516 | --- a/net/wireless/nl80211.c | ||
3517 | +++ b/net/wireless/nl80211.c | ||
3518 | @@ -16,6 +16,7 @@ | ||
3519 | #include <linux/nl80211.h> | ||
3520 | #include <linux/rtnetlink.h> | ||
3521 | #include <linux/netlink.h> | ||
3522 | +#include <linux/nospec.h> | ||
3523 | #include <linux/etherdevice.h> | ||
3524 | #include <net/net_namespace.h> | ||
3525 | #include <net/genetlink.h> | ||
3526 | @@ -2056,20 +2057,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { | ||
3527 | static int parse_txq_params(struct nlattr *tb[], | ||
3528 | struct ieee80211_txq_params *txq_params) | ||
3529 | { | ||
3530 | + u8 ac; | ||
3531 | + | ||
3532 | if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || | ||
3533 | !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || | ||
3534 | !tb[NL80211_TXQ_ATTR_AIFS]) | ||
3535 | return -EINVAL; | ||
3536 | |||
3537 | - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); | ||
3538 | + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); | ||
3539 | txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); | ||
3540 | txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); | ||
3541 | txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); | ||
3542 | txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); | ||
3543 | |||
3544 | - if (txq_params->ac >= NL80211_NUM_ACS) | ||
3545 | + if (ac >= NL80211_NUM_ACS) | ||
3546 | return -EINVAL; | ||
3547 | - | ||
3548 | + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); | ||
3549 | return 0; | ||
3550 | } | ||
3551 | |||
3552 | diff --git a/scripts/faddr2line b/scripts/faddr2line | ||
3553 | index 39e07d8574dd..7721d5b2b0c0 100755 | ||
3554 | --- a/scripts/faddr2line | ||
3555 | +++ b/scripts/faddr2line | ||
3556 | @@ -44,10 +44,10 @@ | ||
3557 | set -o errexit | ||
3558 | set -o nounset | ||
3559 | |||
3560 | -READELF="${CROSS_COMPILE}readelf" | ||
3561 | -ADDR2LINE="${CROSS_COMPILE}addr2line" | ||
3562 | -SIZE="${CROSS_COMPILE}size" | ||
3563 | -NM="${CROSS_COMPILE}nm" | ||
3564 | +READELF="${CROSS_COMPILE:-}readelf" | ||
3565 | +ADDR2LINE="${CROSS_COMPILE:-}addr2line" | ||
3566 | +SIZE="${CROSS_COMPILE:-}size" | ||
3567 | +NM="${CROSS_COMPILE:-}nm" | ||
3568 | |||
3569 | command -v awk >/dev/null 2>&1 || die "awk isn't installed" | ||
3570 | command -v ${READELF} >/dev/null 2>&1 || die "readelf isn't installed" | ||
3571 | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c | ||
3572 | index 98314b400a95..54deaa1066cf 100644 | ||
3573 | --- a/scripts/mod/modpost.c | ||
3574 | +++ b/scripts/mod/modpost.c | ||
3575 | @@ -2165,6 +2165,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) | ||
3576 | buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); | ||
3577 | } | ||
3578 | |||
3579 | +/* Cannot check for assembler */ | ||
3580 | +static void add_retpoline(struct buffer *b) | ||
3581 | +{ | ||
3582 | + buf_printf(b, "\n#ifdef RETPOLINE\n"); | ||
3583 | + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); | ||
3584 | + buf_printf(b, "#endif\n"); | ||
3585 | +} | ||
3586 | + | ||
3587 | static void add_staging_flag(struct buffer *b, const char *name) | ||
3588 | { | ||
3589 | static const char *staging_dir = "drivers/staging"; | ||
3590 | @@ -2506,6 +2514,7 @@ int main(int argc, char **argv) | ||
3591 | err |= check_modname_len(mod); | ||
3592 | add_header(&buf, mod); | ||
3593 | add_intree_flag(&buf, !external_module); | ||
3594 | + add_retpoline(&buf); | ||
3595 | add_staging_flag(&buf, mod->name); | ||
3596 | err |= add_versions(&buf, mod); | ||
3597 | add_depends(&buf, mod, modules); | ||
3598 | diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c | ||
3599 | index 712ed6598c48..ebdf9bd5a64c 100644 | ||
3600 | --- a/sound/soc/codecs/pcm512x-spi.c | ||
3601 | +++ b/sound/soc/codecs/pcm512x-spi.c | ||
3602 | @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { | ||
3603 | }; | ||
3604 | |||
3605 | module_spi_driver(pcm512x_spi_driver); | ||
3606 | + | ||
3607 | +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); | ||
3608 | +MODULE_AUTHOR("Mark Brown <broonie@kernel.org>"); | ||
3609 | +MODULE_LICENSE("GPL v2"); | ||
3610 | diff --git a/tools/objtool/check.c b/tools/objtool/check.c | ||
3611 | index f40d46e24bcc..9cd028aa1509 100644 | ||
3612 | --- a/tools/objtool/check.c | ||
3613 | +++ b/tools/objtool/check.c | ||
3614 | @@ -543,18 +543,14 @@ static int add_call_destinations(struct objtool_file *file) | ||
3615 | dest_off = insn->offset + insn->len + insn->immediate; | ||
3616 | insn->call_dest = find_symbol_by_offset(insn->sec, | ||
3617 | dest_off); | ||
3618 | - /* | ||
3619 | - * FIXME: Thanks to retpolines, it's now considered | ||
3620 | - * normal for a function to call within itself. So | ||
3621 | - * disable this warning for now. | ||
3622 | - */ | ||
3623 | -#if 0 | ||
3624 | - if (!insn->call_dest) { | ||
3625 | - WARN_FUNC("can't find call dest symbol at offset 0x%lx", | ||
3626 | - insn->sec, insn->offset, dest_off); | ||
3627 | + | ||
3628 | + if (!insn->call_dest && !insn->ignore) { | ||
3629 | + WARN_FUNC("unsupported intra-function call", | ||
3630 | + insn->sec, insn->offset); | ||
3631 | + WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); | ||
3632 | return -1; | ||
3633 | } | ||
3634 | -#endif | ||
3635 | + | ||
3636 | } else if (rela->sym->type == STT_SECTION) { | ||
3637 | insn->call_dest = find_symbol_by_offset(rela->sym->sec, | ||
3638 | rela->addend+4); | ||
3639 | @@ -598,7 +594,7 @@ static int handle_group_alt(struct objtool_file *file, | ||
3640 | struct instruction *orig_insn, | ||
3641 | struct instruction **new_insn) | ||
3642 | { | ||
3643 | - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump; | ||
3644 | + struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; | ||
3645 | unsigned long dest_off; | ||
3646 | |||
3647 | last_orig_insn = NULL; | ||
3648 | @@ -614,28 +610,30 @@ static int handle_group_alt(struct objtool_file *file, | ||
3649 | last_orig_insn = insn; | ||
3650 | } | ||
3651 | |||
3652 | - if (!next_insn_same_sec(file, last_orig_insn)) { | ||
3653 | - WARN("%s: don't know how to handle alternatives at end of section", | ||
3654 | - special_alt->orig_sec->name); | ||
3655 | - return -1; | ||
3656 | - } | ||
3657 | - | ||
3658 | - fake_jump = malloc(sizeof(*fake_jump)); | ||
3659 | - if (!fake_jump) { | ||
3660 | - WARN("malloc failed"); | ||
3661 | - return -1; | ||
3662 | + if (next_insn_same_sec(file, last_orig_insn)) { | ||
3663 | + fake_jump = malloc(sizeof(*fake_jump)); | ||
3664 | + if (!fake_jump) { | ||
3665 | + WARN("malloc failed"); | ||
3666 | + return -1; | ||
3667 | + } | ||
3668 | + memset(fake_jump, 0, sizeof(*fake_jump)); | ||
3669 | + INIT_LIST_HEAD(&fake_jump->alts); | ||
3670 | + clear_insn_state(&fake_jump->state); | ||
3671 | + | ||
3672 | + fake_jump->sec = special_alt->new_sec; | ||
3673 | + fake_jump->offset = -1; | ||
3674 | + fake_jump->type = INSN_JUMP_UNCONDITIONAL; | ||
3675 | + fake_jump->jump_dest = list_next_entry(last_orig_insn, list); | ||
3676 | + fake_jump->ignore = true; | ||
3677 | } | ||
3678 | - memset(fake_jump, 0, sizeof(*fake_jump)); | ||
3679 | - INIT_LIST_HEAD(&fake_jump->alts); | ||
3680 | - clear_insn_state(&fake_jump->state); | ||
3681 | - | ||
3682 | - fake_jump->sec = special_alt->new_sec; | ||
3683 | - fake_jump->offset = -1; | ||
3684 | - fake_jump->type = INSN_JUMP_UNCONDITIONAL; | ||
3685 | - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); | ||
3686 | - fake_jump->ignore = true; | ||
3687 | |||
3688 | if (!special_alt->new_len) { | ||
3689 | + if (!fake_jump) { | ||
3690 | + WARN("%s: empty alternative at end of section", | ||
3691 | + special_alt->orig_sec->name); | ||
3692 | + return -1; | ||
3693 | + } | ||
3694 | + | ||
3695 | *new_insn = fake_jump; | ||
3696 | return 0; | ||
3697 | } | ||
3698 | @@ -648,6 +646,8 @@ static int handle_group_alt(struct objtool_file *file, | ||
3699 | |||
3700 | last_new_insn = insn; | ||
3701 | |||
3702 | + insn->ignore = orig_insn->ignore_alts; | ||
3703 | + | ||
3704 | if (insn->type != INSN_JUMP_CONDITIONAL && | ||
3705 | insn->type != INSN_JUMP_UNCONDITIONAL) | ||
3706 | continue; | ||
3707 | @@ -656,8 +656,14 @@ static int handle_group_alt(struct objtool_file *file, | ||
3708 | continue; | ||
3709 | |||
3710 | dest_off = insn->offset + insn->len + insn->immediate; | ||
3711 | - if (dest_off == special_alt->new_off + special_alt->new_len) | ||
3712 | + if (dest_off == special_alt->new_off + special_alt->new_len) { | ||
3713 | + if (!fake_jump) { | ||
3714 | + WARN("%s: alternative jump to end of section", | ||
3715 | + special_alt->orig_sec->name); | ||
3716 | + return -1; | ||
3717 | + } | ||
3718 | insn->jump_dest = fake_jump; | ||
3719 | + } | ||
3720 | |||
3721 | if (!insn->jump_dest) { | ||
3722 | WARN_FUNC("can't find alternative jump destination", | ||
3723 | @@ -672,7 +678,8 @@ static int handle_group_alt(struct objtool_file *file, | ||
3724 | return -1; | ||
3725 | } | ||
3726 | |||
3727 | - list_add(&fake_jump->list, &last_new_insn->list); | ||
3728 | + if (fake_jump) | ||
3729 | + list_add(&fake_jump->list, &last_new_insn->list); | ||
3730 | |||
3731 | return 0; | ||
3732 | } | ||
3733 | @@ -729,10 +736,6 @@ static int add_special_section_alts(struct objtool_file *file) | ||
3734 | goto out; | ||
3735 | } | ||
3736 | |||
3737 | - /* Ignore retpoline alternatives. */ | ||
3738 | - if (orig_insn->ignore_alts) | ||
3739 | - continue; | ||
3740 | - | ||
3741 | new_insn = NULL; | ||
3742 | if (!special_alt->group || special_alt->new_len) { | ||
3743 | new_insn = find_insn(file, special_alt->new_sec, | ||
3744 | @@ -1089,11 +1092,11 @@ static int decode_sections(struct objtool_file *file) | ||
3745 | if (ret) | ||
3746 | return ret; | ||
3747 | |||
3748 | - ret = add_call_destinations(file); | ||
3749 | + ret = add_special_section_alts(file); | ||
3750 | if (ret) | ||
3751 | return ret; | ||
3752 | |||
3753 | - ret = add_special_section_alts(file); | ||
3754 | + ret = add_call_destinations(file); | ||
3755 | if (ret) | ||
3756 | return ret; | ||
3757 | |||
3758 | @@ -1720,10 +1723,12 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | ||
3759 | |||
3760 | insn->visited = true; | ||
3761 | |||
3762 | - list_for_each_entry(alt, &insn->alts, list) { | ||
3763 | - ret = validate_branch(file, alt->insn, state); | ||
3764 | - if (ret) | ||
3765 | - return 1; | ||
3766 | + if (!insn->ignore_alts) { | ||
3767 | + list_for_each_entry(alt, &insn->alts, list) { | ||
3768 | + ret = validate_branch(file, alt->insn, state); | ||
3769 | + if (ret) | ||
3770 | + return 1; | ||
3771 | + } | ||
3772 | } | ||
3773 | |||
3774 | switch (insn->type) { | ||
3775 | diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c | ||
3776 | index e61fe703197b..18384d9be4e1 100644 | ||
3777 | --- a/tools/objtool/orc_gen.c | ||
3778 | +++ b/tools/objtool/orc_gen.c | ||
3779 | @@ -98,6 +98,11 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, | ||
3780 | struct orc_entry *orc; | ||
3781 | struct rela *rela; | ||
3782 | |||
3783 | + if (!insn_sec->sym) { | ||
3784 | + WARN("missing symbol for section %s", insn_sec->name); | ||
3785 | + return -1; | ||
3786 | + } | ||
3787 | + | ||
3788 | /* populate ORC data */ | ||
3789 | orc = (struct orc_entry *)u_sec->data->d_buf + idx; | ||
3790 | memcpy(orc, o, sizeof(*orc)); |