Contents of /trunk/kernel-alx/patches-5.4/0316-5.4.217-all-fixes.patch
Revision 3635
Mon Oct 24 12:34:12 2022 UTC (22 months, 4 weeks ago) by niro
File size: 103893 byte(s)
-sync kernel patches
1 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt |
2 | index db9d53b879f89..8f71a17ad5442 100644 |
3 | --- a/Documentation/admin-guide/kernel-parameters.txt |
4 | +++ b/Documentation/admin-guide/kernel-parameters.txt |
5 | @@ -4298,6 +4298,18 @@ |
6 | |
7 | retain_initrd [RAM] Keep initrd memory after extraction |
8 | |
9 | + retbleed= [X86] Control mitigation of RETBleed (Arbitrary |
10 | + Speculative Code Execution with Return Instructions) |
11 | + vulnerability. |
12 | + |
13 | + off - unconditionally disable |
14 | + auto - automatically select a mitigation |
15 | + |
16 | + Selecting 'auto' will choose a mitigation method at run |
17 | + time according to the CPU. |
18 | + |
19 | + Not specifying this option is equivalent to retbleed=auto. |
20 | + |
21 | rfkill.default_state= |
22 | 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, |
23 | etc. communication is blocked by default. |
24 | @@ -4541,6 +4553,7 @@ |
25 | eibrs - enhanced IBRS |
26 | eibrs,retpoline - enhanced IBRS + Retpolines |
27 | eibrs,lfence - enhanced IBRS + LFENCE |
28 | + ibrs - use IBRS to protect kernel |
29 | |
30 | Not specifying this option is equivalent to |
31 | spectre_v2=auto. |
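Both of the options documented above are ordinary boot parameters. An illustrative boot-loader entry (kernel image and root device are placeholders) that disables the new mitigation:

	linux /boot/vmlinuz-5.4.217 root=/dev/sda1 ro retbleed=off

Omitting retbleed= behaves like retbleed=auto, and spectre_v2=ibrs can be appended the same way to force the new IBRS mode on capable Intel CPUs.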
32 | diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst |
33 | index e899f14a4ba24..4f8a06b00f608 100644 |
34 | --- a/Documentation/process/code-of-conduct-interpretation.rst |
35 | +++ b/Documentation/process/code-of-conduct-interpretation.rst |
36 | @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're |
37 | uncertain how to handle situations that come up. It will not be |
38 | considered a violation report unless you want it to be. If you are |
39 | uncertain about approaching the TAB or any other maintainers, please |
40 | -reach out to our conflict mediator, Mishi Choudhary <mishi@linux.com>. |
41 | +reach out to our conflict mediator, Joanna Lee <joanna.lee@gesmer.com>. |
42 | |
43 | In the end, "be kind to each other" is really what the end goal is for |
44 | everybody. We know everyone is human and we all fail at times, but the |
45 | diff --git a/Makefile b/Makefile |
46 | index 3d9d7ef6f8bf1..201ac8e410a94 100644 |
47 | --- a/Makefile |
48 | +++ b/Makefile |
49 | @@ -1,7 +1,7 @@ |
50 | # SPDX-License-Identifier: GPL-2.0 |
51 | VERSION = 5 |
52 | PATCHLEVEL = 4 |
53 | -SUBLEVEL = 216 |
54 | +SUBLEVEL = 217 |
55 | EXTRAVERSION = |
56 | NAME = Kleptomaniac Octopus |
57 | |
58 | diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h |
59 | index b3f1214787386..29e5675c6d4f2 100644 |
60 | --- a/arch/x86/entry/calling.h |
61 | +++ b/arch/x86/entry/calling.h |
62 | @@ -6,6 +6,8 @@ |
63 | #include <asm/percpu.h> |
64 | #include <asm/asm-offsets.h> |
65 | #include <asm/processor-flags.h> |
66 | +#include <asm/msr.h> |
67 | +#include <asm/nospec-branch.h> |
68 | |
69 | /* |
70 | |
71 | @@ -146,27 +148,19 @@ For 32-bit we have the following conventions - kernel is built with |
72 | |
73 | .endm |
74 | |
75 | -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 |
76 | +.macro POP_REGS pop_rdi=1 |
77 | popq %r15 |
78 | popq %r14 |
79 | popq %r13 |
80 | popq %r12 |
81 | popq %rbp |
82 | popq %rbx |
83 | - .if \skip_r11rcx |
84 | - popq %rsi |
85 | - .else |
86 | popq %r11 |
87 | - .endif |
88 | popq %r10 |
89 | popq %r9 |
90 | popq %r8 |
91 | popq %rax |
92 | - .if \skip_r11rcx |
93 | - popq %rsi |
94 | - .else |
95 | popq %rcx |
96 | - .endif |
97 | popq %rdx |
98 | popq %rsi |
99 | .if \pop_rdi |
100 | @@ -316,6 +310,62 @@ For 32-bit we have the following conventions - kernel is built with |
101 | |
102 | #endif |
103 | |
104 | +/* |
105 | + * IBRS kernel mitigation for Spectre_v2. |
106 | + * |
107 | + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers |
108 | + * the regs it uses (AX, CX, DX). Must be called before the first RET |
109 | + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) |
110 | + * |
111 | + * The optional argument is used to save/restore the current value, |
112 | + * which is used on the paranoid paths. |
113 | + * |
114 | + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. |
115 | + */ |
116 | +.macro IBRS_ENTER save_reg |
117 | + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS |
118 | + movl $MSR_IA32_SPEC_CTRL, %ecx |
119 | + |
120 | +.ifnb \save_reg |
121 | + rdmsr |
122 | + shl $32, %rdx |
123 | + or %rdx, %rax |
124 | + mov %rax, \save_reg |
125 | + test $SPEC_CTRL_IBRS, %eax |
126 | + jz .Ldo_wrmsr_\@ |
127 | + lfence |
128 | + jmp .Lend_\@ |
129 | +.Ldo_wrmsr_\@: |
130 | +.endif |
131 | + |
132 | + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx |
133 | + movl %edx, %eax |
134 | + shr $32, %rdx |
135 | + wrmsr |
136 | +.Lend_\@: |
137 | +.endm |
138 | + |
139 | +/* |
140 | + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) |
141 | + * regs. Must be called after the last RET. |
142 | + */ |
143 | +.macro IBRS_EXIT save_reg |
144 | + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS |
145 | + movl $MSR_IA32_SPEC_CTRL, %ecx |
146 | + |
147 | +.ifnb \save_reg |
148 | + mov \save_reg, %rdx |
149 | +.else |
150 | + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx |
151 | + andl $(~SPEC_CTRL_IBRS), %edx |
152 | +.endif |
153 | + |
154 | + movl %edx, %eax |
155 | + shr $32, %rdx |
156 | + wrmsr |
157 | +.Lend_\@: |
158 | +.endm |
159 | + |
160 | /* |
161 | * Mitigate Spectre v1 for conditional swapgs code paths. |
162 | * |
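In C terms, the new assembly pair behaves roughly as in the sketch below. This is a paraphrase rather than kernel code: rdmsr64(), wrmsr64() and this_cpu_spec_ctrl() are placeholder helpers standing in for the raw MSR accesses and the per-CPU x86_spec_ctrl_current read, and the real logic must stay in assembly because it has to run before the first RET.

	#include <stdint.h>
	#include <stdbool.h>

	#define MSR_IA32_SPEC_CTRL	0x48
	#define SPEC_CTRL_IBRS		(1ULL << 0)

	extern uint64_t rdmsr64(uint32_t msr);            /* placeholder */
	extern void wrmsr64(uint32_t msr, uint64_t val);  /* placeholder */
	extern uint64_t this_cpu_spec_ctrl(void); /* x86_spec_ctrl_current */

	/* IBRS_ENTER: enable IBRS on kernel entry. The paranoid paths pass
	 * save so the interrupted context's MSR value can be restored. */
	uint64_t ibrs_enter(bool save)
	{
		uint64_t old = 0;

		if (save) {
			old = rdmsr64(MSR_IA32_SPEC_CTRL);
			if (old & SPEC_CTRL_IBRS)
				return old;   /* already set: skip the wrmsr */
		}
		/* x86_spec_ctrl_current is assumed to have SPEC_CTRL_IBRS set */
		wrmsr64(MSR_IA32_SPEC_CTRL, this_cpu_spec_ctrl());
		return old;
	}

	/* IBRS_EXIT: restore the saved value, or drop IBRS for user mode. */
	void ibrs_exit(bool restore, uint64_t saved)
	{
		uint64_t val = restore ? saved
				       : (this_cpu_spec_ctrl() & ~SPEC_CTRL_IBRS);

		wrmsr64(MSR_IA32_SPEC_CTRL, val);
	}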
163 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
164 | index bde3e0f85425f..2d837fb54c31b 100644 |
165 | --- a/arch/x86/entry/entry_32.S |
166 | +++ b/arch/x86/entry/entry_32.S |
167 | @@ -750,7 +750,6 @@ ENTRY(__switch_to_asm) |
168 | movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset |
169 | #endif |
170 | |
171 | -#ifdef CONFIG_RETPOLINE |
172 | /* |
173 | * When switching from a shallower to a deeper call stack |
174 | * the RSB may either underflow or use entries populated |
175 | @@ -759,7 +758,6 @@ ENTRY(__switch_to_asm) |
176 | * speculative execution to prevent attack. |
177 | */ |
178 | FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
179 | -#endif |
180 | |
181 | /* restore callee-saved registers */ |
182 | popfl |
183 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
184 | index 2ba3d53ac5b11..c82136030d58f 100644 |
185 | --- a/arch/x86/entry/entry_64.S |
186 | +++ b/arch/x86/entry/entry_64.S |
187 | @@ -172,6 +172,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) |
188 | /* IRQs are off. */ |
189 | movq %rax, %rdi |
190 | movq %rsp, %rsi |
191 | + |
192 | + /* clobbers %rax, make sure it is after saving the syscall nr */ |
193 | + IBRS_ENTER |
194 | + |
195 | call do_syscall_64 /* returns with IRQs disabled */ |
196 | |
197 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
198 | @@ -248,8 +252,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) |
199 | * perf profiles. Nothing jumps here. |
200 | */ |
201 | syscall_return_via_sysret: |
202 | - /* rcx and r11 are already restored (see code above) */ |
203 | - POP_REGS pop_rdi=0 skip_r11rcx=1 |
204 | + IBRS_EXIT |
205 | + POP_REGS pop_rdi=0 |
206 | |
207 | /* |
208 | * Now all regs are restored except RSP and RDI. |
209 | @@ -301,7 +305,6 @@ ENTRY(__switch_to_asm) |
210 | movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset |
211 | #endif |
212 | |
213 | -#ifdef CONFIG_RETPOLINE |
214 | /* |
215 | * When switching from a shallower to a deeper call stack |
216 | * the RSB may either underflow or use entries populated |
217 | @@ -310,7 +313,6 @@ ENTRY(__switch_to_asm) |
218 | * speculative execution to prevent attack. |
219 | */ |
220 | FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
221 | -#endif |
222 | |
223 | /* restore callee-saved registers */ |
224 | popq %r15 |
225 | @@ -622,6 +624,7 @@ GLOBAL(retint_user) |
226 | TRACE_IRQS_IRETQ |
227 | |
228 | GLOBAL(swapgs_restore_regs_and_return_to_usermode) |
229 | + IBRS_EXIT |
230 | #ifdef CONFIG_DEBUG_ENTRY |
231 | /* Assert that pt_regs indicates user mode. */ |
232 | testb $3, CS(%rsp) |
233 | @@ -1248,7 +1251,13 @@ ENTRY(paranoid_entry) |
234 | */ |
235 | FENCE_SWAPGS_KERNEL_ENTRY |
236 | |
237 | - ret |
238 | + /* |
239 | + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like |
240 | + * CR3 above, keep the old value in a callee saved register. |
241 | + */ |
242 | + IBRS_ENTER save_reg=%r15 |
243 | + |
244 | + RET |
245 | END(paranoid_entry) |
246 | |
247 | /* |
248 | @@ -1276,12 +1285,20 @@ ENTRY(paranoid_exit) |
249 | jmp .Lparanoid_exit_restore |
250 | .Lparanoid_exit_no_swapgs: |
251 | TRACE_IRQS_IRETQ_DEBUG |
252 | + |
253 | + /* |
254 | + * Must restore IBRS state before both CR3 and %GS since we need access |
255 | + * to the per-CPU x86_spec_ctrl_shadow variable. |
256 | + */ |
257 | + IBRS_EXIT save_reg=%r15 |
258 | + |
259 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
260 | RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 |
261 | .Lparanoid_exit_restore: |
262 | jmp restore_regs_and_return_to_kernel |
263 | END(paranoid_exit) |
264 | |
265 | + |
266 | /* |
267 | * Save all registers in pt_regs, and switch GS if needed. |
268 | */ |
269 | @@ -1301,6 +1318,7 @@ ENTRY(error_entry) |
270 | FENCE_SWAPGS_USER_ENTRY |
271 | /* We have user CR3. Change to kernel CR3. */ |
272 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rax |
273 | + IBRS_ENTER |
274 | |
275 | .Lerror_entry_from_usermode_after_swapgs: |
276 | /* Put us onto the real thread stack. */ |
277 | @@ -1356,6 +1374,7 @@ ENTRY(error_entry) |
278 | SWAPGS |
279 | FENCE_SWAPGS_USER_ENTRY |
280 | SWITCH_TO_KERNEL_CR3 scratch_reg=%rax |
281 | + IBRS_ENTER |
282 | |
283 | /* |
284 | * Pretend that the exception came from user mode: set up pt_regs |
285 | @@ -1461,6 +1480,8 @@ ENTRY(nmi) |
286 | PUSH_AND_CLEAR_REGS rdx=(%rdx) |
287 | ENCODE_FRAME_POINTER |
288 | |
289 | + IBRS_ENTER |
290 | + |
291 | /* |
292 | * At this point we no longer need to worry about stack damage |
293 | * due to nesting -- we're on the normal thread stack and we're |
294 | @@ -1684,6 +1705,9 @@ end_repeat_nmi: |
295 | movq $-1, %rsi |
296 | call do_nmi |
297 | |
298 | + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ |
299 | + IBRS_EXIT save_reg=%r15 |
300 | + |
301 | /* Always restore stashed CR3 value (see paranoid_entry) */ |
302 | RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 |
303 | |
304 | diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S |
305 | index 39913770a44d5..c3c4ea4a6711a 100644 |
306 | --- a/arch/x86/entry/entry_64_compat.S |
307 | +++ b/arch/x86/entry/entry_64_compat.S |
308 | @@ -4,7 +4,6 @@ |
309 | * |
310 | * Copyright 2000-2002 Andi Kleen, SuSE Labs. |
311 | */ |
312 | -#include "calling.h" |
313 | #include <asm/asm-offsets.h> |
314 | #include <asm/current.h> |
315 | #include <asm/errno.h> |
316 | @@ -17,6 +16,8 @@ |
317 | #include <linux/linkage.h> |
318 | #include <linux/err.h> |
319 | |
320 | +#include "calling.h" |
321 | + |
322 | .section .entry.text, "ax" |
323 | |
324 | /* |
325 | @@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat) |
326 | xorl %r15d, %r15d /* nospec r15 */ |
327 | cld |
328 | |
329 | + IBRS_ENTER |
330 | + |
331 | /* |
332 | * SYSENTER doesn't filter flags, so we need to clear NT and AC |
333 | * ourselves. To save a few cycles, we can check whether |
334 | @@ -253,6 +256,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) |
335 | */ |
336 | TRACE_IRQS_OFF |
337 | |
338 | + IBRS_ENTER |
339 | + |
340 | movq %rsp, %rdi |
341 | call do_fast_syscall_32 |
342 | /* XEN PV guests always use IRET path */ |
343 | @@ -267,6 +272,9 @@ sysret32_from_system_call: |
344 | */ |
345 | STACKLEAK_ERASE |
346 | TRACE_IRQS_ON /* User mode traces as IRQs on. */ |
347 | + |
348 | + IBRS_EXIT |
349 | + |
350 | movq RBX(%rsp), %rbx /* pt_regs->rbx */ |
351 | movq RBP(%rsp), %rbp /* pt_regs->rbp */ |
352 | movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ |
353 | @@ -408,6 +416,7 @@ ENTRY(entry_INT80_compat) |
354 | * gate turned them off. |
355 | */ |
356 | TRACE_IRQS_OFF |
357 | + IBRS_ENTER |
358 | |
359 | movq %rsp, %rdi |
360 | call do_int80_syscall_32 |
361 | diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h |
362 | index 0c814cd9ea42c..cdf39decf7340 100644 |
363 | --- a/arch/x86/include/asm/cpu_device_id.h |
364 | +++ b/arch/x86/include/asm/cpu_device_id.h |
365 | @@ -5,15 +5,22 @@ |
366 | /* |
367 | * Declare drivers belonging to specific x86 CPUs |
368 | * Similar in spirit to pci_device_id and related PCI functions |
369 | + * |
370 | + * The wildcard initializers are in mod_devicetable.h because |
371 | + * file2alias needs them. Sigh. |
372 | */ |
373 | - |
374 | #include <linux/mod_devicetable.h> |
375 | +/* Get the INTEL_FAM* model defines */ |
376 | +#include <asm/intel-family.h> |
377 | +/* And the X86_VENDOR_* ones */ |
378 | +#include <asm/processor.h> |
379 | |
380 | +/* Centaur FAM6 models */ |
381 | +#define X86_CENTAUR_FAM6_C7_A 0xa |
382 | #define X86_CENTAUR_FAM6_C7_D 0xd |
383 | #define X86_CENTAUR_FAM6_NANO 0xf |
384 | |
385 | #define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) |
386 | - |
387 | /** |
388 | * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching |
389 | * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
390 | @@ -26,8 +33,11 @@ |
391 | * format is unsigned long. The supplied value, pointer |
392 | * etc. is casted to unsigned long internally. |
393 | * |
394 | - * Backport version to keep the SRBDS pile consistant. No shorter variants |
395 | - * required for this. |
396 | + * Use only if you need all selectors. Otherwise use one of the shorter |
397 | + * macros of the X86_MATCH_* family. If there is no matching shorthand |
398 | + * macro, consider to add one. If you really need to wrap one of the macros |
399 | + * into another macro at the usage site for good reasons, then please |
400 | + * start this local macro with X86_MATCH to allow easy grepping. |
401 | */ |
402 | #define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ |
403 | _steppings, _feature, _data) { \ |
404 | @@ -39,6 +49,120 @@ |
405 | .driver_data = (unsigned long) _data \ |
406 | } |
407 | |
408 | +/** |
409 | + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching |
410 | + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
411 | + * The name is expanded to X86_VENDOR_@_vendor |
412 | + * @_family: The family number or X86_FAMILY_ANY |
413 | + * @_model: The model number, model constant or X86_MODEL_ANY |
414 | + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY |
415 | + * @_data: Driver specific data or NULL. The internal storage |
416 | + * format is unsigned long. The supplied value, pointer |
417 | + * etc. is casted to unsigned long internally. |
418 | + * |
419 | + * The steppings argument of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is |
420 | + * set to wildcards. |
421 | + */ |
422 | +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ |
423 | + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ |
424 | + X86_STEPPING_ANY, feature, data) |
425 | + |
426 | +/** |
427 | + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature |
428 | + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
429 | + * The name is expanded to X86_VENDOR_@vendor |
430 | + * @family: The family number or X86_FAMILY_ANY |
431 | + * @feature: A X86_FEATURE bit |
432 | + * @data: Driver specific data or NULL. The internal storage |
433 | + * format is unsigned long. The supplied value, pointer |
434 | + * etc. is casted to unsigned long internally. |
435 | + * |
436 | + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are |
437 | + * set to wildcards. |
438 | + */ |
439 | +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ |
440 | + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ |
441 | + X86_MODEL_ANY, feature, data) |
442 | + |
443 | +/** |
444 | + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature |
445 | + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
446 | + * The name is expanded to X86_VENDOR_@vendor |
447 | + * @feature: A X86_FEATURE bit |
448 | + * @data: Driver specific data or NULL. The internal storage |
449 | + * format is unsigned long. The supplied value, pointer |
450 | + * etc. is casted to unsigned long internally. |
451 | + * |
452 | + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are |
453 | + * set to wildcards. |
454 | + */ |
455 | +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ |
456 | + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) |
457 | + |
458 | +/** |
459 | + * X86_MATCH_FEATURE - Macro for matching a CPU feature |
460 | + * @feature: A X86_FEATURE bit |
461 | + * @data: Driver specific data or NULL. The internal storage |
462 | + * format is unsigned long. The supplied value, pointer |
463 | + * etc. is casted to unsigned long internally. |
464 | + * |
465 | + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are |
466 | + * set to wildcards. |
467 | + */ |
468 | +#define X86_MATCH_FEATURE(feature, data) \ |
469 | + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) |
470 | + |
471 | +/* Transitional to keep the existing code working */ |
472 | +#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL) |
473 | + |
474 | +/** |
475 | + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model |
476 | + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
477 | + * The name is expanded to X86_VENDOR_@vendor |
478 | + * @family: The family number or X86_FAMILY_ANY |
479 | + * @model: The model number, model constant or X86_MODEL_ANY |
480 | + * @data: Driver specific data or NULL. The internal storage |
481 | + * format is unsigned long. The supplied value, pointer |
482 | + * etc. is casted to unsigned long internally. |
483 | + * |
484 | + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are |
485 | + * set to wildcards. |
486 | + */ |
487 | +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ |
488 | + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ |
489 | + X86_FEATURE_ANY, data) |
490 | + |
491 | +/** |
492 | + * X86_MATCH_VENDOR_FAM - Match vendor and family |
493 | + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY |
494 | + * The name is expanded to X86_VENDOR_@vendor |
495 | + * @family: The family number or X86_FAMILY_ANY |
496 | + * @data: Driver specific data or NULL. The internal storage |
497 | + * format is unsigned long. The supplied value, pointer |
498 | + * etc. is casted to unsigned long internally. |
499 | + * |
500 | + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are |
501 | + * set to wildcards. |
502 | + */ |
503 | +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ |
504 | + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) |
505 | + |
506 | +/** |
507 | + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model |
508 | + * @model: The model name without the INTEL_FAM6_ prefix or ANY |
509 | + * The model name is expanded to INTEL_FAM6_@model internally |
510 | + * @data: Driver specific data or NULL. The internal storage |
511 | + * format is unsigned long. The supplied value, pointer |
512 | + * etc. is casted to unsigned long internally. |
513 | + * |
514 | + * The vendor is set to INTEL, the family to 6 and all other missing |
515 | + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. |
516 | + * |
517 | + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information. |
518 | + */ |
519 | +#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ |
520 | + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) |
521 | + |
522 | /* |
523 | * Match specific microcode revisions. |
524 | * |
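As a usage illustration (hypothetical driver table, not part of this patch), the new shorthands collapse the full STEPPINGS macro into one-liners:

	#include <linux/module.h>
	#include <linux/mod_devicetable.h>
	#include <asm/cpu_device_id.h>
	#include <asm/cpufeatures.h>
	#include <asm/intel-family.h>

	static int skx_quirk;	/* hypothetical per-model driver data */

	static const struct x86_cpu_id example_cpu_ids[] = {
		/* vendor INTEL, family 6, model SKYLAKE_X, any stepping/feature */
		X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_quirk),
		/* any vendor/family/model advertising AVX512_BF16 */
		X86_MATCH_FEATURE(X86_FEATURE_AVX512_BF16, NULL),
		{}
	};
	MODULE_DEVICE_TABLE(x86cpu, example_cpu_ids);

x86_match_cpu(example_cpu_ids) then returns the first matching entry, whose driver_data member carries the supplied pointer cast to unsigned long.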
525 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
526 | index 736b0e412344b..2ec85d7bfdff2 100644 |
527 | --- a/arch/x86/include/asm/cpufeatures.h |
528 | +++ b/arch/x86/include/asm/cpufeatures.h |
529 | @@ -203,8 +203,8 @@ |
530 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
531 | #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ |
532 | #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ |
533 | -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ |
534 | -#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ |
535 | +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ |
536 | +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ |
537 | #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
538 | #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ |
539 | #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ |
540 | @@ -286,7 +286,10 @@ |
541 | #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ |
542 | #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ |
543 | #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ |
544 | -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM exit when EIBRS is enabled */ |
545 | +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ |
546 | +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ |
547 | +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ |
548 | +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ |
549 | |
550 | /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ |
551 | #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ |
552 | @@ -303,6 +306,7 @@ |
553 | #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ |
554 | #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ |
555 | #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ |
556 | +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ |
557 | |
558 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ |
559 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
560 | @@ -407,7 +411,8 @@ |
561 | #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ |
562 | #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ |
563 | #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ |
564 | -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ |
565 | +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ |
566 | #define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ |
567 | +#define X86_BUG_MMIO_UNKNOWN X86_BUG(28) /* CPU is too old and its MMIO Stale Data status is unknown */ |
568 | |
569 | #endif /* _ASM_X86_CPUFEATURES_H */ |
570 | diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h |
571 | index 5b07573c3bc87..c1d6d8bbb7dad 100644 |
572 | --- a/arch/x86/include/asm/intel-family.h |
573 | +++ b/arch/x86/include/asm/intel-family.h |
574 | @@ -35,6 +35,9 @@ |
575 | * The #define line may optionally include a comment including platform names. |
576 | */ |
577 | |
578 | +/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ |
579 | +#define INTEL_FAM6_ANY X86_MODEL_ANY |
580 | + |
581 | #define INTEL_FAM6_CORE_YONAH 0x0E |
582 | |
583 | #define INTEL_FAM6_CORE2_MEROM 0x0F |
584 | @@ -126,6 +129,9 @@ |
585 | #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ |
586 | #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ |
587 | |
588 | +/* Family 5 */ |
589 | +#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ |
590 | + |
591 | /* Useful macros */ |
592 | #define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ |
593 | { \ |
594 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h |
595 | index cef4eba03ff36..713886d5493a8 100644 |
596 | --- a/arch/x86/include/asm/msr-index.h |
597 | +++ b/arch/x86/include/asm/msr-index.h |
598 | @@ -47,6 +47,8 @@ |
599 | #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ |
600 | #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ |
601 | #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ |
602 | +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ |
603 | +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) |
604 | |
605 | #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ |
606 | #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ |
607 | @@ -82,6 +84,7 @@ |
608 | #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a |
609 | #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ |
610 | #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ |
611 | +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ |
612 | #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ |
613 | #define ARCH_CAP_SSB_NO BIT(4) /* |
614 | * Not susceptible to Speculative Store Bypass |
615 | @@ -129,6 +132,13 @@ |
616 | * bit available to control VERW |
617 | * behavior. |
618 | */ |
619 | +#define ARCH_CAP_RRSBA BIT(19) /* |
620 | + * Indicates RET may use predictors |
621 | + * other than the RSB. With eIBRS |
622 | + * enabled predictions in kernel mode |
623 | + * are restricted to targets in |
624 | + * kernel. |
625 | + */ |
626 | #define ARCH_CAP_PBRSB_NO BIT(24) /* |
627 | * Not susceptible to Post-Barrier |
628 | * Return Stack Buffer Predictions. |
629 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
630 | index a1ee1a760c3eb..8c898eed28941 100644 |
631 | --- a/arch/x86/include/asm/nospec-branch.h |
632 | +++ b/arch/x86/include/asm/nospec-branch.h |
633 | @@ -4,11 +4,14 @@ |
634 | #define _ASM_X86_NOSPEC_BRANCH_H_ |
635 | |
636 | #include <linux/static_key.h> |
637 | +#include <linux/frame.h> |
638 | |
639 | #include <asm/alternative.h> |
640 | #include <asm/alternative-asm.h> |
641 | #include <asm/cpufeatures.h> |
642 | #include <asm/msr-index.h> |
643 | +#include <asm/unwind_hints.h> |
644 | +#include <asm/percpu.h> |
645 | |
646 | /* |
647 | * This should be used immediately before a retpoline alternative. It tells |
648 | @@ -60,9 +63,9 @@ |
649 | lfence; \ |
650 | jmp 775b; \ |
651 | 774: \ |
652 | + add $(BITS_PER_LONG/8) * 2, sp; \ |
653 | dec reg; \ |
654 | jnz 771b; \ |
655 | - add $(BITS_PER_LONG/8) * nr, sp; \ |
656 | /* barrier for jnz misprediction */ \ |
657 | lfence; |
658 | #else |
659 | @@ -79,13 +82,6 @@ |
660 | add $(BITS_PER_LONG/8) * nr, sp; |
661 | #endif |
662 | |
663 | -#define __ISSUE_UNBALANCED_RET_GUARD(sp) \ |
664 | - call 881f; \ |
665 | - int3; \ |
666 | -881: \ |
667 | - add $(BITS_PER_LONG/8), sp; \ |
668 | - lfence; |
669 | - |
670 | #ifdef __ASSEMBLY__ |
671 | |
672 | /* |
673 | @@ -155,26 +151,28 @@ |
674 | #endif |
675 | .endm |
676 | |
677 | -.macro ISSUE_UNBALANCED_RET_GUARD ftr:req |
678 | - ANNOTATE_NOSPEC_ALTERNATIVE |
679 | - ALTERNATIVE "jmp .Lskip_pbrsb_\@", \ |
680 | - __stringify(__ISSUE_UNBALANCED_RET_GUARD(%_ASM_SP)) \ |
681 | - \ftr |
682 | -.Lskip_pbrsb_\@: |
683 | +.macro ISSUE_UNBALANCED_RET_GUARD |
684 | + call .Lunbalanced_ret_guard_\@ |
685 | + int3 |
686 | +.Lunbalanced_ret_guard_\@: |
687 | + add $(BITS_PER_LONG/8), %_ASM_SP |
688 | + lfence |
689 | .endm |
690 | |
691 | /* |
692 | * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP |
693 | * monstrosity above, manually. |
694 | */ |
695 | -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req |
696 | -#ifdef CONFIG_RETPOLINE |
697 | - ANNOTATE_NOSPEC_ALTERNATIVE |
698 | - ALTERNATIVE "jmp .Lskip_rsb_\@", \ |
699 | - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ |
700 | - \ftr |
701 | +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 |
702 | +.ifb \ftr2 |
703 | + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr |
704 | +.else |
705 | + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 |
706 | +.endif |
707 | + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) |
708 | +.Lunbalanced_\@: |
709 | + ISSUE_UNBALANCED_RET_GUARD |
710 | .Lskip_rsb_\@: |
711 | -#endif |
712 | .endm |
713 | |
714 | #else /* __ASSEMBLY__ */ |
715 | @@ -249,6 +247,7 @@ enum spectre_v2_mitigation { |
716 | SPECTRE_V2_EIBRS, |
717 | SPECTRE_V2_EIBRS_RETPOLINE, |
718 | SPECTRE_V2_EIBRS_LFENCE, |
719 | + SPECTRE_V2_IBRS, |
720 | }; |
721 | |
722 | /* The indirect branch speculation control variants */ |
723 | @@ -312,6 +311,9 @@ static inline void indirect_branch_prediction_barrier(void) |
724 | |
725 | /* The Intel SPEC CTRL MSR base value cache */ |
726 | extern u64 x86_spec_ctrl_base; |
727 | +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); |
728 | +extern void write_spec_ctrl_current(u64 val, bool force); |
729 | +extern u64 spec_ctrl_current(void); |
730 | |
731 | /* |
732 | * With retpoline, we must use IBRS to restrict branch prediction |
733 | @@ -321,18 +323,16 @@ extern u64 x86_spec_ctrl_base; |
734 | */ |
735 | #define firmware_restrict_branch_speculation_start() \ |
736 | do { \ |
737 | - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ |
738 | - \ |
739 | preempt_disable(); \ |
740 | - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ |
741 | + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ |
742 | + spec_ctrl_current() | SPEC_CTRL_IBRS, \ |
743 | X86_FEATURE_USE_IBRS_FW); \ |
744 | } while (0) |
745 | |
746 | #define firmware_restrict_branch_speculation_end() \ |
747 | do { \ |
748 | - u64 val = x86_spec_ctrl_base; \ |
749 | - \ |
750 | - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ |
751 | + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ |
752 | + spec_ctrl_current(), \ |
753 | X86_FEATURE_USE_IBRS_FW); \ |
754 | preempt_enable(); \ |
755 | } while (0) |
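The firmware wrappers keep their usual bracketed usage; what changed above is only the MSR value they write (the live spec_ctrl_current() instead of the boot-time x86_spec_ctrl_base). A minimal sketch of a caller, with do_firmware_call() as a stand-in for any EFI/APM-style invocation:

	extern unsigned long do_firmware_call(void);	/* placeholder */

	static unsigned long guarded_firmware_call(void)
	{
		unsigned long ret;

		/* sets SPEC_CTRL_IBRS on top of the current value, preempt off */
		firmware_restrict_branch_speculation_start();
		ret = do_firmware_call();
		/* restores the current per-CPU value, preempt back on */
		firmware_restrict_branch_speculation_end();

		return ret;
	}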
756 | diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c |
757 | index 88cef978380bf..5571b28d35b60 100644 |
758 | --- a/arch/x86/kernel/cpu/amd.c |
759 | +++ b/arch/x86/kernel/cpu/amd.c |
760 | @@ -894,12 +894,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) |
761 | node_reclaim_distance = 32; |
762 | #endif |
763 | |
764 | - /* |
765 | - * Fix erratum 1076: CPB feature bit not being set in CPUID. |
766 | - * Always set it, except when running under a hypervisor. |
767 | - */ |
768 | - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) |
769 | - set_cpu_cap(c, X86_FEATURE_CPB); |
770 | + /* Fix up CPUID bits, but only if not virtualised. */ |
771 | + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { |
772 | + |
773 | + /* Erratum 1076: CPB feature bit not being set in CPUID. */ |
774 | + if (!cpu_has(c, X86_FEATURE_CPB)) |
775 | + set_cpu_cap(c, X86_FEATURE_CPB); |
776 | + |
777 | + /* |
778 | + * Zen3 (Fam19 model < 0x10) parts are not susceptible to |
779 | + * Branch Type Confusion, but predate the allocation of the |
780 | + * BTC_NO bit. |
781 | + */ |
782 | + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) |
783 | + set_cpu_cap(c, X86_FEATURE_BTC_NO); |
784 | + } |
785 | } |
786 | |
787 | static void init_amd(struct cpuinfo_x86 *c) |
788 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c |
789 | index c90d91cb14341..cf5a18e261e36 100644 |
790 | --- a/arch/x86/kernel/cpu/bugs.c |
791 | +++ b/arch/x86/kernel/cpu/bugs.c |
792 | @@ -37,6 +37,8 @@ |
793 | |
794 | static void __init spectre_v1_select_mitigation(void); |
795 | static void __init spectre_v2_select_mitigation(void); |
796 | +static void __init retbleed_select_mitigation(void); |
797 | +static void __init spectre_v2_user_select_mitigation(void); |
798 | static void __init ssb_select_mitigation(void); |
799 | static void __init l1tf_select_mitigation(void); |
800 | static void __init mds_select_mitigation(void); |
801 | @@ -46,16 +48,40 @@ static void __init taa_select_mitigation(void); |
802 | static void __init mmio_select_mitigation(void); |
803 | static void __init srbds_select_mitigation(void); |
804 | |
805 | -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ |
806 | +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ |
807 | u64 x86_spec_ctrl_base; |
808 | EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); |
809 | + |
810 | +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ |
811 | +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); |
812 | +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); |
813 | + |
814 | static DEFINE_MUTEX(spec_ctrl_mutex); |
815 | |
816 | /* |
817 | - * The vendor and possibly platform specific bits which can be modified in |
818 | - * x86_spec_ctrl_base. |
819 | + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ |
820 | + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). |
821 | */ |
822 | -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; |
823 | +void write_spec_ctrl_current(u64 val, bool force) |
824 | +{ |
825 | + if (this_cpu_read(x86_spec_ctrl_current) == val) |
826 | + return; |
827 | + |
828 | + this_cpu_write(x86_spec_ctrl_current, val); |
829 | + |
830 | + /* |
831 | + * When KERNEL_IBRS this MSR is written on return-to-user, unless |
832 | + * forced the update can be delayed until that time. |
833 | + */ |
834 | + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) |
835 | + wrmsrl(MSR_IA32_SPEC_CTRL, val); |
836 | +} |
837 | + |
838 | +u64 spec_ctrl_current(void) |
839 | +{ |
840 | + return this_cpu_read(x86_spec_ctrl_current); |
841 | +} |
842 | +EXPORT_SYMBOL_GPL(spec_ctrl_current); |
843 | |
844 | /* |
845 | * AMD specific MSR info for Speculative Store Bypass control. |
846 | @@ -105,13 +131,21 @@ void __init check_bugs(void) |
847 | if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) |
848 | rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
849 | |
850 | - /* Allow STIBP in MSR_SPEC_CTRL if supported */ |
851 | - if (boot_cpu_has(X86_FEATURE_STIBP)) |
852 | - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; |
853 | - |
854 | /* Select the proper CPU mitigations before patching alternatives: */ |
855 | spectre_v1_select_mitigation(); |
856 | spectre_v2_select_mitigation(); |
857 | + /* |
858 | + * retbleed_select_mitigation() relies on the state set by |
859 | + * spectre_v2_select_mitigation(); specifically it wants to know about |
860 | + * spectre_v2=ibrs. |
861 | + */ |
862 | + retbleed_select_mitigation(); |
863 | + /* |
864 | + * spectre_v2_user_select_mitigation() relies on the state set by |
865 | + * retbleed_select_mitigation(); specifically the STIBP selection is |
866 | + * forced for UNRET. |
867 | + */ |
868 | + spectre_v2_user_select_mitigation(); |
869 | ssb_select_mitigation(); |
870 | l1tf_select_mitigation(); |
871 | md_clear_select_mitigation(); |
872 | @@ -151,31 +185,17 @@ void __init check_bugs(void) |
873 | #endif |
874 | } |
875 | |
876 | +/* |
877 | + * NOTE: For VMX, this function is not called in the vmexit path. |
878 | + * It uses vmx_spec_ctrl_restore_host() instead. |
879 | + */ |
880 | void |
881 | x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) |
882 | { |
883 | - u64 msrval, guestval, hostval = x86_spec_ctrl_base; |
884 | + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); |
885 | struct thread_info *ti = current_thread_info(); |
886 | |
887 | - /* Is MSR_SPEC_CTRL implemented ? */ |
888 | if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { |
889 | - /* |
890 | - * Restrict guest_spec_ctrl to supported values. Clear the |
891 | - * modifiable bits in the host base value and or the |
892 | - * modifiable bits from the guest value. |
893 | - */ |
894 | - guestval = hostval & ~x86_spec_ctrl_mask; |
895 | - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; |
896 | - |
897 | - /* SSBD controlled in MSR_SPEC_CTRL */ |
898 | - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || |
899 | - static_cpu_has(X86_FEATURE_AMD_SSBD)) |
900 | - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); |
901 | - |
902 | - /* Conditional STIBP enabled? */ |
903 | - if (static_branch_unlikely(&switch_to_cond_stibp)) |
904 | - hostval |= stibp_tif_to_spec_ctrl(ti->flags); |
905 | - |
906 | if (hostval != guestval) { |
907 | msrval = setguest ? guestval : hostval; |
908 | wrmsrl(MSR_IA32_SPEC_CTRL, msrval); |
909 | @@ -705,12 +725,103 @@ static int __init nospectre_v1_cmdline(char *str) |
910 | } |
911 | early_param("nospectre_v1", nospectre_v1_cmdline); |
912 | |
913 | -#undef pr_fmt |
914 | -#define pr_fmt(fmt) "Spectre V2 : " fmt |
915 | - |
916 | static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = |
917 | SPECTRE_V2_NONE; |
918 | |
919 | +#undef pr_fmt |
920 | +#define pr_fmt(fmt) "RETBleed: " fmt |
921 | + |
922 | +enum retbleed_mitigation { |
923 | + RETBLEED_MITIGATION_NONE, |
924 | + RETBLEED_MITIGATION_IBRS, |
925 | + RETBLEED_MITIGATION_EIBRS, |
926 | +}; |
927 | + |
928 | +enum retbleed_mitigation_cmd { |
929 | + RETBLEED_CMD_OFF, |
930 | + RETBLEED_CMD_AUTO, |
931 | +}; |
932 | + |
933 | +const char * const retbleed_strings[] = { |
934 | + [RETBLEED_MITIGATION_NONE] = "Vulnerable", |
935 | + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", |
936 | + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", |
937 | +}; |
938 | + |
939 | +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = |
940 | + RETBLEED_MITIGATION_NONE; |
941 | +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = |
942 | + RETBLEED_CMD_AUTO; |
943 | + |
944 | +static int __init retbleed_parse_cmdline(char *str) |
945 | +{ |
946 | + if (!str) |
947 | + return -EINVAL; |
948 | + |
949 | + if (!strcmp(str, "off")) |
950 | + retbleed_cmd = RETBLEED_CMD_OFF; |
951 | + else if (!strcmp(str, "auto")) |
952 | + retbleed_cmd = RETBLEED_CMD_AUTO; |
953 | + else |
954 | + pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); |
955 | + |
956 | + return 0; |
957 | +} |
958 | +early_param("retbleed", retbleed_parse_cmdline); |
959 | + |
960 | +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" |
961 | +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" |
962 | +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" |
963 | + |
964 | +static void __init retbleed_select_mitigation(void) |
965 | +{ |
966 | + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) |
967 | + return; |
968 | + |
969 | + switch (retbleed_cmd) { |
970 | + case RETBLEED_CMD_OFF: |
971 | + return; |
972 | + |
973 | + case RETBLEED_CMD_AUTO: |
974 | + default: |
975 | + /* |
976 | + * The Intel mitigation (IBRS) was already selected in |
977 | + * spectre_v2_select_mitigation(). |
978 | + */ |
979 | + |
980 | + break; |
981 | + } |
982 | + |
983 | + switch (retbleed_mitigation) { |
984 | + default: |
985 | + break; |
986 | + } |
987 | + |
988 | + /* |
989 | + * Let IBRS trump all on Intel without affecting the effects of the |
990 | + * retbleed= cmdline option. |
991 | + */ |
992 | + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { |
993 | + switch (spectre_v2_enabled) { |
994 | + case SPECTRE_V2_IBRS: |
995 | + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; |
996 | + break; |
997 | + case SPECTRE_V2_EIBRS: |
998 | + case SPECTRE_V2_EIBRS_RETPOLINE: |
999 | + case SPECTRE_V2_EIBRS_LFENCE: |
1000 | + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; |
1001 | + break; |
1002 | + default: |
1003 | + pr_err(RETBLEED_INTEL_MSG); |
1004 | + } |
1005 | + } |
1006 | + |
1007 | + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); |
1008 | +} |
1009 | + |
1010 | +#undef pr_fmt |
1011 | +#define pr_fmt(fmt) "Spectre V2 : " fmt |
1012 | + |
1013 | static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = |
1014 | SPECTRE_V2_USER_NONE; |
1015 | static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = |
1016 | @@ -740,6 +851,7 @@ static inline const char *spectre_v2_module_string(void) { return ""; } |
1017 | #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" |
1018 | #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" |
1019 | #define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" |
1020 | +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n" |
1021 | |
1022 | #ifdef CONFIG_BPF_SYSCALL |
1023 | void unpriv_ebpf_notify(int new_state) |
1024 | @@ -781,6 +893,7 @@ enum spectre_v2_mitigation_cmd { |
1025 | SPECTRE_V2_CMD_EIBRS, |
1026 | SPECTRE_V2_CMD_EIBRS_RETPOLINE, |
1027 | SPECTRE_V2_CMD_EIBRS_LFENCE, |
1028 | + SPECTRE_V2_CMD_IBRS, |
1029 | }; |
1030 | |
1031 | enum spectre_v2_user_cmd { |
1032 | @@ -821,13 +934,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) |
1033 | pr_info("spectre_v2_user=%s forced on command line.\n", reason); |
1034 | } |
1035 | |
1036 | +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; |
1037 | + |
1038 | static enum spectre_v2_user_cmd __init |
1039 | -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) |
1040 | +spectre_v2_parse_user_cmdline(void) |
1041 | { |
1042 | char arg[20]; |
1043 | int ret, i; |
1044 | |
1045 | - switch (v2_cmd) { |
1046 | + switch (spectre_v2_cmd) { |
1047 | case SPECTRE_V2_CMD_NONE: |
1048 | return SPECTRE_V2_USER_CMD_NONE; |
1049 | case SPECTRE_V2_CMD_FORCE: |
1050 | @@ -853,15 +968,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) |
1051 | return SPECTRE_V2_USER_CMD_AUTO; |
1052 | } |
1053 | |
1054 | -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) |
1055 | +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) |
1056 | { |
1057 | - return (mode == SPECTRE_V2_EIBRS || |
1058 | - mode == SPECTRE_V2_EIBRS_RETPOLINE || |
1059 | - mode == SPECTRE_V2_EIBRS_LFENCE); |
1060 | + return mode == SPECTRE_V2_IBRS || |
1061 | + mode == SPECTRE_V2_EIBRS || |
1062 | + mode == SPECTRE_V2_EIBRS_RETPOLINE || |
1063 | + mode == SPECTRE_V2_EIBRS_LFENCE; |
1064 | } |
1065 | |
1066 | static void __init |
1067 | -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) |
1068 | +spectre_v2_user_select_mitigation(void) |
1069 | { |
1070 | enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; |
1071 | bool smt_possible = IS_ENABLED(CONFIG_SMP); |
1072 | @@ -874,7 +990,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) |
1073 | cpu_smt_control == CPU_SMT_NOT_SUPPORTED) |
1074 | smt_possible = false; |
1075 | |
1076 | - cmd = spectre_v2_parse_user_cmdline(v2_cmd); |
1077 | + cmd = spectre_v2_parse_user_cmdline(); |
1078 | switch (cmd) { |
1079 | case SPECTRE_V2_USER_CMD_NONE: |
1080 | goto set_mode; |
1081 | @@ -922,12 +1038,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) |
1082 | } |
1083 | |
1084 | /* |
1085 | - * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not |
1086 | - * required. |
1087 | + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, |
1088 | + * STIBP is not required. |
1089 | */ |
1090 | if (!boot_cpu_has(X86_FEATURE_STIBP) || |
1091 | !smt_possible || |
1092 | - spectre_v2_in_eibrs_mode(spectre_v2_enabled)) |
1093 | + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) |
1094 | return; |
1095 | |
1096 | /* |
1097 | @@ -952,6 +1068,7 @@ static const char * const spectre_v2_strings[] = { |
1098 | [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", |
1099 | [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", |
1100 | [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", |
1101 | + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", |
1102 | }; |
1103 | |
1104 | static const struct { |
1105 | @@ -969,6 +1086,7 @@ static const struct { |
1106 | { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, |
1107 | { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, |
1108 | { "auto", SPECTRE_V2_CMD_AUTO, false }, |
1109 | + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, |
1110 | }; |
1111 | |
1112 | static void __init spec_v2_print_cond(const char *reason, bool secure) |
1113 | @@ -1031,6 +1149,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) |
1114 | return SPECTRE_V2_CMD_AUTO; |
1115 | } |
1116 | |
1117 | + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { |
1118 | + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", |
1119 | + mitigation_options[i].option); |
1120 | + return SPECTRE_V2_CMD_AUTO; |
1121 | + } |
1122 | + |
1123 | + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { |
1124 | + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", |
1125 | + mitigation_options[i].option); |
1126 | + return SPECTRE_V2_CMD_AUTO; |
1127 | + } |
1128 | + |
1129 | + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { |
1130 | + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", |
1131 | + mitigation_options[i].option); |
1132 | + return SPECTRE_V2_CMD_AUTO; |
1133 | + } |
1134 | + |
1135 | spec_v2_print_cond(mitigation_options[i].option, |
1136 | mitigation_options[i].secure); |
1137 | return cmd; |
1138 | @@ -1046,6 +1182,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) |
1139 | return SPECTRE_V2_RETPOLINE; |
1140 | } |
1141 | |
1142 | +/* Disable in-kernel use of non-RSB RET predictors */ |
1143 | +static void __init spec_ctrl_disable_kernel_rrsba(void) |
1144 | +{ |
1145 | + u64 ia32_cap; |
1146 | + |
1147 | + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) |
1148 | + return; |
1149 | + |
1150 | + ia32_cap = x86_read_arch_cap_msr(); |
1151 | + |
1152 | + if (ia32_cap & ARCH_CAP_RRSBA) { |
1153 | + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; |
1154 | + write_spec_ctrl_current(x86_spec_ctrl_base, true); |
1155 | + } |
1156 | +} |
1157 | + |
1158 | static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) |
1159 | { |
1160 | /* |
1161 | @@ -1070,10 +1222,6 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ |
1162 | */ |
1163 | switch (mode) { |
1164 | case SPECTRE_V2_NONE: |
1165 | - /* These modes already fill RSB at vmexit */ |
1166 | - case SPECTRE_V2_LFENCE: |
1167 | - case SPECTRE_V2_RETPOLINE: |
1168 | - case SPECTRE_V2_EIBRS_RETPOLINE: |
1169 | return; |
1170 | |
1171 | case SPECTRE_V2_EIBRS_LFENCE: |
1172 | @@ -1083,6 +1231,14 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ |
1173 | pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); |
1174 | } |
1175 | return; |
1176 | + |
1177 | + case SPECTRE_V2_EIBRS_RETPOLINE: |
1178 | + case SPECTRE_V2_RETPOLINE: |
1179 | + case SPECTRE_V2_LFENCE: |
1180 | + case SPECTRE_V2_IBRS: |
1181 | + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); |
1182 | + pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); |
1183 | + return; |
1184 | } |
1185 | |
1186 | pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); |
1187 | @@ -1113,6 +1269,14 @@ static void __init spectre_v2_select_mitigation(void) |
1188 | break; |
1189 | } |
1190 | |
1191 | + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && |
1192 | + retbleed_cmd != RETBLEED_CMD_OFF && |
1193 | + boot_cpu_has(X86_FEATURE_IBRS) && |
1194 | + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { |
1195 | + mode = SPECTRE_V2_IBRS; |
1196 | + break; |
1197 | + } |
1198 | + |
1199 | mode = spectre_v2_select_retpoline(); |
1200 | break; |
1201 | |
1202 | @@ -1129,6 +1293,10 @@ static void __init spectre_v2_select_mitigation(void) |
1203 | mode = spectre_v2_select_retpoline(); |
1204 | break; |
1205 | |
1206 | + case SPECTRE_V2_CMD_IBRS: |
1207 | + mode = SPECTRE_V2_IBRS; |
1208 | + break; |
1209 | + |
1210 | case SPECTRE_V2_CMD_EIBRS: |
1211 | mode = SPECTRE_V2_EIBRS; |
1212 | break; |
1213 | @@ -1145,10 +1313,9 @@ static void __init spectre_v2_select_mitigation(void) |
1214 | if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) |
1215 | pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); |
1216 | |
1217 | - if (spectre_v2_in_eibrs_mode(mode)) { |
1218 | - /* Force it so VMEXIT will restore correctly */ |
1219 | + if (spectre_v2_in_ibrs_mode(mode)) { |
1220 | x86_spec_ctrl_base |= SPEC_CTRL_IBRS; |
1221 | - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1222 | + write_spec_ctrl_current(x86_spec_ctrl_base, true); |
1223 | } |
1224 | |
1225 | switch (mode) { |
1226 | @@ -1156,6 +1323,12 @@ static void __init spectre_v2_select_mitigation(void) |
1227 | case SPECTRE_V2_EIBRS: |
1228 | break; |
1229 | |
1230 | + case SPECTRE_V2_IBRS: |
1231 | + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); |
1232 | + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) |
1233 | + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); |
1234 | + break; |
1235 | + |
1236 | case SPECTRE_V2_LFENCE: |
1237 | case SPECTRE_V2_EIBRS_LFENCE: |
1238 | setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); |
1239 | @@ -1167,16 +1340,56 @@ static void __init spectre_v2_select_mitigation(void) |
1240 | break; |
1241 | } |
1242 | |
1243 | + /* |
1244 | + * Disable alternate RSB predictions in kernel when indirect CALLs and |
1245 | + * JMPs gets protection against BHI and Intramode-BTI, but RET |
1246 | + * prediction from a non-RSB predictor is still a risk. |
1247 | + */ |
1248 | + if (mode == SPECTRE_V2_EIBRS_LFENCE || |
1249 | + mode == SPECTRE_V2_EIBRS_RETPOLINE || |
1250 | + mode == SPECTRE_V2_RETPOLINE) |
1251 | + spec_ctrl_disable_kernel_rrsba(); |
1252 | + |
1253 | spectre_v2_enabled = mode; |
1254 | pr_info("%s\n", spectre_v2_strings[mode]); |
1255 | |
1256 | /* |
1257 | - * If spectre v2 protection has been enabled, unconditionally fill |
1258 | - * RSB during a context switch; this protects against two independent |
1259 | - * issues: |
1260 | + * If Spectre v2 protection has been enabled, fill the RSB during a |
1261 | + * context switch. In general there are two types of RSB attacks |
1262 | + * across context switches, for which the CALLs/RETs may be unbalanced. |
1263 | + * |
1264 | + * 1) RSB underflow |
1265 | + * |
1266 | + * Some Intel parts have "bottomless RSB". When the RSB is empty, |
1267 | + * speculated return targets may come from the branch predictor, |
1268 | + * which could have a user-poisoned BTB or BHB entry. |
1269 | + * |
1270 | + * AMD has it even worse: *all* returns are speculated from the BTB, |
1271 | + * regardless of the state of the RSB. |
1272 | + * |
1273 | + * When IBRS or eIBRS is enabled, the "user -> kernel" attack |
1274 | + * scenario is mitigated by the IBRS branch prediction isolation |
1275 | + * properties, so the RSB buffer filling wouldn't be necessary to |
1276 | + * protect against this type of attack. |
1277 | + * |
1278 | + * The "user -> user" attack scenario is mitigated by RSB filling. |
1279 | * |
1280 | - * - RSB underflow (and switch to BTB) on Skylake+ |
1281 | - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs |
1282 | + * 2) Poisoned RSB entry |
1283 | + * |
1284 | + * If the 'next' in-kernel return stack is shorter than 'prev', |
1285 | + * 'next' could be tricked into speculating with a user-poisoned RSB |
1286 | + * entry. |
1287 | + * |
1288 | + * The "user -> kernel" attack scenario is mitigated by SMEP and |
1289 | + * eIBRS. |
1290 | + * |
1291 | + * The "user -> user" scenario, also known as SpectreBHB, requires |
1292 | + * RSB clearing. |
1293 | + * |
1294 | + * So to mitigate all cases, unconditionally fill RSB on context |
1295 | + * switches. |
1296 | + * |
1297 | + * FIXME: Is this pointless for retbleed-affected AMD? |
1298 | */ |
1299 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); |
1300 | pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); |
1301 | @@ -1184,28 +1397,29 @@ static void __init spectre_v2_select_mitigation(void) |
1302 | spectre_v2_determine_rsb_fill_type_at_vmexit(mode); |
1303 | |
1304 | /* |
1305 | - * Retpoline means the kernel is safe because it has no indirect |
1306 | - * branches. Enhanced IBRS protects firmware too, so, enable restricted |
1307 | - * speculation around firmware calls only when Enhanced IBRS isn't |
1308 | - * supported. |
1309 | + * Retpoline protects the kernel, but doesn't protect firmware. IBRS |
1310 | + * and Enhanced IBRS protect firmware too, so enable IBRS around |
1311 | + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise |
1312 | + * enabled. |
1313 | * |
1314 | * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because |
1315 | * the user might select retpoline on the kernel command line and if |
1316 | * the CPU supports Enhanced IBRS, kernel might un-intentionally not |
1317 | * enable IBRS around firmware calls. |
1318 | */ |
1319 | - if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { |
1320 | + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { |
1321 | setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); |
1322 | pr_info("Enabling Restricted Speculation for firmware calls\n"); |
1323 | } |
1324 | |
1325 | /* Set up IBPB and STIBP depending on the general spectre V2 command */ |
1326 | - spectre_v2_user_select_mitigation(cmd); |
1327 | + spectre_v2_cmd = cmd; |
1328 | } |
1329 | |
1330 | static void update_stibp_msr(void * __unused) |
1331 | { |
1332 | - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1333 | + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); |
1334 | + write_spec_ctrl_current(val, true); |
1335 | } |
1336 | |
1337 | /* Update x86_spec_ctrl_base in case SMT state changed. */ |
1338 | @@ -1421,16 +1635,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) |
1339 | break; |
1340 | } |
1341 | |
1342 | - /* |
1343 | - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper |
1344 | - * bit in the mask to allow guests to use the mitigation even in the |
1345 | - * case where the host does not enable it. |
1346 | - */ |
1347 | - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || |
1348 | - static_cpu_has(X86_FEATURE_AMD_SSBD)) { |
1349 | - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; |
1350 | - } |
1351 | - |
1352 | /* |
1353 | * We have three CPU feature flags that are in play here: |
1354 | * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. |
1355 | @@ -1448,7 +1652,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) |
1356 | x86_amd_ssb_disable(); |
1357 | } else { |
1358 | x86_spec_ctrl_base |= SPEC_CTRL_SSBD; |
1359 | - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1360 | + write_spec_ctrl_current(x86_spec_ctrl_base, true); |
1361 | } |
1362 | } |
1363 | |
1364 | @@ -1665,7 +1869,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) |
1365 | void x86_spec_ctrl_setup_ap(void) |
1366 | { |
1367 | if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) |
1368 | - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); |
1369 | + write_spec_ctrl_current(x86_spec_ctrl_base, true); |
1370 | |
1371 | if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) |
1372 | x86_amd_ssb_disable(); |
1373 | @@ -1900,7 +2104,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) |
1374 | |
1375 | static char *stibp_state(void) |
1376 | { |
1377 | - if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) |
1378 | + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) |
1379 | return ""; |
1380 | |
1381 | switch (spectre_v2_user_stibp) { |
1382 | @@ -1934,7 +2138,7 @@ static char *pbrsb_eibrs_state(void) |
1383 | { |
1384 | if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { |
1385 | if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || |
1386 | - boot_cpu_has(X86_FEATURE_RETPOLINE)) |
1387 | + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) |
1388 | return ", PBRSB-eIBRS: SW sequence"; |
1389 | else |
1390 | return ", PBRSB-eIBRS: Vulnerable"; |
1391 | @@ -1970,6 +2174,11 @@ static ssize_t srbds_show_state(char *buf) |
1392 | return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); |
1393 | } |
1394 | |
1395 | +static ssize_t retbleed_show_state(char *buf) |
1396 | +{ |
1397 | + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); |
1398 | +} |
1399 | + |
1400 | static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, |
1401 | char *buf, unsigned int bug) |
1402 | { |
1403 | @@ -2016,6 +2225,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr |
1404 | case X86_BUG_MMIO_UNKNOWN: |
1405 | return mmio_stale_data_show_state(buf); |
1406 | |
1407 | + case X86_BUG_RETBLEED: |
1408 | + return retbleed_show_state(buf); |
1409 | + |
1410 | default: |
1411 | break; |
1412 | } |
1413 | @@ -2075,4 +2287,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at |
1414 | else |
1415 | return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); |
1416 | } |
1417 | + |
1418 | +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) |
1419 | +{ |
1420 | + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); |
1421 | +} |
1422 | #endif |
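
[retbleed_show_state() above indexes retbleed_strings[], an array this patch defines earlier in bugs.c, outside this excerpt. A plausible sketch, mirroring the other *_strings tables in this file; the enum names and entry set here are illustrative assumptions, not copied from the patch:

    static const char * const retbleed_strings[] = {
            [RETBLEED_MITIGATION_NONE]  = "Vulnerable",            /* assumed */
            [RETBLEED_MITIGATION_IBPB]  = "Mitigation: IBPB",      /* assumed */
            [RETBLEED_MITIGATION_IBRS]  = "Mitigation: IBRS",      /* assumed */
            [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", /* assumed */
    };

Userspace reads the result from /sys/devices/system/cpu/vulnerabilities/retbleed via the sysfs attribute wired up in the drivers/base/cpu.c hunks further down.]
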
1423 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
1424 | index 59413e741ecf1..5e1e32f1086ba 100644 |
1425 | --- a/arch/x86/kernel/cpu/common.c |
1426 | +++ b/arch/x86/kernel/cpu/common.c |
1427 | @@ -1102,48 +1102,60 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { |
1428 | {} |
1429 | }; |
1430 | |
1431 | +#define VULNBL(vendor, family, model, blacklist) \ |
1432 | + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) |
1433 | + |
1434 | #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ |
1435 | X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ |
1436 | INTEL_FAM6_##model, steppings, \ |
1437 | X86_FEATURE_ANY, issues) |
1438 | |
1439 | +#define VULNBL_AMD(family, blacklist) \ |
1440 | + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) |
1441 | + |
1442 | +#define VULNBL_HYGON(family, blacklist) \ |
1443 | + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) |
1444 | + |
1445 | #define SRBDS BIT(0) |
1446 | /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ |
1447 | #define MMIO BIT(1) |
1448 | /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ |
1449 | #define MMIO_SBDS BIT(2) |
1450 | +/* CPU is affected by RETbleed, speculating where you would not expect it */ |
1451 | +#define RETBLEED BIT(3) |
1452 | |
1453 | static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { |
1454 | VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), |
1455 | VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), |
1456 | VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), |
1457 | VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), |
1458 | - VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), |
1459 | - VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), |
1460 | + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), |
1461 | + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), |
1462 | VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), |
1463 | VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), |
1464 | VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), |
1465 | - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), |
1466 | - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), |
1467 | - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | |
1468 | - BIT(7) | BIT(0xB), MMIO), |
1469 | - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), |
1470 | - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), |
1471 | - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), |
1472 | - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), |
1473 | - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), |
1474 | - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), |
1475 | - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), |
1476 | - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), |
1477 | - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), |
1478 | - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), |
1479 | - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), |
1480 | - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), |
1481 | - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), |
1482 | - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), |
1483 | - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), |
1484 | + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), |
1485 | + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), |
1486 | + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), |
1487 | + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), |
1488 | + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), |
1489 | + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), |
1490 | + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), |
1491 | + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), |
1492 | + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), |
1493 | + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), |
1494 | + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), |
1495 | + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), |
1496 | + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), |
1497 | + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), |
1498 | + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), |
1499 | VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), |
1500 | - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), |
1501 | + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), |
1502 | + |
1503 | + VULNBL_AMD(0x15, RETBLEED), |
1504 | + VULNBL_AMD(0x16, RETBLEED), |
1505 | + VULNBL_AMD(0x17, RETBLEED), |
1506 | + VULNBL_HYGON(0x18, RETBLEED), |
1507 | {} |
1508 | }; |
1509 | |
1510 | @@ -1251,6 +1263,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
1511 | setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); |
1512 | } |
1513 | |
1514 | + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { |
1515 | + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) |
1516 | + setup_force_cpu_bug(X86_BUG_RETBLEED); |
1517 | + } |
1518 | + |
1519 | if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && |
1520 | !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && |
1521 | !(ia32_cap & ARCH_CAP_PBRSB_NO)) |
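
[The new blacklist entries are consulted through cpu_matches(), which wraps x86_match_cpu() and tests the per-entry issue bitmask stashed in the table's driver_data field. Roughly, as a sketch of the pre-existing helper in common.c (not part of this patch):

    static bool __init cpu_matches(const struct x86_cpu_id *table,
                                   unsigned long which)
    {
            const struct x86_cpu_id *m = x86_match_cpu(table);

            return m && !!(m->driver_data & which);
    }

So cpu_matches(cpu_vuln_blacklist, RETBLEED) in the hunk above is true exactly when the running CPU hits a table entry whose issue mask has BIT(3) set.]
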
1522 | diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c |
1523 | index 2f163e6646b6f..ad6776081e60d 100644 |
1524 | --- a/arch/x86/kernel/cpu/match.c |
1525 | +++ b/arch/x86/kernel/cpu/match.c |
1526 | @@ -16,12 +16,17 @@ |
1527 | * respective wildcard entries. |
1528 | * |
1529 | * A typical table entry would be to match a specific CPU |
1530 | - * { X86_VENDOR_INTEL, 6, 0x12 } |
1531 | - * or to match a specific CPU feature |
1532 | - * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } |
1533 | + * |
1534 | + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, |
1535 | + * X86_FEATURE_ANY, NULL); |
1536 | * |
1537 | * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, |
1538 | - * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) |
1539 | + * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) |
1540 | + * |
1541 | + * asm/cpu_device_id.h contains a set of useful macros which are shortcuts |
1542 | + * for various common selections. The above can be shortened to: |
1543 | + * |
1544 | + * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL); |
1545 | * |
1546 | * Arrays used to match for this should also be declared using |
1547 | * MODULE_DEVICE_TABLE(x86cpu, ...) |
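
[Following the updated comment's own advice, a hypothetical driver-side match table would look like the sketch below; the table and function names are made up for illustration:

    #include <linux/module.h>
    #include <asm/cpu_device_id.h>

    static const struct x86_cpu_id example_cpu_ids[] = {
            X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL),
            {}      /* terminating empty entry */
    };
    MODULE_DEVICE_TABLE(x86cpu, example_cpu_ids);

    static int __init example_init(void)
    {
            if (!x86_match_cpu(example_cpu_ids))
                    return -ENODEV;
            return 0;
    }
    module_init(example_init);
    MODULE_LICENSE("GPL");
]
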
1548 | diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c |
1549 | index 53004dbd55c47..a03e309a0ac5f 100644 |
1550 | --- a/arch/x86/kernel/cpu/scattered.c |
1551 | +++ b/arch/x86/kernel/cpu/scattered.c |
1552 | @@ -26,6 +26,7 @@ struct cpuid_bit { |
1553 | static const struct cpuid_bit cpuid_bits[] = { |
1554 | { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, |
1555 | { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, |
1556 | + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, |
1557 | { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, |
1558 | { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, |
1559 | { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, |
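
[Each cpuid_bits[] entry is { feature, register, bit, leaf, subleaf }, so the added line maps X86_FEATURE_RRSBA_CTRL to CPUID.(EAX=7,ECX=2):EDX bit 2. The scattered-feature scan does the equivalent of this sketch:

    unsigned int eax, ebx, ecx, edx;

    cpuid_count(0x00000007, 2, &eax, &ebx, &ecx, &edx);
    if (edx & BIT(2))
            set_cpu_cap(c, X86_FEATURE_RRSBA_CTRL);
]
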
1560 | diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c |
1561 | index 068715a52ac10..87cfd2ee9ca0d 100644 |
1562 | --- a/arch/x86/kernel/process.c |
1563 | +++ b/arch/x86/kernel/process.c |
1564 | @@ -449,7 +449,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, |
1565 | } |
1566 | |
1567 | if (updmsr) |
1568 | - wrmsrl(MSR_IA32_SPEC_CTRL, msr); |
1569 | + write_spec_ctrl_current(msr, false); |
1570 | } |
1571 | |
1572 | static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) |
1573 | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c |
1574 | index 1efcc7d4bc88e..3db407e3c4166 100644 |
1575 | --- a/arch/x86/kvm/svm.c |
1576 | +++ b/arch/x86/kvm/svm.c |
1577 | @@ -47,6 +47,7 @@ |
1578 | #include <asm/kvm_para.h> |
1579 | #include <asm/irq_remapping.h> |
1580 | #include <asm/spec-ctrl.h> |
1581 | +#include <asm/cpu_device_id.h> |
1582 | |
1583 | #include <asm/virtext.h> |
1584 | #include "trace.h" |
1585 | diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c |
1586 | index 34ee4835b0177..a7b62a00913e5 100644 |
1587 | --- a/arch/x86/kvm/vmx/nested.c |
1588 | +++ b/arch/x86/kvm/vmx/nested.c |
1589 | @@ -11,6 +11,7 @@ |
1590 | #include "mmu.h" |
1591 | #include "nested.h" |
1592 | #include "trace.h" |
1593 | +#include "vmx.h" |
1594 | #include "x86.h" |
1595 | |
1596 | static bool __read_mostly enable_shadow_vmcs = 1; |
1597 | @@ -2863,35 +2864,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) |
1598 | vmx->loaded_vmcs->host_state.cr4 = cr4; |
1599 | } |
1600 | |
1601 | - asm( |
1602 | - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ |
1603 | - "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" |
1604 | - "je 1f \n\t" |
1605 | - __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" |
1606 | - "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" |
1607 | - "1: \n\t" |
1608 | - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ |
1609 | - |
1610 | - /* Check if vmlaunch or vmresume is needed */ |
1611 | - "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" |
1612 | - |
1613 | - /* |
1614 | - * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set |
1615 | - * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail |
1616 | - * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the |
1617 | - * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. |
1618 | - */ |
1619 | - "call vmx_vmenter\n\t" |
1620 | - |
1621 | - CC_SET(be) |
1622 | - : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) |
1623 | - : [HOST_RSP]"r"((unsigned long)HOST_RSP), |
1624 | - [loaded_vmcs]"r"(vmx->loaded_vmcs), |
1625 | - [launched]"i"(offsetof(struct loaded_vmcs, launched)), |
1626 | - [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), |
1627 | - [wordsize]"i"(sizeof(ulong)) |
1628 | - : "memory" |
1629 | - ); |
1630 | + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, |
1631 | + __vmx_vcpu_run_flags(vmx)); |
1632 | |
1633 | if (vmx->msr_autoload.host.nr) |
1634 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); |
1635 | diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h |
1636 | new file mode 100644 |
1637 | index 0000000000000..edc3f16cc1896 |
1638 | --- /dev/null |
1639 | +++ b/arch/x86/kvm/vmx/run_flags.h |
1640 | @@ -0,0 +1,8 @@ |
1641 | +/* SPDX-License-Identifier: GPL-2.0 */ |
1642 | +#ifndef __KVM_X86_VMX_RUN_FLAGS_H |
1643 | +#define __KVM_X86_VMX_RUN_FLAGS_H |
1644 | + |
1645 | +#define VMX_RUN_VMRESUME (1 << 0) |
1646 | +#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) |
1647 | + |
1648 | +#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ |
1649 | diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S |
1650 | index 946d9205c3b6d..2850670c38bb0 100644 |
1651 | --- a/arch/x86/kvm/vmx/vmenter.S |
1652 | +++ b/arch/x86/kvm/vmx/vmenter.S |
1653 | @@ -4,6 +4,7 @@ |
1654 | #include <asm/bitsperlong.h> |
1655 | #include <asm/kvm_vcpu_regs.h> |
1656 | #include <asm/nospec-branch.h> |
1657 | +#include "run_flags.h" |
1658 | |
1659 | #define WORD_SIZE (BITS_PER_LONG / 8) |
1660 | |
1661 | @@ -29,78 +30,12 @@ |
1662 | |
1663 | .text |
1664 | |
1665 | -/** |
1666 | - * vmx_vmenter - VM-Enter the current loaded VMCS |
1667 | - * |
1668 | - * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME |
1669 | - * |
1670 | - * Returns: |
1671 | - * %RFLAGS.CF is set on VM-Fail Invalid |
1672 | - * %RFLAGS.ZF is set on VM-Fail Valid |
1673 | - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit |
1674 | - * |
1675 | - * Note that VMRESUME/VMLAUNCH fall-through and return directly if |
1676 | - * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump |
1677 | - * to vmx_vmexit. |
1678 | - */ |
1679 | -ENTRY(vmx_vmenter) |
1680 | - /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ |
1681 | - je 2f |
1682 | - |
1683 | -1: vmresume |
1684 | - ret |
1685 | - |
1686 | -2: vmlaunch |
1687 | - ret |
1688 | - |
1689 | -3: cmpb $0, kvm_rebooting |
1690 | - je 4f |
1691 | - ret |
1692 | -4: ud2 |
1693 | - |
1694 | - .pushsection .fixup, "ax" |
1695 | -5: jmp 3b |
1696 | - .popsection |
1697 | - |
1698 | - _ASM_EXTABLE(1b, 5b) |
1699 | - _ASM_EXTABLE(2b, 5b) |
1700 | - |
1701 | -ENDPROC(vmx_vmenter) |
1702 | - |
1703 | -/** |
1704 | - * vmx_vmexit - Handle a VMX VM-Exit |
1705 | - * |
1706 | - * Returns: |
1707 | - * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit |
1708 | - * |
1709 | - * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump |
1710 | - * here after hardware loads the host's state, i.e. this is the destination |
1711 | - * referred to by VMCS.HOST_RIP. |
1712 | - */ |
1713 | -ENTRY(vmx_vmexit) |
1714 | -#ifdef CONFIG_RETPOLINE |
1715 | - ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE |
1716 | - /* Preserve guest's RAX, it's used to stuff the RSB. */ |
1717 | - push %_ASM_AX |
1718 | - |
1719 | - /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ |
1720 | - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE |
1721 | - |
1722 | - /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ |
1723 | - or $1, %_ASM_AX |
1724 | - |
1725 | - pop %_ASM_AX |
1726 | -.Lvmexit_skip_rsb: |
1727 | -#endif |
1728 | - ISSUE_UNBALANCED_RET_GUARD X86_FEATURE_RSB_VMEXIT_LITE |
1729 | - ret |
1730 | -ENDPROC(vmx_vmexit) |
1731 | - |
1732 | /** |
1733 | * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode |
1734 | - * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) |
1735 | + * @vmx: struct vcpu_vmx * |
1736 | * @regs: unsigned long * (to guest registers) |
1737 | - * @launched: %true if the VMCS has been launched |
1738 | + * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH |
1739 | + * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl |
1740 | * |
1741 | * Returns: |
1742 | * 0 on VM-Exit, 1 on VM-Fail |
1743 | @@ -119,24 +54,29 @@ ENTRY(__vmx_vcpu_run) |
1744 | #endif |
1745 | push %_ASM_BX |
1746 | |
1747 | + /* Save @vmx for SPEC_CTRL handling */ |
1748 | + push %_ASM_ARG1 |
1749 | + |
1750 | + /* Save @flags for SPEC_CTRL handling */ |
1751 | + push %_ASM_ARG3 |
1752 | + |
1753 | /* |
1754 | * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and |
1755 | * @regs is needed after VM-Exit to save the guest's register values. |
1756 | */ |
1757 | push %_ASM_ARG2 |
1758 | |
1759 | - /* Copy @launched to BL, _ASM_ARG3 is volatile. */ |
1760 | + /* Copy @flags to BL, _ASM_ARG3 is volatile. */ |
1761 | mov %_ASM_ARG3B, %bl |
1762 | |
1763 | - /* Adjust RSP to account for the CALL to vmx_vmenter(). */ |
1764 | - lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 |
1765 | + lea (%_ASM_SP), %_ASM_ARG2 |
1766 | call vmx_update_host_rsp |
1767 | |
1768 | /* Load @regs to RAX. */ |
1769 | mov (%_ASM_SP), %_ASM_AX |
1770 | |
1771 | /* Check if vmlaunch or vmresume is needed */ |
1772 | - cmpb $0, %bl |
1773 | + testb $VMX_RUN_VMRESUME, %bl |
1774 | |
1775 | /* Load guest registers. Don't clobber flags. */ |
1776 | mov VCPU_RBX(%_ASM_AX), %_ASM_BX |
1777 | @@ -158,11 +98,25 @@ ENTRY(__vmx_vcpu_run) |
1778 | /* Load guest RAX. This kills the @regs pointer! */ |
1779 | mov VCPU_RAX(%_ASM_AX), %_ASM_AX |
1780 | |
1781 | - /* Enter guest mode */ |
1782 | - call vmx_vmenter |
1783 | + /* Check EFLAGS.ZF from 'testb' above */ |
1784 | + jz .Lvmlaunch |
1785 | |
1786 | - /* Jump on VM-Fail. */ |
1787 | - jbe 2f |
1788 | +/* |
1789 | + * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at |
1790 | + * the 'vmx_vmexit' label below. |
1791 | + */ |
1792 | +.Lvmresume: |
1793 | + vmresume |
1794 | + jmp .Lvmfail |
1795 | + |
1796 | +.Lvmlaunch: |
1797 | + vmlaunch |
1798 | + jmp .Lvmfail |
1799 | + |
1800 | + _ASM_EXTABLE(.Lvmresume, .Lfixup) |
1801 | + _ASM_EXTABLE(.Lvmlaunch, .Lfixup) |
1802 | + |
1803 | +SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) |
1804 | |
1805 | /* Temporarily save guest's RAX. */ |
1806 | push %_ASM_AX |
1807 | @@ -189,19 +143,21 @@ ENTRY(__vmx_vcpu_run) |
1808 | mov %r15, VCPU_R15(%_ASM_AX) |
1809 | #endif |
1810 | |
1811 | - /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ |
1812 | - xor %eax, %eax |
1813 | + /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */ |
1814 | + xor %ebx, %ebx |
1815 | |
1816 | +.Lclear_regs: |
1817 | /* |
1818 | - * Clear all general purpose registers except RSP and RAX to prevent |
1819 | + * Clear all general purpose registers except RSP and RBX to prevent |
1820 | * speculative use of the guest's values, even those that are reloaded |
1821 | * via the stack. In theory, an L1 cache miss when restoring registers |
1822 | * could lead to speculative execution with the guest's values. |
1823 | * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially |
1824 | * free. RSP and RAX are exempt as RSP is restored by hardware during |
1825 | - * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. |
1826 | + * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return |
1827 | + * value. |
1828 | */ |
1829 | -1: xor %ebx, %ebx |
1830 | + xor %eax, %eax |
1831 | xor %ecx, %ecx |
1832 | xor %edx, %edx |
1833 | xor %esi, %esi |
1834 | @@ -220,8 +176,32 @@ ENTRY(__vmx_vcpu_run) |
1835 | |
1836 | /* "POP" @regs. */ |
1837 | add $WORD_SIZE, %_ASM_SP |
1838 | - pop %_ASM_BX |
1839 | |
1840 | + /* |
1841 | + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before |
1842 | + * the first unbalanced RET after vmexit! |
1843 | + * |
1844 | + * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB |
1845 | + * entries and (in some cases) RSB underflow. |
1846 | + * |
1847 | + * eIBRS has its own protection against poisoned RSB, so it doesn't |
1848 | + * need the RSB filling sequence. But it does need to be enabled, and a |
1849 | + * single CALL needs to retire, before the first unbalanced RET. |
1850 | + */ |
1851 | + |
1852 | + FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\ |
1853 | + X86_FEATURE_RSB_VMEXIT_LITE |
1854 | + |
1855 | + |
1856 | + pop %_ASM_ARG2 /* @flags */ |
1857 | + pop %_ASM_ARG1 /* @vmx */ |
1858 | + |
1859 | + call vmx_spec_ctrl_restore_host |
1860 | + |
1861 | + /* Put return value in AX */ |
1862 | + mov %_ASM_BX, %_ASM_AX |
1863 | + |
1864 | + pop %_ASM_BX |
1865 | #ifdef CONFIG_X86_64 |
1866 | pop %r12 |
1867 | pop %r13 |
1868 | @@ -234,11 +214,20 @@ ENTRY(__vmx_vcpu_run) |
1869 | pop %_ASM_BP |
1870 | ret |
1871 | |
1872 | - /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ |
1873 | -2: mov $1, %eax |
1874 | - jmp 1b |
1875 | +.Lfixup: |
1876 | + cmpb $0, kvm_rebooting |
1877 | + jne .Lvmfail |
1878 | + ud2 |
1879 | +.Lvmfail: |
1880 | + /* VM-Fail: set return value to 1 */ |
1881 | + mov $1, %_ASM_BX |
1882 | + jmp .Lclear_regs |
1883 | + |
1884 | ENDPROC(__vmx_vcpu_run) |
1885 | |
1886 | + |
1887 | +.section .text, "ax" |
1888 | + |
1889 | /** |
1890 | * vmread_error_trampoline - Trampoline from inline asm to vmread_error() |
1891 | * @field: VMCS field encoding that failed |
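
[Putting the vmenter.S rework together: the inlined VMLAUNCH/VMRESUME, the register clearing, and the pre-RET mitigation work can be summarized as the following C-like pseudocode. This is a sketch of the assembly above, not compilable code:

    bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
                        unsigned int flags)
    {
            push_callee_saved();
            push(vmx); push(flags); push(regs); /* needed after vmexit */

            vmx_update_host_rsp(vmx, rsp);
            load_guest_gprs(regs);

            if (flags & VMX_RUN_VMRESUME)
                    vmresume();     /* falls through only on VM-Fail */
            else
                    vmlaunch();     /* ditto */

    vmx_vmexit:                     /* VMCS.HOST_RIP points here */
            save_guest_gprs(regs);
            rbx = 0;                /* return value: VM-Exit */
    clear_regs:
            zero_all_gprs_except_rsp_rbx();

            /* both steps must precede the first unbalanced RET: */
            FILL_RETURN_BUFFER();                   /* RSB stuffing */
            vmx_spec_ctrl_restore_host(vmx, flags); /* host SPEC_CTRL */

            return rbx;             /* 0 = VM-Exit, 1 = VM-Fail */
    }
]
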
1892 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c |
1893 | index 4bd1bf6214eea..d522c9de41df9 100644 |
1894 | --- a/arch/x86/kvm/vmx/vmx.c |
1895 | +++ b/arch/x86/kvm/vmx/vmx.c |
1896 | @@ -31,6 +31,7 @@ |
1897 | #include <asm/apic.h> |
1898 | #include <asm/asm.h> |
1899 | #include <asm/cpu.h> |
1900 | +#include <asm/cpu_device_id.h> |
1901 | #include <asm/debugreg.h> |
1902 | #include <asm/desc.h> |
1903 | #include <asm/fpu/internal.h> |
1904 | @@ -358,9 +359,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) |
1905 | if (!vmx->disable_fb_clear) |
1906 | return; |
1907 | |
1908 | - rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); |
1909 | + msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); |
1910 | msr |= FB_CLEAR_DIS; |
1911 | - wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); |
1912 | + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); |
1913 | /* Cache the MSR value to avoid reading it later */ |
1914 | vmx->msr_ia32_mcu_opt_ctrl = msr; |
1915 | } |
1916 | @@ -371,7 +372,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) |
1917 | return; |
1918 | |
1919 | vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; |
1920 | - wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); |
1921 | + native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); |
1922 | } |
1923 | |
1924 | static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) |
1925 | @@ -862,6 +863,24 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) |
1926 | return true; |
1927 | } |
1928 | |
1929 | +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) |
1930 | +{ |
1931 | + unsigned int flags = 0; |
1932 | + |
1933 | + if (vmx->loaded_vmcs->launched) |
1934 | + flags |= VMX_RUN_VMRESUME; |
1935 | + |
1936 | + /* |
1937 | + * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free |
1938 | + * to change it directly without causing a vmexit. In that case read |
1939 | + * it after vmexit and store it in vmx->spec_ctrl. |
1940 | + */ |
1941 | + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) |
1942 | + flags |= VMX_RUN_SAVE_SPEC_CTRL; |
1943 | + |
1944 | + return flags; |
1945 | +} |
1946 | + |
1947 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1948 | unsigned long entry, unsigned long exit) |
1949 | { |
1950 | @@ -6539,7 +6558,30 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) |
1951 | } |
1952 | } |
1953 | |
1954 | -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); |
1955 | +void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, |
1956 | + unsigned int flags) |
1957 | +{ |
1958 | + u64 hostval = this_cpu_read(x86_spec_ctrl_current); |
1959 | + |
1960 | + if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) |
1961 | + return; |
1962 | + |
1963 | + if (flags & VMX_RUN_SAVE_SPEC_CTRL) |
1964 | + vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); |
1965 | + |
1966 | + /* |
1967 | + * If the guest/host SPEC_CTRL values differ, restore the host value. |
1968 | + * |
1969 | + * For legacy IBRS, the IBRS bit always needs to be written after |
1970 | + * transitioning from a less privileged predictor mode, regardless of |
1971 | + * whether the guest/host values differ. |
1972 | + */ |
1973 | + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || |
1974 | + vmx->spec_ctrl != hostval) |
1975 | + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); |
1976 | + |
1977 | + barrier_nospec(); |
1978 | +} |
1979 | |
1980 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
1981 | { |
1982 | @@ -6628,32 +6670,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
1983 | write_cr2(vcpu->arch.cr2); |
1984 | |
1985 | vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, |
1986 | - vmx->loaded_vmcs->launched); |
1987 | + __vmx_vcpu_run_flags(vmx)); |
1988 | |
1989 | vcpu->arch.cr2 = read_cr2(); |
1990 | |
1991 | vmx_enable_fb_clear(vmx); |
1992 | |
1993 | - /* |
1994 | - * We do not use IBRS in the kernel. If this vCPU has used the |
1995 | - * SPEC_CTRL MSR it may have left it on; save the value and |
1996 | - * turn it off. This is much more efficient than blindly adding |
1997 | - * it to the atomic save/restore list. Especially as the former |
1998 | - * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
1999 | - * |
2000 | - * For non-nested case: |
2001 | - * If the L01 MSR bitmap does not intercept the MSR, then we need to |
2002 | - * save it. |
2003 | - * |
2004 | - * For nested case: |
2005 | - * If the L02 MSR bitmap does not intercept the MSR, then we need to |
2006 | - * save it. |
2007 | - */ |
2008 | - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) |
2009 | - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); |
2010 | - |
2011 | - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); |
2012 | - |
2013 | /* All fields are clean at this point */ |
2014 | if (static_branch_unlikely(&enable_evmcs)) |
2015 | current_evmcs->hv_clean_fields |= |
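
[Note the switch from rdmsrl()/wrmsrl() to __rdmsr()/native_wrmsrl() in the FB_CLEAR and SPEC_CTRL paths above. The raw accessors avoid paravirt indirection and instrumentation hooks, which matters in this window between VM-Exit and the RSB fill / SPEC_CTRL restore, where the host should not execute indirect calls on guest-influenced predictor state. The distinction, roughly, as it appears in the diff:

    /* potentially paravirt-routed / instrumented: */
    rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
    wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);

    /* raw accessors, as used above in the not-yet-mitigated window: */
    msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
    native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
]
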
2016 | diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h |
2017 | index 7a3362ab59867..4d5be4610af84 100644 |
2018 | --- a/arch/x86/kvm/vmx/vmx.h |
2019 | +++ b/arch/x86/kvm/vmx/vmx.h |
2020 | @@ -10,6 +10,7 @@ |
2021 | #include "capabilities.h" |
2022 | #include "ops.h" |
2023 | #include "vmcs.h" |
2024 | +#include "run_flags.h" |
2025 | |
2026 | extern const u32 vmx_msr_index[]; |
2027 | extern u64 host_efer; |
2028 | @@ -336,6 +337,10 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); |
2029 | struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr); |
2030 | void pt_update_intercept_for_msr(struct vcpu_vmx *vmx); |
2031 | void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); |
2032 | +void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); |
2033 | +unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); |
2034 | +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, |
2035 | + unsigned int flags); |
2036 | |
2037 | #define POSTED_INTR_ON 0 |
2038 | #define POSTED_INTR_SN 1 |
2039 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
2040 | index d0b297583df88..c431a34522d6c 100644 |
2041 | --- a/arch/x86/kvm/x86.c |
2042 | +++ b/arch/x86/kvm/x86.c |
2043 | @@ -10329,9 +10329,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) |
2044 | } |
2045 | EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); |
2046 | |
2047 | -bool kvm_arch_has_assigned_device(struct kvm *kvm) |
2048 | +bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) |
2049 | { |
2050 | - return atomic_read(&kvm->arch.assigned_device_count); |
2051 | + return arch_atomic_read(&kvm->arch.assigned_device_count); |
2052 | } |
2053 | EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); |
2054 | |
2055 | diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c |
2056 | index 9b5edf1dfe9e9..7000c836951c5 100644 |
2057 | --- a/drivers/base/cpu.c |
2058 | +++ b/drivers/base/cpu.c |
2059 | @@ -574,6 +574,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, |
2060 | return sysfs_emit(buf, "Not affected\n"); |
2061 | } |
2062 | |
2063 | +ssize_t __weak cpu_show_retbleed(struct device *dev, |
2064 | + struct device_attribute *attr, char *buf) |
2065 | +{ |
2066 | + return sysfs_emit(buf, "Not affected\n"); |
2067 | +} |
2068 | + |
2069 | static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); |
2070 | static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); |
2071 | static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); |
2072 | @@ -584,6 +590,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); |
2073 | static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); |
2074 | static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); |
2075 | static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); |
2076 | +static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); |
2077 | |
2078 | static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
2079 | &dev_attr_meltdown.attr, |
2080 | @@ -596,6 +603,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
2081 | &dev_attr_itlb_multihit.attr, |
2082 | &dev_attr_srbds.attr, |
2083 | &dev_attr_mmio_stale_data.attr, |
2084 | + &dev_attr_retbleed.attr, |
2085 | NULL |
2086 | }; |
2087 | |
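
[The __weak definition above is the generic fallback: an architecture that knows about RETBleed (here, x86 via cpu_show_retbleed() in bugs.c earlier in this patch) supplies a strong definition that overrides it at link time, while every other architecture keeps reporting "Not affected". The pattern in miniature, as a hypothetical standalone example using the GCC attribute behind the kernel's __weak macro:

    /* generic.c */
    #include <stdio.h>

    __attribute__((weak)) const char *vuln_state(void)
    {
            return "Not affected";          /* weak default */
    }

    int main(void)
    {
            puts(vuln_state());
            return 0;
    }

    /* arch.c -- linking this in overrides the weak default */
    const char *vuln_state(void)
    {
            return "Mitigation: enabled";
    }
]
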
2088 | diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c |
2089 | index 4195834a45912..cf7ebe3bd1ad2 100644 |
2090 | --- a/drivers/cpufreq/acpi-cpufreq.c |
2091 | +++ b/drivers/cpufreq/acpi-cpufreq.c |
2092 | @@ -30,6 +30,7 @@ |
2093 | #include <asm/msr.h> |
2094 | #include <asm/processor.h> |
2095 | #include <asm/cpufeature.h> |
2096 | +#include <asm/cpu_device_id.h> |
2097 | |
2098 | MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); |
2099 | MODULE_DESCRIPTION("ACPI Processor P-States Driver"); |
2100 | diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c |
2101 | index e2df9d1121063..5107cbe2d64dd 100644 |
2102 | --- a/drivers/cpufreq/amd_freq_sensitivity.c |
2103 | +++ b/drivers/cpufreq/amd_freq_sensitivity.c |
2104 | @@ -18,6 +18,7 @@ |
2105 | |
2106 | #include <asm/msr.h> |
2107 | #include <asm/cpufeature.h> |
2108 | +#include <asm/cpu_device_id.h> |
2109 | |
2110 | #include "cpufreq_ondemand.h" |
2111 | |
2112 | diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |
2113 | index d8687868407de..b588e0e409e72 100644 |
2114 | --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |
2115 | +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |
2116 | @@ -35,7 +35,6 @@ |
2117 | #include <linux/pci.h> |
2118 | #include <linux/pm_runtime.h> |
2119 | #include <drm/drm_crtc_helper.h> |
2120 | -#include <drm/drm_damage_helper.h> |
2121 | #include <drm/drm_edid.h> |
2122 | #include <drm/drm_gem_framebuffer_helper.h> |
2123 | #include <drm/drm_fb_helper.h> |
2124 | @@ -496,7 +495,6 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, |
2125 | static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { |
2126 | .destroy = drm_gem_fb_destroy, |
2127 | .create_handle = drm_gem_fb_create_handle, |
2128 | - .dirty = drm_atomic_helper_dirtyfb, |
2129 | }; |
2130 | |
2131 | uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, |
2132 | diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c |
2133 | index 347b08b56042f..63b2212262618 100644 |
2134 | --- a/drivers/idle/intel_idle.c |
2135 | +++ b/drivers/idle/intel_idle.c |
2136 | @@ -46,11 +46,13 @@ |
2137 | #include <linux/tick.h> |
2138 | #include <trace/events/power.h> |
2139 | #include <linux/sched.h> |
2140 | +#include <linux/sched/smt.h> |
2141 | #include <linux/notifier.h> |
2142 | #include <linux/cpu.h> |
2143 | #include <linux/moduleparam.h> |
2144 | #include <asm/cpu_device_id.h> |
2145 | #include <asm/intel-family.h> |
2146 | +#include <asm/nospec-branch.h> |
2147 | #include <asm/mwait.h> |
2148 | #include <asm/msr.h> |
2149 | |
2150 | @@ -97,6 +99,12 @@ static struct cpuidle_state *cpuidle_state_table; |
2151 | */ |
2152 | #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 |
2153 | |
2154 | +/* |
2155 | + * Disable IBRS across idle (when KERNEL_IBRS); this flag is exclusive |
2156 | + * with IRQ_ENABLE above. |
2157 | + */ |
2158 | +#define CPUIDLE_FLAG_IBRS BIT(16) |
2159 | + |
2160 | /* |
2161 | * MWAIT takes an 8-bit "hint" in EAX "suggesting" |
2162 | * the C-state (top nibble) and sub-state (bottom nibble) |
2163 | @@ -107,6 +115,24 @@ static struct cpuidle_state *cpuidle_state_table; |
2164 | #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) |
2165 | #define MWAIT2flg(eax) ((eax & 0xFF) << 24) |
2166 | |
2167 | +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, |
2168 | + struct cpuidle_driver *drv, int index) |
2169 | +{ |
2170 | + bool smt_active = sched_smt_active(); |
2171 | + u64 spec_ctrl = spec_ctrl_current(); |
2172 | + int ret; |
2173 | + |
2174 | + if (smt_active) |
2175 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
2176 | + |
2177 | + ret = intel_idle(dev, drv, index); |
2178 | + |
2179 | + if (smt_active) |
2180 | + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); |
2181 | + |
2182 | + return ret; |
2183 | +} |
2184 | + |
2185 | /* |
2186 | * States are indexed by the cstate number, |
2187 | * which is also the index into the MWAIT hint array. |
2188 | @@ -605,7 +631,7 @@ static struct cpuidle_state skl_cstates[] = { |
2189 | { |
2190 | .name = "C6", |
2191 | .desc = "MWAIT 0x20", |
2192 | - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, |
2193 | + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2194 | .exit_latency = 85, |
2195 | .target_residency = 200, |
2196 | .enter = &intel_idle, |
2197 | @@ -613,7 +639,7 @@ static struct cpuidle_state skl_cstates[] = { |
2198 | { |
2199 | .name = "C7s", |
2200 | .desc = "MWAIT 0x33", |
2201 | - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, |
2202 | + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2203 | .exit_latency = 124, |
2204 | .target_residency = 800, |
2205 | .enter = &intel_idle, |
2206 | @@ -621,7 +647,7 @@ static struct cpuidle_state skl_cstates[] = { |
2207 | { |
2208 | .name = "C8", |
2209 | .desc = "MWAIT 0x40", |
2210 | - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, |
2211 | + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2212 | .exit_latency = 200, |
2213 | .target_residency = 800, |
2214 | .enter = &intel_idle, |
2215 | @@ -629,7 +655,7 @@ static struct cpuidle_state skl_cstates[] = { |
2216 | { |
2217 | .name = "C9", |
2218 | .desc = "MWAIT 0x50", |
2219 | - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, |
2220 | + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2221 | .exit_latency = 480, |
2222 | .target_residency = 5000, |
2223 | .enter = &intel_idle, |
2224 | @@ -637,7 +663,7 @@ static struct cpuidle_state skl_cstates[] = { |
2225 | { |
2226 | .name = "C10", |
2227 | .desc = "MWAIT 0x60", |
2228 | - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, |
2229 | + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2230 | .exit_latency = 890, |
2231 | .target_residency = 5000, |
2232 | .enter = &intel_idle, |
2233 | @@ -666,7 +692,7 @@ static struct cpuidle_state skx_cstates[] = { |
2234 | { |
2235 | .name = "C6", |
2236 | .desc = "MWAIT 0x20", |
2237 | - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, |
2238 | + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, |
2239 | .exit_latency = 133, |
2240 | .target_residency = 600, |
2241 | .enter = &intel_idle, |
2242 | @@ -1370,6 +1396,11 @@ static void __init intel_idle_cpuidle_driver_init(void) |
2243 | drv->states[drv->state_count] = /* structure copy */ |
2244 | cpuidle_state_table[cstate]; |
2245 | |
2246 | + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && |
2247 | + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { |
2248 | + drv->states[drv->state_count].enter = intel_idle_ibrs; |
2249 | + } |
2250 | + |
2251 | drv->state_count += 1; |
2252 | } |
2253 | |
2254 | diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c |
2255 | index 510ca69746042..c83ff610ecb6c 100644 |
2256 | --- a/fs/xfs/libxfs/xfs_attr.c |
2257 | +++ b/fs/xfs/libxfs/xfs_attr.c |
2258 | @@ -1007,7 +1007,7 @@ restart: |
2259 | * The INCOMPLETE flag means that we will find the "old" |
2260 | * attr, not the "new" one. |
2261 | */ |
2262 | - args->flags |= XFS_ATTR_INCOMPLETE; |
2263 | + args->op_flags |= XFS_DA_OP_INCOMPLETE; |
2264 | state = xfs_da_state_alloc(); |
2265 | state->args = args; |
2266 | state->mp = mp; |
2267 | diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c |
2268 | index 0c23127347aca..c86ddbf6d105b 100644 |
2269 | --- a/fs/xfs/libxfs/xfs_attr_leaf.c |
2270 | +++ b/fs/xfs/libxfs/xfs_attr_leaf.c |
2271 | @@ -2345,8 +2345,8 @@ xfs_attr3_leaf_lookup_int( |
2272 | * If we are looking for INCOMPLETE entries, show only those. |
2273 | * If we are looking for complete entries, show only those. |
2274 | */ |
2275 | - if ((args->flags & XFS_ATTR_INCOMPLETE) != |
2276 | - (entry->flags & XFS_ATTR_INCOMPLETE)) { |
2277 | + if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) != |
2278 | + !!(entry->flags & XFS_ATTR_INCOMPLETE)) { |
2279 | continue; |
2280 | } |
2281 | if (entry->flags & XFS_ATTR_LOCAL) { |
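
[The !! double negation above is load-bearing: the old code masked both sides with the same XFS_ATTR_INCOMPLETE bit, so a plain comparison worked, but the two sides now test different flag words with different bit values (XFS_DA_OP_INCOMPLETE is 0x0040 per the xfs_da_btree.h hunk below, while XFS_ATTR_INCOMPLETE is a separate on-disk entry bit). A sketch of the pitfall, with an illustrative value for the on-disk bit:

    #define OP_INCOMPLETE   0x0040  /* operation flag, from this patch */
    #define ATTR_INCOMPLETE 0x0080  /* on-disk entry flag (illustrative) */

    /* both set: 0x40 != 0x80 would wrongly report a mismatch ... */
    (op_flags & OP_INCOMPLETE) != (entry_flags & ATTR_INCOMPLETE)

    /* ... so normalize each side to 0/1 and compare the predicates: */
    !!(op_flags & OP_INCOMPLETE) != !!(entry_flags & ATTR_INCOMPLETE)
]
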
2282 | diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h |
2283 | index 7b74e18becff7..38c05d6ae2aa4 100644 |
2284 | --- a/fs/xfs/libxfs/xfs_attr_leaf.h |
2285 | +++ b/fs/xfs/libxfs/xfs_attr_leaf.h |
2286 | @@ -17,13 +17,27 @@ struct xfs_inode; |
2287 | struct xfs_trans; |
2288 | |
2289 | /* |
2290 | - * Used to keep a list of "remote value" extents when unlinking an inode. |
2291 | + * Incore version of the attribute leaf header. |
2292 | */ |
2293 | -typedef struct xfs_attr_inactive_list { |
2294 | - xfs_dablk_t valueblk; /* block number of value bytes */ |
2295 | - int valuelen; /* number of bytes in value */ |
2296 | -} xfs_attr_inactive_list_t; |
2297 | - |
2298 | +struct xfs_attr3_icleaf_hdr { |
2299 | + uint32_t forw; |
2300 | + uint32_t back; |
2301 | + uint16_t magic; |
2302 | + uint16_t count; |
2303 | + uint16_t usedbytes; |
2304 | + /* |
2305 | + * Firstused is 32-bit here instead of 16-bit like the on-disk variant |
2306 | + * to support maximum fsb size of 64k without overflow issues throughout |
2307 | + * the attr code. Instead, the overflow condition is handled on |
2308 | + * conversion to/from disk. |
2309 | + */ |
2310 | + uint32_t firstused; |
2311 | + __u8 holes; |
2312 | + struct { |
2313 | + uint16_t base; |
2314 | + uint16_t size; |
2315 | + } freemap[XFS_ATTR_LEAF_MAPSIZE]; |
2316 | +}; |
2317 | |
2318 | /*======================================================================== |
2319 | * Function prototypes for the kernel. |
2320 | diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c |
2321 | index 3e39b7d40f256..de9096b8a47c6 100644 |
2322 | --- a/fs/xfs/libxfs/xfs_attr_remote.c |
2323 | +++ b/fs/xfs/libxfs/xfs_attr_remote.c |
2324 | @@ -24,6 +24,23 @@ |
2325 | |
2326 | #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ |
2327 | |
2328 | +/* |
2329 | + * Remote Attribute Values |
2330 | + * ======================= |
2331 | + * |
2332 | + * Remote extended attribute values are conceptually simple -- they're written |
2333 | + * to data blocks mapped by an inode's attribute fork, and they have an upper |
2334 | + * size limit of 64k. Setting a value does not involve the XFS log. |
2335 | + * |
2336 | + * However, on a v5 filesystem, maximally sized remote attr values require one |
2337 | + * block more than 64k worth of space to hold both the value and the remote |
2338 | + * attribute value header (64 bytes). On a 4k block filesystem this results in |
2339 | + * a 68k buffer; on a 64k block filesystem, this would be a 128k buffer. Note |
2340 | + * that the log format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE |
2341 | + * length (64k). Therefore, we /must/ ensure that remote attribute value |
2342 | + * buffers never touch the logging system and thus never have a log item. |
2343 | + */ |
2344 | + |
2345 | /* |
2346 | * Each contiguous block has a header, so it is not just a simple attribute |
2347 | * length to FSB conversion. |
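
[Concretely, for the "one block more than 64k" statement in the comment above: since every remote-value block carries a header on v5, each block holds slightly less than a full block of value bytes. A back-of-the-envelope sketch of the conversion, assuming the 64-byte per-block header the comment states:

    /* blocks needed for a 64k (65536-byte) value on a 4k-block fs */
    unsigned int per_block = 4096 - 64;                         /* usable bytes per block */
    unsigned int blocks = (65536 + per_block - 1) / per_block;  /* = 17 */
    /* 17 blocks * 4k = 68k of buffer space, matching the comment */
]
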
2348 | @@ -400,17 +417,25 @@ xfs_attr_rmtval_get( |
2349 | (map[i].br_startblock != HOLESTARTBLOCK)); |
2350 | dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); |
2351 | dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); |
2352 | - error = xfs_trans_read_buf(mp, args->trans, |
2353 | - mp->m_ddev_targp, |
2354 | - dblkno, dblkcnt, 0, &bp, |
2355 | - &xfs_attr3_rmt_buf_ops); |
2356 | - if (error) |
2357 | + bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, |
2358 | + &xfs_attr3_rmt_buf_ops); |
2359 | + if (!bp) |
2360 | + return -ENOMEM; |
2361 | + error = bp->b_error; |
2362 | + if (error) { |
2363 | + xfs_buf_ioerror_alert(bp, __func__); |
2364 | + xfs_buf_relse(bp); |
2365 | + |
2366 | + /* bad CRC means corrupted metadata */ |
2367 | + if (error == -EFSBADCRC) |
2368 | + error = -EFSCORRUPTED; |
2369 | return error; |
2370 | + } |
2371 | |
2372 | error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, |
2373 | &offset, &valuelen, |
2374 | &dst); |
2375 | - xfs_trans_brelse(args->trans, bp); |
2376 | + xfs_buf_relse(bp); |
2377 | if (error) |
2378 | return error; |
2379 | |
2380 | @@ -551,6 +576,32 @@ xfs_attr_rmtval_set( |
2381 | return 0; |
2382 | } |
2383 | |
2384 | +/* Mark stale any incore buffers for the remote value. */ |
2385 | +int |
2386 | +xfs_attr_rmtval_stale( |
2387 | + struct xfs_inode *ip, |
2388 | + struct xfs_bmbt_irec *map, |
2389 | + xfs_buf_flags_t incore_flags) |
2390 | +{ |
2391 | + struct xfs_mount *mp = ip->i_mount; |
2392 | + struct xfs_buf *bp; |
2393 | + |
2394 | + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2395 | + |
2396 | + ASSERT((map->br_startblock != DELAYSTARTBLOCK) && |
2397 | + (map->br_startblock != HOLESTARTBLOCK)); |
2398 | + |
2399 | + bp = xfs_buf_incore(mp->m_ddev_targp, |
2400 | + XFS_FSB_TO_DADDR(mp, map->br_startblock), |
2401 | + XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags); |
2402 | + if (bp) { |
2403 | + xfs_buf_stale(bp); |
2404 | + xfs_buf_relse(bp); |
2405 | + } |
2406 | + |
2407 | + return 0; |
2408 | +} |
2409 | + |
2410 | /* |
2411 | * Remove the value associated with an attribute by deleting the |
2412 | * out-of-line buffer that it is stored on. |
2413 | @@ -559,7 +610,6 @@ int |
2414 | xfs_attr_rmtval_remove( |
2415 | struct xfs_da_args *args) |
2416 | { |
2417 | - struct xfs_mount *mp = args->dp->i_mount; |
2418 | xfs_dablk_t lblkno; |
2419 | int blkcnt; |
2420 | int error; |
2421 | @@ -574,9 +624,6 @@ xfs_attr_rmtval_remove( |
2422 | blkcnt = args->rmtblkcnt; |
2423 | while (blkcnt > 0) { |
2424 | struct xfs_bmbt_irec map; |
2425 | - struct xfs_buf *bp; |
2426 | - xfs_daddr_t dblkno; |
2427 | - int dblkcnt; |
2428 | int nmap; |
2429 | |
2430 | /* |
2431 | @@ -588,21 +635,9 @@ xfs_attr_rmtval_remove( |
2432 | if (error) |
2433 | return error; |
2434 | ASSERT(nmap == 1); |
2435 | - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && |
2436 | - (map.br_startblock != HOLESTARTBLOCK)); |
2437 | - |
2438 | - dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), |
2439 | - dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); |
2440 | - |
2441 | - /* |
2442 | - * If the "remote" value is in the cache, remove it. |
2443 | - */ |
2444 | - bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); |
2445 | - if (bp) { |
2446 | - xfs_buf_stale(bp); |
2447 | - xfs_buf_relse(bp); |
2448 | - bp = NULL; |
2449 | - } |
2450 | + error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK); |
2451 | + if (error) |
2452 | + return error; |
2453 | |
2454 | lblkno += map.br_blockcount; |
2455 | blkcnt -= map.br_blockcount; |
2456 | diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h |
2457 | index 9d20b66ad379e..6fb4572845ce8 100644 |
2458 | --- a/fs/xfs/libxfs/xfs_attr_remote.h |
2459 | +++ b/fs/xfs/libxfs/xfs_attr_remote.h |
2460 | @@ -11,5 +11,7 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); |
2461 | int xfs_attr_rmtval_get(struct xfs_da_args *args); |
2462 | int xfs_attr_rmtval_set(struct xfs_da_args *args); |
2463 | int xfs_attr_rmtval_remove(struct xfs_da_args *args); |
2464 | +int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, |
2465 | + xfs_buf_flags_t incore_flags); |
2466 | |
2467 | #endif /* __XFS_ATTR_REMOTE_H__ */ |
2468 | diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h |
2469 | index ae0bbd20d9caf..588e4674e931f 100644 |
2470 | --- a/fs/xfs/libxfs/xfs_da_btree.h |
2471 | +++ b/fs/xfs/libxfs/xfs_da_btree.h |
2472 | @@ -82,6 +82,7 @@ typedef struct xfs_da_args { |
2473 | #define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ |
2474 | #define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ |
2475 | #define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */ |
2476 | +#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */ |
2477 | |
2478 | #define XFS_DA_OP_FLAGS \ |
2479 | { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ |
2480 | @@ -89,7 +90,8 @@ typedef struct xfs_da_args { |
2481 | { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ |
2482 | { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ |
2483 | { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ |
2484 | - { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" } |
2485 | + { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \ |
2486 | + { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" } |
2487 | |
2488 | /* |
2489 | * Storage for holding state during Btree searches and split/join ops. |
2490 | @@ -124,6 +126,19 @@ typedef struct xfs_da_state { |
2491 | /* for dirv2 extrablk is data */ |
2492 | } xfs_da_state_t; |
2493 | |
2494 | +/* |
2495 | + * In-core version of the node header to abstract the differences in the v2 and |
2496 | + * v3 disk format of the headers. Callers need to convert to/from disk format as |
2497 | + * appropriate. |
2498 | + */ |
2499 | +struct xfs_da3_icnode_hdr { |
2500 | + uint32_t forw; |
2501 | + uint32_t back; |
2502 | + uint16_t magic; |
2503 | + uint16_t count; |
2504 | + uint16_t level; |
2505 | +}; |
2506 | + |
2507 | /* |
2508 | * Utility macros to aid in logging changed structure fields. |
2509 | */ |
2510 | diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c |
2511 | index b1ae572496b69..31bb250c18992 100644 |
2512 | --- a/fs/xfs/libxfs/xfs_da_format.c |
2513 | +++ b/fs/xfs/libxfs/xfs_da_format.c |
2514 | @@ -13,6 +13,7 @@ |
2515 | #include "xfs_mount.h" |
2516 | #include "xfs_inode.h" |
2517 | #include "xfs_dir2.h" |
2518 | +#include "xfs_dir2_priv.h" |
2519 | |
2520 | /* |
2521 | * Shortform directory ops |
2522 | diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h |
2523 | index ae654e06b2fb6..222ee48da5e80 100644 |
2524 | --- a/fs/xfs/libxfs/xfs_da_format.h |
2525 | +++ b/fs/xfs/libxfs/xfs_da_format.h |
2526 | @@ -93,19 +93,6 @@ struct xfs_da3_intnode { |
2527 | struct xfs_da_node_entry __btree[]; |
2528 | }; |
2529 | |
2530 | -/* |
2531 | - * In-core version of the node header to abstract the differences in the v2 and |
2532 | - * v3 disk format of the headers. Callers need to convert to/from disk format as |
2533 | - * appropriate. |
2534 | - */ |
2535 | -struct xfs_da3_icnode_hdr { |
2536 | - uint32_t forw; |
2537 | - uint32_t back; |
2538 | - uint16_t magic; |
2539 | - uint16_t count; |
2540 | - uint16_t level; |
2541 | -}; |
2542 | - |
2543 | /* |
2544 | * Directory version 2. |
2545 | * |
2546 | @@ -434,14 +421,6 @@ struct xfs_dir3_leaf_hdr { |
2547 | __be32 pad; /* 64 bit alignment */ |
2548 | }; |
2549 | |
2550 | -struct xfs_dir3_icleaf_hdr { |
2551 | - uint32_t forw; |
2552 | - uint32_t back; |
2553 | - uint16_t magic; |
2554 | - uint16_t count; |
2555 | - uint16_t stale; |
2556 | -}; |
2557 | - |
2558 | /* |
2559 | * Leaf block entry. |
2560 | */ |
2561 | @@ -520,19 +499,6 @@ struct xfs_dir3_free { |
2562 | |
2563 | #define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc) |
2564 | |
2565 | -/* |
2566 | - * In core version of the free block header, abstracted away from on-disk format |
2567 | - * differences. Use this in the code, and convert to/from the disk version using |
2568 | - * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk. |
2569 | - */ |
2570 | -struct xfs_dir3_icfree_hdr { |
2571 | - uint32_t magic; |
2572 | - uint32_t firstdb; |
2573 | - uint32_t nvalid; |
2574 | - uint32_t nused; |
2575 | - |
2576 | -}; |
2577 | - |
2578 | /* |
2579 | * Single block format. |
2580 | * |
2581 | @@ -709,29 +675,6 @@ struct xfs_attr3_leafblock { |
2582 | */ |
2583 | }; |
2584 | |
2585 | -/* |
2586 | - * incore, neutral version of the attribute leaf header |
2587 | - */ |
2588 | -struct xfs_attr3_icleaf_hdr { |
2589 | - uint32_t forw; |
2590 | - uint32_t back; |
2591 | - uint16_t magic; |
2592 | - uint16_t count; |
2593 | - uint16_t usedbytes; |
2594 | - /* |
2595 | - * firstused is 32-bit here instead of 16-bit like the on-disk variant |
2596 | - * to support maximum fsb size of 64k without overflow issues throughout |
2597 | - * the attr code. Instead, the overflow condition is handled on |
2598 | - * conversion to/from disk. |
2599 | - */ |
2600 | - uint32_t firstused; |
2601 | - __u8 holes; |
2602 | - struct { |
2603 | - uint16_t base; |
2604 | - uint16_t size; |
2605 | - } freemap[XFS_ATTR_LEAF_MAPSIZE]; |
2606 | -}; |
2607 | - |
2608 | /* |
2609 | * Special value to represent fs block size in the leaf header firstused field. |
2610 | * Only used when block size overflows the 2-bytes available on disk. |
2611 | @@ -740,8 +683,6 @@ struct xfs_attr3_icleaf_hdr { |
2612 | |
2613 | /* |
2614 | * Flags used in the leaf_entry[i].flags field. |
2615 | - * NOTE: the INCOMPLETE bit must not collide with the flags bits specified |
2616 | - * on the system call, they are "or"ed together for various operations. |
2617 | */ |
2618 | #define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ |
2619 | #define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ |
2620 | diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h |
2621 | index f542447794928..e170792c0acce 100644 |
2622 | --- a/fs/xfs/libxfs/xfs_dir2.h |
2623 | +++ b/fs/xfs/libxfs/xfs_dir2.h |
2624 | @@ -18,6 +18,8 @@ struct xfs_dir2_sf_entry; |
2625 | struct xfs_dir2_data_hdr; |
2626 | struct xfs_dir2_data_entry; |
2627 | struct xfs_dir2_data_unused; |
2628 | +struct xfs_dir3_icfree_hdr; |
2629 | +struct xfs_dir3_icleaf_hdr; |
2630 | |
2631 | extern struct xfs_name xfs_name_dotdot; |
2632 | |
2633 | diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h |
2634 | index 59f9fb2241a5f..d2eaea663e7f2 100644 |
2635 | --- a/fs/xfs/libxfs/xfs_dir2_priv.h |
2636 | +++ b/fs/xfs/libxfs/xfs_dir2_priv.h |
2637 | @@ -8,6 +8,25 @@ |
2638 | |
2639 | struct dir_context; |
2640 | |
2641 | +/* |
2642 | + * In-core version of the leaf and free block headers to abstract the |
2643 | + * differences in the v2 and v3 disk format of the headers. |
2644 | + */ |
2645 | +struct xfs_dir3_icleaf_hdr { |
2646 | + uint32_t forw; |
2647 | + uint32_t back; |
2648 | + uint16_t magic; |
2649 | + uint16_t count; |
2650 | + uint16_t stale; |
2651 | +}; |
2652 | + |
2653 | +struct xfs_dir3_icfree_hdr { |
2654 | + uint32_t magic; |
2655 | + uint32_t firstdb; |
2656 | + uint32_t nvalid; |
2657 | + uint32_t nused; |
2658 | +}; |
2659 | + |
2660 | /* xfs_dir2.c */ |
2661 | extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, |
2662 | xfs_dir2_db_t *dbp); |
2663 | diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h |
2664 | index c968b60cee15b..28203b626f6a2 100644 |
2665 | --- a/fs/xfs/libxfs/xfs_format.h |
2666 | +++ b/fs/xfs/libxfs/xfs_format.h |
2667 | @@ -1540,6 +1540,13 @@ typedef struct xfs_bmdr_block { |
2668 | #define BMBT_BLOCKCOUNT_BITLEN 21 |
2669 | |
2670 | #define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1) |
2671 | +#define BMBT_BLOCKCOUNT_MASK ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1) |
2672 | + |
2673 | +/* |
2674 | + * bmbt records have a file offset (block) field that is 54 bits wide, so this |
2675 | + * is the largest xfs_fileoff_t that we ever expect to see. |
2676 | + */ |
2677 | +#define XFS_MAX_FILEOFF (BMBT_STARTOFF_MASK + BMBT_BLOCKCOUNT_MASK) |
2678 | |
2679 | typedef struct xfs_bmbt_rec { |
2680 | __be64 l0, l1; |
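
[The new XFS_MAX_FILEOFF constant is just the sum of the two masks: with a 54-bit startoff and a 21-bit blockcount, the largest file offset (in blocks) an extent record can describe is bounded by (2^54 - 1) + (2^21 - 1). A standalone check:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t startoff_mask   = (1ULL << 54) - 1; /* BMBT_STARTOFF_MASK */
            uint64_t blockcount_mask = (1ULL << 21) - 1; /* BMBT_BLOCKCOUNT_MASK */

            printf("XFS_MAX_FILEOFF = 0x%016llx\n",
                   (unsigned long long)(startoff_mask + blockcount_mask));
            return 0;
    }
]
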
2681 | diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c |
2682 | index 766b1386402a0..9c88203b537b1 100644 |
2683 | --- a/fs/xfs/xfs_attr_inactive.c |
2684 | +++ b/fs/xfs/xfs_attr_inactive.c |
2685 | @@ -25,22 +25,18 @@ |
2686 | #include "xfs_error.h" |
2687 | |
2688 | /* |
2689 | - * Look at all the extents for this logical region, |
2690 | - * invalidate any buffers that are incore/in transactions. |
2691 | + * Invalidate any incore buffers associated with this remote attribute value |
2692 | + * extent. We never log remote attribute value buffers, which means that they |
2693 | + * won't be attached to a transaction and are therefore safe to mark stale. |
2694 | + * The actual bunmapi will be taken care of later. |
2695 | */ |
2696 | STATIC int |
2697 | -xfs_attr3_leaf_freextent( |
2698 | - struct xfs_trans **trans, |
2699 | +xfs_attr3_rmt_stale( |
2700 | struct xfs_inode *dp, |
2701 | xfs_dablk_t blkno, |
2702 | int blkcnt) |
2703 | { |
2704 | struct xfs_bmbt_irec map; |
2705 | - struct xfs_buf *bp; |
2706 | - xfs_dablk_t tblkno; |
2707 | - xfs_daddr_t dblkno; |
2708 | - int tblkcnt; |
2709 | - int dblkcnt; |
2710 | int nmap; |
2711 | int error; |
2712 | |
2713 | @@ -48,47 +44,28 @@ xfs_attr3_leaf_freextent( |
2714 | * Roll through the "value", invalidating the attribute value's |
2715 | * blocks. |
2716 | */ |
2717 | - tblkno = blkno; |
2718 | - tblkcnt = blkcnt; |
2719 | - while (tblkcnt > 0) { |
2720 | + while (blkcnt > 0) { |
2721 | /* |
2722 | * Try to remember where we decided to put the value. |
2723 | */ |
2724 | nmap = 1; |
2725 | - error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt, |
2726 | + error = xfs_bmapi_read(dp, (xfs_fileoff_t)blkno, blkcnt, |
2727 | &map, &nmap, XFS_BMAPI_ATTRFORK); |
2728 | - if (error) { |
2729 | + if (error) |
2730 | return error; |
2731 | - } |
2732 | ASSERT(nmap == 1); |
2733 | - ASSERT(map.br_startblock != DELAYSTARTBLOCK); |
2734 | |
2735 | /* |
2736 | - * If it's a hole, these are already unmapped |
2737 | - * so there's nothing to invalidate. |
2738 | + * Mark any incore buffers for the remote value as stale. We |
2739 | + * never log remote attr value buffers, so the buffer should be |
2740 | + * easy to kill. |
2741 | */ |
2742 | - if (map.br_startblock != HOLESTARTBLOCK) { |
2743 | - |
2744 | - dblkno = XFS_FSB_TO_DADDR(dp->i_mount, |
2745 | - map.br_startblock); |
2746 | - dblkcnt = XFS_FSB_TO_BB(dp->i_mount, |
2747 | - map.br_blockcount); |
2748 | - bp = xfs_trans_get_buf(*trans, |
2749 | - dp->i_mount->m_ddev_targp, |
2750 | - dblkno, dblkcnt, 0); |
2751 | - if (!bp) |
2752 | - return -ENOMEM; |
2753 | - xfs_trans_binval(*trans, bp); |
2754 | - /* |
2755 | - * Roll to next transaction. |
2756 | - */ |
2757 | - error = xfs_trans_roll_inode(trans, dp); |
2758 | - if (error) |
2759 | - return error; |
2760 | - } |
2761 | + error = xfs_attr_rmtval_stale(dp, &map, 0); |
2762 | + if (error) |
2763 | + return error; |
2764 | |
2765 | - tblkno += map.br_blockcount; |
2766 | - tblkcnt -= map.br_blockcount; |
2767 | + blkno += map.br_blockcount; |
2768 | + blkcnt -= map.br_blockcount; |
2769 | } |
2770 | |
2771 | return 0; |
2772 | @@ -102,86 +79,45 @@ xfs_attr3_leaf_freextent( |
2773 | */ |
2774 | STATIC int |
2775 | xfs_attr3_leaf_inactive( |
2776 | - struct xfs_trans **trans, |
2777 | - struct xfs_inode *dp, |
2778 | - struct xfs_buf *bp) |
2779 | + struct xfs_trans **trans, |
2780 | + struct xfs_inode *dp, |
2781 | + struct xfs_buf *bp) |
2782 | { |
2783 | - struct xfs_attr_leafblock *leaf; |
2784 | - struct xfs_attr3_icleaf_hdr ichdr; |
2785 | - struct xfs_attr_leaf_entry *entry; |
2786 | + struct xfs_attr3_icleaf_hdr ichdr; |
2787 | + struct xfs_mount *mp = bp->b_mount; |
2788 | + struct xfs_attr_leafblock *leaf = bp->b_addr; |
2789 | + struct xfs_attr_leaf_entry *entry; |
2790 | struct xfs_attr_leaf_name_remote *name_rmt; |
2791 | - struct xfs_attr_inactive_list *list; |
2792 | - struct xfs_attr_inactive_list *lp; |
2793 | - int error; |
2794 | - int count; |
2795 | - int size; |
2796 | - int tmp; |
2797 | - int i; |
2798 | - struct xfs_mount *mp = bp->b_mount; |
2799 | + int error = 0; |
2800 | + int i; |
2801 | |
2802 | - leaf = bp->b_addr; |
2803 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); |
2804 | |
2805 | /* |
2806 | - * Count the number of "remote" value extents. |
2807 | + * Find the remote value extents for this leaf and invalidate their |
2808 | + * incore buffers. |
2809 | */ |
2810 | - count = 0; |
2811 | entry = xfs_attr3_leaf_entryp(leaf); |
2812 | for (i = 0; i < ichdr.count; entry++, i++) { |
2813 | - if (be16_to_cpu(entry->nameidx) && |
2814 | - ((entry->flags & XFS_ATTR_LOCAL) == 0)) { |
2815 | - name_rmt = xfs_attr3_leaf_name_remote(leaf, i); |
2816 | - if (name_rmt->valueblk) |
2817 | - count++; |
2818 | - } |
2819 | - } |
2820 | - |
2821 | - /* |
2822 | - * If there are no "remote" values, we're done. |
2823 | - */ |
2824 | - if (count == 0) { |
2825 | - xfs_trans_brelse(*trans, bp); |
2826 | - return 0; |
2827 | - } |
2828 | + int blkcnt; |
2829 | |
2830 | - /* |
2831 | - * Allocate storage for a list of all the "remote" value extents. |
2832 | - */ |
2833 | - size = count * sizeof(xfs_attr_inactive_list_t); |
2834 | - list = kmem_alloc(size, 0); |
2835 | - |
2836 | - /* |
2837 | - * Identify each of the "remote" value extents. |
2838 | - */ |
2839 | - lp = list; |
2840 | - entry = xfs_attr3_leaf_entryp(leaf); |
2841 | - for (i = 0; i < ichdr.count; entry++, i++) { |
2842 | - if (be16_to_cpu(entry->nameidx) && |
2843 | - ((entry->flags & XFS_ATTR_LOCAL) == 0)) { |
2844 | - name_rmt = xfs_attr3_leaf_name_remote(leaf, i); |
2845 | - if (name_rmt->valueblk) { |
2846 | - lp->valueblk = be32_to_cpu(name_rmt->valueblk); |
2847 | - lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount, |
2848 | - be32_to_cpu(name_rmt->valuelen)); |
2849 | - lp++; |
2850 | - } |
2851 | - } |
2852 | - } |
2853 | - xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */ |
2854 | + if (!entry->nameidx || (entry->flags & XFS_ATTR_LOCAL)) |
2855 | + continue; |
2856 | |
2857 | - /* |
2858 | - * Invalidate each of the "remote" value extents. |
2859 | - */ |
2860 | - error = 0; |
2861 | - for (lp = list, i = 0; i < count; i++, lp++) { |
2862 | - tmp = xfs_attr3_leaf_freextent(trans, dp, |
2863 | - lp->valueblk, lp->valuelen); |
2864 | + name_rmt = xfs_attr3_leaf_name_remote(leaf, i); |
2865 | + if (!name_rmt->valueblk) |
2866 | + continue; |
2867 | |
2868 | - if (error == 0) |
2869 | - error = tmp; /* save only the 1st errno */ |
2870 | + blkcnt = xfs_attr3_rmt_blocks(dp->i_mount, |
2871 | + be32_to_cpu(name_rmt->valuelen)); |
2872 | + error = xfs_attr3_rmt_stale(dp, |
2873 | + be32_to_cpu(name_rmt->valueblk), blkcnt); |
2874 | + if (error) |
2875 | + goto err; |
2876 | } |
2877 | |
2878 | - kmem_free(list); |
2879 | + xfs_trans_brelse(*trans, bp); |
2880 | +err: |
2881 | return error; |
2882 | } |
2883 | |
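The rewritten helper walks the remote value's logical range one mapping at a time, advancing the (blkno, blkcnt) cursor by each mapping's length instead of keeping separate tblkno/tblkcnt copies. Below is a minimal user-space sketch of that cursor-advance loop; the struct and read_mapping() are hypothetical stand-ins for xfs_bmbt_irec and xfs_bmapi_read(), and the printf marks where the kernel would call xfs_attr_rmtval_stale().

    #include <stdio.h>

    struct mapping {
        unsigned long long start;   /* first logical block mapped */
        unsigned long long count;   /* number of blocks in the mapping */
    };

    /* Hypothetical stand-in for xfs_bmapi_read(): maps at most 4 blocks. */
    static int read_mapping(unsigned long long blkno, unsigned long long blkcnt,
                            struct mapping *map)
    {
        map->start = blkno;
        map->count = blkcnt < 4 ? blkcnt : 4;
        return 0;
    }

    int main(void)
    {
        unsigned long long blkno = 100, blkcnt = 10;
        struct mapping map;

        while (blkcnt > 0) {
            if (read_mapping(blkno, blkcnt, &map))
                return 1;
            /* here the kernel marks the mapped buffers stale */
            printf("stale: blocks %llu..%llu\n",
                   map.start, map.start + map.count - 1);
            blkno += map.count;
            blkcnt -= map.count;
        }
        return 0;
    }
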
2884 | diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c |
2885 | index 203065a647652..e41c13ffa5a43 100644 |
2886 | --- a/fs/xfs/xfs_file.c |
2887 | +++ b/fs/xfs/xfs_file.c |
2888 | @@ -187,7 +187,12 @@ xfs_file_dio_aio_read( |
2889 | |
2890 | file_accessed(iocb->ki_filp); |
2891 | |
2892 | - xfs_ilock(ip, XFS_IOLOCK_SHARED); |
2893 | + if (iocb->ki_flags & IOCB_NOWAIT) { |
2894 | + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) |
2895 | + return -EAGAIN; |
2896 | + } else { |
2897 | + xfs_ilock(ip, XFS_IOLOCK_SHARED); |
2898 | + } |
2899 | ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL); |
2900 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
2901 | |
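This hunk makes the direct-I/O read path honor IOCB_NOWAIT: try the shared inode lock once and return -EAGAIN instead of sleeping. A rough user-space sketch of the same trylock-or-EAGAIN pattern, with a pthread rwlock standing in for the XFS iolock:

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

    static int dio_read(bool nowait)
    {
        if (nowait) {
            if (pthread_rwlock_tryrdlock(&iolock))
                return -EAGAIN;    /* caller retries or falls back */
        } else {
            pthread_rwlock_rdlock(&iolock);
        }
        /* ... issue the direct I/O under the shared lock ... */
        pthread_rwlock_unlock(&iolock);
        return 0;
    }

    int main(void)
    {
        printf("uncontended: %d\n", dio_read(true));          /* 0 */

        pthread_rwlock_wrlock(&iolock);                       /* simulate a writer */
        printf("contended, nowait: %d\n", dio_read(true));    /* -EAGAIN */
        pthread_rwlock_unlock(&iolock);
        return 0;
    }
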
2902 | diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c |
2903 | index 7b72c189cff0b..30202d8c25e4f 100644 |
2904 | --- a/fs/xfs/xfs_inode.c |
2905 | +++ b/fs/xfs/xfs_inode.c |
2906 | @@ -1513,10 +1513,8 @@ xfs_itruncate_extents_flags( |
2907 | struct xfs_mount *mp = ip->i_mount; |
2908 | struct xfs_trans *tp = *tpp; |
2909 | xfs_fileoff_t first_unmap_block; |
2910 | - xfs_fileoff_t last_block; |
2911 | xfs_filblks_t unmap_len; |
2912 | int error = 0; |
2913 | - int done = 0; |
2914 | |
2915 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
2916 | ASSERT(!atomic_read(&VFS_I(ip)->i_count) || |
2917 | @@ -1536,21 +1534,22 @@ xfs_itruncate_extents_flags( |
2918 | * the end of the file (in a crash where the space is allocated |
2919 | * but the inode size is not yet updated), simply remove any |
2920 | * blocks which show up between the new EOF and the maximum |
2921 | - * possible file size. If the first block to be removed is |
2922 | - * beyond the maximum file size (ie it is the same as last_block), |
2923 | - * then there is nothing to do. |
2924 | + * possible file size. |
2925 | + * |
2926 | + * We have to free all the blocks to the bmbt maximum offset, even if |
2927 | + * the page cache can't scale that far. |
2928 | */ |
2929 | first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); |
2930 | - last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); |
2931 | - if (first_unmap_block == last_block) |
2932 | + if (first_unmap_block >= XFS_MAX_FILEOFF) { |
2933 | + WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); |
2934 | return 0; |
2935 | + } |
2936 | |
2937 | - ASSERT(first_unmap_block < last_block); |
2938 | - unmap_len = last_block - first_unmap_block + 1; |
2939 | - while (!done) { |
2940 | + unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; |
2941 | + while (unmap_len > 0) { |
2942 | ASSERT(tp->t_firstblock == NULLFSBLOCK); |
2943 | - error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, |
2944 | - XFS_ITRUNC_MAX_EXTENTS, &done); |
2945 | + error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, |
2946 | + flags, XFS_ITRUNC_MAX_EXTENTS); |
2947 | if (error) |
2948 | goto out; |
2949 | |
2950 | @@ -1570,7 +1569,7 @@ xfs_itruncate_extents_flags( |
2951 | if (whichfork == XFS_DATA_FORK) { |
2952 | /* Remove all pending CoW reservations. */ |
2953 | error = xfs_reflink_cancel_cow_blocks(ip, &tp, |
2954 | - first_unmap_block, last_block, true); |
2955 | + first_unmap_block, XFS_MAX_FILEOFF, true); |
2956 | if (error) |
2957 | goto out; |
2958 | |
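The truncate loop is now driven by a remaining-length counter that __xfs_bunmapi() decrements in place, rather than by a done flag, and it always unmaps up to the bmbt's maximum offset. A toy sketch of that length-driven loop; unmap_chunk() is a hypothetical stand-in, the limit is shrunk so the demo terminates quickly, and the real code also rolls the transaction between passes.

    #include <stdio.h>

    /* Hypothetical stand-in for __xfs_bunmapi(): removes at most 100
     * blocks per call and decrements the remaining length in place. */
    static int unmap_chunk(unsigned long long *len)
    {
        *len -= (*len < 100) ? *len : 100;
        return 0;
    }

    int main(void)
    {
        unsigned long long max_fileoff = 1023;  /* really (1ULL << 54) - 1 in XFS */
        unsigned long long first_unmap_block = 8;
        unsigned long long unmap_len;

        if (first_unmap_block >= max_fileoff)
            return 0;               /* nothing between new EOF and the limit */

        unmap_len = max_fileoff - first_unmap_block + 1;
        while (unmap_len > 0) {
            if (unmap_chunk(&unmap_len))
                return 1;
            /* the kernel rolls the transaction here between passes */
        }
        printf("range fully unmapped\n");
        return 0;
    }
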
2959 | diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c |
2960 | index 904d8285c2269..dfbf3f8f1ec86 100644 |
2961 | --- a/fs/xfs/xfs_reflink.c |
2962 | +++ b/fs/xfs/xfs_reflink.c |
2963 | @@ -1544,7 +1544,8 @@ xfs_reflink_clear_inode_flag( |
2964 | * We didn't find any shared blocks so turn off the reflink flag. |
2965 | * First, get rid of any leftover CoW mappings. |
2966 | */ |
2967 | - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); |
2968 | + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF, |
2969 | + true); |
2970 | if (error) |
2971 | return error; |
2972 | |
2973 | diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c |
2974 | index 8d1df9f8be071..a3a54a0fbffea 100644 |
2975 | --- a/fs/xfs/xfs_super.c |
2976 | +++ b/fs/xfs/xfs_super.c |
2977 | @@ -512,32 +512,6 @@ xfs_showargs( |
2978 | seq_puts(m, ",noquota"); |
2979 | } |
2980 | |
2981 | -static uint64_t |
2982 | -xfs_max_file_offset( |
2983 | - unsigned int blockshift) |
2984 | -{ |
2985 | - unsigned int pagefactor = 1; |
2986 | - unsigned int bitshift = BITS_PER_LONG - 1; |
2987 | - |
2988 | - /* Figure out maximum filesize, on Linux this can depend on |
2989 | - * the filesystem blocksize (on 32 bit platforms). |
2990 | - * __block_write_begin does this in an [unsigned] long long... |
2991 | - * page->index << (PAGE_SHIFT - bbits) |
2992 | - * So, for page sized blocks (4K on 32 bit platforms), |
2993 | - * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is |
2994 | - * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1) |
2995 | - * but for smaller blocksizes it is less (bbits = log2 bsize). |
2996 | - */ |
2997 | - |
2998 | -#if BITS_PER_LONG == 32 |
2999 | - ASSERT(sizeof(sector_t) == 8); |
3000 | - pagefactor = PAGE_SIZE; |
3001 | - bitshift = BITS_PER_LONG; |
3002 | -#endif |
3003 | - |
3004 | - return (((uint64_t)pagefactor) << bitshift) - 1; |
3005 | -} |
3006 | - |
3007 | /* |
3008 | * Set parameters for inode allocation heuristics, taking into account |
3009 | * filesystem size and inode32/inode64 mount options; i.e. specifically |
3010 | @@ -1650,6 +1624,26 @@ xfs_fs_fill_super( |
3011 | if (error) |
3012 | goto out_free_sb; |
3013 | |
3014 | + /* |
3015 | + * XFS block mappings use 54 bits to store the logical block offset. |
3016 | + * This should suffice to handle the maximum file size that the VFS |
3017 | + * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT |
3018 | + * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes |
3019 | + * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON |
3020 | + * to check this assertion. |
3021 | + * |
3022 | + * Avoid integer overflow by comparing the maximum bmbt offset to the |
3023 | + * maximum pagecache offset in units of fs blocks. |
3024 | + */ |
3025 | + if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { |
3026 | + xfs_warn(mp, |
3027 | +"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", |
3028 | + XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), |
3029 | + XFS_MAX_FILEOFF); |
3030 | + error = -EINVAL; |
3031 | + goto out_free_sb; |
3032 | + } |
3033 | + |
3034 | error = xfs_filestream_mount(mp); |
3035 | if (error) |
3036 | goto out_free_sb; |
3037 | @@ -1661,7 +1655,7 @@ xfs_fs_fill_super( |
3038 | sb->s_magic = XFS_SUPER_MAGIC; |
3039 | sb->s_blocksize = mp->m_sb.sb_blocksize; |
3040 | sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; |
3041 | - sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); |
3042 | + sb->s_maxbytes = MAX_LFS_FILESIZE; |
3043 | sb->s_max_links = XFS_MAXLINK; |
3044 | sb->s_time_gran = 1; |
3045 | sb->s_time_min = S32_MIN; |
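
The new sanity check compares limits in units of filesystem blocks: shifting the VFS byte limit down to blocks cannot overflow, whereas converting the 54-bit maximum block offset up to bytes could. A standalone sketch of that overflow-safe comparison, assuming 4 KiB blocks and modeling XFS_B_TO_FSBT() as a right shift:

    #include <stdint.h>
    #include <stdio.h>

    #define BLOCKLOG      12                      /* 4096-byte blocks */
    #define MAX_FILEOFF   ((1ULL << 54) - 1)      /* 54-bit bmbt block offset */
    #define MAX_LFS_SIZE  ((1ULL << 63) - 1)      /* 64-bit VFS byte limit */

    int main(void)
    {
        /* bytes -> blocks cannot overflow; blocks -> bytes might */
        uint64_t max_pagecache_blocks = MAX_LFS_SIZE >> BLOCKLOG;

        if (max_pagecache_blocks > MAX_FILEOFF) {
            fprintf(stderr, "pagecache limit exceeds extent map maximum\n");
            return 1;
        }
        printf("s_maxbytes fits in the extent map\n");
        return 0;
    }
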
3046 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h |
3047 | index 29a6fa2f518db..b42e9c4134475 100644 |
3048 | --- a/include/linux/cpu.h |
3049 | +++ b/include/linux/cpu.h |
3050 | @@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, |
3051 | extern ssize_t cpu_show_mmio_stale_data(struct device *dev, |
3052 | struct device_attribute *attr, |
3053 | char *buf); |
3054 | +extern ssize_t cpu_show_retbleed(struct device *dev, |
3055 | + struct device_attribute *attr, char *buf); |
3056 | |
3057 | extern __printf(4, 5) |
3058 | struct device *cpu_device_create(struct device *parent, void *drvdata, |
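
cpu_show_retbleed() backs the sysfs file /sys/devices/system/cpu/vulnerabilities/retbleed added by this series. A quick user-space check of the reported mitigation state:

    #include <stdio.h>

    int main(void)
    {
        char buf[128];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/retbleed", "r");

        if (!f) {
            perror("retbleed");    /* file is absent on unpatched kernels */
            return 1;
        }
        if (fgets(buf, sizeof(buf), f))
            printf("retbleed: %s", buf);
        fclose(f);
        return 0;
    }
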
3059 | diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h |
3060 | index dd4cdad76b18e..ee7d57478a454 100644 |
3061 | --- a/include/linux/kvm_host.h |
3062 | +++ b/include/linux/kvm_host.h |
3063 | @@ -955,7 +955,7 @@ static inline void kvm_arch_end_assignment(struct kvm *kvm) |
3064 | { |
3065 | } |
3066 | |
3067 | -static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) |
3068 | +static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) |
3069 | { |
3070 | return false; |
3071 | } |
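
Plain inline is only a hint the compiler may ignore; __always_inline forces the stub's body into the caller so no out-of-line copy or call instruction is emitted. A small demo of the GCC/clang attribute behind the kernel macro (build with -O0 and compare the generated assembly):

    #include <stdio.h>

    static inline int maybe_inlined(void)
    {
        return 1;
    }

    static inline __attribute__((always_inline)) int definitely_inlined(void)
    {
        return 2;
    }

    int main(void)
    {
        /* At -O0, only maybe_inlined() may still be emitted as a real
         * call; definitely_inlined() is folded into main() regardless. */
        printf("%d %d\n", maybe_inlined(), definitely_inlined());
        return 0;
    }
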
3072 | diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h |
3073 | index 4c56404e53a76..8265b99d6d55b 100644 |
3074 | --- a/include/linux/mod_devicetable.h |
3075 | +++ b/include/linux/mod_devicetable.h |
3076 | @@ -672,9 +672,7 @@ struct x86_cpu_id { |
3077 | __u16 steppings; |
3078 | }; |
3079 | |
3080 | -#define X86_FEATURE_MATCH(x) \ |
3081 | - { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x } |
3082 | - |
3083 | +/* Wild cards for x86_cpu_id::vendor, family, model and feature */ |
3084 | #define X86_VENDOR_ANY 0xffff |
3085 | #define X86_FAMILY_ANY 0 |
3086 | #define X86_MODEL_ANY 0 |
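
With X86_FEATURE_MATCH() removed, match-table entries spell out the wildcard fields explicitly (later kernels add helper macros such as X86_MATCH_FEATURE() for this). A compact sketch, with the struct abbreviated to the fields used here and a hypothetical feature number:

    #include <stdio.h>

    #define X86_VENDOR_ANY 0xffff
    #define X86_FAMILY_ANY 0
    #define X86_MODEL_ANY  0

    /* Abbreviated version of struct x86_cpu_id for the demo. */
    struct x86_cpu_id {
        unsigned short vendor, family, model, feature;
    };

    int main(void)
    {
        struct x86_cpu_id id = {
            .vendor  = X86_VENDOR_ANY,
            .family  = X86_FAMILY_ANY,
            .model   = X86_MODEL_ANY,
            .feature = 123,        /* hypothetical feature bit */
        };

        printf("matches any CPU that sets feature bit %u\n", id.feature);
        return 0;
    }
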
3087 | diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn |
3088 | index 854e2ba9daa29..6a78afc6f13b4 100644 |
3089 | --- a/scripts/Makefile.extrawarn |
3090 | +++ b/scripts/Makefile.extrawarn |
3091 | @@ -50,6 +50,7 @@ KBUILD_CFLAGS += -Wno-sign-compare |
3092 | KBUILD_CFLAGS += -Wno-format-zero-length |
3093 | KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) |
3094 | KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) |
3095 | +KBUILD_CFLAGS += $(call cc-disable-warning, cast-function-type-strict) |
3096 | endif |
3097 | |
3098 | endif |
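
clang's -Wcast-function-type-strict fires on casts between function-pointer types whose prototypes are ABI-compatible but not identical, a pattern the kernel uses deliberately, which is why the warning is disabled above. A minimal reproducer of the cast it complains about:

    #include <stdio.h>

    typedef void (*generic_fn)(void *);

    static void takes_int_ptr(int *p)
    {
        printf("%d\n", *p);
    }

    int main(void)
    {
        int v = 42;
        /* clang -Wcast-function-type-strict warns on this cast */
        generic_fn fn = (generic_fn)takes_int_ptr;

        /* casting back to the real type before the call stays well-defined */
        ((void (*)(int *))fn)(&v);
        return 0;
    }
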
3099 | diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h |
3100 | index 59f924e92c284..3efaf338d3257 100644 |
3101 | --- a/tools/arch/x86/include/asm/cpufeatures.h |
3102 | +++ b/tools/arch/x86/include/asm/cpufeatures.h |
3103 | @@ -284,7 +284,7 @@ |
3104 | #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ |
3105 | #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ |
3106 | #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ |
3107 | -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+ 6) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ |
3108 | +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM-Exit when EIBRS is enabled */ |
3109 | |
3110 | /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ |
3111 | #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ |
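
The feature macros in this header encode word * 32 + bit, so the fix moves RSB_VMEXIT_LITE from bit 6 to bit 17 of word 11, resynchronizing the tools copy with the kernel's cpufeatures layout. A tiny decoder showing what the two values mean:

    #include <stdio.h>

    #define FEATURE_BIT(word, bit) ((word) * 32 + (bit))

    int main(void)
    {
        int old_val = FEATURE_BIT(11, 6);   /* the stale definition */
        int new_val = FEATURE_BIT(11, 17);  /* the corrected one */

        printf("old: %d -> word %d, bit %d\n", old_val, old_val / 32, old_val % 32);
        printf("new: %d -> word %d, bit %d\n", new_val, new_val / 32, new_val % 32);
        return 0;
    }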