Contents of /trunk/kernel-alx/patches-4.9/0180-4.9.81-all-fixes.patch
Revision 3200
Wed Aug 8 14:25:51 2018 UTC (6 years, 1 month ago) by niro
File size: 166338 byte(s)
-linux-4.9.81
1 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt |
2 | index 4c2667aa4634..466c039c622b 100644 |
3 | --- a/Documentation/kernel-parameters.txt |
4 | +++ b/Documentation/kernel-parameters.txt |
5 | @@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. |
6 | norandmaps Don't use address space randomization. Equivalent to |
7 | echo 0 > /proc/sys/kernel/randomize_va_space |
8 | |
9 | - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops |
10 | - |
11 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions |
12 | with UP alternatives |
13 | |
14 | diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt |
15 | new file mode 100644 |
16 | index 000000000000..e9e6cbae2841 |
17 | --- /dev/null |
18 | +++ b/Documentation/speculation.txt |
19 | @@ -0,0 +1,90 @@ |
20 | +This document explains potential effects of speculation, and how undesirable |
21 | +effects can be mitigated portably using common APIs. |
22 | + |
23 | +=========== |
24 | +Speculation |
25 | +=========== |
26 | + |
27 | +To improve performance and minimize average latencies, many contemporary CPUs |
28 | +employ speculative execution techniques such as branch prediction, performing |
29 | +work which may be discarded at a later stage. |
30 | + |
31 | +Typically speculative execution cannot be observed from architectural state, |
32 | +such as the contents of registers. However, in some cases it is possible to |
33 | +observe its impact on microarchitectural state, such as the presence or |
34 | +absence of data in caches. Such state may form side-channels which can be |
35 | +observed to extract secret information. |
36 | + |
37 | +For example, in the presence of branch prediction, it is possible for bounds |
38 | +checks to be ignored by code which is speculatively executed. Consider the |
39 | +following code: |
40 | + |
41 | + int load_array(int *array, unsigned int index) |
42 | + { |
43 | + if (index >= MAX_ARRAY_ELEMS) |
44 | + return 0; |
45 | + else |
46 | + return array[index]; |
47 | + } |
48 | + |
49 | +Which, on arm64, may be compiled to an assembly sequence such as: |
50 | + |
51 | + CMP <index>, #MAX_ARRAY_ELEMS |
52 | + B.LT less |
53 | + MOV <returnval>, #0 |
54 | + RET |
55 | + less: |
56 | + LDR <returnval>, [<array>, <index>] |
57 | + RET |
58 | + |
59 | +It is possible that a CPU mis-predicts the conditional branch, and |
60 | +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This |
61 | +value will subsequently be discarded, but the speculated load may affect |
62 | +microarchitectural state which can be subsequently measured. |
63 | + |
64 | +More complex sequences involving multiple dependent memory accesses may |
65 | +result in sensitive information being leaked. Consider the following |
66 | +code, building on the prior example: |
67 | + |
68 | + int load_dependent_arrays(int *arr1, int *arr2, int index) |
69 | + { |
70 | + int val1, val2; |
71 | + |
72 | + val1 = load_array(arr1, index); |
73 | + val2 = load_array(arr2, val1); |
74 | + |
75 | + return val2; |
76 | + } |
77 | + |
78 | +Under speculation, the first call to load_array() may return the value |
79 | +of an out-of-bounds address, while the second call will influence |
80 | +microarchitectural state dependent on this value. This may provide an |
81 | +arbitrary read primitive. |
82 | + |
83 | +==================================== |
84 | +Mitigating speculation side-channels |
85 | +==================================== |
86 | + |
87 | +The kernel provides a generic API to ensure that bounds checks are |
88 | +respected even under speculation. Architectures which are affected by |
89 | +speculation-based side-channels are expected to implement these |
90 | +primitives. |
91 | + |
92 | +The array_index_nospec() helper in <linux/nospec.h> can be used to |
93 | +prevent information from being leaked via side-channels. |
94 | + |
95 | +A call to array_index_nospec(index, size) returns a sanitized index |
96 | +value that is bounded to [0, size) even under cpu speculation |
97 | +conditions. |
98 | + |
99 | +This can be used to protect the earlier load_array() example: |
100 | + |
101 | + int load_array(int *array, unsigned int index) |
102 | + { |
103 | + if (index >= MAX_ARRAY_ELEMS) |
104 | + return 0; |
105 | + else { |
106 | + index = array_index_nospec(index, MAX_ARRAY_ELEMS); |
107 | + return array[index]; |
108 | + } |
109 | + } |
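The clamp that array_index_nospec() relies on can be written without any branch at all; a minimal user-space sketch of the generic mask from <linux/nospec.h> (function names here are illustrative, not the kernel's exported API):

    #include <limits.h>

    /* ~0UL when index < size, 0 otherwise -- no conditional branch. */
    static unsigned long index_mask(unsigned long index, unsigned long size)
    {
            /*
             * When index >= size, (size - 1 - index) wraps and sets the
             * top bit; inverting and right-shifting smears that into an
             * all-zeroes mask. Assumes size <= LONG_MAX and an
             * arithmetic right shift (true of gcc/clang on Linux).
             */
            return ~(long)(index | (size - 1 - index)) >>
                    (CHAR_BIT * sizeof(long) - 1);
    }

    /* In-bounds index passes through unchanged; out-of-bounds becomes 0. */
    static unsigned long clamp_index(unsigned long index, unsigned long size)
    {
            return index & index_mask(index, size);
    }

Because the mask carries a data dependency on the index, even a mispredicted bounds check cannot produce an out-of-range array access.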
110 | diff --git a/Makefile b/Makefile |
111 | index 9550b6939076..4d5753f1c37b 100644 |
112 | --- a/Makefile |
113 | +++ b/Makefile |
114 | @@ -1,6 +1,6 @@ |
115 | VERSION = 4 |
116 | PATCHLEVEL = 9 |
117 | -SUBLEVEL = 80 |
118 | +SUBLEVEL = 81 |
119 | EXTRAVERSION = |
120 | NAME = Roaring Lionus |
121 | |
122 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig |
123 | index 6eda5abbd719..0a6bb48854e3 100644 |
124 | --- a/arch/powerpc/Kconfig |
125 | +++ b/arch/powerpc/Kconfig |
126 | @@ -128,6 +128,7 @@ config PPC |
127 | select ARCH_HAS_GCOV_PROFILE_ALL |
128 | select GENERIC_SMP_IDLE_THREAD |
129 | select GENERIC_CMOS_UPDATE |
130 | + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 |
131 | select GENERIC_TIME_VSYSCALL_OLD |
132 | select GENERIC_CLOCKEVENTS |
133 | select GENERIC_CLOCKEVENTS_BROADCAST if SMP |
134 | diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h |
135 | index a703452d67b6..555e22d5e07f 100644 |
136 | --- a/arch/powerpc/include/asm/exception-64e.h |
137 | +++ b/arch/powerpc/include/asm/exception-64e.h |
138 | @@ -209,5 +209,11 @@ exc_##label##_book3e: |
139 | ori r3,r3,vector_offset@l; \ |
140 | mtspr SPRN_IVOR##vector_number,r3; |
141 | |
142 | +#define RFI_TO_KERNEL \ |
143 | + rfi |
144 | + |
145 | +#define RFI_TO_USER \ |
146 | + rfi |
147 | + |
148 | #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ |
149 | |
150 | diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h |
151 | index 9a3eee661297..cab6d2a46c41 100644 |
152 | --- a/arch/powerpc/include/asm/exception-64s.h |
153 | +++ b/arch/powerpc/include/asm/exception-64s.h |
154 | @@ -51,6 +51,59 @@ |
155 | #define EX_PPR 88 /* SMT thread status register (priority) */ |
156 | #define EX_CTR 96 |
157 | |
158 | +/* |
159 | + * Macros for annotating the expected destination of (h)rfid |
160 | + * |
161 | + * The nop instructions allow us to insert one or more instructions to flush the |
162 | + * L1-D cache when returning to userspace or a guest. |
163 | + */ |
164 | +#define RFI_FLUSH_SLOT \ |
165 | + RFI_FLUSH_FIXUP_SECTION; \ |
166 | + nop; \ |
167 | + nop; \ |
168 | + nop |
169 | + |
170 | +#define RFI_TO_KERNEL \ |
171 | + rfid |
172 | + |
173 | +#define RFI_TO_USER \ |
174 | + RFI_FLUSH_SLOT; \ |
175 | + rfid; \ |
176 | + b rfi_flush_fallback |
177 | + |
178 | +#define RFI_TO_USER_OR_KERNEL \ |
179 | + RFI_FLUSH_SLOT; \ |
180 | + rfid; \ |
181 | + b rfi_flush_fallback |
182 | + |
183 | +#define RFI_TO_GUEST \ |
184 | + RFI_FLUSH_SLOT; \ |
185 | + rfid; \ |
186 | + b rfi_flush_fallback |
187 | + |
188 | +#define HRFI_TO_KERNEL \ |
189 | + hrfid |
190 | + |
191 | +#define HRFI_TO_USER \ |
192 | + RFI_FLUSH_SLOT; \ |
193 | + hrfid; \ |
194 | + b hrfi_flush_fallback |
195 | + |
196 | +#define HRFI_TO_USER_OR_KERNEL \ |
197 | + RFI_FLUSH_SLOT; \ |
198 | + hrfid; \ |
199 | + b hrfi_flush_fallback |
200 | + |
201 | +#define HRFI_TO_GUEST \ |
202 | + RFI_FLUSH_SLOT; \ |
203 | + hrfid; \ |
204 | + b hrfi_flush_fallback |
205 | + |
206 | +#define HRFI_TO_UNKNOWN \ |
207 | + RFI_FLUSH_SLOT; \ |
208 | + hrfid; \ |
209 | + b hrfi_flush_fallback |
210 | + |
211 | #ifdef CONFIG_RELOCATABLE |
212 | #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ |
213 | mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ |
214 | diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h |
215 | index ddf54f5bbdd1..7b332342071c 100644 |
216 | --- a/arch/powerpc/include/asm/feature-fixups.h |
217 | +++ b/arch/powerpc/include/asm/feature-fixups.h |
218 | @@ -189,4 +189,19 @@ void apply_feature_fixups(void); |
219 | void setup_feature_keys(void); |
220 | #endif |
221 | |
222 | +#define RFI_FLUSH_FIXUP_SECTION \ |
223 | +951: \ |
224 | + .pushsection __rfi_flush_fixup,"a"; \ |
225 | + .align 2; \ |
226 | +952: \ |
227 | + FTR_ENTRY_OFFSET 951b-952b; \ |
228 | + .popsection; |
229 | + |
230 | + |
231 | +#ifndef __ASSEMBLY__ |
232 | + |
233 | +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; |
234 | + |
235 | +#endif |
236 | + |
237 | #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ |
238 | diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h |
239 | index 708edebcf147..0e12cb2437d1 100644 |
240 | --- a/arch/powerpc/include/asm/hvcall.h |
241 | +++ b/arch/powerpc/include/asm/hvcall.h |
242 | @@ -240,6 +240,7 @@ |
243 | #define H_GET_HCA_INFO 0x1B8 |
244 | #define H_GET_PERF_COUNT 0x1BC |
245 | #define H_MANAGE_TRACE 0x1C0 |
246 | +#define H_GET_CPU_CHARACTERISTICS 0x1C8 |
247 | #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 |
248 | #define H_QUERY_INT_STATE 0x1E4 |
249 | #define H_POLL_PENDING 0x1D8 |
250 | @@ -306,6 +307,17 @@ |
251 | #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 |
252 | #define H_SET_MODE_RESOURCE_LE 4 |
253 | |
254 | +/* H_GET_CPU_CHARACTERISTICS return values */ |
255 | +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 |
256 | +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 |
257 | +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 |
258 | +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 |
259 | +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 |
260 | + |
261 | +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 |
262 | +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 |
263 | +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 |
264 | + |
265 | #ifndef __ASSEMBLY__ |
266 | |
267 | /** |
268 | @@ -433,6 +445,11 @@ static inline unsigned long cmo_get_page_size(void) |
269 | } |
270 | #endif /* CONFIG_PPC_PSERIES */ |
271 | |
272 | +struct h_cpu_char_result { |
273 | + u64 character; |
274 | + u64 behaviour; |
275 | +}; |
276 | + |
277 | #endif /* __ASSEMBLY__ */ |
278 | #endif /* __KERNEL__ */ |
279 | #endif /* _ASM_POWERPC_HVCALL_H */ |
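The "IBM bit N" comments above use Power's big-endian bit numbering, in which bit 0 is the most significant bit of the 64-bit register. The kernel's PPC_BIT() helper expresses the same mapping, so these constants could equivalently be written as:

    /* IBM (big-endian) bit n of a 64-bit value: bit 0 is the MSB. */
    #define PPC_BIT(n)      (1ull << (63 - (n)))

    /* e.g. H_CPU_CHAR_SPEC_BAR_ORI31 == PPC_BIT(0) == 1ull << 63 */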
280 | diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h |
281 | index 6a6792bb39fb..ea43897183fd 100644 |
282 | --- a/arch/powerpc/include/asm/paca.h |
283 | +++ b/arch/powerpc/include/asm/paca.h |
284 | @@ -205,6 +205,16 @@ struct paca_struct { |
285 | struct sibling_subcore_state *sibling_subcore_state; |
286 | #endif |
287 | #endif |
288 | +#ifdef CONFIG_PPC_BOOK3S_64 |
289 | + /* |
290 | + * rfi fallback flush must be in its own cacheline to prevent |
291 | + * other paca data leaking into the L1d |
292 | + */ |
293 | + u64 exrfi[13] __aligned(0x80); |
294 | + void *rfi_flush_fallback_area; |
295 | + u64 l1d_flush_congruence; |
296 | + u64 l1d_flush_sets; |
297 | +#endif |
298 | }; |
299 | |
300 | #ifdef CONFIG_PPC_BOOK3S |
301 | diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h |
302 | index 1b394247afc2..4e53b8570d1f 100644 |
303 | --- a/arch/powerpc/include/asm/plpar_wrappers.h |
304 | +++ b/arch/powerpc/include/asm/plpar_wrappers.h |
305 | @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr |
306 | return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); |
307 | } |
308 | |
309 | +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) |
310 | +{ |
311 | + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; |
312 | + long rc; |
313 | + |
314 | + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); |
315 | + if (rc == H_SUCCESS) { |
316 | + p->character = retbuf[0]; |
317 | + p->behaviour = retbuf[1]; |
318 | + } |
319 | + |
320 | + return rc; |
321 | +} |
322 | + |
323 | #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ |
324 | diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h |
325 | index 654d64c9f3ac..6825a67cc3db 100644 |
326 | --- a/arch/powerpc/include/asm/setup.h |
327 | +++ b/arch/powerpc/include/asm/setup.h |
328 | @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {} |
329 | static inline void pseries_little_endian_exceptions(void) {} |
330 | #endif /* CONFIG_PPC_PSERIES */ |
331 | |
332 | +void rfi_flush_enable(bool enable); |
333 | + |
334 | +/* These are bit flags */ |
335 | +enum l1d_flush_type { |
336 | + L1D_FLUSH_NONE = 0x1, |
337 | + L1D_FLUSH_FALLBACK = 0x2, |
338 | + L1D_FLUSH_ORI = 0x4, |
339 | + L1D_FLUSH_MTTRIG = 0x8, |
340 | +}; |
341 | + |
342 | +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); |
343 | +void do_rfi_flush_fixups(enum l1d_flush_type types); |
344 | + |
345 | #endif /* !__ASSEMBLY__ */ |
346 | |
347 | #endif /* _ASM_POWERPC_SETUP_H */ |
348 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c |
349 | index c833d88c423d..64bcbd580495 100644 |
350 | --- a/arch/powerpc/kernel/asm-offsets.c |
351 | +++ b/arch/powerpc/kernel/asm-offsets.c |
352 | @@ -240,6 +240,10 @@ int main(void) |
353 | #ifdef CONFIG_PPC_BOOK3S_64 |
354 | DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); |
355 | DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); |
356 | + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area)); |
357 | + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi)); |
358 | + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence)); |
359 | + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets)); |
360 | #endif |
361 | DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); |
362 | DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); |
363 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S |
364 | index caa659671599..c33b69d10919 100644 |
365 | --- a/arch/powerpc/kernel/entry_64.S |
366 | +++ b/arch/powerpc/kernel/entry_64.S |
367 | @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION |
368 | END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) |
369 | |
370 | ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ |
371 | + ld r2,GPR2(r1) |
372 | + ld r1,GPR1(r1) |
373 | + mtlr r4 |
374 | + mtcr r5 |
375 | + mtspr SPRN_SRR0,r7 |
376 | + mtspr SPRN_SRR1,r8 |
377 | + RFI_TO_USER |
378 | + b . /* prevent speculative execution */ |
379 | + |
380 | + /* exit to kernel */ |
381 | 1: ld r2,GPR2(r1) |
382 | ld r1,GPR1(r1) |
383 | mtlr r4 |
384 | mtcr r5 |
385 | mtspr SPRN_SRR0,r7 |
386 | mtspr SPRN_SRR1,r8 |
387 | - RFI |
388 | + RFI_TO_KERNEL |
389 | b . /* prevent speculative execution */ |
390 | |
391 | syscall_error: |
392 | @@ -859,7 +869,7 @@ BEGIN_FTR_SECTION |
393 | END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) |
394 | ACCOUNT_CPU_USER_EXIT(r13, r2, r4) |
395 | REST_GPR(13, r1) |
396 | -1: |
397 | + |
398 | mtspr SPRN_SRR1,r3 |
399 | |
400 | ld r2,_CCR(r1) |
401 | @@ -872,8 +882,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) |
402 | ld r3,GPR3(r1) |
403 | ld r4,GPR4(r1) |
404 | ld r1,GPR1(r1) |
405 | + RFI_TO_USER |
406 | + b . /* prevent speculative execution */ |
407 | |
408 | - rfid |
409 | +1: mtspr SPRN_SRR1,r3 |
410 | + |
411 | + ld r2,_CCR(r1) |
412 | + mtcrf 0xFF,r2 |
413 | + ld r2,_NIP(r1) |
414 | + mtspr SPRN_SRR0,r2 |
415 | + |
416 | + ld r0,GPR0(r1) |
417 | + ld r2,GPR2(r1) |
418 | + ld r3,GPR3(r1) |
419 | + ld r4,GPR4(r1) |
420 | + ld r1,GPR1(r1) |
421 | + RFI_TO_KERNEL |
422 | b . /* prevent speculative execution */ |
423 | |
424 | #endif /* CONFIG_PPC_BOOK3E */ |
425 | diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S |
426 | index fd68e19b9ef7..96db6c3adebe 100644 |
427 | --- a/arch/powerpc/kernel/exceptions-64s.S |
428 | +++ b/arch/powerpc/kernel/exceptions-64s.S |
429 | @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) |
430 | |
431 | andi. r10,r12,MSR_RI /* check for unrecoverable exception */ |
432 | beq- 2f |
433 | + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */ |
434 | + bne 1f |
435 | |
436 | /* All done -- return from exception. */ |
437 | |
438 | @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) |
439 | ld r11,PACA_EXSLB+EX_R11(r13) |
440 | ld r12,PACA_EXSLB+EX_R12(r13) |
441 | ld r13,PACA_EXSLB+EX_R13(r13) |
442 | - rfid |
443 | + RFI_TO_KERNEL |
444 | + b . /* prevent speculative execution */ |
445 | + |
446 | +1: |
447 | +.machine push |
448 | +.machine "power4" |
449 | + mtcrf 0x80,r9 |
450 | + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ |
451 | +.machine pop |
452 | + |
453 | + RESTORE_PPR_PACA(PACA_EXSLB, r9) |
454 | + ld r9,PACA_EXSLB+EX_R9(r13) |
455 | + ld r10,PACA_EXSLB+EX_R10(r13) |
456 | + ld r11,PACA_EXSLB+EX_R11(r13) |
457 | + ld r12,PACA_EXSLB+EX_R12(r13) |
458 | + ld r13,PACA_EXSLB+EX_R13(r13) |
459 | + RFI_TO_USER |
460 | b . /* prevent speculative execution */ |
461 | |
462 | 2: mfspr r11,SPRN_SRR0 |
463 | @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) |
464 | mtspr SPRN_SRR0,r10 |
465 | ld r10,PACAKMSR(r13) |
466 | mtspr SPRN_SRR1,r10 |
467 | - rfid |
468 | + RFI_TO_KERNEL |
469 | b . |
470 | |
471 | 8: mfspr r11,SPRN_SRR0 |
472 | @@ -1576,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) |
473 | bl kernel_bad_stack |
474 | b 1b |
475 | |
476 | + .globl rfi_flush_fallback |
477 | +rfi_flush_fallback: |
478 | + SET_SCRATCH0(r13); |
479 | + GET_PACA(r13); |
480 | + std r9,PACA_EXRFI+EX_R9(r13) |
481 | + std r10,PACA_EXRFI+EX_R10(r13) |
482 | + std r11,PACA_EXRFI+EX_R11(r13) |
483 | + std r12,PACA_EXRFI+EX_R12(r13) |
484 | + std r8,PACA_EXRFI+EX_R13(r13) |
485 | + mfctr r9 |
486 | + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) |
487 | + ld r11,PACA_L1D_FLUSH_SETS(r13) |
488 | + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) |
489 | + /* |
490 | + * The load addresses are at staggered offsets within cachelines, |
491 | + * which suits some pipelines better (on others it should not |
492 | + * hurt). |
493 | + */ |
494 | + addi r12,r12,8 |
495 | + mtctr r11 |
496 | + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ |
497 | + |
498 | + /* order ld/st prior to dcbt stop all streams with flushing */ |
499 | + sync |
500 | +1: li r8,0 |
501 | + .rept 8 /* 8-way set associative */ |
502 | + ldx r11,r10,r8 |
503 | + add r8,r8,r12 |
504 | + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not |
505 | + add r8,r8,r11 // Add 0, this creates a dependency on the ldx |
506 | + .endr |
507 | + addi r10,r10,128 /* 128 byte cache line */ |
508 | + bdnz 1b |
509 | + |
510 | + mtctr r9 |
511 | + ld r9,PACA_EXRFI+EX_R9(r13) |
512 | + ld r10,PACA_EXRFI+EX_R10(r13) |
513 | + ld r11,PACA_EXRFI+EX_R11(r13) |
514 | + ld r12,PACA_EXRFI+EX_R12(r13) |
515 | + ld r8,PACA_EXRFI+EX_R13(r13) |
516 | + GET_SCRATCH0(r13); |
517 | + rfid |
518 | + |
519 | + .globl hrfi_flush_fallback |
520 | +hrfi_flush_fallback: |
521 | + SET_SCRATCH0(r13); |
522 | + GET_PACA(r13); |
523 | + std r9,PACA_EXRFI+EX_R9(r13) |
524 | + std r10,PACA_EXRFI+EX_R10(r13) |
525 | + std r11,PACA_EXRFI+EX_R11(r13) |
526 | + std r12,PACA_EXRFI+EX_R12(r13) |
527 | + std r8,PACA_EXRFI+EX_R13(r13) |
528 | + mfctr r9 |
529 | + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) |
530 | + ld r11,PACA_L1D_FLUSH_SETS(r13) |
531 | + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) |
532 | + /* |
533 | + * The load addresses are at staggered offsets within cachelines, |
534 | + * which suits some pipelines better (on others it should not |
535 | + * hurt). |
536 | + */ |
537 | + addi r12,r12,8 |
538 | + mtctr r11 |
539 | + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ |
540 | + |
541 | + /* order ld/st prior to dcbt stop all streams with flushing */ |
542 | + sync |
543 | +1: li r8,0 |
544 | + .rept 8 /* 8-way set associative */ |
545 | + ldx r11,r10,r8 |
546 | + add r8,r8,r12 |
547 | + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not |
548 | + add r8,r8,r11 // Add 0, this creates a dependency on the ldx |
549 | + .endr |
550 | + addi r10,r10,128 /* 128 byte cache line */ |
551 | + bdnz 1b |
552 | + |
553 | + mtctr r9 |
554 | + ld r9,PACA_EXRFI+EX_R9(r13) |
555 | + ld r10,PACA_EXRFI+EX_R10(r13) |
556 | + ld r11,PACA_EXRFI+EX_R11(r13) |
557 | + ld r12,PACA_EXRFI+EX_R12(r13) |
558 | + ld r8,PACA_EXRFI+EX_R13(r13) |
559 | + GET_SCRATCH0(r13); |
560 | + hrfid |
561 | + |
562 | /* |
563 | * Called from arch_local_irq_enable when an interrupt needs |
564 | * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate |
565 | diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c |
566 | index a12be60181bf..7c30a91c1f86 100644 |
567 | --- a/arch/powerpc/kernel/setup_64.c |
568 | +++ b/arch/powerpc/kernel/setup_64.c |
569 | @@ -37,6 +37,7 @@ |
570 | #include <linux/memblock.h> |
571 | #include <linux/memory.h> |
572 | #include <linux/nmi.h> |
573 | +#include <linux/debugfs.h> |
574 | |
575 | #include <asm/io.h> |
576 | #include <asm/kdump.h> |
577 | @@ -678,4 +679,142 @@ static int __init disable_hardlockup_detector(void) |
578 | return 0; |
579 | } |
580 | early_initcall(disable_hardlockup_detector); |
581 | + |
582 | +#ifdef CONFIG_PPC_BOOK3S_64 |
583 | +static enum l1d_flush_type enabled_flush_types; |
584 | +static void *l1d_flush_fallback_area; |
585 | +static bool no_rfi_flush; |
586 | +bool rfi_flush; |
587 | + |
588 | +static int __init handle_no_rfi_flush(char *p) |
589 | +{ |
590 | + pr_info("rfi-flush: disabled on command line."); |
591 | + no_rfi_flush = true; |
592 | + return 0; |
593 | +} |
594 | +early_param("no_rfi_flush", handle_no_rfi_flush); |
595 | + |
596 | +/* |
597 | + * The RFI flush is not KPTI, but because users will see doco that says to use |
598 | + * nopti we hijack that option here to also disable the RFI flush. |
599 | + */ |
600 | +static int __init handle_no_pti(char *p) |
601 | +{ |
602 | + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); |
603 | + handle_no_rfi_flush(NULL); |
604 | + return 0; |
605 | +} |
606 | +early_param("nopti", handle_no_pti); |
607 | + |
608 | +static void do_nothing(void *unused) |
609 | +{ |
610 | + /* |
611 | + * We don't need to do the flush explicitly, just enter+exit kernel is |
612 | + * sufficient, the RFI exit handlers will do the right thing. |
613 | + */ |
614 | +} |
615 | + |
616 | +void rfi_flush_enable(bool enable) |
617 | +{ |
618 | + if (rfi_flush == enable) |
619 | + return; |
620 | + |
621 | + if (enable) { |
622 | + do_rfi_flush_fixups(enabled_flush_types); |
623 | + on_each_cpu(do_nothing, NULL, 1); |
624 | + } else |
625 | + do_rfi_flush_fixups(L1D_FLUSH_NONE); |
626 | + |
627 | + rfi_flush = enable; |
628 | +} |
629 | + |
630 | +static void init_fallback_flush(void) |
631 | +{ |
632 | + u64 l1d_size, limit; |
633 | + int cpu; |
634 | + |
635 | + l1d_size = ppc64_caches.dsize; |
636 | + limit = min(safe_stack_limit(), ppc64_rma_size); |
637 | + |
638 | + /* |
639 | + * Align to L1d size, and size it at 2x L1d size, to catch possible |
640 | + * hardware prefetch runoff. We don't have a recipe for load patterns to |
641 | + * reliably avoid the prefetcher. |
642 | + */ |
643 | + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); |
644 | + memset(l1d_flush_fallback_area, 0, l1d_size * 2); |
645 | + |
646 | + for_each_possible_cpu(cpu) { |
647 | + /* |
648 | + * The fallback flush is currently coded for 8-way |
649 | + * associativity. Different associativity is possible, but it |
650 | + * will be treated as 8-way and may not evict the lines as |
651 | + * effectively. |
652 | + * |
653 | + * 128 byte lines are mandatory. |
654 | + */ |
655 | + u64 c = l1d_size / 8; |
656 | + |
657 | + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; |
658 | + paca[cpu].l1d_flush_congruence = c; |
659 | + paca[cpu].l1d_flush_sets = c / 128; |
660 | + } |
661 | +} |
662 | + |
663 | +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) |
664 | +{ |
665 | + if (types & L1D_FLUSH_FALLBACK) { |
666 | + pr_info("rfi-flush: Using fallback displacement flush\n"); |
667 | + init_fallback_flush(); |
668 | + } |
669 | + |
670 | + if (types & L1D_FLUSH_ORI) |
671 | + pr_info("rfi-flush: Using ori type flush\n"); |
672 | + |
673 | + if (types & L1D_FLUSH_MTTRIG) |
674 | + pr_info("rfi-flush: Using mttrig type flush\n"); |
675 | + |
676 | + enabled_flush_types = types; |
677 | + |
678 | + if (!no_rfi_flush) |
679 | + rfi_flush_enable(enable); |
680 | +} |
681 | + |
682 | +#ifdef CONFIG_DEBUG_FS |
683 | +static int rfi_flush_set(void *data, u64 val) |
684 | +{ |
685 | + if (val == 1) |
686 | + rfi_flush_enable(true); |
687 | + else if (val == 0) |
688 | + rfi_flush_enable(false); |
689 | + else |
690 | + return -EINVAL; |
691 | + |
692 | + return 0; |
693 | +} |
694 | + |
695 | +static int rfi_flush_get(void *data, u64 *val) |
696 | +{ |
697 | + *val = rfi_flush ? 1 : 0; |
698 | + return 0; |
699 | +} |
700 | + |
701 | +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); |
702 | + |
703 | +static __init int rfi_flush_debugfs_init(void) |
704 | +{ |
705 | + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); |
706 | + return 0; |
707 | +} |
708 | +device_initcall(rfi_flush_debugfs_init); |
709 | +#endif |
710 | + |
711 | +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) |
712 | +{ |
713 | + if (rfi_flush) |
714 | + return sprintf(buf, "Mitigation: RFI Flush\n"); |
715 | + |
716 | + return sprintf(buf, "Vulnerable\n"); |
717 | +} |
718 | +#endif /* CONFIG_PPC_BOOK3S_64 */ |
719 | #endif |
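The fallback flush geometry in init_fallback_flush() reduces to two divisions: the per-way stride c is l1d_size / 8 (8-way associativity assumed) and the set count is c / 128 (128-byte lines are mandatory, per the comment). Worked through for an illustrative 64 KB L1D (an example size, not a claim about any particular chip):

    u64 l1d_size = 64 * 1024;
    u64 c    = l1d_size / 8;    /* 8192: bytes per way (congruence stride) */
    u64 sets = c / 128;         /* 64: cache sets walked by the asm loop   */

The assembly fallback then loads 8 lines (one per way) from each of the 64 sets, touching the whole 64 KB area and displacing whatever was cached.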
720 | diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S |
721 | index 7394b770ae1f..b61fb7902018 100644 |
722 | --- a/arch/powerpc/kernel/vmlinux.lds.S |
723 | +++ b/arch/powerpc/kernel/vmlinux.lds.S |
724 | @@ -132,6 +132,15 @@ SECTIONS |
725 | /* Read-only data */ |
726 | RODATA |
727 | |
728 | +#ifdef CONFIG_PPC64 |
729 | + . = ALIGN(8); |
730 | + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { |
731 | + __start___rfi_flush_fixup = .; |
732 | + *(__rfi_flush_fixup) |
733 | + __stop___rfi_flush_fixup = .; |
734 | + } |
735 | +#endif |
736 | + |
737 | EXCEPTION_TABLE(0) |
738 | |
739 | NOTES :kernel :notes |
740 | diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c |
741 | index 043415f0bdb1..e86bfa111f3c 100644 |
742 | --- a/arch/powerpc/lib/feature-fixups.c |
743 | +++ b/arch/powerpc/lib/feature-fixups.c |
744 | @@ -23,6 +23,7 @@ |
745 | #include <asm/sections.h> |
746 | #include <asm/setup.h> |
747 | #include <asm/firmware.h> |
748 | +#include <asm/setup.h> |
749 | |
750 | struct fixup_entry { |
751 | unsigned long mask; |
752 | @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) |
753 | } |
754 | } |
755 | |
756 | +#ifdef CONFIG_PPC_BOOK3S_64 |
757 | +void do_rfi_flush_fixups(enum l1d_flush_type types) |
758 | +{ |
759 | + unsigned int instrs[3], *dest; |
760 | + long *start, *end; |
761 | + int i; |
762 | + |
763 | + start = PTRRELOC(&__start___rfi_flush_fixup), |
764 | + end = PTRRELOC(&__stop___rfi_flush_fixup); |
765 | + |
766 | + instrs[0] = 0x60000000; /* nop */ |
767 | + instrs[1] = 0x60000000; /* nop */ |
768 | + instrs[2] = 0x60000000; /* nop */ |
769 | + |
770 | + if (types & L1D_FLUSH_FALLBACK) |
771 | + /* b .+16 to fallback flush */ |
772 | + instrs[0] = 0x48000010; |
773 | + |
774 | + i = 0; |
775 | + if (types & L1D_FLUSH_ORI) { |
776 | + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ |
777 | + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */ |
778 | + } |
779 | + |
780 | + if (types & L1D_FLUSH_MTTRIG) |
781 | + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ |
782 | + |
783 | + for (i = 0; start < end; start++, i++) { |
784 | + dest = (void *)start + *start; |
785 | + |
786 | + pr_devel("patching dest %lx\n", (unsigned long)dest); |
787 | + |
788 | + patch_instruction(dest, instrs[0]); |
789 | + patch_instruction(dest + 1, instrs[1]); |
790 | + patch_instruction(dest + 2, instrs[2]); |
791 | + } |
792 | + |
793 | + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); |
794 | +} |
795 | +#endif /* CONFIG_PPC_BOOK3S_64 */ |
796 | + |
797 | void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) |
798 | { |
799 | long *start, *end; |
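The words patched in by do_rfi_flush_fixups() are raw PowerPC encodings, and they decode mechanically; a reading of the constants above, not new behavior:

    /* 0x60000000: ori 0,0,0 -- the architected no-op.
     * Primary opcode 24 (ori) in the top 6 bits: 24u << 26 == 0x60000000.
     *
     * 0x48000010: I-form branch "b .+16" to the fallback flush.
     * Opcode 18 (b) plus displacement 0x10: (18u << 26) | 0x10.
     *
     * 0x63ff0000: ori 31,31,0 -- opcode 24 with rS = rA = 31:
     * 0x60000000 | (31u << 21) | (31u << 16).
     */

Each fixup-table entry stores a self-relative offset (the 951b-952b in RFI_FLUSH_FIXUP_SECTION), which is why the loop recovers the patch site as dest = (void *)start + *start.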
800 | diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c |
801 | index b33faa0015cc..6f8b4c19373a 100644 |
802 | --- a/arch/powerpc/platforms/powernv/setup.c |
803 | +++ b/arch/powerpc/platforms/powernv/setup.c |
804 | @@ -35,13 +35,63 @@ |
805 | #include <asm/opal.h> |
806 | #include <asm/kexec.h> |
807 | #include <asm/smp.h> |
808 | +#include <asm/tm.h> |
809 | +#include <asm/setup.h> |
810 | |
811 | #include "powernv.h" |
812 | |
813 | +static void pnv_setup_rfi_flush(void) |
814 | +{ |
815 | + struct device_node *np, *fw_features; |
816 | + enum l1d_flush_type type; |
817 | + int enable; |
818 | + |
819 | + /* Default to fallback in case fw-features are not available */ |
820 | + type = L1D_FLUSH_FALLBACK; |
821 | + enable = 1; |
822 | + |
823 | + np = of_find_node_by_name(NULL, "ibm,opal"); |
824 | + fw_features = of_get_child_by_name(np, "fw-features"); |
825 | + of_node_put(np); |
826 | + |
827 | + if (fw_features) { |
828 | + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); |
829 | + if (np && of_property_read_bool(np, "enabled")) |
830 | + type = L1D_FLUSH_MTTRIG; |
831 | + |
832 | + of_node_put(np); |
833 | + |
834 | + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); |
835 | + if (np && of_property_read_bool(np, "enabled")) |
836 | + type = L1D_FLUSH_ORI; |
837 | + |
838 | + of_node_put(np); |
839 | + |
840 | + /* Enable unless firmware says NOT to */ |
841 | + enable = 2; |
842 | + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); |
843 | + if (np && of_property_read_bool(np, "disabled")) |
844 | + enable--; |
845 | + |
846 | + of_node_put(np); |
847 | + |
848 | + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); |
849 | + if (np && of_property_read_bool(np, "disabled")) |
850 | + enable--; |
851 | + |
852 | + of_node_put(np); |
853 | + of_node_put(fw_features); |
854 | + } |
855 | + |
856 | + setup_rfi_flush(type, enable > 0); |
857 | +} |
858 | + |
859 | static void __init pnv_setup_arch(void) |
860 | { |
861 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); |
862 | |
863 | + pnv_setup_rfi_flush(); |
864 | + |
865 | /* Initialize SMP */ |
866 | pnv_smp_init(); |
867 | |
868 | diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c |
869 | index 97aa3f332f24..1845fc611912 100644 |
870 | --- a/arch/powerpc/platforms/pseries/setup.c |
871 | +++ b/arch/powerpc/platforms/pseries/setup.c |
872 | @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void) |
873 | of_pci_check_probe_only(); |
874 | } |
875 | |
876 | +static void pseries_setup_rfi_flush(void) |
877 | +{ |
878 | + struct h_cpu_char_result result; |
879 | + enum l1d_flush_type types; |
880 | + bool enable; |
881 | + long rc; |
882 | + |
883 | + /* Enable by default */ |
884 | + enable = true; |
885 | + |
886 | + rc = plpar_get_cpu_characteristics(&result); |
887 | + if (rc == H_SUCCESS) { |
888 | + types = L1D_FLUSH_NONE; |
889 | + |
890 | + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) |
891 | + types |= L1D_FLUSH_MTTRIG; |
892 | + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) |
893 | + types |= L1D_FLUSH_ORI; |
894 | + |
895 | + /* Use fallback if nothing set in hcall */ |
896 | + if (types == L1D_FLUSH_NONE) |
897 | + types = L1D_FLUSH_FALLBACK; |
898 | + |
899 | + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) |
900 | + enable = false; |
901 | + } else { |
902 | + /* Default to fallback in case hcall is not available */ |
903 | + types = L1D_FLUSH_FALLBACK; |
904 | + } |
905 | + |
906 | + setup_rfi_flush(types, enable); |
907 | +} |
908 | + |
909 | static void __init pSeries_setup_arch(void) |
910 | { |
911 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); |
912 | @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void) |
913 | |
914 | fwnmi_init(); |
915 | |
916 | + pseries_setup_rfi_flush(); |
917 | + |
918 | /* By default, only probe PCI (can be overridden by rtas_pci) */ |
919 | pci_add_flags(PCI_PROBE_ONLY); |
920 | |
921 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c |
922 | index bdd9cc59d20f..b0cd306dc527 100644 |
923 | --- a/arch/x86/entry/common.c |
924 | +++ b/arch/x86/entry/common.c |
925 | @@ -20,6 +20,7 @@ |
926 | #include <linux/export.h> |
927 | #include <linux/context_tracking.h> |
928 | #include <linux/user-return-notifier.h> |
929 | +#include <linux/nospec.h> |
930 | #include <linux/uprobes.h> |
931 | |
932 | #include <asm/desc.h> |
933 | @@ -201,7 +202,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
934 | * special case only applies after poking regs and before the |
935 | * very next return to user mode. |
936 | */ |
937 | - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
938 | + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); |
939 | #endif |
940 | |
941 | user_enter_irqoff(); |
942 | @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) |
943 | * regs->orig_ax, which changes the behavior of some syscalls. |
944 | */ |
945 | if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { |
946 | - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( |
947 | + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); |
948 | + regs->ax = sys_call_table[nr]( |
949 | regs->di, regs->si, regs->dx, |
950 | regs->r10, regs->r8, regs->r9); |
951 | } |
952 | @@ -299,7 +301,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
953 | unsigned int nr = (unsigned int)regs->orig_ax; |
954 | |
955 | #ifdef CONFIG_IA32_EMULATION |
956 | - current->thread.status |= TS_COMPAT; |
957 | + ti->status |= TS_COMPAT; |
958 | #endif |
959 | |
960 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { |
961 | @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) |
962 | } |
963 | |
964 | if (likely(nr < IA32_NR_syscalls)) { |
965 | + nr = array_index_nospec(nr, IA32_NR_syscalls); |
966 | /* |
967 | * It's possible that a 32-bit syscall implementation |
968 | * takes a 64-bit parameter but nonetheless assumes that |
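Both syscall hunks apply the same contract from <linux/nospec.h>: array_index_nospec() returns its input when the index is in bounds and 0 otherwise, built on the mask primitive. Sketched here without the macro's type plumbing:

    /* Simplified shape of nr = array_index_nospec(nr, NR_syscalls): */
    unsigned long mask = array_index_mask_nospec(nr, NR_syscalls);
    nr &= mask;     /* unchanged if nr < NR_syscalls, else 0 */

A mispredicted bounds check can therefore only ever load an in-bounds sys_call_table entry, never an attacker-chosen address.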
969 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
970 | index a76dc738ec61..f5434b4670c1 100644 |
971 | --- a/arch/x86/entry/entry_32.S |
972 | +++ b/arch/x86/entry/entry_32.S |
973 | @@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) |
974 | * exist, overwrite the RSB with entries which capture |
975 | * speculative execution to prevent attack. |
976 | */ |
977 | - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
978 | + /* Clobbers %ebx */ |
979 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
980 | #endif |
981 | |
982 | /* restore callee-saved registers */ |
983 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
984 | index e729e1528584..db5009ce065a 100644 |
985 | --- a/arch/x86/entry/entry_64.S |
986 | +++ b/arch/x86/entry/entry_64.S |
987 | @@ -177,96 +177,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) |
988 | pushq %r9 /* pt_regs->r9 */ |
989 | pushq %r10 /* pt_regs->r10 */ |
990 | pushq %r11 /* pt_regs->r11 */ |
991 | - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ |
992 | + pushq %rbx /* pt_regs->rbx */ |
993 | + pushq %rbp /* pt_regs->rbp */ |
994 | + pushq %r12 /* pt_regs->r12 */ |
995 | + pushq %r13 /* pt_regs->r13 */ |
996 | + pushq %r14 /* pt_regs->r14 */ |
997 | + pushq %r15 /* pt_regs->r15 */ |
998 | |
999 | - /* |
1000 | - * If we need to do entry work or if we guess we'll need to do |
1001 | - * exit work, go straight to the slow path. |
1002 | - */ |
1003 | - movq PER_CPU_VAR(current_task), %r11 |
1004 | - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
1005 | - jnz entry_SYSCALL64_slow_path |
1006 | - |
1007 | -entry_SYSCALL_64_fastpath: |
1008 | - /* |
1009 | - * Easy case: enable interrupts and issue the syscall. If the syscall |
1010 | - * needs pt_regs, we'll call a stub that disables interrupts again |
1011 | - * and jumps to the slow path. |
1012 | - */ |
1013 | - TRACE_IRQS_ON |
1014 | - ENABLE_INTERRUPTS(CLBR_NONE) |
1015 | -#if __SYSCALL_MASK == ~0 |
1016 | - cmpq $__NR_syscall_max, %rax |
1017 | -#else |
1018 | - andl $__SYSCALL_MASK, %eax |
1019 | - cmpl $__NR_syscall_max, %eax |
1020 | -#endif |
1021 | - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ |
1022 | - movq %r10, %rcx |
1023 | - |
1024 | - /* |
1025 | - * This call instruction is handled specially in stub_ptregs_64. |
1026 | - * It might end up jumping to the slow path. If it jumps, RAX |
1027 | - * and all argument registers are clobbered. |
1028 | - */ |
1029 | -#ifdef CONFIG_RETPOLINE |
1030 | - movq sys_call_table(, %rax, 8), %rax |
1031 | - call __x86_indirect_thunk_rax |
1032 | -#else |
1033 | - call *sys_call_table(, %rax, 8) |
1034 | -#endif |
1035 | -.Lentry_SYSCALL_64_after_fastpath_call: |
1036 | - |
1037 | - movq %rax, RAX(%rsp) |
1038 | -1: |
1039 | - |
1040 | - /* |
1041 | - * If we get here, then we know that pt_regs is clean for SYSRET64. |
1042 | - * If we see that no exit work is required (which we are required |
1043 | - * to check with IRQs off), then we can go straight to SYSRET64. |
1044 | - */ |
1045 | - DISABLE_INTERRUPTS(CLBR_NONE) |
1046 | - TRACE_IRQS_OFF |
1047 | - movq PER_CPU_VAR(current_task), %r11 |
1048 | - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) |
1049 | - jnz 1f |
1050 | - |
1051 | - LOCKDEP_SYS_EXIT |
1052 | - TRACE_IRQS_ON /* user mode is traced as IRQs on */ |
1053 | - movq RIP(%rsp), %rcx |
1054 | - movq EFLAGS(%rsp), %r11 |
1055 | - RESTORE_C_REGS_EXCEPT_RCX_R11 |
1056 | - /* |
1057 | - * This opens a window where we have a user CR3, but are |
1058 | - * running in the kernel. This makes using the CS |
1059 | - * register useless for telling whether or not we need to |
1060 | - * switch CR3 in NMIs. Normal interrupts are OK because |
1061 | - * they are off here. |
1062 | - */ |
1063 | - SWITCH_USER_CR3 |
1064 | - movq RSP(%rsp), %rsp |
1065 | - USERGS_SYSRET64 |
1066 | - |
1067 | -1: |
1068 | - /* |
1069 | - * The fast path looked good when we started, but something changed |
1070 | - * along the way and we need to switch to the slow path. Calling |
1071 | - * raise(3) will trigger this, for example. IRQs are off. |
1072 | - */ |
1073 | - TRACE_IRQS_ON |
1074 | - ENABLE_INTERRUPTS(CLBR_NONE) |
1075 | - SAVE_EXTRA_REGS |
1076 | - movq %rsp, %rdi |
1077 | - call syscall_return_slowpath /* returns with IRQs disabled */ |
1078 | - jmp return_from_SYSCALL_64 |
1079 | - |
1080 | -entry_SYSCALL64_slow_path: |
1081 | /* IRQs are off. */ |
1082 | - SAVE_EXTRA_REGS |
1083 | movq %rsp, %rdi |
1084 | call do_syscall_64 /* returns with IRQs disabled */ |
1085 | |
1086 | -return_from_SYSCALL_64: |
1087 | RESTORE_EXTRA_REGS |
1088 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
1089 | |
1090 | @@ -339,6 +260,7 @@ return_from_SYSCALL_64: |
1091 | syscall_return_via_sysret: |
1092 | /* rcx and r11 are already restored (see code above) */ |
1093 | RESTORE_C_REGS_EXCEPT_RCX_R11 |
1094 | + |
1095 | /* |
1096 | * This opens a window where we have a user CR3, but are |
1097 | * running in the kernel. This makes using the CS |
1098 | @@ -363,45 +285,6 @@ opportunistic_sysret_failed: |
1099 | jmp restore_c_regs_and_iret |
1100 | END(entry_SYSCALL_64) |
1101 | |
1102 | -ENTRY(stub_ptregs_64) |
1103 | - /* |
1104 | - * Syscalls marked as needing ptregs land here. |
1105 | - * If we are on the fast path, we need to save the extra regs, |
1106 | - * which we achieve by trying again on the slow path. If we are on |
1107 | - * the slow path, the extra regs are already saved. |
1108 | - * |
1109 | - * RAX stores a pointer to the C function implementing the syscall. |
1110 | - * IRQs are on. |
1111 | - */ |
1112 | - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) |
1113 | - jne 1f |
1114 | - |
1115 | - /* |
1116 | - * Called from fast path -- disable IRQs again, pop return address |
1117 | - * and jump to slow path |
1118 | - */ |
1119 | - DISABLE_INTERRUPTS(CLBR_NONE) |
1120 | - TRACE_IRQS_OFF |
1121 | - popq %rax |
1122 | - jmp entry_SYSCALL64_slow_path |
1123 | - |
1124 | -1: |
1125 | - JMP_NOSPEC %rax /* Called from C */ |
1126 | -END(stub_ptregs_64) |
1127 | - |
1128 | -.macro ptregs_stub func |
1129 | -ENTRY(ptregs_\func) |
1130 | - leaq \func(%rip), %rax |
1131 | - jmp stub_ptregs_64 |
1132 | -END(ptregs_\func) |
1133 | -.endm |
1134 | - |
1135 | -/* Instantiate ptregs_stub for each ptregs-using syscall */ |
1136 | -#define __SYSCALL_64_QUAL_(sym) |
1137 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym |
1138 | -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) |
1139 | -#include <asm/syscalls_64.h> |
1140 | - |
1141 | /* |
1142 | * %rdi: prev task |
1143 | * %rsi: next task |
1144 | @@ -435,7 +318,8 @@ ENTRY(__switch_to_asm) |
1145 | * exist, overwrite the RSB with entries which capture |
1146 | * speculative execution to prevent attack. |
1147 | */ |
1148 | - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
1149 | + /* Clobbers %rbx */ |
1150 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW |
1151 | #endif |
1152 | |
1153 | /* restore callee-saved registers */ |
1154 | diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c |
1155 | index 9dbc5abb6162..6705edda4ac3 100644 |
1156 | --- a/arch/x86/entry/syscall_64.c |
1157 | +++ b/arch/x86/entry/syscall_64.c |
1158 | @@ -6,14 +6,11 @@ |
1159 | #include <asm/asm-offsets.h> |
1160 | #include <asm/syscall.h> |
1161 | |
1162 | -#define __SYSCALL_64_QUAL_(sym) sym |
1163 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym |
1164 | - |
1165 | -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
1166 | +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
1167 | #include <asm/syscalls_64.h> |
1168 | #undef __SYSCALL_64 |
1169 | |
1170 | -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), |
1171 | +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, |
1172 | |
1173 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); |
1174 | |
1175 | diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c |
1176 | index 982c9e31daca..21298c173b0e 100644 |
1177 | --- a/arch/x86/events/intel/bts.c |
1178 | +++ b/arch/x86/events/intel/bts.c |
1179 | @@ -22,6 +22,7 @@ |
1180 | #include <linux/debugfs.h> |
1181 | #include <linux/device.h> |
1182 | #include <linux/coredump.h> |
1183 | +#include <linux/kaiser.h> |
1184 | |
1185 | #include <asm-generic/sizes.h> |
1186 | #include <asm/perf_event.h> |
1187 | @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) |
1188 | return 1 << (PAGE_SHIFT + page_private(page)); |
1189 | } |
1190 | |
1191 | +static void bts_buffer_free_aux(void *data) |
1192 | +{ |
1193 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION |
1194 | + struct bts_buffer *buf = data; |
1195 | + int nbuf; |
1196 | + |
1197 | + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { |
1198 | + struct page *page = buf->buf[nbuf].page; |
1199 | + void *kaddr = page_address(page); |
1200 | + size_t page_size = buf_size(page); |
1201 | + |
1202 | + kaiser_remove_mapping((unsigned long)kaddr, page_size); |
1203 | + } |
1204 | +#endif |
1205 | + kfree(data); |
1206 | +} |
1207 | + |
1208 | static void * |
1209 | bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) |
1210 | { |
1211 | @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) |
1212 | buf->real_size = size - size % BTS_RECORD_SIZE; |
1213 | |
1214 | for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { |
1215 | - unsigned int __nr_pages; |
1216 | + void *kaddr = pages[pg]; |
1217 | + size_t page_size; |
1218 | + |
1219 | + page = virt_to_page(kaddr); |
1220 | + page_size = buf_size(page); |
1221 | + |
1222 | + if (kaiser_add_mapping((unsigned long)kaddr, |
1223 | + page_size, __PAGE_KERNEL) < 0) { |
1224 | + buf->nr_bufs = nbuf; |
1225 | + bts_buffer_free_aux(buf); |
1226 | + return NULL; |
1227 | + } |
1228 | |
1229 | - page = virt_to_page(pages[pg]); |
1230 | - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; |
1231 | buf->buf[nbuf].page = page; |
1232 | buf->buf[nbuf].offset = offset; |
1233 | buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); |
1234 | - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; |
1235 | + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; |
1236 | pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; |
1237 | buf->buf[nbuf].size -= pad; |
1238 | |
1239 | - pg += __nr_pages; |
1240 | - offset += __nr_pages << PAGE_SHIFT; |
1241 | + pg += page_size >> PAGE_SHIFT; |
1242 | + offset += page_size; |
1243 | } |
1244 | |
1245 | return buf; |
1246 | } |
1247 | |
1248 | -static void bts_buffer_free_aux(void *data) |
1249 | -{ |
1250 | - kfree(data); |
1251 | -} |
1252 | - |
1253 | static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) |
1254 | { |
1255 | return buf->buf[idx].offset + buf->buf[idx].displacement; |
1256 | diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h |
1257 | index b15aa4083dfd..166654218329 100644 |
1258 | --- a/arch/x86/include/asm/asm-prototypes.h |
1259 | +++ b/arch/x86/include/asm/asm-prototypes.h |
1260 | @@ -37,5 +37,7 @@ INDIRECT_THUNK(dx) |
1261 | INDIRECT_THUNK(si) |
1262 | INDIRECT_THUNK(di) |
1263 | INDIRECT_THUNK(bp) |
1264 | -INDIRECT_THUNK(sp) |
1265 | +asmlinkage void __fill_rsb(void); |
1266 | +asmlinkage void __clear_rsb(void); |
1267 | + |
1268 | #endif /* CONFIG_RETPOLINE */ |
1269 | diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h |
1270 | index 00523524edbf..7bb29a416b77 100644 |
1271 | --- a/arch/x86/include/asm/asm.h |
1272 | +++ b/arch/x86/include/asm/asm.h |
1273 | @@ -11,10 +11,12 @@ |
1274 | # define __ASM_FORM_COMMA(x) " " #x "," |
1275 | #endif |
1276 | |
1277 | -#ifdef CONFIG_X86_32 |
1278 | +#ifndef __x86_64__ |
1279 | +/* 32 bit */ |
1280 | # define __ASM_SEL(a,b) __ASM_FORM(a) |
1281 | # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) |
1282 | #else |
1283 | +/* 64 bit */ |
1284 | # define __ASM_SEL(a,b) __ASM_FORM(b) |
1285 | # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) |
1286 | #endif |
1287 | diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h |
1288 | index bfb28caf97b1..857590390397 100644 |
1289 | --- a/arch/x86/include/asm/barrier.h |
1290 | +++ b/arch/x86/include/asm/barrier.h |
1291 | @@ -23,6 +23,34 @@ |
1292 | #define wmb() asm volatile("sfence" ::: "memory") |
1293 | #endif |
1294 | |
1295 | +/** |
1296 | + * array_index_mask_nospec() - generate a mask that is ~0UL when the |
1297 | + * bounds check succeeds and 0 otherwise |
1298 | + * @index: array element index |
1299 | + * @size: number of elements in array |
1300 | + * |
1301 | + * Returns: |
1302 | + * 0 - (index < size) |
1303 | + */ |
1304 | +static inline unsigned long array_index_mask_nospec(unsigned long index, |
1305 | + unsigned long size) |
1306 | +{ |
1307 | + unsigned long mask; |
1308 | + |
1309 | + asm ("cmp %1,%2; sbb %0,%0;" |
1310 | + :"=r" (mask) |
1311 | + :"r"(size),"r" (index) |
1312 | + :"cc"); |
1313 | + return mask; |
1314 | +} |
1315 | + |
1316 | +/* Override the default implementation from linux/nospec.h. */ |
1317 | +#define array_index_mask_nospec array_index_mask_nospec |
1318 | + |
1319 | +/* Prevent speculative execution past this barrier. */ |
1320 | +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ |
1321 | + "lfence", X86_FEATURE_LFENCE_RDTSC) |
1322 | + |
1323 | #ifdef CONFIG_X86_PPRO_FENCE |
1324 | #define dma_rmb() rmb() |
1325 | #else |
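The two-instruction asm in array_index_mask_nospec() is the whole trick: cmp sets the carry flag exactly when the index is in bounds (unsigned borrow), and sbb of a register with itself computes 0 - CF. Annotated with example values (a reading of the asm above):

    /* index = 3, size = 8: "cmp %1,%2" computes 3 - 8, borrows, CF = 1
     *   "sbb %0,%0"  ->  mask = 0 - 1 = ~0UL  (all ones)
     *
     * index = 9, size = 8: 9 - 8 does not borrow, CF = 0
     *   "sbb %0,%0"  ->  mask = 0              (all zeroes)
     *
     * array_index_nospec() then ANDs the index with the mask.
     */

Because the mask is produced by data dependencies rather than a branch, there is nothing for the branch predictor to speculate past.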
1326 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h |
1327 | index 9ea67a04ff4f..8c101579f535 100644 |
1328 | --- a/arch/x86/include/asm/cpufeature.h |
1329 | +++ b/arch/x86/include/asm/cpufeature.h |
1330 | @@ -28,6 +28,7 @@ enum cpuid_leafs |
1331 | CPUID_8000_000A_EDX, |
1332 | CPUID_7_ECX, |
1333 | CPUID_8000_0007_EBX, |
1334 | + CPUID_7_EDX, |
1335 | }; |
1336 | |
1337 | #ifdef CONFIG_X86_FEATURE_NAMES |
1338 | @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
1339 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ |
1340 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ |
1341 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ |
1342 | + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ |
1343 | REQUIRED_MASK_CHECK || \ |
1344 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
1345 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
1346 | |
1347 | #define DISABLED_MASK_BIT_SET(feature_bit) \ |
1348 | ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ |
1349 | @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
1350 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ |
1351 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ |
1352 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ |
1353 | + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ |
1354 | DISABLED_MASK_CHECK || \ |
1355 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) |
1356 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) |
1357 | |
1358 | #define cpu_has(c, bit) \ |
1359 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ |
1360 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
1361 | index 8537a21acd8b..8eb23f5cf7f4 100644 |
1362 | --- a/arch/x86/include/asm/cpufeatures.h |
1363 | +++ b/arch/x86/include/asm/cpufeatures.h |
1364 | @@ -12,7 +12,7 @@ |
1365 | /* |
1366 | * Defines x86 CPU feature bits |
1367 | */ |
1368 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ |
1369 | +#define NCAPINTS 19 /* N 32-bit words worth of info */ |
1370 | #define NBUGINTS 1 /* N 32-bit bug flags */ |
1371 | |
1372 | /* |
1373 | @@ -194,16 +194,16 @@ |
1374 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ |
1375 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
1376 | |
1377 | -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ |
1378 | -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ |
1379 | +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ |
1380 | +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ |
1381 | |
1382 | -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ |
1383 | -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
1384 | -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ |
1385 | +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ |
1386 | |
1387 | /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ |
1388 | #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ |
1389 | |
1390 | +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ |
1391 | + |
1392 | /* Virtualization flags: Linux defined, word 8 */ |
1393 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
1394 | #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ |
1395 | @@ -260,6 +260,9 @@ |
1396 | /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ |
1397 | #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ |
1398 | #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ |
1399 | +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ |
1400 | +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ |
1401 | +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ |
1402 | |
1403 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ |
1404 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
1405 | @@ -295,6 +298,13 @@ |
1406 | #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ |
1407 | #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ |
1408 | |
1409 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ |
1410 | +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ |
1411 | +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ |
1412 | +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ |
1413 | +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ |
1414 | +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ |
1415 | + |
1416 | /* |
1417 | * BUG word(s) |
1418 | */ |
1419 | diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h |
1420 | index 21c5ac15657b..1f8cca459c6c 100644 |
1421 | --- a/arch/x86/include/asm/disabled-features.h |
1422 | +++ b/arch/x86/include/asm/disabled-features.h |
1423 | @@ -59,6 +59,7 @@ |
1424 | #define DISABLED_MASK15 0 |
1425 | #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) |
1426 | #define DISABLED_MASK17 0 |
1427 | -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
1428 | +#define DISABLED_MASK18 0 |
1429 | +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
1430 | |
1431 | #endif /* _ASM_X86_DISABLED_FEATURES_H */ |
1432 | diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h |
1433 | index 34a46dc076d3..75b748a1deb8 100644 |
1434 | --- a/arch/x86/include/asm/intel-family.h |
1435 | +++ b/arch/x86/include/asm/intel-family.h |
1436 | @@ -12,6 +12,7 @@ |
1437 | */ |
1438 | |
1439 | #define INTEL_FAM6_CORE_YONAH 0x0E |
1440 | + |
1441 | #define INTEL_FAM6_CORE2_MEROM 0x0F |
1442 | #define INTEL_FAM6_CORE2_MEROM_L 0x16 |
1443 | #define INTEL_FAM6_CORE2_PENRYN 0x17 |
1444 | @@ -21,6 +22,7 @@ |
1445 | #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ |
1446 | #define INTEL_FAM6_NEHALEM_EP 0x1A |
1447 | #define INTEL_FAM6_NEHALEM_EX 0x2E |
1448 | + |
1449 | #define INTEL_FAM6_WESTMERE 0x25 |
1450 | #define INTEL_FAM6_WESTMERE_EP 0x2C |
1451 | #define INTEL_FAM6_WESTMERE_EX 0x2F |
1452 | @@ -36,9 +38,9 @@ |
1453 | #define INTEL_FAM6_HASWELL_GT3E 0x46 |
1454 | |
1455 | #define INTEL_FAM6_BROADWELL_CORE 0x3D |
1456 | -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 |
1457 | #define INTEL_FAM6_BROADWELL_GT3E 0x47 |
1458 | #define INTEL_FAM6_BROADWELL_X 0x4F |
1459 | +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 |
1460 | |
1461 | #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E |
1462 | #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E |
1463 | @@ -57,9 +59,10 @@ |
1464 | #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ |
1465 | #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ |
1466 | #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ |
1467 | -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ |
1468 | +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ |
1469 | #define INTEL_FAM6_ATOM_GOLDMONT 0x5C |
1470 | #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ |
1471 | +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A |
1472 | |
1473 | /* Xeon Phi */ |
1474 | |
1475 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h |
1476 | index b11c4c072df8..c768bc1550a1 100644 |
1477 | --- a/arch/x86/include/asm/msr-index.h |
1478 | +++ b/arch/x86/include/asm/msr-index.h |
1479 | @@ -37,6 +37,13 @@ |
1480 | #define EFER_FFXSR (1<<_EFER_FFXSR) |
1481 | |
1482 | /* Intel MSRs. Some also available on other CPUs */ |
1483 | +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ |
1484 | +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ |
1485 | +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ |
1486 | + |
1487 | +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ |
1488 | +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ |
1489 | + |
1490 | #define MSR_IA32_PERFCTR0 0x000000c1 |
1491 | #define MSR_IA32_PERFCTR1 0x000000c2 |
1492 | #define MSR_FSB_FREQ 0x000000cd |
1493 | @@ -50,6 +57,11 @@ |
1494 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
1495 | |
1496 | #define MSR_MTRRcap 0x000000fe |
1497 | + |
1498 | +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a |
1499 | +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ |
1500 | +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ |
1501 | + |
1502 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
1503 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
1504 | |
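The two new MSRs are simple command/control registers: SPEC_CTRL holds sticky control bits (IBRS, STIBP), while PRED_CMD is write-only and acts on each write (IBPB). A standalone sketch of the bit layout, with values mirrored from the hunk above:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_IA32_SPEC_CTRL 0x00000048
    #define SPEC_CTRL_IBRS     (1ULL << 0)  /* restrict indirect branch speculation */
    #define SPEC_CTRL_STIBP    (1ULL << 1)  /* isolate sibling-thread predictions */

    #define MSR_IA32_PRED_CMD  0x00000049
    #define PRED_CMD_IBPB      (1ULL << 0)  /* barrier command, acts on write */

    int main(void)
    {
        uint64_t spec_ctrl = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP;
        printf("MSR 0x%x value 0x%llx (IBRS %d, STIBP %d)\n",
               MSR_IA32_SPEC_CTRL, (unsigned long long)spec_ctrl,
               !!(spec_ctrl & SPEC_CTRL_IBRS), !!(spec_ctrl & SPEC_CTRL_STIBP));
        printf("IBPB is triggered by writing 0x%llx to MSR 0x%x\n",
               (unsigned long long)PRED_CMD_IBPB, MSR_IA32_PRED_CMD);
        return 0;
    }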
1505 | diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h |
1506 | index b5fee97813cd..ed35b915b5c9 100644 |
1507 | --- a/arch/x86/include/asm/msr.h |
1508 | +++ b/arch/x86/include/asm/msr.h |
1509 | @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) |
1510 | * that some other imaginary CPU is updating continuously with a |
1511 | * time stamp. |
1512 | */ |
1513 | - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, |
1514 | - "lfence", X86_FEATURE_LFENCE_RDTSC); |
1515 | + barrier_nospec(); |
1516 | return rdtsc(); |
1517 | } |
1518 | |
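barrier_nospec() collapses the old two-way alternative into one macro that still resolves to MFENCE or LFENCE depending on the CPU. A user-space sketch of the same ordered-TSC-read pattern, hardwiring LFENCE for simplicity (x86-64, GCC/Clang):

    #include <stdio.h>
    #include <stdint.h>

    /* Fence first so the timestamp cannot be taken speculatively ahead
     * of earlier loads; the kernel selects the fence via alternatives. */
    static inline uint64_t rdtsc_ordered(void)
    {
        uint32_t lo, hi;
        __asm__ __volatile__("lfence; rdtsc" : "=a"(lo), "=d"(hi) :: "memory");
        return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
        uint64_t t0 = rdtsc_ordered();
        uint64_t t1 = rdtsc_ordered();
        printf("delta: %llu cycles\n", (unsigned long long)(t1 - t0));
        return 0;
    }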
1519 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
1520 | index 4ad41087ce0e..300cc159b4a0 100644 |
1521 | --- a/arch/x86/include/asm/nospec-branch.h |
1522 | +++ b/arch/x86/include/asm/nospec-branch.h |
1523 | @@ -1,56 +1,12 @@ |
1524 | /* SPDX-License-Identifier: GPL-2.0 */ |
1525 | |
1526 | -#ifndef __NOSPEC_BRANCH_H__ |
1527 | -#define __NOSPEC_BRANCH_H__ |
1528 | +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ |
1529 | +#define _ASM_X86_NOSPEC_BRANCH_H_ |
1530 | |
1531 | #include <asm/alternative.h> |
1532 | #include <asm/alternative-asm.h> |
1533 | #include <asm/cpufeatures.h> |
1534 | |
1535 | -/* |
1536 | - * Fill the CPU return stack buffer. |
1537 | - * |
1538 | - * Each entry in the RSB, if used for a speculative 'ret', contains an |
1539 | - * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
1540 | - * |
1541 | - * This is required in various cases for retpoline and IBRS-based |
1542 | - * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
1543 | - * eliminate potentially bogus entries from the RSB, and sometimes |
1544 | - * purely to ensure that it doesn't get empty, which on some CPUs would |
1545 | - * allow predictions from other (unwanted!) sources to be used. |
1546 | - * |
1547 | - * We define a CPP macro such that it can be used from both .S files and |
1548 | - * inline assembly. It's possible to do a .macro and then include that |
1549 | - * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. |
1550 | - */ |
1551 | - |
1552 | -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
1553 | -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
1554 | - |
1555 | -/* |
1556 | - * Google experimented with loop-unrolling and this turned out to be |
1557 | - * the optimal version — two calls, each with their own speculation |
1558 | - * trap should their return address end up getting used, in a loop. |
1559 | - */ |
1560 | -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ |
1561 | - mov $(nr/2), reg; \ |
1562 | -771: \ |
1563 | - call 772f; \ |
1564 | -773: /* speculation trap */ \ |
1565 | - pause; \ |
1566 | - lfence; \ |
1567 | - jmp 773b; \ |
1568 | -772: \ |
1569 | - call 774f; \ |
1570 | -775: /* speculation trap */ \ |
1571 | - pause; \ |
1572 | - lfence; \ |
1573 | - jmp 775b; \ |
1574 | -774: \ |
1575 | - dec reg; \ |
1576 | - jnz 771b; \ |
1577 | - add $(BITS_PER_LONG/8) * nr, sp; |
1578 | - |
1579 | #ifdef __ASSEMBLY__ |
1580 | |
1581 | /* |
1582 | @@ -121,17 +77,10 @@ |
1583 | #endif |
1584 | .endm |
1585 | |
1586 | - /* |
1587 | - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP |
1588 | - * monstrosity above, manually. |
1589 | - */ |
1590 | -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req |
1591 | +/* This clobbers the BX register */ |
1592 | +.macro FILL_RETURN_BUFFER nr:req ftr:req |
1593 | #ifdef CONFIG_RETPOLINE |
1594 | - ANNOTATE_NOSPEC_ALTERNATIVE |
1595 | - ALTERNATIVE "jmp .Lskip_rsb_\@", \ |
1596 | - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ |
1597 | - \ftr |
1598 | -.Lskip_rsb_\@: |
1599 | + ALTERNATIVE "", "call __clear_rsb", \ftr |
1600 | #endif |
1601 | .endm |
1602 | |
1603 | @@ -201,22 +150,30 @@ extern char __indirect_thunk_end[]; |
1604 | * On VMEXIT we must ensure that no RSB predictions learned in the guest |
1605 | * can be followed in the host, by overwriting the RSB completely. Both |
1606 | * retpoline and IBRS mitigations for Spectre v2 need this; only on future |
1607 | - * CPUs with IBRS_ATT *might* it be avoided. |
1608 | + * CPUs with IBRS_ALL *might* it be avoided. |
1609 | */ |
1610 | static inline void vmexit_fill_RSB(void) |
1611 | { |
1612 | #ifdef CONFIG_RETPOLINE |
1613 | - unsigned long loops; |
1614 | - |
1615 | - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE |
1616 | - ALTERNATIVE("jmp 910f", |
1617 | - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), |
1618 | - X86_FEATURE_RETPOLINE) |
1619 | - "910:" |
1620 | - : "=r" (loops), ASM_CALL_CONSTRAINT |
1621 | - : : "memory" ); |
1622 | + alternative_input("", |
1623 | + "call __fill_rsb", |
1624 | + X86_FEATURE_RETPOLINE, |
1625 | + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); |
1626 | #endif |
1627 | } |
1628 | |
1629 | +static inline void indirect_branch_prediction_barrier(void) |
1630 | +{ |
1631 | + asm volatile(ALTERNATIVE("", |
1632 | + "movl %[msr], %%ecx\n\t" |
1633 | + "movl %[val], %%eax\n\t" |
1634 | + "movl $0, %%edx\n\t" |
1635 | + "wrmsr", |
1636 | + X86_FEATURE_USE_IBPB) |
1637 | + : : [msr] "i" (MSR_IA32_PRED_CMD), |
1638 | + [val] "i" (PRED_CMD_IBPB) |
1639 | + : "eax", "ecx", "edx", "memory"); |
1640 | +} |
1641 | + |
1642 | #endif /* __ASSEMBLY__ */ |
1643 | -#endif /* __NOSPEC_BRANCH_H__ */ |
1644 | +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
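indirect_branch_prediction_barrier() expands to a WRMSR of PRED_CMD_IBPB that the alternatives machinery patches in only when X86_FEATURE_USE_IBPB is set; ECX carries the MSR index and EAX:EDX the 64-bit value. For illustration, the same write can be issued from user space through Linux's msr driver; this assumes the 'msr' module is loaded, a CPU with IBPB, and root:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_IA32_PRED_CMD 0x49
    #define PRED_CMD_IBPB     (1ULL << 0)

    int main(void)
    {
        uint64_t val = PRED_CMD_IBPB;
        int fd = open("/dev/cpu/0/msr", O_WRONLY);

        if (fd < 0) {
            perror("open /dev/cpu/0/msr");
            return 1;
        }
        /* For the msr driver, the file offset selects the MSR index */
        if (pwrite(fd, &val, sizeof(val), MSR_IA32_PRED_CMD) != sizeof(val))
            perror("IBPB write");
        close(fd);
        return 0;
    }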
1645 | diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h |
1646 | index 1178a51b77f3..b6d425999f99 100644 |
1647 | --- a/arch/x86/include/asm/pgalloc.h |
1648 | +++ b/arch/x86/include/asm/pgalloc.h |
1649 | @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} |
1650 | */ |
1651 | extern gfp_t __userpte_alloc_gfp; |
1652 | |
1653 | -#ifdef CONFIG_PAGE_TABLE_ISOLATION |
1654 | -/* |
1655 | - * Instead of one PGD, we acquire two PGDs. Being order-1, it is |
1656 | - * both 8k in size and 8k-aligned. That lets us just flip bit 12 |
1657 | - * in a pointer to swap between the two 4k halves. |
1658 | - */ |
1659 | -#define PGD_ALLOCATION_ORDER 1 |
1660 | -#else |
1661 | -#define PGD_ALLOCATION_ORDER 0 |
1662 | -#endif |
1663 | - |
1664 | /* |
1665 | * Allocate and free page tables. |
1666 | */ |
1667 | diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h |
1668 | index 2536f90cd30c..5af0401ccff2 100644 |
1669 | --- a/arch/x86/include/asm/pgtable.h |
1670 | +++ b/arch/x86/include/asm/pgtable.h |
1671 | @@ -20,9 +20,15 @@ |
1672 | |
1673 | #ifdef CONFIG_PAGE_TABLE_ISOLATION |
1674 | extern int kaiser_enabled; |
1675 | +/* |
1676 | + * Instead of one PGD, we acquire two PGDs. Being order-1, it is |
1677 | + * both 8k in size and 8k-aligned. That lets us just flip bit 12 |
1678 | + * in a pointer to swap between the two 4k halves. |
1679 | + */ |
1680 | #else |
1681 | #define kaiser_enabled 0 |
1682 | #endif |
1683 | +#define PGD_ALLOCATION_ORDER kaiser_enabled |
1684 | |
1685 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); |
1686 | void ptdump_walk_pgd_level_checkwx(void); |
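Making PGD_ALLOCATION_ORDER equal to kaiser_enabled turns the old compile-time choice into a runtime one: order 1 (two pages, 8k-aligned) when KAISER/PTI is active, order 0 otherwise. A trivial standalone illustration of what the order means for the allocation size:

    #include <stdio.h>

    static int kaiser_enabled = 1;   /* runtime toggle, as in the patch */
    #define PGD_ALLOCATION_ORDER kaiser_enabled

    int main(void)
    {
        /* order n means 2^n contiguous pages */
        printf("pgd allocation: %d page(s), %d bytes\n",
               1 << PGD_ALLOCATION_ORDER, 4096 << PGD_ALLOCATION_ORDER);
        return 0;
    }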
1687 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
1688 | index 353f038ec645..cb866ae1bc5d 100644 |
1689 | --- a/arch/x86/include/asm/processor.h |
1690 | +++ b/arch/x86/include/asm/processor.h |
1691 | @@ -391,8 +391,6 @@ struct thread_struct { |
1692 | unsigned short gsindex; |
1693 | #endif |
1694 | |
1695 | - u32 status; /* thread synchronous flags */ |
1696 | - |
1697 | #ifdef CONFIG_X86_64 |
1698 | unsigned long fsbase; |
1699 | unsigned long gsbase; |
1700 | diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h |
1701 | index fac9a5c0abe9..6847d85400a8 100644 |
1702 | --- a/arch/x86/include/asm/required-features.h |
1703 | +++ b/arch/x86/include/asm/required-features.h |
1704 | @@ -100,6 +100,7 @@ |
1705 | #define REQUIRED_MASK15 0 |
1706 | #define REQUIRED_MASK16 0 |
1707 | #define REQUIRED_MASK17 0 |
1708 | -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) |
1709 | +#define REQUIRED_MASK18 0 |
1710 | +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
1711 | |
1712 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ |
1713 | diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h |
1714 | index e3c95e8e61c5..03eedc21246d 100644 |
1715 | --- a/arch/x86/include/asm/syscall.h |
1716 | +++ b/arch/x86/include/asm/syscall.h |
1717 | @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, |
1718 | * TS_COMPAT is set for 32-bit syscall entries and then |
1719 | * remains set until we return to user mode. |
1720 | */ |
1721 | - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1722 | + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
1723 | /* |
1724 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO |
1725 | * and will match correctly in comparisons. |
1726 | @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, |
1727 | unsigned long *args) |
1728 | { |
1729 | # ifdef CONFIG_IA32_EMULATION |
1730 | - if (task->thread.status & TS_COMPAT) |
1731 | + if (task->thread_info.status & TS_COMPAT) |
1732 | switch (i) { |
1733 | case 0: |
1734 | if (!n--) break; |
1735 | @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, |
1736 | const unsigned long *args) |
1737 | { |
1738 | # ifdef CONFIG_IA32_EMULATION |
1739 | - if (task->thread.status & TS_COMPAT) |
1740 | + if (task->thread_info.status & TS_COMPAT) |
1741 | switch (i) { |
1742 | case 0: |
1743 | if (!n--) break; |
1744 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h |
1745 | index bdf9c4c91572..89978b9c667a 100644 |
1746 | --- a/arch/x86/include/asm/thread_info.h |
1747 | +++ b/arch/x86/include/asm/thread_info.h |
1748 | @@ -54,6 +54,7 @@ struct task_struct; |
1749 | |
1750 | struct thread_info { |
1751 | unsigned long flags; /* low level flags */ |
1752 | + u32 status; /* thread synchronous flags */ |
1753 | }; |
1754 | |
1755 | #define INIT_THREAD_INFO(tsk) \ |
1756 | @@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, |
1757 | #define in_ia32_syscall() true |
1758 | #else |
1759 | #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ |
1760 | - current->thread.status & TS_COMPAT) |
1761 | + current_thread_info()->status & TS_COMPAT) |
1762 | #endif |
1763 | |
1764 | /* |
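Moving 'status' from thread_struct into thread_info puts it on the same structure the syscall entry code already dereferences for 'flags', which is presumably why every thread.status user in this patch is rewritten to go through current_thread_info(). A standalone sketch of the resulting layout; the TS_COMPAT value mirrors the kernel's:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define TS_COMPAT 0x0002   /* 32-bit syscall active (64-bit kernel) */

    struct thread_info {
        unsigned long flags;   /* low level flags */
        uint32_t status;       /* moved here by this patch */
    };

    int main(void)
    {
        struct thread_info ti = { .flags = 0, .status = TS_COMPAT };
        printf("status offset: %zu, in compat syscall: %d\n",
               offsetof(struct thread_info, status),
               !!(ti.status & TS_COMPAT));
        return 0;
    }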
1765 | diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h |
1766 | index dead0f3921f3..a8d85a687cf4 100644 |
1767 | --- a/arch/x86/include/asm/uaccess.h |
1768 | +++ b/arch/x86/include/asm/uaccess.h |
1769 | @@ -123,6 +123,11 @@ extern int __get_user_bad(void); |
1770 | |
1771 | #define __uaccess_begin() stac() |
1772 | #define __uaccess_end() clac() |
1773 | +#define __uaccess_begin_nospec() \ |
1774 | +({ \ |
1775 | + stac(); \ |
1776 | + barrier_nospec(); \ |
1777 | +}) |
1778 | |
1779 | /* |
1780 | * This is a type: either unsigned long, if the argument fits into |
1781 | @@ -432,7 +437,7 @@ do { \ |
1782 | ({ \ |
1783 | int __gu_err; \ |
1784 | __inttype(*(ptr)) __gu_val; \ |
1785 | - __uaccess_begin(); \ |
1786 | + __uaccess_begin_nospec(); \ |
1787 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
1788 | __uaccess_end(); \ |
1789 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
1790 | @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; }; |
1791 | __uaccess_begin(); \ |
1792 | barrier(); |
1793 | |
1794 | +#define uaccess_try_nospec do { \ |
1795 | + current->thread.uaccess_err = 0; \ |
1796 | + __uaccess_begin_nospec(); \ |
1797 | + |
1798 | #define uaccess_catch(err) \ |
1799 | __uaccess_end(); \ |
1800 | (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ |
1801 | @@ -538,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; }; |
1802 | * get_user_ex(...); |
1803 | * } get_user_catch(err) |
1804 | */ |
1805 | -#define get_user_try uaccess_try |
1806 | +#define get_user_try uaccess_try_nospec |
1807 | #define get_user_catch(err) uaccess_catch(err) |
1808 | |
1809 | #define get_user_ex(x, ptr) do { \ |
1810 | @@ -573,7 +582,7 @@ extern void __cmpxchg_wrong_size(void) |
1811 | __typeof__(ptr) __uval = (uval); \ |
1812 | __typeof__(*(ptr)) __old = (old); \ |
1813 | __typeof__(*(ptr)) __new = (new); \ |
1814 | - __uaccess_begin(); \ |
1815 | + __uaccess_begin_nospec(); \ |
1816 | switch (size) { \ |
1817 | case 1: \ |
1818 | { \ |
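__uaccess_begin_nospec() inserts the speculation barrier between "user access allowed" (STAC) and the actual user load, so a mispredicted pointer check cannot speculatively fetch from an attacker-chosen address. A standalone model of the pairing, with stac()/clac() stubbed out and LFENCE standing in for barrier_nospec():

    #include <stdio.h>

    static void stac(void) { /* kernel: allow user access (SMAP) */ }
    static void clac(void) { /* kernel: forbid user access again */ }
    static void barrier_nospec(void)
    {
        __asm__ __volatile__("lfence" ::: "memory");
    }

    #define uaccess_begin_nospec() do { stac(); barrier_nospec(); } while (0)
    #define uaccess_end()          clac()

    int main(void)
    {
        int val = 42, copy;

        uaccess_begin_nospec();   /* earlier checks must retire first */
        copy = val;               /* stands in for the __get_user_size() load */
        uaccess_end();
        printf("%d\n", copy);
        return 0;
    }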
1819 | diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h |
1820 | index 7d3bdd1ed697..d6d245088dd5 100644 |
1821 | --- a/arch/x86/include/asm/uaccess_32.h |
1822 | +++ b/arch/x86/include/asm/uaccess_32.h |
1823 | @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) |
1824 | |
1825 | switch (n) { |
1826 | case 1: |
1827 | - __uaccess_begin(); |
1828 | + __uaccess_begin_nospec(); |
1829 | __get_user_size(*(u8 *)to, from, 1, ret, 1); |
1830 | __uaccess_end(); |
1831 | return ret; |
1832 | case 2: |
1833 | - __uaccess_begin(); |
1834 | + __uaccess_begin_nospec(); |
1835 | __get_user_size(*(u16 *)to, from, 2, ret, 2); |
1836 | __uaccess_end(); |
1837 | return ret; |
1838 | case 4: |
1839 | - __uaccess_begin(); |
1840 | + __uaccess_begin_nospec(); |
1841 | __get_user_size(*(u32 *)to, from, 4, ret, 4); |
1842 | __uaccess_end(); |
1843 | return ret; |
1844 | @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, |
1845 | |
1846 | switch (n) { |
1847 | case 1: |
1848 | - __uaccess_begin(); |
1849 | + __uaccess_begin_nospec(); |
1850 | __get_user_size(*(u8 *)to, from, 1, ret, 1); |
1851 | __uaccess_end(); |
1852 | return ret; |
1853 | case 2: |
1854 | - __uaccess_begin(); |
1855 | + __uaccess_begin_nospec(); |
1856 | __get_user_size(*(u16 *)to, from, 2, ret, 2); |
1857 | __uaccess_end(); |
1858 | return ret; |
1859 | case 4: |
1860 | - __uaccess_begin(); |
1861 | + __uaccess_begin_nospec(); |
1862 | __get_user_size(*(u32 *)to, from, 4, ret, 4); |
1863 | __uaccess_end(); |
1864 | return ret; |
1865 | diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h |
1866 | index 673059a109fe..6e5cc08134ba 100644 |
1867 | --- a/arch/x86/include/asm/uaccess_64.h |
1868 | +++ b/arch/x86/include/asm/uaccess_64.h |
1869 | @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) |
1870 | return copy_user_generic(dst, (__force void *)src, size); |
1871 | switch (size) { |
1872 | case 1: |
1873 | - __uaccess_begin(); |
1874 | + __uaccess_begin_nospec(); |
1875 | __get_user_asm(*(u8 *)dst, (u8 __user *)src, |
1876 | ret, "b", "b", "=q", 1); |
1877 | __uaccess_end(); |
1878 | return ret; |
1879 | case 2: |
1880 | - __uaccess_begin(); |
1881 | + __uaccess_begin_nospec(); |
1882 | __get_user_asm(*(u16 *)dst, (u16 __user *)src, |
1883 | ret, "w", "w", "=r", 2); |
1884 | __uaccess_end(); |
1885 | return ret; |
1886 | case 4: |
1887 | - __uaccess_begin(); |
1888 | + __uaccess_begin_nospec(); |
1889 | __get_user_asm(*(u32 *)dst, (u32 __user *)src, |
1890 | ret, "l", "k", "=r", 4); |
1891 | __uaccess_end(); |
1892 | return ret; |
1893 | case 8: |
1894 | - __uaccess_begin(); |
1895 | + __uaccess_begin_nospec(); |
1896 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, |
1897 | ret, "q", "", "=r", 8); |
1898 | __uaccess_end(); |
1899 | return ret; |
1900 | case 10: |
1901 | - __uaccess_begin(); |
1902 | + __uaccess_begin_nospec(); |
1903 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, |
1904 | ret, "q", "", "=r", 10); |
1905 | if (likely(!ret)) |
1906 | @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) |
1907 | __uaccess_end(); |
1908 | return ret; |
1909 | case 16: |
1910 | - __uaccess_begin(); |
1911 | + __uaccess_begin_nospec(); |
1912 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, |
1913 | ret, "q", "", "=r", 16); |
1914 | if (likely(!ret)) |
1915 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c |
1916 | index 10d5a3d6affc..03b6e5c6cf23 100644 |
1917 | --- a/arch/x86/kernel/alternative.c |
1918 | +++ b/arch/x86/kernel/alternative.c |
1919 | @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) |
1920 | } |
1921 | __setup("noreplace-smp", setup_noreplace_smp); |
1922 | |
1923 | -#ifdef CONFIG_PARAVIRT |
1924 | -static int __initdata_or_module noreplace_paravirt = 0; |
1925 | - |
1926 | -static int __init setup_noreplace_paravirt(char *str) |
1927 | -{ |
1928 | - noreplace_paravirt = 1; |
1929 | - return 1; |
1930 | -} |
1931 | -__setup("noreplace-paravirt", setup_noreplace_paravirt); |
1932 | -#endif |
1933 | - |
1934 | #define DPRINTK(fmt, args...) \ |
1935 | do { \ |
1936 | if (debug_alternative) \ |
1937 | @@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
1938 | struct paravirt_patch_site *p; |
1939 | char insnbuf[MAX_PATCH_LEN]; |
1940 | |
1941 | - if (noreplace_paravirt) |
1942 | - return; |
1943 | - |
1944 | for (p = start; p < end; p++) { |
1945 | unsigned int used; |
1946 | |
1947 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c |
1948 | index 8cacf62ec458..957ad443b786 100644 |
1949 | --- a/arch/x86/kernel/cpu/bugs.c |
1950 | +++ b/arch/x86/kernel/cpu/bugs.c |
1951 | @@ -10,6 +10,7 @@ |
1952 | #include <linux/init.h> |
1953 | #include <linux/utsname.h> |
1954 | #include <linux/cpu.h> |
1955 | +#include <linux/module.h> |
1956 | |
1957 | #include <asm/nospec-branch.h> |
1958 | #include <asm/cmdline.h> |
1959 | @@ -89,20 +90,41 @@ static const char *spectre_v2_strings[] = { |
1960 | }; |
1961 | |
1962 | #undef pr_fmt |
1963 | -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt |
1964 | +#define pr_fmt(fmt) "Spectre V2 : " fmt |
1965 | |
1966 | static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; |
1967 | |
1968 | +#ifdef RETPOLINE |
1969 | +static bool spectre_v2_bad_module; |
1970 | + |
1971 | +bool retpoline_module_ok(bool has_retpoline) |
1972 | +{ |
1973 | + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) |
1974 | + return true; |
1975 | + |
1976 | + pr_err("System may be vulnerable to spectre v2\n"); |
1977 | + spectre_v2_bad_module = true; |
1978 | + return false; |
1979 | +} |
1980 | + |
1981 | +static inline const char *spectre_v2_module_string(void) |
1982 | +{ |
1983 | + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; |
1984 | +} |
1985 | +#else |
1986 | +static inline const char *spectre_v2_module_string(void) { return ""; } |
1987 | +#endif |
1988 | + |
1989 | static void __init spec2_print_if_insecure(const char *reason) |
1990 | { |
1991 | if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1992 | - pr_info("%s\n", reason); |
1993 | + pr_info("%s selected on command line.\n", reason); |
1994 | } |
1995 | |
1996 | static void __init spec2_print_if_secure(const char *reason) |
1997 | { |
1998 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
1999 | - pr_info("%s\n", reason); |
2000 | + pr_info("%s selected on command line.\n", reason); |
2001 | } |
2002 | |
2003 | static inline bool retp_compiler(void) |
2004 | @@ -117,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) |
2005 | return len == arglen && !strncmp(arg, opt, len); |
2006 | } |
2007 | |
2008 | +static const struct { |
2009 | + const char *option; |
2010 | + enum spectre_v2_mitigation_cmd cmd; |
2011 | + bool secure; |
2012 | +} mitigation_options[] = { |
2013 | + { "off", SPECTRE_V2_CMD_NONE, false }, |
2014 | + { "on", SPECTRE_V2_CMD_FORCE, true }, |
2015 | + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, |
2016 | + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, |
2017 | + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, |
2018 | + { "auto", SPECTRE_V2_CMD_AUTO, false }, |
2019 | +}; |
2020 | + |
2021 | static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) |
2022 | { |
2023 | char arg[20]; |
2024 | - int ret; |
2025 | - |
2026 | - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
2027 | - sizeof(arg)); |
2028 | - if (ret > 0) { |
2029 | - if (match_option(arg, ret, "off")) { |
2030 | - goto disable; |
2031 | - } else if (match_option(arg, ret, "on")) { |
2032 | - spec2_print_if_secure("force enabled on command line."); |
2033 | - return SPECTRE_V2_CMD_FORCE; |
2034 | - } else if (match_option(arg, ret, "retpoline")) { |
2035 | - spec2_print_if_insecure("retpoline selected on command line."); |
2036 | - return SPECTRE_V2_CMD_RETPOLINE; |
2037 | - } else if (match_option(arg, ret, "retpoline,amd")) { |
2038 | - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
2039 | - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
2040 | - return SPECTRE_V2_CMD_AUTO; |
2041 | - } |
2042 | - spec2_print_if_insecure("AMD retpoline selected on command line."); |
2043 | - return SPECTRE_V2_CMD_RETPOLINE_AMD; |
2044 | - } else if (match_option(arg, ret, "retpoline,generic")) { |
2045 | - spec2_print_if_insecure("generic retpoline selected on command line."); |
2046 | - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; |
2047 | - } else if (match_option(arg, ret, "auto")) { |
2048 | + int ret, i; |
2049 | + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; |
2050 | + |
2051 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
2052 | + return SPECTRE_V2_CMD_NONE; |
2053 | + else { |
2054 | + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, |
2055 | + sizeof(arg)); |
2056 | + if (ret < 0) |
2057 | return SPECTRE_V2_CMD_AUTO; |
2058 | + |
2059 | + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { |
2060 | + if (!match_option(arg, ret, mitigation_options[i].option)) |
2061 | + continue; |
2062 | + cmd = mitigation_options[i].cmd; |
2063 | + break; |
2064 | } |
2065 | + |
2066 | + if (i >= ARRAY_SIZE(mitigation_options)) { |
2067 | + pr_err("unknown option (%s). Switching to AUTO select\n", |
2068 | +			arg); |
2069 | + return SPECTRE_V2_CMD_AUTO; |
2070 | + } |
2071 | + } |
2072 | + |
2073 | + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || |
2074 | + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || |
2075 | + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && |
2076 | + !IS_ENABLED(CONFIG_RETPOLINE)) { |
2077 | + pr_err("%s selected but not compiled in. Switching to AUTO select\n", |
2078 | + mitigation_options[i].option); |
2079 | + return SPECTRE_V2_CMD_AUTO; |
2080 | } |
2081 | |
2082 | - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
2083 | + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && |
2084 | + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { |
2085 | + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); |
2086 | return SPECTRE_V2_CMD_AUTO; |
2087 | -disable: |
2088 | - spec2_print_if_insecure("disabled on command line."); |
2089 | - return SPECTRE_V2_CMD_NONE; |
2090 | + } |
2091 | + |
2092 | + if (mitigation_options[i].secure) |
2093 | + spec2_print_if_secure(mitigation_options[i].option); |
2094 | + else |
2095 | + spec2_print_if_insecure(mitigation_options[i].option); |
2096 | + |
2097 | + return cmd; |
2098 | } |
2099 | |
2100 | /* Check for Skylake-like CPUs (for RSB handling) */ |
2101 | @@ -190,10 +238,10 @@ static void __init spectre_v2_select_mitigation(void) |
2102 | return; |
2103 | |
2104 | case SPECTRE_V2_CMD_FORCE: |
2105 | - /* FALLTRHU */ |
2106 | case SPECTRE_V2_CMD_AUTO: |
2107 | - goto retpoline_auto; |
2108 | - |
2109 | + if (IS_ENABLED(CONFIG_RETPOLINE)) |
2110 | + goto retpoline_auto; |
2111 | + break; |
2112 | case SPECTRE_V2_CMD_RETPOLINE_AMD: |
2113 | if (IS_ENABLED(CONFIG_RETPOLINE)) |
2114 | goto retpoline_amd; |
2115 | @@ -248,6 +296,12 @@ static void __init spectre_v2_select_mitigation(void) |
2116 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); |
2117 | pr_info("Filling RSB on context switch\n"); |
2118 | } |
2119 | + |
2120 | + /* Initialize Indirect Branch Prediction Barrier if supported */ |
2121 | + if (boot_cpu_has(X86_FEATURE_IBPB)) { |
2122 | + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); |
2123 | + pr_info("Enabling Indirect Branch Prediction Barrier\n"); |
2124 | + } |
2125 | } |
2126 | |
2127 | #undef pr_fmt |
2128 | @@ -268,7 +322,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, |
2129 | { |
2130 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) |
2131 | return sprintf(buf, "Not affected\n"); |
2132 | - return sprintf(buf, "Vulnerable\n"); |
2133 | + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); |
2134 | } |
2135 | |
2136 | ssize_t cpu_show_spectre_v2(struct device *dev, |
2137 | @@ -277,6 +331,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, |
2138 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) |
2139 | return sprintf(buf, "Not affected\n"); |
2140 | |
2141 | - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); |
2142 | + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], |
2143 | + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", |
2144 | + spectre_v2_module_string()); |
2145 | } |
2146 | #endif |
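The rewritten parser replaces the old if/else chain with a data table, keeping each option string, its command code, and whether it is reported as secure in one place. A minimal standalone model of the matching loop and its fall-back to auto:

    #include <stdio.h>
    #include <string.h>

    enum cmd { CMD_NONE, CMD_FORCE, CMD_RETPOLINE, CMD_AUTO };

    static const struct {
        const char *option;
        enum cmd cmd;
    } options[] = {
        { "off",       CMD_NONE },
        { "on",        CMD_FORCE },
        { "retpoline", CMD_RETPOLINE },
        { "auto",      CMD_AUTO },
    };

    static enum cmd parse(const char *arg)
    {
        size_t i;

        for (i = 0; i < sizeof(options) / sizeof(options[0]); i++)
            if (!strcmp(arg, options[i].option))
                return options[i].cmd;
        fprintf(stderr, "unknown option (%s), using auto\n", arg);
        return CMD_AUTO;
    }

    int main(void)
    {
        printf("%d %d\n", parse("retpoline"), parse("bogus"));
        return 0;
    }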
2147 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
2148 | index d198ae02f2b7..08e89ed6aa87 100644 |
2149 | --- a/arch/x86/kernel/cpu/common.c |
2150 | +++ b/arch/x86/kernel/cpu/common.c |
2151 | @@ -44,6 +44,8 @@ |
2152 | #include <asm/pat.h> |
2153 | #include <asm/microcode.h> |
2154 | #include <asm/microcode_intel.h> |
2155 | +#include <asm/intel-family.h> |
2156 | +#include <asm/cpu_device_id.h> |
2157 | |
2158 | #ifdef CONFIG_X86_LOCAL_APIC |
2159 | #include <asm/uv/uv.h> |
2160 | @@ -716,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) |
2161 | } |
2162 | } |
2163 | |
2164 | +static void init_speculation_control(struct cpuinfo_x86 *c) |
2165 | +{ |
2166 | + /* |
2167 | + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, |
2168 | + * and they also have a different bit for STIBP support. Also, |
2169 | + * a hypervisor might have set the individual AMD bits even on |
2170 | + * Intel CPUs, for finer-grained selection of what's available. |
2171 | + * |
2172 | + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware |
2173 | + * features, which are visible in /proc/cpuinfo and used by the |
2174 | + * kernel. So set those accordingly from the Intel bits. |
2175 | + */ |
2176 | + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { |
2177 | + set_cpu_cap(c, X86_FEATURE_IBRS); |
2178 | + set_cpu_cap(c, X86_FEATURE_IBPB); |
2179 | + } |
2180 | + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) |
2181 | + set_cpu_cap(c, X86_FEATURE_STIBP); |
2182 | +} |
2183 | + |
2184 | void get_cpu_cap(struct cpuinfo_x86 *c) |
2185 | { |
2186 | u32 eax, ebx, ecx, edx; |
2187 | @@ -737,6 +759,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
2188 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); |
2189 | c->x86_capability[CPUID_7_0_EBX] = ebx; |
2190 | c->x86_capability[CPUID_7_ECX] = ecx; |
2191 | + c->x86_capability[CPUID_7_EDX] = edx; |
2192 | } |
2193 | |
2194 | /* Extended state features: level 0x0000000d */ |
2195 | @@ -809,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) |
2196 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); |
2197 | |
2198 | init_scattered_cpuid_features(c); |
2199 | + init_speculation_control(c); |
2200 | } |
2201 | |
2202 | static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
2203 | @@ -837,6 +861,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
2204 | #endif |
2205 | } |
2206 | |
2207 | +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { |
2208 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, |
2209 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, |
2210 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, |
2211 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, |
2212 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, |
2213 | + { X86_VENDOR_CENTAUR, 5 }, |
2214 | + { X86_VENDOR_INTEL, 5 }, |
2215 | + { X86_VENDOR_NSC, 5 }, |
2216 | + { X86_VENDOR_ANY, 4 }, |
2217 | + {} |
2218 | +}; |
2219 | + |
2220 | +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { |
2221 | + { X86_VENDOR_AMD }, |
2222 | + {} |
2223 | +}; |
2224 | + |
2225 | +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) |
2226 | +{ |
2227 | + u64 ia32_cap = 0; |
2228 | + |
2229 | + if (x86_match_cpu(cpu_no_meltdown)) |
2230 | + return false; |
2231 | + |
2232 | + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) |
2233 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
2234 | + |
2235 | + /* Rogue Data Cache Load? No! */ |
2236 | + if (ia32_cap & ARCH_CAP_RDCL_NO) |
2237 | + return false; |
2238 | + |
2239 | + return true; |
2240 | +} |
2241 | + |
2242 | /* |
2243 | * Do minimum CPU detection early. |
2244 | * Fields really needed: vendor, cpuid_level, family, model, mask, |
2245 | @@ -883,11 +942,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) |
2246 | |
2247 | setup_force_cpu_cap(X86_FEATURE_ALWAYS); |
2248 | |
2249 | - if (c->x86_vendor != X86_VENDOR_AMD) |
2250 | - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
2251 | - |
2252 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
2253 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
2254 | + if (!x86_match_cpu(cpu_no_speculation)) { |
2255 | + if (cpu_vulnerable_to_meltdown(c)) |
2256 | + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
2257 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
2258 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); |
2259 | + } |
2260 | |
2261 | fpu__init_system(c); |
2262 | |
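The new detection logic stops assuming every x86 CPU has every bug: parts in cpu_no_speculation[] (old Atoms, family 5 and 4) get none of the three flags, anything in cpu_no_meltdown[] (AMD) skips only Meltdown, and future CPUs can opt out via the RDCL_NO bit in IA32_ARCH_CAPABILITIES. A standalone model of that decision tree, with struct fields standing in for the x86_match_cpu() table lookups:

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAP_RDCL_NO (1 << 0)

    struct cpu {
        bool no_speculation;   /* would match cpu_no_speculation[] */
        bool is_amd;           /* would match cpu_no_meltdown[] */
        bool has_arch_caps;
        uint64_t arch_caps;    /* IA32_ARCH_CAPABILITIES value */
    };

    static void set_bugs(const struct cpu *c)
    {
        if (c->no_speculation) {
            printf("no speculation bugs assumed\n");
            return;
        }
        bool meltdown = !c->is_amd &&
                        !(c->has_arch_caps && (c->arch_caps & ARCH_CAP_RDCL_NO));
        printf("spectre_v1=1 spectre_v2=1 meltdown=%d\n", meltdown);
    }

    int main(void)
    {
        struct cpu intel = { .has_arch_caps = false };
        struct cpu amd = { .is_amd = true };

        set_bugs(&intel);   /* all three bugs */
        set_bugs(&amd);     /* spectre only */
        return 0;
    }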
2263 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c |
2264 | index fcd484d2bb03..4097b43cba2d 100644 |
2265 | --- a/arch/x86/kernel/cpu/intel.c |
2266 | +++ b/arch/x86/kernel/cpu/intel.c |
2267 | @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) |
2268 | } |
2269 | } |
2270 | |
2271 | +/* |
2272 | + * Early microcode releases for the Spectre v2 mitigation were broken. |
2273 | + * Information taken from: |
2274 | + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf |
2275 | + * - https://kb.vmware.com/s/article/52345 |
2276 | + * - Microcode revisions observed in the wild |
2277 | + * - Release note from 20180108 microcode release |
2278 | + */ |
2279 | +struct sku_microcode { |
2280 | + u8 model; |
2281 | + u8 stepping; |
2282 | + u32 microcode; |
2283 | +}; |
2284 | +static const struct sku_microcode spectre_bad_microcodes[] = { |
2285 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, |
2286 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, |
2287 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, |
2288 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, |
2289 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, |
2290 | + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, |
2291 | + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, |
2292 | + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, |
2293 | + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, |
2294 | + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, |
2295 | + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, |
2296 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, |
2297 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, |
2298 | + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, |
2299 | + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, |
2300 | + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, |
2301 | + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, |
2302 | + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, |
2303 | + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, |
2304 | + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, |
2305 | + /* Updated in the 20180108 release; blacklist until we know otherwise */ |
2306 | + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, |
2307 | + /* Observed in the wild */ |
2308 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, |
2309 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, |
2310 | +}; |
2311 | + |
2312 | +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) |
2313 | +{ |
2314 | + int i; |
2315 | + |
2316 | + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { |
2317 | + if (c->x86_model == spectre_bad_microcodes[i].model && |
2318 | + c->x86_mask == spectre_bad_microcodes[i].stepping) |
2319 | + return (c->microcode <= spectre_bad_microcodes[i].microcode); |
2320 | + } |
2321 | + return false; |
2322 | +} |
2323 | + |
2324 | static void early_init_intel(struct cpuinfo_x86 *c) |
2325 | { |
2326 | u64 misc_enable; |
2327 | @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) |
2328 | rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); |
2329 | } |
2330 | |
2331 | + /* Now if any of them are set, check the blacklist and clear the lot */ |
2332 | + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || |
2333 | + cpu_has(c, X86_FEATURE_INTEL_STIBP) || |
2334 | + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || |
2335 | + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { |
2336 | + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); |
2337 | + setup_clear_cpu_cap(X86_FEATURE_IBRS); |
2338 | + setup_clear_cpu_cap(X86_FEATURE_IBPB); |
2339 | + setup_clear_cpu_cap(X86_FEATURE_STIBP); |
2340 | + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); |
2341 | + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); |
2342 | + } |
2343 | + |
2344 | /* |
2345 | * Atom erratum AAE44/AAF40/AAG38/AAH41: |
2346 | * |
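bad_spectre_microcode() is a linear scan keyed on model and stepping, treating any loaded revision at or below the listed one as broken. A standalone model with two rows borrowed from the table above:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    struct sku_microcode { uint8_t model, stepping; uint32_t microcode; };

    static const struct sku_microcode bad[] = {
        { 0x4F, 0x01, 0x0b000025 },   /* BROADWELL_X */
        { 0x3F, 0x02, 0x3b },         /* HASWELL_X */
    };

    static bool bad_spectre_microcode(uint8_t model, uint8_t stepping,
                                      uint32_t rev)
    {
        size_t i;

        for (i = 0; i < sizeof(bad) / sizeof(bad[0]); i++)
            if (model == bad[i].model && stepping == bad[i].stepping)
                return rev <= bad[i].microcode;
        return false;
    }

    int main(void)
    {
        printf("%d\n", bad_spectre_microcode(0x4F, 0x01, 0x0b000025)); /* 1 */
        printf("%d\n", bad_spectre_microcode(0x4F, 0x01, 0x0b000026)); /* 0 */
        return 0;
    }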
2347 | diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c |
2348 | index 5ce5155f0695..0afaf00b029b 100644 |
2349 | --- a/arch/x86/kernel/cpu/microcode/core.c |
2350 | +++ b/arch/x86/kernel/cpu/microcode/core.c |
2351 | @@ -43,7 +43,7 @@ |
2352 | #define MICROCODE_VERSION "2.01" |
2353 | |
2354 | static struct microcode_ops *microcode_ops; |
2355 | -static bool dis_ucode_ldr; |
2356 | +static bool dis_ucode_ldr = true; |
2357 | |
2358 | /* |
2359 | * Synchronization. |
2360 | @@ -73,6 +73,7 @@ struct cpu_info_ctx { |
2361 | static bool __init check_loader_disabled_bsp(void) |
2362 | { |
2363 | static const char *__dis_opt_str = "dis_ucode_ldr"; |
2364 | + u32 a, b, c, d; |
2365 | |
2366 | #ifdef CONFIG_X86_32 |
2367 | const char *cmdline = (const char *)__pa_nodebug(boot_command_line); |
2368 | @@ -85,8 +86,20 @@ static bool __init check_loader_disabled_bsp(void) |
2369 | bool *res = &dis_ucode_ldr; |
2370 | #endif |
2371 | |
2372 | - if (cmdline_find_option_bool(cmdline, option)) |
2373 | - *res = true; |
2374 | + a = 1; |
2375 | + c = 0; |
2376 | + native_cpuid(&a, &b, &c, &d); |
2377 | + |
2378 | + /* |
2379 | + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not |
2380 | + * completely accurate as xen pv guests don't see that CPUID bit set but |
2381 | + * that's good enough as they don't land on the BSP path anyway. |
2382 | + */ |
2383 | + if (c & BIT(31)) |
2384 | + return *res; |
2385 | + |
2386 | + if (cmdline_find_option_bool(cmdline, option) <= 0) |
2387 | + *res = false; |
2388 | |
2389 | return *res; |
2390 | } |
2391 | @@ -114,9 +127,7 @@ void __init load_ucode_bsp(void) |
2392 | { |
2393 | int vendor; |
2394 | unsigned int family; |
2395 | - |
2396 | - if (check_loader_disabled_bsp()) |
2397 | - return; |
2398 | + bool intel = true; |
2399 | |
2400 | if (!have_cpuid_p()) |
2401 | return; |
2402 | @@ -126,16 +137,27 @@ void __init load_ucode_bsp(void) |
2403 | |
2404 | switch (vendor) { |
2405 | case X86_VENDOR_INTEL: |
2406 | - if (family >= 6) |
2407 | - load_ucode_intel_bsp(); |
2408 | + if (family < 6) |
2409 | + return; |
2410 | break; |
2411 | + |
2412 | case X86_VENDOR_AMD: |
2413 | - if (family >= 0x10) |
2414 | - load_ucode_amd_bsp(family); |
2415 | + if (family < 0x10) |
2416 | + return; |
2417 | + intel = false; |
2418 | break; |
2419 | + |
2420 | default: |
2421 | - break; |
2422 | + return; |
2423 | } |
2424 | + |
2425 | + if (check_loader_disabled_bsp()) |
2426 | + return; |
2427 | + |
2428 | + if (intel) |
2429 | + load_ucode_intel_bsp(); |
2430 | + else |
2431 | + load_ucode_amd_bsp(family); |
2432 | } |
2433 | |
2434 | static bool check_loader_disabled_ap(void) |
2435 | @@ -154,9 +176,6 @@ void load_ucode_ap(void) |
2436 | if (check_loader_disabled_ap()) |
2437 | return; |
2438 | |
2439 | - if (!have_cpuid_p()) |
2440 | - return; |
2441 | - |
2442 | vendor = x86_cpuid_vendor(); |
2443 | family = x86_cpuid_family(); |
2444 | |
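The loader now keys off CPUID(1).ECX[31], a bit hypervisors set for their guests, before doing anything else, so microcode loading stays disabled when running virtualized. The same bit can be read from user space (x86 with GCC/Clang's cpuid.h):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int a, b, c, d;

        if (!__get_cpuid(1, &a, &b, &c, &d))
            return 1;
        printf("running under a hypervisor: %s\n",
               (c & (1u << 31)) ? "yes" : "no");
        return 0;
    }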
2445 | diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c |
2446 | index b0dd9aec183d..afbb52532791 100644 |
2447 | --- a/arch/x86/kernel/cpu/scattered.c |
2448 | +++ b/arch/x86/kernel/cpu/scattered.c |
2449 | @@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) |
2450 | const struct cpuid_bit *cb; |
2451 | |
2452 | static const struct cpuid_bit cpuid_bits[] = { |
2453 | - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, |
2454 | - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, |
2455 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, |
2456 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, |
2457 | { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, |
2458 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c |
2459 | index 0887d2ae3797..dffe81d3c261 100644 |
2460 | --- a/arch/x86/kernel/process_64.c |
2461 | +++ b/arch/x86/kernel/process_64.c |
2462 | @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32) |
2463 | current->personality &= ~READ_IMPLIES_EXEC; |
2464 | /* in_compat_syscall() uses the presence of the x32 |
2465 | syscall bit flag to determine compat status */ |
2466 | - current->thread.status &= ~TS_COMPAT; |
2467 | + current_thread_info()->status &= ~TS_COMPAT; |
2468 | } else { |
2469 | set_thread_flag(TIF_IA32); |
2470 | clear_thread_flag(TIF_X32); |
2471 | @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32) |
2472 | current->mm->context.ia32_compat = TIF_IA32; |
2473 | current->personality |= force_personality32; |
2474 | /* Prepare the first "return" to user space */ |
2475 | - current->thread.status |= TS_COMPAT; |
2476 | + current_thread_info()->status |= TS_COMPAT; |
2477 | } |
2478 | } |
2479 | EXPORT_SYMBOL_GPL(set_personality_ia32); |
2480 | diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c |
2481 | index 0e63c0267f99..e497d374412a 100644 |
2482 | --- a/arch/x86/kernel/ptrace.c |
2483 | +++ b/arch/x86/kernel/ptrace.c |
2484 | @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) |
2485 | */ |
2486 | regs->orig_ax = value; |
2487 | if (syscall_get_nr(child, regs) >= 0) |
2488 | - child->thread.status |= TS_I386_REGS_POKED; |
2489 | + child->thread_info.status |= TS_I386_REGS_POKED; |
2490 | break; |
2491 | |
2492 | case offsetof(struct user32, regs.eflags): |
2493 | diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c |
2494 | index 763af1d0de64..b1a5d252d482 100644 |
2495 | --- a/arch/x86/kernel/signal.c |
2496 | +++ b/arch/x86/kernel/signal.c |
2497 | @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) |
2498 | * than the tracee. |
2499 | */ |
2500 | #ifdef CONFIG_IA32_EMULATION |
2501 | - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) |
2502 | + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) |
2503 | return __NR_ia32_restart_syscall; |
2504 | #endif |
2505 | #ifdef CONFIG_X86_X32_ABI |
2506 | diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c |
2507 | index 8402907825b0..21454e254a4c 100644 |
2508 | --- a/arch/x86/kernel/tboot.c |
2509 | +++ b/arch/x86/kernel/tboot.c |
2510 | @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, |
2511 | return -1; |
2512 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); |
2513 | pte_unmap(pte); |
2514 | + |
2515 | + /* |
2516 | + * PTI poisons low addresses in the kernel page tables in the |
2517 | + * name of making them unusable for userspace. To execute |
2518 | + * code at such a low address, the poison must be cleared. |
2519 | + * |
2520 | + * Note: 'pgd' actually gets set in pud_alloc(). |
2521 | + */ |
2522 | + pgd->pgd &= ~_PAGE_NX; |
2523 | + |
2524 | return 0; |
2525 | } |
2526 | |
2527 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
2528 | index 91af75e37306..93f924de06cf 100644 |
2529 | --- a/arch/x86/kvm/cpuid.c |
2530 | +++ b/arch/x86/kvm/cpuid.c |
2531 | @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2532 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | |
2533 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); |
2534 | |
2535 | + /* cpuid 0x80000008.ebx */ |
2536 | + const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
2537 | + F(IBPB) | F(IBRS); |
2538 | + |
2539 | /* cpuid 0xC0000001.edx */ |
2540 | const u32 kvm_cpuid_C000_0001_edx_x86_features = |
2541 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | |
2542 | @@ -376,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2543 | /* cpuid 7.0.ecx*/ |
2544 | const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; |
2545 | |
2546 | + /* cpuid 7.0.edx*/ |
2547 | + const u32 kvm_cpuid_7_0_edx_x86_features = |
2548 | + F(SPEC_CTRL) | F(ARCH_CAPABILITIES); |
2549 | + |
2550 | /* all calls to cpuid_count() should be made on the same cpu */ |
2551 | get_cpu(); |
2552 | |
2553 | @@ -458,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2554 | /* PKU is not yet implemented for shadow paging. */ |
2555 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) |
2556 | entry->ecx &= ~F(PKU); |
2557 | + entry->edx &= kvm_cpuid_7_0_edx_x86_features; |
2558 | + cpuid_mask(&entry->edx, CPUID_7_EDX); |
2559 | } else { |
2560 | entry->ebx = 0; |
2561 | entry->ecx = 0; |
2562 | + entry->edx = 0; |
2563 | } |
2564 | entry->eax = 0; |
2565 | - entry->edx = 0; |
2566 | break; |
2567 | } |
2568 | case 9: |
2569 | @@ -607,7 +617,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2570 | if (!g_phys_as) |
2571 | g_phys_as = phys_as; |
2572 | entry->eax = g_phys_as | (virt_as << 8); |
2573 | - entry->ebx = entry->edx = 0; |
2574 | + entry->edx = 0; |
2575 | + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ |
2576 | + if (boot_cpu_has(X86_FEATURE_IBPB)) |
2577 | + entry->ebx |= F(IBPB); |
2578 | + if (boot_cpu_has(X86_FEATURE_IBRS)) |
2579 | + entry->ebx |= F(IBRS); |
2580 | + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; |
2581 | + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); |
2582 | break; |
2583 | } |
2584 | case 0x80000019: |
2585 | diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h |
2586 | index 9368fecca3ee..d1beb7156704 100644 |
2587 | --- a/arch/x86/kvm/cpuid.h |
2588 | +++ b/arch/x86/kvm/cpuid.h |
2589 | @@ -160,6 +160,37 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) |
2590 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); |
2591 | } |
2592 | |
2593 | +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) |
2594 | +{ |
2595 | + struct kvm_cpuid_entry2 *best; |
2596 | + |
2597 | + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
2598 | + if (best && (best->ebx & bit(X86_FEATURE_IBPB))) |
2599 | + return true; |
2600 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); |
2601 | + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); |
2602 | +} |
2603 | + |
2604 | +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) |
2605 | +{ |
2606 | + struct kvm_cpuid_entry2 *best; |
2607 | + |
2608 | + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); |
2609 | + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) |
2610 | + return true; |
2611 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); |
2612 | + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); |
2613 | +} |
2614 | + |
2615 | +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) |
2616 | +{ |
2617 | + struct kvm_cpuid_entry2 *best; |
2618 | + |
2619 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); |
2620 | + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); |
2621 | +} |
2622 | + |
2623 | + |
2624 | /* |
2625 | * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 |
2626 | */ |
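The guest_cpuid_has_ibpb()/guest_cpuid_has_ibrs() helpers accept either vendor's way of advertising the capability: the AMD bit in 0x80000008 EBX or the Intel SPEC_CTRL bit in 7.0 EDX, matching the cross-vendor aliasing done in init_speculation_control(). A standalone model, assuming the architectural bit positions (AMD IBPB is 0x80000008.EBX[12]; SPEC_CTRL is 7.0.EDX[26], as defined earlier in this patch):

    #include <stdio.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct cpuid_leaf { uint32_t ebx, edx; };

    #define AMD_IBPB_BIT    (1u << 12)  /* 0x80000008.EBX[12], assumed */
    #define INTEL_SPEC_CTRL (1u << 26)  /* 7.0.EDX[26] */

    static bool guest_has_ibpb(const struct cpuid_leaf *amd,
                               const struct cpuid_leaf *intel)
    {
        if (amd && (amd->ebx & AMD_IBPB_BIT))
            return true;
        return intel && (intel->edx & INTEL_SPEC_CTRL);
    }

    int main(void)
    {
        struct cpuid_leaf amd = { .ebx = AMD_IBPB_BIT };
        struct cpuid_leaf intel = { .edx = 0 };

        printf("%d\n", guest_has_ibpb(&amd, &intel));  /* 1, via AMD bit */
        return 0;
    }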
2627 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c |
2628 | index 6f5a3b076341..c8d573822e60 100644 |
2629 | --- a/arch/x86/kvm/emulate.c |
2630 | +++ b/arch/x86/kvm/emulate.c |
2631 | @@ -25,6 +25,7 @@ |
2632 | #include <asm/kvm_emulate.h> |
2633 | #include <linux/stringify.h> |
2634 | #include <asm/debugreg.h> |
2635 | +#include <asm/nospec-branch.h> |
2636 | |
2637 | #include "x86.h" |
2638 | #include "tss.h" |
2639 | @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) |
2640 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); |
2641 | |
2642 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; |
2643 | - asm("push %[flags]; popf; call *%[fastop]" |
2644 | - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); |
2645 | + asm("push %[flags]; popf; " CALL_NOSPEC |
2646 | + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); |
2647 | return rc; |
2648 | } |
2649 | |
2650 | @@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, |
2651 | |
2652 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) |
2653 | { |
2654 | - register void *__sp asm(_ASM_SP); |
2655 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; |
2656 | |
2657 | if (!(ctxt->d & ByteOp)) |
2658 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; |
2659 | |
2660 | - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" |
2661 | + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" |
2662 | : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), |
2663 | - [fastop]"+S"(fop), "+r"(__sp) |
2664 | + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT |
2665 | : "c"(ctxt->src2.val)); |
2666 | |
2667 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); |
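CALL_NOSPEC routes the previously indirect "call *%[fastop]" through a retpoline thunk when retpolines are enabled, with the target pinned by the [thunk_target] operand. A standalone, simplified x86-64 sketch of the thunk's call/overwrite/ret trick (GCC/Clang inline asm; the kernel's real thunks are out-of-line, one per register, and selected by alternatives):

    #include <stdio.h>

    static long hello(void) { return 42; }

    /* Call fn (in %rax) via a retpoline-shaped sequence: the 'ret' is
     * retargeted at fn, and speculation past it lands in the
     * pause/lfence trap instead of an attacker-trained prediction. */
    static long call_nospec(long (*fn)(void))
    {
        long ret;

        __asm__ __volatile__(
            "sub  $128, %%rsp\n\t"         /* stay clear of the red zone */
            "call 902f\n\t"                /* pushes the resume address */
            "jmp  904f\n\t"                /* resume point after fn returns */
            "902: call 903f\n\t"           /* pushes the trap's address */
            "901: pause\n\t"
            "lfence\n\t"
            "jmp  901b\n\t"
            "903: mov  %%rax, (%%rsp)\n\t" /* overwrite it with fn */
            "ret\n\t"                      /* 'return' into fn */
            "904: add  $128, %%rsp"
            : "=a" (ret)
            : "0" (fn)
            : "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11",
              "memory", "cc");
        return ret;
    }

    int main(void)
    {
        printf("%ld\n", call_nospec(hello));
        return 0;
    }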
2668 | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c |
2669 | index 24af898fb3a6..be644afab1bb 100644 |
2670 | --- a/arch/x86/kvm/svm.c |
2671 | +++ b/arch/x86/kvm/svm.c |
2672 | @@ -183,6 +183,8 @@ struct vcpu_svm { |
2673 | u64 gs_base; |
2674 | } host; |
2675 | |
2676 | + u64 spec_ctrl; |
2677 | + |
2678 | u32 *msrpm; |
2679 | |
2680 | ulong nmi_iret_rip; |
2681 | @@ -248,6 +250,8 @@ static const struct svm_direct_access_msrs { |
2682 | { .index = MSR_CSTAR, .always = true }, |
2683 | { .index = MSR_SYSCALL_MASK, .always = true }, |
2684 | #endif |
2685 | + { .index = MSR_IA32_SPEC_CTRL, .always = false }, |
2686 | + { .index = MSR_IA32_PRED_CMD, .always = false }, |
2687 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, |
2688 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, |
2689 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, |
2690 | @@ -510,6 +514,7 @@ struct svm_cpu_data { |
2691 | struct kvm_ldttss_desc *tss_desc; |
2692 | |
2693 | struct page *save_area; |
2694 | + struct vmcb *current_vmcb; |
2695 | }; |
2696 | |
2697 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); |
2698 | @@ -861,6 +866,25 @@ static bool valid_msr_intercept(u32 index) |
2699 | return false; |
2700 | } |
2701 | |
2702 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) |
2703 | +{ |
2704 | + u8 bit_write; |
2705 | + unsigned long tmp; |
2706 | + u32 offset; |
2707 | + u32 *msrpm; |
2708 | + |
2709 | +	msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm : |
2710 | + to_svm(vcpu)->msrpm; |
2711 | + |
2712 | + offset = svm_msrpm_offset(msr); |
2713 | + bit_write = 2 * (msr & 0x0f) + 1; |
2714 | + tmp = msrpm[offset]; |
2715 | + |
2716 | + BUG_ON(offset == MSR_INVALID); |
2717 | + |
2718 | + return !!test_bit(bit_write, &tmp); |
2719 | +} |
2720 | + |
2721 | static void set_msr_interception(u32 *msrpm, unsigned msr, |
2722 | int read, int write) |
2723 | { |
2724 | @@ -1535,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
2725 | u32 dummy; |
2726 | u32 eax = 1; |
2727 | |
2728 | + svm->spec_ctrl = 0; |
2729 | + |
2730 | if (!init_event) { |
2731 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | |
2732 | MSR_IA32_APICBASE_ENABLE; |
2733 | @@ -1644,11 +1670,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) |
2734 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); |
2735 | kvm_vcpu_uninit(vcpu); |
2736 | kmem_cache_free(kvm_vcpu_cache, svm); |
2737 | + /* |
2738 | + * The vmcb page can be recycled, causing a false negative in |
2739 | + * svm_vcpu_load(). So do a full IBPB now. |
2740 | + */ |
2741 | + indirect_branch_prediction_barrier(); |
2742 | } |
2743 | |
2744 | static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
2745 | { |
2746 | struct vcpu_svm *svm = to_svm(vcpu); |
2747 | + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); |
2748 | int i; |
2749 | |
2750 | if (unlikely(cpu != vcpu->cpu)) { |
2751 | @@ -1677,6 +1709,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
2752 | if (static_cpu_has(X86_FEATURE_RDTSCP)) |
2753 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); |
2754 | |
2755 | + if (sd->current_vmcb != svm->vmcb) { |
2756 | + sd->current_vmcb = svm->vmcb; |
2757 | + indirect_branch_prediction_barrier(); |
2758 | + } |
2759 | avic_vcpu_load(vcpu, cpu); |
2760 | } |
2761 | |
2762 | @@ -3508,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
2763 | case MSR_VM_CR: |
2764 | msr_info->data = svm->nested.vm_cr_msr; |
2765 | break; |
2766 | + case MSR_IA32_SPEC_CTRL: |
2767 | + if (!msr_info->host_initiated && |
2768 | + !guest_cpuid_has_ibrs(vcpu)) |
2769 | + return 1; |
2770 | + |
2771 | + msr_info->data = svm->spec_ctrl; |
2772 | + break; |
2773 | case MSR_IA32_UCODE_REV: |
2774 | msr_info->data = 0x01000065; |
2775 | break; |
2776 | @@ -3599,6 +3642,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) |
2777 | case MSR_IA32_TSC: |
2778 | kvm_write_tsc(vcpu, msr); |
2779 | break; |
2780 | + case MSR_IA32_SPEC_CTRL: |
2781 | + if (!msr->host_initiated && |
2782 | + !guest_cpuid_has_ibrs(vcpu)) |
2783 | + return 1; |
2784 | + |
2785 | + /* The STIBP bit doesn't fault even if it's not advertised */ |
2786 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
2787 | + return 1; |
2788 | + |
2789 | + svm->spec_ctrl = data; |
2790 | + |
2791 | + if (!data) |
2792 | + break; |
2793 | + |
2794 | + /* |
2795 | + * For non-nested: |
2796 | + * When it's written (to non-zero) for the first time, pass |
2797 | + * it through. |
2798 | + * |
2799 | + * For nested: |
2800 | + * The handling of the MSR bitmap for L2 guests is done in |
2801 | + * nested_svm_vmrun_msrpm. |
2802 | + * We update the L1 MSR bit as well since it will end up |
2803 | + * touching the MSR anyway now. |
2804 | + */ |
2805 | + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); |
2806 | + break; |
2807 | + case MSR_IA32_PRED_CMD: |
2808 | + if (!msr->host_initiated && |
2809 | + !guest_cpuid_has_ibpb(vcpu)) |
2810 | + return 1; |
2811 | + |
2812 | + if (data & ~PRED_CMD_IBPB) |
2813 | + return 1; |
2814 | + |
2815 | + if (!data) |
2816 | + break; |
2817 | + |
2818 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
2819 | + if (is_guest_mode(vcpu)) |
2820 | + break; |
2821 | + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); |
2822 | + break; |
2823 | case MSR_STAR: |
2824 | svm->vmcb->save.star = data; |
2825 | break; |
2826 | @@ -4826,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
2827 | |
2828 | local_irq_enable(); |
2829 | |
2830 | + /* |
2831 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
2832 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there |
2833 | + * is no need to worry about the conditional branch over the wrmsr |
2834 | + * being speculatively taken. |
2835 | + */ |
2836 | + if (svm->spec_ctrl) |
2837 | + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
2838 | + |
2839 | asm volatile ( |
2840 | "push %%" _ASM_BP "; \n\t" |
2841 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" |
2842 | @@ -4918,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
2843 | #endif |
2844 | ); |
2845 | |
2846 | + /* |
2847 | + * We do not use IBRS in the kernel. If this vCPU has used the |
2848 | + * SPEC_CTRL MSR it may have left it on; save the value and |
2849 | + * turn it off. This is much more efficient than blindly adding |
2850 | + * it to the atomic save/restore list. Especially as the former |
2851 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. |
2852 | + * |
2853 | + * For non-nested case: |
2854 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to |
2855 | + * save it. |
2856 | + * |
2857 | + * For nested case: |
2858 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to |
2859 | + * save it. |
2860 | + */ |
2861 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
2862 | + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); |
2863 | + |
2864 | + if (svm->spec_ctrl) |
2865 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
2866 | + |
2867 | /* Eliminate branch target predictions from guest mode */ |
2868 | vmexit_fill_RSB(); |
2869 | |
2870 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
2871 | index 178a344f55f8..d49da86e3099 100644 |
2872 | --- a/arch/x86/kvm/vmx.c |
2873 | +++ b/arch/x86/kvm/vmx.c |
2874 | @@ -33,6 +33,7 @@ |
2875 | #include <linux/slab.h> |
2876 | #include <linux/tboot.h> |
2877 | #include <linux/hrtimer.h> |
2878 | +#include <linux/nospec.h> |
2879 | #include "kvm_cache_regs.h" |
2880 | #include "x86.h" |
2881 | |
2882 | @@ -109,6 +110,14 @@ static u64 __read_mostly host_xss; |
2883 | static bool __read_mostly enable_pml = 1; |
2884 | module_param_named(pml, enable_pml, bool, S_IRUGO); |
2885 | |
2886 | +#define MSR_TYPE_R 1 |
2887 | +#define MSR_TYPE_W 2 |
2888 | +#define MSR_TYPE_RW 3 |
2889 | + |
2890 | +#define MSR_BITMAP_MODE_X2APIC 1 |
2891 | +#define MSR_BITMAP_MODE_X2APIC_APICV 2 |
2892 | +#define MSR_BITMAP_MODE_LM 4 |
2893 | + |
2894 | #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL |
2895 | |
2896 | /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ |
2897 | @@ -173,7 +182,6 @@ module_param(ple_window_max, int, S_IRUGO); |
2898 | extern const ulong vmx_return; |
2899 | |
2900 | #define NR_AUTOLOAD_MSRS 8 |
2901 | -#define VMCS02_POOL_SIZE 1 |
2902 | |
2903 | struct vmcs { |
2904 | u32 revision_id; |
2905 | @@ -191,6 +199,7 @@ struct loaded_vmcs { |
2906 | struct vmcs *shadow_vmcs; |
2907 | int cpu; |
2908 | int launched; |
2909 | + unsigned long *msr_bitmap; |
2910 | struct list_head loaded_vmcss_on_cpu_link; |
2911 | }; |
2912 | |
2913 | @@ -207,7 +216,7 @@ struct shared_msr_entry { |
2914 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, |
2915 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. |
2916 | * More than one of these structures may exist, if L1 runs multiple L2 guests. |
2917 | - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the |
2918 | + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the |
2919 | * underlying hardware which will be used to run L2. |
2920 | * This structure is packed to ensure that its layout is identical across |
2921 | * machines (necessary for live migration). |
2922 | @@ -386,13 +395,6 @@ struct __packed vmcs12 { |
2923 | */ |
2924 | #define VMCS12_SIZE 0x1000 |
2925 | |
2926 | -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ |
2927 | -struct vmcs02_list { |
2928 | - struct list_head list; |
2929 | - gpa_t vmptr; |
2930 | - struct loaded_vmcs vmcs02; |
2931 | -}; |
2932 | - |
2933 | /* |
2934 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need |
2935 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. |
2936 | @@ -419,15 +421,15 @@ struct nested_vmx { |
2937 | */ |
2938 | bool sync_shadow_vmcs; |
2939 | |
2940 | - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ |
2941 | - struct list_head vmcs02_pool; |
2942 | - int vmcs02_num; |
2943 | bool change_vmcs01_virtual_x2apic_mode; |
2944 | /* L2 must run next, and mustn't decide to exit to L1. */ |
2945 | bool nested_run_pending; |
2946 | + |
2947 | + struct loaded_vmcs vmcs02; |
2948 | + |
2949 | /* |
2950 | - * Guest pages referred to in vmcs02 with host-physical pointers, so |
2951 | - * we must keep them pinned while L2 runs. |
2952 | + * Guest pages referred to in the vmcs02 with host-physical |
2953 | + * pointers, so we must keep them pinned while L2 runs. |
2954 | */ |
2955 | struct page *apic_access_page; |
2956 | struct page *virtual_apic_page; |
2957 | @@ -436,8 +438,6 @@ struct nested_vmx { |
2958 | bool pi_pending; |
2959 | u16 posted_intr_nv; |
2960 | |
2961 | - unsigned long *msr_bitmap; |
2962 | - |
2963 | struct hrtimer preemption_timer; |
2964 | bool preemption_timer_expired; |
2965 | |
2966 | @@ -538,6 +538,7 @@ struct vcpu_vmx { |
2967 | unsigned long host_rsp; |
2968 | u8 fail; |
2969 | bool nmi_known_unmasked; |
2970 | + u8 msr_bitmap_mode; |
2971 | u32 exit_intr_info; |
2972 | u32 idt_vectoring_info; |
2973 | ulong rflags; |
2974 | @@ -549,6 +550,10 @@ struct vcpu_vmx { |
2975 | u64 msr_host_kernel_gs_base; |
2976 | u64 msr_guest_kernel_gs_base; |
2977 | #endif |
2978 | + |
2979 | + u64 arch_capabilities; |
2980 | + u64 spec_ctrl; |
2981 | + |
2982 | u32 vm_entry_controls_shadow; |
2983 | u32 vm_exit_controls_shadow; |
2984 | /* |
2985 | @@ -856,21 +861,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { |
2986 | |
2987 | static inline short vmcs_field_to_offset(unsigned long field) |
2988 | { |
2989 | - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); |
2990 | + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); |
2991 | + unsigned short offset; |
2992 | |
2993 | - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) |
2994 | + BUILD_BUG_ON(size > SHRT_MAX); |
2995 | + if (field >= size) |
2996 | return -ENOENT; |
2997 | |
2998 | - /* |
2999 | - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a |
3000 | - * generic mechanism. |
3001 | - */ |
3002 | - asm("lfence"); |
3003 | - |
3004 | - if (vmcs_field_to_offset_table[field] == 0) |
3005 | + field = array_index_nospec(field, size); |
3006 | + offset = vmcs_field_to_offset_table[field]; |
3007 | + if (offset == 0) |
3008 | return -ENOENT; |
3009 | - |
3010 | - return vmcs_field_to_offset_table[field]; |
3011 | + return offset; |
3012 | } |
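This hunk replaces the open-coded lfence with array_index_nospec(), which clamps the index via a data dependency instead of a speculation barrier. A rough userspace sketch of the generic C fallback (the per-architecture asm versions differ; like the kernel helper, it assumes size <= LONG_MAX and an arithmetic right shift of negative values):

    #include <limits.h>

    /* All-ones when index < size, all-zero otherwise. No branch is
     * involved, so a mispredicted bounds check cannot speculatively
     * produce an out-of-bounds index. */
    static unsigned long index_mask(unsigned long index, unsigned long size)
    {
            return ~(long)(index | (size - 1 - index)) >>
                    (CHAR_BIT * sizeof(long) - 1);
    }

    /* array_index_nospec(index, size) then reduces to: */
    static unsigned long index_nospec(unsigned long index, unsigned long size)
    {
            return index & index_mask(index, size);
    }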
3013 | |
3014 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) |
3015 | @@ -912,6 +914,9 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); |
3016 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); |
3017 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); |
3018 | static int alloc_identity_pagetable(struct kvm *kvm); |
3019 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); |
3020 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
3021 | + u32 msr, int type); |
3022 | |
3023 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
3024 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
3025 | @@ -931,12 +936,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); |
3026 | |
3027 | static unsigned long *vmx_io_bitmap_a; |
3028 | static unsigned long *vmx_io_bitmap_b; |
3029 | -static unsigned long *vmx_msr_bitmap_legacy; |
3030 | -static unsigned long *vmx_msr_bitmap_longmode; |
3031 | -static unsigned long *vmx_msr_bitmap_legacy_x2apic; |
3032 | -static unsigned long *vmx_msr_bitmap_longmode_x2apic; |
3033 | -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; |
3034 | -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; |
3035 | static unsigned long *vmx_vmread_bitmap; |
3036 | static unsigned long *vmx_vmwrite_bitmap; |
3037 | |
3038 | @@ -1853,6 +1852,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) |
3039 | vmcs_write32(EXCEPTION_BITMAP, eb); |
3040 | } |
3041 | |
3042 | +/* |
3043 | + * Check if MSR is intercepted for currently loaded MSR bitmap. |
3044 | + */ |
3045 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) |
3046 | +{ |
3047 | + unsigned long *msr_bitmap; |
3048 | + int f = sizeof(unsigned long); |
3049 | + |
3050 | + if (!cpu_has_vmx_msr_bitmap()) |
3051 | + return true; |
3052 | + |
3053 | + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; |
3054 | + |
3055 | + if (msr <= 0x1fff) { |
3056 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); |
3057 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
3058 | + msr &= 0x1fff; |
3059 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
3060 | + } |
3061 | + |
3062 | + return true; |
3063 | +} |
3064 | + |
3065 | +/* |
3066 | + * Check if MSR is intercepted for L01 MSR bitmap. |
3067 | + */ |
3068 | +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) |
3069 | +{ |
3070 | + unsigned long *msr_bitmap; |
3071 | + int f = sizeof(unsigned long); |
3072 | + |
3073 | + if (!cpu_has_vmx_msr_bitmap()) |
3074 | + return true; |
3075 | + |
3076 | + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; |
3077 | + |
3078 | + if (msr <= 0x1fff) { |
3079 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); |
3080 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
3081 | + msr &= 0x1fff; |
3082 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); |
3083 | + } |
3084 | + |
3085 | + return true; |
3086 | +} |
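Both helpers walk the hardware-defined layout of the 4 KiB VMX MSR bitmap page, in which a set bit makes the corresponding RDMSR/WRMSR cause a VM exit (per the Intel SDM):

    byte offset 0x000: read  bitmap, low MSRs   0x00000000 - 0x00001fff
    byte offset 0x400: read  bitmap, high MSRs  0xc0000000 - 0xc0001fff
    byte offset 0x800: write bitmap, low MSRs   0x00000000 - 0x00001fff
    byte offset 0xc00: write bitmap, high MSRs  0xc0000000 - 0xc0001fff

Hence the 0x800/f and 0xc00/f word offsets when testing write interception, and hence why alloc_loaded_vmcs() further down fills a fresh bitmap with 0xff: the safe default is to intercept everything, then clear bits selectively.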
3087 | + |
3088 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
3089 | unsigned long entry, unsigned long exit) |
3090 | { |
3091 | @@ -2262,6 +2307,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
3092 | if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { |
3093 | per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; |
3094 | vmcs_load(vmx->loaded_vmcs->vmcs); |
3095 | + indirect_branch_prediction_barrier(); |
3096 | } |
3097 | |
3098 | if (!already_loaded) { |
3099 | @@ -2530,36 +2576,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
3100 | vmx->guest_msrs[from] = tmp; |
3101 | } |
3102 | |
3103 | -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) |
3104 | -{ |
3105 | - unsigned long *msr_bitmap; |
3106 | - |
3107 | - if (is_guest_mode(vcpu)) |
3108 | - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; |
3109 | - else if (cpu_has_secondary_exec_ctrls() && |
3110 | - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
3111 | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
3112 | - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { |
3113 | - if (is_long_mode(vcpu)) |
3114 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; |
3115 | - else |
3116 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; |
3117 | - } else { |
3118 | - if (is_long_mode(vcpu)) |
3119 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; |
3120 | - else |
3121 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; |
3122 | - } |
3123 | - } else { |
3124 | - if (is_long_mode(vcpu)) |
3125 | - msr_bitmap = vmx_msr_bitmap_longmode; |
3126 | - else |
3127 | - msr_bitmap = vmx_msr_bitmap_legacy; |
3128 | - } |
3129 | - |
3130 | - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); |
3131 | -} |
3132 | - |
3133 | /* |
3134 | * Set up the vmcs to automatically save and restore system |
3135 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy |
3136 | @@ -2600,7 +2616,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) |
3137 | vmx->save_nmsrs = save_nmsrs; |
3138 | |
3139 | if (cpu_has_vmx_msr_bitmap()) |
3140 | - vmx_set_msr_bitmap(&vmx->vcpu); |
3141 | + vmx_update_msr_bitmap(&vmx->vcpu); |
3142 | } |
3143 | |
3144 | /* |
3145 | @@ -2989,6 +3005,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
3146 | case MSR_IA32_TSC: |
3147 | msr_info->data = guest_read_tsc(vcpu); |
3148 | break; |
3149 | + case MSR_IA32_SPEC_CTRL: |
3150 | + if (!msr_info->host_initiated && |
3151 | + !guest_cpuid_has_ibrs(vcpu)) |
3152 | + return 1; |
3153 | + |
3154 | + msr_info->data = to_vmx(vcpu)->spec_ctrl; |
3155 | + break; |
3156 | + case MSR_IA32_ARCH_CAPABILITIES: |
3157 | + if (!msr_info->host_initiated && |
3158 | + !guest_cpuid_has_arch_capabilities(vcpu)) |
3159 | + return 1; |
3160 | + msr_info->data = to_vmx(vcpu)->arch_capabilities; |
3161 | + break; |
3162 | case MSR_IA32_SYSENTER_CS: |
3163 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); |
3164 | break; |
3165 | @@ -3093,6 +3122,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
3166 | case MSR_IA32_TSC: |
3167 | kvm_write_tsc(vcpu, msr_info); |
3168 | break; |
3169 | + case MSR_IA32_SPEC_CTRL: |
3170 | + if (!msr_info->host_initiated && |
3171 | + !guest_cpuid_has_ibrs(vcpu)) |
3172 | + return 1; |
3173 | + |
3174 | + /* The STIBP bit doesn't fault even if it's not advertised */ |
3175 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) |
3176 | + return 1; |
3177 | + |
3178 | + vmx->spec_ctrl = data; |
3179 | + |
3180 | + if (!data) |
3181 | + break; |
3182 | + |
3183 | + /* |
3184 | + * For non-nested: |
3185 | + * When it's written (to non-zero) for the first time, pass |
3186 | + * it through. |
3187 | + * |
3188 | + * For nested: |
3189 | + * The handling of the MSR bitmap for L2 guests is done in |
3190 | + * nested_vmx_merge_msr_bitmap. We should not touch the |
3191 | + * vmcs02.msr_bitmap here since it gets completely overwritten |
3192 | + * in the merging. We update the vmcs01 here for L1 as well |
3193 | + * since it will end up touching the MSR anyway now. |
3194 | + */ |
3195 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, |
3196 | + MSR_IA32_SPEC_CTRL, |
3197 | + MSR_TYPE_RW); |
3198 | + break; |
3199 | + case MSR_IA32_PRED_CMD: |
3200 | + if (!msr_info->host_initiated && |
3201 | + !guest_cpuid_has_ibpb(vcpu)) |
3202 | + return 1; |
3203 | + |
3204 | + if (data & ~PRED_CMD_IBPB) |
3205 | + return 1; |
3206 | + |
3207 | + if (!data) |
3208 | + break; |
3209 | + |
3210 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); |
3211 | + |
3212 | + /* |
3213 | + * For non-nested: |
3214 | + * When it's written (to non-zero) for the first time, pass |
3215 | + * it through. |
3216 | + * |
3217 | + * For nested: |
3218 | + * The handling of the MSR bitmap for L2 guests is done in |
3219 | + * nested_vmx_merge_msr_bitmap. We should not touch the |
3220 | + * vmcs02.msr_bitmap here since it gets completely overwritten |
3221 | + * in the merging. |
3222 | + */ |
3223 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, |
3224 | + MSR_TYPE_W); |
3225 | + break; |
3226 | + case MSR_IA32_ARCH_CAPABILITIES: |
3227 | + if (!msr_info->host_initiated) |
3228 | + return 1; |
3229 | + vmx->arch_capabilities = data; |
3230 | + break; |
3231 | case MSR_IA32_CR_PAT: |
3232 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { |
3233 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) |
3234 | @@ -3532,11 +3623,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) |
3235 | return vmcs; |
3236 | } |
3237 | |
3238 | -static struct vmcs *alloc_vmcs(void) |
3239 | -{ |
3240 | - return alloc_vmcs_cpu(raw_smp_processor_id()); |
3241 | -} |
3242 | - |
3243 | static void free_vmcs(struct vmcs *vmcs) |
3244 | { |
3245 | free_pages((unsigned long)vmcs, vmcs_config.order); |
3246 | @@ -3552,9 +3638,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
3247 | loaded_vmcs_clear(loaded_vmcs); |
3248 | free_vmcs(loaded_vmcs->vmcs); |
3249 | loaded_vmcs->vmcs = NULL; |
3250 | + if (loaded_vmcs->msr_bitmap) |
3251 | + free_page((unsigned long)loaded_vmcs->msr_bitmap); |
3252 | WARN_ON(loaded_vmcs->shadow_vmcs != NULL); |
3253 | } |
3254 | |
3255 | +static struct vmcs *alloc_vmcs(void) |
3256 | +{ |
3257 | + return alloc_vmcs_cpu(raw_smp_processor_id()); |
3258 | +} |
3259 | + |
3260 | +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) |
3261 | +{ |
3262 | + loaded_vmcs->vmcs = alloc_vmcs(); |
3263 | + if (!loaded_vmcs->vmcs) |
3264 | + return -ENOMEM; |
3265 | + |
3266 | + loaded_vmcs->shadow_vmcs = NULL; |
3267 | + loaded_vmcs_init(loaded_vmcs); |
3268 | + |
3269 | + if (cpu_has_vmx_msr_bitmap()) { |
3270 | + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
3271 | + if (!loaded_vmcs->msr_bitmap) |
3272 | + goto out_vmcs; |
3273 | + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); |
3274 | + } |
3275 | + return 0; |
3276 | + |
3277 | +out_vmcs: |
3278 | + free_loaded_vmcs(loaded_vmcs); |
3279 | + return -ENOMEM; |
3280 | +} |
3281 | + |
3282 | static void free_kvm_area(void) |
3283 | { |
3284 | int cpu; |
3285 | @@ -4561,10 +4676,8 @@ static void free_vpid(int vpid) |
3286 | spin_unlock(&vmx_vpid_lock); |
3287 | } |
3288 | |
3289 | -#define MSR_TYPE_R 1 |
3290 | -#define MSR_TYPE_W 2 |
3291 | -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
3292 | - u32 msr, int type) |
3293 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
3294 | + u32 msr, int type) |
3295 | { |
3296 | int f = sizeof(unsigned long); |
3297 | |
3298 | @@ -4598,8 +4711,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, |
3299 | } |
3300 | } |
3301 | |
3302 | -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, |
3303 | - u32 msr, int type) |
3304 | +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, |
3305 | + u32 msr, int type) |
3306 | { |
3307 | int f = sizeof(unsigned long); |
3308 | |
3309 | @@ -4633,6 +4746,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, |
3310 | } |
3311 | } |
3312 | |
3313 | +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, |
3314 | + u32 msr, int type, bool value) |
3315 | +{ |
3316 | + if (value) |
3317 | + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); |
3318 | + else |
3319 | + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); |
3320 | +} |
3321 | + |
3322 | /* |
3323 | * If a msr is allowed by L0, we should check whether it is allowed by L1. |
3324 | * The corresponding bit will be cleared unless both of L0 and L1 allow it. |
3325 | @@ -4679,58 +4801,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, |
3326 | } |
3327 | } |
3328 | |
3329 | -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) |
3330 | +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) |
3331 | { |
3332 | - if (!longmode_only) |
3333 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, |
3334 | - msr, MSR_TYPE_R | MSR_TYPE_W); |
3335 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, |
3336 | - msr, MSR_TYPE_R | MSR_TYPE_W); |
3337 | -} |
3338 | + u8 mode = 0; |
3339 | |
3340 | -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) |
3341 | -{ |
3342 | - if (apicv_active) { |
3343 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, |
3344 | - msr, MSR_TYPE_R); |
3345 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, |
3346 | - msr, MSR_TYPE_R); |
3347 | - } else { |
3348 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, |
3349 | - msr, MSR_TYPE_R); |
3350 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, |
3351 | - msr, MSR_TYPE_R); |
3352 | + if (cpu_has_secondary_exec_ctrls() && |
3353 | + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & |
3354 | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { |
3355 | + mode |= MSR_BITMAP_MODE_X2APIC; |
3356 | + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) |
3357 | + mode |= MSR_BITMAP_MODE_X2APIC_APICV; |
3358 | } |
3359 | + |
3360 | + if (is_long_mode(vcpu)) |
3361 | + mode |= MSR_BITMAP_MODE_LM; |
3362 | + |
3363 | + return mode; |
3364 | } |
3365 | |
3366 | -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) |
3367 | +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) |
3368 | + |
3369 | +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, |
3370 | + u8 mode) |
3371 | { |
3372 | - if (apicv_active) { |
3373 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, |
3374 | - msr, MSR_TYPE_R); |
3375 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, |
3376 | - msr, MSR_TYPE_R); |
3377 | - } else { |
3378 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, |
3379 | - msr, MSR_TYPE_R); |
3380 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, |
3381 | - msr, MSR_TYPE_R); |
3382 | + int msr; |
3383 | + |
3384 | + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { |
3385 | + unsigned word = msr / BITS_PER_LONG; |
3386 | + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; |
3387 | + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; |
3388 | + } |
3389 | + |
3390 | + if (mode & MSR_BITMAP_MODE_X2APIC) { |
3391 | + /* |
3392 | + * TPR reads and writes can be virtualized even if virtual interrupt |
3393 | + * delivery is not in use. |
3394 | + */ |
3395 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); |
3396 | + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { |
3397 | + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); |
3398 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); |
3399 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); |
3400 | + } |
3401 | } |
3402 | } |
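The new X2APIC_MSR() macro maps an xAPIC MMIO register offset to its x2APIC MSR number: the x2APIC MSR space starts at APIC_BASE_MSR (0x800) with one MSR per 16-byte register, so the offset is shifted right by 4. Working through the registers used above reproduces the raw constants that the deleted hardware_setup() code below used directly:

    APIC_TASKPRI  (0x080) -> 0x800 + (0x080 >> 4) = 0x808   /* TPR */
    APIC_EOI      (0x0b0) -> 0x800 + (0x0b0 >> 4) = 0x80b   /* EOI */
    APIC_TMCCT    (0x390) -> 0x800 + (0x390 >> 4) = 0x839   /* timer count */
    APIC_SELF_IPI (0x3f0) -> 0x800 + (0x3f0 >> 4) = 0x83f   /* SELF-IPI */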
3403 | |
3404 | -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) |
3405 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) |
3406 | { |
3407 | - if (apicv_active) { |
3408 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, |
3409 | - msr, MSR_TYPE_W); |
3410 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, |
3411 | - msr, MSR_TYPE_W); |
3412 | - } else { |
3413 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, |
3414 | - msr, MSR_TYPE_W); |
3415 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, |
3416 | - msr, MSR_TYPE_W); |
3417 | - } |
3418 | + struct vcpu_vmx *vmx = to_vmx(vcpu); |
3419 | + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; |
3420 | + u8 mode = vmx_msr_bitmap_mode(vcpu); |
3421 | + u8 changed = mode ^ vmx->msr_bitmap_mode; |
3422 | + |
3423 | + if (!changed) |
3424 | + return; |
3425 | + |
3426 | + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, |
3427 | + !(mode & MSR_BITMAP_MODE_LM)); |
3428 | + |
3429 | + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) |
3430 | + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); |
3431 | + |
3432 | + vmx->msr_bitmap_mode = mode; |
3433 | } |
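vmx_update_msr_bitmap() is an instance of a small caching idiom: recompute a cheap mode byte, XOR it against the cached copy so that only the flipped bits remain, and rebuild just the expensive state those bits govern. A self-contained sketch of the idiom, with all names hypothetical:

    #include <stdint.h>

    #define GROUP_X_BITS 0x06       /* hypothetical flag group */

    struct cache {
            uint8_t cached_mode;
    };

    static void update(struct cache *c, uint8_t mode,
                       void (*rebuild_group_x)(uint8_t))
    {
            uint8_t changed = mode ^ c->cached_mode;  /* flipped bits */

            if (!changed)
                    return;                  /* common fast path */
            if (changed & GROUP_X_BITS)
                    rebuild_group_x(mode);   /* expensive, on change only */
            c->cached_mode = mode;
    }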
3434 | |
3435 | static bool vmx_get_enable_apicv(void) |
3436 | @@ -4738,30 +4870,45 @@ static bool vmx_get_enable_apicv(void) |
3437 | return enable_apicv; |
3438 | } |
3439 | |
3440 | -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
3441 | +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) |
3442 | +{ |
3443 | + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
3444 | + gfn_t gfn; |
3445 | + |
3446 | + /* |
3447 | + * Don't need to mark the APIC access page dirty; it is never |
3448 | + * written to by the CPU during APIC virtualization. |
3449 | + */ |
3450 | + |
3451 | + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { |
3452 | + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; |
3453 | + kvm_vcpu_mark_page_dirty(vcpu, gfn); |
3454 | + } |
3455 | + |
3456 | + if (nested_cpu_has_posted_intr(vmcs12)) { |
3457 | + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; |
3458 | + kvm_vcpu_mark_page_dirty(vcpu, gfn); |
3459 | + } |
3460 | +} |
3461 | + |
3462 | + |
3463 | +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
3464 | { |
3465 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3466 | int max_irr; |
3467 | void *vapic_page; |
3468 | u16 status; |
3469 | |
3470 | - if (vmx->nested.pi_desc && |
3471 | - vmx->nested.pi_pending) { |
3472 | - vmx->nested.pi_pending = false; |
3473 | - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) |
3474 | - return 0; |
3475 | - |
3476 | - max_irr = find_last_bit( |
3477 | - (unsigned long *)vmx->nested.pi_desc->pir, 256); |
3478 | + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) |
3479 | + return; |
3480 | |
3481 | - if (max_irr == 256) |
3482 | - return 0; |
3483 | + vmx->nested.pi_pending = false; |
3484 | + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) |
3485 | + return; |
3486 | |
3487 | + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); |
3488 | + if (max_irr != 256) { |
3489 | vapic_page = kmap(vmx->nested.virtual_apic_page); |
3490 | - if (!vapic_page) { |
3491 | - WARN_ON(1); |
3492 | - return -ENOMEM; |
3493 | - } |
3494 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); |
3495 | kunmap(vmx->nested.virtual_apic_page); |
3496 | |
3497 | @@ -4772,7 +4919,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) |
3498 | vmcs_write16(GUEST_INTR_STATUS, status); |
3499 | } |
3500 | } |
3501 | - return 0; |
3502 | + |
3503 | + nested_mark_vmcs12_pages_dirty(vcpu); |
3504 | } |
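The restructured function keeps the same scan: find_last_bit() over the 256-bit posted-interrupt request (PIR) array returns 256 when no bit is set, hence the max_irr != 256 test. A plain-C equivalent of that scan, as a userspace sketch rather than the kernel's word-at-a-time implementation:

    #include <limits.h>

    #define NVECTORS  256
    #define LONG_BITS (CHAR_BIT * sizeof(unsigned long))

    /* Highest pending vector in a 256-bit PIR, or -1 if none. */
    static int highest_pending(const unsigned long *pir)
    {
            for (int v = NVECTORS - 1; v >= 0; v--)
                    if (pir[v / LONG_BITS] & (1UL << (v % LONG_BITS)))
                            return v;
            return -1;
    }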
3505 | |
3506 | static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) |
3507 | @@ -4959,7 +5107,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) |
3508 | } |
3509 | |
3510 | if (cpu_has_vmx_msr_bitmap()) |
3511 | - vmx_set_msr_bitmap(vcpu); |
3512 | + vmx_update_msr_bitmap(vcpu); |
3513 | } |
3514 | |
3515 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
3516 | @@ -5048,7 +5196,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
3517 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); |
3518 | } |
3519 | if (cpu_has_vmx_msr_bitmap()) |
3520 | - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
3521 | + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); |
3522 | |
3523 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
3524 | |
3525 | @@ -5122,6 +5270,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
3526 | ++vmx->nmsrs; |
3527 | } |
3528 | |
3529 | + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) |
3530 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); |
3531 | |
3532 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); |
3533 | |
3534 | @@ -5150,6 +5300,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) |
3535 | u64 cr0; |
3536 | |
3537 | vmx->rmode.vm86_active = 0; |
3538 | + vmx->spec_ctrl = 0; |
3539 | |
3540 | vmx->soft_vnmi_blocked = 0; |
3541 | |
3542 | @@ -6379,7 +6530,7 @@ static void wakeup_handler(void) |
3543 | |
3544 | static __init int hardware_setup(void) |
3545 | { |
3546 | - int r = -ENOMEM, i, msr; |
3547 | + int r = -ENOMEM, i; |
3548 | |
3549 | rdmsrl_safe(MSR_EFER, &host_efer); |
3550 | |
3551 | @@ -6394,41 +6545,13 @@ static __init int hardware_setup(void) |
3552 | if (!vmx_io_bitmap_b) |
3553 | goto out; |
3554 | |
3555 | - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); |
3556 | - if (!vmx_msr_bitmap_legacy) |
3557 | - goto out1; |
3558 | - |
3559 | - vmx_msr_bitmap_legacy_x2apic = |
3560 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
3561 | - if (!vmx_msr_bitmap_legacy_x2apic) |
3562 | - goto out2; |
3563 | - |
3564 | - vmx_msr_bitmap_legacy_x2apic_apicv_inactive = |
3565 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
3566 | - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) |
3567 | - goto out3; |
3568 | - |
3569 | - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); |
3570 | - if (!vmx_msr_bitmap_longmode) |
3571 | - goto out4; |
3572 | - |
3573 | - vmx_msr_bitmap_longmode_x2apic = |
3574 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
3575 | - if (!vmx_msr_bitmap_longmode_x2apic) |
3576 | - goto out5; |
3577 | - |
3578 | - vmx_msr_bitmap_longmode_x2apic_apicv_inactive = |
3579 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
3580 | - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) |
3581 | - goto out6; |
3582 | - |
3583 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
3584 | if (!vmx_vmread_bitmap) |
3585 | - goto out7; |
3586 | + goto out1; |
3587 | |
3588 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); |
3589 | if (!vmx_vmwrite_bitmap) |
3590 | - goto out8; |
3591 | + goto out2; |
3592 | |
3593 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); |
3594 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); |
3595 | @@ -6437,12 +6560,9 @@ static __init int hardware_setup(void) |
3596 | |
3597 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); |
3598 | |
3599 | - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
3600 | - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
3601 | - |
3602 | if (setup_vmcs_config(&vmcs_config) < 0) { |
3603 | r = -EIO; |
3604 | - goto out9; |
3605 | + goto out3; |
3606 | } |
3607 | |
3608 | if (boot_cpu_has(X86_FEATURE_NX)) |
3609 | @@ -6499,47 +6619,8 @@ static __init int hardware_setup(void) |
3610 | kvm_tsc_scaling_ratio_frac_bits = 48; |
3611 | } |
3612 | |
3613 | - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
3614 | - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
3615 | - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
3616 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
3617 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
3618 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
3619 | - |
3620 | - memcpy(vmx_msr_bitmap_legacy_x2apic, |
3621 | - vmx_msr_bitmap_legacy, PAGE_SIZE); |
3622 | - memcpy(vmx_msr_bitmap_longmode_x2apic, |
3623 | - vmx_msr_bitmap_longmode, PAGE_SIZE); |
3624 | - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, |
3625 | - vmx_msr_bitmap_legacy, PAGE_SIZE); |
3626 | - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, |
3627 | - vmx_msr_bitmap_longmode, PAGE_SIZE); |
3628 | - |
3629 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
3630 | |
3631 | - /* |
3632 | - * enable_apicv && kvm_vcpu_apicv_active() |
3633 | - */ |
3634 | - for (msr = 0x800; msr <= 0x8ff; msr++) |
3635 | - vmx_disable_intercept_msr_read_x2apic(msr, true); |
3636 | - |
3637 | - /* TMCCT */ |
3638 | - vmx_enable_intercept_msr_read_x2apic(0x839, true); |
3639 | - /* TPR */ |
3640 | - vmx_disable_intercept_msr_write_x2apic(0x808, true); |
3641 | - /* EOI */ |
3642 | - vmx_disable_intercept_msr_write_x2apic(0x80b, true); |
3643 | - /* SELF-IPI */ |
3644 | - vmx_disable_intercept_msr_write_x2apic(0x83f, true); |
3645 | - |
3646 | - /* |
3647 | - * (enable_apicv && !kvm_vcpu_apicv_active()) || |
3648 | - * !enable_apicv |
3649 | - */ |
3650 | - /* TPR */ |
3651 | - vmx_disable_intercept_msr_read_x2apic(0x808, false); |
3652 | - vmx_disable_intercept_msr_write_x2apic(0x808, false); |
3653 | - |
3654 | if (enable_ept) { |
3655 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, |
3656 | (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, |
3657 | @@ -6585,22 +6666,10 @@ static __init int hardware_setup(void) |
3658 | |
3659 | return alloc_kvm_area(); |
3660 | |
3661 | -out9: |
3662 | - free_page((unsigned long)vmx_vmwrite_bitmap); |
3663 | -out8: |
3664 | - free_page((unsigned long)vmx_vmread_bitmap); |
3665 | -out7: |
3666 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); |
3667 | -out6: |
3668 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); |
3669 | -out5: |
3670 | - free_page((unsigned long)vmx_msr_bitmap_longmode); |
3671 | -out4: |
3672 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); |
3673 | out3: |
3674 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); |
3675 | + free_page((unsigned long)vmx_vmwrite_bitmap); |
3676 | out2: |
3677 | - free_page((unsigned long)vmx_msr_bitmap_legacy); |
3678 | + free_page((unsigned long)vmx_vmread_bitmap); |
3679 | out1: |
3680 | free_page((unsigned long)vmx_io_bitmap_b); |
3681 | out: |
3682 | @@ -6611,12 +6680,6 @@ static __init int hardware_setup(void) |
3683 | |
3684 | static __exit void hardware_unsetup(void) |
3685 | { |
3686 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); |
3687 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); |
3688 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); |
3689 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); |
3690 | - free_page((unsigned long)vmx_msr_bitmap_legacy); |
3691 | - free_page((unsigned long)vmx_msr_bitmap_longmode); |
3692 | free_page((unsigned long)vmx_io_bitmap_b); |
3693 | free_page((unsigned long)vmx_io_bitmap_a); |
3694 | free_page((unsigned long)vmx_vmwrite_bitmap); |
3695 | @@ -6663,94 +6726,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) |
3696 | return handle_nop(vcpu); |
3697 | } |
3698 | |
3699 | -/* |
3700 | - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. |
3701 | - * We could reuse a single VMCS for all the L2 guests, but we also want the |
3702 | - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this |
3703 | - * allows keeping them loaded on the processor, and in the future will allow |
3704 | - * optimizations where prepare_vmcs02 doesn't need to set all the fields on |
3705 | - * every entry if they never change. |
3706 | - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE |
3707 | - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. |
3708 | - * |
3709 | - * The following functions allocate and free a vmcs02 in this pool. |
3710 | - */ |
3711 | - |
3712 | -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ |
3713 | -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) |
3714 | -{ |
3715 | - struct vmcs02_list *item; |
3716 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
3717 | - if (item->vmptr == vmx->nested.current_vmptr) { |
3718 | - list_move(&item->list, &vmx->nested.vmcs02_pool); |
3719 | - return &item->vmcs02; |
3720 | - } |
3721 | - |
3722 | - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { |
3723 | - /* Recycle the least recently used VMCS. */ |
3724 | - item = list_last_entry(&vmx->nested.vmcs02_pool, |
3725 | - struct vmcs02_list, list); |
3726 | - item->vmptr = vmx->nested.current_vmptr; |
3727 | - list_move(&item->list, &vmx->nested.vmcs02_pool); |
3728 | - return &item->vmcs02; |
3729 | - } |
3730 | - |
3731 | - /* Create a new VMCS */ |
3732 | - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); |
3733 | - if (!item) |
3734 | - return NULL; |
3735 | - item->vmcs02.vmcs = alloc_vmcs(); |
3736 | - item->vmcs02.shadow_vmcs = NULL; |
3737 | - if (!item->vmcs02.vmcs) { |
3738 | - kfree(item); |
3739 | - return NULL; |
3740 | - } |
3741 | - loaded_vmcs_init(&item->vmcs02); |
3742 | - item->vmptr = vmx->nested.current_vmptr; |
3743 | - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); |
3744 | - vmx->nested.vmcs02_num++; |
3745 | - return &item->vmcs02; |
3746 | -} |
3747 | - |
3748 | -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ |
3749 | -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) |
3750 | -{ |
3751 | - struct vmcs02_list *item; |
3752 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) |
3753 | - if (item->vmptr == vmptr) { |
3754 | - free_loaded_vmcs(&item->vmcs02); |
3755 | - list_del(&item->list); |
3756 | - kfree(item); |
3757 | - vmx->nested.vmcs02_num--; |
3758 | - return; |
3759 | - } |
3760 | -} |
3761 | - |
3762 | -/* |
3763 | - * Free all VMCSs saved for this vcpu, except the one pointed by |
3764 | - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs |
3765 | - * must be &vmx->vmcs01. |
3766 | - */ |
3767 | -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) |
3768 | -{ |
3769 | - struct vmcs02_list *item, *n; |
3770 | - |
3771 | - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); |
3772 | - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { |
3773 | - /* |
3774 | - * Something will leak if the above WARN triggers. Better than |
3775 | - * a use-after-free. |
3776 | - */ |
3777 | - if (vmx->loaded_vmcs == &item->vmcs02) |
3778 | - continue; |
3779 | - |
3780 | - free_loaded_vmcs(&item->vmcs02); |
3781 | - list_del(&item->list); |
3782 | - kfree(item); |
3783 | - vmx->nested.vmcs02_num--; |
3784 | - } |
3785 | -} |
3786 | - |
3787 | /* |
3788 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), |
3789 | * set the success or error code of an emulated VMX instruction, as specified |
3790 | @@ -7025,6 +7000,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) |
3791 | struct vmcs *shadow_vmcs; |
3792 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED |
3793 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; |
3794 | + int r; |
3795 | |
3796 | /* The Intel VMX Instruction Reference lists a bunch of bits that |
3797 | * are prerequisite to running VMXON, most notably cr4.VMXE must be |
3798 | @@ -7064,12 +7040,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) |
3799 | return 1; |
3800 | } |
3801 | |
3802 | - if (cpu_has_vmx_msr_bitmap()) { |
3803 | - vmx->nested.msr_bitmap = |
3804 | - (unsigned long *)__get_free_page(GFP_KERNEL); |
3805 | - if (!vmx->nested.msr_bitmap) |
3806 | - goto out_msr_bitmap; |
3807 | - } |
3808 | + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); |
3809 | + if (r < 0) |
3810 | + goto out_vmcs02; |
3811 | |
3812 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); |
3813 | if (!vmx->nested.cached_vmcs12) |
3814 | @@ -7086,9 +7059,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) |
3815 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; |
3816 | } |
3817 | |
3818 | - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
3819 | - vmx->nested.vmcs02_num = 0; |
3820 | - |
3821 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, |
3822 | HRTIMER_MODE_REL_PINNED); |
3823 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; |
3824 | @@ -7103,9 +7073,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) |
3825 | kfree(vmx->nested.cached_vmcs12); |
3826 | |
3827 | out_cached_vmcs12: |
3828 | - free_page((unsigned long)vmx->nested.msr_bitmap); |
3829 | + free_loaded_vmcs(&vmx->nested.vmcs02); |
3830 | |
3831 | -out_msr_bitmap: |
3832 | +out_vmcs02: |
3833 | return -ENOMEM; |
3834 | } |
3835 | |
3836 | @@ -7181,17 +7151,13 @@ static void free_nested(struct vcpu_vmx *vmx) |
3837 | vmx->nested.vmxon = false; |
3838 | free_vpid(vmx->nested.vpid02); |
3839 | nested_release_vmcs12(vmx); |
3840 | - if (vmx->nested.msr_bitmap) { |
3841 | - free_page((unsigned long)vmx->nested.msr_bitmap); |
3842 | - vmx->nested.msr_bitmap = NULL; |
3843 | - } |
3844 | if (enable_shadow_vmcs) { |
3845 | vmcs_clear(vmx->vmcs01.shadow_vmcs); |
3846 | free_vmcs(vmx->vmcs01.shadow_vmcs); |
3847 | vmx->vmcs01.shadow_vmcs = NULL; |
3848 | } |
3849 | kfree(vmx->nested.cached_vmcs12); |
3850 | - /* Unpin physical memory we referred to in current vmcs02 */ |
3851 | + /* Unpin physical memory we referred to in the vmcs02 */ |
3852 | if (vmx->nested.apic_access_page) { |
3853 | nested_release_page(vmx->nested.apic_access_page); |
3854 | vmx->nested.apic_access_page = NULL; |
3855 | @@ -7207,7 +7173,7 @@ static void free_nested(struct vcpu_vmx *vmx) |
3856 | vmx->nested.pi_desc = NULL; |
3857 | } |
3858 | |
3859 | - nested_free_all_saved_vmcss(vmx); |
3860 | + free_loaded_vmcs(&vmx->nested.vmcs02); |
3861 | } |
3862 | |
3863 | /* Emulate the VMXOFF instruction */ |
3864 | @@ -7241,8 +7207,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) |
3865 | vmptr + offsetof(struct vmcs12, launch_state), |
3866 | &zero, sizeof(zero)); |
3867 | |
3868 | - nested_free_vmcs02(vmx, vmptr); |
3869 | - |
3870 | skip_emulated_instruction(vcpu); |
3871 | nested_vmx_succeed(vcpu); |
3872 | return 1; |
3873 | @@ -8029,6 +7993,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) |
3874 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), |
3875 | KVM_ISA_VMX); |
3876 | |
3877 | + /* |
3878 | + * The host physical addresses of some pages of guest memory |
3879 | + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC |
3880 | + * Page). The CPU may write to these pages via their host |
3881 | + * physical address while L2 is running, bypassing any |
3882 | + * address-translation-based dirty tracking (e.g. EPT write |
3883 | + * protection). |
3884 | + * |
3885 | + * Mark them dirty on every exit from L2 to prevent them from |
3886 | + * getting out of sync with dirty tracking. |
3887 | + */ |
3888 | + nested_mark_vmcs12_pages_dirty(vcpu); |
3889 | + |
3890 | if (vmx->nested.nested_run_pending) |
3891 | return false; |
3892 | |
3893 | @@ -8520,7 +8497,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) |
3894 | } |
3895 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); |
3896 | |
3897 | - vmx_set_msr_bitmap(vcpu); |
3898 | + vmx_update_msr_bitmap(vcpu); |
3899 | } |
3900 | |
3901 | static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) |
3902 | @@ -8676,14 +8653,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) |
3903 | #endif |
3904 | "pushf\n\t" |
3905 | __ASM_SIZE(push) " $%c[cs]\n\t" |
3906 | - "call *%[entry]\n\t" |
3907 | + CALL_NOSPEC |
3908 | : |
3909 | #ifdef CONFIG_X86_64 |
3910 | [sp]"=&r"(tmp), |
3911 | #endif |
3912 | "+r"(__sp) |
3913 | : |
3914 | - [entry]"r"(entry), |
3915 | + THUNK_TARGET(entry), |
3916 | [ss]"i"(__KERNEL_DS), |
3917 | [cs]"i"(__KERNEL_CS) |
3918 | ); |
3919 | @@ -8909,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3920 | |
3921 | vmx_arm_hv_timer(vcpu); |
3922 | |
3923 | + /* |
3924 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if |
3925 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there |
3926 | + * is no need to worry about the conditional branch over the wrmsr |
3927 | + * being speculatively taken. |
3928 | + */ |
3929 | + if (vmx->spec_ctrl) |
3930 | + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
3931 | + |
3932 | vmx->__launched = vmx->loaded_vmcs->launched; |
3933 | asm( |
3934 | /* Store host registers */ |
3935 | @@ -9027,6 +9013,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3936 | #endif |
3937 | ); |
3938 | |
3939 | + /* |
3940 | + * We do not use IBRS in the kernel. If this vCPU has used the |
3941 | + * SPEC_CTRL MSR it may have left it on; save the value and |
3942 | + * turn it off. This is much more efficient than blindly adding |
3943 | + * it to the atomic save/restore list, especially as the former |
3944 | + * (saving guest MSRs on vmexit) doesn't even exist in KVM. |
3945 | + * |
3946 | + * For non-nested case: |
3947 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to |
3948 | + * save it. |
3949 | + * |
3950 | + * For nested case: |
3951 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to |
3952 | + * save it. |
3953 | + */ |
3954 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) |
3955 | + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); |
3956 | + |
3957 | + if (vmx->spec_ctrl) |
3958 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); |
3959 | + |
3960 | /* Eliminate branch target predictions from guest mode */ |
3961 | vmexit_fill_RSB(); |
3962 | |
3963 | @@ -9140,6 +9147,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
3964 | { |
3965 | int err; |
3966 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); |
3967 | + unsigned long *msr_bitmap; |
3968 | int cpu; |
3969 | |
3970 | if (!vmx) |
3971 | @@ -9172,17 +9180,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) |
3972 | if (!vmx->guest_msrs) |
3973 | goto free_pml; |
3974 | |
3975 | - vmx->loaded_vmcs = &vmx->vmcs01; |
3976 | - vmx->loaded_vmcs->vmcs = alloc_vmcs(); |
3977 | - vmx->loaded_vmcs->shadow_vmcs = NULL; |
3978 | - if (!vmx->loaded_vmcs->vmcs) |
3979 | - goto free_msrs; |
3980 | if (!vmm_exclusive) |
3981 | kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); |
3982 | - loaded_vmcs_init(vmx->loaded_vmcs); |
3983 | + err = alloc_loaded_vmcs(&vmx->vmcs01); |
3984 | if (!vmm_exclusive) |
3985 | kvm_cpu_vmxoff(); |
3986 | + if (err < 0) |
3987 | + goto free_msrs; |
3988 | |
3989 | + msr_bitmap = vmx->vmcs01.msr_bitmap; |
3990 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); |
3991 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); |
3992 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); |
3993 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); |
3994 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); |
3995 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); |
3996 | + vmx->msr_bitmap_mode = 0; |
3997 | + |
3998 | + vmx->loaded_vmcs = &vmx->vmcs01; |
3999 | cpu = get_cpu(); |
4000 | vmx_vcpu_load(&vmx->vcpu, cpu); |
4001 | vmx->vcpu.cpu = cpu; |
4002 | @@ -9576,21 +9591,31 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
4003 | int msr; |
4004 | struct page *page; |
4005 | unsigned long *msr_bitmap_l1; |
4006 | - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; |
4007 | + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; |
4008 | + /* |
4009 | + * pred_cmd & spec_ctrl are trying to verify two things: |
4010 | + * |
4011 | + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This |
4012 | + * ensures that we do not accidentally generate an L02 MSR bitmap |
4013 | + * from the L12 MSR bitmap that is too permissive. |
4014 | + * 2. That L1 or L2s have actually used the MSR. This avoids |
4015 | + * unnecessarily merging of the bitmap if the MSR is unused. This |
4016 | + * works properly because we only update the L01 MSR bitmap lazily. |
4017 | + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only |
4018 | + * updated to reflect this when L1 (or its L2s) actually write to |
4019 | + * the MSR. |
4020 | + */ |
4021 | + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); |
4022 | + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); |
4023 | |
4024 | - /* This shortcut is ok because we support only x2APIC MSRs so far. */ |
4025 | - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) |
4026 | + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && |
4027 | + !pred_cmd && !spec_ctrl) |
4028 | return false; |
4029 | |
4030 | page = nested_get_page(vcpu, vmcs12->msr_bitmap); |
4031 | if (!page) |
4032 | return false; |
4033 | msr_bitmap_l1 = (unsigned long *)kmap(page); |
4034 | - if (!msr_bitmap_l1) { |
4035 | - nested_release_page_clean(page); |
4036 | - WARN_ON(1); |
4037 | - return false; |
4038 | - } |
4039 | |
4040 | memset(msr_bitmap_l0, 0xff, PAGE_SIZE); |
4041 | |
4042 | @@ -9617,6 +9642,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, |
4043 | MSR_TYPE_W); |
4044 | } |
4045 | } |
4046 | + |
4047 | + if (spec_ctrl) |
4048 | + nested_vmx_disable_intercept_for_msr( |
4049 | + msr_bitmap_l1, msr_bitmap_l0, |
4050 | + MSR_IA32_SPEC_CTRL, |
4051 | + MSR_TYPE_R | MSR_TYPE_W); |
4052 | + |
4053 | + if (pred_cmd) |
4054 | + nested_vmx_disable_intercept_for_msr( |
4055 | + msr_bitmap_l1, msr_bitmap_l0, |
4056 | + MSR_IA32_PRED_CMD, |
4057 | + MSR_TYPE_W); |
4058 | + |
4059 | kunmap(page); |
4060 | nested_release_page_clean(page); |
4061 | |
4062 | @@ -10096,6 +10134,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) |
4063 | if (kvm_has_tsc_control) |
4064 | decache_tsc_multiplier(vmx); |
4065 | |
4066 | + if (cpu_has_vmx_msr_bitmap()) |
4067 | + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); |
4068 | + |
4069 | if (enable_vpid) { |
4070 | /* |
4071 | * There is no direct mapping between vpid02 and vpid12, the |
4072 | @@ -10191,7 +10232,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) |
4073 | struct vmcs12 *vmcs12; |
4074 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4075 | int cpu; |
4076 | - struct loaded_vmcs *vmcs02; |
4077 | bool ia32e; |
4078 | u32 msr_entry_idx; |
4079 | |
4080 | @@ -10331,17 +10371,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) |
4081 | * the nested entry. |
4082 | */ |
4083 | |
4084 | - vmcs02 = nested_get_current_vmcs02(vmx); |
4085 | - if (!vmcs02) |
4086 | - return -ENOMEM; |
4087 | - |
4088 | enter_guest_mode(vcpu); |
4089 | |
4090 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) |
4091 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); |
4092 | |
4093 | cpu = get_cpu(); |
4094 | - vmx->loaded_vmcs = vmcs02; |
4095 | + vmx->loaded_vmcs = &vmx->nested.vmcs02; |
4096 | vmx_vcpu_put(vcpu); |
4097 | vmx_vcpu_load(vcpu, cpu); |
4098 | vcpu->cpu = cpu; |
4099 | @@ -10493,7 +10529,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) |
4100 | return 0; |
4101 | } |
4102 | |
4103 | - return vmx_complete_nested_posted_interrupt(vcpu); |
4104 | + vmx_complete_nested_posted_interrupt(vcpu); |
4105 | + return 0; |
4106 | } |
4107 | |
4108 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) |
4109 | @@ -10804,7 +10841,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, |
4110 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); |
4111 | |
4112 | if (cpu_has_vmx_msr_bitmap()) |
4113 | - vmx_set_msr_bitmap(vcpu); |
4114 | + vmx_update_msr_bitmap(vcpu); |
4115 | |
4116 | if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, |
4117 | vmcs12->vm_exit_msr_load_count)) |
4118 | @@ -10855,10 +10892,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
4119 | vm_exit_controls_reset_shadow(vmx); |
4120 | vmx_segment_cache_clear(vmx); |
4121 | |
4122 | - /* if no vmcs02 cache requested, remove the one we used */ |
4123 | - if (VMCS02_POOL_SIZE == 0) |
4124 | - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); |
4125 | - |
4126 | load_vmcs12_host_state(vcpu, vmcs12); |
4127 | |
4128 | /* Update any VMCS fields that might have changed while L2 ran */ |
4129 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c |
4130 | index e023ef981feb..75f756eac979 100644 |
4131 | --- a/arch/x86/kvm/x86.c |
4132 | +++ b/arch/x86/kvm/x86.c |
4133 | @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { |
4134 | #endif |
4135 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
4136 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, |
4137 | + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES |
4138 | }; |
4139 | |
4140 | static unsigned num_msrs_to_save; |
4141 | diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile |
4142 | index 6bf1898ddf49..4ad7c4dd311c 100644 |
4143 | --- a/arch/x86/lib/Makefile |
4144 | +++ b/arch/x86/lib/Makefile |
4145 | @@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
4146 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
4147 | lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o |
4148 | lib-$(CONFIG_RETPOLINE) += retpoline.o |
4149 | +OBJECT_FILES_NON_STANDARD_retpoline.o :=y |
4150 | |
4151 | obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o |
4152 | |
4153 | diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S |
4154 | index 37b62d412148..b12b214713a6 100644 |
4155 | --- a/arch/x86/lib/getuser.S |
4156 | +++ b/arch/x86/lib/getuser.S |
4157 | @@ -39,6 +39,8 @@ ENTRY(__get_user_1) |
4158 | mov PER_CPU_VAR(current_task), %_ASM_DX |
4159 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
4160 | jae bad_get_user |
4161 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
4162 | + and %_ASM_DX, %_ASM_AX |
4163 | ASM_STAC |
4164 | 1: movzbl (%_ASM_AX),%edx |
4165 | xor %eax,%eax |
4166 | @@ -53,6 +55,8 @@ ENTRY(__get_user_2) |
4167 | mov PER_CPU_VAR(current_task), %_ASM_DX |
4168 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
4169 | jae bad_get_user |
4170 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
4171 | + and %_ASM_DX, %_ASM_AX |
4172 | ASM_STAC |
4173 | 2: movzwl -1(%_ASM_AX),%edx |
4174 | xor %eax,%eax |
4175 | @@ -67,6 +71,8 @@ ENTRY(__get_user_4) |
4176 | mov PER_CPU_VAR(current_task), %_ASM_DX |
4177 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
4178 | jae bad_get_user |
4179 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
4180 | + and %_ASM_DX, %_ASM_AX |
4181 | ASM_STAC |
4182 | 3: movl -3(%_ASM_AX),%edx |
4183 | xor %eax,%eax |
4184 | @@ -82,6 +88,8 @@ ENTRY(__get_user_8) |
4185 | mov PER_CPU_VAR(current_task), %_ASM_DX |
4186 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
4187 | jae bad_get_user |
4188 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
4189 | + and %_ASM_DX, %_ASM_AX |
4190 | ASM_STAC |
4191 | 4: movq -7(%_ASM_AX),%rdx |
4192 | xor %eax,%eax |
4193 | @@ -93,6 +101,8 @@ ENTRY(__get_user_8) |
4194 | mov PER_CPU_VAR(current_task), %_ASM_DX |
4195 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX |
4196 | jae bad_get_user_8 |
4197 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ |
4198 | + and %_ASM_DX, %_ASM_AX |
4199 | ASM_STAC |
4200 | 4: movl -7(%_ASM_AX),%edx |
4201 | 5: movl -3(%_ASM_AX),%ecx |
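The sbb/and pair inserted after each limit check is the assembly form of array_index_mask_nospec(). The preceding cmp leaves CF=1 exactly when the user address is below the limit; sbb %dx,%dx then materializes that flag as an all-ones (valid) or all-zero (invalid) mask, and the and clamps the pointer before any dereference. Because the mask is a data dependency of the comparison rather than of the jae branch, a mispredicted branch cannot speculatively dereference a bad address. In C the idea is roughly the following hypothetical helper (note that a compiler may lower the comparison back to a branch, which is why the kernel does this in asm):

    /* All-ones when addr < limit, else 0; the and forces a failing
     * address to NULL even under branch misprediction. */
    static inline unsigned long clamp_user_addr(unsigned long addr,
                                                unsigned long limit)
    {
            unsigned long mask = 0UL - (addr < limit);

            return addr & mask;
    }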
4202 | diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S |
4203 | index dfb2ba91b670..480edc3a5e03 100644 |
4204 | --- a/arch/x86/lib/retpoline.S |
4205 | +++ b/arch/x86/lib/retpoline.S |
4206 | @@ -7,6 +7,7 @@ |
4207 | #include <asm/alternative-asm.h> |
4208 | #include <asm/export.h> |
4209 | #include <asm/nospec-branch.h> |
4210 | +#include <asm/bitsperlong.h> |
4211 | |
4212 | .macro THUNK reg |
4213 | .section .text.__x86.indirect_thunk |
4214 | @@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX) |
4215 | GENERATE_THUNK(_ASM_SI) |
4216 | GENERATE_THUNK(_ASM_DI) |
4217 | GENERATE_THUNK(_ASM_BP) |
4218 | -GENERATE_THUNK(_ASM_SP) |
4219 | #ifdef CONFIG_64BIT |
4220 | GENERATE_THUNK(r8) |
4221 | GENERATE_THUNK(r9) |
4222 | @@ -47,3 +47,58 @@ GENERATE_THUNK(r13) |
4223 | GENERATE_THUNK(r14) |
4224 | GENERATE_THUNK(r15) |
4225 | #endif |
4226 | + |
4227 | +/* |
4228 | + * Fill the CPU return stack buffer. |
4229 | + * |
4230 | + * Each entry in the RSB, if used for a speculative 'ret', contains an |
4231 | + * infinite 'pause; lfence; jmp' loop to capture speculative execution. |
4232 | + * |
4233 | + * This is required in various cases for retpoline and IBRS-based |
4234 | + * mitigations for the Spectre variant 2 vulnerability. Sometimes to |
4235 | + * eliminate potentially bogus entries from the RSB, and sometimes |
4236 | + * purely to ensure that it doesn't get empty, which on some CPUs would |
4237 | + * allow predictions from other (unwanted!) sources to be used. |
4238 | + * |
4239 | + * Google experimented with loop-unrolling and this turned out to be |
4240 | + * the optimal version: two calls, each with its own speculation |
4241 | + * trap should its return address end up getting used, in a loop. |
4242 | + */ |
4243 | +.macro STUFF_RSB nr:req sp:req |
4244 | + mov $(\nr / 2), %_ASM_BX |
4245 | + .align 16 |
4246 | +771: |
4247 | + call 772f |
4248 | +773: /* speculation trap */ |
4249 | + pause |
4250 | + lfence |
4251 | + jmp 773b |
4252 | + .align 16 |
4253 | +772: |
4254 | + call 774f |
4255 | +775: /* speculation trap */ |
4256 | + pause |
4257 | + lfence |
4258 | + jmp 775b |
4259 | + .align 16 |
4260 | +774: |
4261 | + dec %_ASM_BX |
4262 | + jnz 771b |
4263 | + add $((BITS_PER_LONG/8) * \nr), \sp |
4264 | +.endm |
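To make the macro's bookkeeping concrete: each pass through the loop issues two calls, so with nr = RSB_CLEAR_LOOPS = 32 the loop runs 16 times and plants 32 return addresses, every one pointing at a pause/lfence speculation trap. On x86-64 that leaves 32 * (BITS_PER_LONG / 8) = 32 * 8 = 256 bytes on the stack, which the final add reclaims in one step: returning through those addresses with ret would both land in the infinite traps architecturally and consume the very RSB entries the macro just planted.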
4265 | + |
4266 | +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ |
4267 | + |
4268 | +ENTRY(__fill_rsb) |
4269 | + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP |
4270 | + ret |
4271 | +END(__fill_rsb) |
4272 | +EXPORT_SYMBOL_GPL(__fill_rsb) |
4273 | + |
4274 | +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ |
4275 | + |
4276 | +ENTRY(__clear_rsb) |
4277 | + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP |
4278 | + ret |
4279 | +END(__clear_rsb) |
4280 | +EXPORT_SYMBOL_GPL(__clear_rsb) |
4281 | diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c |
4282 | index 3bc7baf2a711..5c06dbffc52f 100644 |
4283 | --- a/arch/x86/lib/usercopy_32.c |
4284 | +++ b/arch/x86/lib/usercopy_32.c |
4285 | @@ -570,12 +570,12 @@ do { \ |
4286 | unsigned long __copy_to_user_ll(void __user *to, const void *from, |
4287 | unsigned long n) |
4288 | { |
4289 | - stac(); |
4290 | + __uaccess_begin_nospec(); |
4291 | if (movsl_is_ok(to, from, n)) |
4292 | __copy_user(to, from, n); |
4293 | else |
4294 | n = __copy_user_intel(to, from, n); |
4295 | - clac(); |
4296 | + __uaccess_end(); |
4297 | return n; |
4298 | } |
4299 | EXPORT_SYMBOL(__copy_to_user_ll); |
4300 | @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); |
4301 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, |
4302 | unsigned long n) |
4303 | { |
4304 | - stac(); |
4305 | + __uaccess_begin_nospec(); |
4306 | #ifdef CONFIG_X86_INTEL_USERCOPY |
4307 | if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) |
4308 | n = __copy_user_intel_nocache(to, from, n); |
4309 | @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr |
4310 | #else |
4311 | __copy_user(to, from, n); |
4312 | #endif |
4313 | - clac(); |
4314 | + __uaccess_end(); |
4315 | return n; |
4316 | } |
4317 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); |
4318 | diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c |
4319 | index e3af318af2db..2a07341aca46 100644 |
4320 | --- a/crypto/tcrypt.c |
4321 | +++ b/crypto/tcrypt.c |
4322 | @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], |
4323 | } |
4324 | |
4325 | sg_init_table(sg, np + 1); |
4326 | - np--; |
4327 | + if (rem) |
4328 | + np--; |
4329 | for (k = 0; k < np; k++) |
4330 | sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); |
4331 | |
4332 | - sg_set_buf(&sg[k + 1], xbuf[k], rem); |
4333 | + if (rem) |
4334 | + sg_set_buf(&sg[k + 1], xbuf[k], rem); |
4335 | } |
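The fix closes an off-by-one in the scatterlist setup when the buffer length is an exact multiple of PAGE_SIZE. Worked example, using np and rem as computed earlier in this function (roughly np = ceil(buflen / PAGE_SIZE) and rem = buflen % PAGE_SIZE): for buflen = 2 * PAGE_SIZE, np = 2 and rem = 0. The old code unconditionally decremented np and appended a tail entry of rem bytes, yielding one full-page entry plus a zero-length entry. With the fix, np is only decremented when a remainder tail actually exists, so the exact-multiple case maps both full pages and never emits an empty scatterlist entry.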
4336 | |
4337 | static void test_aead_speed(const char *algo, int enc, unsigned int secs, |
4338 | diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c |
4339 | index 83f1439e57fd..6e8eaa7fe7a6 100644 |
4340 | --- a/drivers/auxdisplay/img-ascii-lcd.c |
4341 | +++ b/drivers/auxdisplay/img-ascii-lcd.c |
4342 | @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = { |
4343 | .remove = img_ascii_lcd_remove, |
4344 | }; |
4345 | module_platform_driver(img_ascii_lcd_driver); |
4346 | + |
4347 | +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); |
4348 | +MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>"); |
4349 | +MODULE_LICENSE("GPL"); |
4350 | diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c |
4351 | index a2ec6d8796a0..3322b157106d 100644 |
4352 | --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c |
4353 | +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c |
4354 | @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc) |
4355 | rcrtc->started = true; |
4356 | } |
4357 | |
4358 | +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc) |
4359 | +{ |
4360 | + struct rcar_du_device *rcdu = rcrtc->group->dev; |
4361 | + struct drm_crtc *crtc = &rcrtc->crtc; |
4362 | + u32 status; |
4363 | + /* Make sure vblank interrupts are enabled. */ |
4364 | + drm_crtc_vblank_get(crtc); |
4365 | + /* |
4366 | + * Disable planes and calculate how many vertical blanking interrupts we |
4367 | + * have to wait for. If a vertical blanking interrupt has been triggered |
4368 | + * but not processed yet, we don't know whether it occurred before or |
4369 | + * after the planes got disabled. We thus have to wait for two vblank |
4370 | + * interrupts in that case. |
4371 | + */ |
4372 | + spin_lock_irq(&rcrtc->vblank_lock); |
4373 | + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); |
4374 | + status = rcar_du_crtc_read(rcrtc, DSSR); |
4375 | + rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1; |
4376 | + spin_unlock_irq(&rcrtc->vblank_lock); |
4377 | + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0, |
4378 | + msecs_to_jiffies(100))) |
4379 | + dev_warn(rcdu->dev, "vertical blanking timeout\n"); |
4380 | + drm_crtc_vblank_put(crtc); |
4381 | +} |
4382 | + |
4383 | static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) |
4384 | { |
4385 | struct drm_crtc *crtc = &rcrtc->crtc; |
4386 | @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) |
4387 | return; |
4388 | |
4389 | /* Disable all planes and wait for the change to take effect. This is |
4390 | - * required as the DSnPR registers are updated on vblank, and no vblank |
4391 | - * will occur once the CRTC is stopped. Disabling planes when starting |
4392 | - * the CRTC thus wouldn't be enough as it would start scanning out |
4393 | - * immediately from old frame buffers until the next vblank. |
4394 | + * required as the plane enable registers are updated on vblank, and no |
4395 | + * vblank will occur once the CRTC is stopped. Disabling planes when |
4396 | + * starting the CRTC thus wouldn't be enough as it would start scanning |
4397 | + * out immediately from old frame buffers until the next vblank. |
4398 | * |
4399 | * This increases the CRTC stop delay, especially when multiple CRTCs |
4400 | * are stopped in one operation as we now wait for one vblank per CRTC. |
4401 | * Whether this can be improved needs to be researched. |
4402 | */ |
4403 | - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); |
4404 | - drm_crtc_wait_one_vblank(crtc); |
4405 | + rcar_du_crtc_disable_planes(rcrtc); |
4406 | |
4407 | /* Disable vertical blanking interrupt reporting. We first need to wait |
4408 | * for page flip completion before stopping the CRTC as userspace |
4409 | @@ -548,10 +572,25 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) |
4410 | irqreturn_t ret = IRQ_NONE; |
4411 | u32 status; |
4412 | |
4413 | + spin_lock(&rcrtc->vblank_lock); |
4414 | + |
4415 | status = rcar_du_crtc_read(rcrtc, DSSR); |
4416 | rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); |
4417 | |
4418 | - if (status & DSSR_FRM) { |
4419 | + if (status & DSSR_VBK) { |
4420 | + /* |
4421 | + * Wake up the vblank wait if the counter reaches 0. This must |
4422 | + * be protected by the vblank_lock to avoid races in |
4423 | + * rcar_du_crtc_disable_planes(). |
4424 | + */ |
4425 | + if (rcrtc->vblank_count) { |
4426 | + if (--rcrtc->vblank_count == 0) |
4427 | + wake_up(&rcrtc->vblank_wait); |
4428 | + } |
4429 | + } |
4430 | + spin_unlock(&rcrtc->vblank_lock); |
4431 | + |
4432 | + if (status & DSSR_VBK) { |
4433 | drm_crtc_handle_vblank(&rcrtc->crtc); |
4434 | rcar_du_crtc_finish_page_flip(rcrtc); |
4435 | ret = IRQ_HANDLED; |
4436 | @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) |
4437 | } |
4438 | |
4439 | init_waitqueue_head(&rcrtc->flip_wait); |
4440 | + init_waitqueue_head(&rcrtc->vblank_wait); |
4441 | + spin_lock_init(&rcrtc->vblank_lock); |
4442 | |
4443 | rcrtc->group = rgrp; |
4444 | rcrtc->mmio_offset = mmio_offsets[index]; |
4445 | diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h |
4446 | index 6f08b7e7db06..48bef05b4c62 100644 |
4447 | --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h |
4448 | +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h |
4449 | @@ -15,6 +15,7 @@ |
4450 | #define __RCAR_DU_CRTC_H__ |
4451 | |
4452 | #include <linux/mutex.h> |
4453 | +#include <linux/spinlock.h> |
4454 | #include <linux/wait.h> |
4455 | |
4456 | #include <drm/drmP.h> |
4457 | @@ -33,6 +34,9 @@ struct rcar_du_vsp; |
4458 | * @started: whether the CRTC has been started and is running |
4459 | * @event: event to post when the pending page flip completes |
4460 | * @flip_wait: wait queue used to signal page flip completion |
4461 | + * @vblank_lock: protects vblank_wait and vblank_count |
4462 | + * @vblank_wait: wait queue used to signal vertical blanking |
4463 | + * @vblank_count: number of vertical blanking interrupts to wait for |
4464 | * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC |
4465 | * @group: CRTC group this CRTC belongs to |
4466 | */ |
4467 | @@ -48,6 +52,10 @@ struct rcar_du_crtc { |
4468 | struct drm_pending_vblank_event *event; |
4469 | wait_queue_head_t flip_wait; |
4470 | |
4471 | + spinlock_t vblank_lock; |
4472 | + wait_queue_head_t vblank_wait; |
4473 | + unsigned int vblank_count; |
4474 | + |
4475 | unsigned int outputs; |
4476 | |
4477 | struct rcar_du_group *group; |
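Taken together, the hunks above implement a small counter handshake: the stop path decides under vblank_lock whether one or two interrupts must pass (two when DSSR already shows a latched vblank whose ordering against the plane-disable write is unknown), and the IRQ handler decrements under the same lock. In miniature, with pending_vblank standing in for the DSSR_VBK test:

    /* stop path (sketch, not the driver's exact code) */
    spin_lock_irq(&rcrtc->vblank_lock);
    /* ... DSnPR write disabling the planes ... */
    rcrtc->vblank_count = pending_vblank ? 2 : 1;
    spin_unlock_irq(&rcrtc->vblank_lock);
    wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0,
                       msecs_to_jiffies(100));

    /* IRQ handler */
    spin_lock(&rcrtc->vblank_lock);
    if (rcrtc->vblank_count && --rcrtc->vblank_count == 0)
            wake_up(&rcrtc->vblank_wait);
    spin_unlock(&rcrtc->vblank_lock);
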
4478 | diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c |
4479 | index f77252d6ccd3..d29c24854c2c 100644 |
4480 | --- a/drivers/media/platform/soc_camera/soc_scale_crop.c |
4481 | +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c |
4482 | @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, |
4483 | mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); |
4484 | } |
4485 | EXPORT_SYMBOL(soc_camera_calc_client_output); |
4486 | + |
4487 | +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); |
4488 | +MODULE_AUTHOR("Guennadi Liakhovetski <kernel@pengutronix.de>"); |
4489 | +MODULE_LICENSE("GPL"); |
4490 | diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c |
4491 | index bdbcd2b088a0..c3c28f0960e5 100644 |
4492 | --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c |
4493 | +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c |
4494 | @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) |
4495 | struct list_head *head = &mbx->cmd_q; |
4496 | struct qlcnic_cmd_args *cmd = NULL; |
4497 | |
4498 | - spin_lock(&mbx->queue_lock); |
4499 | + spin_lock_bh(&mbx->queue_lock); |
4500 | |
4501 | while (!list_empty(head)) { |
4502 | cmd = list_entry(head->next, struct qlcnic_cmd_args, list); |
4503 | @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) |
4504 | qlcnic_83xx_notify_cmd_completion(adapter, cmd); |
4505 | } |
4506 | |
4507 | - spin_unlock(&mbx->queue_lock); |
4508 | + spin_unlock_bh(&mbx->queue_lock); |
4509 | } |
4510 | |
4511 | static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) |
4512 | @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, |
4513 | { |
4514 | struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; |
4515 | |
4516 | - spin_lock(&mbx->queue_lock); |
4517 | + spin_lock_bh(&mbx->queue_lock); |
4518 | |
4519 | list_del(&cmd->list); |
4520 | mbx->num_cmds--; |
4521 | |
4522 | - spin_unlock(&mbx->queue_lock); |
4523 | + spin_unlock_bh(&mbx->queue_lock); |
4524 | |
4525 | qlcnic_83xx_notify_cmd_completion(adapter, cmd); |
4526 | } |
4527 | @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, |
4528 | init_completion(&cmd->completion); |
4529 | cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; |
4530 | |
4531 | - spin_lock(&mbx->queue_lock); |
4532 | + spin_lock_bh(&mbx->queue_lock); |
4533 | |
4534 | list_add_tail(&cmd->list, &mbx->cmd_q); |
4535 | mbx->num_cmds++; |
4536 | @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, |
4537 | *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; |
4538 | queue_work(mbx->work_q, &mbx->work); |
4539 | |
4540 | - spin_unlock(&mbx->queue_lock); |
4541 | + spin_unlock_bh(&mbx->queue_lock); |
4542 | |
4543 | return 0; |
4544 | } |
4545 | @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) |
4546 | mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; |
4547 | spin_unlock_irqrestore(&mbx->aen_lock, flags); |
4548 | |
4549 | - spin_lock(&mbx->queue_lock); |
4550 | + spin_lock_bh(&mbx->queue_lock); |
4551 | |
4552 | if (list_empty(head)) { |
4553 | - spin_unlock(&mbx->queue_lock); |
4554 | + spin_unlock_bh(&mbx->queue_lock); |
4555 | return; |
4556 | } |
4557 | cmd = list_entry(head->next, struct qlcnic_cmd_args, list); |
4558 | |
4559 | - spin_unlock(&mbx->queue_lock); |
4560 | + spin_unlock_bh(&mbx->queue_lock); |
4561 | |
4562 | mbx_ops->encode_cmd(adapter, cmd); |
4563 | mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); |
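Plain spin_lock() leaves softirqs enabled, and the conversion to the _bh variants implies queue_lock is also taken from bottom-half context. If a softirq fires on a CPU that already holds the lock in process context, its handler spins forever on a lock that CPU can never release. The general rule, as a sketch rather than qlcnic code:

    static DEFINE_SPINLOCK(q_lock);

    void process_side(void)
    {
            spin_lock_bh(&q_lock);  /* softirqs off: BH can't preempt us */
            /* ... manipulate the command queue ... */
            spin_unlock_bh(&q_lock);
    }

    void softirq_side(void)         /* e.g. a NAPI poll or timer path */
    {
            spin_lock(&q_lock);     /* already in BH context */
            /* ... */
            spin_unlock(&q_lock);
    }
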
4564 | diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c |
4565 | index 298b74ebc1e9..18e68c91e651 100644 |
4566 | --- a/drivers/net/ethernet/realtek/r8169.c |
4567 | +++ b/drivers/net/ethernet/realtek/r8169.c |
4568 | @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) |
4569 | { |
4570 | void __iomem *ioaddr = tp->mmio_addr; |
4571 | |
4572 | - return RTL_R8(IBISR0) & 0x02; |
4573 | + return RTL_R8(IBISR0) & 0x20; |
4574 | } |
4575 | |
4576 | static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) |
4577 | @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) |
4578 | void __iomem *ioaddr = tp->mmio_addr; |
4579 | |
4580 | RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); |
4581 | - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); |
4582 | + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); |
4583 | RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); |
4584 | RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); |
4585 | } |
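Two inversions are corrected at once: the condition tested bit 1 (0x02) of IBISR0 instead of bit 5 (0x20), and the caller polled for the bit going low when the hardware appears to signal TX completion by raising it, which the subsequent write-to-clear of 0x20 also suggests. With an assumed name for the bare constant:

    #define IBISR0_TXDONE 0x20  /* assumed name; the driver uses a bare 0x20 */

    /* poll until the bridge TX path reports done, then ack the latch */
    rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
    RTL_W8(IBISR0, RTL_R8(IBISR0) | IBISR0_TXDONE);
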
4586 | diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c |
4587 | index db65d9ad4488..e1e5e8438457 100644 |
4588 | --- a/drivers/net/usb/qmi_wwan.c |
4589 | +++ b/drivers/net/usb/qmi_wwan.c |
4590 | @@ -944,6 +944,7 @@ static const struct usb_device_id products[] = { |
4591 | {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ |
4592 | {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ |
4593 | {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ |
4594 | + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ |
4595 | |
4596 | /* 4. Gobi 1000 devices */ |
4597 | {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ |
4598 | diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c |
4599 | index 6e5d9095b195..a635fc6b1722 100644 |
4600 | --- a/drivers/net/wireless/broadcom/b43/main.c |
4601 | +++ b/drivers/net/wireless/broadcom/b43/main.c |
4602 | @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw"); |
4603 | MODULE_FIRMWARE("b43/ucode13.fw"); |
4604 | MODULE_FIRMWARE("b43/ucode14.fw"); |
4605 | MODULE_FIRMWARE("b43/ucode15.fw"); |
4606 | +MODULE_FIRMWARE("b43/ucode16_lp.fw"); |
4607 | MODULE_FIRMWARE("b43/ucode16_mimo.fw"); |
4608 | +MODULE_FIRMWARE("b43/ucode24_lcn.fw"); |
4609 | +MODULE_FIRMWARE("b43/ucode25_lcn.fw"); |
4610 | +MODULE_FIRMWARE("b43/ucode25_mimo.fw"); |
4611 | +MODULE_FIRMWARE("b43/ucode26_mimo.fw"); |
4612 | +MODULE_FIRMWARE("b43/ucode29_mimo.fw"); |
4613 | +MODULE_FIRMWARE("b43/ucode33_lcn40.fw"); |
4614 | +MODULE_FIRMWARE("b43/ucode30_mimo.fw"); |
4615 | MODULE_FIRMWARE("b43/ucode5.fw"); |
4616 | +MODULE_FIRMWARE("b43/ucode40.fw"); |
4617 | +MODULE_FIRMWARE("b43/ucode42.fw"); |
4618 | MODULE_FIRMWARE("b43/ucode9.fw"); |
4619 | |
4620 | static int modparam_bad_frames_preempt; |
4621 | diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
4622 | index 866aa3ce1ac9..6cf0006d4c8d 100644 |
4623 | --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
4624 | +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c |
4625 | @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) |
4626 | return 0; |
4627 | } |
4628 | EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); |
4629 | + |
4630 | +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); |
4631 | +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); |
4632 | +MODULE_LICENSE("GPL v2"); |
4633 | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c |
4634 | index f2303f390345..23973a8124fc 100644 |
4635 | --- a/drivers/tty/serial/serial_core.c |
4636 | +++ b/drivers/tty/serial/serial_core.c |
4637 | @@ -965,6 +965,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, |
4638 | } |
4639 | } else { |
4640 | retval = uart_startup(tty, state, 1); |
4641 | + if (retval == 0) |
4642 | + tty_port_set_initialized(port, true); |
4643 | if (retval > 0) |
4644 | retval = 0; |
4645 | } |
4646 | diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c |
4647 | index 96a0661011fd..e5b7652234fc 100644 |
4648 | --- a/drivers/vhost/net.c |
4649 | +++ b/drivers/vhost/net.c |
4650 | @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) |
4651 | } |
4652 | vhost_net_stop(n, &tx_sock, &rx_sock); |
4653 | vhost_net_flush(n); |
4654 | + vhost_dev_stop(&n->dev); |
4655 | vhost_dev_reset_owner(&n->dev, umem); |
4656 | vhost_net_vq_reset(n); |
4657 | done: |
4658 | diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h |
4659 | index 6e84b2cae6ad..442b54a14cbc 100644 |
4660 | --- a/include/linux/fdtable.h |
4661 | +++ b/include/linux/fdtable.h |
4662 | @@ -9,6 +9,7 @@ |
4663 | #include <linux/compiler.h> |
4664 | #include <linux/spinlock.h> |
4665 | #include <linux/rcupdate.h> |
4666 | +#include <linux/nospec.h> |
4667 | #include <linux/types.h> |
4668 | #include <linux/init.h> |
4669 | #include <linux/fs.h> |
4670 | @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i |
4671 | { |
4672 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); |
4673 | |
4674 | - if (fd < fdt->max_fds) |
4675 | + if (fd < fdt->max_fds) { |
4676 | + fd = array_index_nospec(fd, fdt->max_fds); |
4677 | return rcu_dereference_raw(fdt->fd[fd]); |
4678 | + } |
4679 | return NULL; |
4680 | } |
4681 | |
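The pattern generalizes to any bounds-checked table lookup, not just the fd table. A hypothetical sketch with invented names, to show the shape in isolation:

    struct table { unsigned int nr; void *slot[]; };

    static void *lookup(struct table *t, unsigned int idx)
    {
            if (idx >= t->nr)
                    return NULL;
            /* under mis-speculation past the check, idx is forced to 0 */
            idx = array_index_nospec(idx, t->nr);
            return t->slot[idx];
    }
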
4682 | diff --git a/include/linux/init.h b/include/linux/init.h |
4683 | index e30104ceb86d..8e346d1bd837 100644 |
4684 | --- a/include/linux/init.h |
4685 | +++ b/include/linux/init.h |
4686 | @@ -4,6 +4,13 @@ |
4687 | #include <linux/compiler.h> |
4688 | #include <linux/types.h> |
4689 | |
4690 | +/* Built-in __init functions needn't be compiled with retpoline */ |
4691 | +#if defined(RETPOLINE) && !defined(MODULE) |
4692 | +#define __noretpoline __attribute__((indirect_branch("keep"))) |
4693 | +#else |
4694 | +#define __noretpoline |
4695 | +#endif |
4696 | + |
4697 | /* These macros are used to mark some functions or |
4698 | * initialized data (doesn't apply to uninitialized data) |
4699 | * as `initialization' functions. The kernel can take this |
4700 | @@ -39,7 +46,7 @@ |
4701 | |
4702 | /* These are for everybody (although not all archs will actually |
4703 | discard it in modules) */ |
4704 | -#define __init __section(.init.text) __cold notrace __latent_entropy |
4705 | +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline |
4706 | #define __initdata __section(.init.data) |
4707 | #define __initconst __section(.init.rodata) |
4708 | #define __exitdata __section(.exit.data) |
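indirect_branch("keep") is the GCC attribute that exempts a function from -mindirect-branch=thunk conversion. Built-in __init code runs once during early boot, before untrusted code can have trained the branch predictor, so the thunk buys nothing there; module init code is excluded since modules can load long after userspace is up. Illustrated on a hypothetical function:

    /* built-in __init: indirect calls stay plain CALL*, no retpoline thunk */
    static int __init probe_boards(void)
    {
            int (*fn)(void) = pick_board_handler();  /* hypothetical */
            return fn();
    }
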
4709 | diff --git a/include/linux/module.h b/include/linux/module.h |
4710 | index 0c3207d26ac0..d2224a09b4b5 100644 |
4711 | --- a/include/linux/module.h |
4712 | +++ b/include/linux/module.h |
4713 | @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, |
4714 | static inline void module_bug_cleanup(struct module *mod) {} |
4715 | #endif /* CONFIG_GENERIC_BUG */ |
4716 | |
4717 | +#ifdef RETPOLINE |
4718 | +extern bool retpoline_module_ok(bool has_retpoline); |
4719 | +#else |
4720 | +static inline bool retpoline_module_ok(bool has_retpoline) |
4721 | +{ |
4722 | + return true; |
4723 | +} |
4724 | +#endif |
4725 | + |
4726 | #ifdef CONFIG_MODULE_SIG |
4727 | static inline bool module_sig_ok(struct module *module) |
4728 | { |
4729 | diff --git a/include/linux/nospec.h b/include/linux/nospec.h |
4730 | new file mode 100644 |
4731 | index 000000000000..b99bced39ac2 |
4732 | --- /dev/null |
4733 | +++ b/include/linux/nospec.h |
4734 | @@ -0,0 +1,72 @@ |
4735 | +// SPDX-License-Identifier: GPL-2.0 |
4736 | +// Copyright(c) 2018 Linus Torvalds. All rights reserved. |
4737 | +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. |
4738 | +// Copyright(c) 2018 Intel Corporation. All rights reserved. |
4739 | + |
4740 | +#ifndef _LINUX_NOSPEC_H |
4741 | +#define _LINUX_NOSPEC_H |
4742 | + |
4743 | +/** |
4744 | + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise |
4745 | + * @index: array element index |
4746 | + * @size: number of elements in array |
4747 | + * |
4748 | + * When @index is out of bounds (@index >= @size), the sign bit will be |
4749 | + * set. Extend the sign bit to all bits and invert, giving a result of |
4750 | + * zero for an out of bounds index, or ~0 if within bounds [0, @size). |
4751 | + */ |
4752 | +#ifndef array_index_mask_nospec |
4753 | +static inline unsigned long array_index_mask_nospec(unsigned long index, |
4754 | + unsigned long size) |
4755 | +{ |
4756 | + /* |
4757 | + * Warn developers about inappropriate array_index_nospec() usage. |
4758 | + * |
4759 | + * Even if the CPU speculates past the WARN_ONCE branch, the |
4760 | + * sign bit of @index is taken into account when generating the |
4761 | + * mask. |
4762 | + * |
4763 | + * This warning is compiled out when the compiler can infer that |
4764 | + * @index and @size are less than LONG_MAX. |
4765 | + */ |
4766 | + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, |
4767 | + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) |
4768 | + return 0; |
4769 | + |
4770 | + /* |
4771 | + * Always calculate and emit the mask even if the compiler |
4772 | + * thinks the mask is not needed. The compiler does not take |
4773 | + * into account the value of @index under speculation. |
4774 | + */ |
4775 | + OPTIMIZER_HIDE_VAR(index); |
4776 | + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); |
4777 | +} |
4778 | +#endif |
4779 | + |
4780 | +/* |
4781 | + * array_index_nospec - sanitize an array index after a bounds check |
4782 | + * |
4783 | + * For a code sequence like: |
4784 | + * |
4785 | + * if (index < size) { |
4786 | + * index = array_index_nospec(index, size); |
4787 | + * val = array[index]; |
4788 | + * } |
4789 | + * |
4790 | + * ...if the CPU speculates past the bounds check then |
4791 | + * array_index_nospec() will clamp the index within the range of [0, |
4792 | + * size). |
4793 | + */ |
4794 | +#define array_index_nospec(index, size) \ |
4795 | +({ \ |
4796 | + typeof(index) _i = (index); \ |
4797 | + typeof(size) _s = (size); \ |
4798 | + unsigned long _mask = array_index_mask_nospec(_i, _s); \ |
4799 | + \ |
4800 | + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ |
4801 | + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ |
4802 | + \ |
4803 | + _i &= _mask; \ |
4804 | + _i; \ |
4805 | +}) |
4806 | +#endif /* _LINUX_NOSPEC_H */ |
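The mask expression rewards unpacking. With both operands at most LONG_MAX, (size - 1 - index) underflows exactly when index >= size, setting the sign bit; OR-ing with index preserves that bit, the complement inverts it, and the arithmetic right shift replicates the final sign bit across the word. Worked with size = 16 on a 64-bit long:

    index =  5:   5 | (16 - 1 - 5)  = 15      /* sign bit clear */
                  ~15 >> 63 (arithmetic)  = ~0UL  /* in bounds: keep index */

    index = 20:  20 | (16 - 1 - 20)           /* underflow sets sign bit */
                  ~(sign bit set) >> 63   = 0     /* out of bounds: index & 0 */
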
4807 | diff --git a/kernel/module.c b/kernel/module.c |
4808 | index 0e54d5bf0097..07bfb9971f2f 100644 |
4809 | --- a/kernel/module.c |
4810 | +++ b/kernel/module.c |
4811 | @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) |
4812 | } |
4813 | #endif /* CONFIG_LIVEPATCH */ |
4814 | |
4815 | +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) |
4816 | +{ |
4817 | + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) |
4818 | + return; |
4819 | + |
4820 | + pr_warn("%s: loading module not compiled with retpoline compiler.\n", |
4821 | + mod->name); |
4822 | +} |
4823 | + |
4824 | /* Sets info->hdr and info->len. */ |
4825 | static int copy_module_from_user(const void __user *umod, unsigned long len, |
4826 | struct load_info *info) |
4827 | @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) |
4828 | add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); |
4829 | } |
4830 | |
4831 | + check_modinfo_retpoline(mod, info); |
4832 | + |
4833 | if (get_modinfo(info, "staging")) { |
4834 | add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); |
4835 | pr_warn("%s: module is from the staging directory, the quality " |
4836 | diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c |
4837 | index 77f396b679ce..5dce4291f0ed 100644 |
4838 | --- a/net/core/sock_reuseport.c |
4839 | +++ b/net/core/sock_reuseport.c |
4840 | @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) |
4841 | return more_reuse; |
4842 | } |
4843 | |
4844 | +static void reuseport_free_rcu(struct rcu_head *head) |
4845 | +{ |
4846 | + struct sock_reuseport *reuse; |
4847 | + |
4848 | + reuse = container_of(head, struct sock_reuseport, rcu); |
4849 | + if (reuse->prog) |
4850 | + bpf_prog_destroy(reuse->prog); |
4851 | + kfree(reuse); |
4852 | +} |
4853 | + |
4854 | /** |
4855 | * reuseport_add_sock - Add a socket to the reuseport group of another. |
4856 | * @sk: New socket to add to the group. |
4857 | @@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) |
4858 | */ |
4859 | int reuseport_add_sock(struct sock *sk, struct sock *sk2) |
4860 | { |
4861 | - struct sock_reuseport *reuse; |
4862 | + struct sock_reuseport *old_reuse, *reuse; |
4863 | |
4864 | if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { |
4865 | int err = reuseport_alloc(sk2); |
4866 | @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) |
4867 | |
4868 | spin_lock_bh(&reuseport_lock); |
4869 | reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, |
4870 | - lockdep_is_held(&reuseport_lock)), |
4871 | - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, |
4872 | - lockdep_is_held(&reuseport_lock)), |
4873 | - "socket already in reuseport group"); |
4874 | + lockdep_is_held(&reuseport_lock)); |
4875 | + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, |
4876 | + lockdep_is_held(&reuseport_lock)); |
4877 | + if (old_reuse && old_reuse->num_socks != 1) { |
4878 | + spin_unlock_bh(&reuseport_lock); |
4879 | + return -EBUSY; |
4880 | + } |
4881 | |
4882 | if (reuse->num_socks == reuse->max_socks) { |
4883 | reuse = reuseport_grow(reuse); |
4884 | @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) |
4885 | |
4886 | spin_unlock_bh(&reuseport_lock); |
4887 | |
4888 | + if (old_reuse) |
4889 | + call_rcu(&old_reuse->rcu, reuseport_free_rcu); |
4890 | return 0; |
4891 | } |
4892 | |
4893 | -static void reuseport_free_rcu(struct rcu_head *head) |
4894 | -{ |
4895 | - struct sock_reuseport *reuse; |
4896 | - |
4897 | - reuse = container_of(head, struct sock_reuseport, rcu); |
4898 | - if (reuse->prog) |
4899 | - bpf_prog_destroy(reuse->prog); |
4900 | - kfree(reuse); |
4901 | -} |
4902 | - |
4903 | void reuseport_detach_sock(struct sock *sk) |
4904 | { |
4905 | struct sock_reuseport *reuse; |
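reuseport_free_rcu() moves above reuseport_add_sock() because the latter can now free the group of a socket that re-binds (old_reuse with a single member). Readers may still be traversing that group under rcu_read_lock(), so it is handed to call_rcu() and only kfree()d after a grace period:

    /* shape of the deferred free used above */
    if (old_reuse)
            call_rcu(&old_reuse->rcu, reuseport_free_rcu);  /* runs after
                                                               readers drain */
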
4906 | diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c |
4907 | index 9c7a4cea1628..7f5fe07d0b13 100644 |
4908 | --- a/net/ipv4/igmp.c |
4909 | +++ b/net/ipv4/igmp.c |
4910 | @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) |
4911 | pip->frag_off = htons(IP_DF); |
4912 | pip->ttl = 1; |
4913 | pip->daddr = fl4.daddr; |
4914 | + |
4915 | + rcu_read_lock(); |
4916 | pip->saddr = igmpv3_get_srcaddr(dev, &fl4); |
4917 | + rcu_read_unlock(); |
4918 | + |
4919 | pip->protocol = IPPROTO_IGMP; |
4920 | pip->tot_len = 0; /* filled in later */ |
4921 | ip_select_ident(net, skb, NULL); |
4922 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c |
4923 | index 7efa6b062049..0d1a767db1bb 100644 |
4924 | --- a/net/ipv4/tcp.c |
4925 | +++ b/net/ipv4/tcp.c |
4926 | @@ -2316,6 +2316,12 @@ int tcp_disconnect(struct sock *sk, int flags) |
4927 | |
4928 | WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); |
4929 | |
4930 | + if (sk->sk_frag.page) { |
4931 | + put_page(sk->sk_frag.page); |
4932 | + sk->sk_frag.page = NULL; |
4933 | + sk->sk_frag.offset = 0; |
4934 | + } |
4935 | + |
4936 | sk->sk_error_report(sk); |
4937 | return err; |
4938 | } |
4939 | diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c |
4940 | index e86a34fd5484..8ec60532be2b 100644 |
4941 | --- a/net/ipv4/tcp_bbr.c |
4942 | +++ b/net/ipv4/tcp_bbr.c |
4943 | @@ -452,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) |
4944 | |
4945 | bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); |
4946 | bbr->cycle_mstamp = tp->delivered_mstamp; |
4947 | - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; |
4948 | + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : |
4949 | + bbr_pacing_gain[bbr->cycle_idx]; |
4950 | } |
4951 | |
4952 | /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ |
4953 | @@ -461,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk, |
4954 | { |
4955 | struct bbr *bbr = inet_csk_ca(sk); |
4956 | |
4957 | - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && |
4958 | - bbr_is_next_cycle_phase(sk, rs)) |
4959 | + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) |
4960 | bbr_advance_cycle_phase(sk); |
4961 | } |
4962 | |
4963 | diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c |
4964 | index 5cad76f87536..421379014995 100644 |
4965 | --- a/net/ipv6/af_inet6.c |
4966 | +++ b/net/ipv6/af_inet6.c |
4967 | @@ -274,6 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
4968 | struct net *net = sock_net(sk); |
4969 | __be32 v4addr = 0; |
4970 | unsigned short snum; |
4971 | + bool saved_ipv6only; |
4972 | int addr_type = 0; |
4973 | int err = 0; |
4974 | |
4975 | @@ -378,19 +379,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
4976 | if (!(addr_type & IPV6_ADDR_MULTICAST)) |
4977 | np->saddr = addr->sin6_addr; |
4978 | |
4979 | + saved_ipv6only = sk->sk_ipv6only; |
4980 | + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) |
4981 | + sk->sk_ipv6only = 1; |
4982 | + |
4983 | /* Make sure we are allowed to bind here. */ |
4984 | if ((snum || !inet->bind_address_no_port) && |
4985 | sk->sk_prot->get_port(sk, snum)) { |
4986 | + sk->sk_ipv6only = saved_ipv6only; |
4987 | inet_reset_saddr(sk); |
4988 | err = -EADDRINUSE; |
4989 | goto out; |
4990 | } |
4991 | |
4992 | - if (addr_type != IPV6_ADDR_ANY) { |
4993 | + if (addr_type != IPV6_ADDR_ANY) |
4994 | sk->sk_userlocks |= SOCK_BINDADDR_LOCK; |
4995 | - if (addr_type != IPV6_ADDR_MAPPED) |
4996 | - sk->sk_ipv6only = 1; |
4997 | - } |
4998 | if (snum) |
4999 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; |
5000 | inet->inet_sport = htons(inet->inet_num); |
5001 | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c |
5002 | index 117405dd07a3..a30e7e925c9b 100644 |
5003 | --- a/net/ipv6/ip6mr.c |
5004 | +++ b/net/ipv6/ip6mr.c |
5005 | @@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) |
5006 | return ERR_PTR(-ENOENT); |
5007 | |
5008 | it->mrt = mrt; |
5009 | + it->cache = NULL; |
5010 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) |
5011 | : SEQ_START_TOKEN; |
5012 | } |
5013 | diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c |
5014 | index ae83c3aec308..da574a16e7b3 100644 |
5015 | --- a/net/sched/cls_u32.c |
5016 | +++ b/net/sched/cls_u32.c |
5017 | @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) |
5018 | static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, |
5019 | u32 flags) |
5020 | { |
5021 | + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
5022 | struct net_device *dev = tp->q->dev_queue->dev; |
5023 | struct tc_cls_u32_offload u32_offload = {0}; |
5024 | struct tc_to_netdev offload; |
5025 | @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, |
5026 | offload.cls_u32->knode.sel = &n->sel; |
5027 | offload.cls_u32->knode.exts = &n->exts; |
5028 | if (n->ht_down) |
5029 | - offload.cls_u32->knode.link_handle = n->ht_down->handle; |
5030 | + offload.cls_u32->knode.link_handle = ht->handle; |
5031 | |
5032 | err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, |
5033 | tp->protocol, &offload); |
5034 | @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, |
5035 | static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, |
5036 | struct tc_u_knode *n) |
5037 | { |
5038 | - struct tc_u_knode *new; |
5039 | + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); |
5040 | struct tc_u32_sel *s = &n->sel; |
5041 | + struct tc_u_knode *new; |
5042 | |
5043 | new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), |
5044 | GFP_KERNEL); |
5045 | @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, |
5046 | new->fshift = n->fshift; |
5047 | new->res = n->res; |
5048 | new->flags = n->flags; |
5049 | - RCU_INIT_POINTER(new->ht_down, n->ht_down); |
5050 | + RCU_INIT_POINTER(new->ht_down, ht); |
5051 | |
5052 | /* bump reference count as long as we hold pointer to structure */ |
5053 | - if (new->ht_down) |
5054 | - new->ht_down->refcnt++; |
5055 | + if (ht) |
5056 | + ht->refcnt++; |
5057 | |
5058 | #ifdef CONFIG_CLS_U32_PERF |
5059 | /* Statistics may be incremented by readers during update |
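n->ht_down is an __rcu-annotated pointer, so it must be read through an accessor. rtnl_dereference() documents that RTNL, the classifier write-side lock, is held, and loading the pointer once into a local guarantees that the NULL check and every later use see the same snapshot:

    struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);  /* RTNL held */

    if (ht)                 /* check and use agree on one snapshot */
            ht->refcnt++;
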
5060 | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c |
5061 | index c626f679e1c8..91722e97cdd5 100644 |
5062 | --- a/net/wireless/nl80211.c |
5063 | +++ b/net/wireless/nl80211.c |
5064 | @@ -16,6 +16,7 @@ |
5065 | #include <linux/nl80211.h> |
5066 | #include <linux/rtnetlink.h> |
5067 | #include <linux/netlink.h> |
5068 | +#include <linux/nospec.h> |
5069 | #include <linux/etherdevice.h> |
5070 | #include <net/net_namespace.h> |
5071 | #include <net/genetlink.h> |
5072 | @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { |
5073 | static int parse_txq_params(struct nlattr *tb[], |
5074 | struct ieee80211_txq_params *txq_params) |
5075 | { |
5076 | + u8 ac; |
5077 | + |
5078 | if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || |
5079 | !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || |
5080 | !tb[NL80211_TXQ_ATTR_AIFS]) |
5081 | return -EINVAL; |
5082 | |
5083 | - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
5084 | + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); |
5085 | txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); |
5086 | txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); |
5087 | txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); |
5088 | txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); |
5089 | |
5090 | - if (txq_params->ac >= NL80211_NUM_ACS) |
5091 | + if (ac >= NL80211_NUM_ACS) |
5092 | return -EINVAL; |
5093 | - |
5094 | + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); |
5095 | return 0; |
5096 | } |
5097 | |
5098 | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c |
5099 | index 845eb9b800f3..238db4ffd30c 100644 |
5100 | --- a/scripts/mod/modpost.c |
5101 | +++ b/scripts/mod/modpost.c |
5102 | @@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) |
5103 | buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); |
5104 | } |
5105 | |
5106 | +/* Cannot check for assembler */ |
5107 | +static void add_retpoline(struct buffer *b) |
5108 | +{ |
5109 | + buf_printf(b, "\n#ifdef RETPOLINE\n"); |
5110 | + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); |
5111 | + buf_printf(b, "#endif\n"); |
5112 | +} |
5113 | + |
5114 | static void add_staging_flag(struct buffer *b, const char *name) |
5115 | { |
5116 | static const char *staging_dir = "drivers/staging"; |
5117 | @@ -2474,6 +2482,7 @@ int main(int argc, char **argv) |
5118 | |
5119 | add_header(&buf, mod); |
5120 | add_intree_flag(&buf, !external_module); |
5121 | + add_retpoline(&buf); |
5122 | add_staging_flag(&buf, mod->name); |
5123 | err |= add_versions(&buf, mod); |
5124 | add_depends(&buf, mod, modules); |
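add_retpoline() stamps every module built by a retpoline-enabled toolchain. The lines below are what land in each generated *.mod.c, and the "retpoline" key is exactly what get_modinfo(info, "retpoline") in check_modinfo_retpoline() looks up at load time:

    #ifdef RETPOLINE
    MODULE_INFO(retpoline, "Y");
    #endif
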
5125 | diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c |
5126 | index a871159bf03c..ead2fd60244d 100644 |
5127 | --- a/security/keys/encrypted-keys/encrypted.c |
5128 | +++ b/security/keys/encrypted-keys/encrypted.c |
5129 | @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) |
5130 | */ |
5131 | static int valid_master_desc(const char *new_desc, const char *orig_desc) |
5132 | { |
5133 | - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { |
5134 | - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) |
5135 | - goto out; |
5136 | - if (orig_desc) |
5137 | - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) |
5138 | - goto out; |
5139 | - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { |
5140 | - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) |
5141 | - goto out; |
5142 | - if (orig_desc) |
5143 | - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) |
5144 | - goto out; |
5145 | - } else |
5146 | - goto out; |
5147 | + int prefix_len; |
5148 | + |
5149 | + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) |
5150 | + prefix_len = KEY_TRUSTED_PREFIX_LEN; |
5151 | + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) |
5152 | + prefix_len = KEY_USER_PREFIX_LEN; |
5153 | + else |
5154 | + return -EINVAL; |
5155 | + |
5156 | + if (!new_desc[prefix_len]) |
5157 | + return -EINVAL; |
5158 | + |
5159 | + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) |
5160 | + return -EINVAL; |
5161 | + |
5162 | return 0; |
5163 | -out: |
5164 | - return -EINVAL; |
5165 | } |
5166 | |
5167 | /* |
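The rewrite is more than cleanup: memcmp() may compare the full prefix length, so a new_desc shorter than the prefix could read past the string's NUL terminator. strncmp() stops at the terminator. A worked case, with KEY_TRUSTED_PREFIX being "trusted:" and KEY_TRUSTED_PREFIX_LEN == 8:

    const char *new_desc = "trust";        /* 5 chars + NUL                */
    memcmp(new_desc, "trusted:", 8);       /* can read 2 bytes past the NUL */
    strncmp(new_desc, "trusted:", 8);      /* stops at the NUL, returns !0 */
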
5168 | diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c |
5169 | index 712ed6598c48..ebdf9bd5a64c 100644 |
5170 | --- a/sound/soc/codecs/pcm512x-spi.c |
5171 | +++ b/sound/soc/codecs/pcm512x-spi.c |
5172 | @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { |
5173 | }; |
5174 | |
5175 | module_spi_driver(pcm512x_spi_driver); |
5176 | + |
5177 | +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); |
5178 | +MODULE_AUTHOR("Mark Brown <broonie@kernel.org>"); |
5179 | +MODULE_LICENSE("GPL v2"); |
5180 | diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c |
5181 | index f608f8d23f3d..dd88c2cb6470 100644 |
5182 | --- a/sound/soc/generic/simple-card.c |
5183 | +++ b/sound/soc/generic/simple-card.c |
5184 | @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, |
5185 | snprintf(prop, sizeof(prop), "%scpu", prefix); |
5186 | cpu = of_get_child_by_name(node, prop); |
5187 | |
5188 | + if (!cpu) { |
5189 | + ret = -EINVAL; |
5190 | + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); |
5191 | + goto dai_link_of_err; |
5192 | + } |
5193 | + |
5194 | snprintf(prop, sizeof(prop), "%splat", prefix); |
5195 | plat = of_get_child_by_name(node, prop); |
5196 | |
5197 | snprintf(prop, sizeof(prop), "%scodec", prefix); |
5198 | codec = of_get_child_by_name(node, prop); |
5199 | |
5200 | - if (!cpu || !codec) { |
5201 | + if (!codec) { |
5202 | ret = -EINVAL; |
5203 | dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); |
5204 | goto dai_link_of_err; |
5205 | diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c |
5206 | index 560cf4b51a99..a9a43acce30e 100644 |
5207 | --- a/sound/soc/sh/rcar/ssi.c |
5208 | +++ b/sound/soc/sh/rcar/ssi.c |
5209 | @@ -699,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, |
5210 | struct rsnd_priv *priv) |
5211 | { |
5212 | struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); |
5213 | + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io); |
5214 | struct device *dev = rsnd_priv_to_dev(priv); |
5215 | int irq = ssi->irq; |
5216 | |
5217 | + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */ |
5218 | + if (pure_ssi_mod != mod) |
5219 | + return 0; |
5220 | + |
5221 | /* PIO will request IRQ again */ |
5222 | devm_free_irq(dev, irq, mod); |
5223 |