Annotation of /trunk/kernel-alx-legacy/patches-4.9/0180-4.9.81-all-fixes.patch
Parent Directory | Revision Log
Revision 3608 -
(hide annotations)
(download)
Fri Aug 14 07:34:29 2020 UTC (3 years, 10 months ago) by niro
File size: 166338 byte(s)
Fri Aug 14 07:34:29 2020 UTC (3 years, 10 months ago) by niro
File size: 166338 byte(s)
-added kernel-alx-legacy pkg
1 | niro | 3608 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt |
2 | index 4c2667aa4634..466c039c622b 100644 | ||
3 | --- a/Documentation/kernel-parameters.txt | ||
4 | +++ b/Documentation/kernel-parameters.txt | ||
5 | @@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | ||
6 | norandmaps Don't use address space randomization. Equivalent to | ||
7 | echo 0 > /proc/sys/kernel/randomize_va_space | ||
8 | |||
9 | - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops | ||
10 | - | ||
11 | noreplace-smp [X86-32,SMP] Don't replace SMP instructions | ||
12 | with UP alternatives | ||
13 | |||
14 | diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt | ||
15 | new file mode 100644 | ||
16 | index 000000000000..e9e6cbae2841 | ||
17 | --- /dev/null | ||
18 | +++ b/Documentation/speculation.txt | ||
19 | @@ -0,0 +1,90 @@ | ||
20 | +This document explains potential effects of speculation, and how undesirable | ||
21 | +effects can be mitigated portably using common APIs. | ||
22 | + | ||
23 | +=========== | ||
24 | +Speculation | ||
25 | +=========== | ||
26 | + | ||
27 | +To improve performance and minimize average latencies, many contemporary CPUs | ||
28 | +employ speculative execution techniques such as branch prediction, performing | ||
29 | +work which may be discarded at a later stage. | ||
30 | + | ||
31 | +Typically speculative execution cannot be observed from architectural state, | ||
32 | +such as the contents of registers. However, in some cases it is possible to | ||
33 | +observe its impact on microarchitectural state, such as the presence or | ||
34 | +absence of data in caches. Such state may form side-channels which can be | ||
35 | +observed to extract secret information. | ||
36 | + | ||
37 | +For example, in the presence of branch prediction, it is possible for bounds | ||
38 | +checks to be ignored by code which is speculatively executed. Consider the | ||
39 | +following code: | ||
40 | + | ||
41 | + int load_array(int *array, unsigned int index) | ||
42 | + { | ||
43 | + if (index >= MAX_ARRAY_ELEMS) | ||
44 | + return 0; | ||
45 | + else | ||
46 | + return array[index]; | ||
47 | + } | ||
48 | + | ||
49 | +Which, on arm64, may be compiled to an assembly sequence such as: | ||
50 | + | ||
51 | + CMP <index>, #MAX_ARRAY_ELEMS | ||
52 | + B.LT less | ||
53 | + MOV <returnval>, #0 | ||
54 | + RET | ||
55 | + less: | ||
56 | + LDR <returnval>, [<array>, <index>] | ||
57 | + RET | ||
58 | + | ||
59 | +It is possible that a CPU mis-predicts the conditional branch, and | ||
60 | +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This | ||
61 | +value will subsequently be discarded, but the speculated load may affect | ||
62 | +microarchitectural state which can be subsequently measured. | ||
63 | + | ||
64 | +More complex sequences involving multiple dependent memory accesses may | ||
65 | +result in sensitive information being leaked. Consider the following | ||
66 | +code, building on the prior example: | ||
67 | + | ||
68 | + int load_dependent_arrays(int *arr1, int *arr2, int index) | ||
69 | + { | ||
70 | + int val1, val2; | |
71 | + | ||
72 | + val1 = load_array(arr1, index); | ||
73 | + val2 = load_array(arr2, val1); | ||
74 | + | ||
75 | + return val2; | ||
76 | + } | ||
77 | + | ||
78 | +Under speculation, the first call to load_array() may return the value | ||
79 | +of an out-of-bounds address, while the second call will influence | ||
80 | +microarchitectural state dependent on this value. This may provide an | ||
81 | +arbitrary read primitive. | ||
82 | + | ||
83 | +==================================== | ||
84 | +Mitigating speculation side-channels | ||
85 | +==================================== | ||
86 | + | ||
87 | +The kernel provides a generic API to ensure that bounds checks are | ||
88 | +respected even under speculation. Architectures which are affected by | ||
89 | +speculation-based side-channels are expected to implement these | ||
90 | +primitives. | ||
91 | + | ||
92 | +The array_index_nospec() helper in <linux/nospec.h> can be used to | ||
93 | +prevent information from being leaked via side-channels. | ||
94 | + | ||
95 | +A call to array_index_nospec(index, size) returns a sanitized index | ||
96 | +value that is bounded to [0, size) even under cpu speculation | ||
97 | +conditions. | ||
98 | + | ||
99 | +This can be used to protect the earlier load_array() example: | ||
100 | + | ||
101 | + int load_array(int *array, unsigned int index) | ||
102 | + { | ||
103 | + if (index >= MAX_ARRAY_ELEMS) | ||
104 | + return 0; | ||
105 | + else { | ||
106 | + index = array_index_nospec(index, MAX_ARRAY_ELEMS); | ||
107 | + return array[index]; | ||
108 | + } | ||
109 | + } | ||
110 | diff --git a/Makefile b/Makefile | ||
111 | index 9550b6939076..4d5753f1c37b 100644 | ||
112 | --- a/Makefile | ||
113 | +++ b/Makefile | ||
114 | @@ -1,6 +1,6 @@ | ||
115 | VERSION = 4 | ||
116 | PATCHLEVEL = 9 | ||
117 | -SUBLEVEL = 80 | ||
118 | +SUBLEVEL = 81 | ||
119 | EXTRAVERSION = | ||
120 | NAME = Roaring Lionus | ||
121 | |||
122 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | ||
123 | index 6eda5abbd719..0a6bb48854e3 100644 | ||
124 | --- a/arch/powerpc/Kconfig | ||
125 | +++ b/arch/powerpc/Kconfig | ||
126 | @@ -128,6 +128,7 @@ config PPC | ||
127 | select ARCH_HAS_GCOV_PROFILE_ALL | ||
128 | select GENERIC_SMP_IDLE_THREAD | ||
129 | select GENERIC_CMOS_UPDATE | ||
130 | + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 | ||
131 | select GENERIC_TIME_VSYSCALL_OLD | ||
132 | select GENERIC_CLOCKEVENTS | ||
133 | select GENERIC_CLOCKEVENTS_BROADCAST if SMP | ||
134 | diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h | ||
135 | index a703452d67b6..555e22d5e07f 100644 | ||
136 | --- a/arch/powerpc/include/asm/exception-64e.h | ||
137 | +++ b/arch/powerpc/include/asm/exception-64e.h | ||
138 | @@ -209,5 +209,11 @@ exc_##label##_book3e: | ||
139 | ori r3,r3,vector_offset@l; \ | ||
140 | mtspr SPRN_IVOR##vector_number,r3; | ||
141 | |||
142 | +#define RFI_TO_KERNEL \ | ||
143 | + rfi | ||
144 | + | ||
145 | +#define RFI_TO_USER \ | ||
146 | + rfi | ||
147 | + | ||
148 | #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ | ||
149 | |||
150 | diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h | ||
151 | index 9a3eee661297..cab6d2a46c41 100644 | ||
152 | --- a/arch/powerpc/include/asm/exception-64s.h | ||
153 | +++ b/arch/powerpc/include/asm/exception-64s.h | ||
154 | @@ -51,6 +51,59 @@ | ||
155 | #define EX_PPR 88 /* SMT thread status register (priority) */ | ||
156 | #define EX_CTR 96 | ||
157 | |||
158 | +/* | ||
159 | + * Macros for annotating the expected destination of (h)rfid | ||
160 | + * | ||
161 | + * The nop instructions allow us to insert one or more instructions to flush the | ||
162 | + * L1-D cache when returning to userspace or a guest. | ||
163 | + */ | ||
164 | +#define RFI_FLUSH_SLOT \ | ||
165 | + RFI_FLUSH_FIXUP_SECTION; \ | ||
166 | + nop; \ | ||
167 | + nop; \ | ||
168 | + nop | ||
169 | + | ||
170 | +#define RFI_TO_KERNEL \ | ||
171 | + rfid | ||
172 | + | ||
173 | +#define RFI_TO_USER \ | ||
174 | + RFI_FLUSH_SLOT; \ | ||
175 | + rfid; \ | ||
176 | + b rfi_flush_fallback | ||
177 | + | ||
178 | +#define RFI_TO_USER_OR_KERNEL \ | ||
179 | + RFI_FLUSH_SLOT; \ | ||
180 | + rfid; \ | ||
181 | + b rfi_flush_fallback | ||
182 | + | ||
183 | +#define RFI_TO_GUEST \ | ||
184 | + RFI_FLUSH_SLOT; \ | ||
185 | + rfid; \ | ||
186 | + b rfi_flush_fallback | ||
187 | + | ||
188 | +#define HRFI_TO_KERNEL \ | ||
189 | + hrfid | ||
190 | + | ||
191 | +#define HRFI_TO_USER \ | ||
192 | + RFI_FLUSH_SLOT; \ | ||
193 | + hrfid; \ | ||
194 | + b hrfi_flush_fallback | ||
195 | + | ||
196 | +#define HRFI_TO_USER_OR_KERNEL \ | ||
197 | + RFI_FLUSH_SLOT; \ | ||
198 | + hrfid; \ | ||
199 | + b hrfi_flush_fallback | ||
200 | + | ||
201 | +#define HRFI_TO_GUEST \ | ||
202 | + RFI_FLUSH_SLOT; \ | ||
203 | + hrfid; \ | ||
204 | + b hrfi_flush_fallback | ||
205 | + | ||
206 | +#define HRFI_TO_UNKNOWN \ | ||
207 | + RFI_FLUSH_SLOT; \ | ||
208 | + hrfid; \ | ||
209 | + b hrfi_flush_fallback | ||
210 | + | ||
211 | #ifdef CONFIG_RELOCATABLE | ||
212 | #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ | ||
213 | mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ | ||
214 | diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h | ||
215 | index ddf54f5bbdd1..7b332342071c 100644 | ||
216 | --- a/arch/powerpc/include/asm/feature-fixups.h | ||
217 | +++ b/arch/powerpc/include/asm/feature-fixups.h | ||
218 | @@ -189,4 +189,19 @@ void apply_feature_fixups(void); | ||
219 | void setup_feature_keys(void); | ||
220 | #endif | ||
221 | |||
222 | +#define RFI_FLUSH_FIXUP_SECTION \ | ||
223 | +951: \ | ||
224 | + .pushsection __rfi_flush_fixup,"a"; \ | ||
225 | + .align 2; \ | ||
226 | +952: \ | ||
227 | + FTR_ENTRY_OFFSET 951b-952b; \ | ||
228 | + .popsection; | ||
229 | + | ||
230 | + | ||
231 | +#ifndef __ASSEMBLY__ | ||
232 | + | ||
233 | +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; | ||
234 | + | ||
235 | +#endif | ||
236 | + | ||
237 | #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ | ||
238 | diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h | ||
239 | index 708edebcf147..0e12cb2437d1 100644 | ||
240 | --- a/arch/powerpc/include/asm/hvcall.h | ||
241 | +++ b/arch/powerpc/include/asm/hvcall.h | ||
242 | @@ -240,6 +240,7 @@ | ||
243 | #define H_GET_HCA_INFO 0x1B8 | ||
244 | #define H_GET_PERF_COUNT 0x1BC | ||
245 | #define H_MANAGE_TRACE 0x1C0 | ||
246 | +#define H_GET_CPU_CHARACTERISTICS 0x1C8 | ||
247 | #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 | ||
248 | #define H_QUERY_INT_STATE 0x1E4 | ||
249 | #define H_POLL_PENDING 0x1D8 | ||
250 | @@ -306,6 +307,17 @@ | ||
251 | #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 | ||
252 | #define H_SET_MODE_RESOURCE_LE 4 | ||
253 | |||
254 | +/* H_GET_CPU_CHARACTERISTICS return values */ | ||
255 | +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 | ||
256 | +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 | ||
257 | +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 | ||
258 | +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 | ||
259 | +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 | ||
260 | + | ||
261 | +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 | ||
262 | +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 | ||
263 | +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 | ||
264 | + | ||
265 | #ifndef __ASSEMBLY__ | ||
266 | |||
267 | /** | ||
268 | @@ -433,6 +445,11 @@ static inline unsigned long cmo_get_page_size(void) | ||
269 | } | ||
270 | #endif /* CONFIG_PPC_PSERIES */ | ||
271 | |||
272 | +struct h_cpu_char_result { | ||
273 | + u64 character; | ||
274 | + u64 behaviour; | ||
275 | +}; | ||
276 | + | ||
277 | #endif /* __ASSEMBLY__ */ | ||
278 | #endif /* __KERNEL__ */ | ||
279 | #endif /* _ASM_POWERPC_HVCALL_H */ | ||
280 | diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h | ||
281 | index 6a6792bb39fb..ea43897183fd 100644 | ||
282 | --- a/arch/powerpc/include/asm/paca.h | ||
283 | +++ b/arch/powerpc/include/asm/paca.h | ||
284 | @@ -205,6 +205,16 @@ struct paca_struct { | ||
285 | struct sibling_subcore_state *sibling_subcore_state; | ||
286 | #endif | ||
287 | #endif | ||
288 | +#ifdef CONFIG_PPC_BOOK3S_64 | ||
289 | + /* | ||
290 | + * rfi fallback flush must be in its own cacheline to prevent | ||
291 | + * other paca data leaking into the L1d | ||
292 | + */ | ||
293 | + u64 exrfi[13] __aligned(0x80); | ||
294 | + void *rfi_flush_fallback_area; | ||
295 | + u64 l1d_flush_congruence; | ||
296 | + u64 l1d_flush_sets; | ||
297 | +#endif | ||
298 | }; | ||
299 | |||
300 | #ifdef CONFIG_PPC_BOOK3S | ||
301 | diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h | ||
302 | index 1b394247afc2..4e53b8570d1f 100644 | ||
303 | --- a/arch/powerpc/include/asm/plpar_wrappers.h | ||
304 | +++ b/arch/powerpc/include/asm/plpar_wrappers.h | ||
305 | @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr | ||
306 | return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); | ||
307 | } | ||
308 | |||
309 | +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) | ||
310 | +{ | ||
311 | + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; | ||
312 | + long rc; | ||
313 | + | ||
314 | + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); | ||
315 | + if (rc == H_SUCCESS) { | ||
316 | + p->character = retbuf[0]; | ||
317 | + p->behaviour = retbuf[1]; | ||
318 | + } | ||
319 | + | ||
320 | + return rc; | ||
321 | +} | ||
322 | + | ||
323 | #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ | ||
324 | diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h | ||
325 | index 654d64c9f3ac..6825a67cc3db 100644 | ||
326 | --- a/arch/powerpc/include/asm/setup.h | ||
327 | +++ b/arch/powerpc/include/asm/setup.h | ||
328 | @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {} | ||
329 | static inline void pseries_little_endian_exceptions(void) {} | ||
330 | #endif /* CONFIG_PPC_PSERIES */ | ||
331 | |||
332 | +void rfi_flush_enable(bool enable); | ||
333 | + | ||
334 | +/* These are bit flags */ | ||
335 | +enum l1d_flush_type { | ||
336 | + L1D_FLUSH_NONE = 0x1, | ||
337 | + L1D_FLUSH_FALLBACK = 0x2, | ||
338 | + L1D_FLUSH_ORI = 0x4, | ||
339 | + L1D_FLUSH_MTTRIG = 0x8, | ||
340 | +}; | ||
341 | + | ||
342 | +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); | ||
343 | +void do_rfi_flush_fixups(enum l1d_flush_type types); | ||
344 | + | ||
345 | #endif /* !__ASSEMBLY__ */ | ||
346 | |||
347 | #endif /* _ASM_POWERPC_SETUP_H */ | ||
348 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | ||
349 | index c833d88c423d..64bcbd580495 100644 | ||
350 | --- a/arch/powerpc/kernel/asm-offsets.c | ||
351 | +++ b/arch/powerpc/kernel/asm-offsets.c | ||
352 | @@ -240,6 +240,10 @@ int main(void) | ||
353 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
354 | DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp)); | ||
355 | DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce)); | ||
356 | + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area)); | ||
357 | + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi)); | ||
358 | + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence)); | ||
359 | + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets)); | ||
360 | #endif | ||
361 | DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); | ||
362 | DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); | ||
363 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | ||
364 | index caa659671599..c33b69d10919 100644 | ||
365 | --- a/arch/powerpc/kernel/entry_64.S | ||
366 | +++ b/arch/powerpc/kernel/entry_64.S | ||
367 | @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION | ||
368 | END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | ||
369 | |||
370 | ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ | ||
371 | + ld r2,GPR2(r1) | ||
372 | + ld r1,GPR1(r1) | ||
373 | + mtlr r4 | ||
374 | + mtcr r5 | ||
375 | + mtspr SPRN_SRR0,r7 | ||
376 | + mtspr SPRN_SRR1,r8 | ||
377 | + RFI_TO_USER | ||
378 | + b . /* prevent speculative execution */ | ||
379 | + | ||
380 | + /* exit to kernel */ | ||
381 | 1: ld r2,GPR2(r1) | ||
382 | ld r1,GPR1(r1) | ||
383 | mtlr r4 | ||
384 | mtcr r5 | ||
385 | mtspr SPRN_SRR0,r7 | ||
386 | mtspr SPRN_SRR1,r8 | ||
387 | - RFI | ||
388 | + RFI_TO_KERNEL | ||
389 | b . /* prevent speculative execution */ | ||
390 | |||
391 | syscall_error: | ||
392 | @@ -859,7 +869,7 @@ BEGIN_FTR_SECTION | ||
393 | END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | ||
394 | ACCOUNT_CPU_USER_EXIT(r13, r2, r4) | ||
395 | REST_GPR(13, r1) | ||
396 | -1: | ||
397 | + | ||
398 | mtspr SPRN_SRR1,r3 | ||
399 | |||
400 | ld r2,_CCR(r1) | ||
401 | @@ -872,8 +882,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) | ||
402 | ld r3,GPR3(r1) | ||
403 | ld r4,GPR4(r1) | ||
404 | ld r1,GPR1(r1) | ||
405 | + RFI_TO_USER | ||
406 | + b . /* prevent speculative execution */ | ||
407 | |||
408 | - rfid | ||
409 | +1: mtspr SPRN_SRR1,r3 | ||
410 | + | ||
411 | + ld r2,_CCR(r1) | ||
412 | + mtcrf 0xFF,r2 | ||
413 | + ld r2,_NIP(r1) | ||
414 | + mtspr SPRN_SRR0,r2 | ||
415 | + | ||
416 | + ld r0,GPR0(r1) | ||
417 | + ld r2,GPR2(r1) | ||
418 | + ld r3,GPR3(r1) | ||
419 | + ld r4,GPR4(r1) | ||
420 | + ld r1,GPR1(r1) | ||
421 | + RFI_TO_KERNEL | ||
422 | b . /* prevent speculative execution */ | ||
423 | |||
424 | #endif /* CONFIG_PPC_BOOK3E */ | ||
425 | diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S | ||
426 | index fd68e19b9ef7..96db6c3adebe 100644 | ||
427 | --- a/arch/powerpc/kernel/exceptions-64s.S | ||
428 | +++ b/arch/powerpc/kernel/exceptions-64s.S | ||
429 | @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) | ||
430 | |||
431 | andi. r10,r12,MSR_RI /* check for unrecoverable exception */ | ||
432 | beq- 2f | ||
433 | + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */ | ||
434 | + bne 1f | ||
435 | |||
436 | /* All done -- return from exception. */ | ||
437 | |||
438 | @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) | ||
439 | ld r11,PACA_EXSLB+EX_R11(r13) | ||
440 | ld r12,PACA_EXSLB+EX_R12(r13) | ||
441 | ld r13,PACA_EXSLB+EX_R13(r13) | ||
442 | - rfid | ||
443 | + RFI_TO_KERNEL | ||
444 | + b . /* prevent speculative execution */ | ||
445 | + | ||
446 | +1: | ||
447 | +.machine push | ||
448 | +.machine "power4" | ||
449 | + mtcrf 0x80,r9 | ||
450 | + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ | ||
451 | +.machine pop | ||
452 | + | ||
453 | + RESTORE_PPR_PACA(PACA_EXSLB, r9) | ||
454 | + ld r9,PACA_EXSLB+EX_R9(r13) | ||
455 | + ld r10,PACA_EXSLB+EX_R10(r13) | ||
456 | + ld r11,PACA_EXSLB+EX_R11(r13) | ||
457 | + ld r12,PACA_EXSLB+EX_R12(r13) | ||
458 | + ld r13,PACA_EXSLB+EX_R13(r13) | ||
459 | + RFI_TO_USER | ||
460 | b . /* prevent speculative execution */ | ||
461 | |||
462 | 2: mfspr r11,SPRN_SRR0 | ||
463 | @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) | ||
464 | mtspr SPRN_SRR0,r10 | ||
465 | ld r10,PACAKMSR(r13) | ||
466 | mtspr SPRN_SRR1,r10 | ||
467 | - rfid | ||
468 | + RFI_TO_KERNEL | ||
469 | b . | ||
470 | |||
471 | 8: mfspr r11,SPRN_SRR0 | ||
472 | @@ -1576,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) | ||
473 | bl kernel_bad_stack | ||
474 | b 1b | ||
475 | |||
476 | + .globl rfi_flush_fallback | ||
477 | +rfi_flush_fallback: | ||
478 | + SET_SCRATCH0(r13); | ||
479 | + GET_PACA(r13); | ||
480 | + std r9,PACA_EXRFI+EX_R9(r13) | ||
481 | + std r10,PACA_EXRFI+EX_R10(r13) | ||
482 | + std r11,PACA_EXRFI+EX_R11(r13) | ||
483 | + std r12,PACA_EXRFI+EX_R12(r13) | ||
484 | + std r8,PACA_EXRFI+EX_R13(r13) | ||
485 | + mfctr r9 | ||
486 | + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) | ||
487 | + ld r11,PACA_L1D_FLUSH_SETS(r13) | ||
488 | + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) | ||
489 | + /* | ||
490 | + * The load addresses are at staggered offsets within cachelines, | |
491 | + * which suits some pipelines better (on others it should not | ||
492 | + * hurt). | ||
493 | + */ | ||
494 | + addi r12,r12,8 | ||
495 | + mtctr r11 | ||
496 | + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ | ||
497 | + | ||
498 | + /* order ld/st prior to dcbt stop all streams with flushing */ | ||
499 | + sync | ||
500 | +1: li r8,0 | ||
501 | + .rept 8 /* 8-way set associative */ | ||
502 | + ldx r11,r10,r8 | ||
503 | + add r8,r8,r12 | ||
504 | + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not | ||
505 | + add r8,r8,r11 // Add 0, this creates a dependency on the ldx | ||
506 | + .endr | ||
507 | + addi r10,r10,128 /* 128 byte cache line */ | ||
508 | + bdnz 1b | ||
509 | + | ||
510 | + mtctr r9 | ||
511 | + ld r9,PACA_EXRFI+EX_R9(r13) | ||
512 | + ld r10,PACA_EXRFI+EX_R10(r13) | ||
513 | + ld r11,PACA_EXRFI+EX_R11(r13) | ||
514 | + ld r12,PACA_EXRFI+EX_R12(r13) | ||
515 | + ld r8,PACA_EXRFI+EX_R13(r13) | ||
516 | + GET_SCRATCH0(r13); | ||
517 | + rfid | ||
518 | + | ||
519 | + .globl hrfi_flush_fallback | ||
520 | +hrfi_flush_fallback: | ||
521 | + SET_SCRATCH0(r13); | ||
522 | + GET_PACA(r13); | ||
523 | + std r9,PACA_EXRFI+EX_R9(r13) | ||
524 | + std r10,PACA_EXRFI+EX_R10(r13) | ||
525 | + std r11,PACA_EXRFI+EX_R11(r13) | ||
526 | + std r12,PACA_EXRFI+EX_R12(r13) | ||
527 | + std r8,PACA_EXRFI+EX_R13(r13) | ||
528 | + mfctr r9 | ||
529 | + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) | ||
530 | + ld r11,PACA_L1D_FLUSH_SETS(r13) | ||
531 | + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) | ||
532 | + /* | ||
533 | + * The load addresses are at staggered offsets within cachelines, | |
534 | + * which suits some pipelines better (on others it should not | ||
535 | + * hurt). | ||
536 | + */ | ||
537 | + addi r12,r12,8 | ||
538 | + mtctr r11 | ||
539 | + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ | ||
540 | + | ||
541 | + /* order ld/st prior to dcbt stop all streams with flushing */ | ||
542 | + sync | ||
543 | +1: li r8,0 | ||
544 | + .rept 8 /* 8-way set associative */ | ||
545 | + ldx r11,r10,r8 | ||
546 | + add r8,r8,r12 | ||
547 | + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not | ||
548 | + add r8,r8,r11 // Add 0, this creates a dependency on the ldx | ||
549 | + .endr | ||
550 | + addi r10,r10,128 /* 128 byte cache line */ | ||
551 | + bdnz 1b | ||
552 | + | ||
553 | + mtctr r9 | ||
554 | + ld r9,PACA_EXRFI+EX_R9(r13) | ||
555 | + ld r10,PACA_EXRFI+EX_R10(r13) | ||
556 | + ld r11,PACA_EXRFI+EX_R11(r13) | ||
557 | + ld r12,PACA_EXRFI+EX_R12(r13) | ||
558 | + ld r8,PACA_EXRFI+EX_R13(r13) | ||
559 | + GET_SCRATCH0(r13); | ||
560 | + hrfid | ||
561 | + | ||
562 | /* | ||
563 | * Called from arch_local_irq_enable when an interrupt needs | ||
564 | * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate | ||
565 | diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c | ||
566 | index a12be60181bf..7c30a91c1f86 100644 | ||
567 | --- a/arch/powerpc/kernel/setup_64.c | ||
568 | +++ b/arch/powerpc/kernel/setup_64.c | ||
569 | @@ -37,6 +37,7 @@ | ||
570 | #include <linux/memblock.h> | ||
571 | #include <linux/memory.h> | ||
572 | #include <linux/nmi.h> | ||
573 | +#include <linux/debugfs.h> | ||
574 | |||
575 | #include <asm/io.h> | ||
576 | #include <asm/kdump.h> | ||
577 | @@ -678,4 +679,142 @@ static int __init disable_hardlockup_detector(void) | ||
578 | return 0; | ||
579 | } | ||
580 | early_initcall(disable_hardlockup_detector); | ||
581 | + | ||
582 | +#ifdef CONFIG_PPC_BOOK3S_64 | ||
583 | +static enum l1d_flush_type enabled_flush_types; | ||
584 | +static void *l1d_flush_fallback_area; | ||
585 | +static bool no_rfi_flush; | ||
586 | +bool rfi_flush; | ||
587 | + | ||
588 | +static int __init handle_no_rfi_flush(char *p) | ||
589 | +{ | ||
590 | + pr_info("rfi-flush: disabled on command line."); | ||
591 | + no_rfi_flush = true; | ||
592 | + return 0; | ||
593 | +} | ||
594 | +early_param("no_rfi_flush", handle_no_rfi_flush); | ||
595 | + | ||
596 | +/* | ||
597 | + * The RFI flush is not KPTI, but because users will see doco that says to use | ||
598 | + * nopti we hijack that option here to also disable the RFI flush. | ||
599 | + */ | ||
600 | +static int __init handle_no_pti(char *p) | ||
601 | +{ | ||
602 | + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); | ||
603 | + handle_no_rfi_flush(NULL); | ||
604 | + return 0; | ||
605 | +} | ||
606 | +early_param("nopti", handle_no_pti); | ||
607 | + | ||
608 | +static void do_nothing(void *unused) | ||
609 | +{ | ||
610 | + /* | ||
611 | + * We don't need to do the flush explicitly, just enter+exit kernel is | ||
612 | + * sufficient, the RFI exit handlers will do the right thing. | ||
613 | + */ | ||
614 | +} | ||
615 | + | ||
616 | +void rfi_flush_enable(bool enable) | ||
617 | +{ | ||
618 | + if (rfi_flush == enable) | ||
619 | + return; | ||
620 | + | ||
621 | + if (enable) { | ||
622 | + do_rfi_flush_fixups(enabled_flush_types); | ||
623 | + on_each_cpu(do_nothing, NULL, 1); | ||
624 | + } else | ||
625 | + do_rfi_flush_fixups(L1D_FLUSH_NONE); | ||
626 | + | ||
627 | + rfi_flush = enable; | ||
628 | +} | ||
629 | + | ||
630 | +static void init_fallback_flush(void) | ||
631 | +{ | ||
632 | + u64 l1d_size, limit; | ||
633 | + int cpu; | ||
634 | + | ||
635 | + l1d_size = ppc64_caches.dsize; | ||
636 | + limit = min(safe_stack_limit(), ppc64_rma_size); | ||
637 | + | ||
638 | + /* | ||
639 | + * Align to L1d size, and size it at 2x L1d size, to catch possible | ||
640 | + * hardware prefetch runoff. We don't have a recipe for load patterns to | ||
641 | + * reliably avoid the prefetcher. | ||
642 | + */ | ||
643 | + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); | ||
644 | + memset(l1d_flush_fallback_area, 0, l1d_size * 2); | ||
645 | + | ||
646 | + for_each_possible_cpu(cpu) { | ||
647 | + /* | ||
648 | + * The fallback flush is currently coded for 8-way | ||
649 | + * associativity. Different associativity is possible, but it | ||
650 | + * will be treated as 8-way and may not evict the lines as | ||
651 | + * effectively. | ||
652 | + * | ||
653 | + * 128 byte lines are mandatory. | ||
654 | + */ | ||
655 | + u64 c = l1d_size / 8; | ||
656 | + | ||
657 | + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; | ||
658 | + paca[cpu].l1d_flush_congruence = c; | ||
659 | + paca[cpu].l1d_flush_sets = c / 128; | ||
660 | + } | ||
661 | +} | ||
662 | + | ||
663 | +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) | ||
664 | +{ | ||
665 | + if (types & L1D_FLUSH_FALLBACK) { | ||
666 | + pr_info("rfi-flush: Using fallback displacement flush\n"); | ||
667 | + init_fallback_flush(); | ||
668 | + } | ||
669 | + | ||
670 | + if (types & L1D_FLUSH_ORI) | ||
671 | + pr_info("rfi-flush: Using ori type flush\n"); | ||
672 | + | ||
673 | + if (types & L1D_FLUSH_MTTRIG) | ||
674 | + pr_info("rfi-flush: Using mttrig type flush\n"); | ||
675 | + | ||
676 | + enabled_flush_types = types; | ||
677 | + | ||
678 | + if (!no_rfi_flush) | ||
679 | + rfi_flush_enable(enable); | ||
680 | +} | ||
681 | + | ||
682 | +#ifdef CONFIG_DEBUG_FS | ||
683 | +static int rfi_flush_set(void *data, u64 val) | ||
684 | +{ | ||
685 | + if (val == 1) | ||
686 | + rfi_flush_enable(true); | ||
687 | + else if (val == 0) | ||
688 | + rfi_flush_enable(false); | ||
689 | + else | ||
690 | + return -EINVAL; | ||
691 | + | ||
692 | + return 0; | ||
693 | +} | ||
694 | + | ||
695 | +static int rfi_flush_get(void *data, u64 *val) | ||
696 | +{ | ||
697 | + *val = rfi_flush ? 1 : 0; | ||
698 | + return 0; | ||
699 | +} | ||
700 | + | ||
701 | +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); | ||
702 | + | ||
703 | +static __init int rfi_flush_debugfs_init(void) | ||
704 | +{ | ||
705 | + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); | ||
706 | + return 0; | ||
707 | +} | ||
708 | +device_initcall(rfi_flush_debugfs_init); | ||
709 | +#endif | ||
710 | + | ||
711 | +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) | ||
712 | +{ | ||
713 | + if (rfi_flush) | ||
714 | + return sprintf(buf, "Mitigation: RFI Flush\n"); | ||
715 | + | ||
716 | + return sprintf(buf, "Vulnerable\n"); | ||
717 | +} | ||
718 | +#endif /* CONFIG_PPC_BOOK3S_64 */ | ||
719 | #endif | ||
720 | diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S | ||
721 | index 7394b770ae1f..b61fb7902018 100644 | ||
722 | --- a/arch/powerpc/kernel/vmlinux.lds.S | ||
723 | +++ b/arch/powerpc/kernel/vmlinux.lds.S | ||
724 | @@ -132,6 +132,15 @@ SECTIONS | ||
725 | /* Read-only data */ | ||
726 | RODATA | ||
727 | |||
728 | +#ifdef CONFIG_PPC64 | ||
729 | + . = ALIGN(8); | ||
730 | + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { | ||
731 | + __start___rfi_flush_fixup = .; | ||
732 | + *(__rfi_flush_fixup) | ||
733 | + __stop___rfi_flush_fixup = .; | ||
734 | + } | ||
735 | +#endif | ||
736 | + | ||
737 | EXCEPTION_TABLE(0) | ||
738 | |||
739 | NOTES :kernel :notes | ||
740 | diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c | ||
741 | index 043415f0bdb1..e86bfa111f3c 100644 | ||
742 | --- a/arch/powerpc/lib/feature-fixups.c | ||
743 | +++ b/arch/powerpc/lib/feature-fixups.c | ||
744 | @@ -23,6 +23,7 @@ | ||
745 | #include <asm/sections.h> | ||
746 | #include <asm/setup.h> | ||
747 | #include <asm/firmware.h> | ||
748 | +#include <asm/setup.h> | ||
749 | |||
750 | struct fixup_entry { | ||
751 | unsigned long mask; | ||
752 | @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) | ||
753 | } | ||
754 | } | ||
755 | |||
756 | +#ifdef CONFIG_PPC_BOOK3S_64 | ||
757 | +void do_rfi_flush_fixups(enum l1d_flush_type types) | ||
758 | +{ | ||
759 | + unsigned int instrs[3], *dest; | ||
760 | + long *start, *end; | ||
761 | + int i; | ||
762 | + | ||
763 | + start = PTRRELOC(&__start___rfi_flush_fixup), | ||
764 | + end = PTRRELOC(&__stop___rfi_flush_fixup); | ||
765 | + | ||
766 | + instrs[0] = 0x60000000; /* nop */ | ||
767 | + instrs[1] = 0x60000000; /* nop */ | ||
768 | + instrs[2] = 0x60000000; /* nop */ | ||
769 | + | ||
770 | + if (types & L1D_FLUSH_FALLBACK) | ||
771 | + /* b .+16 to fallback flush */ | ||
772 | + instrs[0] = 0x48000010; | ||
773 | + | ||
774 | + i = 0; | ||
775 | + if (types & L1D_FLUSH_ORI) { | ||
776 | + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ | ||
777 | + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ | ||
778 | + } | ||
779 | + | ||
780 | + if (types & L1D_FLUSH_MTTRIG) | ||
781 | + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ | ||
782 | + | ||
783 | + for (i = 0; start < end; start++, i++) { | ||
784 | + dest = (void *)start + *start; | ||
785 | + | ||
786 | + pr_devel("patching dest %lx\n", (unsigned long)dest); | ||
787 | + | ||
788 | + patch_instruction(dest, instrs[0]); | ||
789 | + patch_instruction(dest + 1, instrs[1]); | ||
790 | + patch_instruction(dest + 2, instrs[2]); | ||
791 | + } | ||
792 | + | ||
793 | + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); | ||
794 | +} | ||
795 | +#endif /* CONFIG_PPC_BOOK3S_64 */ | ||
796 | + | ||
797 | void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) | ||
798 | { | ||
799 | long *start, *end; | ||
800 | diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c | ||
801 | index b33faa0015cc..6f8b4c19373a 100644 | ||
802 | --- a/arch/powerpc/platforms/powernv/setup.c | ||
803 | +++ b/arch/powerpc/platforms/powernv/setup.c | ||
804 | @@ -35,13 +35,63 @@ | ||
805 | #include <asm/opal.h> | ||
806 | #include <asm/kexec.h> | ||
807 | #include <asm/smp.h> | ||
808 | +#include <asm/tm.h> | ||
809 | +#include <asm/setup.h> | ||
810 | |||
811 | #include "powernv.h" | ||
812 | |||
813 | +static void pnv_setup_rfi_flush(void) | ||
814 | +{ | ||
815 | + struct device_node *np, *fw_features; | ||
816 | + enum l1d_flush_type type; | ||
817 | + int enable; | ||
818 | + | ||
819 | + /* Default to fallback in case fw-features are not available */ | ||
820 | + type = L1D_FLUSH_FALLBACK; | ||
821 | + enable = 1; | ||
822 | + | ||
823 | + np = of_find_node_by_name(NULL, "ibm,opal"); | ||
824 | + fw_features = of_get_child_by_name(np, "fw-features"); | ||
825 | + of_node_put(np); | ||
826 | + | ||
827 | + if (fw_features) { | ||
828 | + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); | ||
829 | + if (np && of_property_read_bool(np, "enabled")) | ||
830 | + type = L1D_FLUSH_MTTRIG; | ||
831 | + | ||
832 | + of_node_put(np); | ||
833 | + | ||
834 | + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); | ||
835 | + if (np && of_property_read_bool(np, "enabled")) | ||
836 | + type = L1D_FLUSH_ORI; | ||
837 | + | ||
838 | + of_node_put(np); | ||
839 | + | ||
840 | + /* Enable unless firmware says NOT to */ | ||
841 | + enable = 2; | ||
842 | + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); | ||
843 | + if (np && of_property_read_bool(np, "disabled")) | ||
844 | + enable--; | ||
845 | + | ||
846 | + of_node_put(np); | ||
847 | + | ||
848 | + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); | ||
849 | + if (np && of_property_read_bool(np, "disabled")) | ||
850 | + enable--; | ||
851 | + | ||
852 | + of_node_put(np); | ||
853 | + of_node_put(fw_features); | ||
854 | + } | ||
855 | + | ||
856 | + setup_rfi_flush(type, enable > 0); | ||
857 | +} | ||
858 | + | ||
859 | static void __init pnv_setup_arch(void) | ||
860 | { | ||
861 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); | ||
862 | |||
863 | + pnv_setup_rfi_flush(); | ||
864 | + | ||
865 | /* Initialize SMP */ | ||
866 | pnv_smp_init(); | ||
867 | |||
868 | diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c | ||
869 | index 97aa3f332f24..1845fc611912 100644 | ||
870 | --- a/arch/powerpc/platforms/pseries/setup.c | ||
871 | +++ b/arch/powerpc/platforms/pseries/setup.c | ||
872 | @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void) | ||
873 | of_pci_check_probe_only(); | ||
874 | } | ||
875 | |||
876 | +static void pseries_setup_rfi_flush(void) | ||
877 | +{ | ||
878 | + struct h_cpu_char_result result; | ||
879 | + enum l1d_flush_type types; | ||
880 | + bool enable; | ||
881 | + long rc; | ||
882 | + | ||
883 | + /* Enable by default */ | ||
884 | + enable = true; | ||
885 | + | ||
886 | + rc = plpar_get_cpu_characteristics(&result); | ||
887 | + if (rc == H_SUCCESS) { | ||
888 | + types = L1D_FLUSH_NONE; | ||
889 | + | ||
890 | + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) | ||
891 | + types |= L1D_FLUSH_MTTRIG; | ||
892 | + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) | ||
893 | + types |= L1D_FLUSH_ORI; | ||
894 | + | ||
895 | + /* Use fallback if nothing set in hcall */ | ||
896 | + if (types == L1D_FLUSH_NONE) | ||
897 | + types = L1D_FLUSH_FALLBACK; | ||
898 | + | ||
899 | + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) | ||
900 | + enable = false; | ||
901 | + } else { | ||
902 | + /* Default to fallback in case hcall is not available */ | ||
903 | + types = L1D_FLUSH_FALLBACK; | ||
904 | + } | ||
905 | + | ||
906 | + setup_rfi_flush(types, enable); | ||
907 | +} | ||
908 | + | ||
909 | static void __init pSeries_setup_arch(void) | ||
910 | { | ||
911 | set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); | ||
912 | @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void) | ||
913 | |||
914 | fwnmi_init(); | ||
915 | |||
916 | + pseries_setup_rfi_flush(); | ||
917 | + | ||
918 | /* By default, only probe PCI (can be overridden by rtas_pci) */ | ||
919 | pci_add_flags(PCI_PROBE_ONLY); | ||
920 | |||
921 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | ||
922 | index bdd9cc59d20f..b0cd306dc527 100644 | ||
923 | --- a/arch/x86/entry/common.c | ||
924 | +++ b/arch/x86/entry/common.c | ||
925 | @@ -20,6 +20,7 @@ | ||
926 | #include <linux/export.h> | ||
927 | #include <linux/context_tracking.h> | ||
928 | #include <linux/user-return-notifier.h> | ||
929 | +#include <linux/nospec.h> | ||
930 | #include <linux/uprobes.h> | ||
931 | |||
932 | #include <asm/desc.h> | ||
933 | @@ -201,7 +202,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | ||
934 | * special case only applies after poking regs and before the | ||
935 | * very next return to user mode. | ||
936 | */ | ||
937 | - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED); | ||
938 | + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); | ||
939 | #endif | ||
940 | |||
941 | user_enter_irqoff(); | ||
942 | @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs) | ||
943 | * regs->orig_ax, which changes the behavior of some syscalls. | ||
944 | */ | ||
945 | if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) { | ||
946 | - regs->ax = sys_call_table[nr & __SYSCALL_MASK]( | ||
947 | + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls); | ||
948 | + regs->ax = sys_call_table[nr]( | ||
949 | regs->di, regs->si, regs->dx, | ||
950 | regs->r10, regs->r8, regs->r9); | ||
951 | } | ||
952 | @@ -299,7 +301,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | ||
953 | unsigned int nr = (unsigned int)regs->orig_ax; | ||
954 | |||
955 | #ifdef CONFIG_IA32_EMULATION | ||
956 | - current->thread.status |= TS_COMPAT; | ||
957 | + ti->status |= TS_COMPAT; | ||
958 | #endif | ||
959 | |||
960 | if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { | ||
961 | @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) | ||
962 | } | ||
963 | |||
964 | if (likely(nr < IA32_NR_syscalls)) { | ||
965 | + nr = array_index_nospec(nr, IA32_NR_syscalls); | ||
966 | /* | ||
967 | * It's possible that a 32-bit syscall implementation | ||
968 | * takes a 64-bit parameter but nonetheless assumes that | ||
969 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | ||
970 | index a76dc738ec61..f5434b4670c1 100644 | ||
971 | --- a/arch/x86/entry/entry_32.S | ||
972 | +++ b/arch/x86/entry/entry_32.S | ||
973 | @@ -237,7 +237,8 @@ ENTRY(__switch_to_asm) | ||
974 | * exist, overwrite the RSB with entries which capture | ||
975 | * speculative execution to prevent attack. | ||
976 | */ | ||
977 | - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
978 | + /* Clobbers %ebx */ | ||
979 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
980 | #endif | ||
981 | |||
982 | /* restore callee-saved registers */ | ||
983 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | ||
984 | index e729e1528584..db5009ce065a 100644 | ||
985 | --- a/arch/x86/entry/entry_64.S | ||
986 | +++ b/arch/x86/entry/entry_64.S | ||
987 | @@ -177,96 +177,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs) | ||
988 | pushq %r9 /* pt_regs->r9 */ | ||
989 | pushq %r10 /* pt_regs->r10 */ | ||
990 | pushq %r11 /* pt_regs->r11 */ | ||
991 | - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ | ||
992 | + pushq %rbx /* pt_regs->rbx */ | ||
993 | + pushq %rbp /* pt_regs->rbp */ | ||
994 | + pushq %r12 /* pt_regs->r12 */ | ||
995 | + pushq %r13 /* pt_regs->r13 */ | ||
996 | + pushq %r14 /* pt_regs->r14 */ | ||
997 | + pushq %r15 /* pt_regs->r15 */ | ||
998 | |||
999 | - /* | ||
1000 | - * If we need to do entry work or if we guess we'll need to do | ||
1001 | - * exit work, go straight to the slow path. | ||
1002 | - */ | ||
1003 | - movq PER_CPU_VAR(current_task), %r11 | ||
1004 | - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
1005 | - jnz entry_SYSCALL64_slow_path | ||
1006 | - | ||
1007 | -entry_SYSCALL_64_fastpath: | ||
1008 | - /* | ||
1009 | - * Easy case: enable interrupts and issue the syscall. If the syscall | ||
1010 | - * needs pt_regs, we'll call a stub that disables interrupts again | ||
1011 | - * and jumps to the slow path. | ||
1012 | - */ | ||
1013 | - TRACE_IRQS_ON | ||
1014 | - ENABLE_INTERRUPTS(CLBR_NONE) | ||
1015 | -#if __SYSCALL_MASK == ~0 | ||
1016 | - cmpq $__NR_syscall_max, %rax | ||
1017 | -#else | ||
1018 | - andl $__SYSCALL_MASK, %eax | ||
1019 | - cmpl $__NR_syscall_max, %eax | ||
1020 | -#endif | ||
1021 | - ja 1f /* return -ENOSYS (already in pt_regs->ax) */ | ||
1022 | - movq %r10, %rcx | ||
1023 | - | ||
1024 | - /* | ||
1025 | - * This call instruction is handled specially in stub_ptregs_64. | ||
1026 | - * It might end up jumping to the slow path. If it jumps, RAX | ||
1027 | - * and all argument registers are clobbered. | ||
1028 | - */ | ||
1029 | -#ifdef CONFIG_RETPOLINE | ||
1030 | - movq sys_call_table(, %rax, 8), %rax | ||
1031 | - call __x86_indirect_thunk_rax | ||
1032 | -#else | ||
1033 | - call *sys_call_table(, %rax, 8) | ||
1034 | -#endif | ||
1035 | -.Lentry_SYSCALL_64_after_fastpath_call: | ||
1036 | - | ||
1037 | - movq %rax, RAX(%rsp) | ||
1038 | -1: | ||
1039 | - | ||
1040 | - /* | ||
1041 | - * If we get here, then we know that pt_regs is clean for SYSRET64. | ||
1042 | - * If we see that no exit work is required (which we are required | ||
1043 | - * to check with IRQs off), then we can go straight to SYSRET64. | ||
1044 | - */ | ||
1045 | - DISABLE_INTERRUPTS(CLBR_NONE) | ||
1046 | - TRACE_IRQS_OFF | ||
1047 | - movq PER_CPU_VAR(current_task), %r11 | ||
1048 | - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11) | ||
1049 | - jnz 1f | ||
1050 | - | ||
1051 | - LOCKDEP_SYS_EXIT | ||
1052 | - TRACE_IRQS_ON /* user mode is traced as IRQs on */ | ||
1053 | - movq RIP(%rsp), %rcx | ||
1054 | - movq EFLAGS(%rsp), %r11 | ||
1055 | - RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
1056 | - /* | ||
1057 | - * This opens a window where we have a user CR3, but are | ||
1058 | - * running in the kernel. This makes using the CS | ||
1059 | - * register useless for telling whether or not we need to | ||
1060 | - * switch CR3 in NMIs. Normal interrupts are OK because | ||
1061 | - * they are off here. | ||
1062 | - */ | ||
1063 | - SWITCH_USER_CR3 | ||
1064 | - movq RSP(%rsp), %rsp | ||
1065 | - USERGS_SYSRET64 | ||
1066 | - | ||
1067 | -1: | ||
1068 | - /* | ||
1069 | - * The fast path looked good when we started, but something changed | ||
1070 | - * along the way and we need to switch to the slow path. Calling | ||
1071 | - * raise(3) will trigger this, for example. IRQs are off. | ||
1072 | - */ | ||
1073 | - TRACE_IRQS_ON | ||
1074 | - ENABLE_INTERRUPTS(CLBR_NONE) | ||
1075 | - SAVE_EXTRA_REGS | ||
1076 | - movq %rsp, %rdi | ||
1077 | - call syscall_return_slowpath /* returns with IRQs disabled */ | ||
1078 | - jmp return_from_SYSCALL_64 | ||
1079 | - | ||
1080 | -entry_SYSCALL64_slow_path: | ||
1081 | /* IRQs are off. */ | ||
1082 | - SAVE_EXTRA_REGS | ||
1083 | movq %rsp, %rdi | ||
1084 | call do_syscall_64 /* returns with IRQs disabled */ | ||
1085 | |||
1086 | -return_from_SYSCALL_64: | ||
1087 | RESTORE_EXTRA_REGS | ||
1088 | TRACE_IRQS_IRETQ /* we're about to change IF */ | ||
1089 | |||
1090 | @@ -339,6 +260,7 @@ return_from_SYSCALL_64: | ||
1091 | syscall_return_via_sysret: | ||
1092 | /* rcx and r11 are already restored (see code above) */ | ||
1093 | RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
1094 | + | ||
1095 | /* | ||
1096 | * This opens a window where we have a user CR3, but are | ||
1097 | * running in the kernel. This makes using the CS | ||
1098 | @@ -363,45 +285,6 @@ opportunistic_sysret_failed: | ||
1099 | jmp restore_c_regs_and_iret | ||
1100 | END(entry_SYSCALL_64) | ||
1101 | |||
1102 | -ENTRY(stub_ptregs_64) | ||
1103 | - /* | ||
1104 | - * Syscalls marked as needing ptregs land here. | ||
1105 | - * If we are on the fast path, we need to save the extra regs, | ||
1106 | - * which we achieve by trying again on the slow path. If we are on | ||
1107 | - * the slow path, the extra regs are already saved. | ||
1108 | - * | ||
1109 | - * RAX stores a pointer to the C function implementing the syscall. | ||
1110 | - * IRQs are on. | ||
1111 | - */ | ||
1112 | - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp) | ||
1113 | - jne 1f | ||
1114 | - | ||
1115 | - /* | ||
1116 | - * Called from fast path -- disable IRQs again, pop return address | ||
1117 | - * and jump to slow path | ||
1118 | - */ | ||
1119 | - DISABLE_INTERRUPTS(CLBR_NONE) | ||
1120 | - TRACE_IRQS_OFF | ||
1121 | - popq %rax | ||
1122 | - jmp entry_SYSCALL64_slow_path | ||
1123 | - | ||
1124 | -1: | ||
1125 | - JMP_NOSPEC %rax /* Called from C */ | ||
1126 | -END(stub_ptregs_64) | ||
1127 | - | ||
1128 | -.macro ptregs_stub func | ||
1129 | -ENTRY(ptregs_\func) | ||
1130 | - leaq \func(%rip), %rax | ||
1131 | - jmp stub_ptregs_64 | ||
1132 | -END(ptregs_\func) | ||
1133 | -.endm | ||
1134 | - | ||
1135 | -/* Instantiate ptregs_stub for each ptregs-using syscall */ | ||
1136 | -#define __SYSCALL_64_QUAL_(sym) | ||
1137 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym | ||
1138 | -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym) | ||
1139 | -#include <asm/syscalls_64.h> | ||
1140 | - | ||
1141 | /* | ||
1142 | * %rdi: prev task | ||
1143 | * %rsi: next task | ||
1144 | @@ -435,7 +318,8 @@ ENTRY(__switch_to_asm) | ||
1145 | * exist, overwrite the RSB with entries which capture | ||
1146 | * speculative execution to prevent attack. | ||
1147 | */ | ||
1148 | - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
1149 | + /* Clobbers %rbx */ | ||
1150 | + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW | ||
1151 | #endif | ||
1152 | |||
1153 | /* restore callee-saved registers */ | ||
1154 | diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c | ||
1155 | index 9dbc5abb6162..6705edda4ac3 100644 | ||
1156 | --- a/arch/x86/entry/syscall_64.c | ||
1157 | +++ b/arch/x86/entry/syscall_64.c | ||
1158 | @@ -6,14 +6,11 @@ | ||
1159 | #include <asm/asm-offsets.h> | ||
1160 | #include <asm/syscall.h> | ||
1161 | |||
1162 | -#define __SYSCALL_64_QUAL_(sym) sym | ||
1163 | -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym | ||
1164 | - | ||
1165 | -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
1166 | +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
1167 | #include <asm/syscalls_64.h> | ||
1168 | #undef __SYSCALL_64 | ||
1169 | |||
1170 | -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym), | ||
1171 | +#define __SYSCALL_64(nr, sym, qual) [nr] = sym, | ||
1172 | |||
1173 | extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); | ||
1174 | |||
1175 | diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c | ||
1176 | index 982c9e31daca..21298c173b0e 100644 | ||
1177 | --- a/arch/x86/events/intel/bts.c | ||
1178 | +++ b/arch/x86/events/intel/bts.c | ||
1179 | @@ -22,6 +22,7 @@ | ||
1180 | #include <linux/debugfs.h> | ||
1181 | #include <linux/device.h> | ||
1182 | #include <linux/coredump.h> | ||
1183 | +#include <linux/kaiser.h> | ||
1184 | |||
1185 | #include <asm-generic/sizes.h> | ||
1186 | #include <asm/perf_event.h> | ||
1187 | @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page) | ||
1188 | return 1 << (PAGE_SHIFT + page_private(page)); | ||
1189 | } | ||
1190 | |||
1191 | +static void bts_buffer_free_aux(void *data) | ||
1192 | +{ | ||
1193 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION | ||
1194 | + struct bts_buffer *buf = data; | ||
1195 | + int nbuf; | ||
1196 | + | ||
1197 | + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) { | ||
1198 | + struct page *page = buf->buf[nbuf].page; | ||
1199 | + void *kaddr = page_address(page); | ||
1200 | + size_t page_size = buf_size(page); | ||
1201 | + | ||
1202 | + kaiser_remove_mapping((unsigned long)kaddr, page_size); | ||
1203 | + } | ||
1204 | +#endif | ||
1205 | + kfree(data); | ||
1206 | +} | ||
1207 | + | ||
1208 | static void * | ||
1209 | bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) | ||
1210 | { | ||
1211 | @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) | ||
1212 | buf->real_size = size - size % BTS_RECORD_SIZE; | ||
1213 | |||
1214 | for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { | ||
1215 | - unsigned int __nr_pages; | ||
1216 | + void *kaddr = pages[pg]; | ||
1217 | + size_t page_size; | ||
1218 | + | ||
1219 | + page = virt_to_page(kaddr); | ||
1220 | + page_size = buf_size(page); | ||
1221 | + | ||
1222 | + if (kaiser_add_mapping((unsigned long)kaddr, | ||
1223 | + page_size, __PAGE_KERNEL) < 0) { | ||
1224 | + buf->nr_bufs = nbuf; | ||
1225 | + bts_buffer_free_aux(buf); | ||
1226 | + return NULL; | ||
1227 | + } | ||
1228 | |||
1229 | - page = virt_to_page(pages[pg]); | ||
1230 | - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; | ||
1231 | buf->buf[nbuf].page = page; | ||
1232 | buf->buf[nbuf].offset = offset; | ||
1233 | buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); | ||
1234 | - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; | ||
1235 | + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement; | ||
1236 | pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; | ||
1237 | buf->buf[nbuf].size -= pad; | ||
1238 | |||
1239 | - pg += __nr_pages; | ||
1240 | - offset += __nr_pages << PAGE_SHIFT; | ||
1241 | + pg += page_size >> PAGE_SHIFT; | ||
1242 | + offset += page_size; | ||
1243 | } | ||
1244 | |||
1245 | return buf; | ||
1246 | } | ||
1247 | |||
1248 | -static void bts_buffer_free_aux(void *data) | ||
1249 | -{ | ||
1250 | - kfree(data); | ||
1251 | -} | ||
1252 | - | ||
1253 | static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) | ||
1254 | { | ||
1255 | return buf->buf[idx].offset + buf->buf[idx].displacement; | ||
1256 | diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h | ||
1257 | index b15aa4083dfd..166654218329 100644 | ||
1258 | --- a/arch/x86/include/asm/asm-prototypes.h | ||
1259 | +++ b/arch/x86/include/asm/asm-prototypes.h | ||
1260 | @@ -37,5 +37,7 @@ INDIRECT_THUNK(dx) | ||
1261 | INDIRECT_THUNK(si) | ||
1262 | INDIRECT_THUNK(di) | ||
1263 | INDIRECT_THUNK(bp) | ||
1264 | -INDIRECT_THUNK(sp) | ||
1265 | +asmlinkage void __fill_rsb(void); | ||
1266 | +asmlinkage void __clear_rsb(void); | ||
1267 | + | ||
1268 | #endif /* CONFIG_RETPOLINE */ | ||
1269 | diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h | ||
1270 | index 00523524edbf..7bb29a416b77 100644 | ||
1271 | --- a/arch/x86/include/asm/asm.h | ||
1272 | +++ b/arch/x86/include/asm/asm.h | ||
1273 | @@ -11,10 +11,12 @@ | ||
1274 | # define __ASM_FORM_COMMA(x) " " #x "," | ||
1275 | #endif | ||
1276 | |||
1277 | -#ifdef CONFIG_X86_32 | ||
1278 | +#ifndef __x86_64__ | ||
1279 | +/* 32 bit */ | ||
1280 | # define __ASM_SEL(a,b) __ASM_FORM(a) | ||
1281 | # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) | ||
1282 | #else | ||
1283 | +/* 64 bit */ | ||
1284 | # define __ASM_SEL(a,b) __ASM_FORM(b) | ||
1285 | # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) | ||
1286 | #endif | ||
1287 | diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h | ||
1288 | index bfb28caf97b1..857590390397 100644 | ||
1289 | --- a/arch/x86/include/asm/barrier.h | ||
1290 | +++ b/arch/x86/include/asm/barrier.h | ||
1291 | @@ -23,6 +23,34 @@ | ||
1292 | #define wmb() asm volatile("sfence" ::: "memory") | ||
1293 | #endif | ||
1294 | |||
1295 | +/** | ||
1296 | + * array_index_mask_nospec() - generate a mask that is ~0UL when the | ||
1297 | + * bounds check succeeds and 0 otherwise | ||
1298 | + * @index: array element index | ||
1299 | + * @size: number of elements in array | ||
1300 | + * | ||
1301 | + * Returns: | ||
1302 | + * 0 - (index < size) | ||
1303 | + */ | ||
1304 | +static inline unsigned long array_index_mask_nospec(unsigned long index, | ||
1305 | + unsigned long size) | ||
1306 | +{ | ||
1307 | + unsigned long mask; | ||
1308 | + | ||
1309 | + asm ("cmp %1,%2; sbb %0,%0;" | ||
1310 | + :"=r" (mask) | ||
1311 | + :"r"(size),"r" (index) | ||
1312 | + :"cc"); | ||
1313 | + return mask; | ||
1314 | +} | ||
1315 | + | ||
1316 | +/* Override the default implementation from linux/nospec.h. */ | ||
1317 | +#define array_index_mask_nospec array_index_mask_nospec | ||
1318 | + | ||
1319 | +/* Prevent speculative execution past this barrier. */ | ||
1320 | +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ | ||
1321 | + "lfence", X86_FEATURE_LFENCE_RDTSC) | ||
1322 | + | ||
1323 | #ifdef CONFIG_X86_PPRO_FENCE | ||
1324 | #define dma_rmb() rmb() | ||
1325 | #else | ||
1326 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h | ||
1327 | index 9ea67a04ff4f..8c101579f535 100644 | ||
1328 | --- a/arch/x86/include/asm/cpufeature.h | ||
1329 | +++ b/arch/x86/include/asm/cpufeature.h | ||
1330 | @@ -28,6 +28,7 @@ enum cpuid_leafs | ||
1331 | CPUID_8000_000A_EDX, | ||
1332 | CPUID_7_ECX, | ||
1333 | CPUID_8000_0007_EBX, | ||
1334 | + CPUID_7_EDX, | ||
1335 | }; | ||
1336 | |||
1337 | #ifdef CONFIG_X86_FEATURE_NAMES | ||
1338 | @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | ||
1339 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \ | ||
1340 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \ | ||
1341 | CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ | ||
1342 | + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ | ||
1343 | REQUIRED_MASK_CHECK || \ | ||
1344 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) | ||
1345 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) | ||
1346 | |||
1347 | #define DISABLED_MASK_BIT_SET(feature_bit) \ | ||
1348 | ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ | ||
1349 | @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | ||
1350 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \ | ||
1351 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \ | ||
1352 | CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ | ||
1353 | + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ | ||
1354 | DISABLED_MASK_CHECK || \ | ||
1355 | - BUILD_BUG_ON_ZERO(NCAPINTS != 18)) | ||
1356 | + BUILD_BUG_ON_ZERO(NCAPINTS != 19)) | ||
1357 | |||
1358 | #define cpu_has(c, bit) \ | ||
1359 | (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ | ||
1360 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h | ||
1361 | index 8537a21acd8b..8eb23f5cf7f4 100644 | ||
1362 | --- a/arch/x86/include/asm/cpufeatures.h | ||
1363 | +++ b/arch/x86/include/asm/cpufeatures.h | ||
1364 | @@ -12,7 +12,7 @@ | ||
1365 | /* | ||
1366 | * Defines x86 CPU feature bits | ||
1367 | */ | ||
1368 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ | ||
1369 | +#define NCAPINTS 19 /* N 32-bit words worth of info */ | ||
1370 | #define NBUGINTS 1 /* N 32-bit bug flags */ | ||
1371 | |||
1372 | /* | ||
1373 | @@ -194,16 +194,16 @@ | ||
1374 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | ||
1375 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
1376 | |||
1377 | -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ | ||
1378 | -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ | ||
1379 | +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ | ||
1380 | +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ | ||
1381 | |||
1382 | -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ | ||
1383 | -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ | ||
1384 | -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ | ||
1385 | +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ | ||
1386 | |||
1387 | /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ | ||
1388 | #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ | ||
1389 | |||
1390 | +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ | ||
1391 | + | ||
1392 | /* Virtualization flags: Linux defined, word 8 */ | ||
1393 | #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
1394 | #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ | ||
1395 | @@ -260,6 +260,9 @@ | ||
1396 | /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ | ||
1397 | #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ | ||
1398 | #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ | ||
1399 | +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ | ||
1400 | +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ | ||
1401 | +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ | ||
1402 | |||
1403 | /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ | ||
1404 | #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
1405 | @@ -295,6 +298,13 @@ | ||
1406 | #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ | ||
1407 | #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ | ||
1408 | |||
1409 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ | ||
1410 | +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ | ||
1411 | +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ | ||
1412 | +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ | ||
1413 | +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ | ||
1414 | +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ | ||
1415 | + | ||
1416 | /* | ||
1417 | * BUG word(s) | ||
1418 | */ | ||
1419 | diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h | ||
1420 | index 21c5ac15657b..1f8cca459c6c 100644 | ||
1421 | --- a/arch/x86/include/asm/disabled-features.h | ||
1422 | +++ b/arch/x86/include/asm/disabled-features.h | ||
1423 | @@ -59,6 +59,7 @@ | ||
1424 | #define DISABLED_MASK15 0 | ||
1425 | #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE) | ||
1426 | #define DISABLED_MASK17 0 | ||
1427 | -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) | ||
1428 | +#define DISABLED_MASK18 0 | ||
1429 | +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) | ||
1430 | |||
1431 | #endif /* _ASM_X86_DISABLED_FEATURES_H */ | ||
1432 | diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h | ||
1433 | index 34a46dc076d3..75b748a1deb8 100644 | ||
1434 | --- a/arch/x86/include/asm/intel-family.h | ||
1435 | +++ b/arch/x86/include/asm/intel-family.h | ||
1436 | @@ -12,6 +12,7 @@ | ||
1437 | */ | ||
1438 | |||
1439 | #define INTEL_FAM6_CORE_YONAH 0x0E | ||
1440 | + | ||
1441 | #define INTEL_FAM6_CORE2_MEROM 0x0F | ||
1442 | #define INTEL_FAM6_CORE2_MEROM_L 0x16 | ||
1443 | #define INTEL_FAM6_CORE2_PENRYN 0x17 | ||
1444 | @@ -21,6 +22,7 @@ | ||
1445 | #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ | ||
1446 | #define INTEL_FAM6_NEHALEM_EP 0x1A | ||
1447 | #define INTEL_FAM6_NEHALEM_EX 0x2E | ||
1448 | + | ||
1449 | #define INTEL_FAM6_WESTMERE 0x25 | ||
1450 | #define INTEL_FAM6_WESTMERE_EP 0x2C | ||
1451 | #define INTEL_FAM6_WESTMERE_EX 0x2F | ||
1452 | @@ -36,9 +38,9 @@ | ||
1453 | #define INTEL_FAM6_HASWELL_GT3E 0x46 | ||
1454 | |||
1455 | #define INTEL_FAM6_BROADWELL_CORE 0x3D | ||
1456 | -#define INTEL_FAM6_BROADWELL_XEON_D 0x56 | ||
1457 | #define INTEL_FAM6_BROADWELL_GT3E 0x47 | ||
1458 | #define INTEL_FAM6_BROADWELL_X 0x4F | ||
1459 | +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 | ||
1460 | |||
1461 | #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E | ||
1462 | #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E | ||
1463 | @@ -57,9 +59,10 @@ | ||
1464 | #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ | ||
1465 | #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ | ||
1466 | #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ | ||
1467 | -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */ | ||
1468 | +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ | ||
1469 | #define INTEL_FAM6_ATOM_GOLDMONT 0x5C | ||
1470 | #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ | ||
1471 | +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A | ||
1472 | |||
1473 | /* Xeon Phi */ | ||
1474 | |||
1475 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h | ||
1476 | index b11c4c072df8..c768bc1550a1 100644 | ||
1477 | --- a/arch/x86/include/asm/msr-index.h | ||
1478 | +++ b/arch/x86/include/asm/msr-index.h | ||
1479 | @@ -37,6 +37,13 @@ | ||
1480 | #define EFER_FFXSR (1<<_EFER_FFXSR) | ||
1481 | |||
1482 | /* Intel MSRs. Some also available on other CPUs */ | ||
1483 | +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ | ||
1484 | +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ | ||
1485 | +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ | ||
1486 | + | ||
1487 | +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ | ||
1488 | +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ | ||
1489 | + | ||
1490 | #define MSR_IA32_PERFCTR0 0x000000c1 | ||
1491 | #define MSR_IA32_PERFCTR1 0x000000c2 | ||
1492 | #define MSR_FSB_FREQ 0x000000cd | ||
1493 | @@ -50,6 +57,11 @@ | ||
1494 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
1495 | |||
1496 | #define MSR_MTRRcap 0x000000fe | ||
1497 | + | ||
1498 | +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a | ||
1499 | +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ | ||
1500 | +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ | ||
1501 | + | ||
1502 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | ||
1503 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
1504 | |||
1505 | diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h | ||
1506 | index b5fee97813cd..ed35b915b5c9 100644 | ||
1507 | --- a/arch/x86/include/asm/msr.h | ||
1508 | +++ b/arch/x86/include/asm/msr.h | ||
1509 | @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void) | ||
1510 | * that some other imaginary CPU is updating continuously with a | ||
1511 | * time stamp. | ||
1512 | */ | ||
1513 | - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, | ||
1514 | - "lfence", X86_FEATURE_LFENCE_RDTSC); | ||
1515 | + barrier_nospec(); | ||
1516 | return rdtsc(); | ||
1517 | } | ||
1518 | |||
1519 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h | ||
1520 | index 4ad41087ce0e..300cc159b4a0 100644 | ||
1521 | --- a/arch/x86/include/asm/nospec-branch.h | ||
1522 | +++ b/arch/x86/include/asm/nospec-branch.h | ||
1523 | @@ -1,56 +1,12 @@ | ||
1524 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
1525 | |||
1526 | -#ifndef __NOSPEC_BRANCH_H__ | ||
1527 | -#define __NOSPEC_BRANCH_H__ | ||
1528 | +#ifndef _ASM_X86_NOSPEC_BRANCH_H_ | ||
1529 | +#define _ASM_X86_NOSPEC_BRANCH_H_ | ||
1530 | |||
1531 | #include <asm/alternative.h> | ||
1532 | #include <asm/alternative-asm.h> | ||
1533 | #include <asm/cpufeatures.h> | ||
1534 | |||
1535 | -/* | ||
1536 | - * Fill the CPU return stack buffer. | ||
1537 | - * | ||
1538 | - * Each entry in the RSB, if used for a speculative 'ret', contains an | ||
1539 | - * infinite 'pause; lfence; jmp' loop to capture speculative execution. | ||
1540 | - * | ||
1541 | - * This is required in various cases for retpoline and IBRS-based | ||
1542 | - * mitigations for the Spectre variant 2 vulnerability. Sometimes to | ||
1543 | - * eliminate potentially bogus entries from the RSB, and sometimes | ||
1544 | - * purely to ensure that it doesn't get empty, which on some CPUs would | ||
1545 | - * allow predictions from other (unwanted!) sources to be used. | ||
1546 | - * | ||
1547 | - * We define a CPP macro such that it can be used from both .S files and | ||
1548 | - * inline assembly. It's possible to do a .macro and then include that | ||
1549 | - * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. | ||
1550 | - */ | ||
1551 | - | ||
1552 | -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ | ||
1553 | -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ | ||
1554 | - | ||
1555 | -/* | ||
1556 | - * Google experimented with loop-unrolling and this turned out to be | ||
1557 | - * the optimal version — two calls, each with their own speculation | ||
1558 | - * trap should their return address end up getting used, in a loop. | ||
1559 | - */ | ||
1560 | -#define __FILL_RETURN_BUFFER(reg, nr, sp) \ | ||
1561 | - mov $(nr/2), reg; \ | ||
1562 | -771: \ | ||
1563 | - call 772f; \ | ||
1564 | -773: /* speculation trap */ \ | ||
1565 | - pause; \ | ||
1566 | - lfence; \ | ||
1567 | - jmp 773b; \ | ||
1568 | -772: \ | ||
1569 | - call 774f; \ | ||
1570 | -775: /* speculation trap */ \ | ||
1571 | - pause; \ | ||
1572 | - lfence; \ | ||
1573 | - jmp 775b; \ | ||
1574 | -774: \ | ||
1575 | - dec reg; \ | ||
1576 | - jnz 771b; \ | ||
1577 | - add $(BITS_PER_LONG/8) * nr, sp; | ||
1578 | - | ||
1579 | #ifdef __ASSEMBLY__ | ||
1580 | |||
1581 | /* | ||
1582 | @@ -121,17 +77,10 @@ | ||
1583 | #endif | ||
1584 | .endm | ||
1585 | |||
1586 | - /* | ||
1587 | - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP | ||
1588 | - * monstrosity above, manually. | ||
1589 | - */ | ||
1590 | -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req | ||
1591 | +/* This clobbers the BX register */ | ||
1592 | +.macro FILL_RETURN_BUFFER nr:req ftr:req | ||
1593 | #ifdef CONFIG_RETPOLINE | ||
1594 | - ANNOTATE_NOSPEC_ALTERNATIVE | ||
1595 | - ALTERNATIVE "jmp .Lskip_rsb_\@", \ | ||
1596 | - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ | ||
1597 | - \ftr | ||
1598 | -.Lskip_rsb_\@: | ||
1599 | + ALTERNATIVE "", "call __clear_rsb", \ftr | ||
1600 | #endif | ||
1601 | .endm | ||
1602 | |||
1603 | @@ -201,22 +150,30 @@ extern char __indirect_thunk_end[]; | ||
1604 | * On VMEXIT we must ensure that no RSB predictions learned in the guest | ||
1605 | * can be followed in the host, by overwriting the RSB completely. Both | ||
1606 | * retpoline and IBRS mitigations for Spectre v2 need this; only on future | ||
1607 | - * CPUs with IBRS_ATT *might* it be avoided. | ||
1608 | + * CPUs with IBRS_ALL *might* it be avoided. | ||
1609 | */ | ||
1610 | static inline void vmexit_fill_RSB(void) | ||
1611 | { | ||
1612 | #ifdef CONFIG_RETPOLINE | ||
1613 | - unsigned long loops; | ||
1614 | - | ||
1615 | - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE | ||
1616 | - ALTERNATIVE("jmp 910f", | ||
1617 | - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), | ||
1618 | - X86_FEATURE_RETPOLINE) | ||
1619 | - "910:" | ||
1620 | - : "=r" (loops), ASM_CALL_CONSTRAINT | ||
1621 | - : : "memory" ); | ||
1622 | + alternative_input("", | ||
1623 | + "call __fill_rsb", | ||
1624 | + X86_FEATURE_RETPOLINE, | ||
1625 | + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); | ||
1626 | #endif | ||
1627 | } | ||
1628 | |||
1629 | +static inline void indirect_branch_prediction_barrier(void) | ||
1630 | +{ | ||
1631 | + asm volatile(ALTERNATIVE("", | ||
1632 | + "movl %[msr], %%ecx\n\t" | ||
1633 | + "movl %[val], %%eax\n\t" | ||
1634 | + "movl $0, %%edx\n\t" | ||
1635 | + "wrmsr", | ||
1636 | + X86_FEATURE_USE_IBPB) | ||
1637 | + : : [msr] "i" (MSR_IA32_PRED_CMD), | ||
1638 | + [val] "i" (PRED_CMD_IBPB) | ||
1639 | + : "eax", "ecx", "edx", "memory"); | ||
1640 | +} | ||
1641 | + | ||
1642 | #endif /* __ASSEMBLY__ */ | ||
1643 | -#endif /* __NOSPEC_BRANCH_H__ */ | ||
1644 | +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | ||
1645 | diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h | ||
1646 | index 1178a51b77f3..b6d425999f99 100644 | ||
1647 | --- a/arch/x86/include/asm/pgalloc.h | ||
1648 | +++ b/arch/x86/include/asm/pgalloc.h | ||
1649 | @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {} | ||
1650 | */ | ||
1651 | extern gfp_t __userpte_alloc_gfp; | ||
1652 | |||
1653 | -#ifdef CONFIG_PAGE_TABLE_ISOLATION | ||
1654 | -/* | ||
1655 | - * Instead of one PGD, we acquire two PGDs. Being order-1, it is | ||
1656 | - * both 8k in size and 8k-aligned. That lets us just flip bit 12 | ||
1657 | - * in a pointer to swap between the two 4k halves. | ||
1658 | - */ | ||
1659 | -#define PGD_ALLOCATION_ORDER 1 | ||
1660 | -#else | ||
1661 | -#define PGD_ALLOCATION_ORDER 0 | ||
1662 | -#endif | ||
1663 | - | ||
1664 | /* | ||
1665 | * Allocate and free page tables. | ||
1666 | */ | ||
1667 | diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h | ||
1668 | index 2536f90cd30c..5af0401ccff2 100644 | ||
1669 | --- a/arch/x86/include/asm/pgtable.h | ||
1670 | +++ b/arch/x86/include/asm/pgtable.h | ||
1671 | @@ -20,9 +20,15 @@ | ||
1672 | |||
1673 | #ifdef CONFIG_PAGE_TABLE_ISOLATION | ||
1674 | extern int kaiser_enabled; | ||
1675 | +/* | ||
1676 | + * Instead of one PGD, we acquire two PGDs. Being order-1, it is | ||
1677 | + * both 8k in size and 8k-aligned. That lets us just flip bit 12 | ||
1678 | + * in a pointer to swap between the two 4k halves. | ||
1679 | + */ | ||
1680 | #else | ||
1681 | #define kaiser_enabled 0 | ||
1682 | #endif | ||
1683 | +#define PGD_ALLOCATION_ORDER kaiser_enabled | ||
1684 | |||
1685 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); | ||
1686 | void ptdump_walk_pgd_level_checkwx(void); | ||
1687 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h | ||
1688 | index 353f038ec645..cb866ae1bc5d 100644 | ||
1689 | --- a/arch/x86/include/asm/processor.h | ||
1690 | +++ b/arch/x86/include/asm/processor.h | ||
1691 | @@ -391,8 +391,6 @@ struct thread_struct { | ||
1692 | unsigned short gsindex; | ||
1693 | #endif | ||
1694 | |||
1695 | - u32 status; /* thread synchronous flags */ | ||
1696 | - | ||
1697 | #ifdef CONFIG_X86_64 | ||
1698 | unsigned long fsbase; | ||
1699 | unsigned long gsbase; | ||
1700 | diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h | ||
1701 | index fac9a5c0abe9..6847d85400a8 100644 | ||
1702 | --- a/arch/x86/include/asm/required-features.h | ||
1703 | +++ b/arch/x86/include/asm/required-features.h | ||
1704 | @@ -100,6 +100,7 @@ | ||
1705 | #define REQUIRED_MASK15 0 | ||
1706 | #define REQUIRED_MASK16 0 | ||
1707 | #define REQUIRED_MASK17 0 | ||
1708 | -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18) | ||
1709 | +#define REQUIRED_MASK18 0 | ||
1710 | +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) | ||
1711 | |||
1712 | #endif /* _ASM_X86_REQUIRED_FEATURES_H */ | ||
1713 | diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h | ||
1714 | index e3c95e8e61c5..03eedc21246d 100644 | ||
1715 | --- a/arch/x86/include/asm/syscall.h | ||
1716 | +++ b/arch/x86/include/asm/syscall.h | ||
1717 | @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task, | ||
1718 | * TS_COMPAT is set for 32-bit syscall entries and then | ||
1719 | * remains set until we return to user mode. | ||
1720 | */ | ||
1721 | - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
1722 | + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
1723 | /* | ||
1724 | * Sign-extend the value so (int)-EFOO becomes (long)-EFOO | ||
1725 | * and will match correctly in comparisons. | ||
1726 | @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task, | ||
1727 | unsigned long *args) | ||
1728 | { | ||
1729 | # ifdef CONFIG_IA32_EMULATION | ||
1730 | - if (task->thread.status & TS_COMPAT) | ||
1731 | + if (task->thread_info.status & TS_COMPAT) | ||
1732 | switch (i) { | ||
1733 | case 0: | ||
1734 | if (!n--) break; | ||
1735 | @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task, | ||
1736 | const unsigned long *args) | ||
1737 | { | ||
1738 | # ifdef CONFIG_IA32_EMULATION | ||
1739 | - if (task->thread.status & TS_COMPAT) | ||
1740 | + if (task->thread_info.status & TS_COMPAT) | ||
1741 | switch (i) { | ||
1742 | case 0: | ||
1743 | if (!n--) break; | ||
1744 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | ||
1745 | index bdf9c4c91572..89978b9c667a 100644 | ||
1746 | --- a/arch/x86/include/asm/thread_info.h | ||
1747 | +++ b/arch/x86/include/asm/thread_info.h | ||
1748 | @@ -54,6 +54,7 @@ struct task_struct; | ||
1749 | |||
1750 | struct thread_info { | ||
1751 | unsigned long flags; /* low level flags */ | ||
1752 | + u32 status; /* thread synchronous flags */ | ||
1753 | }; | ||
1754 | |||
1755 | #define INIT_THREAD_INFO(tsk) \ | ||
1756 | @@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack, | ||
1757 | #define in_ia32_syscall() true | ||
1758 | #else | ||
1759 | #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \ | ||
1760 | - current->thread.status & TS_COMPAT) | ||
1761 | + current_thread_info()->status & TS_COMPAT) | ||
1762 | #endif | ||
1763 | |||
1764 | /* | ||
1765 | diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h | ||
1766 | index dead0f3921f3..a8d85a687cf4 100644 | ||
1767 | --- a/arch/x86/include/asm/uaccess.h | ||
1768 | +++ b/arch/x86/include/asm/uaccess.h | ||
1769 | @@ -123,6 +123,11 @@ extern int __get_user_bad(void); | ||
1770 | |||
1771 | #define __uaccess_begin() stac() | ||
1772 | #define __uaccess_end() clac() | ||
1773 | +#define __uaccess_begin_nospec() \ | ||
1774 | +({ \ | ||
1775 | + stac(); \ | ||
1776 | + barrier_nospec(); \ | ||
1777 | +}) | ||
1778 | |||
1779 | /* | ||
1780 | * This is a type: either unsigned long, if the argument fits into | ||
1781 | @@ -432,7 +437,7 @@ do { \ | ||
1782 | ({ \ | ||
1783 | int __gu_err; \ | ||
1784 | __inttype(*(ptr)) __gu_val; \ | ||
1785 | - __uaccess_begin(); \ | ||
1786 | + __uaccess_begin_nospec(); \ | ||
1787 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ | ||
1788 | __uaccess_end(); \ | ||
1789 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ | ||
1790 | @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; }; | ||
1791 | __uaccess_begin(); \ | ||
1792 | barrier(); | ||
1793 | |||
1794 | +#define uaccess_try_nospec do { \ | ||
1795 | + current->thread.uaccess_err = 0; \ | ||
1796 | + __uaccess_begin_nospec(); \ | ||
1797 | + | ||
1798 | #define uaccess_catch(err) \ | ||
1799 | __uaccess_end(); \ | ||
1800 | (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ | ||
1801 | @@ -538,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; }; | ||
1802 | * get_user_ex(...); | ||
1803 | * } get_user_catch(err) | ||
1804 | */ | ||
1805 | -#define get_user_try uaccess_try | ||
1806 | +#define get_user_try uaccess_try_nospec | ||
1807 | #define get_user_catch(err) uaccess_catch(err) | ||
1808 | |||
1809 | #define get_user_ex(x, ptr) do { \ | ||
1810 | @@ -573,7 +582,7 @@ extern void __cmpxchg_wrong_size(void) | ||
1811 | __typeof__(ptr) __uval = (uval); \ | ||
1812 | __typeof__(*(ptr)) __old = (old); \ | ||
1813 | __typeof__(*(ptr)) __new = (new); \ | ||
1814 | - __uaccess_begin(); \ | ||
1815 | + __uaccess_begin_nospec(); \ | ||
1816 | switch (size) { \ | ||
1817 | case 1: \ | ||
1818 | { \ | ||
1819 | diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h | ||
1820 | index 7d3bdd1ed697..d6d245088dd5 100644 | ||
1821 | --- a/arch/x86/include/asm/uaccess_32.h | ||
1822 | +++ b/arch/x86/include/asm/uaccess_32.h | ||
1823 | @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) | ||
1824 | |||
1825 | switch (n) { | ||
1826 | case 1: | ||
1827 | - __uaccess_begin(); | ||
1828 | + __uaccess_begin_nospec(); | ||
1829 | __get_user_size(*(u8 *)to, from, 1, ret, 1); | ||
1830 | __uaccess_end(); | ||
1831 | return ret; | ||
1832 | case 2: | ||
1833 | - __uaccess_begin(); | ||
1834 | + __uaccess_begin_nospec(); | ||
1835 | __get_user_size(*(u16 *)to, from, 2, ret, 2); | ||
1836 | __uaccess_end(); | ||
1837 | return ret; | ||
1838 | case 4: | ||
1839 | - __uaccess_begin(); | ||
1840 | + __uaccess_begin_nospec(); | ||
1841 | __get_user_size(*(u32 *)to, from, 4, ret, 4); | ||
1842 | __uaccess_end(); | ||
1843 | return ret; | ||
1844 | @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, | ||
1845 | |||
1846 | switch (n) { | ||
1847 | case 1: | ||
1848 | - __uaccess_begin(); | ||
1849 | + __uaccess_begin_nospec(); | ||
1850 | __get_user_size(*(u8 *)to, from, 1, ret, 1); | ||
1851 | __uaccess_end(); | ||
1852 | return ret; | ||
1853 | case 2: | ||
1854 | - __uaccess_begin(); | ||
1855 | + __uaccess_begin_nospec(); | ||
1856 | __get_user_size(*(u16 *)to, from, 2, ret, 2); | ||
1857 | __uaccess_end(); | ||
1858 | return ret; | ||
1859 | case 4: | ||
1860 | - __uaccess_begin(); | ||
1861 | + __uaccess_begin_nospec(); | ||
1862 | __get_user_size(*(u32 *)to, from, 4, ret, 4); | ||
1863 | __uaccess_end(); | ||
1864 | return ret; | ||
1865 | diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h | ||
1866 | index 673059a109fe..6e5cc08134ba 100644 | ||
1867 | --- a/arch/x86/include/asm/uaccess_64.h | ||
1868 | +++ b/arch/x86/include/asm/uaccess_64.h | ||
1869 | @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) | ||
1870 | return copy_user_generic(dst, (__force void *)src, size); | ||
1871 | switch (size) { | ||
1872 | case 1: | ||
1873 | - __uaccess_begin(); | ||
1874 | + __uaccess_begin_nospec(); | ||
1875 | __get_user_asm(*(u8 *)dst, (u8 __user *)src, | ||
1876 | ret, "b", "b", "=q", 1); | ||
1877 | __uaccess_end(); | ||
1878 | return ret; | ||
1879 | case 2: | ||
1880 | - __uaccess_begin(); | ||
1881 | + __uaccess_begin_nospec(); | ||
1882 | __get_user_asm(*(u16 *)dst, (u16 __user *)src, | ||
1883 | ret, "w", "w", "=r", 2); | ||
1884 | __uaccess_end(); | ||
1885 | return ret; | ||
1886 | case 4: | ||
1887 | - __uaccess_begin(); | ||
1888 | + __uaccess_begin_nospec(); | ||
1889 | __get_user_asm(*(u32 *)dst, (u32 __user *)src, | ||
1890 | ret, "l", "k", "=r", 4); | ||
1891 | __uaccess_end(); | ||
1892 | return ret; | ||
1893 | case 8: | ||
1894 | - __uaccess_begin(); | ||
1895 | + __uaccess_begin_nospec(); | ||
1896 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, | ||
1897 | ret, "q", "", "=r", 8); | ||
1898 | __uaccess_end(); | ||
1899 | return ret; | ||
1900 | case 10: | ||
1901 | - __uaccess_begin(); | ||
1902 | + __uaccess_begin_nospec(); | ||
1903 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, | ||
1904 | ret, "q", "", "=r", 10); | ||
1905 | if (likely(!ret)) | ||
1906 | @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size) | ||
1907 | __uaccess_end(); | ||
1908 | return ret; | ||
1909 | case 16: | ||
1910 | - __uaccess_begin(); | ||
1911 | + __uaccess_begin_nospec(); | ||
1912 | __get_user_asm(*(u64 *)dst, (u64 __user *)src, | ||
1913 | ret, "q", "", "=r", 16); | ||
1914 | if (likely(!ret)) | ||
1915 | diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c | ||
1916 | index 10d5a3d6affc..03b6e5c6cf23 100644 | ||
1917 | --- a/arch/x86/kernel/alternative.c | ||
1918 | +++ b/arch/x86/kernel/alternative.c | ||
1919 | @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str) | ||
1920 | } | ||
1921 | __setup("noreplace-smp", setup_noreplace_smp); | ||
1922 | |||
1923 | -#ifdef CONFIG_PARAVIRT | ||
1924 | -static int __initdata_or_module noreplace_paravirt = 0; | ||
1925 | - | ||
1926 | -static int __init setup_noreplace_paravirt(char *str) | ||
1927 | -{ | ||
1928 | - noreplace_paravirt = 1; | ||
1929 | - return 1; | ||
1930 | -} | ||
1931 | -__setup("noreplace-paravirt", setup_noreplace_paravirt); | ||
1932 | -#endif | ||
1933 | - | ||
1934 | #define DPRINTK(fmt, args...) \ | ||
1935 | do { \ | ||
1936 | if (debug_alternative) \ | ||
1937 | @@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, | ||
1938 | struct paravirt_patch_site *p; | ||
1939 | char insnbuf[MAX_PATCH_LEN]; | ||
1940 | |||
1941 | - if (noreplace_paravirt) | ||
1942 | - return; | ||
1943 | - | ||
1944 | for (p = start; p < end; p++) { | ||
1945 | unsigned int used; | ||
1946 | |||
1947 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c | ||
1948 | index 8cacf62ec458..957ad443b786 100644 | ||
1949 | --- a/arch/x86/kernel/cpu/bugs.c | ||
1950 | +++ b/arch/x86/kernel/cpu/bugs.c | ||
1951 | @@ -10,6 +10,7 @@ | ||
1952 | #include <linux/init.h> | ||
1953 | #include <linux/utsname.h> | ||
1954 | #include <linux/cpu.h> | ||
1955 | +#include <linux/module.h> | ||
1956 | |||
1957 | #include <asm/nospec-branch.h> | ||
1958 | #include <asm/cmdline.h> | ||
1959 | @@ -89,20 +90,41 @@ static const char *spectre_v2_strings[] = { | ||
1960 | }; | ||
1961 | |||
1962 | #undef pr_fmt | ||
1963 | -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt | ||
1964 | +#define pr_fmt(fmt) "Spectre V2 : " fmt | ||
1965 | |||
1966 | static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; | ||
1967 | |||
1968 | +#ifdef RETPOLINE | ||
1969 | +static bool spectre_v2_bad_module; | ||
1970 | + | ||
1971 | +bool retpoline_module_ok(bool has_retpoline) | ||
1972 | +{ | ||
1973 | + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline) | ||
1974 | + return true; | ||
1975 | + | ||
1976 | + pr_err("System may be vulnerable to spectre v2\n"); | ||
1977 | + spectre_v2_bad_module = true; | ||
1978 | + return false; | ||
1979 | +} | ||
1980 | + | ||
1981 | +static inline const char *spectre_v2_module_string(void) | ||
1982 | +{ | ||
1983 | + return spectre_v2_bad_module ? " - vulnerable module loaded" : ""; | ||
1984 | +} | ||
1985 | +#else | ||
1986 | +static inline const char *spectre_v2_module_string(void) { return ""; } | ||
1987 | +#endif | ||
1988 | + | ||
1989 | static void __init spec2_print_if_insecure(const char *reason) | ||
1990 | { | ||
1991 | if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
1992 | - pr_info("%s\n", reason); | ||
1993 | + pr_info("%s selected on command line.\n", reason); | ||
1994 | } | ||
1995 | |||
1996 | static void __init spec2_print_if_secure(const char *reason) | ||
1997 | { | ||
1998 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
1999 | - pr_info("%s\n", reason); | ||
2000 | + pr_info("%s selected on command line.\n", reason); | ||
2001 | } | ||
2002 | |||
2003 | static inline bool retp_compiler(void) | ||
2004 | @@ -117,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) | ||
2005 | return len == arglen && !strncmp(arg, opt, len); | ||
2006 | } | ||
2007 | |||
2008 | +static const struct { | ||
2009 | + const char *option; | ||
2010 | + enum spectre_v2_mitigation_cmd cmd; | ||
2011 | + bool secure; | ||
2012 | +} mitigation_options[] = { | ||
2013 | + { "off", SPECTRE_V2_CMD_NONE, false }, | ||
2014 | + { "on", SPECTRE_V2_CMD_FORCE, true }, | ||
2015 | + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, | ||
2016 | + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, | ||
2017 | + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, | ||
2018 | + { "auto", SPECTRE_V2_CMD_AUTO, false }, | ||
2019 | +}; | ||
2020 | + | ||
2021 | static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) | ||
2022 | { | ||
2023 | char arg[20]; | ||
2024 | - int ret; | ||
2025 | - | ||
2026 | - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, | ||
2027 | - sizeof(arg)); | ||
2028 | - if (ret > 0) { | ||
2029 | - if (match_option(arg, ret, "off")) { | ||
2030 | - goto disable; | ||
2031 | - } else if (match_option(arg, ret, "on")) { | ||
2032 | - spec2_print_if_secure("force enabled on command line."); | ||
2033 | - return SPECTRE_V2_CMD_FORCE; | ||
2034 | - } else if (match_option(arg, ret, "retpoline")) { | ||
2035 | - spec2_print_if_insecure("retpoline selected on command line."); | ||
2036 | - return SPECTRE_V2_CMD_RETPOLINE; | ||
2037 | - } else if (match_option(arg, ret, "retpoline,amd")) { | ||
2038 | - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
2039 | - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); | ||
2040 | - return SPECTRE_V2_CMD_AUTO; | ||
2041 | - } | ||
2042 | - spec2_print_if_insecure("AMD retpoline selected on command line."); | ||
2043 | - return SPECTRE_V2_CMD_RETPOLINE_AMD; | ||
2044 | - } else if (match_option(arg, ret, "retpoline,generic")) { | ||
2045 | - spec2_print_if_insecure("generic retpoline selected on command line."); | ||
2046 | - return SPECTRE_V2_CMD_RETPOLINE_GENERIC; | ||
2047 | - } else if (match_option(arg, ret, "auto")) { | ||
2048 | + int ret, i; | ||
2049 | + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; | ||
2050 | + | ||
2051 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) | ||
2052 | + return SPECTRE_V2_CMD_NONE; | ||
2053 | + else { | ||
2054 | + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, | ||
2055 | + sizeof(arg)); | ||
2056 | + if (ret < 0) | ||
2057 | return SPECTRE_V2_CMD_AUTO; | ||
2058 | + | ||
2059 | + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { | ||
2060 | + if (!match_option(arg, ret, mitigation_options[i].option)) | ||
2061 | + continue; | ||
2062 | + cmd = mitigation_options[i].cmd; | ||
2063 | + break; | ||
2064 | } | ||
2065 | + | ||
2066 | + if (i >= ARRAY_SIZE(mitigation_options)) { | ||
2067 | + pr_err("unknown option (%s). Switching to AUTO select\n", | ||
2068 | + arg); /* no entry matched: i indexes past the table */ | ||
2069 | + return SPECTRE_V2_CMD_AUTO; | ||
2070 | + } | ||
2071 | + } | ||
2072 | + | ||
2073 | + if ((cmd == SPECTRE_V2_CMD_RETPOLINE || | ||
2074 | + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || | ||
2075 | + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && | ||
2076 | + !IS_ENABLED(CONFIG_RETPOLINE)) { | ||
2077 | + pr_err("%s selected but not compiled in. Switching to AUTO select\n", | ||
2078 | + mitigation_options[i].option); | ||
2079 | + return SPECTRE_V2_CMD_AUTO; | ||
2080 | } | ||
2081 | |||
2082 | - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2")) | ||
2083 | + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && | ||
2084 | + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
2085 | + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); | ||
2086 | return SPECTRE_V2_CMD_AUTO; | ||
2087 | -disable: | ||
2088 | - spec2_print_if_insecure("disabled on command line."); | ||
2089 | - return SPECTRE_V2_CMD_NONE; | ||
2090 | + } | ||
2091 | + | ||
2092 | + if (mitigation_options[i].secure) | ||
2093 | + spec2_print_if_secure(mitigation_options[i].option); | ||
2094 | + else | ||
2095 | + spec2_print_if_insecure(mitigation_options[i].option); | ||
2096 | + | ||
2097 | + return cmd; | ||
2098 | } | ||
2099 | |||
2100 | /* Check for Skylake-like CPUs (for RSB handling) */ | ||
2101 | @@ -190,10 +238,10 @@ static void __init spectre_v2_select_mitigation(void) | ||
2102 | return; | ||
2103 | |||
2104 | case SPECTRE_V2_CMD_FORCE: | ||
2105 | - /* FALLTRHU */ | ||
2106 | case SPECTRE_V2_CMD_AUTO: | ||
2107 | - goto retpoline_auto; | ||
2108 | - | ||
2109 | + if (IS_ENABLED(CONFIG_RETPOLINE)) | ||
2110 | + goto retpoline_auto; | ||
2111 | + break; | ||
2112 | case SPECTRE_V2_CMD_RETPOLINE_AMD: | ||
2113 | if (IS_ENABLED(CONFIG_RETPOLINE)) | ||
2114 | goto retpoline_amd; | ||
2115 | @@ -248,6 +296,12 @@ static void __init spectre_v2_select_mitigation(void) | ||
2116 | setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); | ||
2117 | pr_info("Filling RSB on context switch\n"); | ||
2118 | } | ||
2119 | + | ||
2120 | + /* Initialize Indirect Branch Prediction Barrier if supported */ | ||
2121 | + if (boot_cpu_has(X86_FEATURE_IBPB)) { | ||
2122 | + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); | ||
2123 | + pr_info("Enabling Indirect Branch Prediction Barrier\n"); | ||
2124 | + } | ||
2125 | } | ||
2126 | |||
2127 | #undef pr_fmt | ||
2128 | @@ -268,7 +322,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev, | ||
2129 | { | ||
2130 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) | ||
2131 | return sprintf(buf, "Not affected\n"); | ||
2132 | - return sprintf(buf, "Vulnerable\n"); | ||
2133 | + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); | ||
2134 | } | ||
2135 | |||
2136 | ssize_t cpu_show_spectre_v2(struct device *dev, | ||
2137 | @@ -277,6 +331,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev, | ||
2138 | if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) | ||
2139 | return sprintf(buf, "Not affected\n"); | ||
2140 | |||
2141 | - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]); | ||
2142 | + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], | ||
2143 | + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", | ||
2144 | + spectre_v2_module_string()); | ||
2145 | } | ||
2146 | #endif | ||
2147 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | ||
2148 | index d198ae02f2b7..08e89ed6aa87 100644 | ||
2149 | --- a/arch/x86/kernel/cpu/common.c | ||
2150 | +++ b/arch/x86/kernel/cpu/common.c | ||
2151 | @@ -44,6 +44,8 @@ | ||
2152 | #include <asm/pat.h> | ||
2153 | #include <asm/microcode.h> | ||
2154 | #include <asm/microcode_intel.h> | ||
2155 | +#include <asm/intel-family.h> | ||
2156 | +#include <asm/cpu_device_id.h> | ||
2157 | |||
2158 | #ifdef CONFIG_X86_LOCAL_APIC | ||
2159 | #include <asm/uv/uv.h> | ||
2160 | @@ -716,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) | ||
2161 | } | ||
2162 | } | ||
2163 | |||
2164 | +static void init_speculation_control(struct cpuinfo_x86 *c) | ||
2165 | +{ | ||
2166 | + /* | ||
2167 | + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support, | ||
2168 | + * and they also have a different bit for STIBP support. Also, | ||
2169 | + * a hypervisor might have set the individual AMD bits even on | ||
2170 | + * Intel CPUs, for finer-grained selection of what's available. | ||
2171 | + * | ||
2172 | + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware | ||
2173 | + * features, which are visible in /proc/cpuinfo and used by the | ||
2174 | + * kernel. So set those accordingly from the Intel bits. | ||
2175 | + */ | ||
2176 | + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { | ||
2177 | + set_cpu_cap(c, X86_FEATURE_IBRS); | ||
2178 | + set_cpu_cap(c, X86_FEATURE_IBPB); | ||
2179 | + } | ||
2180 | + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) | ||
2181 | + set_cpu_cap(c, X86_FEATURE_STIBP); | ||
2182 | +} | ||
2183 | + | ||
2184 | void get_cpu_cap(struct cpuinfo_x86 *c) | ||
2185 | { | ||
2186 | u32 eax, ebx, ecx, edx; | ||
2187 | @@ -737,6 +759,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | ||
2188 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
2189 | c->x86_capability[CPUID_7_0_EBX] = ebx; | ||
2190 | c->x86_capability[CPUID_7_ECX] = ecx; | ||
2191 | + c->x86_capability[CPUID_7_EDX] = edx; | ||
2192 | } | ||
2193 | |||
2194 | /* Extended state features: level 0x0000000d */ | ||
2195 | @@ -809,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) | ||
2196 | c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); | ||
2197 | |||
2198 | init_scattered_cpuid_features(c); | ||
2199 | + init_speculation_control(c); | ||
2200 | } | ||
2201 | |||
2202 | static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
2203 | @@ -837,6 +861,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
2204 | #endif | ||
2205 | } | ||
2206 | |||
2207 | +static const __initconst struct x86_cpu_id cpu_no_speculation[] = { | ||
2208 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, | ||
2209 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, | ||
2210 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, | ||
2211 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, | ||
2212 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, | ||
2213 | + { X86_VENDOR_CENTAUR, 5 }, | ||
2214 | + { X86_VENDOR_INTEL, 5 }, | ||
2215 | + { X86_VENDOR_NSC, 5 }, | ||
2216 | + { X86_VENDOR_ANY, 4 }, | ||
2217 | + {} | ||
2218 | +}; | ||
2219 | + | ||
2220 | +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { | ||
2221 | + { X86_VENDOR_AMD }, | ||
2222 | + {} | ||
2223 | +}; | ||
2224 | + | ||
2225 | +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) | ||
2226 | +{ | ||
2227 | + u64 ia32_cap = 0; | ||
2228 | + | ||
2229 | + if (x86_match_cpu(cpu_no_meltdown)) | ||
2230 | + return false; | ||
2231 | + | ||
2232 | + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) | ||
2233 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); | ||
2234 | + | ||
2235 | + /* Rogue Data Cache Load? No! */ | ||
2236 | + if (ia32_cap & ARCH_CAP_RDCL_NO) | ||
2237 | + return false; | ||
2238 | + | ||
2239 | + return true; | ||
2240 | +} | ||
2241 | + | ||
2242 | /* | ||
2243 | * Do minimum CPU detection early. | ||
2244 | * Fields really needed: vendor, cpuid_level, family, model, mask, | ||
2245 | @@ -883,11 +942,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | ||
2246 | |||
2247 | setup_force_cpu_cap(X86_FEATURE_ALWAYS); | ||
2248 | |||
2249 | - if (c->x86_vendor != X86_VENDOR_AMD) | ||
2250 | - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); | ||
2251 | - | ||
2252 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | ||
2253 | - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); | ||
2254 | + if (!x86_match_cpu(cpu_no_speculation)) { | ||
2255 | + if (cpu_vulnerable_to_meltdown(c)) | ||
2256 | + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); | ||
2257 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | ||
2258 | + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); | ||
2259 | + } | ||
2260 | |||
2261 | fpu__init_system(c); | ||
2262 | |||
2263 | diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c | ||
2264 | index fcd484d2bb03..4097b43cba2d 100644 | ||
2265 | --- a/arch/x86/kernel/cpu/intel.c | ||
2266 | +++ b/arch/x86/kernel/cpu/intel.c | ||
2267 | @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) | ||
2268 | } | ||
2269 | } | ||
2270 | |||
2271 | +/* | ||
2272 | + * Early microcode releases for the Spectre v2 mitigation were broken. | ||
2273 | + * Information taken from: | ||
2274 | + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf | ||
2275 | + * - https://kb.vmware.com/s/article/52345 | ||
2276 | + * - Microcode revisions observed in the wild | ||
2277 | + * - Release note from 20180108 microcode release | ||
2278 | + */ | ||
2279 | +struct sku_microcode { | ||
2280 | + u8 model; | ||
2281 | + u8 stepping; | ||
2282 | + u32 microcode; | ||
2283 | +}; | ||
2284 | +static const struct sku_microcode spectre_bad_microcodes[] = { | ||
2285 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 }, | ||
2286 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 }, | ||
2287 | + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 }, | ||
2288 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 }, | ||
2289 | + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 }, | ||
2290 | + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, | ||
2291 | + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, | ||
2292 | + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 }, | ||
2293 | + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, | ||
2294 | + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, | ||
2295 | + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, | ||
2296 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, | ||
2297 | + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 }, | ||
2298 | + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, | ||
2299 | + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 }, | ||
2300 | + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 }, | ||
2301 | + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 }, | ||
2302 | + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, | ||
2303 | + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, | ||
2304 | + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, | ||
2305 | + /* Updated in the 20180108 release; blacklist until we know otherwise */ | ||
2306 | + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 }, | ||
2307 | + /* Observed in the wild */ | ||
2308 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, | ||
2309 | + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, | ||
2310 | +}; | ||
2311 | + | ||
2312 | +static bool bad_spectre_microcode(struct cpuinfo_x86 *c) | ||
2313 | +{ | ||
2314 | + int i; | ||
2315 | + | ||
2316 | + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { | ||
2317 | + if (c->x86_model == spectre_bad_microcodes[i].model && | ||
2318 | + c->x86_mask == spectre_bad_microcodes[i].stepping) | ||
2319 | + return (c->microcode <= spectre_bad_microcodes[i].microcode); | ||
2320 | + } | ||
2321 | + return false; | ||
2322 | +} | ||
2323 | + | ||
2324 | static void early_init_intel(struct cpuinfo_x86 *c) | ||
2325 | { | ||
2326 | u64 misc_enable; | ||
2327 | @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c) | ||
2328 | rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); | ||
2329 | } | ||
2330 | |||
2331 | + /* Now if any of them are set, check the blacklist and clear the lot */ | ||
2332 | + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || | ||
2333 | + cpu_has(c, X86_FEATURE_INTEL_STIBP) || | ||
2334 | + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || | ||
2335 | + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { | ||
2336 | + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); | ||
2337 | + setup_clear_cpu_cap(X86_FEATURE_IBRS); | ||
2338 | + setup_clear_cpu_cap(X86_FEATURE_IBPB); | ||
2339 | + setup_clear_cpu_cap(X86_FEATURE_STIBP); | ||
2340 | + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); | ||
2341 | + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); | ||
2342 | + } | ||
2343 | + | ||
2344 | /* | ||
2345 | * Atom erratum AAE44/AAF40/AAG38/AAH41: | ||
2346 | * | ||
2347 | diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c | ||
2348 | index 5ce5155f0695..0afaf00b029b 100644 | ||
2349 | --- a/arch/x86/kernel/cpu/microcode/core.c | ||
2350 | +++ b/arch/x86/kernel/cpu/microcode/core.c | ||
2351 | @@ -43,7 +43,7 @@ | ||
2352 | #define MICROCODE_VERSION "2.01" | ||
2353 | |||
2354 | static struct microcode_ops *microcode_ops; | ||
2355 | -static bool dis_ucode_ldr; | ||
2356 | +static bool dis_ucode_ldr = true; | ||
2357 | |||
2358 | /* | ||
2359 | * Synchronization. | ||
2360 | @@ -73,6 +73,7 @@ struct cpu_info_ctx { | ||
2361 | static bool __init check_loader_disabled_bsp(void) | ||
2362 | { | ||
2363 | static const char *__dis_opt_str = "dis_ucode_ldr"; | ||
2364 | + u32 a, b, c, d; | ||
2365 | |||
2366 | #ifdef CONFIG_X86_32 | ||
2367 | const char *cmdline = (const char *)__pa_nodebug(boot_command_line); | ||
2368 | @@ -85,8 +86,20 @@ static bool __init check_loader_disabled_bsp(void) | ||
2369 | bool *res = &dis_ucode_ldr; | ||
2370 | #endif | ||
2371 | |||
2372 | - if (cmdline_find_option_bool(cmdline, option)) | ||
2373 | - *res = true; | ||
2374 | + a = 1; | ||
2375 | + c = 0; | ||
2376 | + native_cpuid(&a, &b, &c, &d); | ||
2377 | + | ||
2378 | + /* | ||
2379 | + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not | ||
2380 | + * completely accurate as xen pv guests don't see that CPUID bit set but | ||
2381 | + * that's good enough as they don't land on the BSP path anyway. | ||
2382 | + */ | ||
2383 | + if (c & BIT(31)) | ||
2384 | + return *res; | ||
2385 | + | ||
2386 | + if (cmdline_find_option_bool(cmdline, option) <= 0) | ||
2387 | + *res = false; | ||
2388 | |||
2389 | return *res; | ||
2390 | } | ||
2391 | @@ -114,9 +127,7 @@ void __init load_ucode_bsp(void) | ||
2392 | { | ||
2393 | int vendor; | ||
2394 | unsigned int family; | ||
2395 | - | ||
2396 | - if (check_loader_disabled_bsp()) | ||
2397 | - return; | ||
2398 | + bool intel = true; | ||
2399 | |||
2400 | if (!have_cpuid_p()) | ||
2401 | return; | ||
2402 | @@ -126,16 +137,27 @@ void __init load_ucode_bsp(void) | ||
2403 | |||
2404 | switch (vendor) { | ||
2405 | case X86_VENDOR_INTEL: | ||
2406 | - if (family >= 6) | ||
2407 | - load_ucode_intel_bsp(); | ||
2408 | + if (family < 6) | ||
2409 | + return; | ||
2410 | break; | ||
2411 | + | ||
2412 | case X86_VENDOR_AMD: | ||
2413 | - if (family >= 0x10) | ||
2414 | - load_ucode_amd_bsp(family); | ||
2415 | + if (family < 0x10) | ||
2416 | + return; | ||
2417 | + intel = false; | ||
2418 | break; | ||
2419 | + | ||
2420 | default: | ||
2421 | - break; | ||
2422 | + return; | ||
2423 | } | ||
2424 | + | ||
2425 | + if (check_loader_disabled_bsp()) | ||
2426 | + return; | ||
2427 | + | ||
2428 | + if (intel) | ||
2429 | + load_ucode_intel_bsp(); | ||
2430 | + else | ||
2431 | + load_ucode_amd_bsp(family); | ||
2432 | } | ||
2433 | |||
2434 | static bool check_loader_disabled_ap(void) | ||
2435 | @@ -154,9 +176,6 @@ void load_ucode_ap(void) | ||
2436 | if (check_loader_disabled_ap()) | ||
2437 | return; | ||
2438 | |||
2439 | - if (!have_cpuid_p()) | ||
2440 | - return; | ||
2441 | - | ||
2442 | vendor = x86_cpuid_vendor(); | ||
2443 | family = x86_cpuid_family(); | ||
2444 | |||
2445 | diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c | ||
2446 | index b0dd9aec183d..afbb52532791 100644 | ||
2447 | --- a/arch/x86/kernel/cpu/scattered.c | ||
2448 | +++ b/arch/x86/kernel/cpu/scattered.c | ||
2449 | @@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
2450 | const struct cpuid_bit *cb; | ||
2451 | |||
2452 | static const struct cpuid_bit cpuid_bits[] = { | ||
2453 | - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, | ||
2454 | - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, | ||
2455 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, | ||
2456 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, | ||
2457 | { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, | ||
2458 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c | ||
2459 | index 0887d2ae3797..dffe81d3c261 100644 | ||
2460 | --- a/arch/x86/kernel/process_64.c | ||
2461 | +++ b/arch/x86/kernel/process_64.c | ||
2462 | @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32) | ||
2463 | current->personality &= ~READ_IMPLIES_EXEC; | ||
2464 | /* in_compat_syscall() uses the presence of the x32 | ||
2465 | syscall bit flag to determine compat status */ | ||
2466 | - current->thread.status &= ~TS_COMPAT; | ||
2467 | + current_thread_info()->status &= ~TS_COMPAT; | ||
2468 | } else { | ||
2469 | set_thread_flag(TIF_IA32); | ||
2470 | clear_thread_flag(TIF_X32); | ||
2471 | @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32) | ||
2472 | current->mm->context.ia32_compat = TIF_IA32; | ||
2473 | current->personality |= force_personality32; | ||
2474 | /* Prepare the first "return" to user space */ | ||
2475 | - current->thread.status |= TS_COMPAT; | ||
2476 | + current_thread_info()->status |= TS_COMPAT; | ||
2477 | } | ||
2478 | } | ||
2479 | EXPORT_SYMBOL_GPL(set_personality_ia32); | ||
2480 | diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c | ||
2481 | index 0e63c0267f99..e497d374412a 100644 | ||
2482 | --- a/arch/x86/kernel/ptrace.c | ||
2483 | +++ b/arch/x86/kernel/ptrace.c | ||
2484 | @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) | ||
2485 | */ | ||
2486 | regs->orig_ax = value; | ||
2487 | if (syscall_get_nr(child, regs) >= 0) | ||
2488 | - child->thread.status |= TS_I386_REGS_POKED; | ||
2489 | + child->thread_info.status |= TS_I386_REGS_POKED; | ||
2490 | break; | ||
2491 | |||
2492 | case offsetof(struct user32, regs.eflags): | ||
2493 | diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c | ||
2494 | index 763af1d0de64..b1a5d252d482 100644 | ||
2495 | --- a/arch/x86/kernel/signal.c | ||
2496 | +++ b/arch/x86/kernel/signal.c | ||
2497 | @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) | ||
2498 | * than the tracee. | ||
2499 | */ | ||
2500 | #ifdef CONFIG_IA32_EMULATION | ||
2501 | - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
2502 | + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) | ||
2503 | return __NR_ia32_restart_syscall; | ||
2504 | #endif | ||
2505 | #ifdef CONFIG_X86_X32_ABI | ||
2506 | diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c | ||
2507 | index 8402907825b0..21454e254a4c 100644 | ||
2508 | --- a/arch/x86/kernel/tboot.c | ||
2509 | +++ b/arch/x86/kernel/tboot.c | ||
2510 | @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | ||
2511 | return -1; | ||
2512 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | ||
2513 | pte_unmap(pte); | ||
2514 | + | ||
2515 | + /* | ||
2516 | + * PTI poisons low addresses in the kernel page tables in the | ||
2517 | + * name of making them unusable for userspace. To execute | ||
2518 | + * code at such a low address, the poison must be cleared. | ||
2519 | + * | ||
2520 | + * Note: 'pgd' actually gets set in pud_alloc(). | ||
2521 | + */ | ||
2522 | + pgd->pgd &= ~_PAGE_NX; | ||
2523 | + | ||
2524 | return 0; | ||
2525 | } | ||
2526 | |||
2527 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c | ||
2528 | index 91af75e37306..93f924de06cf 100644 | ||
2529 | --- a/arch/x86/kvm/cpuid.c | ||
2530 | +++ b/arch/x86/kvm/cpuid.c | ||
2531 | @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2532 | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | | ||
2533 | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); | ||
2534 | |||
2535 | + /* cpuid 0x80000008.ebx */ | ||
2536 | + const u32 kvm_cpuid_8000_0008_ebx_x86_features = | ||
2537 | + F(IBPB) | F(IBRS); | ||
2538 | + | ||
2539 | /* cpuid 0xC0000001.edx */ | ||
2540 | const u32 kvm_cpuid_C000_0001_edx_x86_features = | ||
2541 | F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | | ||
2542 | @@ -376,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2543 | /* cpuid 7.0.ecx*/ | ||
2544 | const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; | ||
2545 | |||
2546 | + /* cpuid 7.0.edx*/ | ||
2547 | + const u32 kvm_cpuid_7_0_edx_x86_features = | ||
2548 | + F(SPEC_CTRL) | F(ARCH_CAPABILITIES); | ||
2549 | + | ||
2550 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
2551 | get_cpu(); | ||
2552 | |||
2553 | @@ -458,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2554 | /* PKU is not yet implemented for shadow paging. */ | ||
2555 | if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) | ||
2556 | entry->ecx &= ~F(PKU); | ||
2557 | + entry->edx &= kvm_cpuid_7_0_edx_x86_features; | ||
2558 | + cpuid_mask(&entry->edx, CPUID_7_EDX); | ||
2559 | } else { | ||
2560 | entry->ebx = 0; | ||
2561 | entry->ecx = 0; | ||
2562 | + entry->edx = 0; | ||
2563 | } | ||
2564 | entry->eax = 0; | ||
2565 | - entry->edx = 0; | ||
2566 | break; | ||
2567 | } | ||
2568 | case 9: | ||
2569 | @@ -607,7 +617,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2570 | if (!g_phys_as) | ||
2571 | g_phys_as = phys_as; | ||
2572 | entry->eax = g_phys_as | (virt_as << 8); | ||
2573 | - entry->ebx = entry->edx = 0; | ||
2574 | + entry->edx = 0; | ||
2575 | + /* IBRS and IBPB aren't necessarily present in hardware cpuid */ | ||
2576 | + if (boot_cpu_has(X86_FEATURE_IBPB)) | ||
2577 | + entry->ebx |= F(IBPB); | ||
2578 | + if (boot_cpu_has(X86_FEATURE_IBRS)) | ||
2579 | + entry->ebx |= F(IBRS); | ||
2580 | + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; | ||
2581 | + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); | ||
2582 | break; | ||
2583 | } | ||
2584 | case 0x80000019: | ||
2585 | diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h | ||
2586 | index 9368fecca3ee..d1beb7156704 100644 | ||
2587 | --- a/arch/x86/kvm/cpuid.h | ||
2588 | +++ b/arch/x86/kvm/cpuid.h | ||
2589 | @@ -160,6 +160,37 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu) | ||
2590 | return best && (best->edx & bit(X86_FEATURE_RDTSCP)); | ||
2591 | } | ||
2592 | |||
2593 | +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) | ||
2594 | +{ | ||
2595 | + struct kvm_cpuid_entry2 *best; | ||
2596 | + | ||
2597 | + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
2598 | + if (best && (best->ebx & bit(X86_FEATURE_IBPB))) | ||
2599 | + return true; | ||
2600 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
2601 | + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); | ||
2602 | +} | ||
2603 | + | ||
2604 | +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu) | ||
2605 | +{ | ||
2606 | + struct kvm_cpuid_entry2 *best; | ||
2607 | + | ||
2608 | + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
2609 | + if (best && (best->ebx & bit(X86_FEATURE_IBRS))) | ||
2610 | + return true; | ||
2611 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
2612 | + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); | ||
2613 | +} | ||
2614 | + | ||
2615 | +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) | ||
2616 | +{ | ||
2617 | + struct kvm_cpuid_entry2 *best; | ||
2618 | + | ||
2619 | + best = kvm_find_cpuid_entry(vcpu, 7, 0); | ||
2620 | + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); | ||
2621 | +} | ||
2622 | + | ||
2623 | + | ||
2624 | /* | ||
2625 | * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 | ||
2626 | */ | ||
2627 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c | ||
2628 | index 6f5a3b076341..c8d573822e60 100644 | ||
2629 | --- a/arch/x86/kvm/emulate.c | ||
2630 | +++ b/arch/x86/kvm/emulate.c | ||
2631 | @@ -25,6 +25,7 @@ | ||
2632 | #include <asm/kvm_emulate.h> | ||
2633 | #include <linux/stringify.h> | ||
2634 | #include <asm/debugreg.h> | ||
2635 | +#include <asm/nospec-branch.h> | ||
2636 | |||
2637 | #include "x86.h" | ||
2638 | #include "tss.h" | ||
2639 | @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags) | ||
2640 | void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); | ||
2641 | |||
2642 | flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
2643 | - asm("push %[flags]; popf; call *%[fastop]" | ||
2644 | - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); | ||
2645 | + asm("push %[flags]; popf; " CALL_NOSPEC | ||
2646 | + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags)); | ||
2647 | return rc; | ||
2648 | } | ||
2649 | |||
2650 | @@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, | ||
2651 | |||
2652 | static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) | ||
2653 | { | ||
2654 | - register void *__sp asm(_ASM_SP); | ||
2655 | ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; | ||
2656 | |||
2657 | if (!(ctxt->d & ByteOp)) | ||
2658 | fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; | ||
2659 | |||
2660 | - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" | ||
2661 | + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n" | ||
2662 | : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), | ||
2663 | - [fastop]"+S"(fop), "+r"(__sp) | ||
2664 | + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT | ||
2665 | : "c"(ctxt->src2.val)); | ||
2666 | |||
2667 | ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); | ||
2668 | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c | ||
2669 | index 24af898fb3a6..be644afab1bb 100644 | ||
2670 | --- a/arch/x86/kvm/svm.c | ||
2671 | +++ b/arch/x86/kvm/svm.c | ||
2672 | @@ -183,6 +183,8 @@ struct vcpu_svm { | ||
2673 | u64 gs_base; | ||
2674 | } host; | ||
2675 | |||
2676 | + u64 spec_ctrl; | ||
2677 | + | ||
2678 | u32 *msrpm; | ||
2679 | |||
2680 | ulong nmi_iret_rip; | ||
2681 | @@ -248,6 +250,8 @@ static const struct svm_direct_access_msrs { | ||
2682 | { .index = MSR_CSTAR, .always = true }, | ||
2683 | { .index = MSR_SYSCALL_MASK, .always = true }, | ||
2684 | #endif | ||
2685 | + { .index = MSR_IA32_SPEC_CTRL, .always = false }, | ||
2686 | + { .index = MSR_IA32_PRED_CMD, .always = false }, | ||
2687 | { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, | ||
2688 | { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, | ||
2689 | { .index = MSR_IA32_LASTINTFROMIP, .always = false }, | ||
2690 | @@ -510,6 +514,7 @@ struct svm_cpu_data { | ||
2691 | struct kvm_ldttss_desc *tss_desc; | ||
2692 | |||
2693 | struct page *save_area; | ||
2694 | + struct vmcb *current_vmcb; | ||
2695 | }; | ||
2696 | |||
2697 | static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); | ||
2698 | @@ -861,6 +866,25 @@ static bool valid_msr_intercept(u32 index) | ||
2699 | return false; | ||
2700 | } | ||
2701 | |||
2702 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr) | ||
2703 | +{ | ||
2704 | + u8 bit_write; | ||
2705 | + unsigned long tmp; | ||
2706 | + u32 offset; | ||
2707 | + u32 *msrpm; | ||
2708 | + | ||
2709 | + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm: | ||
2710 | + to_svm(vcpu)->msrpm; | ||
2711 | + | ||
2712 | + offset = svm_msrpm_offset(msr); | ||
2713 | + bit_write = 2 * (msr & 0x0f) + 1; | ||
2714 | + tmp = msrpm[offset]; | ||
2715 | + | ||
2716 | + BUG_ON(offset == MSR_INVALID); | ||
2717 | + | ||
2718 | + return !!test_bit(bit_write, &tmp); | ||
2719 | +} | ||
2720 | + | ||
2721 | static void set_msr_interception(u32 *msrpm, unsigned msr, | ||
2722 | int read, int write) | ||
2723 | { | ||
2724 | @@ -1535,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | ||
2725 | u32 dummy; | ||
2726 | u32 eax = 1; | ||
2727 | |||
2728 | + svm->spec_ctrl = 0; | ||
2729 | + | ||
2730 | if (!init_event) { | ||
2731 | svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | | ||
2732 | MSR_IA32_APICBASE_ENABLE; | ||
2733 | @@ -1644,11 +1670,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) | ||
2734 | __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); | ||
2735 | kvm_vcpu_uninit(vcpu); | ||
2736 | kmem_cache_free(kvm_vcpu_cache, svm); | ||
2737 | + /* | ||
2738 | + * The vmcb page can be recycled, causing a false negative in | ||
2739 | + * svm_vcpu_load(). So do a full IBPB now. | ||
2740 | + */ | ||
2741 | + indirect_branch_prediction_barrier(); | ||
2742 | } | ||
2743 | |||
2744 | static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
2745 | { | ||
2746 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2747 | + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); | ||
2748 | int i; | ||
2749 | |||
2750 | if (unlikely(cpu != vcpu->cpu)) { | ||
2751 | @@ -1677,6 +1709,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
2752 | if (static_cpu_has(X86_FEATURE_RDTSCP)) | ||
2753 | wrmsrl(MSR_TSC_AUX, svm->tsc_aux); | ||
2754 | |||
2755 | + if (sd->current_vmcb != svm->vmcb) { | ||
2756 | + sd->current_vmcb = svm->vmcb; | ||
2757 | + indirect_branch_prediction_barrier(); | ||
2758 | + } | ||
2759 | avic_vcpu_load(vcpu, cpu); | ||
2760 | } | ||
2761 | |||
2762 | @@ -3508,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2763 | case MSR_VM_CR: | ||
2764 | msr_info->data = svm->nested.vm_cr_msr; | ||
2765 | break; | ||
2766 | + case MSR_IA32_SPEC_CTRL: | ||
2767 | + if (!msr_info->host_initiated && | ||
2768 | + !guest_cpuid_has_ibrs(vcpu)) | ||
2769 | + return 1; | ||
2770 | + | ||
2771 | + msr_info->data = svm->spec_ctrl; | ||
2772 | + break; | ||
2773 | case MSR_IA32_UCODE_REV: | ||
2774 | msr_info->data = 0x01000065; | ||
2775 | break; | ||
2776 | @@ -3599,6 +3642,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) | ||
2777 | case MSR_IA32_TSC: | ||
2778 | kvm_write_tsc(vcpu, msr); | ||
2779 | break; | ||
2780 | + case MSR_IA32_SPEC_CTRL: | ||
2781 | + if (!msr->host_initiated && | ||
2782 | + !guest_cpuid_has_ibrs(vcpu)) | ||
2783 | + return 1; | ||
2784 | + | ||
2785 | + /* The STIBP bit doesn't fault even if it's not advertised */ | ||
2786 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) | ||
2787 | + return 1; | ||
2788 | + | ||
2789 | + svm->spec_ctrl = data; | ||
2790 | + | ||
2791 | + if (!data) | ||
2792 | + break; | ||
2793 | + | ||
2794 | + /* | ||
2795 | + * For non-nested: | ||
2796 | + * When it's written (to non-zero) for the first time, pass | ||
2797 | + * it through. | ||
2798 | + * | ||
2799 | + * For nested: | ||
2800 | + * The handling of the MSR bitmap for L2 guests is done in | ||
2801 | + * nested_svm_vmrun_msrpm. | ||
2802 | + * We update the L1 MSR bit as well since it will end up | ||
2803 | + * touching the MSR anyway now. | ||
2804 | + */ | ||
2805 | + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1); | ||
2806 | + break; | ||
2807 | + case MSR_IA32_PRED_CMD: | ||
2808 | + if (!msr->host_initiated && | ||
2809 | + !guest_cpuid_has_ibpb(vcpu)) | ||
2810 | + return 1; | ||
2811 | + | ||
2812 | + if (data & ~PRED_CMD_IBPB) | ||
2813 | + return 1; | ||
2814 | + | ||
2815 | + if (!data) | ||
2816 | + break; | ||
2817 | + | ||
2818 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); | ||
2819 | + if (is_guest_mode(vcpu)) | ||
2820 | + break; | ||
2821 | + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); | ||
2822 | + break; | ||
2823 | case MSR_STAR: | ||
2824 | svm->vmcb->save.star = data; | ||
2825 | break; | ||
2826 | @@ -4826,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||
2827 | |||
2828 | local_irq_enable(); | ||
2829 | |||
2830 | + /* | ||
2831 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if | ||
2832 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there | ||
2833 | + * is no need to worry about the conditional branch over the wrmsr | ||
2834 | + * being speculatively taken. | ||
2835 | + */ | ||
2836 | + if (svm->spec_ctrl) | ||
2837 | + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); | ||
2838 | + | ||
2839 | asm volatile ( | ||
2840 | "push %%" _ASM_BP "; \n\t" | ||
2841 | "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" | ||
2842 | @@ -4918,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | ||
2843 | #endif | ||
2844 | ); | ||
2845 | |||
2846 | + /* | ||
2847 | + * We do not use IBRS in the kernel. If this vCPU has used the | ||
2848 | + * SPEC_CTRL MSR it may have left it on; save the value and | ||
2849 | + * turn it off. This is much more efficient than blindly adding | ||
2850 | + * it to the atomic save/restore list. Especially as the former | ||
2851 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. | ||
2852 | + * | ||
2853 | + * For non-nested case: | ||
2854 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to | ||
2855 | + * save it. | ||
2856 | + * | ||
2857 | + * For nested case: | ||
2858 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to | ||
2859 | + * save it. | ||
2860 | + */ | ||
2861 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) | ||
2862 | + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); | ||
2863 | + | ||
2864 | + if (svm->spec_ctrl) | ||
2865 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); | ||
2866 | + | ||
2867 | /* Eliminate branch target predictions from guest mode */ | ||
2868 | vmexit_fill_RSB(); | ||
2869 | |||
2870 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c | ||
2871 | index 178a344f55f8..d49da86e3099 100644 | ||
2872 | --- a/arch/x86/kvm/vmx.c | ||
2873 | +++ b/arch/x86/kvm/vmx.c | ||
2874 | @@ -33,6 +33,7 @@ | ||
2875 | #include <linux/slab.h> | ||
2876 | #include <linux/tboot.h> | ||
2877 | #include <linux/hrtimer.h> | ||
2878 | +#include <linux/nospec.h> | ||
2879 | #include "kvm_cache_regs.h" | ||
2880 | #include "x86.h" | ||
2881 | |||
2882 | @@ -109,6 +110,14 @@ static u64 __read_mostly host_xss; | ||
2883 | static bool __read_mostly enable_pml = 1; | ||
2884 | module_param_named(pml, enable_pml, bool, S_IRUGO); | ||
2885 | |||
2886 | +#define MSR_TYPE_R 1 | ||
2887 | +#define MSR_TYPE_W 2 | ||
2888 | +#define MSR_TYPE_RW 3 | ||
2889 | + | ||
2890 | +#define MSR_BITMAP_MODE_X2APIC 1 | ||
2891 | +#define MSR_BITMAP_MODE_X2APIC_APICV 2 | ||
2892 | +#define MSR_BITMAP_MODE_LM 4 | ||
2893 | + | ||
2894 | #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL | ||
2895 | |||
2896 | /* Guest_tsc -> host_tsc conversion requires 64-bit division. */ | ||
2897 | @@ -173,7 +182,6 @@ module_param(ple_window_max, int, S_IRUGO); | ||
2898 | extern const ulong vmx_return; | ||
2899 | |||
2900 | #define NR_AUTOLOAD_MSRS 8 | ||
2901 | -#define VMCS02_POOL_SIZE 1 | ||
2902 | |||
2903 | struct vmcs { | ||
2904 | u32 revision_id; | ||
2905 | @@ -191,6 +199,7 @@ struct loaded_vmcs { | ||
2906 | struct vmcs *shadow_vmcs; | ||
2907 | int cpu; | ||
2908 | int launched; | ||
2909 | + unsigned long *msr_bitmap; | ||
2910 | struct list_head loaded_vmcss_on_cpu_link; | ||
2911 | }; | ||
2912 | |||
2913 | @@ -207,7 +216,7 @@ struct shared_msr_entry { | ||
2914 | * stored in guest memory specified by VMPTRLD, but is opaque to the guest, | ||
2915 | * which must access it using VMREAD/VMWRITE/VMCLEAR instructions. | ||
2916 | * More than one of these structures may exist, if L1 runs multiple L2 guests. | ||
2917 | - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the | ||
2918 | + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the | ||
2919 | * underlying hardware which will be used to run L2. | ||
2920 | * This structure is packed to ensure that its layout is identical across | ||
2921 | * machines (necessary for live migration). | ||
2922 | @@ -386,13 +395,6 @@ struct __packed vmcs12 { | ||
2923 | */ | ||
2924 | #define VMCS12_SIZE 0x1000 | ||
2925 | |||
2926 | -/* Used to remember the last vmcs02 used for some recently used vmcs12s */ | ||
2927 | -struct vmcs02_list { | ||
2928 | - struct list_head list; | ||
2929 | - gpa_t vmptr; | ||
2930 | - struct loaded_vmcs vmcs02; | ||
2931 | -}; | ||
2932 | - | ||
2933 | /* | ||
2934 | * The nested_vmx structure is part of vcpu_vmx, and holds information we need | ||
2935 | * for correct emulation of VMX (i.e., nested VMX) on this vcpu. | ||
2936 | @@ -419,15 +421,15 @@ struct nested_vmx { | ||
2937 | */ | ||
2938 | bool sync_shadow_vmcs; | ||
2939 | |||
2940 | - /* vmcs02_list cache of VMCSs recently used to run L2 guests */ | ||
2941 | - struct list_head vmcs02_pool; | ||
2942 | - int vmcs02_num; | ||
2943 | bool change_vmcs01_virtual_x2apic_mode; | ||
2944 | /* L2 must run next, and mustn't decide to exit to L1. */ | ||
2945 | bool nested_run_pending; | ||
2946 | + | ||
2947 | + struct loaded_vmcs vmcs02; | ||
2948 | + | ||
2949 | /* | ||
2950 | - * Guest pages referred to in vmcs02 with host-physical pointers, so | ||
2951 | - * we must keep them pinned while L2 runs. | ||
2952 | + * Guest pages referred to in the vmcs02 with host-physical | ||
2953 | + * pointers, so we must keep them pinned while L2 runs. | ||
2954 | */ | ||
2955 | struct page *apic_access_page; | ||
2956 | struct page *virtual_apic_page; | ||
2957 | @@ -436,8 +438,6 @@ struct nested_vmx { | ||
2958 | bool pi_pending; | ||
2959 | u16 posted_intr_nv; | ||
2960 | |||
2961 | - unsigned long *msr_bitmap; | ||
2962 | - | ||
2963 | struct hrtimer preemption_timer; | ||
2964 | bool preemption_timer_expired; | ||
2965 | |||
2966 | @@ -538,6 +538,7 @@ struct vcpu_vmx { | ||
2967 | unsigned long host_rsp; | ||
2968 | u8 fail; | ||
2969 | bool nmi_known_unmasked; | ||
2970 | + u8 msr_bitmap_mode; | ||
2971 | u32 exit_intr_info; | ||
2972 | u32 idt_vectoring_info; | ||
2973 | ulong rflags; | ||
2974 | @@ -549,6 +550,10 @@ struct vcpu_vmx { | ||
2975 | u64 msr_host_kernel_gs_base; | ||
2976 | u64 msr_guest_kernel_gs_base; | ||
2977 | #endif | ||
2978 | + | ||
2979 | + u64 arch_capabilities; | ||
2980 | + u64 spec_ctrl; | ||
2981 | + | ||
2982 | u32 vm_entry_controls_shadow; | ||
2983 | u32 vm_exit_controls_shadow; | ||
2984 | /* | ||
2985 | @@ -856,21 +861,18 @@ static const unsigned short vmcs_field_to_offset_table[] = { | ||
2986 | |||
2987 | static inline short vmcs_field_to_offset(unsigned long field) | ||
2988 | { | ||
2989 | - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX); | ||
2990 | + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table); | ||
2991 | + unsigned short offset; | ||
2992 | |||
2993 | - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table)) | ||
2994 | + BUILD_BUG_ON(size > SHRT_MAX); | ||
2995 | + if (field >= size) | ||
2996 | return -ENOENT; | ||
2997 | |||
2998 | - /* | ||
2999 | - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a | ||
3000 | - * generic mechanism. | ||
3001 | - */ | ||
3002 | - asm("lfence"); | ||
3003 | - | ||
3004 | - if (vmcs_field_to_offset_table[field] == 0) | ||
3005 | + field = array_index_nospec(field, size); | ||
3006 | + offset = vmcs_field_to_offset_table[field]; | ||
3007 | + if (offset == 0) | ||
3008 | return -ENOENT; | ||
3009 | - | ||
3010 | - return vmcs_field_to_offset_table[field]; | ||
3011 | + return offset; | ||
3012 | } | ||
3013 | |||
3014 | static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) | ||
3015 | @@ -912,6 +914,9 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); | ||
3016 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | ||
3017 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | ||
3018 | static int alloc_identity_pagetable(struct kvm *kvm); | ||
3019 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); | ||
3020 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3021 | + u32 msr, int type); | ||
3022 | |||
3023 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | ||
3024 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | ||
3025 | @@ -931,12 +936,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); | ||
3026 | |||
3027 | static unsigned long *vmx_io_bitmap_a; | ||
3028 | static unsigned long *vmx_io_bitmap_b; | ||
3029 | -static unsigned long *vmx_msr_bitmap_legacy; | ||
3030 | -static unsigned long *vmx_msr_bitmap_longmode; | ||
3031 | -static unsigned long *vmx_msr_bitmap_legacy_x2apic; | ||
3032 | -static unsigned long *vmx_msr_bitmap_longmode_x2apic; | ||
3033 | -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive; | ||
3034 | -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive; | ||
3035 | static unsigned long *vmx_vmread_bitmap; | ||
3036 | static unsigned long *vmx_vmwrite_bitmap; | ||
3037 | |||
3038 | @@ -1853,6 +1852,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | ||
3039 | vmcs_write32(EXCEPTION_BITMAP, eb); | ||
3040 | } | ||
3041 | |||
3042 | +/* | ||
3043 | + * Check if MSR is intercepted for currently loaded MSR bitmap. | ||
3044 | + */ | ||
3045 | +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) | ||
3046 | +{ | ||
3047 | + unsigned long *msr_bitmap; | ||
3048 | + int f = sizeof(unsigned long); | ||
3049 | + | ||
3050 | + if (!cpu_has_vmx_msr_bitmap()) | ||
3051 | + return true; | ||
3052 | + | ||
3053 | + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; | ||
3054 | + | ||
3055 | + if (msr <= 0x1fff) { | ||
3056 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); | ||
3057 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
3058 | + msr &= 0x1fff; | ||
3059 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); | ||
3060 | + } | ||
3061 | + | ||
3062 | + return true; | ||
3063 | +} | ||
3064 | + | ||
3065 | +/* | ||
3066 | + * Check if MSR is intercepted for L01 MSR bitmap. | ||
3067 | + */ | ||
3068 | +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) | ||
3069 | +{ | ||
3070 | + unsigned long *msr_bitmap; | ||
3071 | + int f = sizeof(unsigned long); | ||
3072 | + | ||
3073 | + if (!cpu_has_vmx_msr_bitmap()) | ||
3074 | + return true; | ||
3075 | + | ||
3076 | + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; | ||
3077 | + | ||
3078 | + if (msr <= 0x1fff) { | ||
3079 | + return !!test_bit(msr, msr_bitmap + 0x800 / f); | ||
3080 | + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | ||
3081 | + msr &= 0x1fff; | ||
3082 | + return !!test_bit(msr, msr_bitmap + 0xc00 / f); | ||
3083 | + } | ||
3084 | + | ||
3085 | + return true; | ||
3086 | +} | ||
3087 | + | ||
3088 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, | ||
3089 | unsigned long entry, unsigned long exit) | ||
3090 | { | ||
3091 | @@ -2262,6 +2307,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
3092 | if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { | ||
3093 | per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; | ||
3094 | vmcs_load(vmx->loaded_vmcs->vmcs); | ||
3095 | + indirect_branch_prediction_barrier(); | ||
3096 | } | ||
3097 | |||
3098 | if (!already_loaded) { | ||
3099 | @@ -2530,36 +2576,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | ||
3100 | vmx->guest_msrs[from] = tmp; | ||
3101 | } | ||
3102 | |||
3103 | -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) | ||
3104 | -{ | ||
3105 | - unsigned long *msr_bitmap; | ||
3106 | - | ||
3107 | - if (is_guest_mode(vcpu)) | ||
3108 | - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; | ||
3109 | - else if (cpu_has_secondary_exec_ctrls() && | ||
3110 | - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | ||
3111 | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | ||
3112 | - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) { | ||
3113 | - if (is_long_mode(vcpu)) | ||
3114 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic; | ||
3115 | - else | ||
3116 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic; | ||
3117 | - } else { | ||
3118 | - if (is_long_mode(vcpu)) | ||
3119 | - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive; | ||
3120 | - else | ||
3121 | - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive; | ||
3122 | - } | ||
3123 | - } else { | ||
3124 | - if (is_long_mode(vcpu)) | ||
3125 | - msr_bitmap = vmx_msr_bitmap_longmode; | ||
3126 | - else | ||
3127 | - msr_bitmap = vmx_msr_bitmap_legacy; | ||
3128 | - } | ||
3129 | - | ||
3130 | - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
3131 | -} | ||
3132 | - | ||
3133 | /* | ||
3134 | * Set up the vmcs to automatically save and restore system | ||
3135 | * msrs. Don't touch the 64-bit msrs if the guest is in legacy | ||
3136 | @@ -2600,7 +2616,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) | ||
3137 | vmx->save_nmsrs = save_nmsrs; | ||
3138 | |||
3139 | if (cpu_has_vmx_msr_bitmap()) | ||
3140 | - vmx_set_msr_bitmap(&vmx->vcpu); | ||
3141 | + vmx_update_msr_bitmap(&vmx->vcpu); | ||
3142 | } | ||
3143 | |||
3144 | /* | ||
3145 | @@ -2989,6 +3005,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
3146 | case MSR_IA32_TSC: | ||
3147 | msr_info->data = guest_read_tsc(vcpu); | ||
3148 | break; | ||
3149 | + case MSR_IA32_SPEC_CTRL: | ||
3150 | + if (!msr_info->host_initiated && | ||
3151 | + !guest_cpuid_has_ibrs(vcpu)) | ||
3152 | + return 1; | ||
3153 | + | ||
3154 | + msr_info->data = to_vmx(vcpu)->spec_ctrl; | ||
3155 | + break; | ||
3156 | + case MSR_IA32_ARCH_CAPABILITIES: | ||
3157 | + if (!msr_info->host_initiated && | ||
3158 | + !guest_cpuid_has_arch_capabilities(vcpu)) | ||
3159 | + return 1; | ||
3160 | + msr_info->data = to_vmx(vcpu)->arch_capabilities; | ||
3161 | + break; | ||
3162 | case MSR_IA32_SYSENTER_CS: | ||
3163 | msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); | ||
3164 | break; | ||
3165 | @@ -3093,6 +3122,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
3166 | case MSR_IA32_TSC: | ||
3167 | kvm_write_tsc(vcpu, msr_info); | ||
3168 | break; | ||
3169 | + case MSR_IA32_SPEC_CTRL: | ||
3170 | + if (!msr_info->host_initiated && | ||
3171 | + !guest_cpuid_has_ibrs(vcpu)) | ||
3172 | + return 1; | ||
3173 | + | ||
3174 | + /* The STIBP bit doesn't fault even if it's not advertised */ | ||
3175 | + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) | ||
3176 | + return 1; | ||
3177 | + | ||
3178 | + vmx->spec_ctrl = data; | ||
3179 | + | ||
3180 | + if (!data) | ||
3181 | + break; | ||
3182 | + | ||
3183 | + /* | ||
3184 | + * For non-nested: | ||
3185 | + * When it's written (to non-zero) for the first time, pass | ||
3186 | + * it through. | ||
3187 | + * | ||
3188 | + * For nested: | ||
3189 | + * The handling of the MSR bitmap for L2 guests is done in | ||
3190 | + * nested_vmx_merge_msr_bitmap. We should not touch the | ||
3191 | + * vmcs02.msr_bitmap here since it gets completely overwritten | ||
3192 | + * in the merging. We update the vmcs01 here for L1 as well | ||
3193 | + * since it will end up touching the MSR anyway now. | ||
3194 | + */ | ||
3195 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, | ||
3196 | + MSR_IA32_SPEC_CTRL, | ||
3197 | + MSR_TYPE_RW); | ||
3198 | + break; | ||
3199 | + case MSR_IA32_PRED_CMD: | ||
3200 | + if (!msr_info->host_initiated && | ||
3201 | + !guest_cpuid_has_ibpb(vcpu)) | ||
3202 | + return 1; | ||
3203 | + | ||
3204 | + if (data & ~PRED_CMD_IBPB) | ||
3205 | + return 1; | ||
3206 | + | ||
3207 | + if (!data) | ||
3208 | + break; | ||
3209 | + | ||
3210 | + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB); | ||
3211 | + | ||
3212 | + /* | ||
3213 | + * For non-nested: | ||
3214 | + * When it's written (to non-zero) for the first time, pass | ||
3215 | + * it through. | ||
3216 | + * | ||
3217 | + * For nested: | ||
3218 | + * The handling of the MSR bitmap for L2 guests is done in | ||
3219 | + * nested_vmx_merge_msr_bitmap. We should not touch the | ||
3220 | + * vmcs02.msr_bitmap here since it gets completely overwritten | ||
3221 | + * in the merging. | ||
3222 | + */ | ||
3223 | + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, | ||
3224 | + MSR_TYPE_W); | ||
3225 | + break; | ||
3226 | + case MSR_IA32_ARCH_CAPABILITIES: | ||
3227 | + if (!msr_info->host_initiated) | ||
3228 | + return 1; | ||
3229 | + vmx->arch_capabilities = data; | ||
3230 | + break; | ||
3231 | case MSR_IA32_CR_PAT: | ||
3232 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
3233 | if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) | ||
3234 | @@ -3532,11 +3623,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) | ||
3235 | return vmcs; | ||
3236 | } | ||
3237 | |||
3238 | -static struct vmcs *alloc_vmcs(void) | ||
3239 | -{ | ||
3240 | - return alloc_vmcs_cpu(raw_smp_processor_id()); | ||
3241 | -} | ||
3242 | - | ||
3243 | static void free_vmcs(struct vmcs *vmcs) | ||
3244 | { | ||
3245 | free_pages((unsigned long)vmcs, vmcs_config.order); | ||
3246 | @@ -3552,9 +3638,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | ||
3247 | loaded_vmcs_clear(loaded_vmcs); | ||
3248 | free_vmcs(loaded_vmcs->vmcs); | ||
3249 | loaded_vmcs->vmcs = NULL; | ||
3250 | + if (loaded_vmcs->msr_bitmap) | ||
3251 | + free_page((unsigned long)loaded_vmcs->msr_bitmap); | ||
3252 | WARN_ON(loaded_vmcs->shadow_vmcs != NULL); | ||
3253 | } | ||
3254 | |||
3255 | +static struct vmcs *alloc_vmcs(void) | ||
3256 | +{ | ||
3257 | + return alloc_vmcs_cpu(raw_smp_processor_id()); | ||
3258 | +} | ||
3259 | + | ||
3260 | +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) | ||
3261 | +{ | ||
3262 | + loaded_vmcs->vmcs = alloc_vmcs(); | ||
3263 | + if (!loaded_vmcs->vmcs) | ||
3264 | + return -ENOMEM; | ||
3265 | + | ||
3266 | + loaded_vmcs->shadow_vmcs = NULL; | ||
3267 | + loaded_vmcs_init(loaded_vmcs); | ||
3268 | + | ||
3269 | + if (cpu_has_vmx_msr_bitmap()) { | ||
3270 | + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3271 | + if (!loaded_vmcs->msr_bitmap) | ||
3272 | + goto out_vmcs; | ||
3273 | + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); | ||
3274 | + } | ||
3275 | + return 0; | ||
3276 | + | ||
3277 | +out_vmcs: | ||
3278 | + free_loaded_vmcs(loaded_vmcs); | ||
3279 | + return -ENOMEM; | ||
3280 | +} | ||
3281 | + | ||
3282 | static void free_kvm_area(void) | ||
3283 | { | ||
3284 | int cpu; | ||
3285 | @@ -4561,10 +4676,8 @@ static void free_vpid(int vpid) | ||
3286 | spin_unlock(&vmx_vpid_lock); | ||
3287 | } | ||
3288 | |||
3289 | -#define MSR_TYPE_R 1 | ||
3290 | -#define MSR_TYPE_W 2 | ||
3291 | -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3292 | - u32 msr, int type) | ||
3293 | +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3294 | + u32 msr, int type) | ||
3295 | { | ||
3296 | int f = sizeof(unsigned long); | ||
3297 | |||
3298 | @@ -4598,8 +4711,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3299 | } | ||
3300 | } | ||
3301 | |||
3302 | -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3303 | - u32 msr, int type) | ||
3304 | +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3305 | + u32 msr, int type) | ||
3306 | { | ||
3307 | int f = sizeof(unsigned long); | ||
3308 | |||
3309 | @@ -4633,6 +4746,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, | ||
3310 | } | ||
3311 | } | ||
3312 | |||
3313 | +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, | ||
3314 | + u32 msr, int type, bool value) | ||
3315 | +{ | ||
3316 | + if (value) | ||
3317 | + vmx_enable_intercept_for_msr(msr_bitmap, msr, type); | ||
3318 | + else | ||
3319 | + vmx_disable_intercept_for_msr(msr_bitmap, msr, type); | ||
3320 | +} | ||
3321 | + | ||
3322 | /* | ||
3323 | * If a msr is allowed by L0, we should check whether it is allowed by L1. | ||
3324 | * The corresponding bit will be cleared unless both of L0 and L1 allow it. | ||
3325 | @@ -4679,58 +4801,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, | ||
3326 | } | ||
3327 | } | ||
3328 | |||
3329 | -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | ||
3330 | +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) | ||
3331 | { | ||
3332 | - if (!longmode_only) | ||
3333 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, | ||
3334 | - msr, MSR_TYPE_R | MSR_TYPE_W); | ||
3335 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, | ||
3336 | - msr, MSR_TYPE_R | MSR_TYPE_W); | ||
3337 | -} | ||
3338 | + u8 mode = 0; | ||
3339 | |||
3340 | -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) | ||
3341 | -{ | ||
3342 | - if (apicv_active) { | ||
3343 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3344 | - msr, MSR_TYPE_R); | ||
3345 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3346 | - msr, MSR_TYPE_R); | ||
3347 | - } else { | ||
3348 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, | ||
3349 | - msr, MSR_TYPE_R); | ||
3350 | - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, | ||
3351 | - msr, MSR_TYPE_R); | ||
3352 | + if (cpu_has_secondary_exec_ctrls() && | ||
3353 | + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & | ||
3354 | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { | ||
3355 | + mode |= MSR_BITMAP_MODE_X2APIC; | ||
3356 | + if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) | ||
3357 | + mode |= MSR_BITMAP_MODE_X2APIC_APICV; | ||
3358 | } | ||
3359 | + | ||
3360 | + if (is_long_mode(vcpu)) | ||
3361 | + mode |= MSR_BITMAP_MODE_LM; | ||
3362 | + | ||
3363 | + return mode; | ||
3364 | } | ||
3365 | |||
3366 | -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active) | ||
3367 | +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) | ||
3368 | + | ||
3369 | +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap, | ||
3370 | + u8 mode) | ||
3371 | { | ||
3372 | - if (apicv_active) { | ||
3373 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3374 | - msr, MSR_TYPE_R); | ||
3375 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3376 | - msr, MSR_TYPE_R); | ||
3377 | - } else { | ||
3378 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, | ||
3379 | - msr, MSR_TYPE_R); | ||
3380 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, | ||
3381 | - msr, MSR_TYPE_R); | ||
3382 | + int msr; | ||
3383 | + | ||
3384 | + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) { | ||
3385 | + unsigned word = msr / BITS_PER_LONG; | ||
3386 | + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0; | ||
3387 | + msr_bitmap[word + (0x800 / sizeof(long))] = ~0; | ||
3388 | + } | ||
3389 | + | ||
3390 | + if (mode & MSR_BITMAP_MODE_X2APIC) { | ||
3391 | + /* | ||
3392 | + * TPR reads and writes can be virtualized even if virtual interrupt | ||
3393 | + * delivery is not in use. | ||
3394 | + */ | ||
3395 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW); | ||
3396 | + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) { | ||
3397 | + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R); | ||
3398 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); | ||
3399 | + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); | ||
3400 | + } | ||
3401 | } | ||
3402 | } | ||
3403 | |||
3404 | -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active) | ||
3405 | +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) | ||
3406 | { | ||
3407 | - if (apicv_active) { | ||
3408 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, | ||
3409 | - msr, MSR_TYPE_W); | ||
3410 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, | ||
3411 | - msr, MSR_TYPE_W); | ||
3412 | - } else { | ||
3413 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, | ||
3414 | - msr, MSR_TYPE_W); | ||
3415 | - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, | ||
3416 | - msr, MSR_TYPE_W); | ||
3417 | - } | ||
3418 | + struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3419 | + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap; | ||
3420 | + u8 mode = vmx_msr_bitmap_mode(vcpu); | ||
3421 | + u8 changed = mode ^ vmx->msr_bitmap_mode; | ||
3422 | + | ||
3423 | + if (!changed) | ||
3424 | + return; | ||
3425 | + | ||
3426 | + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, | ||
3427 | + !(mode & MSR_BITMAP_MODE_LM)); | ||
3428 | + | ||
3429 | + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) | ||
3430 | + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); | ||
3431 | + | ||
3432 | + vmx->msr_bitmap_mode = mode; | ||
3433 | } | ||
3434 | |||
3435 | static bool vmx_get_enable_apicv(void) | ||
3436 | @@ -4738,30 +4870,45 @@ static bool vmx_get_enable_apicv(void) | ||
3437 | return enable_apicv; | ||
3438 | } | ||
3439 | |||
3440 | -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | ||
3441 | +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu) | ||
3442 | +{ | ||
3443 | + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | ||
3444 | + gfn_t gfn; | ||
3445 | + | ||
3446 | + /* | ||
3447 | + * Don't need to mark the APIC access page dirty; it is never | ||
3448 | + * written to by the CPU during APIC virtualization. | ||
3449 | + */ | ||
3450 | + | ||
3451 | + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { | ||
3452 | + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT; | ||
3453 | + kvm_vcpu_mark_page_dirty(vcpu, gfn); | ||
3454 | + } | ||
3455 | + | ||
3456 | + if (nested_cpu_has_posted_intr(vmcs12)) { | ||
3457 | + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT; | ||
3458 | + kvm_vcpu_mark_page_dirty(vcpu, gfn); | ||
3459 | + } | ||
3460 | +} | ||
3461 | + | ||
3462 | + | ||
3463 | +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | ||
3464 | { | ||
3465 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
3466 | int max_irr; | ||
3467 | void *vapic_page; | ||
3468 | u16 status; | ||
3469 | |||
3470 | - if (vmx->nested.pi_desc && | ||
3471 | - vmx->nested.pi_pending) { | ||
3472 | - vmx->nested.pi_pending = false; | ||
3473 | - if (!pi_test_and_clear_on(vmx->nested.pi_desc)) | ||
3474 | - return 0; | ||
3475 | - | ||
3476 | - max_irr = find_last_bit( | ||
3477 | - (unsigned long *)vmx->nested.pi_desc->pir, 256); | ||
3478 | + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending) | ||
3479 | + return; | ||
3480 | |||
3481 | - if (max_irr == 256) | ||
3482 | - return 0; | ||
3483 | + vmx->nested.pi_pending = false; | ||
3484 | + if (!pi_test_and_clear_on(vmx->nested.pi_desc)) | ||
3485 | + return; | ||
3486 | |||
3487 | + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256); | ||
3488 | + if (max_irr != 256) { | ||
3489 | vapic_page = kmap(vmx->nested.virtual_apic_page); | ||
3490 | - if (!vapic_page) { | ||
3491 | - WARN_ON(1); | ||
3492 | - return -ENOMEM; | ||
3493 | - } | ||
3494 | __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); | ||
3495 | kunmap(vmx->nested.virtual_apic_page); | ||
3496 | |||
3497 | @@ -4772,7 +4919,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) | ||
3498 | vmcs_write16(GUEST_INTR_STATUS, status); | ||
3499 | } | ||
3500 | } | ||
3501 | - return 0; | ||
3502 | + | ||
3503 | + nested_mark_vmcs12_pages_dirty(vcpu); | ||
3504 | } | ||
3505 | |||
3506 | static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) | ||
3507 | @@ -4959,7 +5107,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) | ||
3508 | } | ||
3509 | |||
3510 | if (cpu_has_vmx_msr_bitmap()) | ||
3511 | - vmx_set_msr_bitmap(vcpu); | ||
3512 | + vmx_update_msr_bitmap(vcpu); | ||
3513 | } | ||
3514 | |||
3515 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | ||
3516 | @@ -5048,7 +5196,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||
3517 | vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); | ||
3518 | } | ||
3519 | if (cpu_has_vmx_msr_bitmap()) | ||
3520 | - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); | ||
3521 | + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); | ||
3522 | |||
3523 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | ||
3524 | |||
3525 | @@ -5122,6 +5270,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | ||
3526 | ++vmx->nmsrs; | ||
3527 | } | ||
3528 | |||
3529 | + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) | ||
3530 | + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); | ||
3531 | |||
3532 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); | ||
3533 | |||
3534 | @@ -5150,6 +5300,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) | ||
3535 | u64 cr0; | ||
3536 | |||
3537 | vmx->rmode.vm86_active = 0; | ||
3538 | + vmx->spec_ctrl = 0; | ||
3539 | |||
3540 | vmx->soft_vnmi_blocked = 0; | ||
3541 | |||
3542 | @@ -6379,7 +6530,7 @@ static void wakeup_handler(void) | ||
3543 | |||
3544 | static __init int hardware_setup(void) | ||
3545 | { | ||
3546 | - int r = -ENOMEM, i, msr; | ||
3547 | + int r = -ENOMEM, i; | ||
3548 | |||
3549 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
3550 | |||
3551 | @@ -6394,41 +6545,13 @@ static __init int hardware_setup(void) | ||
3552 | if (!vmx_io_bitmap_b) | ||
3553 | goto out; | ||
3554 | |||
3555 | - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3556 | - if (!vmx_msr_bitmap_legacy) | ||
3557 | - goto out1; | ||
3558 | - | ||
3559 | - vmx_msr_bitmap_legacy_x2apic = | ||
3560 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3561 | - if (!vmx_msr_bitmap_legacy_x2apic) | ||
3562 | - goto out2; | ||
3563 | - | ||
3564 | - vmx_msr_bitmap_legacy_x2apic_apicv_inactive = | ||
3565 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3566 | - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive) | ||
3567 | - goto out3; | ||
3568 | - | ||
3569 | - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3570 | - if (!vmx_msr_bitmap_longmode) | ||
3571 | - goto out4; | ||
3572 | - | ||
3573 | - vmx_msr_bitmap_longmode_x2apic = | ||
3574 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3575 | - if (!vmx_msr_bitmap_longmode_x2apic) | ||
3576 | - goto out5; | ||
3577 | - | ||
3578 | - vmx_msr_bitmap_longmode_x2apic_apicv_inactive = | ||
3579 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3580 | - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive) | ||
3581 | - goto out6; | ||
3582 | - | ||
3583 | vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3584 | if (!vmx_vmread_bitmap) | ||
3585 | - goto out7; | ||
3586 | + goto out1; | ||
3587 | |||
3588 | vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3589 | if (!vmx_vmwrite_bitmap) | ||
3590 | - goto out8; | ||
3591 | + goto out2; | ||
3592 | |||
3593 | memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE); | ||
3594 | memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); | ||
3595 | @@ -6437,12 +6560,9 @@ static __init int hardware_setup(void) | ||
3596 | |||
3597 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); | ||
3598 | |||
3599 | - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); | ||
3600 | - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); | ||
3601 | - | ||
3602 | if (setup_vmcs_config(&vmcs_config) < 0) { | ||
3603 | r = -EIO; | ||
3604 | - goto out9; | ||
3605 | + goto out3; | ||
3606 | } | ||
3607 | |||
3608 | if (boot_cpu_has(X86_FEATURE_NX)) | ||
3609 | @@ -6499,47 +6619,8 @@ static __init int hardware_setup(void) | ||
3610 | kvm_tsc_scaling_ratio_frac_bits = 48; | ||
3611 | } | ||
3612 | |||
3613 | - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); | ||
3614 | - vmx_disable_intercept_for_msr(MSR_GS_BASE, false); | ||
3615 | - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); | ||
3616 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | ||
3617 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | ||
3618 | - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | ||
3619 | - | ||
3620 | - memcpy(vmx_msr_bitmap_legacy_x2apic, | ||
3621 | - vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
3622 | - memcpy(vmx_msr_bitmap_longmode_x2apic, | ||
3623 | - vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
3624 | - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive, | ||
3625 | - vmx_msr_bitmap_legacy, PAGE_SIZE); | ||
3626 | - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive, | ||
3627 | - vmx_msr_bitmap_longmode, PAGE_SIZE); | ||
3628 | - | ||
3629 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | ||
3630 | |||
3631 | - /* | ||
3632 | - * enable_apicv && kvm_vcpu_apicv_active() | ||
3633 | - */ | ||
3634 | - for (msr = 0x800; msr <= 0x8ff; msr++) | ||
3635 | - vmx_disable_intercept_msr_read_x2apic(msr, true); | ||
3636 | - | ||
3637 | - /* TMCCT */ | ||
3638 | - vmx_enable_intercept_msr_read_x2apic(0x839, true); | ||
3639 | - /* TPR */ | ||
3640 | - vmx_disable_intercept_msr_write_x2apic(0x808, true); | ||
3641 | - /* EOI */ | ||
3642 | - vmx_disable_intercept_msr_write_x2apic(0x80b, true); | ||
3643 | - /* SELF-IPI */ | ||
3644 | - vmx_disable_intercept_msr_write_x2apic(0x83f, true); | ||
3645 | - | ||
3646 | - /* | ||
3647 | - * (enable_apicv && !kvm_vcpu_apicv_active()) || | ||
3648 | - * !enable_apicv | ||
3649 | - */ | ||
3650 | - /* TPR */ | ||
3651 | - vmx_disable_intercept_msr_read_x2apic(0x808, false); | ||
3652 | - vmx_disable_intercept_msr_write_x2apic(0x808, false); | ||
3653 | - | ||
3654 | if (enable_ept) { | ||
3655 | kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, | ||
3656 | (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, | ||
3657 | @@ -6585,22 +6666,10 @@ static __init int hardware_setup(void) | ||
3658 | |||
3659 | return alloc_kvm_area(); | ||
3660 | |||
3661 | -out9: | ||
3662 | - free_page((unsigned long)vmx_vmwrite_bitmap); | ||
3663 | -out8: | ||
3664 | - free_page((unsigned long)vmx_vmread_bitmap); | ||
3665 | -out7: | ||
3666 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); | ||
3667 | -out6: | ||
3668 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
3669 | -out5: | ||
3670 | - free_page((unsigned long)vmx_msr_bitmap_longmode); | ||
3671 | -out4: | ||
3672 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); | ||
3673 | out3: | ||
3674 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
3675 | + free_page((unsigned long)vmx_vmwrite_bitmap); | ||
3676 | out2: | ||
3677 | - free_page((unsigned long)vmx_msr_bitmap_legacy); | ||
3678 | + free_page((unsigned long)vmx_vmread_bitmap); | ||
3679 | out1: | ||
3680 | free_page((unsigned long)vmx_io_bitmap_b); | ||
3681 | out: | ||
3682 | @@ -6611,12 +6680,6 @@ static __init int hardware_setup(void) | ||
3683 | |||
3684 | static __exit void hardware_unsetup(void) | ||
3685 | { | ||
3686 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); | ||
3687 | - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive); | ||
3688 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); | ||
3689 | - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive); | ||
3690 | - free_page((unsigned long)vmx_msr_bitmap_legacy); | ||
3691 | - free_page((unsigned long)vmx_msr_bitmap_longmode); | ||
3692 | free_page((unsigned long)vmx_io_bitmap_b); | ||
3693 | free_page((unsigned long)vmx_io_bitmap_a); | ||
3694 | free_page((unsigned long)vmx_vmwrite_bitmap); | ||
3695 | @@ -6663,94 +6726,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu) | ||
3696 | return handle_nop(vcpu); | ||
3697 | } | ||
3698 | |||
3699 | -/* | ||
3700 | - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12. | ||
3701 | - * We could reuse a single VMCS for all the L2 guests, but we also want the | ||
3702 | - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this | ||
3703 | - * allows keeping them loaded on the processor, and in the future will allow | ||
3704 | - * optimizations where prepare_vmcs02 doesn't need to set all the fields on | ||
3705 | - * every entry if they never change. | ||
3706 | - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE | ||
3707 | - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first. | ||
3708 | - * | ||
3709 | - * The following functions allocate and free a vmcs02 in this pool. | ||
3710 | - */ | ||
3711 | - | ||
3712 | -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */ | ||
3713 | -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) | ||
3714 | -{ | ||
3715 | - struct vmcs02_list *item; | ||
3716 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
3717 | - if (item->vmptr == vmx->nested.current_vmptr) { | ||
3718 | - list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
3719 | - return &item->vmcs02; | ||
3720 | - } | ||
3721 | - | ||
3722 | - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) { | ||
3723 | - /* Recycle the least recently used VMCS. */ | ||
3724 | - item = list_last_entry(&vmx->nested.vmcs02_pool, | ||
3725 | - struct vmcs02_list, list); | ||
3726 | - item->vmptr = vmx->nested.current_vmptr; | ||
3727 | - list_move(&item->list, &vmx->nested.vmcs02_pool); | ||
3728 | - return &item->vmcs02; | ||
3729 | - } | ||
3730 | - | ||
3731 | - /* Create a new VMCS */ | ||
3732 | - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); | ||
3733 | - if (!item) | ||
3734 | - return NULL; | ||
3735 | - item->vmcs02.vmcs = alloc_vmcs(); | ||
3736 | - item->vmcs02.shadow_vmcs = NULL; | ||
3737 | - if (!item->vmcs02.vmcs) { | ||
3738 | - kfree(item); | ||
3739 | - return NULL; | ||
3740 | - } | ||
3741 | - loaded_vmcs_init(&item->vmcs02); | ||
3742 | - item->vmptr = vmx->nested.current_vmptr; | ||
3743 | - list_add(&(item->list), &(vmx->nested.vmcs02_pool)); | ||
3744 | - vmx->nested.vmcs02_num++; | ||
3745 | - return &item->vmcs02; | ||
3746 | -} | ||
3747 | - | ||
3748 | -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */ | ||
3749 | -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) | ||
3750 | -{ | ||
3751 | - struct vmcs02_list *item; | ||
3752 | - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list) | ||
3753 | - if (item->vmptr == vmptr) { | ||
3754 | - free_loaded_vmcs(&item->vmcs02); | ||
3755 | - list_del(&item->list); | ||
3756 | - kfree(item); | ||
3757 | - vmx->nested.vmcs02_num--; | ||
3758 | - return; | ||
3759 | - } | ||
3760 | -} | ||
3761 | - | ||
3762 | -/* | ||
3763 | - * Free all VMCSs saved for this vcpu, except the one pointed by | ||
3764 | - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs | ||
3765 | - * must be &vmx->vmcs01. | ||
3766 | - */ | ||
3767 | -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | ||
3768 | -{ | ||
3769 | - struct vmcs02_list *item, *n; | ||
3770 | - | ||
3771 | - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); | ||
3772 | - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { | ||
3773 | - /* | ||
3774 | - * Something will leak if the above WARN triggers. Better than | ||
3775 | - * a use-after-free. | ||
3776 | - */ | ||
3777 | - if (vmx->loaded_vmcs == &item->vmcs02) | ||
3778 | - continue; | ||
3779 | - | ||
3780 | - free_loaded_vmcs(&item->vmcs02); | ||
3781 | - list_del(&item->list); | ||
3782 | - kfree(item); | ||
3783 | - vmx->nested.vmcs02_num--; | ||
3784 | - } | ||
3785 | -} | ||
3786 | - | ||
3787 | /* | ||
3788 | * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), | ||
3789 | * set the success or error code of an emulated VMX instruction, as specified | ||
3790 | @@ -7025,6 +7000,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | ||
3791 | struct vmcs *shadow_vmcs; | ||
3792 | const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | ||
3793 | | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; | ||
3794 | + int r; | ||
3795 | |||
3796 | /* The Intel VMX Instruction Reference lists a bunch of bits that | ||
3797 | * are prerequisite to running VMXON, most notably cr4.VMXE must be | ||
3798 | @@ -7064,12 +7040,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | ||
3799 | return 1; | ||
3800 | } | ||
3801 | |||
3802 | - if (cpu_has_vmx_msr_bitmap()) { | ||
3803 | - vmx->nested.msr_bitmap = | ||
3804 | - (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3805 | - if (!vmx->nested.msr_bitmap) | ||
3806 | - goto out_msr_bitmap; | ||
3807 | - } | ||
3808 | + r = alloc_loaded_vmcs(&vmx->nested.vmcs02); | ||
3809 | + if (r < 0) | ||
3810 | + goto out_vmcs02; | ||
3811 | |||
3812 | vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); | ||
3813 | if (!vmx->nested.cached_vmcs12) | ||
3814 | @@ -7086,9 +7059,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | ||
3815 | vmx->vmcs01.shadow_vmcs = shadow_vmcs; | ||
3816 | } | ||
3817 | |||
3818 | - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | ||
3819 | - vmx->nested.vmcs02_num = 0; | ||
3820 | - | ||
3821 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
3822 | HRTIMER_MODE_REL_PINNED); | ||
3823 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
3824 | @@ -7103,9 +7073,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | ||
3825 | kfree(vmx->nested.cached_vmcs12); | ||
3826 | |||
3827 | out_cached_vmcs12: | ||
3828 | - free_page((unsigned long)vmx->nested.msr_bitmap); | ||
3829 | + free_loaded_vmcs(&vmx->nested.vmcs02); | ||
3830 | |||
3831 | -out_msr_bitmap: | ||
3832 | +out_vmcs02: | ||
3833 | return -ENOMEM; | ||
3834 | } | ||
3835 | |||
3836 | @@ -7181,17 +7151,13 @@ static void free_nested(struct vcpu_vmx *vmx) | ||
3837 | vmx->nested.vmxon = false; | ||
3838 | free_vpid(vmx->nested.vpid02); | ||
3839 | nested_release_vmcs12(vmx); | ||
3840 | - if (vmx->nested.msr_bitmap) { | ||
3841 | - free_page((unsigned long)vmx->nested.msr_bitmap); | ||
3842 | - vmx->nested.msr_bitmap = NULL; | ||
3843 | - } | ||
3844 | if (enable_shadow_vmcs) { | ||
3845 | vmcs_clear(vmx->vmcs01.shadow_vmcs); | ||
3846 | free_vmcs(vmx->vmcs01.shadow_vmcs); | ||
3847 | vmx->vmcs01.shadow_vmcs = NULL; | ||
3848 | } | ||
3849 | kfree(vmx->nested.cached_vmcs12); | ||
3850 | - /* Unpin physical memory we referred to in current vmcs02 */ | ||
3851 | + /* Unpin physical memory we referred to in the vmcs02 */ | ||
3852 | if (vmx->nested.apic_access_page) { | ||
3853 | nested_release_page(vmx->nested.apic_access_page); | ||
3854 | vmx->nested.apic_access_page = NULL; | ||
3855 | @@ -7207,7 +7173,7 @@ static void free_nested(struct vcpu_vmx *vmx) | ||
3856 | vmx->nested.pi_desc = NULL; | ||
3857 | } | ||
3858 | |||
3859 | - nested_free_all_saved_vmcss(vmx); | ||
3860 | + free_loaded_vmcs(&vmx->nested.vmcs02); | ||
3861 | } | ||
3862 | |||
3863 | /* Emulate the VMXOFF instruction */ | ||
3864 | @@ -7241,8 +7207,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | ||
3865 | vmptr + offsetof(struct vmcs12, launch_state), | ||
3866 | &zero, sizeof(zero)); | ||
3867 | |||
3868 | - nested_free_vmcs02(vmx, vmptr); | ||
3869 | - | ||
3870 | skip_emulated_instruction(vcpu); | ||
3871 | nested_vmx_succeed(vcpu); | ||
3872 | return 1; | ||
3873 | @@ -8029,6 +7993,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | ||
3874 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), | ||
3875 | KVM_ISA_VMX); | ||
3876 | |||
3877 | + /* | ||
3878 | + * The host physical addresses of some pages of guest memory | ||
3879 | + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC | ||
3880 | + * Page). The CPU may write to these pages via their host | ||
3881 | + * physical address while L2 is running, bypassing any | ||
3882 | + * address-translation-based dirty tracking (e.g. EPT write | ||
3883 | + * protection). | ||
3884 | + * | ||
3885 | + * Mark them dirty on every exit from L2 to prevent them from | ||
3886 | + * getting out of sync with dirty tracking. | ||
3887 | + */ | ||
3888 | + nested_mark_vmcs12_pages_dirty(vcpu); | ||
3889 | + | ||
3890 | if (vmx->nested.nested_run_pending) | ||
3891 | return false; | ||
3892 | |||
3893 | @@ -8520,7 +8497,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) | ||
3894 | } | ||
3895 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); | ||
3896 | |||
3897 | - vmx_set_msr_bitmap(vcpu); | ||
3898 | + vmx_update_msr_bitmap(vcpu); | ||
3899 | } | ||
3900 | |||
3901 | static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) | ||
3902 | @@ -8676,14 +8653,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | ||
3903 | #endif | ||
3904 | "pushf\n\t" | ||
3905 | __ASM_SIZE(push) " $%c[cs]\n\t" | ||
3906 | - "call *%[entry]\n\t" | ||
3907 | + CALL_NOSPEC | ||
3908 | : | ||
3909 | #ifdef CONFIG_X86_64 | ||
3910 | [sp]"=&r"(tmp), | ||
3911 | #endif | ||
3912 | "+r"(__sp) | ||
3913 | : | ||
3914 | - [entry]"r"(entry), | ||
3915 | + THUNK_TARGET(entry), | ||
3916 | [ss]"i"(__KERNEL_DS), | ||
3917 | [cs]"i"(__KERNEL_CS) | ||
3918 | ); | ||
3919 | @@ -8909,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||
3920 | |||
3921 | vmx_arm_hv_timer(vcpu); | ||
3922 | |||
3923 | + /* | ||
3924 | + * If this vCPU has touched SPEC_CTRL, restore the guest's value if | ||
3925 | + * it's non-zero. Since vmentry is serialising on affected CPUs, there | ||
3926 | + * is no need to worry about the conditional branch over the wrmsr | ||
3927 | + * being speculatively taken. | ||
3928 | + */ | ||
3929 | + if (vmx->spec_ctrl) | ||
3930 | + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); | ||
3931 | + | ||
3932 | vmx->__launched = vmx->loaded_vmcs->launched; | ||
3933 | asm( | ||
3934 | /* Store host registers */ | ||
3935 | @@ -9027,6 +9013,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||
3936 | #endif | ||
3937 | ); | ||
3938 | |||
3939 | + /* | ||
3940 | + * We do not use IBRS in the kernel. If this vCPU has used the | ||
3941 | + * SPEC_CTRL MSR it may have left it on; save the value and | ||
3942 | + * turn it off. This is much more efficient than blindly adding | ||
3943 | + * it to the atomic save/restore list. Especially as the former | ||
3944 | + * (Saving guest MSRs on vmexit) doesn't even exist in KVM. | ||
3945 | + * | ||
3946 | + * For non-nested case: | ||
3947 | + * If the L01 MSR bitmap does not intercept the MSR, then we need to | ||
3948 | + * save it. | ||
3949 | + * | ||
3950 | + * For nested case: | ||
3951 | + * If the L02 MSR bitmap does not intercept the MSR, then we need to | ||
3952 | + * save it. | ||
3953 | + */ | ||
3954 | + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) | ||
3955 | + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); | ||
3956 | + | ||
3957 | + if (vmx->spec_ctrl) | ||
3958 | + wrmsrl(MSR_IA32_SPEC_CTRL, 0); | ||
3959 | + | ||
3960 | /* Eliminate branch target predictions from guest mode */ | ||
3961 | vmexit_fill_RSB(); | ||
3962 | |||
3963 | @@ -9140,6 +9147,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||
3964 | { | ||
3965 | int err; | ||
3966 | struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); | ||
3967 | + unsigned long *msr_bitmap; | ||
3968 | int cpu; | ||
3969 | |||
3970 | if (!vmx) | ||
3971 | @@ -9172,17 +9180,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | ||
3972 | if (!vmx->guest_msrs) | ||
3973 | goto free_pml; | ||
3974 | |||
3975 | - vmx->loaded_vmcs = &vmx->vmcs01; | ||
3976 | - vmx->loaded_vmcs->vmcs = alloc_vmcs(); | ||
3977 | - vmx->loaded_vmcs->shadow_vmcs = NULL; | ||
3978 | - if (!vmx->loaded_vmcs->vmcs) | ||
3979 | - goto free_msrs; | ||
3980 | if (!vmm_exclusive) | ||
3981 | kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); | ||
3982 | - loaded_vmcs_init(vmx->loaded_vmcs); | ||
3983 | + err = alloc_loaded_vmcs(&vmx->vmcs01); | ||
3984 | if (!vmm_exclusive) | ||
3985 | kvm_cpu_vmxoff(); | ||
3986 | + if (err < 0) | ||
3987 | + goto free_msrs; | ||
3988 | |||
3989 | + msr_bitmap = vmx->vmcs01.msr_bitmap; | ||
3990 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW); | ||
3991 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW); | ||
3992 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW); | ||
3993 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); | ||
3994 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); | ||
3995 | + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); | ||
3996 | + vmx->msr_bitmap_mode = 0; | ||
3997 | + | ||
3998 | + vmx->loaded_vmcs = &vmx->vmcs01; | ||
3999 | cpu = get_cpu(); | ||
4000 | vmx_vcpu_load(&vmx->vcpu, cpu); | ||
4001 | vmx->vcpu.cpu = cpu; | ||
4002 | @@ -9576,21 +9591,31 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | ||
4003 | int msr; | ||
4004 | struct page *page; | ||
4005 | unsigned long *msr_bitmap_l1; | ||
4006 | - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; | ||
4007 | + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; | ||
4008 | + /* | ||
4009 | + * pred_cmd & spec_ctrl are trying to verify two things: | ||
4010 | + * | ||
4011 | + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This | ||
4012 | + * ensures that we do not accidentally generate an L02 MSR bitmap | ||
4013 | + * from the L12 MSR bitmap that is too permissive. | ||
4014 | + * 2. That L1 or L2s have actually used the MSR. This avoids | ||
4015 | + * unnecessarily merging of the bitmap if the MSR is unused. This | ||
4016 | + * works properly because we only update the L01 MSR bitmap lazily. | ||
4017 | + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only | ||
4018 | + * updated to reflect this when L1 (or its L2s) actually write to | ||
4019 | + * the MSR. | ||
4020 | + */ | ||
4021 | + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD); | ||
4022 | + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL); | ||
4023 | |||
4024 | - /* This shortcut is ok because we support only x2APIC MSRs so far. */ | ||
4025 | - if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) | ||
4026 | + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) && | ||
4027 | + !pred_cmd && !spec_ctrl) | ||
4028 | return false; | ||
4029 | |||
4030 | page = nested_get_page(vcpu, vmcs12->msr_bitmap); | ||
4031 | if (!page) | ||
4032 | return false; | ||
4033 | msr_bitmap_l1 = (unsigned long *)kmap(page); | ||
4034 | - if (!msr_bitmap_l1) { | ||
4035 | - nested_release_page_clean(page); | ||
4036 | - WARN_ON(1); | ||
4037 | - return false; | ||
4038 | - } | ||
4039 | |||
4040 | memset(msr_bitmap_l0, 0xff, PAGE_SIZE); | ||
4041 | |||
4042 | @@ -9617,6 +9642,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, | ||
4043 | MSR_TYPE_W); | ||
4044 | } | ||
4045 | } | ||
4046 | + | ||
4047 | + if (spec_ctrl) | ||
4048 | + nested_vmx_disable_intercept_for_msr( | ||
4049 | + msr_bitmap_l1, msr_bitmap_l0, | ||
4050 | + MSR_IA32_SPEC_CTRL, | ||
4051 | + MSR_TYPE_R | MSR_TYPE_W); | ||
4052 | + | ||
4053 | + if (pred_cmd) | ||
4054 | + nested_vmx_disable_intercept_for_msr( | ||
4055 | + msr_bitmap_l1, msr_bitmap_l0, | ||
4056 | + MSR_IA32_PRED_CMD, | ||
4057 | + MSR_TYPE_W); | ||
4058 | + | ||
4059 | kunmap(page); | ||
4060 | nested_release_page_clean(page); | ||
4061 | |||
4062 | @@ -10096,6 +10134,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | ||
4063 | if (kvm_has_tsc_control) | ||
4064 | decache_tsc_multiplier(vmx); | ||
4065 | |||
4066 | + if (cpu_has_vmx_msr_bitmap()) | ||
4067 | + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); | ||
4068 | + | ||
4069 | if (enable_vpid) { | ||
4070 | /* | ||
4071 | * There is no direct mapping between vpid02 and vpid12, the | ||
4072 | @@ -10191,7 +10232,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
4073 | struct vmcs12 *vmcs12; | ||
4074 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4075 | int cpu; | ||
4076 | - struct loaded_vmcs *vmcs02; | ||
4077 | bool ia32e; | ||
4078 | u32 msr_entry_idx; | ||
4079 | |||
4080 | @@ -10331,17 +10371,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | ||
4081 | * the nested entry. | ||
4082 | */ | ||
4083 | |||
4084 | - vmcs02 = nested_get_current_vmcs02(vmx); | ||
4085 | - if (!vmcs02) | ||
4086 | - return -ENOMEM; | ||
4087 | - | ||
4088 | enter_guest_mode(vcpu); | ||
4089 | |||
4090 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | ||
4091 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
4092 | |||
4093 | cpu = get_cpu(); | ||
4094 | - vmx->loaded_vmcs = vmcs02; | ||
4095 | + vmx->loaded_vmcs = &vmx->nested.vmcs02; | ||
4096 | vmx_vcpu_put(vcpu); | ||
4097 | vmx_vcpu_load(vcpu, cpu); | ||
4098 | vcpu->cpu = cpu; | ||
4099 | @@ -10493,7 +10529,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | ||
4100 | return 0; | ||
4101 | } | ||
4102 | |||
4103 | - return vmx_complete_nested_posted_interrupt(vcpu); | ||
4104 | + vmx_complete_nested_posted_interrupt(vcpu); | ||
4105 | + return 0; | ||
4106 | } | ||
4107 | |||
4108 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | ||
4109 | @@ -10804,7 +10841,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | ||
4110 | vmcs_write64(GUEST_IA32_DEBUGCTL, 0); | ||
4111 | |||
4112 | if (cpu_has_vmx_msr_bitmap()) | ||
4113 | - vmx_set_msr_bitmap(vcpu); | ||
4114 | + vmx_update_msr_bitmap(vcpu); | ||
4115 | |||
4116 | if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr, | ||
4117 | vmcs12->vm_exit_msr_load_count)) | ||
4118 | @@ -10855,10 +10892,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | ||
4119 | vm_exit_controls_reset_shadow(vmx); | ||
4120 | vmx_segment_cache_clear(vmx); | ||
4121 | |||
4122 | - /* if no vmcs02 cache requested, remove the one we used */ | ||
4123 | - if (VMCS02_POOL_SIZE == 0) | ||
4124 | - nested_free_vmcs02(vmx, vmx->nested.current_vmptr); | ||
4125 | - | ||
4126 | load_vmcs12_host_state(vcpu, vmcs12); | ||
4127 | |||
4128 | /* Update any VMCS fields that might have changed while L2 ran */ | ||
4129 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | ||
4130 | index e023ef981feb..75f756eac979 100644 | ||
4131 | --- a/arch/x86/kvm/x86.c | ||
4132 | +++ b/arch/x86/kvm/x86.c | ||
4133 | @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { | ||
4134 | #endif | ||
4135 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, | ||
4136 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, | ||
4137 | + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES | ||
4138 | }; | ||
4139 | |||
4140 | static unsigned num_msrs_to_save; | ||
4141 | diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile | ||
4142 | index 6bf1898ddf49..4ad7c4dd311c 100644 | ||
4143 | --- a/arch/x86/lib/Makefile | ||
4144 | +++ b/arch/x86/lib/Makefile | ||
4145 | @@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | ||
4146 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o | ||
4147 | lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o | ||
4148 | lib-$(CONFIG_RETPOLINE) += retpoline.o | ||
4149 | +OBJECT_FILES_NON_STANDARD_retpoline.o :=y | ||
4150 | |||
4151 | obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o | ||
4152 | |||
4153 | diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S | ||
4154 | index 37b62d412148..b12b214713a6 100644 | ||
4155 | --- a/arch/x86/lib/getuser.S | ||
4156 | +++ b/arch/x86/lib/getuser.S | ||
4157 | @@ -39,6 +39,8 @@ ENTRY(__get_user_1) | ||
4158 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
4159 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
4160 | jae bad_get_user | ||
4161 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
4162 | + and %_ASM_DX, %_ASM_AX | ||
4163 | ASM_STAC | ||
4164 | 1: movzbl (%_ASM_AX),%edx | ||
4165 | xor %eax,%eax | ||
4166 | @@ -53,6 +55,8 @@ ENTRY(__get_user_2) | ||
4167 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
4168 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
4169 | jae bad_get_user | ||
4170 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
4171 | + and %_ASM_DX, %_ASM_AX | ||
4172 | ASM_STAC | ||
4173 | 2: movzwl -1(%_ASM_AX),%edx | ||
4174 | xor %eax,%eax | ||
4175 | @@ -67,6 +71,8 @@ ENTRY(__get_user_4) | ||
4176 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
4177 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
4178 | jae bad_get_user | ||
4179 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
4180 | + and %_ASM_DX, %_ASM_AX | ||
4181 | ASM_STAC | ||
4182 | 3: movl -3(%_ASM_AX),%edx | ||
4183 | xor %eax,%eax | ||
4184 | @@ -82,6 +88,8 @@ ENTRY(__get_user_8) | ||
4185 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
4186 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
4187 | jae bad_get_user | ||
4188 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
4189 | + and %_ASM_DX, %_ASM_AX | ||
4190 | ASM_STAC | ||
4191 | 4: movq -7(%_ASM_AX),%rdx | ||
4192 | xor %eax,%eax | ||
4193 | @@ -93,6 +101,8 @@ ENTRY(__get_user_8) | ||
4194 | mov PER_CPU_VAR(current_task), %_ASM_DX | ||
4195 | cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX | ||
4196 | jae bad_get_user_8 | ||
4197 | + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ | ||
4198 | + and %_ASM_DX, %_ASM_AX | ||
4199 | ASM_STAC | ||
4200 | 4: movl -7(%_ASM_AX),%edx | ||
4201 | 5: movl -3(%_ASM_AX),%ecx | ||
4202 | diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S | ||
4203 | index dfb2ba91b670..480edc3a5e03 100644 | ||
4204 | --- a/arch/x86/lib/retpoline.S | ||
4205 | +++ b/arch/x86/lib/retpoline.S | ||
4206 | @@ -7,6 +7,7 @@ | ||
4207 | #include <asm/alternative-asm.h> | ||
4208 | #include <asm/export.h> | ||
4209 | #include <asm/nospec-branch.h> | ||
4210 | +#include <asm/bitsperlong.h> | ||
4211 | |||
4212 | .macro THUNK reg | ||
4213 | .section .text.__x86.indirect_thunk | ||
4214 | @@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX) | ||
4215 | GENERATE_THUNK(_ASM_SI) | ||
4216 | GENERATE_THUNK(_ASM_DI) | ||
4217 | GENERATE_THUNK(_ASM_BP) | ||
4218 | -GENERATE_THUNK(_ASM_SP) | ||
4219 | #ifdef CONFIG_64BIT | ||
4220 | GENERATE_THUNK(r8) | ||
4221 | GENERATE_THUNK(r9) | ||
4222 | @@ -47,3 +47,58 @@ GENERATE_THUNK(r13) | ||
4223 | GENERATE_THUNK(r14) | ||
4224 | GENERATE_THUNK(r15) | ||
4225 | #endif | ||
4226 | + | ||
4227 | +/* | ||
4228 | + * Fill the CPU return stack buffer. | ||
4229 | + * | ||
4230 | + * Each entry in the RSB, if used for a speculative 'ret', contains an | ||
4231 | + * infinite 'pause; lfence; jmp' loop to capture speculative execution. | ||
4232 | + * | ||
4233 | + * This is required in various cases for retpoline and IBRS-based | ||
4234 | + * mitigations for the Spectre variant 2 vulnerability. Sometimes to | ||
4235 | + * eliminate potentially bogus entries from the RSB, and sometimes | ||
4236 | + * purely to ensure that it doesn't get empty, which on some CPUs would | ||
4237 | + * allow predictions from other (unwanted!) sources to be used. | ||
4238 | + * | ||
4239 | + * Google experimented with loop-unrolling and this turned out to be | ||
4240 | + * the optimal version - two calls, each with their own speculation | ||
4241 | + * trap should their return address end up getting used, in a loop. | ||
4242 | + */ | ||
4243 | +.macro STUFF_RSB nr:req sp:req | ||
4244 | + mov $(\nr / 2), %_ASM_BX | ||
4245 | + .align 16 | ||
4246 | +771: | ||
4247 | + call 772f | ||
4248 | +773: /* speculation trap */ | ||
4249 | + pause | ||
4250 | + lfence | ||
4251 | + jmp 773b | ||
4252 | + .align 16 | ||
4253 | +772: | ||
4254 | + call 774f | ||
4255 | +775: /* speculation trap */ | ||
4256 | + pause | ||
4257 | + lfence | ||
4258 | + jmp 775b | ||
4259 | + .align 16 | ||
4260 | +774: | ||
4261 | + dec %_ASM_BX | ||
4262 | + jnz 771b | ||
4263 | + add $((BITS_PER_LONG/8) * \nr), \sp | ||
4264 | +.endm | ||
4265 | + | ||
4266 | +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ | ||
4267 | + | ||
4268 | +ENTRY(__fill_rsb) | ||
4269 | + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP | ||
4270 | + ret | ||
4271 | +END(__fill_rsb) | ||
4272 | +EXPORT_SYMBOL_GPL(__fill_rsb) | ||
4273 | + | ||
4274 | +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ | ||
4275 | + | ||
4276 | +ENTRY(__clear_rsb) | ||
4277 | + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP | ||
4278 | + ret | ||
4279 | +END(__clear_rsb) | ||
4280 | +EXPORT_SYMBOL_GPL(__clear_rsb) | ||
4281 | diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c | ||
4282 | index 3bc7baf2a711..5c06dbffc52f 100644 | ||
4283 | --- a/arch/x86/lib/usercopy_32.c | ||
4284 | +++ b/arch/x86/lib/usercopy_32.c | ||
4285 | @@ -570,12 +570,12 @@ do { \ | ||
4286 | unsigned long __copy_to_user_ll(void __user *to, const void *from, | ||
4287 | unsigned long n) | ||
4288 | { | ||
4289 | - stac(); | ||
4290 | + __uaccess_begin_nospec(); | ||
4291 | if (movsl_is_ok(to, from, n)) | ||
4292 | __copy_user(to, from, n); | ||
4293 | else | ||
4294 | n = __copy_user_intel(to, from, n); | ||
4295 | - clac(); | ||
4296 | + __uaccess_end(); | ||
4297 | return n; | ||
4298 | } | ||
4299 | EXPORT_SYMBOL(__copy_to_user_ll); | ||
4300 | @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); | ||
4301 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, | ||
4302 | unsigned long n) | ||
4303 | { | ||
4304 | - stac(); | ||
4305 | + __uaccess_begin_nospec(); | ||
4306 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
4307 | if (n > 64 && static_cpu_has(X86_FEATURE_XMM2)) | ||
4308 | n = __copy_user_intel_nocache(to, from, n); | ||
4309 | @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr | ||
4310 | #else | ||
4311 | __copy_user(to, from, n); | ||
4312 | #endif | ||
4313 | - clac(); | ||
4314 | + __uaccess_end(); | ||
4315 | return n; | ||
4316 | } | ||
4317 | EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); | ||
4318 | diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c | ||
4319 | index e3af318af2db..2a07341aca46 100644 | ||
4320 | --- a/crypto/tcrypt.c | ||
4321 | +++ b/crypto/tcrypt.c | ||
4322 | @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], | ||
4323 | } | ||
4324 | |||
4325 | sg_init_table(sg, np + 1); | ||
4326 | - np--; | ||
4327 | + if (rem) | ||
4328 | + np--; | ||
4329 | for (k = 0; k < np; k++) | ||
4330 | sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE); | ||
4331 | |||
4332 | - sg_set_buf(&sg[k + 1], xbuf[k], rem); | ||
4333 | + if (rem) | ||
4334 | + sg_set_buf(&sg[k + 1], xbuf[k], rem); | ||
4335 | } | ||
4336 | |||
4337 | static void test_aead_speed(const char *algo, int enc, unsigned int secs, | ||
4338 | diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c | ||
4339 | index 83f1439e57fd..6e8eaa7fe7a6 100644 | ||
4340 | --- a/drivers/auxdisplay/img-ascii-lcd.c | ||
4341 | +++ b/drivers/auxdisplay/img-ascii-lcd.c | ||
4342 | @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = { | ||
4343 | .remove = img_ascii_lcd_remove, | ||
4344 | }; | ||
4345 | module_platform_driver(img_ascii_lcd_driver); | ||
4346 | + | ||
4347 | +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display"); | ||
4348 | +MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>"); | ||
4349 | +MODULE_LICENSE("GPL"); | ||
4350 | diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c | ||
4351 | index a2ec6d8796a0..3322b157106d 100644 | ||
4352 | --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c | ||
4353 | +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c | ||
4354 | @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc) | ||
4355 | rcrtc->started = true; | ||
4356 | } | ||
4357 | |||
4358 | +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc) | ||
4359 | +{ | ||
4360 | + struct rcar_du_device *rcdu = rcrtc->group->dev; | ||
4361 | + struct drm_crtc *crtc = &rcrtc->crtc; | ||
4362 | + u32 status; | ||
4363 | + /* Make sure vblank interrupts are enabled. */ | ||
4364 | + drm_crtc_vblank_get(crtc); | ||
4365 | + /* | ||
4366 | + * Disable planes and calculate how many vertical blanking interrupts we | ||
4367 | + * have to wait for. If a vertical blanking interrupt has been triggered | ||
4368 | + * but not processed yet, we don't know whether it occurred before or | ||
4369 | + * after the planes got disabled. We thus have to wait for two vblank | ||
4370 | + * interrupts in that case. | ||
4371 | + */ | ||
4372 | + spin_lock_irq(&rcrtc->vblank_lock); | ||
4373 | + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); | ||
4374 | + status = rcar_du_crtc_read(rcrtc, DSSR); | ||
4375 | + rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1; | ||
4376 | + spin_unlock_irq(&rcrtc->vblank_lock); | ||
4377 | + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0, | ||
4378 | + msecs_to_jiffies(100))) | ||
4379 | + dev_warn(rcdu->dev, "vertical blanking timeout\n"); | ||
4380 | + drm_crtc_vblank_put(crtc); | ||
4381 | +} | ||
4382 | + | ||
4383 | static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) | ||
4384 | { | ||
4385 | struct drm_crtc *crtc = &rcrtc->crtc; | ||
4386 | @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc) | ||
4387 | return; | ||
4388 | |||
4389 | /* Disable all planes and wait for the change to take effect. This is | ||
4390 | - * required as the DSnPR registers are updated on vblank, and no vblank | ||
4391 | - * will occur once the CRTC is stopped. Disabling planes when starting | ||
4392 | - * the CRTC thus wouldn't be enough as it would start scanning out | ||
4393 | - * immediately from old frame buffers until the next vblank. | ||
4394 | + * required as the plane enable registers are updated on vblank, and no | ||
4395 | + * vblank will occur once the CRTC is stopped. Disabling planes when | ||
4396 | + * starting the CRTC thus wouldn't be enough as it would start scanning | ||
4397 | + * out immediately from old frame buffers until the next vblank. | ||
4398 | * | ||
4399 | * This increases the CRTC stop delay, especially when multiple CRTCs | ||
4400 | * are stopped in one operation as we now wait for one vblank per CRTC. | ||
4401 | * Whether this can be improved needs to be researched. | ||
4402 | */ | ||
4403 | - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0); | ||
4404 | - drm_crtc_wait_one_vblank(crtc); | ||
4405 | + rcar_du_crtc_disable_planes(rcrtc); | ||
4406 | |||
4407 | /* Disable vertical blanking interrupt reporting. We first need to wait | ||
4408 | * for page flip completion before stopping the CRTC as userspace | ||
4409 | @@ -548,10 +572,25 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) | ||
4410 | irqreturn_t ret = IRQ_NONE; | ||
4411 | u32 status; | ||
4412 | |||
4413 | + spin_lock(&rcrtc->vblank_lock); | ||
4414 | + | ||
4415 | status = rcar_du_crtc_read(rcrtc, DSSR); | ||
4416 | rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK); | ||
4417 | |||
4418 | - if (status & DSSR_FRM) { | ||
4419 | + if (status & DSSR_VBK) { | ||
4420 | + /* | ||
4421 | + * Wake up the vblank wait if the counter reaches 0. This must | ||
4422 | + * be protected by the vblank_lock to avoid races in | ||
4423 | + * rcar_du_crtc_disable_planes(). | ||
4424 | + */ | ||
4425 | + if (rcrtc->vblank_count) { | ||
4426 | + if (--rcrtc->vblank_count == 0) | ||
4427 | + wake_up(&rcrtc->vblank_wait); | ||
4428 | + } | ||
4429 | + } | ||
4430 | + spin_unlock(&rcrtc->vblank_lock); | ||
4431 | + | ||
4432 | + if (status & DSSR_VBK) { | ||
4433 | drm_crtc_handle_vblank(&rcrtc->crtc); | ||
4434 | rcar_du_crtc_finish_page_flip(rcrtc); | ||
4435 | ret = IRQ_HANDLED; | ||
4436 | @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) | ||
4437 | } | ||
4438 | |||
4439 | init_waitqueue_head(&rcrtc->flip_wait); | ||
4440 | + init_waitqueue_head(&rcrtc->vblank_wait); | ||
4441 | + spin_lock_init(&rcrtc->vblank_lock); | ||
4442 | |||
4443 | rcrtc->group = rgrp; | ||
4444 | rcrtc->mmio_offset = mmio_offsets[index]; | ||
4445 | diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h | ||
4446 | index 6f08b7e7db06..48bef05b4c62 100644 | ||
4447 | --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h | ||
4448 | +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h | ||
4449 | @@ -15,6 +15,7 @@ | ||
4450 | #define __RCAR_DU_CRTC_H__ | ||
4451 | |||
4452 | #include <linux/mutex.h> | ||
4453 | +#include <linux/spinlock.h> | ||
4454 | #include <linux/wait.h> | ||
4455 | |||
4456 | #include <drm/drmP.h> | ||
4457 | @@ -33,6 +34,9 @@ struct rcar_du_vsp; | ||
4458 | * @started: whether the CRTC has been started and is running | ||
4459 | * @event: event to post when the pending page flip completes | ||
4460 | * @flip_wait: wait queue used to signal page flip completion | ||
4461 | + * @vblank_lock: protects vblank_wait and vblank_count | ||
4462 | + * @vblank_wait: wait queue used to signal vertical blanking | ||
4463 | + * @vblank_count: number of vertical blanking interrupts to wait for | ||
4464 | * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC | ||
4465 | * @group: CRTC group this CRTC belongs to | ||
4466 | */ | ||
4467 | @@ -48,6 +52,10 @@ struct rcar_du_crtc { | ||
4468 | struct drm_pending_vblank_event *event; | ||
4469 | wait_queue_head_t flip_wait; | ||
4470 | |||
4471 | + spinlock_t vblank_lock; | ||
4472 | + wait_queue_head_t vblank_wait; | ||
4473 | + unsigned int vblank_count; | ||
4474 | + | ||
4475 | unsigned int outputs; | ||
4476 | |||
4477 | struct rcar_du_group *group; | ||
4478 | diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c | ||
4479 | index f77252d6ccd3..d29c24854c2c 100644 | ||
4480 | --- a/drivers/media/platform/soc_camera/soc_scale_crop.c | ||
4481 | +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c | ||
4482 | @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd, | ||
4483 | mf->height = soc_camera_shift_scale(rect->height, shift, scale_v); | ||
4484 | } | ||
4485 | EXPORT_SYMBOL(soc_camera_calc_client_output); | ||
4486 | + | ||
4487 | +MODULE_DESCRIPTION("soc-camera scaling-cropping functions"); | ||
4488 | +MODULE_AUTHOR("Guennadi Liakhovetski <kernel@pengutronix.de>"); | ||
4489 | +MODULE_LICENSE("GPL"); | ||
4490 | diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | ||
4491 | index bdbcd2b088a0..c3c28f0960e5 100644 | ||
4492 | --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | ||
4493 | +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | ||
4494 | @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) | ||
4495 | struct list_head *head = &mbx->cmd_q; | ||
4496 | struct qlcnic_cmd_args *cmd = NULL; | ||
4497 | |||
4498 | - spin_lock(&mbx->queue_lock); | ||
4499 | + spin_lock_bh(&mbx->queue_lock); | ||
4500 | |||
4501 | while (!list_empty(head)) { | ||
4502 | cmd = list_entry(head->next, struct qlcnic_cmd_args, list); | ||
4503 | @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter) | ||
4504 | qlcnic_83xx_notify_cmd_completion(adapter, cmd); | ||
4505 | } | ||
4506 | |||
4507 | - spin_unlock(&mbx->queue_lock); | ||
4508 | + spin_unlock_bh(&mbx->queue_lock); | ||
4509 | } | ||
4510 | |||
4511 | static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter) | ||
4512 | @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter, | ||
4513 | { | ||
4514 | struct qlcnic_mailbox *mbx = adapter->ahw->mailbox; | ||
4515 | |||
4516 | - spin_lock(&mbx->queue_lock); | ||
4517 | + spin_lock_bh(&mbx->queue_lock); | ||
4518 | |||
4519 | list_del(&cmd->list); | ||
4520 | mbx->num_cmds--; | ||
4521 | |||
4522 | - spin_unlock(&mbx->queue_lock); | ||
4523 | + spin_unlock_bh(&mbx->queue_lock); | ||
4524 | |||
4525 | qlcnic_83xx_notify_cmd_completion(adapter, cmd); | ||
4526 | } | ||
4527 | @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, | ||
4528 | init_completion(&cmd->completion); | ||
4529 | cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN; | ||
4530 | |||
4531 | - spin_lock(&mbx->queue_lock); | ||
4532 | + spin_lock_bh(&mbx->queue_lock); | ||
4533 | |||
4534 | list_add_tail(&cmd->list, &mbx->cmd_q); | ||
4535 | mbx->num_cmds++; | ||
4536 | @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter, | ||
4537 | *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT; | ||
4538 | queue_work(mbx->work_q, &mbx->work); | ||
4539 | |||
4540 | - spin_unlock(&mbx->queue_lock); | ||
4541 | + spin_unlock_bh(&mbx->queue_lock); | ||
4542 | |||
4543 | return 0; | ||
4544 | } | ||
4545 | @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work) | ||
4546 | mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT; | ||
4547 | spin_unlock_irqrestore(&mbx->aen_lock, flags); | ||
4548 | |||
4549 | - spin_lock(&mbx->queue_lock); | ||
4550 | + spin_lock_bh(&mbx->queue_lock); | ||
4551 | |||
4552 | if (list_empty(head)) { | ||
4553 | - spin_unlock(&mbx->queue_lock); | ||
4554 | + spin_unlock_bh(&mbx->queue_lock); | ||
4555 | return; | ||
4556 | } | ||
4557 | cmd = list_entry(head->next, struct qlcnic_cmd_args, list); | ||
4558 | |||
4559 | - spin_unlock(&mbx->queue_lock); | ||
4560 | + spin_unlock_bh(&mbx->queue_lock); | ||
4561 | |||
4562 | mbx_ops->encode_cmd(adapter, cmd); | ||
4563 | mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST); | ||
4564 | diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c | ||
4565 | index 298b74ebc1e9..18e68c91e651 100644 | ||
4566 | --- a/drivers/net/ethernet/realtek/r8169.c | ||
4567 | +++ b/drivers/net/ethernet/realtek/r8169.c | ||
4568 | @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) | ||
4569 | { | ||
4570 | void __iomem *ioaddr = tp->mmio_addr; | ||
4571 | |||
4572 | - return RTL_R8(IBISR0) & 0x02; | ||
4573 | + return RTL_R8(IBISR0) & 0x20; | ||
4574 | } | ||
4575 | |||
4576 | static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) | ||
4577 | @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) | ||
4578 | void __iomem *ioaddr = tp->mmio_addr; | ||
4579 | |||
4580 | RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); | ||
4581 | - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); | ||
4582 | + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000); | ||
4583 | RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); | ||
4584 | RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); | ||
4585 | } | ||
4586 | diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c | ||
4587 | index db65d9ad4488..e1e5e8438457 100644 | ||
4588 | --- a/drivers/net/usb/qmi_wwan.c | ||
4589 | +++ b/drivers/net/usb/qmi_wwan.c | ||
4590 | @@ -944,6 +944,7 @@ static const struct usb_device_id products[] = { | ||
4591 | {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ | ||
4592 | {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ | ||
4593 | {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ | ||
4594 | + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */ | ||
4595 | |||
4596 | /* 4. Gobi 1000 devices */ | ||
4597 | {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ | ||
4598 | diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c | ||
4599 | index 6e5d9095b195..a635fc6b1722 100644 | ||
4600 | --- a/drivers/net/wireless/broadcom/b43/main.c | ||
4601 | +++ b/drivers/net/wireless/broadcom/b43/main.c | ||
4602 | @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw"); | ||
4603 | MODULE_FIRMWARE("b43/ucode13.fw"); | ||
4604 | MODULE_FIRMWARE("b43/ucode14.fw"); | ||
4605 | MODULE_FIRMWARE("b43/ucode15.fw"); | ||
4606 | +MODULE_FIRMWARE("b43/ucode16_lp.fw"); | ||
4607 | MODULE_FIRMWARE("b43/ucode16_mimo.fw"); | ||
4608 | +MODULE_FIRMWARE("b43/ucode24_lcn.fw"); | ||
4609 | +MODULE_FIRMWARE("b43/ucode25_lcn.fw"); | ||
4610 | +MODULE_FIRMWARE("b43/ucode25_mimo.fw"); | ||
4611 | +MODULE_FIRMWARE("b43/ucode26_mimo.fw"); | ||
4612 | +MODULE_FIRMWARE("b43/ucode29_mimo.fw"); | ||
4613 | +MODULE_FIRMWARE("b43/ucode33_lcn40.fw"); | ||
4614 | +MODULE_FIRMWARE("b43/ucode30_mimo.fw"); | ||
4615 | MODULE_FIRMWARE("b43/ucode5.fw"); | ||
4616 | +MODULE_FIRMWARE("b43/ucode40.fw"); | ||
4617 | +MODULE_FIRMWARE("b43/ucode42.fw"); | ||
4618 | MODULE_FIRMWARE("b43/ucode9.fw"); | ||
4619 | |||
4620 | static int modparam_bad_frames_preempt; | ||
4621 | diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
4622 | index 866aa3ce1ac9..6cf0006d4c8d 100644 | ||
4623 | --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
4624 | +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c | ||
4625 | @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev) | ||
4626 | return 0; | ||
4627 | } | ||
4628 | EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit); | ||
4629 | + | ||
4630 | +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); | ||
4631 | +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver"); | ||
4632 | +MODULE_LICENSE("GPL v2"); | ||
4633 | diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c | ||
4634 | index f2303f390345..23973a8124fc 100644 | ||
4635 | --- a/drivers/tty/serial/serial_core.c | ||
4636 | +++ b/drivers/tty/serial/serial_core.c | ||
4637 | @@ -965,6 +965,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, | ||
4638 | } | ||
4639 | } else { | ||
4640 | retval = uart_startup(tty, state, 1); | ||
4641 | + if (retval == 0) | ||
4642 | + tty_port_set_initialized(port, true); | ||
4643 | if (retval > 0) | ||
4644 | retval = 0; | ||
4645 | } | ||
4646 | diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c | ||
4647 | index 96a0661011fd..e5b7652234fc 100644 | ||
4648 | --- a/drivers/vhost/net.c | ||
4649 | +++ b/drivers/vhost/net.c | ||
4650 | @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) | ||
4651 | } | ||
4652 | vhost_net_stop(n, &tx_sock, &rx_sock); | ||
4653 | vhost_net_flush(n); | ||
4654 | + vhost_dev_stop(&n->dev); | ||
4655 | vhost_dev_reset_owner(&n->dev, umem); | ||
4656 | vhost_net_vq_reset(n); | ||
4657 | done: | ||
4658 | diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h | ||
4659 | index 6e84b2cae6ad..442b54a14cbc 100644 | ||
4660 | --- a/include/linux/fdtable.h | ||
4661 | +++ b/include/linux/fdtable.h | ||
4662 | @@ -9,6 +9,7 @@ | ||
4663 | #include <linux/compiler.h> | ||
4664 | #include <linux/spinlock.h> | ||
4665 | #include <linux/rcupdate.h> | ||
4666 | +#include <linux/nospec.h> | ||
4667 | #include <linux/types.h> | ||
4668 | #include <linux/init.h> | ||
4669 | #include <linux/fs.h> | ||
4670 | @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i | ||
4671 | { | ||
4672 | struct fdtable *fdt = rcu_dereference_raw(files->fdt); | ||
4673 | |||
4674 | - if (fd < fdt->max_fds) | ||
4675 | + if (fd < fdt->max_fds) { | ||
4676 | + fd = array_index_nospec(fd, fdt->max_fds); | ||
4677 | return rcu_dereference_raw(fdt->fd[fd]); | ||
4678 | + } | ||
4679 | return NULL; | ||
4680 | } | ||
4681 | |||
4682 | diff --git a/include/linux/init.h b/include/linux/init.h | ||
4683 | index e30104ceb86d..8e346d1bd837 100644 | ||
4684 | --- a/include/linux/init.h | ||
4685 | +++ b/include/linux/init.h | ||
4686 | @@ -4,6 +4,13 @@ | ||
4687 | #include <linux/compiler.h> | ||
4688 | #include <linux/types.h> | ||
4689 | |||
4690 | +/* Built-in __init functions needn't be compiled with retpoline */ | ||
4691 | +#if defined(RETPOLINE) && !defined(MODULE) | ||
4692 | +#define __noretpoline __attribute__((indirect_branch("keep"))) | ||
4693 | +#else | ||
4694 | +#define __noretpoline | ||
4695 | +#endif | ||
4696 | + | ||
4697 | /* These macros are used to mark some functions or | ||
4698 | * initialized data (doesn't apply to uninitialized data) | ||
4699 | * as `initialization' functions. The kernel can take this | ||
4700 | @@ -39,7 +46,7 @@ | ||
4701 | |||
4702 | /* These are for everybody (although not all archs will actually | ||
4703 | discard it in modules) */ | ||
4704 | -#define __init __section(.init.text) __cold notrace __latent_entropy | ||
4705 | +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline | ||
4706 | #define __initdata __section(.init.data) | ||
4707 | #define __initconst __section(.init.rodata) | ||
4708 | #define __exitdata __section(.exit.data) | ||
4709 | diff --git a/include/linux/module.h b/include/linux/module.h | ||
4710 | index 0c3207d26ac0..d2224a09b4b5 100644 | ||
4711 | --- a/include/linux/module.h | ||
4712 | +++ b/include/linux/module.h | ||
4713 | @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, | ||
4714 | static inline void module_bug_cleanup(struct module *mod) {} | ||
4715 | #endif /* CONFIG_GENERIC_BUG */ | ||
4716 | |||
4717 | +#ifdef RETPOLINE | ||
4718 | +extern bool retpoline_module_ok(bool has_retpoline); | ||
4719 | +#else | ||
4720 | +static inline bool retpoline_module_ok(bool has_retpoline) | ||
4721 | +{ | ||
4722 | + return true; | ||
4723 | +} | ||
4724 | +#endif | ||
4725 | + | ||
4726 | #ifdef CONFIG_MODULE_SIG | ||
4727 | static inline bool module_sig_ok(struct module *module) | ||
4728 | { | ||
4729 | diff --git a/include/linux/nospec.h b/include/linux/nospec.h | ||
4730 | new file mode 100644 | ||
4731 | index 000000000000..b99bced39ac2 | ||
4732 | --- /dev/null | ||
4733 | +++ b/include/linux/nospec.h | ||
4734 | @@ -0,0 +1,72 @@ | ||
4735 | +// SPDX-License-Identifier: GPL-2.0 | ||
4736 | +// Copyright(c) 2018 Linus Torvalds. All rights reserved. | ||
4737 | +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved. | ||
4738 | +// Copyright(c) 2018 Intel Corporation. All rights reserved. | ||
4739 | + | ||
4740 | +#ifndef _LINUX_NOSPEC_H | ||
4741 | +#define _LINUX_NOSPEC_H | ||
4742 | + | ||
4743 | +/** | ||
4744 | + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise | ||
4745 | + * @index: array element index | ||
4746 | + * @size: number of elements in array | ||
4747 | + * | ||
4748 | + * When @index is out of bounds (@index >= @size), the sign bit will be | ||
4749 | + * set. Extend the sign bit to all bits and invert, giving a result of | ||
4750 | + * zero for an out of bounds index, or ~0 if within bounds [0, @size). | ||
4751 | + */ | ||
4752 | +#ifndef array_index_mask_nospec | ||
4753 | +static inline unsigned long array_index_mask_nospec(unsigned long index, | ||
4754 | + unsigned long size) | ||
4755 | +{ | ||
4756 | + /* | ||
4757 | + * Warn developers about inappropriate array_index_nospec() usage. | ||
4758 | + * | ||
4759 | + * Even if the CPU speculates past the WARN_ONCE branch, the | ||
4760 | + * sign bit of @index is taken into account when generating the | ||
4761 | + * mask. | ||
4762 | + * | ||
4763 | + * This warning is compiled out when the compiler can infer that | ||
4764 | + * @index and @size are less than LONG_MAX. | ||
4765 | + */ | ||
4766 | + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, | ||
4767 | + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) | ||
4768 | + return 0; | ||
4769 | + | ||
4770 | + /* | ||
4771 | + * Always calculate and emit the mask even if the compiler | ||
4772 | + * thinks the mask is not needed. The compiler does not take | ||
4773 | + * into account the value of @index under speculation. | ||
4774 | + */ | ||
4775 | + OPTIMIZER_HIDE_VAR(index); | ||
4776 | + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1); | ||
4777 | +} | ||
4778 | +#endif | ||
4779 | + | ||
4780 | +/* | ||
4781 | + * array_index_nospec - sanitize an array index after a bounds check | ||
4782 | + * | ||
4783 | + * For a code sequence like: | ||
4784 | + * | ||
4785 | + * if (index < size) { | ||
4786 | + * index = array_index_nospec(index, size); | ||
4787 | + * val = array[index]; | ||
4788 | + * } | ||
4789 | + * | ||
4790 | + * ...if the CPU speculates past the bounds check then | ||
4791 | + * array_index_nospec() will clamp the index within the range of [0, | ||
4792 | + * size). | ||
4793 | + */ | ||
4794 | +#define array_index_nospec(index, size) \ | ||
4795 | +({ \ | ||
4796 | + typeof(index) _i = (index); \ | ||
4797 | + typeof(size) _s = (size); \ | ||
4798 | + unsigned long _mask = array_index_mask_nospec(_i, _s); \ | ||
4799 | + \ | ||
4800 | + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \ | ||
4801 | + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \ | ||
4802 | + \ | ||
4803 | + _i &= _mask; \ | ||
4804 | + _i; \ | ||
4805 | +}) | ||
4806 | +#endif /* _LINUX_NOSPEC_H */ | ||
4807 | diff --git a/kernel/module.c b/kernel/module.c | ||
4808 | index 0e54d5bf0097..07bfb9971f2f 100644 | ||
4809 | --- a/kernel/module.c | ||
4810 | +++ b/kernel/module.c | ||
4811 | @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info) | ||
4812 | } | ||
4813 | #endif /* CONFIG_LIVEPATCH */ | ||
4814 | |||
4815 | +static void check_modinfo_retpoline(struct module *mod, struct load_info *info) | ||
4816 | +{ | ||
4817 | + if (retpoline_module_ok(get_modinfo(info, "retpoline"))) | ||
4818 | + return; | ||
4819 | + | ||
4820 | + pr_warn("%s: loading module not compiled with retpoline compiler.\n", | ||
4821 | + mod->name); | ||
4822 | +} | ||
4823 | + | ||
4824 | /* Sets info->hdr and info->len. */ | ||
4825 | static int copy_module_from_user(const void __user *umod, unsigned long len, | ||
4826 | struct load_info *info) | ||
4827 | @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags) | ||
4828 | add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); | ||
4829 | } | ||
4830 | |||
4831 | + check_modinfo_retpoline(mod, info); | ||
4832 | + | ||
4833 | if (get_modinfo(info, "staging")) { | ||
4834 | add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); | ||
4835 | pr_warn("%s: module is from the staging directory, the quality " | ||
4836 | diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c | ||
4837 | index 77f396b679ce..5dce4291f0ed 100644 | ||
4838 | --- a/net/core/sock_reuseport.c | ||
4839 | +++ b/net/core/sock_reuseport.c | ||
4840 | @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) | ||
4841 | return more_reuse; | ||
4842 | } | ||
4843 | |||
4844 | +static void reuseport_free_rcu(struct rcu_head *head) | ||
4845 | +{ | ||
4846 | + struct sock_reuseport *reuse; | ||
4847 | + | ||
4848 | + reuse = container_of(head, struct sock_reuseport, rcu); | ||
4849 | + if (reuse->prog) | ||
4850 | + bpf_prog_destroy(reuse->prog); | ||
4851 | + kfree(reuse); | ||
4852 | +} | ||
4853 | + | ||
4854 | /** | ||
4855 | * reuseport_add_sock - Add a socket to the reuseport group of another. | ||
4856 | * @sk: New socket to add to the group. | ||
4857 | @@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) | ||
4858 | */ | ||
4859 | int reuseport_add_sock(struct sock *sk, struct sock *sk2) | ||
4860 | { | ||
4861 | - struct sock_reuseport *reuse; | ||
4862 | + struct sock_reuseport *old_reuse, *reuse; | ||
4863 | |||
4864 | if (!rcu_access_pointer(sk2->sk_reuseport_cb)) { | ||
4865 | int err = reuseport_alloc(sk2); | ||
4866 | @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) | ||
4867 | |||
4868 | spin_lock_bh(&reuseport_lock); | ||
4869 | reuse = rcu_dereference_protected(sk2->sk_reuseport_cb, | ||
4870 | - lockdep_is_held(&reuseport_lock)), | ||
4871 | - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, | ||
4872 | - lockdep_is_held(&reuseport_lock)), | ||
4873 | - "socket already in reuseport group"); | ||
4874 | + lockdep_is_held(&reuseport_lock)); | ||
4875 | + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb, | ||
4876 | + lockdep_is_held(&reuseport_lock)); | ||
4877 | + if (old_reuse && old_reuse->num_socks != 1) { | ||
4878 | + spin_unlock_bh(&reuseport_lock); | ||
4879 | + return -EBUSY; | ||
4880 | + } | ||
4881 | |||
4882 | if (reuse->num_socks == reuse->max_socks) { | ||
4883 | reuse = reuseport_grow(reuse); | ||
4884 | @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) | ||
4885 | |||
4886 | spin_unlock_bh(&reuseport_lock); | ||
4887 | |||
4888 | + if (old_reuse) | ||
4889 | + call_rcu(&old_reuse->rcu, reuseport_free_rcu); | ||
4890 | return 0; | ||
4891 | } | ||
4892 | |||
4893 | -static void reuseport_free_rcu(struct rcu_head *head) | ||
4894 | -{ | ||
4895 | - struct sock_reuseport *reuse; | ||
4896 | - | ||
4897 | - reuse = container_of(head, struct sock_reuseport, rcu); | ||
4898 | - if (reuse->prog) | ||
4899 | - bpf_prog_destroy(reuse->prog); | ||
4900 | - kfree(reuse); | ||
4901 | -} | ||
4902 | - | ||
4903 | void reuseport_detach_sock(struct sock *sk) | ||
4904 | { | ||
4905 | struct sock_reuseport *reuse; | ||
4906 | diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c | ||
4907 | index 9c7a4cea1628..7f5fe07d0b13 100644 | ||
4908 | --- a/net/ipv4/igmp.c | ||
4909 | +++ b/net/ipv4/igmp.c | ||
4910 | @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) | ||
4911 | pip->frag_off = htons(IP_DF); | ||
4912 | pip->ttl = 1; | ||
4913 | pip->daddr = fl4.daddr; | ||
4914 | + | ||
4915 | + rcu_read_lock(); | ||
4916 | pip->saddr = igmpv3_get_srcaddr(dev, &fl4); | ||
4917 | + rcu_read_unlock(); | ||
4918 | + | ||
4919 | pip->protocol = IPPROTO_IGMP; | ||
4920 | pip->tot_len = 0; /* filled in later */ | ||
4921 | ip_select_ident(net, skb, NULL); | ||
4922 | diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c | ||
4923 | index 7efa6b062049..0d1a767db1bb 100644 | ||
4924 | --- a/net/ipv4/tcp.c | ||
4925 | +++ b/net/ipv4/tcp.c | ||
4926 | @@ -2316,6 +2316,12 @@ int tcp_disconnect(struct sock *sk, int flags) | ||
4927 | |||
4928 | WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); | ||
4929 | |||
4930 | + if (sk->sk_frag.page) { | ||
4931 | + put_page(sk->sk_frag.page); | ||
4932 | + sk->sk_frag.page = NULL; | ||
4933 | + sk->sk_frag.offset = 0; | ||
4934 | + } | ||
4935 | + | ||
4936 | sk->sk_error_report(sk); | ||
4937 | return err; | ||
4938 | } | ||
4939 | diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c | ||
4940 | index e86a34fd5484..8ec60532be2b 100644 | ||
4941 | --- a/net/ipv4/tcp_bbr.c | ||
4942 | +++ b/net/ipv4/tcp_bbr.c | ||
4943 | @@ -452,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk) | ||
4944 | |||
4945 | bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); | ||
4946 | bbr->cycle_mstamp = tp->delivered_mstamp; | ||
4947 | - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; | ||
4948 | + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT : | ||
4949 | + bbr_pacing_gain[bbr->cycle_idx]; | ||
4950 | } | ||
4951 | |||
4952 | /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ | ||
4953 | @@ -461,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk, | ||
4954 | { | ||
4955 | struct bbr *bbr = inet_csk_ca(sk); | ||
4956 | |||
4957 | - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && | ||
4958 | - bbr_is_next_cycle_phase(sk, rs)) | ||
4959 | + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs)) | ||
4960 | bbr_advance_cycle_phase(sk); | ||
4961 | } | ||
4962 | |||
4963 | diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c | ||
4964 | index 5cad76f87536..421379014995 100644 | ||
4965 | --- a/net/ipv6/af_inet6.c | ||
4966 | +++ b/net/ipv6/af_inet6.c | ||
4967 | @@ -274,6 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||
4968 | struct net *net = sock_net(sk); | ||
4969 | __be32 v4addr = 0; | ||
4970 | unsigned short snum; | ||
4971 | + bool saved_ipv6only; | ||
4972 | int addr_type = 0; | ||
4973 | int err = 0; | ||
4974 | |||
4975 | @@ -378,19 +379,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||
4976 | if (!(addr_type & IPV6_ADDR_MULTICAST)) | ||
4977 | np->saddr = addr->sin6_addr; | ||
4978 | |||
4979 | + saved_ipv6only = sk->sk_ipv6only; | ||
4980 | + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) | ||
4981 | + sk->sk_ipv6only = 1; | ||
4982 | + | ||
4983 | /* Make sure we are allowed to bind here. */ | ||
4984 | if ((snum || !inet->bind_address_no_port) && | ||
4985 | sk->sk_prot->get_port(sk, snum)) { | ||
4986 | + sk->sk_ipv6only = saved_ipv6only; | ||
4987 | inet_reset_saddr(sk); | ||
4988 | err = -EADDRINUSE; | ||
4989 | goto out; | ||
4990 | } | ||
4991 | |||
4992 | - if (addr_type != IPV6_ADDR_ANY) { | ||
4993 | + if (addr_type != IPV6_ADDR_ANY) | ||
4994 | sk->sk_userlocks |= SOCK_BINDADDR_LOCK; | ||
4995 | - if (addr_type != IPV6_ADDR_MAPPED) | ||
4996 | - sk->sk_ipv6only = 1; | ||
4997 | - } | ||
4998 | if (snum) | ||
4999 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
5000 | inet->inet_sport = htons(inet->inet_num); | ||
5001 | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c | ||
5002 | index 117405dd07a3..a30e7e925c9b 100644 | ||
5003 | --- a/net/ipv6/ip6mr.c | ||
5004 | +++ b/net/ipv6/ip6mr.c | ||
5005 | @@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | ||
5006 | return ERR_PTR(-ENOENT); | ||
5007 | |||
5008 | it->mrt = mrt; | ||
5009 | + it->cache = NULL; | ||
5010 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) | ||
5011 | : SEQ_START_TOKEN; | ||
5012 | } | ||
5013 | diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c | ||
5014 | index ae83c3aec308..da574a16e7b3 100644 | ||
5015 | --- a/net/sched/cls_u32.c | ||
5016 | +++ b/net/sched/cls_u32.c | ||
5017 | @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) | ||
5018 | static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, | ||
5019 | u32 flags) | ||
5020 | { | ||
5021 | + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); | ||
5022 | struct net_device *dev = tp->q->dev_queue->dev; | ||
5023 | struct tc_cls_u32_offload u32_offload = {0}; | ||
5024 | struct tc_to_netdev offload; | ||
5025 | @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, | ||
5026 | offload.cls_u32->knode.sel = &n->sel; | ||
5027 | offload.cls_u32->knode.exts = &n->exts; | ||
5028 | if (n->ht_down) | ||
5029 | - offload.cls_u32->knode.link_handle = n->ht_down->handle; | ||
5030 | + offload.cls_u32->knode.link_handle = ht->handle; | ||
5031 | |||
5032 | err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, | ||
5033 | tp->protocol, &offload); | ||
5034 | @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, | ||
5035 | static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, | ||
5036 | struct tc_u_knode *n) | ||
5037 | { | ||
5038 | - struct tc_u_knode *new; | ||
5039 | + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); | ||
5040 | struct tc_u32_sel *s = &n->sel; | ||
5041 | + struct tc_u_knode *new; | ||
5042 | |||
5043 | new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), | ||
5044 | GFP_KERNEL); | ||
5045 | @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, | ||
5046 | new->fshift = n->fshift; | ||
5047 | new->res = n->res; | ||
5048 | new->flags = n->flags; | ||
5049 | - RCU_INIT_POINTER(new->ht_down, n->ht_down); | ||
5050 | + RCU_INIT_POINTER(new->ht_down, ht); | ||
5051 | |||
5052 | /* bump reference count as long as we hold pointer to structure */ | ||
5053 | - if (new->ht_down) | ||
5054 | - new->ht_down->refcnt++; | ||
5055 | + if (ht) | ||
5056 | + ht->refcnt++; | ||
5057 | |||
5058 | #ifdef CONFIG_CLS_U32_PERF | ||
5059 | /* Statistics may be incremented by readers during update | ||
5060 | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c | ||
5061 | index c626f679e1c8..91722e97cdd5 100644 | ||
5062 | --- a/net/wireless/nl80211.c | ||
5063 | +++ b/net/wireless/nl80211.c | ||
5064 | @@ -16,6 +16,7 @@ | ||
5065 | #include <linux/nl80211.h> | ||
5066 | #include <linux/rtnetlink.h> | ||
5067 | #include <linux/netlink.h> | ||
5068 | +#include <linux/nospec.h> | ||
5069 | #include <linux/etherdevice.h> | ||
5070 | #include <net/net_namespace.h> | ||
5071 | #include <net/genetlink.h> | ||
5072 | @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { | ||
5073 | static int parse_txq_params(struct nlattr *tb[], | ||
5074 | struct ieee80211_txq_params *txq_params) | ||
5075 | { | ||
5076 | + u8 ac; | ||
5077 | + | ||
5078 | if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || | ||
5079 | !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || | ||
5080 | !tb[NL80211_TXQ_ATTR_AIFS]) | ||
5081 | return -EINVAL; | ||
5082 | |||
5083 | - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); | ||
5084 | + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); | ||
5085 | txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); | ||
5086 | txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); | ||
5087 | txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); | ||
5088 | txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); | ||
5089 | |||
5090 | - if (txq_params->ac >= NL80211_NUM_ACS) | ||
5091 | + if (ac >= NL80211_NUM_ACS) | ||
5092 | return -EINVAL; | ||
5093 | - | ||
5094 | + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); | ||
5095 | return 0; | ||
5096 | } | ||
5097 | |||
5098 | diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c | ||
5099 | index 845eb9b800f3..238db4ffd30c 100644 | ||
5100 | --- a/scripts/mod/modpost.c | ||
5101 | +++ b/scripts/mod/modpost.c | ||
5102 | @@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree) | ||
5103 | buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n"); | ||
5104 | } | ||
5105 | |||
5106 | +/* Cannot check for assembler */ | ||
5107 | +static void add_retpoline(struct buffer *b) | ||
5108 | +{ | ||
5109 | + buf_printf(b, "\n#ifdef RETPOLINE\n"); | ||
5110 | + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n"); | ||
5111 | + buf_printf(b, "#endif\n"); | ||
5112 | +} | ||
5113 | + | ||
5114 | static void add_staging_flag(struct buffer *b, const char *name) | ||
5115 | { | ||
5116 | static const char *staging_dir = "drivers/staging"; | ||
5117 | @@ -2474,6 +2482,7 @@ int main(int argc, char **argv) | ||
5118 | |||
5119 | add_header(&buf, mod); | ||
5120 | add_intree_flag(&buf, !external_module); | ||
5121 | + add_retpoline(&buf); | ||
5122 | add_staging_flag(&buf, mod->name); | ||
5123 | err |= add_versions(&buf, mod); | ||
5124 | add_depends(&buf, mod, modules); | ||
5125 | diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c | ||
5126 | index a871159bf03c..ead2fd60244d 100644 | ||
5127 | --- a/security/keys/encrypted-keys/encrypted.c | ||
5128 | +++ b/security/keys/encrypted-keys/encrypted.c | ||
5129 | @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc) | ||
5130 | */ | ||
5131 | static int valid_master_desc(const char *new_desc, const char *orig_desc) | ||
5132 | { | ||
5133 | - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { | ||
5134 | - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN) | ||
5135 | - goto out; | ||
5136 | - if (orig_desc) | ||
5137 | - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN)) | ||
5138 | - goto out; | ||
5139 | - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { | ||
5140 | - if (strlen(new_desc) == KEY_USER_PREFIX_LEN) | ||
5141 | - goto out; | ||
5142 | - if (orig_desc) | ||
5143 | - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN)) | ||
5144 | - goto out; | ||
5145 | - } else | ||
5146 | - goto out; | ||
5147 | + int prefix_len; | ||
5148 | + | ||
5149 | + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) | ||
5150 | + prefix_len = KEY_TRUSTED_PREFIX_LEN; | ||
5151 | + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) | ||
5152 | + prefix_len = KEY_USER_PREFIX_LEN; | ||
5153 | + else | ||
5154 | + return -EINVAL; | ||
5155 | + | ||
5156 | + if (!new_desc[prefix_len]) | ||
5157 | + return -EINVAL; | ||
5158 | + | ||
5159 | + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) | ||
5160 | + return -EINVAL; | ||
5161 | + | ||
5162 | return 0; | ||
5163 | -out: | ||
5164 | - return -EINVAL; | ||
5165 | } | ||
5166 | |||
5167 | /* | ||
5168 | diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c | ||
5169 | index 712ed6598c48..ebdf9bd5a64c 100644 | ||
5170 | --- a/sound/soc/codecs/pcm512x-spi.c | ||
5171 | +++ b/sound/soc/codecs/pcm512x-spi.c | ||
5172 | @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = { | ||
5173 | }; | ||
5174 | |||
5175 | module_spi_driver(pcm512x_spi_driver); | ||
5176 | + | ||
5177 | +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI"); | ||
5178 | +MODULE_AUTHOR("Mark Brown <broonie@kernel.org>"); | ||
5179 | +MODULE_LICENSE("GPL v2"); | ||
5180 | diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c | ||
5181 | index f608f8d23f3d..dd88c2cb6470 100644 | ||
5182 | --- a/sound/soc/generic/simple-card.c | ||
5183 | +++ b/sound/soc/generic/simple-card.c | ||
5184 | @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, | ||
5185 | snprintf(prop, sizeof(prop), "%scpu", prefix); | ||
5186 | cpu = of_get_child_by_name(node, prop); | ||
5187 | |||
5188 | + if (!cpu) { | ||
5189 | + ret = -EINVAL; | ||
5190 | + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); | ||
5191 | + goto dai_link_of_err; | ||
5192 | + } | ||
5193 | + | ||
5194 | snprintf(prop, sizeof(prop), "%splat", prefix); | ||
5195 | plat = of_get_child_by_name(node, prop); | ||
5196 | |||
5197 | snprintf(prop, sizeof(prop), "%scodec", prefix); | ||
5198 | codec = of_get_child_by_name(node, prop); | ||
5199 | |||
5200 | - if (!cpu || !codec) { | ||
5201 | + if (!codec) { | ||
5202 | ret = -EINVAL; | ||
5203 | dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop); | ||
5204 | goto dai_link_of_err; | ||
5205 | diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c | ||
5206 | index 560cf4b51a99..a9a43acce30e 100644 | ||
5207 | --- a/sound/soc/sh/rcar/ssi.c | ||
5208 | +++ b/sound/soc/sh/rcar/ssi.c | ||
5209 | @@ -699,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod, | ||
5210 | struct rsnd_priv *priv) | ||
5211 | { | ||
5212 | struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); | ||
5213 | + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io); | ||
5214 | struct device *dev = rsnd_priv_to_dev(priv); | ||
5215 | int irq = ssi->irq; | ||
5216 | |||
5217 | + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */ | ||
5218 | + if (pure_ssi_mod != mod) | ||
5219 | + return 0; | ||
5220 | + | ||
5221 | /* PIO will request IRQ again */ | ||
5222 | devm_free_irq(dev, irq, mod); | ||
5223 |