Magellan Linux

Contents of /trunk/kernel-alx-legacy/patches-4.9/0180-4.9.81-all-fixes.patch

Revision 3608
Fri Aug 14 07:34:29 2020 UTC by niro
File size: 166338 byte(s)
-added kernel-alx-legacy pkg
1 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
2 index 4c2667aa4634..466c039c622b 100644
3 --- a/Documentation/kernel-parameters.txt
4 +++ b/Documentation/kernel-parameters.txt
5 @@ -2805,8 +2805,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
6 norandmaps Don't use address space randomization. Equivalent to
7 echo 0 > /proc/sys/kernel/randomize_va_space
8
9 - noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops
10 -
11 noreplace-smp [X86-32,SMP] Don't replace SMP instructions
12 with UP alternatives
13
14 diff --git a/Documentation/speculation.txt b/Documentation/speculation.txt
15 new file mode 100644
16 index 000000000000..e9e6cbae2841
17 --- /dev/null
18 +++ b/Documentation/speculation.txt
19 @@ -0,0 +1,90 @@
20 +This document explains potential effects of speculation, and how undesirable
21 +effects can be mitigated portably using common APIs.
22 +
23 +===========
24 +Speculation
25 +===========
26 +
27 +To improve performance and minimize average latencies, many contemporary CPUs
28 +employ speculative execution techniques such as branch prediction, performing
29 +work which may be discarded at a later stage.
30 +
31 +Typically speculative execution cannot be observed from architectural state,
32 +such as the contents of registers. However, in some cases it is possible to
33 +observe its impact on microarchitectural state, such as the presence or
34 +absence of data in caches. Such state may form side-channels which can be
35 +observed to extract secret information.
36 +
37 +For example, in the presence of branch prediction, it is possible for bounds
38 +checks to be ignored by code which is speculatively executed. Consider the
39 +following code:
40 +
41 + int load_array(int *array, unsigned int index)
42 + {
43 + if (index >= MAX_ARRAY_ELEMS)
44 + return 0;
45 + else
46 + return array[index];
47 + }
48 +
49 +Which, on arm64, may be compiled to an assembly sequence such as:
50 +
51 + CMP <index>, #MAX_ARRAY_ELEMS
52 + B.LT less
53 + MOV <returnval>, #0
54 + RET
55 + less:
56 + LDR <returnval>, [<array>, <index>]
57 + RET
58 +
59 +It is possible that a CPU mis-predicts the conditional branch, and
60 +speculatively loads array[index], even if index >= MAX_ARRAY_ELEMS. This
61 +value will subsequently be discarded, but the speculated load may affect
62 +microarchitectural state which can be subsequently measured.
63 +
64 +More complex sequences involving multiple dependent memory accesses may
65 +result in sensitive information being leaked. Consider the following
66 +code, building on the prior example:
67 +
68 + int load_dependent_arrays(int *arr1, int *arr2, int index)
69 + {
70 + int val1, val2;
71 +
72 + val1 = load_array(arr1, index);
73 + val2 = load_array(arr2, val1);
74 +
75 + return val2;
76 + }
77 +
78 +Under speculation, the first call to load_array() may return the value
79 +of an out-of-bounds address, while the second call will influence
80 +microarchitectural state dependent on this value. This may provide an
81 +arbitrary read primitive.
82 +
83 +====================================
84 +Mitigating speculation side-channels
85 +====================================
86 +
87 +The kernel provides a generic API to ensure that bounds checks are
88 +respected even under speculation. Architectures which are affected by
89 +speculation-based side-channels are expected to implement these
90 +primitives.
91 +
92 +The array_index_nospec() helper in <linux/nospec.h> can be used to
93 +prevent information from being leaked via side-channels.
94 +
95 +A call to array_index_nospec(index, size) returns a sanitized index
96 +value that is bounded to [0, size) even under cpu speculation
97 +conditions.
98 +
99 +This can be used to protect the earlier load_array() example:
100 +
101 + int load_array(int *array, unsigned int index)
102 + {
103 + if (index >= MAX_ARRAY_ELEMS)
104 + return 0;
105 + else {
106 + index = array_index_nospec(index, MAX_ARRAY_ELEMS);
107 + return array[index];
108 + }
109 + }
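
Putting the two documentation examples above together: once load_array() clamps its
index with array_index_nospec(), the dependent-array case is covered as well, since
the value fed into the second lookup can no longer be the result of an out-of-bounds
speculative read. A minimal sketch in kernel-style C (assuming MAX_ARRAY_ELEMS is
defined nearby, as in the examples above):

	/* Sketch only: combines the two examples from the documentation above. */
	#include <linux/nospec.h>

	int load_array(int *array, unsigned int index)
	{
		if (index >= MAX_ARRAY_ELEMS)
			return 0;

		/* Clamp index to [0, MAX_ARRAY_ELEMS) even under speculation. */
		index = array_index_nospec(index, MAX_ARRAY_ELEMS);
		return array[index];
	}

	int load_dependent_arrays(int *arr1, int *arr2, unsigned int index)
	{
		int val1, val2;

		/* The first load can no longer read outside arr1 speculatively... */
		val1 = load_array(arr1, index);
		/* ...and the second, value-dependent lookup is clamped as well. */
		val2 = load_array(arr2, (unsigned int)val1);

		return val2;
	}
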
110 diff --git a/Makefile b/Makefile
111 index 9550b6939076..4d5753f1c37b 100644
112 --- a/Makefile
113 +++ b/Makefile
114 @@ -1,6 +1,6 @@
115 VERSION = 4
116 PATCHLEVEL = 9
117 -SUBLEVEL = 80
118 +SUBLEVEL = 81
119 EXTRAVERSION =
120 NAME = Roaring Lionus
121
122 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
123 index 6eda5abbd719..0a6bb48854e3 100644
124 --- a/arch/powerpc/Kconfig
125 +++ b/arch/powerpc/Kconfig
126 @@ -128,6 +128,7 @@ config PPC
127 select ARCH_HAS_GCOV_PROFILE_ALL
128 select GENERIC_SMP_IDLE_THREAD
129 select GENERIC_CMOS_UPDATE
130 + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
131 select GENERIC_TIME_VSYSCALL_OLD
132 select GENERIC_CLOCKEVENTS
133 select GENERIC_CLOCKEVENTS_BROADCAST if SMP
134 diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
135 index a703452d67b6..555e22d5e07f 100644
136 --- a/arch/powerpc/include/asm/exception-64e.h
137 +++ b/arch/powerpc/include/asm/exception-64e.h
138 @@ -209,5 +209,11 @@ exc_##label##_book3e:
139 ori r3,r3,vector_offset@l; \
140 mtspr SPRN_IVOR##vector_number,r3;
141
142 +#define RFI_TO_KERNEL \
143 + rfi
144 +
145 +#define RFI_TO_USER \
146 + rfi
147 +
148 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
149
150 diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
151 index 9a3eee661297..cab6d2a46c41 100644
152 --- a/arch/powerpc/include/asm/exception-64s.h
153 +++ b/arch/powerpc/include/asm/exception-64s.h
154 @@ -51,6 +51,59 @@
155 #define EX_PPR 88 /* SMT thread status register (priority) */
156 #define EX_CTR 96
157
158 +/*
159 + * Macros for annotating the expected destination of (h)rfid
160 + *
161 + * The nop instructions allow us to insert one or more instructions to flush the
162 + * L1-D cache when returning to userspace or a guest.
163 + */
164 +#define RFI_FLUSH_SLOT \
165 + RFI_FLUSH_FIXUP_SECTION; \
166 + nop; \
167 + nop; \
168 + nop
169 +
170 +#define RFI_TO_KERNEL \
171 + rfid
172 +
173 +#define RFI_TO_USER \
174 + RFI_FLUSH_SLOT; \
175 + rfid; \
176 + b rfi_flush_fallback
177 +
178 +#define RFI_TO_USER_OR_KERNEL \
179 + RFI_FLUSH_SLOT; \
180 + rfid; \
181 + b rfi_flush_fallback
182 +
183 +#define RFI_TO_GUEST \
184 + RFI_FLUSH_SLOT; \
185 + rfid; \
186 + b rfi_flush_fallback
187 +
188 +#define HRFI_TO_KERNEL \
189 + hrfid
190 +
191 +#define HRFI_TO_USER \
192 + RFI_FLUSH_SLOT; \
193 + hrfid; \
194 + b hrfi_flush_fallback
195 +
196 +#define HRFI_TO_USER_OR_KERNEL \
197 + RFI_FLUSH_SLOT; \
198 + hrfid; \
199 + b hrfi_flush_fallback
200 +
201 +#define HRFI_TO_GUEST \
202 + RFI_FLUSH_SLOT; \
203 + hrfid; \
204 + b hrfi_flush_fallback
205 +
206 +#define HRFI_TO_UNKNOWN \
207 + RFI_FLUSH_SLOT; \
208 + hrfid; \
209 + b hrfi_flush_fallback
210 +
211 #ifdef CONFIG_RELOCATABLE
212 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
213 mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
214 diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
215 index ddf54f5bbdd1..7b332342071c 100644
216 --- a/arch/powerpc/include/asm/feature-fixups.h
217 +++ b/arch/powerpc/include/asm/feature-fixups.h
218 @@ -189,4 +189,19 @@ void apply_feature_fixups(void);
219 void setup_feature_keys(void);
220 #endif
221
222 +#define RFI_FLUSH_FIXUP_SECTION \
223 +951: \
224 + .pushsection __rfi_flush_fixup,"a"; \
225 + .align 2; \
226 +952: \
227 + FTR_ENTRY_OFFSET 951b-952b; \
228 + .popsection;
229 +
230 +
231 +#ifndef __ASSEMBLY__
232 +
233 +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
234 +
235 +#endif
236 +
237 #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
238 diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
239 index 708edebcf147..0e12cb2437d1 100644
240 --- a/arch/powerpc/include/asm/hvcall.h
241 +++ b/arch/powerpc/include/asm/hvcall.h
242 @@ -240,6 +240,7 @@
243 #define H_GET_HCA_INFO 0x1B8
244 #define H_GET_PERF_COUNT 0x1BC
245 #define H_MANAGE_TRACE 0x1C0
246 +#define H_GET_CPU_CHARACTERISTICS 0x1C8
247 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
248 #define H_QUERY_INT_STATE 0x1E4
249 #define H_POLL_PENDING 0x1D8
250 @@ -306,6 +307,17 @@
251 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
252 #define H_SET_MODE_RESOURCE_LE 4
253
254 +/* H_GET_CPU_CHARACTERISTICS return values */
255 +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
256 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
257 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
258 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
259 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
260 +
261 +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
262 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
263 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
264 +
265 #ifndef __ASSEMBLY__
266
267 /**
268 @@ -433,6 +445,11 @@ static inline unsigned long cmo_get_page_size(void)
269 }
270 #endif /* CONFIG_PPC_PSERIES */
271
272 +struct h_cpu_char_result {
273 + u64 character;
274 + u64 behaviour;
275 +};
276 +
277 #endif /* __ASSEMBLY__ */
278 #endif /* __KERNEL__ */
279 #endif /* _ASM_POWERPC_HVCALL_H */
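
The "IBM bit" comments in the H_CPU_CHAR_* / H_CPU_BEHAV_* definitions above use
PAPR's big-endian bit numbering, where bit 0 is the most significant bit of the
64-bit value; that is why IBM bit 0 corresponds to (1ull << 63). A tiny
illustrative check of the conversion (local macro, not taken from this patch):

	/* Illustration only: IBM (MSB-first) bit numbering for a 64-bit value. */
	#include <assert.h>

	#define IBM_BIT(n)	(1ULL << (63 - (n)))

	int main(void)
	{
		assert(IBM_BIT(0) == (1ULL << 63));	/* e.g. H_CPU_CHAR_SPEC_BAR_ORI31 */
		assert(IBM_BIT(2) == (1ULL << 61));	/* e.g. H_CPU_CHAR_L1D_FLUSH_ORI30 */
		assert(IBM_BIT(4) == (1ULL << 59));	/* e.g. H_CPU_CHAR_L1D_THREAD_PRIV */
		return 0;
	}
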
280 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
281 index 6a6792bb39fb..ea43897183fd 100644
282 --- a/arch/powerpc/include/asm/paca.h
283 +++ b/arch/powerpc/include/asm/paca.h
284 @@ -205,6 +205,16 @@ struct paca_struct {
285 struct sibling_subcore_state *sibling_subcore_state;
286 #endif
287 #endif
288 +#ifdef CONFIG_PPC_BOOK3S_64
289 + /*
290 + * rfi fallback flush must be in its own cacheline to prevent
291 + * other paca data leaking into the L1d
292 + */
293 + u64 exrfi[13] __aligned(0x80);
294 + void *rfi_flush_fallback_area;
295 + u64 l1d_flush_congruence;
296 + u64 l1d_flush_sets;
297 +#endif
298 };
299
300 #ifdef CONFIG_PPC_BOOK3S
301 diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
302 index 1b394247afc2..4e53b8570d1f 100644
303 --- a/arch/powerpc/include/asm/plpar_wrappers.h
304 +++ b/arch/powerpc/include/asm/plpar_wrappers.h
305 @@ -340,4 +340,18 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr
306 return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
307 }
308
309 +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
310 +{
311 + unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
312 + long rc;
313 +
314 + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
315 + if (rc == H_SUCCESS) {
316 + p->character = retbuf[0];
317 + p->behaviour = retbuf[1];
318 + }
319 +
320 + return rc;
321 +}
322 +
323 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
324 diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
325 index 654d64c9f3ac..6825a67cc3db 100644
326 --- a/arch/powerpc/include/asm/setup.h
327 +++ b/arch/powerpc/include/asm/setup.h
328 @@ -38,6 +38,19 @@ static inline void pseries_big_endian_exceptions(void) {}
329 static inline void pseries_little_endian_exceptions(void) {}
330 #endif /* CONFIG_PPC_PSERIES */
331
332 +void rfi_flush_enable(bool enable);
333 +
334 +/* These are bit flags */
335 +enum l1d_flush_type {
336 + L1D_FLUSH_NONE = 0x1,
337 + L1D_FLUSH_FALLBACK = 0x2,
338 + L1D_FLUSH_ORI = 0x4,
339 + L1D_FLUSH_MTTRIG = 0x8,
340 +};
341 +
342 +void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
343 +void do_rfi_flush_fixups(enum l1d_flush_type types);
344 +
345 #endif /* !__ASSEMBLY__ */
346
347 #endif /* _ASM_POWERPC_SETUP_H */
348 diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
349 index c833d88c423d..64bcbd580495 100644
350 --- a/arch/powerpc/kernel/asm-offsets.c
351 +++ b/arch/powerpc/kernel/asm-offsets.c
352 @@ -240,6 +240,10 @@ int main(void)
353 #ifdef CONFIG_PPC_BOOK3S_64
354 DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
355 DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
356 + DEFINE(PACA_RFI_FLUSH_FALLBACK_AREA, offsetof(struct paca_struct, rfi_flush_fallback_area));
357 + DEFINE(PACA_EXRFI, offsetof(struct paca_struct, exrfi));
358 + DEFINE(PACA_L1D_FLUSH_CONGRUENCE, offsetof(struct paca_struct, l1d_flush_congruence));
359 + DEFINE(PACA_L1D_FLUSH_SETS, offsetof(struct paca_struct, l1d_flush_sets));
360 #endif
361 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
362 DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
363 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
364 index caa659671599..c33b69d10919 100644
365 --- a/arch/powerpc/kernel/entry_64.S
366 +++ b/arch/powerpc/kernel/entry_64.S
367 @@ -251,13 +251,23 @@ BEGIN_FTR_SECTION
368 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
369
370 ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
371 + ld r2,GPR2(r1)
372 + ld r1,GPR1(r1)
373 + mtlr r4
374 + mtcr r5
375 + mtspr SPRN_SRR0,r7
376 + mtspr SPRN_SRR1,r8
377 + RFI_TO_USER
378 + b . /* prevent speculative execution */
379 +
380 + /* exit to kernel */
381 1: ld r2,GPR2(r1)
382 ld r1,GPR1(r1)
383 mtlr r4
384 mtcr r5
385 mtspr SPRN_SRR0,r7
386 mtspr SPRN_SRR1,r8
387 - RFI
388 + RFI_TO_KERNEL
389 b . /* prevent speculative execution */
390
391 syscall_error:
392 @@ -859,7 +869,7 @@ BEGIN_FTR_SECTION
393 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
394 ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
395 REST_GPR(13, r1)
396 -1:
397 +
398 mtspr SPRN_SRR1,r3
399
400 ld r2,_CCR(r1)
401 @@ -872,8 +882,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
402 ld r3,GPR3(r1)
403 ld r4,GPR4(r1)
404 ld r1,GPR1(r1)
405 + RFI_TO_USER
406 + b . /* prevent speculative execution */
407
408 - rfid
409 +1: mtspr SPRN_SRR1,r3
410 +
411 + ld r2,_CCR(r1)
412 + mtcrf 0xFF,r2
413 + ld r2,_NIP(r1)
414 + mtspr SPRN_SRR0,r2
415 +
416 + ld r0,GPR0(r1)
417 + ld r2,GPR2(r1)
418 + ld r3,GPR3(r1)
419 + ld r4,GPR4(r1)
420 + ld r1,GPR1(r1)
421 + RFI_TO_KERNEL
422 b . /* prevent speculative execution */
423
424 #endif /* CONFIG_PPC_BOOK3E */
425 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
426 index fd68e19b9ef7..96db6c3adebe 100644
427 --- a/arch/powerpc/kernel/exceptions-64s.S
428 +++ b/arch/powerpc/kernel/exceptions-64s.S
429 @@ -655,6 +655,8 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
430
431 andi. r10,r12,MSR_RI /* check for unrecoverable exception */
432 beq- 2f
433 + andi. r10,r12,MSR_PR /* check for user mode (PR != 0) */
434 + bne 1f
435
436 /* All done -- return from exception. */
437
438 @@ -671,7 +673,23 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
439 ld r11,PACA_EXSLB+EX_R11(r13)
440 ld r12,PACA_EXSLB+EX_R12(r13)
441 ld r13,PACA_EXSLB+EX_R13(r13)
442 - rfid
443 + RFI_TO_KERNEL
444 + b . /* prevent speculative execution */
445 +
446 +1:
447 +.machine push
448 +.machine "power4"
449 + mtcrf 0x80,r9
450 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
451 +.machine pop
452 +
453 + RESTORE_PPR_PACA(PACA_EXSLB, r9)
454 + ld r9,PACA_EXSLB+EX_R9(r13)
455 + ld r10,PACA_EXSLB+EX_R10(r13)
456 + ld r11,PACA_EXSLB+EX_R11(r13)
457 + ld r12,PACA_EXSLB+EX_R12(r13)
458 + ld r13,PACA_EXSLB+EX_R13(r13)
459 + RFI_TO_USER
460 b . /* prevent speculative execution */
461
462 2: mfspr r11,SPRN_SRR0
463 @@ -679,7 +697,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
464 mtspr SPRN_SRR0,r10
465 ld r10,PACAKMSR(r13)
466 mtspr SPRN_SRR1,r10
467 - rfid
468 + RFI_TO_KERNEL
469 b .
470
471 8: mfspr r11,SPRN_SRR0
472 @@ -1576,6 +1594,92 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
473 bl kernel_bad_stack
474 b 1b
475
476 + .globl rfi_flush_fallback
477 +rfi_flush_fallback:
478 + SET_SCRATCH0(r13);
479 + GET_PACA(r13);
480 + std r9,PACA_EXRFI+EX_R9(r13)
481 + std r10,PACA_EXRFI+EX_R10(r13)
482 + std r11,PACA_EXRFI+EX_R11(r13)
483 + std r12,PACA_EXRFI+EX_R12(r13)
484 + std r8,PACA_EXRFI+EX_R13(r13)
485 + mfctr r9
486 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
487 + ld r11,PACA_L1D_FLUSH_SETS(r13)
488 + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
489 + /*
490 + * The load addresses are at staggered offsets within cachelines,
491 + * which suits some pipelines better (on others it should not
492 + * hurt).
493 + */
494 + addi r12,r12,8
495 + mtctr r11
496 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
497 +
498 + /* order ld/st prior to dcbt stop all streams with flushing */
499 + sync
500 +1: li r8,0
501 + .rept 8 /* 8-way set associative */
502 + ldx r11,r10,r8
503 + add r8,r8,r12
504 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
505 + add r8,r8,r11 // Add 0, this creates a dependency on the ldx
506 + .endr
507 + addi r10,r10,128 /* 128 byte cache line */
508 + bdnz 1b
509 +
510 + mtctr r9
511 + ld r9,PACA_EXRFI+EX_R9(r13)
512 + ld r10,PACA_EXRFI+EX_R10(r13)
513 + ld r11,PACA_EXRFI+EX_R11(r13)
514 + ld r12,PACA_EXRFI+EX_R12(r13)
515 + ld r8,PACA_EXRFI+EX_R13(r13)
516 + GET_SCRATCH0(r13);
517 + rfid
518 +
519 + .globl hrfi_flush_fallback
520 +hrfi_flush_fallback:
521 + SET_SCRATCH0(r13);
522 + GET_PACA(r13);
523 + std r9,PACA_EXRFI+EX_R9(r13)
524 + std r10,PACA_EXRFI+EX_R10(r13)
525 + std r11,PACA_EXRFI+EX_R11(r13)
526 + std r12,PACA_EXRFI+EX_R12(r13)
527 + std r8,PACA_EXRFI+EX_R13(r13)
528 + mfctr r9
529 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
530 + ld r11,PACA_L1D_FLUSH_SETS(r13)
531 + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
532 + /*
533 + * The load addresses are at staggered offsets within cachelines,
534 + * which suits some pipelines better (on others it should not
535 + * hurt).
536 + */
537 + addi r12,r12,8
538 + mtctr r11
539 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
540 +
541 + /* order ld/st prior to dcbt stop all streams with flushing */
542 + sync
543 +1: li r8,0
544 + .rept 8 /* 8-way set associative */
545 + ldx r11,r10,r8
546 + add r8,r8,r12
547 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
548 + add r8,r8,r11 // Add 0, this creates a dependency on the ldx
549 + .endr
550 + addi r10,r10,128 /* 128 byte cache line */
551 + bdnz 1b
552 +
553 + mtctr r9
554 + ld r9,PACA_EXRFI+EX_R9(r13)
555 + ld r10,PACA_EXRFI+EX_R10(r13)
556 + ld r11,PACA_EXRFI+EX_R11(r13)
557 + ld r12,PACA_EXRFI+EX_R12(r13)
558 + ld r8,PACA_EXRFI+EX_R13(r13)
559 + GET_SCRATCH0(r13);
560 + hrfid
561 +
562 /*
563 * Called from arch_local_irq_enable when an interrupt needs
564 * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
565 diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
566 index a12be60181bf..7c30a91c1f86 100644
567 --- a/arch/powerpc/kernel/setup_64.c
568 +++ b/arch/powerpc/kernel/setup_64.c
569 @@ -37,6 +37,7 @@
570 #include <linux/memblock.h>
571 #include <linux/memory.h>
572 #include <linux/nmi.h>
573 +#include <linux/debugfs.h>
574
575 #include <asm/io.h>
576 #include <asm/kdump.h>
577 @@ -678,4 +679,142 @@ static int __init disable_hardlockup_detector(void)
578 return 0;
579 }
580 early_initcall(disable_hardlockup_detector);
581 +
582 +#ifdef CONFIG_PPC_BOOK3S_64
583 +static enum l1d_flush_type enabled_flush_types;
584 +static void *l1d_flush_fallback_area;
585 +static bool no_rfi_flush;
586 +bool rfi_flush;
587 +
588 +static int __init handle_no_rfi_flush(char *p)
589 +{
590 + pr_info("rfi-flush: disabled on command line.");
591 + no_rfi_flush = true;
592 + return 0;
593 +}
594 +early_param("no_rfi_flush", handle_no_rfi_flush);
595 +
596 +/*
597 + * The RFI flush is not KPTI, but because users will see doco that says to use
598 + * nopti we hijack that option here to also disable the RFI flush.
599 + */
600 +static int __init handle_no_pti(char *p)
601 +{
602 + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
603 + handle_no_rfi_flush(NULL);
604 + return 0;
605 +}
606 +early_param("nopti", handle_no_pti);
607 +
608 +static void do_nothing(void *unused)
609 +{
610 + /*
611 + * We don't need to do the flush explicitly, just enter+exit kernel is
612 + * sufficient, the RFI exit handlers will do the right thing.
613 + */
614 +}
615 +
616 +void rfi_flush_enable(bool enable)
617 +{
618 + if (rfi_flush == enable)
619 + return;
620 +
621 + if (enable) {
622 + do_rfi_flush_fixups(enabled_flush_types);
623 + on_each_cpu(do_nothing, NULL, 1);
624 + } else
625 + do_rfi_flush_fixups(L1D_FLUSH_NONE);
626 +
627 + rfi_flush = enable;
628 +}
629 +
630 +static void init_fallback_flush(void)
631 +{
632 + u64 l1d_size, limit;
633 + int cpu;
634 +
635 + l1d_size = ppc64_caches.dsize;
636 + limit = min(safe_stack_limit(), ppc64_rma_size);
637 +
638 + /*
639 + * Align to L1d size, and size it at 2x L1d size, to catch possible
640 + * hardware prefetch runoff. We don't have a recipe for load patterns to
641 + * reliably avoid the prefetcher.
642 + */
643 + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
644 + memset(l1d_flush_fallback_area, 0, l1d_size * 2);
645 +
646 + for_each_possible_cpu(cpu) {
647 + /*
648 + * The fallback flush is currently coded for 8-way
649 + * associativity. Different associativity is possible, but it
650 + * will be treated as 8-way and may not evict the lines as
651 + * effectively.
652 + *
653 + * 128 byte lines are mandatory.
654 + */
655 + u64 c = l1d_size / 8;
656 +
657 + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
658 + paca[cpu].l1d_flush_congruence = c;
659 + paca[cpu].l1d_flush_sets = c / 128;
660 + }
661 +}
662 +
663 +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
664 +{
665 + if (types & L1D_FLUSH_FALLBACK) {
666 + pr_info("rfi-flush: Using fallback displacement flush\n");
667 + init_fallback_flush();
668 + }
669 +
670 + if (types & L1D_FLUSH_ORI)
671 + pr_info("rfi-flush: Using ori type flush\n");
672 +
673 + if (types & L1D_FLUSH_MTTRIG)
674 + pr_info("rfi-flush: Using mttrig type flush\n");
675 +
676 + enabled_flush_types = types;
677 +
678 + if (!no_rfi_flush)
679 + rfi_flush_enable(enable);
680 +}
681 +
682 +#ifdef CONFIG_DEBUG_FS
683 +static int rfi_flush_set(void *data, u64 val)
684 +{
685 + if (val == 1)
686 + rfi_flush_enable(true);
687 + else if (val == 0)
688 + rfi_flush_enable(false);
689 + else
690 + return -EINVAL;
691 +
692 + return 0;
693 +}
694 +
695 +static int rfi_flush_get(void *data, u64 *val)
696 +{
697 + *val = rfi_flush ? 1 : 0;
698 + return 0;
699 +}
700 +
701 +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
702 +
703 +static __init int rfi_flush_debugfs_init(void)
704 +{
705 + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
706 + return 0;
707 +}
708 +device_initcall(rfi_flush_debugfs_init);
709 +#endif
710 +
711 +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
712 +{
713 + if (rfi_flush)
714 + return sprintf(buf, "Mitigation: RFI Flush\n");
715 +
716 + return sprintf(buf, "Vulnerable\n");
717 +}
718 +#endif /* CONFIG_PPC_BOOK3S_64 */
719 #endif
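
For the fallback displacement flush configured above, init_fallback_flush() sizes
the buffer at twice the L1D size and records two per-CPU values:
l1d_flush_congruence = l1d_size / 8 (the size of one way, since the flush loop
assumes 8-way associativity) and l1d_flush_sets = that value / 128 (the number of
128-byte lines per way). A worked example, assuming a 64 KiB L1D purely for
illustration (the real size comes from ppc64_caches.dsize):

	/* Worked example of the init_fallback_flush() arithmetic. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long l1d_size = 64 * 1024;		/* assumption for illustration */
		unsigned long congruence = l1d_size / 8;	/* 8192: bytes per way (8-way assumed) */
		unsigned long sets = congruence / 128;		/* 64: 128-byte lines per way */

		/* rfi_flush_fallback/hrfi_flush_fallback then run 'sets' loop
		 * iterations of 8 loads each; the area is 2 * l1d_size (128 KiB
		 * here) to absorb hardware prefetch runoff. */
		printf("congruence=%lu sets=%lu area=%lu\n",
		       congruence, sets, 2 * l1d_size);
		return 0;
	}
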
720 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
721 index 7394b770ae1f..b61fb7902018 100644
722 --- a/arch/powerpc/kernel/vmlinux.lds.S
723 +++ b/arch/powerpc/kernel/vmlinux.lds.S
724 @@ -132,6 +132,15 @@ SECTIONS
725 /* Read-only data */
726 RODATA
727
728 +#ifdef CONFIG_PPC64
729 + . = ALIGN(8);
730 + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
731 + __start___rfi_flush_fixup = .;
732 + *(__rfi_flush_fixup)
733 + __stop___rfi_flush_fixup = .;
734 + }
735 +#endif
736 +
737 EXCEPTION_TABLE(0)
738
739 NOTES :kernel :notes
740 diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
741 index 043415f0bdb1..e86bfa111f3c 100644
742 --- a/arch/powerpc/lib/feature-fixups.c
743 +++ b/arch/powerpc/lib/feature-fixups.c
744 @@ -23,6 +23,7 @@
745 #include <asm/sections.h>
746 #include <asm/setup.h>
747 #include <asm/firmware.h>
748 +#include <asm/setup.h>
749
750 struct fixup_entry {
751 unsigned long mask;
752 @@ -115,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
753 }
754 }
755
756 +#ifdef CONFIG_PPC_BOOK3S_64
757 +void do_rfi_flush_fixups(enum l1d_flush_type types)
758 +{
759 + unsigned int instrs[3], *dest;
760 + long *start, *end;
761 + int i;
762 +
763 + start = PTRRELOC(&__start___rfi_flush_fixup),
764 + end = PTRRELOC(&__stop___rfi_flush_fixup);
765 +
766 + instrs[0] = 0x60000000; /* nop */
767 + instrs[1] = 0x60000000; /* nop */
768 + instrs[2] = 0x60000000; /* nop */
769 +
770 + if (types & L1D_FLUSH_FALLBACK)
771 + /* b .+16 to fallback flush */
772 + instrs[0] = 0x48000010;
773 +
774 + i = 0;
775 + if (types & L1D_FLUSH_ORI) {
776 + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
777 + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
778 + }
779 +
780 + if (types & L1D_FLUSH_MTTRIG)
781 + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
782 +
783 + for (i = 0; start < end; start++, i++) {
784 + dest = (void *)start + *start;
785 +
786 + pr_devel("patching dest %lx\n", (unsigned long)dest);
787 +
788 + patch_instruction(dest, instrs[0]);
789 + patch_instruction(dest + 1, instrs[1]);
790 + patch_instruction(dest + 2, instrs[2]);
791 + }
792 +
793 + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
794 +}
795 +#endif /* CONFIG_PPC_BOOK3S_64 */
796 +
797 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
798 {
799 long *start, *end;
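
The fixup table walked by do_rfi_flush_fixups() above stores self-relative offsets:
RFI_FLUSH_FIXUP_SECTION records "951b-952b", the distance from the table entry back
to the nop slot, so "dest = (void *)start + *start" recovers the patch site no
matter where the kernel was loaded. A small user-space sketch of that
relative-offset-table pattern (illustration only, not the kernel mechanism itself):

	#include <stdio.h>
	#include <stdint.h>

	static unsigned char code[16];	/* stand-in for a patch site (the nop slot) */
	static intptr_t fixup_entry;	/* stand-in for a __rfi_flush_fixup entry */

	int main(void)
	{
		/* "FTR_ENTRY_OFFSET 951b-952b": store the site relative to the entry. */
		fixup_entry = (intptr_t)&code[4] - (intptr_t)&fixup_entry;

		/* "dest = (void *)start + *start": resolve it back to an address. */
		unsigned char *dest =
			(unsigned char *)((intptr_t)&fixup_entry + fixup_entry);

		printf("resolved %p, expected %p\n", (void *)dest, (void *)&code[4]);
		return 0;
	}
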
800 diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
801 index b33faa0015cc..6f8b4c19373a 100644
802 --- a/arch/powerpc/platforms/powernv/setup.c
803 +++ b/arch/powerpc/platforms/powernv/setup.c
804 @@ -35,13 +35,63 @@
805 #include <asm/opal.h>
806 #include <asm/kexec.h>
807 #include <asm/smp.h>
808 +#include <asm/tm.h>
809 +#include <asm/setup.h>
810
811 #include "powernv.h"
812
813 +static void pnv_setup_rfi_flush(void)
814 +{
815 + struct device_node *np, *fw_features;
816 + enum l1d_flush_type type;
817 + int enable;
818 +
819 + /* Default to fallback in case fw-features are not available */
820 + type = L1D_FLUSH_FALLBACK;
821 + enable = 1;
822 +
823 + np = of_find_node_by_name(NULL, "ibm,opal");
824 + fw_features = of_get_child_by_name(np, "fw-features");
825 + of_node_put(np);
826 +
827 + if (fw_features) {
828 + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
829 + if (np && of_property_read_bool(np, "enabled"))
830 + type = L1D_FLUSH_MTTRIG;
831 +
832 + of_node_put(np);
833 +
834 + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
835 + if (np && of_property_read_bool(np, "enabled"))
836 + type = L1D_FLUSH_ORI;
837 +
838 + of_node_put(np);
839 +
840 + /* Enable unless firmware says NOT to */
841 + enable = 2;
842 + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
843 + if (np && of_property_read_bool(np, "disabled"))
844 + enable--;
845 +
846 + of_node_put(np);
847 +
848 + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
849 + if (np && of_property_read_bool(np, "disabled"))
850 + enable--;
851 +
852 + of_node_put(np);
853 + of_node_put(fw_features);
854 + }
855 +
856 + setup_rfi_flush(type, enable > 0);
857 +}
858 +
859 static void __init pnv_setup_arch(void)
860 {
861 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
862
863 + pnv_setup_rfi_flush();
864 +
865 /* Initialize SMP */
866 pnv_smp_init();
867
868 diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
869 index 97aa3f332f24..1845fc611912 100644
870 --- a/arch/powerpc/platforms/pseries/setup.c
871 +++ b/arch/powerpc/platforms/pseries/setup.c
872 @@ -450,6 +450,39 @@ static void __init find_and_init_phbs(void)
873 of_pci_check_probe_only();
874 }
875
876 +static void pseries_setup_rfi_flush(void)
877 +{
878 + struct h_cpu_char_result result;
879 + enum l1d_flush_type types;
880 + bool enable;
881 + long rc;
882 +
883 + /* Enable by default */
884 + enable = true;
885 +
886 + rc = plpar_get_cpu_characteristics(&result);
887 + if (rc == H_SUCCESS) {
888 + types = L1D_FLUSH_NONE;
889 +
890 + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
891 + types |= L1D_FLUSH_MTTRIG;
892 + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
893 + types |= L1D_FLUSH_ORI;
894 +
895 + /* Use fallback if nothing set in hcall */
896 + if (types == L1D_FLUSH_NONE)
897 + types = L1D_FLUSH_FALLBACK;
898 +
899 + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
900 + enable = false;
901 + } else {
902 + /* Default to fallback in case hcall is not available */
903 + types = L1D_FLUSH_FALLBACK;
904 + }
905 +
906 + setup_rfi_flush(types, enable);
907 +}
908 +
909 static void __init pSeries_setup_arch(void)
910 {
911 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
912 @@ -467,6 +500,8 @@ static void __init pSeries_setup_arch(void)
913
914 fwnmi_init();
915
916 + pseries_setup_rfi_flush();
917 +
918 /* By default, only probe PCI (can be overridden by rtas_pci) */
919 pci_add_flags(PCI_PROBE_ONLY);
920
921 diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
922 index bdd9cc59d20f..b0cd306dc527 100644
923 --- a/arch/x86/entry/common.c
924 +++ b/arch/x86/entry/common.c
925 @@ -20,6 +20,7 @@
926 #include <linux/export.h>
927 #include <linux/context_tracking.h>
928 #include <linux/user-return-notifier.h>
929 +#include <linux/nospec.h>
930 #include <linux/uprobes.h>
931
932 #include <asm/desc.h>
933 @@ -201,7 +202,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
934 * special case only applies after poking regs and before the
935 * very next return to user mode.
936 */
937 - current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
938 + ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
939 #endif
940
941 user_enter_irqoff();
942 @@ -277,7 +278,8 @@ __visible void do_syscall_64(struct pt_regs *regs)
943 * regs->orig_ax, which changes the behavior of some syscalls.
944 */
945 if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
946 - regs->ax = sys_call_table[nr & __SYSCALL_MASK](
947 + nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
948 + regs->ax = sys_call_table[nr](
949 regs->di, regs->si, regs->dx,
950 regs->r10, regs->r8, regs->r9);
951 }
952 @@ -299,7 +301,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
953 unsigned int nr = (unsigned int)regs->orig_ax;
954
955 #ifdef CONFIG_IA32_EMULATION
956 - current->thread.status |= TS_COMPAT;
957 + ti->status |= TS_COMPAT;
958 #endif
959
960 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
961 @@ -313,6 +315,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
962 }
963
964 if (likely(nr < IA32_NR_syscalls)) {
965 + nr = array_index_nospec(nr, IA32_NR_syscalls);
966 /*
967 * It's possible that a 32-bit syscall implementation
968 * takes a 64-bit parameter but nonetheless assumes that
969 diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
970 index a76dc738ec61..f5434b4670c1 100644
971 --- a/arch/x86/entry/entry_32.S
972 +++ b/arch/x86/entry/entry_32.S
973 @@ -237,7 +237,8 @@ ENTRY(__switch_to_asm)
974 * exist, overwrite the RSB with entries which capture
975 * speculative execution to prevent attack.
976 */
977 - FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
978 + /* Clobbers %ebx */
979 + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
980 #endif
981
982 /* restore callee-saved registers */
983 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
984 index e729e1528584..db5009ce065a 100644
985 --- a/arch/x86/entry/entry_64.S
986 +++ b/arch/x86/entry/entry_64.S
987 @@ -177,96 +177,17 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
988 pushq %r9 /* pt_regs->r9 */
989 pushq %r10 /* pt_regs->r10 */
990 pushq %r11 /* pt_regs->r11 */
991 - sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
992 + pushq %rbx /* pt_regs->rbx */
993 + pushq %rbp /* pt_regs->rbp */
994 + pushq %r12 /* pt_regs->r12 */
995 + pushq %r13 /* pt_regs->r13 */
996 + pushq %r14 /* pt_regs->r14 */
997 + pushq %r15 /* pt_regs->r15 */
998
999 - /*
1000 - * If we need to do entry work or if we guess we'll need to do
1001 - * exit work, go straight to the slow path.
1002 - */
1003 - movq PER_CPU_VAR(current_task), %r11
1004 - testl $_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
1005 - jnz entry_SYSCALL64_slow_path
1006 -
1007 -entry_SYSCALL_64_fastpath:
1008 - /*
1009 - * Easy case: enable interrupts and issue the syscall. If the syscall
1010 - * needs pt_regs, we'll call a stub that disables interrupts again
1011 - * and jumps to the slow path.
1012 - */
1013 - TRACE_IRQS_ON
1014 - ENABLE_INTERRUPTS(CLBR_NONE)
1015 -#if __SYSCALL_MASK == ~0
1016 - cmpq $__NR_syscall_max, %rax
1017 -#else
1018 - andl $__SYSCALL_MASK, %eax
1019 - cmpl $__NR_syscall_max, %eax
1020 -#endif
1021 - ja 1f /* return -ENOSYS (already in pt_regs->ax) */
1022 - movq %r10, %rcx
1023 -
1024 - /*
1025 - * This call instruction is handled specially in stub_ptregs_64.
1026 - * It might end up jumping to the slow path. If it jumps, RAX
1027 - * and all argument registers are clobbered.
1028 - */
1029 -#ifdef CONFIG_RETPOLINE
1030 - movq sys_call_table(, %rax, 8), %rax
1031 - call __x86_indirect_thunk_rax
1032 -#else
1033 - call *sys_call_table(, %rax, 8)
1034 -#endif
1035 -.Lentry_SYSCALL_64_after_fastpath_call:
1036 -
1037 - movq %rax, RAX(%rsp)
1038 -1:
1039 -
1040 - /*
1041 - * If we get here, then we know that pt_regs is clean for SYSRET64.
1042 - * If we see that no exit work is required (which we are required
1043 - * to check with IRQs off), then we can go straight to SYSRET64.
1044 - */
1045 - DISABLE_INTERRUPTS(CLBR_NONE)
1046 - TRACE_IRQS_OFF
1047 - movq PER_CPU_VAR(current_task), %r11
1048 - testl $_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
1049 - jnz 1f
1050 -
1051 - LOCKDEP_SYS_EXIT
1052 - TRACE_IRQS_ON /* user mode is traced as IRQs on */
1053 - movq RIP(%rsp), %rcx
1054 - movq EFLAGS(%rsp), %r11
1055 - RESTORE_C_REGS_EXCEPT_RCX_R11
1056 - /*
1057 - * This opens a window where we have a user CR3, but are
1058 - * running in the kernel. This makes using the CS
1059 - * register useless for telling whether or not we need to
1060 - * switch CR3 in NMIs. Normal interrupts are OK because
1061 - * they are off here.
1062 - */
1063 - SWITCH_USER_CR3
1064 - movq RSP(%rsp), %rsp
1065 - USERGS_SYSRET64
1066 -
1067 -1:
1068 - /*
1069 - * The fast path looked good when we started, but something changed
1070 - * along the way and we need to switch to the slow path. Calling
1071 - * raise(3) will trigger this, for example. IRQs are off.
1072 - */
1073 - TRACE_IRQS_ON
1074 - ENABLE_INTERRUPTS(CLBR_NONE)
1075 - SAVE_EXTRA_REGS
1076 - movq %rsp, %rdi
1077 - call syscall_return_slowpath /* returns with IRQs disabled */
1078 - jmp return_from_SYSCALL_64
1079 -
1080 -entry_SYSCALL64_slow_path:
1081 /* IRQs are off. */
1082 - SAVE_EXTRA_REGS
1083 movq %rsp, %rdi
1084 call do_syscall_64 /* returns with IRQs disabled */
1085
1086 -return_from_SYSCALL_64:
1087 RESTORE_EXTRA_REGS
1088 TRACE_IRQS_IRETQ /* we're about to change IF */
1089
1090 @@ -339,6 +260,7 @@ return_from_SYSCALL_64:
1091 syscall_return_via_sysret:
1092 /* rcx and r11 are already restored (see code above) */
1093 RESTORE_C_REGS_EXCEPT_RCX_R11
1094 +
1095 /*
1096 * This opens a window where we have a user CR3, but are
1097 * running in the kernel. This makes using the CS
1098 @@ -363,45 +285,6 @@ opportunistic_sysret_failed:
1099 jmp restore_c_regs_and_iret
1100 END(entry_SYSCALL_64)
1101
1102 -ENTRY(stub_ptregs_64)
1103 - /*
1104 - * Syscalls marked as needing ptregs land here.
1105 - * If we are on the fast path, we need to save the extra regs,
1106 - * which we achieve by trying again on the slow path. If we are on
1107 - * the slow path, the extra regs are already saved.
1108 - *
1109 - * RAX stores a pointer to the C function implementing the syscall.
1110 - * IRQs are on.
1111 - */
1112 - cmpq $.Lentry_SYSCALL_64_after_fastpath_call, (%rsp)
1113 - jne 1f
1114 -
1115 - /*
1116 - * Called from fast path -- disable IRQs again, pop return address
1117 - * and jump to slow path
1118 - */
1119 - DISABLE_INTERRUPTS(CLBR_NONE)
1120 - TRACE_IRQS_OFF
1121 - popq %rax
1122 - jmp entry_SYSCALL64_slow_path
1123 -
1124 -1:
1125 - JMP_NOSPEC %rax /* Called from C */
1126 -END(stub_ptregs_64)
1127 -
1128 -.macro ptregs_stub func
1129 -ENTRY(ptregs_\func)
1130 - leaq \func(%rip), %rax
1131 - jmp stub_ptregs_64
1132 -END(ptregs_\func)
1133 -.endm
1134 -
1135 -/* Instantiate ptregs_stub for each ptregs-using syscall */
1136 -#define __SYSCALL_64_QUAL_(sym)
1137 -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_stub sym
1138 -#define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
1139 -#include <asm/syscalls_64.h>
1140 -
1141 /*
1142 * %rdi: prev task
1143 * %rsi: next task
1144 @@ -435,7 +318,8 @@ ENTRY(__switch_to_asm)
1145 * exist, overwrite the RSB with entries which capture
1146 * speculative execution to prevent attack.
1147 */
1148 - FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
1149 + /* Clobbers %rbx */
1150 + FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
1151 #endif
1152
1153 /* restore callee-saved registers */
1154 diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
1155 index 9dbc5abb6162..6705edda4ac3 100644
1156 --- a/arch/x86/entry/syscall_64.c
1157 +++ b/arch/x86/entry/syscall_64.c
1158 @@ -6,14 +6,11 @@
1159 #include <asm/asm-offsets.h>
1160 #include <asm/syscall.h>
1161
1162 -#define __SYSCALL_64_QUAL_(sym) sym
1163 -#define __SYSCALL_64_QUAL_ptregs(sym) ptregs_##sym
1164 -
1165 -#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long __SYSCALL_64_QUAL_##qual(sym)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1166 +#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1167 #include <asm/syscalls_64.h>
1168 #undef __SYSCALL_64
1169
1170 -#define __SYSCALL_64(nr, sym, qual) [nr] = __SYSCALL_64_QUAL_##qual(sym),
1171 +#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
1172
1173 extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
1174
1175 diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
1176 index 982c9e31daca..21298c173b0e 100644
1177 --- a/arch/x86/events/intel/bts.c
1178 +++ b/arch/x86/events/intel/bts.c
1179 @@ -22,6 +22,7 @@
1180 #include <linux/debugfs.h>
1181 #include <linux/device.h>
1182 #include <linux/coredump.h>
1183 +#include <linux/kaiser.h>
1184
1185 #include <asm-generic/sizes.h>
1186 #include <asm/perf_event.h>
1187 @@ -77,6 +78,23 @@ static size_t buf_size(struct page *page)
1188 return 1 << (PAGE_SHIFT + page_private(page));
1189 }
1190
1191 +static void bts_buffer_free_aux(void *data)
1192 +{
1193 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
1194 + struct bts_buffer *buf = data;
1195 + int nbuf;
1196 +
1197 + for (nbuf = 0; nbuf < buf->nr_bufs; nbuf++) {
1198 + struct page *page = buf->buf[nbuf].page;
1199 + void *kaddr = page_address(page);
1200 + size_t page_size = buf_size(page);
1201 +
1202 + kaiser_remove_mapping((unsigned long)kaddr, page_size);
1203 + }
1204 +#endif
1205 + kfree(data);
1206 +}
1207 +
1208 static void *
1209 bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
1210 {
1211 @@ -113,29 +131,33 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
1212 buf->real_size = size - size % BTS_RECORD_SIZE;
1213
1214 for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
1215 - unsigned int __nr_pages;
1216 + void *kaddr = pages[pg];
1217 + size_t page_size;
1218 +
1219 + page = virt_to_page(kaddr);
1220 + page_size = buf_size(page);
1221 +
1222 + if (kaiser_add_mapping((unsigned long)kaddr,
1223 + page_size, __PAGE_KERNEL) < 0) {
1224 + buf->nr_bufs = nbuf;
1225 + bts_buffer_free_aux(buf);
1226 + return NULL;
1227 + }
1228
1229 - page = virt_to_page(pages[pg]);
1230 - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
1231 buf->buf[nbuf].page = page;
1232 buf->buf[nbuf].offset = offset;
1233 buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
1234 - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
1235 + buf->buf[nbuf].size = page_size - buf->buf[nbuf].displacement;
1236 pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
1237 buf->buf[nbuf].size -= pad;
1238
1239 - pg += __nr_pages;
1240 - offset += __nr_pages << PAGE_SHIFT;
1241 + pg += page_size >> PAGE_SHIFT;
1242 + offset += page_size;
1243 }
1244
1245 return buf;
1246 }
1247
1248 -static void bts_buffer_free_aux(void *data)
1249 -{
1250 - kfree(data);
1251 -}
1252 -
1253 static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
1254 {
1255 return buf->buf[idx].offset + buf->buf[idx].displacement;
1256 diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
1257 index b15aa4083dfd..166654218329 100644
1258 --- a/arch/x86/include/asm/asm-prototypes.h
1259 +++ b/arch/x86/include/asm/asm-prototypes.h
1260 @@ -37,5 +37,7 @@ INDIRECT_THUNK(dx)
1261 INDIRECT_THUNK(si)
1262 INDIRECT_THUNK(di)
1263 INDIRECT_THUNK(bp)
1264 -INDIRECT_THUNK(sp)
1265 +asmlinkage void __fill_rsb(void);
1266 +asmlinkage void __clear_rsb(void);
1267 +
1268 #endif /* CONFIG_RETPOLINE */
1269 diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
1270 index 00523524edbf..7bb29a416b77 100644
1271 --- a/arch/x86/include/asm/asm.h
1272 +++ b/arch/x86/include/asm/asm.h
1273 @@ -11,10 +11,12 @@
1274 # define __ASM_FORM_COMMA(x) " " #x ","
1275 #endif
1276
1277 -#ifdef CONFIG_X86_32
1278 +#ifndef __x86_64__
1279 +/* 32 bit */
1280 # define __ASM_SEL(a,b) __ASM_FORM(a)
1281 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
1282 #else
1283 +/* 64 bit */
1284 # define __ASM_SEL(a,b) __ASM_FORM(b)
1285 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
1286 #endif
1287 diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
1288 index bfb28caf97b1..857590390397 100644
1289 --- a/arch/x86/include/asm/barrier.h
1290 +++ b/arch/x86/include/asm/barrier.h
1291 @@ -23,6 +23,34 @@
1292 #define wmb() asm volatile("sfence" ::: "memory")
1293 #endif
1294
1295 +/**
1296 + * array_index_mask_nospec() - generate a mask that is ~0UL when the
1297 + * bounds check succeeds and 0 otherwise
1298 + * @index: array element index
1299 + * @size: number of elements in array
1300 + *
1301 + * Returns:
1302 + * 0 - (index < size)
1303 + */
1304 +static inline unsigned long array_index_mask_nospec(unsigned long index,
1305 + unsigned long size)
1306 +{
1307 + unsigned long mask;
1308 +
1309 + asm ("cmp %1,%2; sbb %0,%0;"
1310 + :"=r" (mask)
1311 + :"r"(size),"r" (index)
1312 + :"cc");
1313 + return mask;
1314 +}
1315 +
1316 +/* Override the default implementation from linux/nospec.h. */
1317 +#define array_index_mask_nospec array_index_mask_nospec
1318 +
1319 +/* Prevent speculative execution past this barrier. */
1320 +#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
1321 + "lfence", X86_FEATURE_LFENCE_RDTSC)
1322 +
1323 #ifdef CONFIG_X86_PPRO_FENCE
1324 #define dma_rmb() rmb()
1325 #else
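
The inline assembly in array_index_mask_nospec() above relies on a flags trick:
"cmp %1,%2" subtracts size from index and sets the carry (borrow) flag exactly when
index < size as an unsigned comparison, and "sbb %0,%0" then computes 0 - CF, giving
all ones for an in-bounds index and zero otherwise, with no conditional branch for
the CPU to mispredict. A sketch of the computed value in plain C (semantics only; a
compiler is free to turn this back into a branch, which is why the kernel uses asm):

	#include <stdio.h>

	/* C equivalent of the value produced by the cmp/sbb pair above. */
	static unsigned long mask_equiv(unsigned long index, unsigned long size)
	{
		/* cmp sets CF iff index < size (unsigned); sbb yields 0 - CF. */
		return 0UL - (unsigned long)(index < size);
	}

	int main(void)
	{
		printf("%lx\n", mask_equiv(3, 8));	/* ffffffffffffffff: in bounds */
		printf("%lx\n", mask_equiv(9, 8));	/* 0: out of bounds */
		return 0;
	}
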
1326 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
1327 index 9ea67a04ff4f..8c101579f535 100644
1328 --- a/arch/x86/include/asm/cpufeature.h
1329 +++ b/arch/x86/include/asm/cpufeature.h
1330 @@ -28,6 +28,7 @@ enum cpuid_leafs
1331 CPUID_8000_000A_EDX,
1332 CPUID_7_ECX,
1333 CPUID_8000_0007_EBX,
1334 + CPUID_7_EDX,
1335 };
1336
1337 #ifdef CONFIG_X86_FEATURE_NAMES
1338 @@ -78,8 +79,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
1339 CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
1340 CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
1341 CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
1342 + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
1343 REQUIRED_MASK_CHECK || \
1344 - BUILD_BUG_ON_ZERO(NCAPINTS != 18))
1345 + BUILD_BUG_ON_ZERO(NCAPINTS != 19))
1346
1347 #define DISABLED_MASK_BIT_SET(feature_bit) \
1348 ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
1349 @@ -100,8 +102,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
1350 CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
1351 CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
1352 CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
1353 + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
1354 DISABLED_MASK_CHECK || \
1355 - BUILD_BUG_ON_ZERO(NCAPINTS != 18))
1356 + BUILD_BUG_ON_ZERO(NCAPINTS != 19))
1357
1358 #define cpu_has(c, bit) \
1359 (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
1360 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
1361 index 8537a21acd8b..8eb23f5cf7f4 100644
1362 --- a/arch/x86/include/asm/cpufeatures.h
1363 +++ b/arch/x86/include/asm/cpufeatures.h
1364 @@ -12,7 +12,7 @@
1365 /*
1366 * Defines x86 CPU feature bits
1367 */
1368 -#define NCAPINTS 18 /* N 32-bit words worth of info */
1369 +#define NCAPINTS 19 /* N 32-bit words worth of info */
1370 #define NBUGINTS 1 /* N 32-bit bug flags */
1371
1372 /*
1373 @@ -194,16 +194,16 @@
1374 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
1375 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1376
1377 -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
1378 -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
1379 +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
1380 +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
1381
1382 -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
1383 -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
1384 -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
1385 +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
1386
1387 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
1388 #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
1389
1390 +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
1391 +
1392 /* Virtualization flags: Linux defined, word 8 */
1393 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
1394 #define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
1395 @@ -260,6 +260,9 @@
1396 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
1397 #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
1398 #define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
1399 +#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
1400 +#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
1401 +#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
1402
1403 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
1404 #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
1405 @@ -295,6 +298,13 @@
1406 #define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
1407 #define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
1408
1409 +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
1410 +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
1411 +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
1412 +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
1413 +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
1414 +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
1415 +
1416 /*
1417 * BUG word(s)
1418 */
1419 diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
1420 index 21c5ac15657b..1f8cca459c6c 100644
1421 --- a/arch/x86/include/asm/disabled-features.h
1422 +++ b/arch/x86/include/asm/disabled-features.h
1423 @@ -59,6 +59,7 @@
1424 #define DISABLED_MASK15 0
1425 #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
1426 #define DISABLED_MASK17 0
1427 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
1428 +#define DISABLED_MASK18 0
1429 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
1430
1431 #endif /* _ASM_X86_DISABLED_FEATURES_H */
1432 diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
1433 index 34a46dc076d3..75b748a1deb8 100644
1434 --- a/arch/x86/include/asm/intel-family.h
1435 +++ b/arch/x86/include/asm/intel-family.h
1436 @@ -12,6 +12,7 @@
1437 */
1438
1439 #define INTEL_FAM6_CORE_YONAH 0x0E
1440 +
1441 #define INTEL_FAM6_CORE2_MEROM 0x0F
1442 #define INTEL_FAM6_CORE2_MEROM_L 0x16
1443 #define INTEL_FAM6_CORE2_PENRYN 0x17
1444 @@ -21,6 +22,7 @@
1445 #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
1446 #define INTEL_FAM6_NEHALEM_EP 0x1A
1447 #define INTEL_FAM6_NEHALEM_EX 0x2E
1448 +
1449 #define INTEL_FAM6_WESTMERE 0x25
1450 #define INTEL_FAM6_WESTMERE_EP 0x2C
1451 #define INTEL_FAM6_WESTMERE_EX 0x2F
1452 @@ -36,9 +38,9 @@
1453 #define INTEL_FAM6_HASWELL_GT3E 0x46
1454
1455 #define INTEL_FAM6_BROADWELL_CORE 0x3D
1456 -#define INTEL_FAM6_BROADWELL_XEON_D 0x56
1457 #define INTEL_FAM6_BROADWELL_GT3E 0x47
1458 #define INTEL_FAM6_BROADWELL_X 0x4F
1459 +#define INTEL_FAM6_BROADWELL_XEON_D 0x56
1460
1461 #define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
1462 #define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
1463 @@ -57,9 +59,10 @@
1464 #define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
1465 #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
1466 #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
1467 -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Annidale */
1468 +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
1469 #define INTEL_FAM6_ATOM_GOLDMONT 0x5C
1470 #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
1471 +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
1472
1473 /* Xeon Phi */
1474
1475 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
1476 index b11c4c072df8..c768bc1550a1 100644
1477 --- a/arch/x86/include/asm/msr-index.h
1478 +++ b/arch/x86/include/asm/msr-index.h
1479 @@ -37,6 +37,13 @@
1480 #define EFER_FFXSR (1<<_EFER_FFXSR)
1481
1482 /* Intel MSRs. Some also available on other CPUs */
1483 +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
1484 +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
1485 +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
1486 +
1487 +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
1488 +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
1489 +
1490 #define MSR_IA32_PERFCTR0 0x000000c1
1491 #define MSR_IA32_PERFCTR1 0x000000c2
1492 #define MSR_FSB_FREQ 0x000000cd
1493 @@ -50,6 +57,11 @@
1494 #define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
1495
1496 #define MSR_MTRRcap 0x000000fe
1497 +
1498 +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
1499 +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
1500 +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
1501 +
1502 #define MSR_IA32_BBL_CR_CTL 0x00000119
1503 #define MSR_IA32_BBL_CR_CTL3 0x0000011e
1504
1505 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
1506 index b5fee97813cd..ed35b915b5c9 100644
1507 --- a/arch/x86/include/asm/msr.h
1508 +++ b/arch/x86/include/asm/msr.h
1509 @@ -188,8 +188,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
1510 * that some other imaginary CPU is updating continuously with a
1511 * time stamp.
1512 */
1513 - alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
1514 - "lfence", X86_FEATURE_LFENCE_RDTSC);
1515 + barrier_nospec();
1516 return rdtsc();
1517 }
1518
1519 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
1520 index 4ad41087ce0e..300cc159b4a0 100644
1521 --- a/arch/x86/include/asm/nospec-branch.h
1522 +++ b/arch/x86/include/asm/nospec-branch.h
1523 @@ -1,56 +1,12 @@
1524 /* SPDX-License-Identifier: GPL-2.0 */
1525
1526 -#ifndef __NOSPEC_BRANCH_H__
1527 -#define __NOSPEC_BRANCH_H__
1528 +#ifndef _ASM_X86_NOSPEC_BRANCH_H_
1529 +#define _ASM_X86_NOSPEC_BRANCH_H_
1530
1531 #include <asm/alternative.h>
1532 #include <asm/alternative-asm.h>
1533 #include <asm/cpufeatures.h>
1534
1535 -/*
1536 - * Fill the CPU return stack buffer.
1537 - *
1538 - * Each entry in the RSB, if used for a speculative 'ret', contains an
1539 - * infinite 'pause; lfence; jmp' loop to capture speculative execution.
1540 - *
1541 - * This is required in various cases for retpoline and IBRS-based
1542 - * mitigations for the Spectre variant 2 vulnerability. Sometimes to
1543 - * eliminate potentially bogus entries from the RSB, and sometimes
1544 - * purely to ensure that it doesn't get empty, which on some CPUs would
1545 - * allow predictions from other (unwanted!) sources to be used.
1546 - *
1547 - * We define a CPP macro such that it can be used from both .S files and
1548 - * inline assembly. It's possible to do a .macro and then include that
1549 - * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
1550 - */
1551 -
1552 -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
1553 -#define RSB_FILL_LOOPS 16 /* To avoid underflow */
1554 -
1555 -/*
1556 - * Google experimented with loop-unrolling and this turned out to be
1557 - * the optimal version — two calls, each with their own speculation
1558 - * trap should their return address end up getting used, in a loop.
1559 - */
1560 -#define __FILL_RETURN_BUFFER(reg, nr, sp) \
1561 - mov $(nr/2), reg; \
1562 -771: \
1563 - call 772f; \
1564 -773: /* speculation trap */ \
1565 - pause; \
1566 - lfence; \
1567 - jmp 773b; \
1568 -772: \
1569 - call 774f; \
1570 -775: /* speculation trap */ \
1571 - pause; \
1572 - lfence; \
1573 - jmp 775b; \
1574 -774: \
1575 - dec reg; \
1576 - jnz 771b; \
1577 - add $(BITS_PER_LONG/8) * nr, sp;
1578 -
1579 #ifdef __ASSEMBLY__
1580
1581 /*
1582 @@ -121,17 +77,10 @@
1583 #endif
1584 .endm
1585
1586 - /*
1587 - * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
1588 - * monstrosity above, manually.
1589 - */
1590 -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
1591 +/* This clobbers the BX register */
1592 +.macro FILL_RETURN_BUFFER nr:req ftr:req
1593 #ifdef CONFIG_RETPOLINE
1594 - ANNOTATE_NOSPEC_ALTERNATIVE
1595 - ALTERNATIVE "jmp .Lskip_rsb_\@", \
1596 - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
1597 - \ftr
1598 -.Lskip_rsb_\@:
1599 + ALTERNATIVE "", "call __clear_rsb", \ftr
1600 #endif
1601 .endm
1602
1603 @@ -201,22 +150,30 @@ extern char __indirect_thunk_end[];
1604 * On VMEXIT we must ensure that no RSB predictions learned in the guest
1605 * can be followed in the host, by overwriting the RSB completely. Both
1606 * retpoline and IBRS mitigations for Spectre v2 need this; only on future
1607 - * CPUs with IBRS_ATT *might* it be avoided.
1608 + * CPUs with IBRS_ALL *might* it be avoided.
1609 */
1610 static inline void vmexit_fill_RSB(void)
1611 {
1612 #ifdef CONFIG_RETPOLINE
1613 - unsigned long loops;
1614 -
1615 - asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
1616 - ALTERNATIVE("jmp 910f",
1617 - __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
1618 - X86_FEATURE_RETPOLINE)
1619 - "910:"
1620 - : "=r" (loops), ASM_CALL_CONSTRAINT
1621 - : : "memory" );
1622 + alternative_input("",
1623 + "call __fill_rsb",
1624 + X86_FEATURE_RETPOLINE,
1625 + ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
1626 #endif
1627 }
1628
1629 +static inline void indirect_branch_prediction_barrier(void)
1630 +{
1631 + asm volatile(ALTERNATIVE("",
1632 + "movl %[msr], %%ecx\n\t"
1633 + "movl %[val], %%eax\n\t"
1634 + "movl $0, %%edx\n\t"
1635 + "wrmsr",
1636 + X86_FEATURE_USE_IBPB)
1637 + : : [msr] "i" (MSR_IA32_PRED_CMD),
1638 + [val] "i" (PRED_CMD_IBPB)
1639 + : "eax", "ecx", "edx", "memory");
1640 +}
1641 +
1642 #endif /* __ASSEMBLY__ */
1643 -#endif /* __NOSPEC_BRANCH_H__ */
1644 +#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
1645 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
1646 index 1178a51b77f3..b6d425999f99 100644
1647 --- a/arch/x86/include/asm/pgalloc.h
1648 +++ b/arch/x86/include/asm/pgalloc.h
1649 @@ -27,17 +27,6 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
1650 */
1651 extern gfp_t __userpte_alloc_gfp;
1652
1653 -#ifdef CONFIG_PAGE_TABLE_ISOLATION
1654 -/*
1655 - * Instead of one PGD, we acquire two PGDs. Being order-1, it is
1656 - * both 8k in size and 8k-aligned. That lets us just flip bit 12
1657 - * in a pointer to swap between the two 4k halves.
1658 - */
1659 -#define PGD_ALLOCATION_ORDER 1
1660 -#else
1661 -#define PGD_ALLOCATION_ORDER 0
1662 -#endif
1663 -
1664 /*
1665 * Allocate and free page tables.
1666 */
1667 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
1668 index 2536f90cd30c..5af0401ccff2 100644
1669 --- a/arch/x86/include/asm/pgtable.h
1670 +++ b/arch/x86/include/asm/pgtable.h
1671 @@ -20,9 +20,15 @@
1672
1673 #ifdef CONFIG_PAGE_TABLE_ISOLATION
1674 extern int kaiser_enabled;
1675 +/*
1676 + * Instead of one PGD, we acquire two PGDs. Being order-1, it is
1677 + * both 8k in size and 8k-aligned. That lets us just flip bit 12
1678 + * in a pointer to swap between the two 4k halves.
1679 + */
1680 #else
1681 #define kaiser_enabled 0
1682 #endif
1683 +#define PGD_ALLOCATION_ORDER kaiser_enabled
1684
1685 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
1686 void ptdump_walk_pgd_level_checkwx(void);
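
The PGD_ALLOCATION_ORDER change above makes the order depend on kaiser_enabled at run time: order 0 is a plain 4k PGD, order 1 gives the 8k, 8k-aligned pair described in the comment. A minimal sketch of the bit-12 trick that pairing enables (helper name invented for illustration, not kernel code):

    /*
     * With an order-1, 8k-aligned PGD the shadow half sits exactly one
     * 4k page above the kernel half, so toggling bit 12 of the pointer
     * switches between the two.
     */
    static inline unsigned long kaiser_other_pgd_half(unsigned long pgd_va)
    {
            return pgd_va ^ PAGE_SIZE;      /* PAGE_SIZE == 4096 == 1 << 12 */
    }
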
1687 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
1688 index 353f038ec645..cb866ae1bc5d 100644
1689 --- a/arch/x86/include/asm/processor.h
1690 +++ b/arch/x86/include/asm/processor.h
1691 @@ -391,8 +391,6 @@ struct thread_struct {
1692 unsigned short gsindex;
1693 #endif
1694
1695 - u32 status; /* thread synchronous flags */
1696 -
1697 #ifdef CONFIG_X86_64
1698 unsigned long fsbase;
1699 unsigned long gsbase;
1700 diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
1701 index fac9a5c0abe9..6847d85400a8 100644
1702 --- a/arch/x86/include/asm/required-features.h
1703 +++ b/arch/x86/include/asm/required-features.h
1704 @@ -100,6 +100,7 @@
1705 #define REQUIRED_MASK15 0
1706 #define REQUIRED_MASK16 0
1707 #define REQUIRED_MASK17 0
1708 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
1709 +#define REQUIRED_MASK18 0
1710 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
1711
1712 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
1713 diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
1714 index e3c95e8e61c5..03eedc21246d 100644
1715 --- a/arch/x86/include/asm/syscall.h
1716 +++ b/arch/x86/include/asm/syscall.h
1717 @@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
1718 * TS_COMPAT is set for 32-bit syscall entries and then
1719 * remains set until we return to user mode.
1720 */
1721 - if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
1722 + if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED))
1723 /*
1724 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
1725 * and will match correctly in comparisons.
1726 @@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
1727 unsigned long *args)
1728 {
1729 # ifdef CONFIG_IA32_EMULATION
1730 - if (task->thread.status & TS_COMPAT)
1731 + if (task->thread_info.status & TS_COMPAT)
1732 switch (i) {
1733 case 0:
1734 if (!n--) break;
1735 @@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
1736 const unsigned long *args)
1737 {
1738 # ifdef CONFIG_IA32_EMULATION
1739 - if (task->thread.status & TS_COMPAT)
1740 + if (task->thread_info.status & TS_COMPAT)
1741 switch (i) {
1742 case 0:
1743 if (!n--) break;
1744 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
1745 index bdf9c4c91572..89978b9c667a 100644
1746 --- a/arch/x86/include/asm/thread_info.h
1747 +++ b/arch/x86/include/asm/thread_info.h
1748 @@ -54,6 +54,7 @@ struct task_struct;
1749
1750 struct thread_info {
1751 unsigned long flags; /* low level flags */
1752 + u32 status; /* thread synchronous flags */
1753 };
1754
1755 #define INIT_THREAD_INFO(tsk) \
1756 @@ -213,7 +214,7 @@ static inline int arch_within_stack_frames(const void * const stack,
1757 #define in_ia32_syscall() true
1758 #else
1759 #define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
1760 - current->thread.status & TS_COMPAT)
1761 + current_thread_info()->status & TS_COMPAT)
1762 #endif
1763
1764 /*
1765 diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
1766 index dead0f3921f3..a8d85a687cf4 100644
1767 --- a/arch/x86/include/asm/uaccess.h
1768 +++ b/arch/x86/include/asm/uaccess.h
1769 @@ -123,6 +123,11 @@ extern int __get_user_bad(void);
1770
1771 #define __uaccess_begin() stac()
1772 #define __uaccess_end() clac()
1773 +#define __uaccess_begin_nospec() \
1774 +({ \
1775 + stac(); \
1776 + barrier_nospec(); \
1777 +})
1778
1779 /*
1780 * This is a type: either unsigned long, if the argument fits into
1781 @@ -432,7 +437,7 @@ do { \
1782 ({ \
1783 int __gu_err; \
1784 __inttype(*(ptr)) __gu_val; \
1785 - __uaccess_begin(); \
1786 + __uaccess_begin_nospec(); \
1787 __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
1788 __uaccess_end(); \
1789 (x) = (__force __typeof__(*(ptr)))__gu_val; \
1790 @@ -474,6 +479,10 @@ struct __large_struct { unsigned long buf[100]; };
1791 __uaccess_begin(); \
1792 barrier();
1793
1794 +#define uaccess_try_nospec do { \
1795 + current->thread.uaccess_err = 0; \
1796 + __uaccess_begin_nospec(); \
1797 +
1798 #define uaccess_catch(err) \
1799 __uaccess_end(); \
1800 (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \
1801 @@ -538,7 +547,7 @@ struct __large_struct { unsigned long buf[100]; };
1802 * get_user_ex(...);
1803 * } get_user_catch(err)
1804 */
1805 -#define get_user_try uaccess_try
1806 +#define get_user_try uaccess_try_nospec
1807 #define get_user_catch(err) uaccess_catch(err)
1808
1809 #define get_user_ex(x, ptr) do { \
1810 @@ -573,7 +582,7 @@ extern void __cmpxchg_wrong_size(void)
1811 __typeof__(ptr) __uval = (uval); \
1812 __typeof__(*(ptr)) __old = (old); \
1813 __typeof__(*(ptr)) __new = (new); \
1814 - __uaccess_begin(); \
1815 + __uaccess_begin_nospec(); \
1816 switch (size) { \
1817 case 1: \
1818 { \
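
The uaccess hunks above insert barrier_nospec() between the STAC and the user access. A minimal sketch of the ordering this enforces, written out by hand for an open-coded user read (illustration only, assuming the usual kernel uaccess helpers and the 4.9-era three-argument access_ok()):

    /*
     * The access_ok() check must be architecturally resolved before the
     * user pointer is dereferenced; without the barrier the load can
     * still be issued under speculation with an out-of-range pointer.
     */
    static int read_user_int_sketch(const int __user *uptr, int *out)
    {
            if (!access_ok(VERIFY_READ, uptr, sizeof(*uptr)))
                    return -EFAULT;

            barrier_nospec();       /* no speculative deref past the check */

            return get_user(*out, uptr);
    }
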
1819 diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
1820 index 7d3bdd1ed697..d6d245088dd5 100644
1821 --- a/arch/x86/include/asm/uaccess_32.h
1822 +++ b/arch/x86/include/asm/uaccess_32.h
1823 @@ -102,17 +102,17 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
1824
1825 switch (n) {
1826 case 1:
1827 - __uaccess_begin();
1828 + __uaccess_begin_nospec();
1829 __get_user_size(*(u8 *)to, from, 1, ret, 1);
1830 __uaccess_end();
1831 return ret;
1832 case 2:
1833 - __uaccess_begin();
1834 + __uaccess_begin_nospec();
1835 __get_user_size(*(u16 *)to, from, 2, ret, 2);
1836 __uaccess_end();
1837 return ret;
1838 case 4:
1839 - __uaccess_begin();
1840 + __uaccess_begin_nospec();
1841 __get_user_size(*(u32 *)to, from, 4, ret, 4);
1842 __uaccess_end();
1843 return ret;
1844 @@ -130,17 +130,17 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
1845
1846 switch (n) {
1847 case 1:
1848 - __uaccess_begin();
1849 + __uaccess_begin_nospec();
1850 __get_user_size(*(u8 *)to, from, 1, ret, 1);
1851 __uaccess_end();
1852 return ret;
1853 case 2:
1854 - __uaccess_begin();
1855 + __uaccess_begin_nospec();
1856 __get_user_size(*(u16 *)to, from, 2, ret, 2);
1857 __uaccess_end();
1858 return ret;
1859 case 4:
1860 - __uaccess_begin();
1861 + __uaccess_begin_nospec();
1862 __get_user_size(*(u32 *)to, from, 4, ret, 4);
1863 __uaccess_end();
1864 return ret;
1865 diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
1866 index 673059a109fe..6e5cc08134ba 100644
1867 --- a/arch/x86/include/asm/uaccess_64.h
1868 +++ b/arch/x86/include/asm/uaccess_64.h
1869 @@ -59,31 +59,31 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
1870 return copy_user_generic(dst, (__force void *)src, size);
1871 switch (size) {
1872 case 1:
1873 - __uaccess_begin();
1874 + __uaccess_begin_nospec();
1875 __get_user_asm(*(u8 *)dst, (u8 __user *)src,
1876 ret, "b", "b", "=q", 1);
1877 __uaccess_end();
1878 return ret;
1879 case 2:
1880 - __uaccess_begin();
1881 + __uaccess_begin_nospec();
1882 __get_user_asm(*(u16 *)dst, (u16 __user *)src,
1883 ret, "w", "w", "=r", 2);
1884 __uaccess_end();
1885 return ret;
1886 case 4:
1887 - __uaccess_begin();
1888 + __uaccess_begin_nospec();
1889 __get_user_asm(*(u32 *)dst, (u32 __user *)src,
1890 ret, "l", "k", "=r", 4);
1891 __uaccess_end();
1892 return ret;
1893 case 8:
1894 - __uaccess_begin();
1895 + __uaccess_begin_nospec();
1896 __get_user_asm(*(u64 *)dst, (u64 __user *)src,
1897 ret, "q", "", "=r", 8);
1898 __uaccess_end();
1899 return ret;
1900 case 10:
1901 - __uaccess_begin();
1902 + __uaccess_begin_nospec();
1903 __get_user_asm(*(u64 *)dst, (u64 __user *)src,
1904 ret, "q", "", "=r", 10);
1905 if (likely(!ret))
1906 @@ -93,7 +93,7 @@ int __copy_from_user_nocheck(void *dst, const void __user *src, unsigned size)
1907 __uaccess_end();
1908 return ret;
1909 case 16:
1910 - __uaccess_begin();
1911 + __uaccess_begin_nospec();
1912 __get_user_asm(*(u64 *)dst, (u64 __user *)src,
1913 ret, "q", "", "=r", 16);
1914 if (likely(!ret))
1915 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
1916 index 10d5a3d6affc..03b6e5c6cf23 100644
1917 --- a/arch/x86/kernel/alternative.c
1918 +++ b/arch/x86/kernel/alternative.c
1919 @@ -46,17 +46,6 @@ static int __init setup_noreplace_smp(char *str)
1920 }
1921 __setup("noreplace-smp", setup_noreplace_smp);
1922
1923 -#ifdef CONFIG_PARAVIRT
1924 -static int __initdata_or_module noreplace_paravirt = 0;
1925 -
1926 -static int __init setup_noreplace_paravirt(char *str)
1927 -{
1928 - noreplace_paravirt = 1;
1929 - return 1;
1930 -}
1931 -__setup("noreplace-paravirt", setup_noreplace_paravirt);
1932 -#endif
1933 -
1934 #define DPRINTK(fmt, args...) \
1935 do { \
1936 if (debug_alternative) \
1937 @@ -588,9 +577,6 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
1938 struct paravirt_patch_site *p;
1939 char insnbuf[MAX_PATCH_LEN];
1940
1941 - if (noreplace_paravirt)
1942 - return;
1943 -
1944 for (p = start; p < end; p++) {
1945 unsigned int used;
1946
1947 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1948 index 8cacf62ec458..957ad443b786 100644
1949 --- a/arch/x86/kernel/cpu/bugs.c
1950 +++ b/arch/x86/kernel/cpu/bugs.c
1951 @@ -10,6 +10,7 @@
1952 #include <linux/init.h>
1953 #include <linux/utsname.h>
1954 #include <linux/cpu.h>
1955 +#include <linux/module.h>
1956
1957 #include <asm/nospec-branch.h>
1958 #include <asm/cmdline.h>
1959 @@ -89,20 +90,41 @@ static const char *spectre_v2_strings[] = {
1960 };
1961
1962 #undef pr_fmt
1963 -#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1964 +#define pr_fmt(fmt) "Spectre V2 : " fmt
1965
1966 static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1967
1968 +#ifdef RETPOLINE
1969 +static bool spectre_v2_bad_module;
1970 +
1971 +bool retpoline_module_ok(bool has_retpoline)
1972 +{
1973 + if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
1974 + return true;
1975 +
1976 + pr_err("System may be vulnerable to spectre v2\n");
1977 + spectre_v2_bad_module = true;
1978 + return false;
1979 +}
1980 +
1981 +static inline const char *spectre_v2_module_string(void)
1982 +{
1983 + return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
1984 +}
1985 +#else
1986 +static inline const char *spectre_v2_module_string(void) { return ""; }
1987 +#endif
1988 +
1989 static void __init spec2_print_if_insecure(const char *reason)
1990 {
1991 if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1992 - pr_info("%s\n", reason);
1993 + pr_info("%s selected on command line.\n", reason);
1994 }
1995
1996 static void __init spec2_print_if_secure(const char *reason)
1997 {
1998 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1999 - pr_info("%s\n", reason);
2000 + pr_info("%s selected on command line.\n", reason);
2001 }
2002
2003 static inline bool retp_compiler(void)
2004 @@ -117,42 +139,68 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
2005 return len == arglen && !strncmp(arg, opt, len);
2006 }
2007
2008 +static const struct {
2009 + const char *option;
2010 + enum spectre_v2_mitigation_cmd cmd;
2011 + bool secure;
2012 +} mitigation_options[] = {
2013 + { "off", SPECTRE_V2_CMD_NONE, false },
2014 + { "on", SPECTRE_V2_CMD_FORCE, true },
2015 + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false },
2016 + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false },
2017 + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
2018 + { "auto", SPECTRE_V2_CMD_AUTO, false },
2019 +};
2020 +
2021 static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
2022 {
2023 char arg[20];
2024 - int ret;
2025 -
2026 - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
2027 - sizeof(arg));
2028 - if (ret > 0) {
2029 - if (match_option(arg, ret, "off")) {
2030 - goto disable;
2031 - } else if (match_option(arg, ret, "on")) {
2032 - spec2_print_if_secure("force enabled on command line.");
2033 - return SPECTRE_V2_CMD_FORCE;
2034 - } else if (match_option(arg, ret, "retpoline")) {
2035 - spec2_print_if_insecure("retpoline selected on command line.");
2036 - return SPECTRE_V2_CMD_RETPOLINE;
2037 - } else if (match_option(arg, ret, "retpoline,amd")) {
2038 - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
2039 - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
2040 - return SPECTRE_V2_CMD_AUTO;
2041 - }
2042 - spec2_print_if_insecure("AMD retpoline selected on command line.");
2043 - return SPECTRE_V2_CMD_RETPOLINE_AMD;
2044 - } else if (match_option(arg, ret, "retpoline,generic")) {
2045 - spec2_print_if_insecure("generic retpoline selected on command line.");
2046 - return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
2047 - } else if (match_option(arg, ret, "auto")) {
2048 + int ret, i;
2049 + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
2050 +
2051 + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
2052 + return SPECTRE_V2_CMD_NONE;
2053 + else {
2054 + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
2055 + sizeof(arg));
2056 + if (ret < 0)
2057 return SPECTRE_V2_CMD_AUTO;
2058 +
2059 + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
2060 + if (!match_option(arg, ret, mitigation_options[i].option))
2061 + continue;
2062 + cmd = mitigation_options[i].cmd;
2063 + break;
2064 }
2065 +
2066 + if (i >= ARRAY_SIZE(mitigation_options)) {
2067 + pr_err("unknown option (%s). Switching to AUTO select\n",
2068 + mitigation_options[i].option);
2069 + return SPECTRE_V2_CMD_AUTO;
2070 + }
2071 + }
2072 +
2073 + if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
2074 + cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
2075 + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
2076 + !IS_ENABLED(CONFIG_RETPOLINE)) {
2077 + pr_err("%s selected but not compiled in. Switching to AUTO select\n",
2078 + mitigation_options[i].option);
2079 + return SPECTRE_V2_CMD_AUTO;
2080 }
2081
2082 - if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
2083 + if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
2084 + boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
2085 + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
2086 return SPECTRE_V2_CMD_AUTO;
2087 -disable:
2088 - spec2_print_if_insecure("disabled on command line.");
2089 - return SPECTRE_V2_CMD_NONE;
2090 + }
2091 +
2092 + if (mitigation_options[i].secure)
2093 + spec2_print_if_secure(mitigation_options[i].option);
2094 + else
2095 + spec2_print_if_insecure(mitigation_options[i].option);
2096 +
2097 + return cmd;
2098 }
2099
2100 /* Check for Skylake-like CPUs (for RSB handling) */
2101 @@ -190,10 +238,10 @@ static void __init spectre_v2_select_mitigation(void)
2102 return;
2103
2104 case SPECTRE_V2_CMD_FORCE:
2105 - /* FALLTRHU */
2106 case SPECTRE_V2_CMD_AUTO:
2107 - goto retpoline_auto;
2108 -
2109 + if (IS_ENABLED(CONFIG_RETPOLINE))
2110 + goto retpoline_auto;
2111 + break;
2112 case SPECTRE_V2_CMD_RETPOLINE_AMD:
2113 if (IS_ENABLED(CONFIG_RETPOLINE))
2114 goto retpoline_amd;
2115 @@ -248,6 +296,12 @@ static void __init spectre_v2_select_mitigation(void)
2116 setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
2117 pr_info("Filling RSB on context switch\n");
2118 }
2119 +
2120 + /* Initialize Indirect Branch Prediction Barrier if supported */
2121 + if (boot_cpu_has(X86_FEATURE_IBPB)) {
2122 + setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
2123 + pr_info("Enabling Indirect Branch Prediction Barrier\n");
2124 + }
2125 }
2126
2127 #undef pr_fmt
2128 @@ -268,7 +322,7 @@ ssize_t cpu_show_spectre_v1(struct device *dev,
2129 {
2130 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
2131 return sprintf(buf, "Not affected\n");
2132 - return sprintf(buf, "Vulnerable\n");
2133 + return sprintf(buf, "Mitigation: __user pointer sanitization\n");
2134 }
2135
2136 ssize_t cpu_show_spectre_v2(struct device *dev,
2137 @@ -277,6 +331,8 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
2138 if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
2139 return sprintf(buf, "Not affected\n");
2140
2141 - return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
2142 + return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
2143 + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
2144 + spectre_v2_module_string());
2145 }
2146 #endif
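
The parser rewrite above replaces an if/else chain with a walk over mitigation_options[], and the chosen mitigation is also reported at run time via /sys/devices/system/cpu/vulnerabilities/spectre_v2. A toy version of the same table-driven pattern, with invented names and a trimmed option list (illustration only, not kernel code):

    #include <stdio.h>
    #include <string.h>

    struct option { const char *name; int cmd; };

    static const struct option opts[] = {
            { "off", 0 }, { "on", 1 }, { "retpoline", 2 }, { "auto", 3 },
    };

    static int parse_opt(const char *arg)
    {
            size_t i;

            for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++)
                    if (!strcmp(arg, opts[i].name))
                            return opts[i].cmd;

            fprintf(stderr, "unknown option (%s), falling back to auto\n", arg);
            return 3;       /* auto */
    }
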
2147 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2148 index d198ae02f2b7..08e89ed6aa87 100644
2149 --- a/arch/x86/kernel/cpu/common.c
2150 +++ b/arch/x86/kernel/cpu/common.c
2151 @@ -44,6 +44,8 @@
2152 #include <asm/pat.h>
2153 #include <asm/microcode.h>
2154 #include <asm/microcode_intel.h>
2155 +#include <asm/intel-family.h>
2156 +#include <asm/cpu_device_id.h>
2157
2158 #ifdef CONFIG_X86_LOCAL_APIC
2159 #include <asm/uv/uv.h>
2160 @@ -716,6 +718,26 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
2161 }
2162 }
2163
2164 +static void init_speculation_control(struct cpuinfo_x86 *c)
2165 +{
2166 + /*
2167 + * The Intel SPEC_CTRL CPUID bit implies IBRS and IBPB support,
2168 + * and they also have a different bit for STIBP support. Also,
2169 + * a hypervisor might have set the individual AMD bits even on
2170 + * Intel CPUs, for finer-grained selection of what's available.
2171 + *
2172 + * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
2173 + * features, which are visible in /proc/cpuinfo and used by the
2174 + * kernel. So set those accordingly from the Intel bits.
2175 + */
2176 + if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
2177 + set_cpu_cap(c, X86_FEATURE_IBRS);
2178 + set_cpu_cap(c, X86_FEATURE_IBPB);
2179 + }
2180 + if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
2181 + set_cpu_cap(c, X86_FEATURE_STIBP);
2182 +}
2183 +
2184 void get_cpu_cap(struct cpuinfo_x86 *c)
2185 {
2186 u32 eax, ebx, ecx, edx;
2187 @@ -737,6 +759,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
2188 cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
2189 c->x86_capability[CPUID_7_0_EBX] = ebx;
2190 c->x86_capability[CPUID_7_ECX] = ecx;
2191 + c->x86_capability[CPUID_7_EDX] = edx;
2192 }
2193
2194 /* Extended state features: level 0x0000000d */
2195 @@ -809,6 +832,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
2196 c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
2197
2198 init_scattered_cpuid_features(c);
2199 + init_speculation_control(c);
2200 }
2201
2202 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
2203 @@ -837,6 +861,41 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
2204 #endif
2205 }
2206
2207 +static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
2208 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
2209 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
2210 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
2211 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
2212 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
2213 + { X86_VENDOR_CENTAUR, 5 },
2214 + { X86_VENDOR_INTEL, 5 },
2215 + { X86_VENDOR_NSC, 5 },
2216 + { X86_VENDOR_ANY, 4 },
2217 + {}
2218 +};
2219 +
2220 +static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
2221 + { X86_VENDOR_AMD },
2222 + {}
2223 +};
2224 +
2225 +static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
2226 +{
2227 + u64 ia32_cap = 0;
2228 +
2229 + if (x86_match_cpu(cpu_no_meltdown))
2230 + return false;
2231 +
2232 + if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
2233 + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
2234 +
2235 + /* Rogue Data Cache Load? No! */
2236 + if (ia32_cap & ARCH_CAP_RDCL_NO)
2237 + return false;
2238 +
2239 + return true;
2240 +}
2241 +
2242 /*
2243 * Do minimum CPU detection early.
2244 * Fields really needed: vendor, cpuid_level, family, model, mask,
2245 @@ -883,11 +942,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
2246
2247 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
2248
2249 - if (c->x86_vendor != X86_VENDOR_AMD)
2250 - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
2251 -
2252 - setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
2253 - setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
2254 + if (!x86_match_cpu(cpu_no_speculation)) {
2255 + if (cpu_vulnerable_to_meltdown(c))
2256 + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
2257 + setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
2258 + setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
2259 + }
2260
2261 fpu__init_system(c);
2262
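
cpu_vulnerable_to_meltdown() above keys off MSR_IA32_ARCH_CAPABILITIES. For reference, the same check with the architectural numbers written out (helper name invented; illustration only):

    /*
     * IA32_ARCH_CAPABILITIES is MSR 0x10a; bit 0 (RDCL_NO) means the CPU
     * is not susceptible to Rogue Data Cache Load, so the Meltdown bug
     * flag can be left clear.
     */
    static bool cpu_sets_rdcl_no(struct cpuinfo_x86 *c)
    {
            u64 cap = 0;

            if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
                    rdmsrl(0x0000010a /* MSR_IA32_ARCH_CAPABILITIES */, cap);

            return cap & 0x1;       /* ARCH_CAP_RDCL_NO */
    }
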
2263 diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
2264 index fcd484d2bb03..4097b43cba2d 100644
2265 --- a/arch/x86/kernel/cpu/intel.c
2266 +++ b/arch/x86/kernel/cpu/intel.c
2267 @@ -61,6 +61,59 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
2268 }
2269 }
2270
2271 +/*
2272 + * Early microcode releases for the Spectre v2 mitigation were broken.
2273 + * Information taken from;
2274 + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
2275 + * - https://kb.vmware.com/s/article/52345
2276 + * - Microcode revisions observed in the wild
2277 + * - Release note from 20180108 microcode release
2278 + */
2279 +struct sku_microcode {
2280 + u8 model;
2281 + u8 stepping;
2282 + u32 microcode;
2283 +};
2284 +static const struct sku_microcode spectre_bad_microcodes[] = {
2285 + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x84 },
2286 + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x84 },
2287 + { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x84 },
2288 + { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x84 },
2289 + { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x84 },
2290 + { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
2291 + { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
2292 + { INTEL_FAM6_SKYLAKE_MOBILE, 0x03, 0xc2 },
2293 + { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
2294 + { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
2295 + { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
2296 + { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
2297 + { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
2298 + { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
2299 + { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
2300 + { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
2301 + { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
2302 + { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
2303 + { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
2304 + { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
2305 + /* Updated in the 20180108 release; blacklist until we know otherwise */
2306 + { INTEL_FAM6_ATOM_GEMINI_LAKE, 0x01, 0x22 },
2307 + /* Observed in the wild */
2308 + { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
2309 + { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
2310 +};
2311 +
2312 +static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
2313 +{
2314 + int i;
2315 +
2316 + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
2317 + if (c->x86_model == spectre_bad_microcodes[i].model &&
2318 + c->x86_mask == spectre_bad_microcodes[i].stepping)
2319 + return (c->microcode <= spectre_bad_microcodes[i].microcode);
2320 + }
2321 + return false;
2322 +}
2323 +
2324 static void early_init_intel(struct cpuinfo_x86 *c)
2325 {
2326 u64 misc_enable;
2327 @@ -87,6 +140,19 @@ static void early_init_intel(struct cpuinfo_x86 *c)
2328 rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
2329 }
2330
2331 + /* Now if any of them are set, check the blacklist and clear the lot */
2332 + if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) ||
2333 + cpu_has(c, X86_FEATURE_INTEL_STIBP) ||
2334 + cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) ||
2335 + cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) {
2336 + pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n");
2337 + setup_clear_cpu_cap(X86_FEATURE_IBRS);
2338 + setup_clear_cpu_cap(X86_FEATURE_IBPB);
2339 + setup_clear_cpu_cap(X86_FEATURE_STIBP);
2340 + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
2341 + setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
2342 + }
2343 +
2344 /*
2345 * Atom erratum AAE44/AAF40/AAG38/AAH41:
2346 *
2347 diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
2348 index 5ce5155f0695..0afaf00b029b 100644
2349 --- a/arch/x86/kernel/cpu/microcode/core.c
2350 +++ b/arch/x86/kernel/cpu/microcode/core.c
2351 @@ -43,7 +43,7 @@
2352 #define MICROCODE_VERSION "2.01"
2353
2354 static struct microcode_ops *microcode_ops;
2355 -static bool dis_ucode_ldr;
2356 +static bool dis_ucode_ldr = true;
2357
2358 /*
2359 * Synchronization.
2360 @@ -73,6 +73,7 @@ struct cpu_info_ctx {
2361 static bool __init check_loader_disabled_bsp(void)
2362 {
2363 static const char *__dis_opt_str = "dis_ucode_ldr";
2364 + u32 a, b, c, d;
2365
2366 #ifdef CONFIG_X86_32
2367 const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
2368 @@ -85,8 +86,20 @@ static bool __init check_loader_disabled_bsp(void)
2369 bool *res = &dis_ucode_ldr;
2370 #endif
2371
2372 - if (cmdline_find_option_bool(cmdline, option))
2373 - *res = true;
2374 + a = 1;
2375 + c = 0;
2376 + native_cpuid(&a, &b, &c, &d);
2377 +
2378 + /*
2379 + * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
2380 + * completely accurate as xen pv guests don't see that CPUID bit set but
2381 + * that's good enough as they don't land on the BSP path anyway.
2382 + */
2383 + if (c & BIT(31))
2384 + return *res;
2385 +
2386 + if (cmdline_find_option_bool(cmdline, option) <= 0)
2387 + *res = false;
2388
2389 return *res;
2390 }
2391 @@ -114,9 +127,7 @@ void __init load_ucode_bsp(void)
2392 {
2393 int vendor;
2394 unsigned int family;
2395 -
2396 - if (check_loader_disabled_bsp())
2397 - return;
2398 + bool intel = true;
2399
2400 if (!have_cpuid_p())
2401 return;
2402 @@ -126,16 +137,27 @@ void __init load_ucode_bsp(void)
2403
2404 switch (vendor) {
2405 case X86_VENDOR_INTEL:
2406 - if (family >= 6)
2407 - load_ucode_intel_bsp();
2408 + if (family < 6)
2409 + return;
2410 break;
2411 +
2412 case X86_VENDOR_AMD:
2413 - if (family >= 0x10)
2414 - load_ucode_amd_bsp(family);
2415 + if (family < 0x10)
2416 + return;
2417 + intel = false;
2418 break;
2419 +
2420 default:
2421 - break;
2422 + return;
2423 }
2424 +
2425 + if (check_loader_disabled_bsp())
2426 + return;
2427 +
2428 + if (intel)
2429 + load_ucode_intel_bsp();
2430 + else
2431 + load_ucode_amd_bsp(family);
2432 }
2433
2434 static bool check_loader_disabled_ap(void)
2435 @@ -154,9 +176,6 @@ void load_ucode_ap(void)
2436 if (check_loader_disabled_ap())
2437 return;
2438
2439 - if (!have_cpuid_p())
2440 - return;
2441 -
2442 vendor = x86_cpuid_vendor();
2443 family = x86_cpuid_family();
2444
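
The loader change above bails out early when CPUID.1:ECX[31] is set, i.e. when running as a guest, instead of relying on the command line alone. A user-space sketch of the same probe using GCC's cpuid.h rather than the kernel's native_cpuid() (illustration only):

    #include <cpuid.h>

    /* Leaf 1, ECX bit 31 is the "running under a hypervisor" bit. */
    static int running_under_hypervisor(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    return 0;

            return !!(ecx & (1u << 31));
    }
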
2445 diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
2446 index b0dd9aec183d..afbb52532791 100644
2447 --- a/arch/x86/kernel/cpu/scattered.c
2448 +++ b/arch/x86/kernel/cpu/scattered.c
2449 @@ -31,8 +31,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
2450 const struct cpuid_bit *cb;
2451
2452 static const struct cpuid_bit cpuid_bits[] = {
2453 - { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
2454 - { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
2455 { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
2456 { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
2457 { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
2458 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
2459 index 0887d2ae3797..dffe81d3c261 100644
2460 --- a/arch/x86/kernel/process_64.c
2461 +++ b/arch/x86/kernel/process_64.c
2462 @@ -538,7 +538,7 @@ void set_personality_ia32(bool x32)
2463 current->personality &= ~READ_IMPLIES_EXEC;
2464 /* in_compat_syscall() uses the presence of the x32
2465 syscall bit flag to determine compat status */
2466 - current->thread.status &= ~TS_COMPAT;
2467 + current_thread_info()->status &= ~TS_COMPAT;
2468 } else {
2469 set_thread_flag(TIF_IA32);
2470 clear_thread_flag(TIF_X32);
2471 @@ -546,7 +546,7 @@ void set_personality_ia32(bool x32)
2472 current->mm->context.ia32_compat = TIF_IA32;
2473 current->personality |= force_personality32;
2474 /* Prepare the first "return" to user space */
2475 - current->thread.status |= TS_COMPAT;
2476 + current_thread_info()->status |= TS_COMPAT;
2477 }
2478 }
2479 EXPORT_SYMBOL_GPL(set_personality_ia32);
2480 diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
2481 index 0e63c0267f99..e497d374412a 100644
2482 --- a/arch/x86/kernel/ptrace.c
2483 +++ b/arch/x86/kernel/ptrace.c
2484 @@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
2485 */
2486 regs->orig_ax = value;
2487 if (syscall_get_nr(child, regs) >= 0)
2488 - child->thread.status |= TS_I386_REGS_POKED;
2489 + child->thread_info.status |= TS_I386_REGS_POKED;
2490 break;
2491
2492 case offsetof(struct user32, regs.eflags):
2493 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
2494 index 763af1d0de64..b1a5d252d482 100644
2495 --- a/arch/x86/kernel/signal.c
2496 +++ b/arch/x86/kernel/signal.c
2497 @@ -785,7 +785,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
2498 * than the tracee.
2499 */
2500 #ifdef CONFIG_IA32_EMULATION
2501 - if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
2502 + if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
2503 return __NR_ia32_restart_syscall;
2504 #endif
2505 #ifdef CONFIG_X86_X32_ABI
2506 diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
2507 index 8402907825b0..21454e254a4c 100644
2508 --- a/arch/x86/kernel/tboot.c
2509 +++ b/arch/x86/kernel/tboot.c
2510 @@ -134,6 +134,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
2511 return -1;
2512 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
2513 pte_unmap(pte);
2514 +
2515 + /*
2516 + * PTI poisons low addresses in the kernel page tables in the
2517 + * name of making them unusable for userspace. To execute
2518 + * code at such a low address, the poison must be cleared.
2519 + *
2520 + * Note: 'pgd' actually gets set in pud_alloc().
2521 + */
2522 + pgd->pgd &= ~_PAGE_NX;
2523 +
2524 return 0;
2525 }
2526
2527 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
2528 index 91af75e37306..93f924de06cf 100644
2529 --- a/arch/x86/kvm/cpuid.c
2530 +++ b/arch/x86/kvm/cpuid.c
2531 @@ -355,6 +355,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2532 F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
2533 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2534
2535 + /* cpuid 0x80000008.ebx */
2536 + const u32 kvm_cpuid_8000_0008_ebx_x86_features =
2537 + F(IBPB) | F(IBRS);
2538 +
2539 /* cpuid 0xC0000001.edx */
2540 const u32 kvm_cpuid_C000_0001_edx_x86_features =
2541 F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
2542 @@ -376,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2543 /* cpuid 7.0.ecx*/
2544 const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
2545
2546 + /* cpuid 7.0.edx*/
2547 + const u32 kvm_cpuid_7_0_edx_x86_features =
2548 + F(SPEC_CTRL) | F(ARCH_CAPABILITIES);
2549 +
2550 /* all calls to cpuid_count() should be made on the same cpu */
2551 get_cpu();
2552
2553 @@ -458,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2554 /* PKU is not yet implemented for shadow paging. */
2555 if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
2556 entry->ecx &= ~F(PKU);
2557 + entry->edx &= kvm_cpuid_7_0_edx_x86_features;
2558 + cpuid_mask(&entry->edx, CPUID_7_EDX);
2559 } else {
2560 entry->ebx = 0;
2561 entry->ecx = 0;
2562 + entry->edx = 0;
2563 }
2564 entry->eax = 0;
2565 - entry->edx = 0;
2566 break;
2567 }
2568 case 9:
2569 @@ -607,7 +617,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2570 if (!g_phys_as)
2571 g_phys_as = phys_as;
2572 entry->eax = g_phys_as | (virt_as << 8);
2573 - entry->ebx = entry->edx = 0;
2574 + entry->edx = 0;
2575 + /* IBRS and IBPB aren't necessarily present in hardware cpuid */
2576 + if (boot_cpu_has(X86_FEATURE_IBPB))
2577 + entry->ebx |= F(IBPB);
2578 + if (boot_cpu_has(X86_FEATURE_IBRS))
2579 + entry->ebx |= F(IBRS);
2580 + entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
2581 + cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
2582 break;
2583 }
2584 case 0x80000019:
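
With the 0x80000008.EBX bits now populated, a guest can discover IBPB even on hardware that only reports the Intel SPEC_CTRL bit. A guest-side sketch of the probe (illustration only; the bit position follows the AMD-defined encoding, IBPB being bit 12 of EBX, which is an assumption not spelled out in the hunk above):

    #include <cpuid.h>

    static int guest_sees_ibpb(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
                    return 0;

            return !!(ebx & (1u << 12));    /* CPUID.80000008H:EBX.IBPB */
    }
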
2585 diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
2586 index 9368fecca3ee..d1beb7156704 100644
2587 --- a/arch/x86/kvm/cpuid.h
2588 +++ b/arch/x86/kvm/cpuid.h
2589 @@ -160,6 +160,37 @@ static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
2590 return best && (best->edx & bit(X86_FEATURE_RDTSCP));
2591 }
2592
2593 +static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
2594 +{
2595 + struct kvm_cpuid_entry2 *best;
2596 +
2597 + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
2598 + if (best && (best->ebx & bit(X86_FEATURE_IBPB)))
2599 + return true;
2600 + best = kvm_find_cpuid_entry(vcpu, 7, 0);
2601 + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
2602 +}
2603 +
2604 +static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu)
2605 +{
2606 + struct kvm_cpuid_entry2 *best;
2607 +
2608 + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
2609 + if (best && (best->ebx & bit(X86_FEATURE_IBRS)))
2610 + return true;
2611 + best = kvm_find_cpuid_entry(vcpu, 7, 0);
2612 + return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
2613 +}
2614 +
2615 +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
2616 +{
2617 + struct kvm_cpuid_entry2 *best;
2618 +
2619 + best = kvm_find_cpuid_entry(vcpu, 7, 0);
2620 + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
2621 +}
2622 +
2623 +
2624 /*
2625 * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
2626 */
2627 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
2628 index 6f5a3b076341..c8d573822e60 100644
2629 --- a/arch/x86/kvm/emulate.c
2630 +++ b/arch/x86/kvm/emulate.c
2631 @@ -25,6 +25,7 @@
2632 #include <asm/kvm_emulate.h>
2633 #include <linux/stringify.h>
2634 #include <asm/debugreg.h>
2635 +#include <asm/nospec-branch.h>
2636
2637 #include "x86.h"
2638 #include "tss.h"
2639 @@ -1012,8 +1013,8 @@ static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
2640 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
2641
2642 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
2643 - asm("push %[flags]; popf; call *%[fastop]"
2644 - : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
2645 + asm("push %[flags]; popf; " CALL_NOSPEC
2646 + : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
2647 return rc;
2648 }
2649
2650 @@ -5306,15 +5307,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
2651
2652 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
2653 {
2654 - register void *__sp asm(_ASM_SP);
2655 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
2656
2657 if (!(ctxt->d & ByteOp))
2658 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
2659
2660 - asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
2661 + asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
2662 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
2663 - [fastop]"+S"(fop), "+r"(__sp)
2664 + [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
2665 : "c"(ctxt->src2.val));
2666
2667 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
2668 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
2669 index 24af898fb3a6..be644afab1bb 100644
2670 --- a/arch/x86/kvm/svm.c
2671 +++ b/arch/x86/kvm/svm.c
2672 @@ -183,6 +183,8 @@ struct vcpu_svm {
2673 u64 gs_base;
2674 } host;
2675
2676 + u64 spec_ctrl;
2677 +
2678 u32 *msrpm;
2679
2680 ulong nmi_iret_rip;
2681 @@ -248,6 +250,8 @@ static const struct svm_direct_access_msrs {
2682 { .index = MSR_CSTAR, .always = true },
2683 { .index = MSR_SYSCALL_MASK, .always = true },
2684 #endif
2685 + { .index = MSR_IA32_SPEC_CTRL, .always = false },
2686 + { .index = MSR_IA32_PRED_CMD, .always = false },
2687 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
2688 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
2689 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
2690 @@ -510,6 +514,7 @@ struct svm_cpu_data {
2691 struct kvm_ldttss_desc *tss_desc;
2692
2693 struct page *save_area;
2694 + struct vmcb *current_vmcb;
2695 };
2696
2697 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
2698 @@ -861,6 +866,25 @@ static bool valid_msr_intercept(u32 index)
2699 return false;
2700 }
2701
2702 +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
2703 +{
2704 + u8 bit_write;
2705 + unsigned long tmp;
2706 + u32 offset;
2707 + u32 *msrpm;
2708 +
2709 + msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
2710 + to_svm(vcpu)->msrpm;
2711 +
2712 + offset = svm_msrpm_offset(msr);
2713 + bit_write = 2 * (msr & 0x0f) + 1;
2714 + tmp = msrpm[offset];
2715 +
2716 + BUG_ON(offset == MSR_INVALID);
2717 +
2718 + return !!test_bit(bit_write, &tmp);
2719 +}
2720 +
2721 static void set_msr_interception(u32 *msrpm, unsigned msr,
2722 int read, int write)
2723 {
2724 @@ -1535,6 +1559,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2725 u32 dummy;
2726 u32 eax = 1;
2727
2728 + svm->spec_ctrl = 0;
2729 +
2730 if (!init_event) {
2731 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
2732 MSR_IA32_APICBASE_ENABLE;
2733 @@ -1644,11 +1670,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2734 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
2735 kvm_vcpu_uninit(vcpu);
2736 kmem_cache_free(kvm_vcpu_cache, svm);
2737 + /*
2738 + * The vmcb page can be recycled, causing a false negative in
2739 + * svm_vcpu_load(). So do a full IBPB now.
2740 + */
2741 + indirect_branch_prediction_barrier();
2742 }
2743
2744 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2745 {
2746 struct vcpu_svm *svm = to_svm(vcpu);
2747 + struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2748 int i;
2749
2750 if (unlikely(cpu != vcpu->cpu)) {
2751 @@ -1677,6 +1709,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2752 if (static_cpu_has(X86_FEATURE_RDTSCP))
2753 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2754
2755 + if (sd->current_vmcb != svm->vmcb) {
2756 + sd->current_vmcb = svm->vmcb;
2757 + indirect_branch_prediction_barrier();
2758 + }
2759 avic_vcpu_load(vcpu, cpu);
2760 }
2761
2762 @@ -3508,6 +3544,13 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2763 case MSR_VM_CR:
2764 msr_info->data = svm->nested.vm_cr_msr;
2765 break;
2766 + case MSR_IA32_SPEC_CTRL:
2767 + if (!msr_info->host_initiated &&
2768 + !guest_cpuid_has_ibrs(vcpu))
2769 + return 1;
2770 +
2771 + msr_info->data = svm->spec_ctrl;
2772 + break;
2773 case MSR_IA32_UCODE_REV:
2774 msr_info->data = 0x01000065;
2775 break;
2776 @@ -3599,6 +3642,49 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2777 case MSR_IA32_TSC:
2778 kvm_write_tsc(vcpu, msr);
2779 break;
2780 + case MSR_IA32_SPEC_CTRL:
2781 + if (!msr->host_initiated &&
2782 + !guest_cpuid_has_ibrs(vcpu))
2783 + return 1;
2784 +
2785 + /* The STIBP bit doesn't fault even if it's not advertised */
2786 + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
2787 + return 1;
2788 +
2789 + svm->spec_ctrl = data;
2790 +
2791 + if (!data)
2792 + break;
2793 +
2794 + /*
2795 + * For non-nested:
2796 + * When it's written (to non-zero) for the first time, pass
2797 + * it through.
2798 + *
2799 + * For nested:
2800 + * The handling of the MSR bitmap for L2 guests is done in
2801 + * nested_svm_vmrun_msrpm.
2802 + * We update the L1 MSR bit as well since it will end up
2803 + * touching the MSR anyway now.
2804 + */
2805 + set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
2806 + break;
2807 + case MSR_IA32_PRED_CMD:
2808 + if (!msr->host_initiated &&
2809 + !guest_cpuid_has_ibpb(vcpu))
2810 + return 1;
2811 +
2812 + if (data & ~PRED_CMD_IBPB)
2813 + return 1;
2814 +
2815 + if (!data)
2816 + break;
2817 +
2818 + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2819 + if (is_guest_mode(vcpu))
2820 + break;
2821 + set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
2822 + break;
2823 case MSR_STAR:
2824 svm->vmcb->save.star = data;
2825 break;
2826 @@ -4826,6 +4912,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2827
2828 local_irq_enable();
2829
2830 + /*
2831 + * If this vCPU has touched SPEC_CTRL, restore the guest's value if
2832 + * it's non-zero. Since vmentry is serialising on affected CPUs, there
2833 + * is no need to worry about the conditional branch over the wrmsr
2834 + * being speculatively taken.
2835 + */
2836 + if (svm->spec_ctrl)
2837 + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
2838 +
2839 asm volatile (
2840 "push %%" _ASM_BP "; \n\t"
2841 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
2842 @@ -4918,6 +5013,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2843 #endif
2844 );
2845
2846 + /*
2847 + * We do not use IBRS in the kernel. If this vCPU has used the
2848 + * SPEC_CTRL MSR it may have left it on; save the value and
2849 + * turn it off. This is much more efficient than blindly adding
2850 + * it to the atomic save/restore list. Especially as the former
2851 + * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
2852 + *
2853 + * For non-nested case:
2854 + * If the L01 MSR bitmap does not intercept the MSR, then we need to
2855 + * save it.
2856 + *
2857 + * For nested case:
2858 + * If the L02 MSR bitmap does not intercept the MSR, then we need to
2859 + * save it.
2860 + */
2861 + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
2862 + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
2863 +
2864 + if (svm->spec_ctrl)
2865 + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
2866 +
2867 /* Eliminate branch target predictions from guest mode */
2868 vmexit_fill_RSB();
2869
2870 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
2871 index 178a344f55f8..d49da86e3099 100644
2872 --- a/arch/x86/kvm/vmx.c
2873 +++ b/arch/x86/kvm/vmx.c
2874 @@ -33,6 +33,7 @@
2875 #include <linux/slab.h>
2876 #include <linux/tboot.h>
2877 #include <linux/hrtimer.h>
2878 +#include <linux/nospec.h>
2879 #include "kvm_cache_regs.h"
2880 #include "x86.h"
2881
2882 @@ -109,6 +110,14 @@ static u64 __read_mostly host_xss;
2883 static bool __read_mostly enable_pml = 1;
2884 module_param_named(pml, enable_pml, bool, S_IRUGO);
2885
2886 +#define MSR_TYPE_R 1
2887 +#define MSR_TYPE_W 2
2888 +#define MSR_TYPE_RW 3
2889 +
2890 +#define MSR_BITMAP_MODE_X2APIC 1
2891 +#define MSR_BITMAP_MODE_X2APIC_APICV 2
2892 +#define MSR_BITMAP_MODE_LM 4
2893 +
2894 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
2895
2896 /* Guest_tsc -> host_tsc conversion requires 64-bit division. */
2897 @@ -173,7 +182,6 @@ module_param(ple_window_max, int, S_IRUGO);
2898 extern const ulong vmx_return;
2899
2900 #define NR_AUTOLOAD_MSRS 8
2901 -#define VMCS02_POOL_SIZE 1
2902
2903 struct vmcs {
2904 u32 revision_id;
2905 @@ -191,6 +199,7 @@ struct loaded_vmcs {
2906 struct vmcs *shadow_vmcs;
2907 int cpu;
2908 int launched;
2909 + unsigned long *msr_bitmap;
2910 struct list_head loaded_vmcss_on_cpu_link;
2911 };
2912
2913 @@ -207,7 +216,7 @@ struct shared_msr_entry {
2914 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
2915 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
2916 * More than one of these structures may exist, if L1 runs multiple L2 guests.
2917 - * nested_vmx_run() will use the data here to build a vmcs02: a VMCS for the
2918 + * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
2919 * underlying hardware which will be used to run L2.
2920 * This structure is packed to ensure that its layout is identical across
2921 * machines (necessary for live migration).
2922 @@ -386,13 +395,6 @@ struct __packed vmcs12 {
2923 */
2924 #define VMCS12_SIZE 0x1000
2925
2926 -/* Used to remember the last vmcs02 used for some recently used vmcs12s */
2927 -struct vmcs02_list {
2928 - struct list_head list;
2929 - gpa_t vmptr;
2930 - struct loaded_vmcs vmcs02;
2931 -};
2932 -
2933 /*
2934 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
2935 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
2936 @@ -419,15 +421,15 @@ struct nested_vmx {
2937 */
2938 bool sync_shadow_vmcs;
2939
2940 - /* vmcs02_list cache of VMCSs recently used to run L2 guests */
2941 - struct list_head vmcs02_pool;
2942 - int vmcs02_num;
2943 bool change_vmcs01_virtual_x2apic_mode;
2944 /* L2 must run next, and mustn't decide to exit to L1. */
2945 bool nested_run_pending;
2946 +
2947 + struct loaded_vmcs vmcs02;
2948 +
2949 /*
2950 - * Guest pages referred to in vmcs02 with host-physical pointers, so
2951 - * we must keep them pinned while L2 runs.
2952 + * Guest pages referred to in the vmcs02 with host-physical
2953 + * pointers, so we must keep them pinned while L2 runs.
2954 */
2955 struct page *apic_access_page;
2956 struct page *virtual_apic_page;
2957 @@ -436,8 +438,6 @@ struct nested_vmx {
2958 bool pi_pending;
2959 u16 posted_intr_nv;
2960
2961 - unsigned long *msr_bitmap;
2962 -
2963 struct hrtimer preemption_timer;
2964 bool preemption_timer_expired;
2965
2966 @@ -538,6 +538,7 @@ struct vcpu_vmx {
2967 unsigned long host_rsp;
2968 u8 fail;
2969 bool nmi_known_unmasked;
2970 + u8 msr_bitmap_mode;
2971 u32 exit_intr_info;
2972 u32 idt_vectoring_info;
2973 ulong rflags;
2974 @@ -549,6 +550,10 @@ struct vcpu_vmx {
2975 u64 msr_host_kernel_gs_base;
2976 u64 msr_guest_kernel_gs_base;
2977 #endif
2978 +
2979 + u64 arch_capabilities;
2980 + u64 spec_ctrl;
2981 +
2982 u32 vm_entry_controls_shadow;
2983 u32 vm_exit_controls_shadow;
2984 /*
2985 @@ -856,21 +861,18 @@ static const unsigned short vmcs_field_to_offset_table[] = {
2986
2987 static inline short vmcs_field_to_offset(unsigned long field)
2988 {
2989 - BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
2990 + const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
2991 + unsigned short offset;
2992
2993 - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
2994 + BUILD_BUG_ON(size > SHRT_MAX);
2995 + if (field >= size)
2996 return -ENOENT;
2997
2998 - /*
2999 - * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
3000 - * generic mechanism.
3001 - */
3002 - asm("lfence");
3003 -
3004 - if (vmcs_field_to_offset_table[field] == 0)
3005 + field = array_index_nospec(field, size);
3006 + offset = vmcs_field_to_offset_table[field];
3007 + if (offset == 0)
3008 return -ENOENT;
3009 -
3010 - return vmcs_field_to_offset_table[field];
3011 + return offset;
3012 }
3013
3014 static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
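
The hunk above swaps the open-coded lfence for array_index_nospec(), which clamps the index under speculation instead of serialising every lookup. The same pattern applies to any table indexed by a guest- or user-controlled value; a minimal sketch with invented names (illustration only, assuming <linux/nospec.h> as included earlier in this file):

    /*
     * After the bounds check, re-derive the index through
     * array_index_nospec() so a mispredicted branch cannot use an
     * out-of-range value to index the table.
     */
    static u16 lookup_sketch(const u16 *table, size_t nr, size_t idx)
    {
            if (idx >= nr)
                    return 0;

            idx = array_index_nospec(idx, nr);
            return table[idx];
    }
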
3015 @@ -912,6 +914,9 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
3016 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
3017 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
3018 static int alloc_identity_pagetable(struct kvm *kvm);
3019 +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
3020 +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3021 + u32 msr, int type);
3022
3023 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
3024 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
3025 @@ -931,12 +936,6 @@ static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
3026
3027 static unsigned long *vmx_io_bitmap_a;
3028 static unsigned long *vmx_io_bitmap_b;
3029 -static unsigned long *vmx_msr_bitmap_legacy;
3030 -static unsigned long *vmx_msr_bitmap_longmode;
3031 -static unsigned long *vmx_msr_bitmap_legacy_x2apic;
3032 -static unsigned long *vmx_msr_bitmap_longmode_x2apic;
3033 -static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
3034 -static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
3035 static unsigned long *vmx_vmread_bitmap;
3036 static unsigned long *vmx_vmwrite_bitmap;
3037
3038 @@ -1853,6 +1852,52 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
3039 vmcs_write32(EXCEPTION_BITMAP, eb);
3040 }
3041
3042 +/*
3043 + * Check if MSR is intercepted for currently loaded MSR bitmap.
3044 + */
3045 +static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
3046 +{
3047 + unsigned long *msr_bitmap;
3048 + int f = sizeof(unsigned long);
3049 +
3050 + if (!cpu_has_vmx_msr_bitmap())
3051 + return true;
3052 +
3053 + msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
3054 +
3055 + if (msr <= 0x1fff) {
3056 + return !!test_bit(msr, msr_bitmap + 0x800 / f);
3057 + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3058 + msr &= 0x1fff;
3059 + return !!test_bit(msr, msr_bitmap + 0xc00 / f);
3060 + }
3061 +
3062 + return true;
3063 +}
3064 +
3065 +/*
3066 + * Check if MSR is intercepted for L01 MSR bitmap.
3067 + */
3068 +static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
3069 +{
3070 + unsigned long *msr_bitmap;
3071 + int f = sizeof(unsigned long);
3072 +
3073 + if (!cpu_has_vmx_msr_bitmap())
3074 + return true;
3075 +
3076 + msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
3077 +
3078 + if (msr <= 0x1fff) {
3079 + return !!test_bit(msr, msr_bitmap + 0x800 / f);
3080 + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3081 + msr &= 0x1fff;
3082 + return !!test_bit(msr, msr_bitmap + 0xc00 / f);
3083 + }
3084 +
3085 + return true;
3086 +}
3087 +
3088 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
3089 unsigned long entry, unsigned long exit)
3090 {
3091 @@ -2262,6 +2307,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3092 if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
3093 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
3094 vmcs_load(vmx->loaded_vmcs->vmcs);
3095 + indirect_branch_prediction_barrier();
3096 }
3097
3098 if (!already_loaded) {
3099 @@ -2530,36 +2576,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
3100 vmx->guest_msrs[from] = tmp;
3101 }
3102
3103 -static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
3104 -{
3105 - unsigned long *msr_bitmap;
3106 -
3107 - if (is_guest_mode(vcpu))
3108 - msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
3109 - else if (cpu_has_secondary_exec_ctrls() &&
3110 - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
3111 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
3112 - if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
3113 - if (is_long_mode(vcpu))
3114 - msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
3115 - else
3116 - msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
3117 - } else {
3118 - if (is_long_mode(vcpu))
3119 - msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
3120 - else
3121 - msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
3122 - }
3123 - } else {
3124 - if (is_long_mode(vcpu))
3125 - msr_bitmap = vmx_msr_bitmap_longmode;
3126 - else
3127 - msr_bitmap = vmx_msr_bitmap_legacy;
3128 - }
3129 -
3130 - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
3131 -}
3132 -
3133 /*
3134 * Set up the vmcs to automatically save and restore system
3135 * msrs. Don't touch the 64-bit msrs if the guest is in legacy
3136 @@ -2600,7 +2616,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
3137 vmx->save_nmsrs = save_nmsrs;
3138
3139 if (cpu_has_vmx_msr_bitmap())
3140 - vmx_set_msr_bitmap(&vmx->vcpu);
3141 + vmx_update_msr_bitmap(&vmx->vcpu);
3142 }
3143
3144 /*
3145 @@ -2989,6 +3005,19 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3146 case MSR_IA32_TSC:
3147 msr_info->data = guest_read_tsc(vcpu);
3148 break;
3149 + case MSR_IA32_SPEC_CTRL:
3150 + if (!msr_info->host_initiated &&
3151 + !guest_cpuid_has_ibrs(vcpu))
3152 + return 1;
3153 +
3154 + msr_info->data = to_vmx(vcpu)->spec_ctrl;
3155 + break;
3156 + case MSR_IA32_ARCH_CAPABILITIES:
3157 + if (!msr_info->host_initiated &&
3158 + !guest_cpuid_has_arch_capabilities(vcpu))
3159 + return 1;
3160 + msr_info->data = to_vmx(vcpu)->arch_capabilities;
3161 + break;
3162 case MSR_IA32_SYSENTER_CS:
3163 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
3164 break;
3165 @@ -3093,6 +3122,68 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
3166 case MSR_IA32_TSC:
3167 kvm_write_tsc(vcpu, msr_info);
3168 break;
3169 + case MSR_IA32_SPEC_CTRL:
3170 + if (!msr_info->host_initiated &&
3171 + !guest_cpuid_has_ibrs(vcpu))
3172 + return 1;
3173 +
3174 + /* The STIBP bit doesn't fault even if it's not advertised */
3175 + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
3176 + return 1;
3177 +
3178 + vmx->spec_ctrl = data;
3179 +
3180 + if (!data)
3181 + break;
3182 +
3183 + /*
3184 + * For non-nested:
3185 + * When it's written (to non-zero) for the first time, pass
3186 + * it through.
3187 + *
3188 + * For nested:
3189 + * The handling of the MSR bitmap for L2 guests is done in
3190 + * nested_vmx_merge_msr_bitmap. We should not touch the
3191 + * vmcs02.msr_bitmap here since it gets completely overwritten
3192 + * in the merging. We update the vmcs01 here for L1 as well
3193 + * since it will end up touching the MSR anyway now.
3194 + */
3195 + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
3196 + MSR_IA32_SPEC_CTRL,
3197 + MSR_TYPE_RW);
3198 + break;
3199 + case MSR_IA32_PRED_CMD:
3200 + if (!msr_info->host_initiated &&
3201 + !guest_cpuid_has_ibpb(vcpu))
3202 + return 1;
3203 +
3204 + if (data & ~PRED_CMD_IBPB)
3205 + return 1;
3206 +
3207 + if (!data)
3208 + break;
3209 +
3210 + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
3211 +
3212 + /*
3213 + * For non-nested:
3214 + * When it's written (to non-zero) for the first time, pass
3215 + * it through.
3216 + *
3217 + * For nested:
3218 + * The handling of the MSR bitmap for L2 guests is done in
3219 + * nested_vmx_merge_msr_bitmap. We should not touch the
3220 + * vmcs02.msr_bitmap here since it gets completely overwritten
3221 + * in the merging.
3222 + */
3223 + vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
3224 + MSR_TYPE_W);
3225 + break;
3226 + case MSR_IA32_ARCH_CAPABILITIES:
3227 + if (!msr_info->host_initiated)
3228 + return 1;
3229 + vmx->arch_capabilities = data;
3230 + break;
3231 case MSR_IA32_CR_PAT:
3232 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
3233 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
3234 @@ -3532,11 +3623,6 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
3235 return vmcs;
3236 }
3237
3238 -static struct vmcs *alloc_vmcs(void)
3239 -{
3240 - return alloc_vmcs_cpu(raw_smp_processor_id());
3241 -}
3242 -
3243 static void free_vmcs(struct vmcs *vmcs)
3244 {
3245 free_pages((unsigned long)vmcs, vmcs_config.order);
3246 @@ -3552,9 +3638,38 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3247 loaded_vmcs_clear(loaded_vmcs);
3248 free_vmcs(loaded_vmcs->vmcs);
3249 loaded_vmcs->vmcs = NULL;
3250 + if (loaded_vmcs->msr_bitmap)
3251 + free_page((unsigned long)loaded_vmcs->msr_bitmap);
3252 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
3253 }
3254
3255 +static struct vmcs *alloc_vmcs(void)
3256 +{
3257 + return alloc_vmcs_cpu(raw_smp_processor_id());
3258 +}
3259 +
3260 +static int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3261 +{
3262 + loaded_vmcs->vmcs = alloc_vmcs();
3263 + if (!loaded_vmcs->vmcs)
3264 + return -ENOMEM;
3265 +
3266 + loaded_vmcs->shadow_vmcs = NULL;
3267 + loaded_vmcs_init(loaded_vmcs);
3268 +
3269 + if (cpu_has_vmx_msr_bitmap()) {
3270 + loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
3271 + if (!loaded_vmcs->msr_bitmap)
3272 + goto out_vmcs;
3273 + memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
3274 + }
3275 + return 0;
3276 +
3277 +out_vmcs:
3278 + free_loaded_vmcs(loaded_vmcs);
3279 + return -ENOMEM;
3280 +}
3281 +
3282 static void free_kvm_area(void)
3283 {
3284 int cpu;
3285 @@ -4561,10 +4676,8 @@ static void free_vpid(int vpid)
3286 spin_unlock(&vmx_vpid_lock);
3287 }
3288
3289 -#define MSR_TYPE_R 1
3290 -#define MSR_TYPE_W 2
3291 -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3292 - u32 msr, int type)
3293 +static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3294 + u32 msr, int type)
3295 {
3296 int f = sizeof(unsigned long);
3297
3298 @@ -4598,8 +4711,8 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3299 }
3300 }
3301
3302 -static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3303 - u32 msr, int type)
3304 +static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3305 + u32 msr, int type)
3306 {
3307 int f = sizeof(unsigned long);
3308
3309 @@ -4633,6 +4746,15 @@ static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3310 }
3311 }
3312
3313 +static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
3314 + u32 msr, int type, bool value)
3315 +{
3316 + if (value)
3317 + vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
3318 + else
3319 + vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
3320 +}
3321 +
3322 /*
3323 * If a msr is allowed by L0, we should check whether it is allowed by L1.
3324 * The corresponding bit will be cleared unless both of L0 and L1 allow it.
3325 @@ -4679,58 +4801,68 @@ static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
3326 }
3327 }
3328
3329 -static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
3330 +static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
3331 {
3332 - if (!longmode_only)
3333 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
3334 - msr, MSR_TYPE_R | MSR_TYPE_W);
3335 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
3336 - msr, MSR_TYPE_R | MSR_TYPE_W);
3337 -}
3338 + u8 mode = 0;
3339
3340 -static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
3341 -{
3342 - if (apicv_active) {
3343 - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3344 - msr, MSR_TYPE_R);
3345 - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3346 - msr, MSR_TYPE_R);
3347 - } else {
3348 - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
3349 - msr, MSR_TYPE_R);
3350 - __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
3351 - msr, MSR_TYPE_R);
3352 + if (cpu_has_secondary_exec_ctrls() &&
3353 + (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
3354 + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
3355 + mode |= MSR_BITMAP_MODE_X2APIC;
3356 + if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
3357 + mode |= MSR_BITMAP_MODE_X2APIC_APICV;
3358 }
3359 +
3360 + if (is_long_mode(vcpu))
3361 + mode |= MSR_BITMAP_MODE_LM;
3362 +
3363 + return mode;
3364 }
3365
3366 -static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
3367 +#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
3368 +
3369 +static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
3370 + u8 mode)
3371 {
3372 - if (apicv_active) {
3373 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3374 - msr, MSR_TYPE_R);
3375 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3376 - msr, MSR_TYPE_R);
3377 - } else {
3378 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
3379 - msr, MSR_TYPE_R);
3380 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
3381 - msr, MSR_TYPE_R);
3382 + int msr;
3383 +
3384 + for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
3385 + unsigned word = msr / BITS_PER_LONG;
3386 + msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
3387 + msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
3388 + }
3389 +
3390 + if (mode & MSR_BITMAP_MODE_X2APIC) {
3391 + /*
3392 + * TPR reads and writes can be virtualized even if virtual interrupt
3393 + * delivery is not in use.
3394 + */
3395 + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
3396 + if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
3397 + vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
3398 + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
3399 + vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
3400 + }
3401 }
3402 }
3403
3404 -static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
3405 +static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
3406 {
3407 - if (apicv_active) {
3408 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
3409 - msr, MSR_TYPE_W);
3410 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
3411 - msr, MSR_TYPE_W);
3412 - } else {
3413 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
3414 - msr, MSR_TYPE_W);
3415 - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
3416 - msr, MSR_TYPE_W);
3417 - }
3418 + struct vcpu_vmx *vmx = to_vmx(vcpu);
3419 + unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3420 + u8 mode = vmx_msr_bitmap_mode(vcpu);
3421 + u8 changed = mode ^ vmx->msr_bitmap_mode;
3422 +
3423 + if (!changed)
3424 + return;
3425 +
3426 + vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,
3427 + !(mode & MSR_BITMAP_MODE_LM));
3428 +
3429 + if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
3430 + vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
3431 +
3432 + vmx->msr_bitmap_mode = mode;
3433 }
3434
3435 static bool vmx_get_enable_apicv(void)
3436 @@ -4738,30 +4870,45 @@ static bool vmx_get_enable_apicv(void)
3437 return enable_apicv;
3438 }
3439
3440 -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
3441 +static void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
3442 +{
3443 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
3444 + gfn_t gfn;
3445 +
3446 + /*
3447 + * Don't need to mark the APIC access page dirty; it is never
3448 + * written to by the CPU during APIC virtualization.
3449 + */
3450 +
3451 + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
3452 + gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
3453 + kvm_vcpu_mark_page_dirty(vcpu, gfn);
3454 + }
3455 +
3456 + if (nested_cpu_has_posted_intr(vmcs12)) {
3457 + gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
3458 + kvm_vcpu_mark_page_dirty(vcpu, gfn);
3459 + }
3460 +}
3461 +
3462 +
3463 +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
3464 {
3465 struct vcpu_vmx *vmx = to_vmx(vcpu);
3466 int max_irr;
3467 void *vapic_page;
3468 u16 status;
3469
3470 - if (vmx->nested.pi_desc &&
3471 - vmx->nested.pi_pending) {
3472 - vmx->nested.pi_pending = false;
3473 - if (!pi_test_and_clear_on(vmx->nested.pi_desc))
3474 - return 0;
3475 -
3476 - max_irr = find_last_bit(
3477 - (unsigned long *)vmx->nested.pi_desc->pir, 256);
3478 + if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
3479 + return;
3480
3481 - if (max_irr == 256)
3482 - return 0;
3483 + vmx->nested.pi_pending = false;
3484 + if (!pi_test_and_clear_on(vmx->nested.pi_desc))
3485 + return;
3486
3487 + max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
3488 + if (max_irr != 256) {
3489 vapic_page = kmap(vmx->nested.virtual_apic_page);
3490 - if (!vapic_page) {
3491 - WARN_ON(1);
3492 - return -ENOMEM;
3493 - }
3494 __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page);
3495 kunmap(vmx->nested.virtual_apic_page);
3496
3497 @@ -4772,7 +4919,8 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
3498 vmcs_write16(GUEST_INTR_STATUS, status);
3499 }
3500 }
3501 - return 0;
3502 +
3503 + nested_mark_vmcs12_pages_dirty(vcpu);
3504 }
3505
3506 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
3507 @@ -4959,7 +5107,7 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
3508 }
3509
3510 if (cpu_has_vmx_msr_bitmap())
3511 - vmx_set_msr_bitmap(vcpu);
3512 + vmx_update_msr_bitmap(vcpu);
3513 }
3514
3515 static u32 vmx_exec_control(struct vcpu_vmx *vmx)
3516 @@ -5048,7 +5196,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3517 vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
3518 }
3519 if (cpu_has_vmx_msr_bitmap())
3520 - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy));
3521 + vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
3522
3523 vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
3524
3525 @@ -5122,6 +5270,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
3526 ++vmx->nmsrs;
3527 }
3528
3529 + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
3530 + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
3531
3532 vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
3533
3534 @@ -5150,6 +5300,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
3535 u64 cr0;
3536
3537 vmx->rmode.vm86_active = 0;
3538 + vmx->spec_ctrl = 0;
3539
3540 vmx->soft_vnmi_blocked = 0;
3541
3542 @@ -6379,7 +6530,7 @@ static void wakeup_handler(void)
3543
3544 static __init int hardware_setup(void)
3545 {
3546 - int r = -ENOMEM, i, msr;
3547 + int r = -ENOMEM, i;
3548
3549 rdmsrl_safe(MSR_EFER, &host_efer);
3550
3551 @@ -6394,41 +6545,13 @@ static __init int hardware_setup(void)
3552 if (!vmx_io_bitmap_b)
3553 goto out;
3554
3555 - vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
3556 - if (!vmx_msr_bitmap_legacy)
3557 - goto out1;
3558 -
3559 - vmx_msr_bitmap_legacy_x2apic =
3560 - (unsigned long *)__get_free_page(GFP_KERNEL);
3561 - if (!vmx_msr_bitmap_legacy_x2apic)
3562 - goto out2;
3563 -
3564 - vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
3565 - (unsigned long *)__get_free_page(GFP_KERNEL);
3566 - if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
3567 - goto out3;
3568 -
3569 - vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
3570 - if (!vmx_msr_bitmap_longmode)
3571 - goto out4;
3572 -
3573 - vmx_msr_bitmap_longmode_x2apic =
3574 - (unsigned long *)__get_free_page(GFP_KERNEL);
3575 - if (!vmx_msr_bitmap_longmode_x2apic)
3576 - goto out5;
3577 -
3578 - vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
3579 - (unsigned long *)__get_free_page(GFP_KERNEL);
3580 - if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
3581 - goto out6;
3582 -
3583 vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
3584 if (!vmx_vmread_bitmap)
3585 - goto out7;
3586 + goto out1;
3587
3588 vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
3589 if (!vmx_vmwrite_bitmap)
3590 - goto out8;
3591 + goto out2;
3592
3593 memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
3594 memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
3595 @@ -6437,12 +6560,9 @@ static __init int hardware_setup(void)
3596
3597 memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
3598
3599 - memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
3600 - memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
3601 -
3602 if (setup_vmcs_config(&vmcs_config) < 0) {
3603 r = -EIO;
3604 - goto out9;
3605 + goto out3;
3606 }
3607
3608 if (boot_cpu_has(X86_FEATURE_NX))
3609 @@ -6499,47 +6619,8 @@ static __init int hardware_setup(void)
3610 kvm_tsc_scaling_ratio_frac_bits = 48;
3611 }
3612
3613 - vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
3614 - vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
3615 - vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
3616 - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
3617 - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
3618 - vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
3619 -
3620 - memcpy(vmx_msr_bitmap_legacy_x2apic,
3621 - vmx_msr_bitmap_legacy, PAGE_SIZE);
3622 - memcpy(vmx_msr_bitmap_longmode_x2apic,
3623 - vmx_msr_bitmap_longmode, PAGE_SIZE);
3624 - memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
3625 - vmx_msr_bitmap_legacy, PAGE_SIZE);
3626 - memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
3627 - vmx_msr_bitmap_longmode, PAGE_SIZE);
3628 -
3629 set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
3630
3631 - /*
3632 - * enable_apicv && kvm_vcpu_apicv_active()
3633 - */
3634 - for (msr = 0x800; msr <= 0x8ff; msr++)
3635 - vmx_disable_intercept_msr_read_x2apic(msr, true);
3636 -
3637 - /* TMCCT */
3638 - vmx_enable_intercept_msr_read_x2apic(0x839, true);
3639 - /* TPR */
3640 - vmx_disable_intercept_msr_write_x2apic(0x808, true);
3641 - /* EOI */
3642 - vmx_disable_intercept_msr_write_x2apic(0x80b, true);
3643 - /* SELF-IPI */
3644 - vmx_disable_intercept_msr_write_x2apic(0x83f, true);
3645 -
3646 - /*
3647 - * (enable_apicv && !kvm_vcpu_apicv_active()) ||
3648 - * !enable_apicv
3649 - */
3650 - /* TPR */
3651 - vmx_disable_intercept_msr_read_x2apic(0x808, false);
3652 - vmx_disable_intercept_msr_write_x2apic(0x808, false);
3653 -
3654 if (enable_ept) {
3655 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
3656 (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
3657 @@ -6585,22 +6666,10 @@ static __init int hardware_setup(void)
3658
3659 return alloc_kvm_area();
3660
3661 -out9:
3662 - free_page((unsigned long)vmx_vmwrite_bitmap);
3663 -out8:
3664 - free_page((unsigned long)vmx_vmread_bitmap);
3665 -out7:
3666 - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
3667 -out6:
3668 - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
3669 -out5:
3670 - free_page((unsigned long)vmx_msr_bitmap_longmode);
3671 -out4:
3672 - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
3673 out3:
3674 - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
3675 + free_page((unsigned long)vmx_vmwrite_bitmap);
3676 out2:
3677 - free_page((unsigned long)vmx_msr_bitmap_legacy);
3678 + free_page((unsigned long)vmx_vmread_bitmap);
3679 out1:
3680 free_page((unsigned long)vmx_io_bitmap_b);
3681 out:
3682 @@ -6611,12 +6680,6 @@ static __init int hardware_setup(void)
3683
3684 static __exit void hardware_unsetup(void)
3685 {
3686 - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
3687 - free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
3688 - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
3689 - free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
3690 - free_page((unsigned long)vmx_msr_bitmap_legacy);
3691 - free_page((unsigned long)vmx_msr_bitmap_longmode);
3692 free_page((unsigned long)vmx_io_bitmap_b);
3693 free_page((unsigned long)vmx_io_bitmap_a);
3694 free_page((unsigned long)vmx_vmwrite_bitmap);
3695 @@ -6663,94 +6726,6 @@ static int handle_monitor(struct kvm_vcpu *vcpu)
3696 return handle_nop(vcpu);
3697 }
3698
3699 -/*
3700 - * To run an L2 guest, we need a vmcs02 based on the L1-specified vmcs12.
3701 - * We could reuse a single VMCS for all the L2 guests, but we also want the
3702 - * option to allocate a separate vmcs02 for each separate loaded vmcs12 - this
3703 - * allows keeping them loaded on the processor, and in the future will allow
3704 - * optimizations where prepare_vmcs02 doesn't need to set all the fields on
3705 - * every entry if they never change.
3706 - * So we keep, in vmx->nested.vmcs02_pool, a cache of size VMCS02_POOL_SIZE
3707 - * (>=0) with a vmcs02 for each recently loaded vmcs12s, most recent first.
3708 - *
3709 - * The following functions allocate and free a vmcs02 in this pool.
3710 - */
3711 -
3712 -/* Get a VMCS from the pool to use as vmcs02 for the current vmcs12. */
3713 -static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
3714 -{
3715 - struct vmcs02_list *item;
3716 - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
3717 - if (item->vmptr == vmx->nested.current_vmptr) {
3718 - list_move(&item->list, &vmx->nested.vmcs02_pool);
3719 - return &item->vmcs02;
3720 - }
3721 -
3722 - if (vmx->nested.vmcs02_num >= max(VMCS02_POOL_SIZE, 1)) {
3723 - /* Recycle the least recently used VMCS. */
3724 - item = list_last_entry(&vmx->nested.vmcs02_pool,
3725 - struct vmcs02_list, list);
3726 - item->vmptr = vmx->nested.current_vmptr;
3727 - list_move(&item->list, &vmx->nested.vmcs02_pool);
3728 - return &item->vmcs02;
3729 - }
3730 -
3731 - /* Create a new VMCS */
3732 - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
3733 - if (!item)
3734 - return NULL;
3735 - item->vmcs02.vmcs = alloc_vmcs();
3736 - item->vmcs02.shadow_vmcs = NULL;
3737 - if (!item->vmcs02.vmcs) {
3738 - kfree(item);
3739 - return NULL;
3740 - }
3741 - loaded_vmcs_init(&item->vmcs02);
3742 - item->vmptr = vmx->nested.current_vmptr;
3743 - list_add(&(item->list), &(vmx->nested.vmcs02_pool));
3744 - vmx->nested.vmcs02_num++;
3745 - return &item->vmcs02;
3746 -}
3747 -
3748 -/* Free and remove from pool a vmcs02 saved for a vmcs12 (if there is one) */
3749 -static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
3750 -{
3751 - struct vmcs02_list *item;
3752 - list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
3753 - if (item->vmptr == vmptr) {
3754 - free_loaded_vmcs(&item->vmcs02);
3755 - list_del(&item->list);
3756 - kfree(item);
3757 - vmx->nested.vmcs02_num--;
3758 - return;
3759 - }
3760 -}
3761 -
3762 -/*
3763 - * Free all VMCSs saved for this vcpu, except the one pointed by
3764 - * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
3765 - * must be &vmx->vmcs01.
3766 - */
3767 -static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
3768 -{
3769 - struct vmcs02_list *item, *n;
3770 -
3771 - WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
3772 - list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
3773 - /*
3774 - * Something will leak if the above WARN triggers. Better than
3775 - * a use-after-free.
3776 - */
3777 - if (vmx->loaded_vmcs == &item->vmcs02)
3778 - continue;
3779 -
3780 - free_loaded_vmcs(&item->vmcs02);
3781 - list_del(&item->list);
3782 - kfree(item);
3783 - vmx->nested.vmcs02_num--;
3784 - }
3785 -}
3786 -
3787 /*
3788 * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
3789 * set the success or error code of an emulated VMX instruction, as specified
3790 @@ -7025,6 +7000,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
3791 struct vmcs *shadow_vmcs;
3792 const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
3793 | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
3794 + int r;
3795
3796 /* The Intel VMX Instruction Reference lists a bunch of bits that
3797 * are prerequisite to running VMXON, most notably cr4.VMXE must be
3798 @@ -7064,12 +7040,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
3799 return 1;
3800 }
3801
3802 - if (cpu_has_vmx_msr_bitmap()) {
3803 - vmx->nested.msr_bitmap =
3804 - (unsigned long *)__get_free_page(GFP_KERNEL);
3805 - if (!vmx->nested.msr_bitmap)
3806 - goto out_msr_bitmap;
3807 - }
3808 + r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
3809 + if (r < 0)
3810 + goto out_vmcs02;
3811
3812 vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
3813 if (!vmx->nested.cached_vmcs12)
3814 @@ -7086,9 +7059,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
3815 vmx->vmcs01.shadow_vmcs = shadow_vmcs;
3816 }
3817
3818 - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
3819 - vmx->nested.vmcs02_num = 0;
3820 -
3821 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
3822 HRTIMER_MODE_REL_PINNED);
3823 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
3824 @@ -7103,9 +7073,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
3825 kfree(vmx->nested.cached_vmcs12);
3826
3827 out_cached_vmcs12:
3828 - free_page((unsigned long)vmx->nested.msr_bitmap);
3829 + free_loaded_vmcs(&vmx->nested.vmcs02);
3830
3831 -out_msr_bitmap:
3832 +out_vmcs02:
3833 return -ENOMEM;
3834 }
3835
3836 @@ -7181,17 +7151,13 @@ static void free_nested(struct vcpu_vmx *vmx)
3837 vmx->nested.vmxon = false;
3838 free_vpid(vmx->nested.vpid02);
3839 nested_release_vmcs12(vmx);
3840 - if (vmx->nested.msr_bitmap) {
3841 - free_page((unsigned long)vmx->nested.msr_bitmap);
3842 - vmx->nested.msr_bitmap = NULL;
3843 - }
3844 if (enable_shadow_vmcs) {
3845 vmcs_clear(vmx->vmcs01.shadow_vmcs);
3846 free_vmcs(vmx->vmcs01.shadow_vmcs);
3847 vmx->vmcs01.shadow_vmcs = NULL;
3848 }
3849 kfree(vmx->nested.cached_vmcs12);
3850 - /* Unpin physical memory we referred to in current vmcs02 */
3851 + /* Unpin physical memory we referred to in the vmcs02 */
3852 if (vmx->nested.apic_access_page) {
3853 nested_release_page(vmx->nested.apic_access_page);
3854 vmx->nested.apic_access_page = NULL;
3855 @@ -7207,7 +7173,7 @@ static void free_nested(struct vcpu_vmx *vmx)
3856 vmx->nested.pi_desc = NULL;
3857 }
3858
3859 - nested_free_all_saved_vmcss(vmx);
3860 + free_loaded_vmcs(&vmx->nested.vmcs02);
3861 }
3862
3863 /* Emulate the VMXOFF instruction */
3864 @@ -7241,8 +7207,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
3865 vmptr + offsetof(struct vmcs12, launch_state),
3866 &zero, sizeof(zero));
3867
3868 - nested_free_vmcs02(vmx, vmptr);
3869 -
3870 skip_emulated_instruction(vcpu);
3871 nested_vmx_succeed(vcpu);
3872 return 1;
3873 @@ -8029,6 +7993,19 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
3874 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
3875 KVM_ISA_VMX);
3876
3877 + /*
3878 + * The host physical addresses of some pages of guest memory
3879 + * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
3880 + * Page). The CPU may write to these pages via their host
3881 + * physical address while L2 is running, bypassing any
3882 + * address-translation-based dirty tracking (e.g. EPT write
3883 + * protection).
3884 + *
3885 + * Mark them dirty on every exit from L2 to prevent them from
3886 + * getting out of sync with dirty tracking.
3887 + */
3888 + nested_mark_vmcs12_pages_dirty(vcpu);
3889 +
3890 if (vmx->nested.nested_run_pending)
3891 return false;
3892
3893 @@ -8520,7 +8497,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3894 }
3895 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
3896
3897 - vmx_set_msr_bitmap(vcpu);
3898 + vmx_update_msr_bitmap(vcpu);
3899 }
3900
3901 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
3902 @@ -8676,14 +8653,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
3903 #endif
3904 "pushf\n\t"
3905 __ASM_SIZE(push) " $%c[cs]\n\t"
3906 - "call *%[entry]\n\t"
3907 + CALL_NOSPEC
3908 :
3909 #ifdef CONFIG_X86_64
3910 [sp]"=&r"(tmp),
3911 #endif
3912 "+r"(__sp)
3913 :
3914 - [entry]"r"(entry),
3915 + THUNK_TARGET(entry),
3916 [ss]"i"(__KERNEL_DS),
3917 [cs]"i"(__KERNEL_CS)
3918 );
3919 @@ -8909,6 +8886,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
3920
3921 vmx_arm_hv_timer(vcpu);
3922
3923 + /*
3924 + * If this vCPU has touched SPEC_CTRL, restore the guest's value if
3925 + * it's non-zero. Since vmentry is serialising on affected CPUs, there
3926 + * is no need to worry about the conditional branch over the wrmsr
3927 + * being speculatively taken.
3928 + */
3929 + if (vmx->spec_ctrl)
3930 + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
3931 +
3932 vmx->__launched = vmx->loaded_vmcs->launched;
3933 asm(
3934 /* Store host registers */
3935 @@ -9027,6 +9013,27 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
3936 #endif
3937 );
3938
3939 + /*
3940 + * We do not use IBRS in the kernel. If this vCPU has used the
3941 + * SPEC_CTRL MSR it may have left it on; save the value and
3942 + * turn it off. This is much more efficient than blindly adding
3943 + * it to the atomic save/restore list. Especially as the former
3944 + * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
3945 + *
3946 + * For non-nested case:
3947 + * If the L01 MSR bitmap does not intercept the MSR, then we need to
3948 + * save it.
3949 + *
3950 + * For nested case:
3951 + * If the L02 MSR bitmap does not intercept the MSR, then we need to
3952 + * save it.
3953 + */
3954 + if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
3955 + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
3956 +
3957 + if (vmx->spec_ctrl)
3958 + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
3959 +
3960 /* Eliminate branch target predictions from guest mode */
3961 vmexit_fill_RSB();
3962
3963 @@ -9140,6 +9147,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3964 {
3965 int err;
3966 struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3967 + unsigned long *msr_bitmap;
3968 int cpu;
3969
3970 if (!vmx)
3971 @@ -9172,17 +9180,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
3972 if (!vmx->guest_msrs)
3973 goto free_pml;
3974
3975 - vmx->loaded_vmcs = &vmx->vmcs01;
3976 - vmx->loaded_vmcs->vmcs = alloc_vmcs();
3977 - vmx->loaded_vmcs->shadow_vmcs = NULL;
3978 - if (!vmx->loaded_vmcs->vmcs)
3979 - goto free_msrs;
3980 if (!vmm_exclusive)
3981 kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
3982 - loaded_vmcs_init(vmx->loaded_vmcs);
3983 + err = alloc_loaded_vmcs(&vmx->vmcs01);
3984 if (!vmm_exclusive)
3985 kvm_cpu_vmxoff();
3986 + if (err < 0)
3987 + goto free_msrs;
3988
3989 + msr_bitmap = vmx->vmcs01.msr_bitmap;
3990 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
3991 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
3992 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
3993 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
3994 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
3995 + vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
3996 + vmx->msr_bitmap_mode = 0;
3997 +
3998 + vmx->loaded_vmcs = &vmx->vmcs01;
3999 cpu = get_cpu();
4000 vmx_vcpu_load(&vmx->vcpu, cpu);
4001 vmx->vcpu.cpu = cpu;
4002 @@ -9576,21 +9591,31 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
4003 int msr;
4004 struct page *page;
4005 unsigned long *msr_bitmap_l1;
4006 - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
4007 + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
4008 + /*
4009 + * pred_cmd & spec_ctrl are trying to verify two things:
4010 + *
4011 + * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
4012 + * ensures that we do not accidentally generate an L02 MSR bitmap
4013 + * from the L12 MSR bitmap that is too permissive.
4014 + * 2. That L1 or L2s have actually used the MSR. This avoids
4015 + * unnecessarily merging of the bitmap if the MSR is unused. This
4016 + * works properly because we only update the L01 MSR bitmap lazily.
4017 + * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
4018 + * updated to reflect this when L1 (or its L2s) actually write to
4019 + * the MSR.
4020 + */
4021 + bool pred_cmd = msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD);
4022 + bool spec_ctrl = msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL);
4023
4024 - /* This shortcut is ok because we support only x2APIC MSRs so far. */
4025 - if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
4026 + if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
4027 + !pred_cmd && !spec_ctrl)
4028 return false;
4029
4030 page = nested_get_page(vcpu, vmcs12->msr_bitmap);
4031 if (!page)
4032 return false;
4033 msr_bitmap_l1 = (unsigned long *)kmap(page);
4034 - if (!msr_bitmap_l1) {
4035 - nested_release_page_clean(page);
4036 - WARN_ON(1);
4037 - return false;
4038 - }
4039
4040 memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
4041
4042 @@ -9617,6 +9642,19 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
4043 MSR_TYPE_W);
4044 }
4045 }
4046 +
4047 + if (spec_ctrl)
4048 + nested_vmx_disable_intercept_for_msr(
4049 + msr_bitmap_l1, msr_bitmap_l0,
4050 + MSR_IA32_SPEC_CTRL,
4051 + MSR_TYPE_R | MSR_TYPE_W);
4052 +
4053 + if (pred_cmd)
4054 + nested_vmx_disable_intercept_for_msr(
4055 + msr_bitmap_l1, msr_bitmap_l0,
4056 + MSR_IA32_PRED_CMD,
4057 + MSR_TYPE_W);
4058 +
4059 kunmap(page);
4060 nested_release_page_clean(page);
4061
4062 @@ -10096,6 +10134,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
4063 if (kvm_has_tsc_control)
4064 decache_tsc_multiplier(vmx);
4065
4066 + if (cpu_has_vmx_msr_bitmap())
4067 + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
4068 +
4069 if (enable_vpid) {
4070 /*
4071 * There is no direct mapping between vpid02 and vpid12, the
4072 @@ -10191,7 +10232,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
4073 struct vmcs12 *vmcs12;
4074 struct vcpu_vmx *vmx = to_vmx(vcpu);
4075 int cpu;
4076 - struct loaded_vmcs *vmcs02;
4077 bool ia32e;
4078 u32 msr_entry_idx;
4079
4080 @@ -10331,17 +10371,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
4081 * the nested entry.
4082 */
4083
4084 - vmcs02 = nested_get_current_vmcs02(vmx);
4085 - if (!vmcs02)
4086 - return -ENOMEM;
4087 -
4088 enter_guest_mode(vcpu);
4089
4090 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
4091 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
4092
4093 cpu = get_cpu();
4094 - vmx->loaded_vmcs = vmcs02;
4095 + vmx->loaded_vmcs = &vmx->nested.vmcs02;
4096 vmx_vcpu_put(vcpu);
4097 vmx_vcpu_load(vcpu, cpu);
4098 vcpu->cpu = cpu;
4099 @@ -10493,7 +10529,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
4100 return 0;
4101 }
4102
4103 - return vmx_complete_nested_posted_interrupt(vcpu);
4104 + vmx_complete_nested_posted_interrupt(vcpu);
4105 + return 0;
4106 }
4107
4108 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
4109 @@ -10804,7 +10841,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
4110 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4111
4112 if (cpu_has_vmx_msr_bitmap())
4113 - vmx_set_msr_bitmap(vcpu);
4114 + vmx_update_msr_bitmap(vcpu);
4115
4116 if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
4117 vmcs12->vm_exit_msr_load_count))
4118 @@ -10855,10 +10892,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
4119 vm_exit_controls_reset_shadow(vmx);
4120 vmx_segment_cache_clear(vmx);
4121
4122 - /* if no vmcs02 cache requested, remove the one we used */
4123 - if (VMCS02_POOL_SIZE == 0)
4124 - nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
4125 -
4126 load_vmcs12_host_state(vcpu, vmcs12);
4127
4128 /* Update any VMCS fields that might have changed while L2 ran */
4129 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
4130 index e023ef981feb..75f756eac979 100644
4131 --- a/arch/x86/kvm/x86.c
4132 +++ b/arch/x86/kvm/x86.c
4133 @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = {
4134 #endif
4135 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
4136 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
4137 + MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES
4138 };
4139
4140 static unsigned num_msrs_to_save;
4141 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
4142 index 6bf1898ddf49..4ad7c4dd311c 100644
4143 --- a/arch/x86/lib/Makefile
4144 +++ b/arch/x86/lib/Makefile
4145 @@ -26,6 +26,7 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
4146 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
4147 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
4148 lib-$(CONFIG_RETPOLINE) += retpoline.o
4149 +OBJECT_FILES_NON_STANDARD_retpoline.o :=y
4150
4151 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
4152
4153 diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
4154 index 37b62d412148..b12b214713a6 100644
4155 --- a/arch/x86/lib/getuser.S
4156 +++ b/arch/x86/lib/getuser.S
4157 @@ -39,6 +39,8 @@ ENTRY(__get_user_1)
4158 mov PER_CPU_VAR(current_task), %_ASM_DX
4159 cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
4160 jae bad_get_user
4161 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
4162 + and %_ASM_DX, %_ASM_AX
4163 ASM_STAC
4164 1: movzbl (%_ASM_AX),%edx
4165 xor %eax,%eax
4166 @@ -53,6 +55,8 @@ ENTRY(__get_user_2)
4167 mov PER_CPU_VAR(current_task), %_ASM_DX
4168 cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
4169 jae bad_get_user
4170 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
4171 + and %_ASM_DX, %_ASM_AX
4172 ASM_STAC
4173 2: movzwl -1(%_ASM_AX),%edx
4174 xor %eax,%eax
4175 @@ -67,6 +71,8 @@ ENTRY(__get_user_4)
4176 mov PER_CPU_VAR(current_task), %_ASM_DX
4177 cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
4178 jae bad_get_user
4179 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
4180 + and %_ASM_DX, %_ASM_AX
4181 ASM_STAC
4182 3: movl -3(%_ASM_AX),%edx
4183 xor %eax,%eax
4184 @@ -82,6 +88,8 @@ ENTRY(__get_user_8)
4185 mov PER_CPU_VAR(current_task), %_ASM_DX
4186 cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
4187 jae bad_get_user
4188 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
4189 + and %_ASM_DX, %_ASM_AX
4190 ASM_STAC
4191 4: movq -7(%_ASM_AX),%rdx
4192 xor %eax,%eax
4193 @@ -93,6 +101,8 @@ ENTRY(__get_user_8)
4194 mov PER_CPU_VAR(current_task), %_ASM_DX
4195 cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
4196 jae bad_get_user_8
4197 + sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
4198 + and %_ASM_DX, %_ASM_AX
4199 ASM_STAC
4200 4: movl -7(%_ASM_AX),%edx
4201 5: movl -3(%_ASM_AX),%ecx
4202 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
4203 index dfb2ba91b670..480edc3a5e03 100644
4204 --- a/arch/x86/lib/retpoline.S
4205 +++ b/arch/x86/lib/retpoline.S
4206 @@ -7,6 +7,7 @@
4207 #include <asm/alternative-asm.h>
4208 #include <asm/export.h>
4209 #include <asm/nospec-branch.h>
4210 +#include <asm/bitsperlong.h>
4211
4212 .macro THUNK reg
4213 .section .text.__x86.indirect_thunk
4214 @@ -36,7 +37,6 @@ GENERATE_THUNK(_ASM_DX)
4215 GENERATE_THUNK(_ASM_SI)
4216 GENERATE_THUNK(_ASM_DI)
4217 GENERATE_THUNK(_ASM_BP)
4218 -GENERATE_THUNK(_ASM_SP)
4219 #ifdef CONFIG_64BIT
4220 GENERATE_THUNK(r8)
4221 GENERATE_THUNK(r9)
4222 @@ -47,3 +47,58 @@ GENERATE_THUNK(r13)
4223 GENERATE_THUNK(r14)
4224 GENERATE_THUNK(r15)
4225 #endif
4226 +
4227 +/*
4228 + * Fill the CPU return stack buffer.
4229 + *
4230 + * Each entry in the RSB, if used for a speculative 'ret', contains an
4231 + * infinite 'pause; lfence; jmp' loop to capture speculative execution.
4232 + *
4233 + * This is required in various cases for retpoline and IBRS-based
4234 + * mitigations for the Spectre variant 2 vulnerability. Sometimes to
4235 + * eliminate potentially bogus entries from the RSB, and sometimes
4236 + * purely to ensure that it doesn't get empty, which on some CPUs would
4237 + * allow predictions from other (unwanted!) sources to be used.
4238 + *
4239 + * Google experimented with loop-unrolling and this turned out to be
4240 + * the optimal version - two calls, each with their own speculation
4241 + * trap should their return address end up getting used, in a loop.
4242 + */
4243 +.macro STUFF_RSB nr:req sp:req
4244 + mov $(\nr / 2), %_ASM_BX
4245 + .align 16
4246 +771:
4247 + call 772f
4248 +773: /* speculation trap */
4249 + pause
4250 + lfence
4251 + jmp 773b
4252 + .align 16
4253 +772:
4254 + call 774f
4255 +775: /* speculation trap */
4256 + pause
4257 + lfence
4258 + jmp 775b
4259 + .align 16
4260 +774:
4261 + dec %_ASM_BX
4262 + jnz 771b
4263 + add $((BITS_PER_LONG/8) * \nr), \sp
4264 +.endm
4265 +
4266 +#define RSB_FILL_LOOPS 16 /* To avoid underflow */
4267 +
4268 +ENTRY(__fill_rsb)
4269 + STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
4270 + ret
4271 +END(__fill_rsb)
4272 +EXPORT_SYMBOL_GPL(__fill_rsb)
4273 +
4274 +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
4275 +
4276 +ENTRY(__clear_rsb)
4277 + STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
4278 + ret
4279 +END(__clear_rsb)
4280 +EXPORT_SYMBOL_GPL(__clear_rsb)
4281 diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
4282 index 3bc7baf2a711..5c06dbffc52f 100644
4283 --- a/arch/x86/lib/usercopy_32.c
4284 +++ b/arch/x86/lib/usercopy_32.c
4285 @@ -570,12 +570,12 @@ do { \
4286 unsigned long __copy_to_user_ll(void __user *to, const void *from,
4287 unsigned long n)
4288 {
4289 - stac();
4290 + __uaccess_begin_nospec();
4291 if (movsl_is_ok(to, from, n))
4292 __copy_user(to, from, n);
4293 else
4294 n = __copy_user_intel(to, from, n);
4295 - clac();
4296 + __uaccess_end();
4297 return n;
4298 }
4299 EXPORT_SYMBOL(__copy_to_user_ll);
4300 @@ -627,7 +627,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache);
4301 unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
4302 unsigned long n)
4303 {
4304 - stac();
4305 + __uaccess_begin_nospec();
4306 #ifdef CONFIG_X86_INTEL_USERCOPY
4307 if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
4308 n = __copy_user_intel_nocache(to, from, n);
4309 @@ -636,7 +636,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
4310 #else
4311 __copy_user(to, from, n);
4312 #endif
4313 - clac();
4314 + __uaccess_end();
4315 return n;
4316 }
4317 EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
4318 diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
4319 index e3af318af2db..2a07341aca46 100644
4320 --- a/crypto/tcrypt.c
4321 +++ b/crypto/tcrypt.c
4322 @@ -223,11 +223,13 @@ static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
4323 }
4324
4325 sg_init_table(sg, np + 1);
4326 - np--;
4327 + if (rem)
4328 + np--;
4329 for (k = 0; k < np; k++)
4330 sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
4331
4332 - sg_set_buf(&sg[k + 1], xbuf[k], rem);
4333 + if (rem)
4334 + sg_set_buf(&sg[k + 1], xbuf[k], rem);
4335 }
4336
4337 static void test_aead_speed(const char *algo, int enc, unsigned int secs,
4338 diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
4339 index 83f1439e57fd..6e8eaa7fe7a6 100644
4340 --- a/drivers/auxdisplay/img-ascii-lcd.c
4341 +++ b/drivers/auxdisplay/img-ascii-lcd.c
4342 @@ -442,3 +442,7 @@ static struct platform_driver img_ascii_lcd_driver = {
4343 .remove = img_ascii_lcd_remove,
4344 };
4345 module_platform_driver(img_ascii_lcd_driver);
4346 +
4347 +MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display");
4348 +MODULE_AUTHOR("Paul Burton <paul.burton@mips.com>");
4349 +MODULE_LICENSE("GPL");
4350 diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
4351 index a2ec6d8796a0..3322b157106d 100644
4352 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
4353 +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
4354 @@ -392,6 +392,31 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc)
4355 rcrtc->started = true;
4356 }
4357
4358 +static void rcar_du_crtc_disable_planes(struct rcar_du_crtc *rcrtc)
4359 +{
4360 + struct rcar_du_device *rcdu = rcrtc->group->dev;
4361 + struct drm_crtc *crtc = &rcrtc->crtc;
4362 + u32 status;
4363 + /* Make sure vblank interrupts are enabled. */
4364 + drm_crtc_vblank_get(crtc);
4365 + /*
4366 + * Disable planes and calculate how many vertical blanking interrupts we
4367 + * have to wait for. If a vertical blanking interrupt has been triggered
4368 + * but not processed yet, we don't know whether it occurred before or
4369 + * after the planes got disabled. We thus have to wait for two vblank
4370 + * interrupts in that case.
4371 + */
4372 + spin_lock_irq(&rcrtc->vblank_lock);
4373 + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0);
4374 + status = rcar_du_crtc_read(rcrtc, DSSR);
4375 + rcrtc->vblank_count = status & DSSR_VBK ? 2 : 1;
4376 + spin_unlock_irq(&rcrtc->vblank_lock);
4377 + if (!wait_event_timeout(rcrtc->vblank_wait, rcrtc->vblank_count == 0,
4378 + msecs_to_jiffies(100)))
4379 + dev_warn(rcdu->dev, "vertical blanking timeout\n");
4380 + drm_crtc_vblank_put(crtc);
4381 +}
4382 +
4383 static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
4384 {
4385 struct drm_crtc *crtc = &rcrtc->crtc;
4386 @@ -400,17 +425,16 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
4387 return;
4388
4389 /* Disable all planes and wait for the change to take effect. This is
4390 - * required as the DSnPR registers are updated on vblank, and no vblank
4391 - * will occur once the CRTC is stopped. Disabling planes when starting
4392 - * the CRTC thus wouldn't be enough as it would start scanning out
4393 - * immediately from old frame buffers until the next vblank.
4394 + * required as the plane enable registers are updated on vblank, and no
4395 + * vblank will occur once the CRTC is stopped. Disabling planes when
4396 + * starting the CRTC thus wouldn't be enough as it would start scanning
4397 + * out immediately from old frame buffers until the next vblank.
4398 *
4399 * This increases the CRTC stop delay, especially when multiple CRTCs
4400 * are stopped in one operation as we now wait for one vblank per CRTC.
4401 * Whether this can be improved needs to be researched.
4402 */
4403 - rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, 0);
4404 - drm_crtc_wait_one_vblank(crtc);
4405 + rcar_du_crtc_disable_planes(rcrtc);
4406
4407 /* Disable vertical blanking interrupt reporting. We first need to wait
4408 * for page flip completion before stopping the CRTC as userspace
4409 @@ -548,10 +572,25 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
4410 irqreturn_t ret = IRQ_NONE;
4411 u32 status;
4412
4413 + spin_lock(&rcrtc->vblank_lock);
4414 +
4415 status = rcar_du_crtc_read(rcrtc, DSSR);
4416 rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
4417
4418 - if (status & DSSR_FRM) {
4419 + if (status & DSSR_VBK) {
4420 + /*
4421 + * Wake up the vblank wait if the counter reaches 0. This must
4422 + * be protected by the vblank_lock to avoid races in
4423 + * rcar_du_crtc_disable_planes().
4424 + */
4425 + if (rcrtc->vblank_count) {
4426 + if (--rcrtc->vblank_count == 0)
4427 + wake_up(&rcrtc->vblank_wait);
4428 + }
4429 + }
4430 + spin_unlock(&rcrtc->vblank_lock);
4431 +
4432 + if (status & DSSR_VBK) {
4433 drm_crtc_handle_vblank(&rcrtc->crtc);
4434 rcar_du_crtc_finish_page_flip(rcrtc);
4435 ret = IRQ_HANDLED;
4436 @@ -606,6 +645,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
4437 }
4438
4439 init_waitqueue_head(&rcrtc->flip_wait);
4440 + init_waitqueue_head(&rcrtc->vblank_wait);
4441 + spin_lock_init(&rcrtc->vblank_lock);
4442
4443 rcrtc->group = rgrp;
4444 rcrtc->mmio_offset = mmio_offsets[index];
4445 diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
4446 index 6f08b7e7db06..48bef05b4c62 100644
4447 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
4448 +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
4449 @@ -15,6 +15,7 @@
4450 #define __RCAR_DU_CRTC_H__
4451
4452 #include <linux/mutex.h>
4453 +#include <linux/spinlock.h>
4454 #include <linux/wait.h>
4455
4456 #include <drm/drmP.h>
4457 @@ -33,6 +34,9 @@ struct rcar_du_vsp;
4458 * @started: whether the CRTC has been started and is running
4459 * @event: event to post when the pending page flip completes
4460 * @flip_wait: wait queue used to signal page flip completion
4461 + * @vblank_lock: protects vblank_wait and vblank_count
4462 + * @vblank_wait: wait queue used to signal vertical blanking
4463 + * @vblank_count: number of vertical blanking interrupts to wait for
4464 * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC
4465 * @group: CRTC group this CRTC belongs to
4466 */
4467 @@ -48,6 +52,10 @@ struct rcar_du_crtc {
4468 struct drm_pending_vblank_event *event;
4469 wait_queue_head_t flip_wait;
4470
4471 + spinlock_t vblank_lock;
4472 + wait_queue_head_t vblank_wait;
4473 + unsigned int vblank_count;
4474 +
4475 unsigned int outputs;
4476
4477 struct rcar_du_group *group;
4478 diff --git a/drivers/media/platform/soc_camera/soc_scale_crop.c b/drivers/media/platform/soc_camera/soc_scale_crop.c
4479 index f77252d6ccd3..d29c24854c2c 100644
4480 --- a/drivers/media/platform/soc_camera/soc_scale_crop.c
4481 +++ b/drivers/media/platform/soc_camera/soc_scale_crop.c
4482 @@ -418,3 +418,7 @@ void soc_camera_calc_client_output(struct soc_camera_device *icd,
4483 mf->height = soc_camera_shift_scale(rect->height, shift, scale_v);
4484 }
4485 EXPORT_SYMBOL(soc_camera_calc_client_output);
4486 +
4487 +MODULE_DESCRIPTION("soc-camera scaling-cropping functions");
4488 +MODULE_AUTHOR("Guennadi Liakhovetski <kernel@pengutronix.de>");
4489 +MODULE_LICENSE("GPL");
4490 diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
4491 index bdbcd2b088a0..c3c28f0960e5 100644
4492 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
4493 +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
4494 @@ -3849,7 +3849,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter)
4495 struct list_head *head = &mbx->cmd_q;
4496 struct qlcnic_cmd_args *cmd = NULL;
4497
4498 - spin_lock(&mbx->queue_lock);
4499 + spin_lock_bh(&mbx->queue_lock);
4500
4501 while (!list_empty(head)) {
4502 cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
4503 @@ -3860,7 +3860,7 @@ static void qlcnic_83xx_flush_mbx_queue(struct qlcnic_adapter *adapter)
4504 qlcnic_83xx_notify_cmd_completion(adapter, cmd);
4505 }
4506
4507 - spin_unlock(&mbx->queue_lock);
4508 + spin_unlock_bh(&mbx->queue_lock);
4509 }
4510
4511 static int qlcnic_83xx_check_mbx_status(struct qlcnic_adapter *adapter)
4512 @@ -3896,12 +3896,12 @@ static void qlcnic_83xx_dequeue_mbx_cmd(struct qlcnic_adapter *adapter,
4513 {
4514 struct qlcnic_mailbox *mbx = adapter->ahw->mailbox;
4515
4516 - spin_lock(&mbx->queue_lock);
4517 + spin_lock_bh(&mbx->queue_lock);
4518
4519 list_del(&cmd->list);
4520 mbx->num_cmds--;
4521
4522 - spin_unlock(&mbx->queue_lock);
4523 + spin_unlock_bh(&mbx->queue_lock);
4524
4525 qlcnic_83xx_notify_cmd_completion(adapter, cmd);
4526 }
4527 @@ -3966,7 +3966,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter,
4528 init_completion(&cmd->completion);
4529 cmd->rsp_opcode = QLC_83XX_MBX_RESPONSE_UNKNOWN;
4530
4531 - spin_lock(&mbx->queue_lock);
4532 + spin_lock_bh(&mbx->queue_lock);
4533
4534 list_add_tail(&cmd->list, &mbx->cmd_q);
4535 mbx->num_cmds++;
4536 @@ -3974,7 +3974,7 @@ static int qlcnic_83xx_enqueue_mbx_cmd(struct qlcnic_adapter *adapter,
4537 *timeout = cmd->total_cmds * QLC_83XX_MBX_TIMEOUT;
4538 queue_work(mbx->work_q, &mbx->work);
4539
4540 - spin_unlock(&mbx->queue_lock);
4541 + spin_unlock_bh(&mbx->queue_lock);
4542
4543 return 0;
4544 }
4545 @@ -4070,15 +4070,15 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
4546 mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
4547 spin_unlock_irqrestore(&mbx->aen_lock, flags);
4548
4549 - spin_lock(&mbx->queue_lock);
4550 + spin_lock_bh(&mbx->queue_lock);
4551
4552 if (list_empty(head)) {
4553 - spin_unlock(&mbx->queue_lock);
4554 + spin_unlock_bh(&mbx->queue_lock);
4555 return;
4556 }
4557 cmd = list_entry(head->next, struct qlcnic_cmd_args, list);
4558
4559 - spin_unlock(&mbx->queue_lock);
4560 + spin_unlock_bh(&mbx->queue_lock);
4561
4562 mbx_ops->encode_cmd(adapter, cmd);
4563 mbx_ops->nofity_fw(adapter, QLC_83XX_MBX_REQUEST);
4564 diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
4565 index 298b74ebc1e9..18e68c91e651 100644
4566 --- a/drivers/net/ethernet/realtek/r8169.c
4567 +++ b/drivers/net/ethernet/realtek/r8169.c
4568 @@ -1387,7 +1387,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond)
4569 {
4570 void __iomem *ioaddr = tp->mmio_addr;
4571
4572 - return RTL_R8(IBISR0) & 0x02;
4573 + return RTL_R8(IBISR0) & 0x20;
4574 }
4575
4576 static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
4577 @@ -1395,7 +1395,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
4578 void __iomem *ioaddr = tp->mmio_addr;
4579
4580 RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01);
4581 - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000);
4582 + rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
4583 RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20);
4584 RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01);
4585 }
4586 diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
4587 index db65d9ad4488..e1e5e8438457 100644
4588 --- a/drivers/net/usb/qmi_wwan.c
4589 +++ b/drivers/net/usb/qmi_wwan.c
4590 @@ -944,6 +944,7 @@ static const struct usb_device_id products[] = {
4591 {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
4592 {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
4593 {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
4594 + {QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)}, /* Quectel EP06 Mini PCIe */
4595
4596 /* 4. Gobi 1000 devices */
4597 {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
4598 diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c
4599 index 6e5d9095b195..a635fc6b1722 100644
4600 --- a/drivers/net/wireless/broadcom/b43/main.c
4601 +++ b/drivers/net/wireless/broadcom/b43/main.c
4602 @@ -71,8 +71,18 @@ MODULE_FIRMWARE("b43/ucode11.fw");
4603 MODULE_FIRMWARE("b43/ucode13.fw");
4604 MODULE_FIRMWARE("b43/ucode14.fw");
4605 MODULE_FIRMWARE("b43/ucode15.fw");
4606 +MODULE_FIRMWARE("b43/ucode16_lp.fw");
4607 MODULE_FIRMWARE("b43/ucode16_mimo.fw");
4608 +MODULE_FIRMWARE("b43/ucode24_lcn.fw");
4609 +MODULE_FIRMWARE("b43/ucode25_lcn.fw");
4610 +MODULE_FIRMWARE("b43/ucode25_mimo.fw");
4611 +MODULE_FIRMWARE("b43/ucode26_mimo.fw");
4612 +MODULE_FIRMWARE("b43/ucode29_mimo.fw");
4613 +MODULE_FIRMWARE("b43/ucode33_lcn40.fw");
4614 +MODULE_FIRMWARE("b43/ucode30_mimo.fw");
4615 MODULE_FIRMWARE("b43/ucode5.fw");
4616 +MODULE_FIRMWARE("b43/ucode40.fw");
4617 +MODULE_FIRMWARE("b43/ucode42.fw");
4618 MODULE_FIRMWARE("b43/ucode9.fw");
4619
4620 static int modparam_bad_frames_preempt;
4621 diff --git a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
4622 index 866aa3ce1ac9..6cf0006d4c8d 100644
4623 --- a/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
4624 +++ b/drivers/pinctrl/pxa/pinctrl-pxa2xx.c
4625 @@ -436,3 +436,7 @@ int pxa2xx_pinctrl_exit(struct platform_device *pdev)
4626 return 0;
4627 }
4628 EXPORT_SYMBOL_GPL(pxa2xx_pinctrl_exit);
4629 +
4630 +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
4631 +MODULE_DESCRIPTION("Marvell PXA2xx pinctrl driver");
4632 +MODULE_LICENSE("GPL v2");
4633 diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
4634 index f2303f390345..23973a8124fc 100644
4635 --- a/drivers/tty/serial/serial_core.c
4636 +++ b/drivers/tty/serial/serial_core.c
4637 @@ -965,6 +965,8 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
4638 }
4639 } else {
4640 retval = uart_startup(tty, state, 1);
4641 + if (retval == 0)
4642 + tty_port_set_initialized(port, true);
4643 if (retval > 0)
4644 retval = 0;
4645 }
4646 diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
4647 index 96a0661011fd..e5b7652234fc 100644
4648 --- a/drivers/vhost/net.c
4649 +++ b/drivers/vhost/net.c
4650 @@ -1078,6 +1078,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
4651 }
4652 vhost_net_stop(n, &tx_sock, &rx_sock);
4653 vhost_net_flush(n);
4654 + vhost_dev_stop(&n->dev);
4655 vhost_dev_reset_owner(&n->dev, umem);
4656 vhost_net_vq_reset(n);
4657 done:
4658 diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
4659 index 6e84b2cae6ad..442b54a14cbc 100644
4660 --- a/include/linux/fdtable.h
4661 +++ b/include/linux/fdtable.h
4662 @@ -9,6 +9,7 @@
4663 #include <linux/compiler.h>
4664 #include <linux/spinlock.h>
4665 #include <linux/rcupdate.h>
4666 +#include <linux/nospec.h>
4667 #include <linux/types.h>
4668 #include <linux/init.h>
4669 #include <linux/fs.h>
4670 @@ -81,8 +82,10 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i
4671 {
4672 struct fdtable *fdt = rcu_dereference_raw(files->fdt);
4673
4674 - if (fd < fdt->max_fds)
4675 + if (fd < fdt->max_fds) {
4676 + fd = array_index_nospec(fd, fdt->max_fds);
4677 return rcu_dereference_raw(fdt->fd[fd]);
4678 + }
4679 return NULL;
4680 }
4681
4682 diff --git a/include/linux/init.h b/include/linux/init.h
4683 index e30104ceb86d..8e346d1bd837 100644
4684 --- a/include/linux/init.h
4685 +++ b/include/linux/init.h
4686 @@ -4,6 +4,13 @@
4687 #include <linux/compiler.h>
4688 #include <linux/types.h>
4689
4690 +/* Built-in __init functions needn't be compiled with retpoline */
4691 +#if defined(RETPOLINE) && !defined(MODULE)
4692 +#define __noretpoline __attribute__((indirect_branch("keep")))
4693 +#else
4694 +#define __noretpoline
4695 +#endif
4696 +
4697 /* These macros are used to mark some functions or
4698 * initialized data (doesn't apply to uninitialized data)
4699 * as `initialization' functions. The kernel can take this
4700 @@ -39,7 +46,7 @@
4701
4702 /* These are for everybody (although not all archs will actually
4703 discard it in modules) */
4704 -#define __init __section(.init.text) __cold notrace __latent_entropy
4705 +#define __init __section(.init.text) __cold notrace __latent_entropy __noretpoline
4706 #define __initdata __section(.init.data)
4707 #define __initconst __section(.init.rodata)
4708 #define __exitdata __section(.exit.data)
4709 diff --git a/include/linux/module.h b/include/linux/module.h
4710 index 0c3207d26ac0..d2224a09b4b5 100644
4711 --- a/include/linux/module.h
4712 +++ b/include/linux/module.h
4713 @@ -791,6 +791,15 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
4714 static inline void module_bug_cleanup(struct module *mod) {}
4715 #endif /* CONFIG_GENERIC_BUG */
4716
4717 +#ifdef RETPOLINE
4718 +extern bool retpoline_module_ok(bool has_retpoline);
4719 +#else
4720 +static inline bool retpoline_module_ok(bool has_retpoline)
4721 +{
4722 + return true;
4723 +}
4724 +#endif
4725 +
4726 #ifdef CONFIG_MODULE_SIG
4727 static inline bool module_sig_ok(struct module *module)
4728 {
4729 diff --git a/include/linux/nospec.h b/include/linux/nospec.h
4730 new file mode 100644
4731 index 000000000000..b99bced39ac2
4732 --- /dev/null
4733 +++ b/include/linux/nospec.h
4734 @@ -0,0 +1,72 @@
4735 +// SPDX-License-Identifier: GPL-2.0
4736 +// Copyright(c) 2018 Linus Torvalds. All rights reserved.
4737 +// Copyright(c) 2018 Alexei Starovoitov. All rights reserved.
4738 +// Copyright(c) 2018 Intel Corporation. All rights reserved.
4739 +
4740 +#ifndef _LINUX_NOSPEC_H
4741 +#define _LINUX_NOSPEC_H
4742 +
4743 +/**
4744 + * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
4745 + * @index: array element index
4746 + * @size: number of elements in array
4747 + *
4748 + * When @index is out of bounds (@index >= @size), the sign bit will be
4749 + * set. Extend the sign bit to all bits and invert, giving a result of
4750 + * zero for an out of bounds index, or ~0 if within bounds [0, @size).
4751 + */
4752 +#ifndef array_index_mask_nospec
4753 +static inline unsigned long array_index_mask_nospec(unsigned long index,
4754 + unsigned long size)
4755 +{
4756 + /*
4757 + * Warn developers about inappropriate array_index_nospec() usage.
4758 + *
4759 + * Even if the CPU speculates past the WARN_ONCE branch, the
4760 + * sign bit of @index is taken into account when generating the
4761 + * mask.
4762 + *
4763 + * This warning is compiled out when the compiler can infer that
4764 + * @index and @size are less than LONG_MAX.
4765 + */
4766 + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX,
4767 + "array_index_nospec() limited to range of [0, LONG_MAX]\n"))
4768 + return 0;
4769 +
4770 + /*
4771 + * Always calculate and emit the mask even if the compiler
4772 + * thinks the mask is not needed. The compiler does not take
4773 + * into account the value of @index under speculation.
4774 + */
4775 + OPTIMIZER_HIDE_VAR(index);
4776 + return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
4777 +}
4778 +#endif
4779 +
4780 +/*
4781 + * array_index_nospec - sanitize an array index after a bounds check
4782 + *
4783 + * For a code sequence like:
4784 + *
4785 + * if (index < size) {
4786 + * index = array_index_nospec(index, size);
4787 + * val = array[index];
4788 + * }
4789 + *
4790 + * ...if the CPU speculates past the bounds check then
4791 + * array_index_nospec() will clamp the index within the range of [0,
4792 + * size).
4793 + */
4794 +#define array_index_nospec(index, size) \
4795 +({ \
4796 + typeof(index) _i = (index); \
4797 + typeof(size) _s = (size); \
4798 + unsigned long _mask = array_index_mask_nospec(_i, _s); \
4799 + \
4800 + BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
4801 + BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
4802 + \
4803 + _i &= _mask; \
4804 + _i; \
4805 +})
4806 +#endif /* _LINUX_NOSPEC_H */
4807 diff --git a/kernel/module.c b/kernel/module.c
4808 index 0e54d5bf0097..07bfb9971f2f 100644
4809 --- a/kernel/module.c
4810 +++ b/kernel/module.c
4811 @@ -2817,6 +2817,15 @@ static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
4812 }
4813 #endif /* CONFIG_LIVEPATCH */
4814
4815 +static void check_modinfo_retpoline(struct module *mod, struct load_info *info)
4816 +{
4817 + if (retpoline_module_ok(get_modinfo(info, "retpoline")))
4818 + return;
4819 +
4820 + pr_warn("%s: loading module not compiled with retpoline compiler.\n",
4821 + mod->name);
4822 +}
4823 +
4824 /* Sets info->hdr and info->len. */
4825 static int copy_module_from_user(const void __user *umod, unsigned long len,
4826 struct load_info *info)
4827 @@ -2969,6 +2978,8 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
4828 add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
4829 }
4830
4831 + check_modinfo_retpoline(mod, info);
4832 +
4833 if (get_modinfo(info, "staging")) {
4834 add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
4835 pr_warn("%s: module is from the staging directory, the quality "
4836 diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
4837 index 77f396b679ce..5dce4291f0ed 100644
4838 --- a/net/core/sock_reuseport.c
4839 +++ b/net/core/sock_reuseport.c
4840 @@ -93,6 +93,16 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
4841 return more_reuse;
4842 }
4843
4844 +static void reuseport_free_rcu(struct rcu_head *head)
4845 +{
4846 + struct sock_reuseport *reuse;
4847 +
4848 + reuse = container_of(head, struct sock_reuseport, rcu);
4849 + if (reuse->prog)
4850 + bpf_prog_destroy(reuse->prog);
4851 + kfree(reuse);
4852 +}
4853 +
4854 /**
4855 * reuseport_add_sock - Add a socket to the reuseport group of another.
4856 * @sk: New socket to add to the group.
4857 @@ -101,7 +111,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
4858 */
4859 int reuseport_add_sock(struct sock *sk, struct sock *sk2)
4860 {
4861 - struct sock_reuseport *reuse;
4862 + struct sock_reuseport *old_reuse, *reuse;
4863
4864 if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
4865 int err = reuseport_alloc(sk2);
4866 @@ -112,10 +122,13 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
4867
4868 spin_lock_bh(&reuseport_lock);
4869 reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
4870 - lockdep_is_held(&reuseport_lock)),
4871 - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
4872 - lockdep_is_held(&reuseport_lock)),
4873 - "socket already in reuseport group");
4874 + lockdep_is_held(&reuseport_lock));
4875 + old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
4876 + lockdep_is_held(&reuseport_lock));
4877 + if (old_reuse && old_reuse->num_socks != 1) {
4878 + spin_unlock_bh(&reuseport_lock);
4879 + return -EBUSY;
4880 + }
4881
4882 if (reuse->num_socks == reuse->max_socks) {
4883 reuse = reuseport_grow(reuse);
4884 @@ -133,19 +146,11 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
4885
4886 spin_unlock_bh(&reuseport_lock);
4887
4888 + if (old_reuse)
4889 + call_rcu(&old_reuse->rcu, reuseport_free_rcu);
4890 return 0;
4891 }
4892
4893 -static void reuseport_free_rcu(struct rcu_head *head)
4894 -{
4895 - struct sock_reuseport *reuse;
4896 -
4897 - reuse = container_of(head, struct sock_reuseport, rcu);
4898 - if (reuse->prog)
4899 - bpf_prog_destroy(reuse->prog);
4900 - kfree(reuse);
4901 -}
4902 -
4903 void reuseport_detach_sock(struct sock *sk)
4904 {
4905 struct sock_reuseport *reuse;
4906 diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
4907 index 9c7a4cea1628..7f5fe07d0b13 100644
4908 --- a/net/ipv4/igmp.c
4909 +++ b/net/ipv4/igmp.c
4910 @@ -386,7 +386,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
4911 pip->frag_off = htons(IP_DF);
4912 pip->ttl = 1;
4913 pip->daddr = fl4.daddr;
4914 +
4915 + rcu_read_lock();
4916 pip->saddr = igmpv3_get_srcaddr(dev, &fl4);
4917 + rcu_read_unlock();
4918 +
4919 pip->protocol = IPPROTO_IGMP;
4920 pip->tot_len = 0; /* filled in later */
4921 ip_select_ident(net, skb, NULL);
4922 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
4923 index 7efa6b062049..0d1a767db1bb 100644
4924 --- a/net/ipv4/tcp.c
4925 +++ b/net/ipv4/tcp.c
4926 @@ -2316,6 +2316,12 @@ int tcp_disconnect(struct sock *sk, int flags)
4927
4928 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
4929
4930 + if (sk->sk_frag.page) {
4931 + put_page(sk->sk_frag.page);
4932 + sk->sk_frag.page = NULL;
4933 + sk->sk_frag.offset = 0;
4934 + }
4935 +
4936 sk->sk_error_report(sk);
4937 return err;
4938 }
4939 diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
4940 index e86a34fd5484..8ec60532be2b 100644
4941 --- a/net/ipv4/tcp_bbr.c
4942 +++ b/net/ipv4/tcp_bbr.c
4943 @@ -452,7 +452,8 @@ static void bbr_advance_cycle_phase(struct sock *sk)
4944
4945 bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
4946 bbr->cycle_mstamp = tp->delivered_mstamp;
4947 - bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
4948 + bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
4949 + bbr_pacing_gain[bbr->cycle_idx];
4950 }
4951
4952 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
4953 @@ -461,8 +462,7 @@ static void bbr_update_cycle_phase(struct sock *sk,
4954 {
4955 struct bbr *bbr = inet_csk_ca(sk);
4956
4957 - if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
4958 - bbr_is_next_cycle_phase(sk, rs))
4959 + if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
4960 bbr_advance_cycle_phase(sk);
4961 }
4962
4963 diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
4964 index 5cad76f87536..421379014995 100644
4965 --- a/net/ipv6/af_inet6.c
4966 +++ b/net/ipv6/af_inet6.c
4967 @@ -274,6 +274,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
4968 struct net *net = sock_net(sk);
4969 __be32 v4addr = 0;
4970 unsigned short snum;
4971 + bool saved_ipv6only;
4972 int addr_type = 0;
4973 int err = 0;
4974
4975 @@ -378,19 +379,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
4976 if (!(addr_type & IPV6_ADDR_MULTICAST))
4977 np->saddr = addr->sin6_addr;
4978
4979 + saved_ipv6only = sk->sk_ipv6only;
4980 + if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
4981 + sk->sk_ipv6only = 1;
4982 +
4983 /* Make sure we are allowed to bind here. */
4984 if ((snum || !inet->bind_address_no_port) &&
4985 sk->sk_prot->get_port(sk, snum)) {
4986 + sk->sk_ipv6only = saved_ipv6only;
4987 inet_reset_saddr(sk);
4988 err = -EADDRINUSE;
4989 goto out;
4990 }
4991
4992 - if (addr_type != IPV6_ADDR_ANY) {
4993 + if (addr_type != IPV6_ADDR_ANY)
4994 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
4995 - if (addr_type != IPV6_ADDR_MAPPED)
4996 - sk->sk_ipv6only = 1;
4997 - }
4998 if (snum)
4999 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
5000 inet->inet_sport = htons(inet->inet_num);
5001 diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
5002 index 117405dd07a3..a30e7e925c9b 100644
5003 --- a/net/ipv6/ip6mr.c
5004 +++ b/net/ipv6/ip6mr.c
5005 @@ -495,6 +495,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
5006 return ERR_PTR(-ENOENT);
5007
5008 it->mrt = mrt;
5009 + it->cache = NULL;
5010 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
5011 : SEQ_START_TOKEN;
5012 }
5013 diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
5014 index ae83c3aec308..da574a16e7b3 100644
5015 --- a/net/sched/cls_u32.c
5016 +++ b/net/sched/cls_u32.c
5017 @@ -496,6 +496,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
5018 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
5019 u32 flags)
5020 {
5021 + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
5022 struct net_device *dev = tp->q->dev_queue->dev;
5023 struct tc_cls_u32_offload u32_offload = {0};
5024 struct tc_to_netdev offload;
5025 @@ -520,7 +521,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
5026 offload.cls_u32->knode.sel = &n->sel;
5027 offload.cls_u32->knode.exts = &n->exts;
5028 if (n->ht_down)
5029 - offload.cls_u32->knode.link_handle = n->ht_down->handle;
5030 + offload.cls_u32->knode.link_handle = ht->handle;
5031
5032 err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
5033 tp->protocol, &offload);
5034 @@ -788,8 +789,9 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
5035 static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
5036 struct tc_u_knode *n)
5037 {
5038 - struct tc_u_knode *new;
5039 + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
5040 struct tc_u32_sel *s = &n->sel;
5041 + struct tc_u_knode *new;
5042
5043 new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
5044 GFP_KERNEL);
5045 @@ -807,11 +809,11 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
5046 new->fshift = n->fshift;
5047 new->res = n->res;
5048 new->flags = n->flags;
5049 - RCU_INIT_POINTER(new->ht_down, n->ht_down);
5050 + RCU_INIT_POINTER(new->ht_down, ht);
5051
5052 /* bump reference count as long as we hold pointer to structure */
5053 - if (new->ht_down)
5054 - new->ht_down->refcnt++;
5055 + if (ht)
5056 + ht->refcnt++;
5057
5058 #ifdef CONFIG_CLS_U32_PERF
5059 /* Statistics may be incremented by readers during update
5060 diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
5061 index c626f679e1c8..91722e97cdd5 100644
5062 --- a/net/wireless/nl80211.c
5063 +++ b/net/wireless/nl80211.c
5064 @@ -16,6 +16,7 @@
5065 #include <linux/nl80211.h>
5066 #include <linux/rtnetlink.h>
5067 #include <linux/netlink.h>
5068 +#include <linux/nospec.h>
5069 #include <linux/etherdevice.h>
5070 #include <net/net_namespace.h>
5071 #include <net/genetlink.h>
5072 @@ -2014,20 +2015,22 @@ static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
5073 static int parse_txq_params(struct nlattr *tb[],
5074 struct ieee80211_txq_params *txq_params)
5075 {
5076 + u8 ac;
5077 +
5078 if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] ||
5079 !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] ||
5080 !tb[NL80211_TXQ_ATTR_AIFS])
5081 return -EINVAL;
5082
5083 - txq_params->ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
5084 + ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]);
5085 txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]);
5086 txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]);
5087 txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]);
5088 txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]);
5089
5090 - if (txq_params->ac >= NL80211_NUM_ACS)
5091 + if (ac >= NL80211_NUM_ACS)
5092 return -EINVAL;
5093 -
5094 + txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS);
5095 return 0;
5096 }
5097
5098 diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
5099 index 845eb9b800f3..238db4ffd30c 100644
5100 --- a/scripts/mod/modpost.c
5101 +++ b/scripts/mod/modpost.c
5102 @@ -2130,6 +2130,14 @@ static void add_intree_flag(struct buffer *b, int is_intree)
5103 buf_printf(b, "\nMODULE_INFO(intree, \"Y\");\n");
5104 }
5105
5106 +/* Cannot check for assembler */
5107 +static void add_retpoline(struct buffer *b)
5108 +{
5109 + buf_printf(b, "\n#ifdef RETPOLINE\n");
5110 + buf_printf(b, "MODULE_INFO(retpoline, \"Y\");\n");
5111 + buf_printf(b, "#endif\n");
5112 +}
5113 +
5114 static void add_staging_flag(struct buffer *b, const char *name)
5115 {
5116 static const char *staging_dir = "drivers/staging";
5117 @@ -2474,6 +2482,7 @@ int main(int argc, char **argv)
5118
5119 add_header(&buf, mod);
5120 add_intree_flag(&buf, !external_module);
5121 + add_retpoline(&buf);
5122 add_staging_flag(&buf, mod->name);
5123 err |= add_versions(&buf, mod);
5124 add_depends(&buf, mod, modules);
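
Together with the retpoline_module_ok() stub at the top of this section and the check_modinfo_retpoline() hook in kernel/module.c, this modpost change forms a simple handshake: modpost stamps MODULE_INFO(retpoline, "Y") into each generated *.mod.c when the kernel was built with RETPOLINE defined, and the module loader warns when that tag is missing. The standalone sketch below models only that decision; kernel_uses_retpoline and check_module() are invented stand-ins for CONFIG_RETPOLINE and the modinfo lookup, not kernel APIs, and the retpoline-enabled branch of retpoline_module_ok() is a loose approximation rather than the kernel's exact definition.

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stand-in for CONFIG_RETPOLINE: does this kernel expect
     * retpoline-aware modules?
     */
    static const bool kernel_uses_retpoline = true;

    /* Loose model of the check: with retpolines enabled, a module is only
     * "ok" if its modinfo carried the tag that modpost emitted.
     */
    static bool retpoline_module_ok(bool has_retpoline)
    {
            return !kernel_uses_retpoline || has_retpoline;
    }

    static void check_module(const char *name, bool has_retpoline_tag)
    {
            if (retpoline_module_ok(has_retpoline_tag))
                    return;
            fprintf(stderr, "%s: loading module not compiled with retpoline compiler.\n",
                    name);
    }

    int main(void)
    {
            check_module("good_mod", true);   /* silent */
            check_module("old_mod", false);   /* prints the warning */
            return 0;
    }
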
5125 diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
5126 index a871159bf03c..ead2fd60244d 100644
5127 --- a/security/keys/encrypted-keys/encrypted.c
5128 +++ b/security/keys/encrypted-keys/encrypted.c
5129 @@ -141,23 +141,22 @@ static int valid_ecryptfs_desc(const char *ecryptfs_desc)
5130 */
5131 static int valid_master_desc(const char *new_desc, const char *orig_desc)
5132 {
5133 - if (!memcmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) {
5134 - if (strlen(new_desc) == KEY_TRUSTED_PREFIX_LEN)
5135 - goto out;
5136 - if (orig_desc)
5137 - if (memcmp(new_desc, orig_desc, KEY_TRUSTED_PREFIX_LEN))
5138 - goto out;
5139 - } else if (!memcmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) {
5140 - if (strlen(new_desc) == KEY_USER_PREFIX_LEN)
5141 - goto out;
5142 - if (orig_desc)
5143 - if (memcmp(new_desc, orig_desc, KEY_USER_PREFIX_LEN))
5144 - goto out;
5145 - } else
5146 - goto out;
5147 + int prefix_len;
5148 +
5149 + if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN))
5150 + prefix_len = KEY_TRUSTED_PREFIX_LEN;
5151 + else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN))
5152 + prefix_len = KEY_USER_PREFIX_LEN;
5153 + else
5154 + return -EINVAL;
5155 +
5156 + if (!new_desc[prefix_len])
5157 + return -EINVAL;
5158 +
5159 + if (orig_desc && strncmp(new_desc, orig_desc, prefix_len))
5160 + return -EINVAL;
5161 +
5162 return 0;
5163 -out:
5164 - return -EINVAL;
5165 }
5166
5167 /*
5168 diff --git a/sound/soc/codecs/pcm512x-spi.c b/sound/soc/codecs/pcm512x-spi.c
5169 index 712ed6598c48..ebdf9bd5a64c 100644
5170 --- a/sound/soc/codecs/pcm512x-spi.c
5171 +++ b/sound/soc/codecs/pcm512x-spi.c
5172 @@ -70,3 +70,7 @@ static struct spi_driver pcm512x_spi_driver = {
5173 };
5174
5175 module_spi_driver(pcm512x_spi_driver);
5176 +
5177 +MODULE_DESCRIPTION("ASoC PCM512x codec driver - SPI");
5178 +MODULE_AUTHOR("Mark Brown <broonie@kernel.org>");
5179 +MODULE_LICENSE("GPL v2");
5180 diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
5181 index f608f8d23f3d..dd88c2cb6470 100644
5182 --- a/sound/soc/generic/simple-card.c
5183 +++ b/sound/soc/generic/simple-card.c
5184 @@ -232,13 +232,19 @@ static int asoc_simple_card_dai_link_of(struct device_node *node,
5185 snprintf(prop, sizeof(prop), "%scpu", prefix);
5186 cpu = of_get_child_by_name(node, prop);
5187
5188 + if (!cpu) {
5189 + ret = -EINVAL;
5190 + dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop);
5191 + goto dai_link_of_err;
5192 + }
5193 +
5194 snprintf(prop, sizeof(prop), "%splat", prefix);
5195 plat = of_get_child_by_name(node, prop);
5196
5197 snprintf(prop, sizeof(prop), "%scodec", prefix);
5198 codec = of_get_child_by_name(node, prop);
5199
5200 - if (!cpu || !codec) {
5201 + if (!codec) {
5202 ret = -EINVAL;
5203 dev_err(dev, "%s: Can't find %s DT node\n", __func__, prop);
5204 goto dai_link_of_err;
5205 diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
5206 index 560cf4b51a99..a9a43acce30e 100644
5207 --- a/sound/soc/sh/rcar/ssi.c
5208 +++ b/sound/soc/sh/rcar/ssi.c
5209 @@ -699,9 +699,14 @@ static int rsnd_ssi_dma_remove(struct rsnd_mod *mod,
5210 struct rsnd_priv *priv)
5211 {
5212 struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
5213 + struct rsnd_mod *pure_ssi_mod = rsnd_io_to_mod_ssi(io);
5214 struct device *dev = rsnd_priv_to_dev(priv);
5215 int irq = ssi->irq;
5216
5217 + /* Do nothing if non SSI (= SSI parent, multi SSI) mod */
5218 + if (pure_ssi_mod != mod)
5219 + return 0;
5220 +
5221 /* PIO will request IRQ again */
5222 devm_free_irq(dev, irq, mod);
5223