Annotation of /trunk/kernel-alx/patches-4.19/0142-4.19.43-all-fixes.patch
Parent Directory | Revision Log
Revision 3421 -
(hide annotations)
(download)
Fri Aug 2 11:47:50 2019 UTC (5 years, 1 month ago) by niro
File size: 140299 byte(s)
-linux-4.19.43
1 | niro | 3421 | diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu |
2 | index 73318225a368..8718d4ad227b 100644 | ||
3 | --- a/Documentation/ABI/testing/sysfs-devices-system-cpu | ||
4 | +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu | ||
5 | @@ -477,6 +477,7 @@ What: /sys/devices/system/cpu/vulnerabilities | ||
6 | /sys/devices/system/cpu/vulnerabilities/spectre_v2 | ||
7 | /sys/devices/system/cpu/vulnerabilities/spec_store_bypass | ||
8 | /sys/devices/system/cpu/vulnerabilities/l1tf | ||
9 | + /sys/devices/system/cpu/vulnerabilities/mds | ||
10 | Date: January 2018 | ||
11 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> | ||
12 | Description: Information about CPU vulnerabilities | ||
13 | @@ -489,8 +490,7 @@ Description: Information about CPU vulnerabilities | ||
14 | "Vulnerable" CPU is affected and no mitigation in effect | ||
15 | "Mitigation: $M" CPU is affected and mitigation $M is in effect | ||
16 | |||
17 | - Details about the l1tf file can be found in | ||
18 | - Documentation/admin-guide/l1tf.rst | ||
19 | + See also: Documentation/admin-guide/hw-vuln/index.rst | ||
20 | |||
21 | What: /sys/devices/system/cpu/smt | ||
22 | /sys/devices/system/cpu/smt/active | ||
23 | diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst | ||
24 | new file mode 100644 | ||
25 | index 000000000000..ffc064c1ec68 | ||
26 | --- /dev/null | ||
27 | +++ b/Documentation/admin-guide/hw-vuln/index.rst | ||
28 | @@ -0,0 +1,13 @@ | ||
29 | +======================== | ||
30 | +Hardware vulnerabilities | ||
31 | +======================== | ||
32 | + | ||
33 | +This section describes CPU vulnerabilities and provides an overview of the | ||
34 | +possible mitigations along with guidance for selecting mitigations if they | ||
35 | +are configurable at compile, boot or run time. | ||
36 | + | ||
37 | +.. toctree:: | ||
38 | + :maxdepth: 1 | ||
39 | + | ||
40 | + l1tf | ||
41 | + mds | ||
42 | diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst | ||
43 | new file mode 100644 | ||
44 | index 000000000000..31653a9f0e1b | ||
45 | --- /dev/null | ||
46 | +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst | ||
47 | @@ -0,0 +1,615 @@ | ||
48 | +L1TF - L1 Terminal Fault | ||
49 | +======================== | ||
50 | + | ||
51 | +L1 Terminal Fault is a hardware vulnerability which allows unprivileged | ||
52 | +speculative access to data which is available in the Level 1 Data Cache | ||
53 | +when the page table entry controlling the virtual address, which is used | ||
54 | +for the access, has the Present bit cleared or other reserved bits set. | ||
55 | + | ||
56 | +Affected processors | ||
57 | +------------------- | ||
58 | + | ||
59 | +This vulnerability affects a wide range of Intel processors. The | ||
60 | +vulnerability is not present on: | ||
61 | + | ||
62 | + - Processors from AMD, Centaur and other non Intel vendors | ||
63 | + | ||
64 | + - Older processor models, where the CPU family is < 6 | ||
65 | + | ||
66 | + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, | ||
67 | + Penwell, Pineview, Silvermont, Airmont, Merrifield) | ||
68 | + | ||
69 | + - The Intel XEON PHI family | ||
70 | + | ||
71 | + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the | ||
72 | + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected | ||
73 | + by the Meltdown vulnerability either. These CPUs should become | ||
74 | + available by end of 2018. | ||
75 | + | ||
76 | +Whether a processor is affected or not can be read out from the L1TF | ||
77 | +vulnerability file in sysfs. See :ref:`l1tf_sys_info`. | ||
78 | + | ||
79 | +Related CVEs | ||
80 | +------------ | ||
81 | + | ||
82 | +The following CVE entries are related to the L1TF vulnerability: | ||
83 | + | ||
84 | + ============= ================= ============================== | ||
85 | + CVE-2018-3615 L1 Terminal Fault SGX related aspects | ||
86 | + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects | ||
87 | + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects | ||
88 | + ============= ================= ============================== | ||
89 | + | ||
90 | +Problem | ||
91 | +------- | ||
92 | + | ||
93 | +If an instruction accesses a virtual address for which the relevant page | ||
94 | +table entry (PTE) has the Present bit cleared or other reserved bits set, | ||
95 | +then speculative execution ignores the invalid PTE and loads the referenced | ||
96 | +data if it is present in the Level 1 Data Cache, as if the page referenced | ||
97 | +by the address bits in the PTE was still present and accessible. | ||
98 | + | ||
99 | +While this is a purely speculative mechanism and the instruction will raise | ||
100 | +a page fault when it is retired eventually, the pure act of loading the | ||
101 | +data and making it available to other speculative instructions opens up the | ||
102 | +opportunity for side channel attacks to unprivileged malicious code, | ||
103 | +similar to the Meltdown attack. | ||
104 | + | ||
105 | +While Meltdown breaks the user space to kernel space protection, L1TF | ||
106 | +allows to attack any physical memory address in the system and the attack | ||
107 | +works across all protection domains. It allows an attack of SGX and also | ||
108 | +works from inside virtual machines because the speculation bypasses the | ||
109 | +extended page table (EPT) protection mechanism. | ||
110 | + | ||
111 | + | ||
112 | +Attack scenarios | ||
113 | +---------------- | ||
114 | + | ||
115 | +1. Malicious user space | ||
116 | +^^^^^^^^^^^^^^^^^^^^^^^ | ||
117 | + | ||
118 | + Operating Systems store arbitrary information in the address bits of a | ||
119 | + PTE which is marked non present. This allows a malicious user space | ||
120 | + application to attack the physical memory to which these PTEs resolve. | ||
121 | + In some cases user-space can maliciously influence the information | ||
122 | + encoded in the address bits of the PTE, thus making attacks more | ||
123 | + deterministic and more practical. | ||
124 | + | ||
125 | + The Linux kernel contains a mitigation for this attack vector, PTE | ||
126 | + inversion, which is permanently enabled and has no performance | ||
127 | + impact. The kernel ensures that the address bits of PTEs, which are not | ||
128 | + marked present, never point to cacheable physical memory space. | ||
129 | + | ||
130 | + A system with an up to date kernel is protected against attacks from | ||
131 | + malicious user space applications. | ||
132 | + | ||
133 | +2. Malicious guest in a virtual machine | ||
134 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
135 | + | ||
136 | + The fact that L1TF breaks all domain protections allows malicious guest | ||
137 | + OSes, which can control the PTEs directly, and malicious guest user | ||
138 | + space applications, which run on an unprotected guest kernel lacking the | ||
139 | + PTE inversion mitigation for L1TF, to attack physical host memory. | ||
140 | + | ||
141 | + A special aspect of L1TF in the context of virtualization is symmetric | ||
142 | + multi threading (SMT). The Intel implementation of SMT is called | ||
143 | + HyperThreading. The fact that Hyperthreads on the affected processors | ||
144 | + share the L1 Data Cache (L1D) is important for this. As the flaw allows | ||
145 | + only to attack data which is present in L1D, a malicious guest running | ||
146 | + on one Hyperthread can attack the data which is brought into the L1D by | ||
147 | + the context which runs on the sibling Hyperthread of the same physical | ||
148 | + core. This context can be host OS, host user space or a different guest. | ||
149 | + | ||
150 | + If the processor does not support Extended Page Tables, the attack is | ||
151 | + only possible, when the hypervisor does not sanitize the content of the | ||
152 | + effective (shadow) page tables. | ||
153 | + | ||
154 | + While solutions exist to mitigate these attack vectors fully, these | ||
155 | + mitigations are not enabled by default in the Linux kernel because they | ||
156 | + can affect performance significantly. The kernel provides several | ||
157 | + mechanisms which can be utilized to address the problem depending on the | ||
158 | + deployment scenario. The mitigations, their protection scope and impact | ||
159 | + are described in the next sections. | ||
160 | + | ||
161 | + The default mitigations and the rationale for choosing them are explained | ||
162 | + at the end of this document. See :ref:`default_mitigations`. | ||
163 | + | ||
164 | +.. _l1tf_sys_info: | ||
165 | + | ||
166 | +L1TF system information | ||
167 | +----------------------- | ||
168 | + | ||
169 | +The Linux kernel provides a sysfs interface to enumerate the current L1TF | ||
170 | +status of the system: whether the system is vulnerable, and which | ||
171 | +mitigations are active. The relevant sysfs file is: | ||
172 | + | ||
173 | +/sys/devices/system/cpu/vulnerabilities/l1tf | ||
174 | + | ||
175 | +The possible values in this file are: | ||
176 | + | ||
177 | + =========================== =============================== | ||
178 | + 'Not affected' The processor is not vulnerable | ||
179 | + 'Mitigation: PTE Inversion' The host protection is active | ||
180 | + =========================== =============================== | ||
181 | + | ||
182 | +If KVM/VMX is enabled and the processor is vulnerable then the following | ||
183 | +information is appended to the 'Mitigation: PTE Inversion' part: | ||
184 | + | ||
185 | + - SMT status: | ||
186 | + | ||
187 | + ===================== ================ | ||
188 | + 'VMX: SMT vulnerable' SMT is enabled | ||
189 | + 'VMX: SMT disabled' SMT is disabled | ||
190 | + ===================== ================ | ||
191 | + | ||
192 | + - L1D Flush mode: | ||
193 | + | ||
194 | + ================================ ==================================== | ||
195 | + 'L1D vulnerable' L1D flushing is disabled | ||
196 | + | ||
197 | + 'L1D conditional cache flushes' L1D flush is conditionally enabled | ||
198 | + | ||
199 | + 'L1D cache flushes' L1D flush is unconditionally enabled | ||
200 | + ================================ ==================================== | ||
201 | + | ||
202 | +The resulting grade of protection is discussed in the following sections. | ||
203 | + | ||
204 | + | ||
205 | +Host mitigation mechanism | ||
206 | +------------------------- | ||
207 | + | ||
208 | +The kernel is unconditionally protected against L1TF attacks from malicious | ||
209 | +user space running on the host. | ||
210 | + | ||
211 | + | ||
212 | +Guest mitigation mechanisms | ||
213 | +--------------------------- | ||
214 | + | ||
215 | +.. _l1d_flush: | ||
216 | + | ||
217 | +1. L1D flush on VMENTER | ||
218 | +^^^^^^^^^^^^^^^^^^^^^^^ | ||
219 | + | ||
220 | + To make sure that a guest cannot attack data which is present in the L1D | ||
221 | + the hypervisor flushes the L1D before entering the guest. | ||
222 | + | ||
223 | + Flushing the L1D evicts not only the data which should not be accessed | ||
224 | + by a potentially malicious guest, it also flushes the guest | ||
225 | + data. Flushing the L1D has a performance impact as the processor has to | ||
226 | + bring the flushed guest data back into the L1D. Depending on the | ||
227 | + frequency of VMEXIT/VMENTER and the type of computations in the guest | ||
228 | + performance degradation in the range of 1% to 50% has been observed. For | ||
229 | + scenarios where guest VMEXIT/VMENTER are rare the performance impact is | ||
230 | + minimal. Virtio and mechanisms like posted interrupts are designed to | ||
231 | + confine the VMEXITs to a bare minimum, but specific configurations and | ||
232 | + application scenarios might still suffer from a high VMEXIT rate. | ||
233 | + | ||
234 | + The kernel provides two L1D flush modes: | ||
235 | + - conditional ('cond') | ||
236 | + - unconditional ('always') | ||
237 | + | ||
238 | + The conditional mode avoids L1D flushing after VMEXITs which execute | ||
239 | + only audited code paths before the corresponding VMENTER. These code | ||
240 | + paths have been verified that they cannot expose secrets or other | ||
241 | + interesting data to an attacker, but they can leak information about the | ||
242 | + address space layout of the hypervisor. | ||
243 | + | ||
244 | + Unconditional mode flushes L1D on all VMENTER invocations and provides | ||
245 | + maximum protection. It has a higher overhead than the conditional | ||
246 | + mode. The overhead cannot be quantified correctly as it depends on the | ||
247 | + workload scenario and the resulting number of VMEXITs. | ||
248 | + | ||
249 | + The general recommendation is to enable L1D flush on VMENTER. The kernel | ||
250 | + defaults to conditional mode on affected processors. | ||
251 | + | ||
252 | + **Note**, that L1D flush does not prevent the SMT problem because the | ||
253 | + sibling thread will also bring back its data into the L1D which makes it | ||
254 | + attackable again. | ||
255 | + | ||
256 | + L1D flush can be controlled by the administrator via the kernel command | ||
257 | + line and sysfs control files. See :ref:`mitigation_control_command_line` | ||
258 | + and :ref:`mitigation_control_kvm`. | ||
259 | + | ||
260 | +.. _guest_confinement: | ||
261 | + | ||
262 | +2. Guest VCPU confinement to dedicated physical cores | ||
263 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
264 | + | ||
265 | + To address the SMT problem, it is possible to make a guest or a group of | ||
266 | + guests affine to one or more physical cores. The proper mechanism for | ||
267 | + that is to utilize exclusive cpusets to ensure that no other guest or | ||
268 | + host tasks can run on these cores. | ||
269 | + | ||
270 | + If only a single guest or related guests run on sibling SMT threads on | ||
271 | + the same physical core then they can only attack their own memory and | ||
272 | + restricted parts of the host memory. | ||
273 | + | ||
274 | + Host memory is attackable, when one of the sibling SMT threads runs in | ||
275 | + host OS (hypervisor) context and the other in guest context. The amount | ||
276 | + of valuable information from the host OS context depends on the context | ||
277 | + which the host OS executes, i.e. interrupts, soft interrupts and kernel | ||
278 | + threads. The amount of valuable data from these contexts cannot be | ||
279 | + declared as non-interesting for an attacker without deep inspection of | ||
280 | + the code. | ||
281 | + | ||
282 | + **Note**, that assigning guests to a fixed set of physical cores affects | ||
283 | + the ability of the scheduler to do load balancing and might have | ||
284 | + negative effects on CPU utilization depending on the hosting | ||
285 | + scenario. Disabling SMT might be a viable alternative for particular | ||
286 | + scenarios. | ||
287 | + | ||
288 | + For further information about confining guests to a single or to a group | ||
289 | + of cores consult the cpusets documentation: | ||
290 | + | ||
291 | + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt | ||
292 | + | ||
293 | +.. _interrupt_isolation: | ||
294 | + | ||
295 | +3. Interrupt affinity | ||
296 | +^^^^^^^^^^^^^^^^^^^^^ | ||
297 | + | ||
298 | + Interrupts can be made affine to logical CPUs. This is not universally | ||
299 | + true because there are types of interrupts which are truly per CPU | ||
300 | + interrupts, e.g. the local timer interrupt. Aside of that multi queue | ||
301 | + devices affine their interrupts to single CPUs or groups of CPUs per | ||
302 | + queue without allowing the administrator to control the affinities. | ||
303 | + | ||
304 | + Moving the interrupts, which can be affinity controlled, away from CPUs | ||
305 | + which run untrusted guests, reduces the attack vector space. | ||
306 | + | ||
307 | + Whether the interrupts with are affine to CPUs, which run untrusted | ||
308 | + guests, provide interesting data for an attacker depends on the system | ||
309 | + configuration and the scenarios which run on the system. While for some | ||
310 | + of the interrupts it can be assumed that they won't expose interesting | ||
311 | + information beyond exposing hints about the host OS memory layout, there | ||
312 | + is no way to make general assumptions. | ||
313 | + | ||
314 | + Interrupt affinity can be controlled by the administrator via the | ||
315 | + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is | ||
316 | + available at: | ||
317 | + | ||
318 | + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt | ||
319 | + | ||
320 | +.. _smt_control: | ||
321 | + | ||
322 | +4. SMT control | ||
323 | +^^^^^^^^^^^^^^ | ||
324 | + | ||
325 | + To prevent the SMT issues of L1TF it might be necessary to disable SMT | ||
326 | + completely. Disabling SMT can have a significant performance impact, but | ||
327 | + the impact depends on the hosting scenario and the type of workloads. | ||
328 | + The impact of disabling SMT needs also to be weighted against the impact | ||
329 | + of other mitigation solutions like confining guests to dedicated cores. | ||
330 | + | ||
331 | + The kernel provides a sysfs interface to retrieve the status of SMT and | ||
332 | + to control it. It also provides a kernel command line interface to | ||
333 | + control SMT. | ||
334 | + | ||
335 | + The kernel command line interface consists of the following options: | ||
336 | + | ||
337 | + =========== ========================================================== | ||
338 | + nosmt Affects the bring up of the secondary CPUs during boot. The | ||
339 | + kernel tries to bring all present CPUs online during the | ||
340 | + boot process. "nosmt" makes sure that from each physical | ||
341 | + core only one - the so called primary (hyper) thread is | ||
342 | + activated. Due to a design flaw of Intel processors related | ||
343 | + to Machine Check Exceptions the non primary siblings have | ||
344 | + to be brought up at least partially and are then shut down | ||
345 | + again. "nosmt" can be undone via the sysfs interface. | ||
346 | + | ||
347 | + nosmt=force Has the same effect as "nosmt" but it does not allow to | ||
348 | + undo the SMT disable via the sysfs interface. | ||
349 | + =========== ========================================================== | ||
350 | + | ||
351 | + The sysfs interface provides two files: | ||
352 | + | ||
353 | + - /sys/devices/system/cpu/smt/control | ||
354 | + - /sys/devices/system/cpu/smt/active | ||
355 | + | ||
356 | + /sys/devices/system/cpu/smt/control: | ||
357 | + | ||
358 | + This file allows to read out the SMT control state and provides the | ||
359 | + ability to disable or (re)enable SMT. The possible states are: | ||
360 | + | ||
361 | + ============== =================================================== | ||
362 | + on SMT is supported by the CPU and enabled. All | ||
363 | + logical CPUs can be onlined and offlined without | ||
364 | + restrictions. | ||
365 | + | ||
366 | + off SMT is supported by the CPU and disabled. Only | ||
367 | + the so called primary SMT threads can be onlined | ||
368 | + and offlined without restrictions. An attempt to | ||
369 | + online a non-primary sibling is rejected | ||
370 | + | ||
371 | + forceoff Same as 'off' but the state cannot be controlled. | ||
372 | + Attempts to write to the control file are rejected. | ||
373 | + | ||
374 | + notsupported The processor does not support SMT. It's therefore | ||
375 | + not affected by the SMT implications of L1TF. | ||
376 | + Attempts to write to the control file are rejected. | ||
377 | + ============== =================================================== | ||
378 | + | ||
379 | + The possible states which can be written into this file to control SMT | ||
380 | + state are: | ||
381 | + | ||
382 | + - on | ||
383 | + - off | ||
384 | + - forceoff | ||
385 | + | ||
386 | + /sys/devices/system/cpu/smt/active: | ||
387 | + | ||
388 | + This file reports whether SMT is enabled and active, i.e. if on any | ||
389 | + physical core two or more sibling threads are online. | ||
390 | + | ||
391 | + SMT control is also possible at boot time via the l1tf kernel command | ||
392 | + line parameter in combination with L1D flush control. See | ||
393 | + :ref:`mitigation_control_command_line`. | ||
394 | + | ||
395 | +5. Disabling EPT | ||
396 | +^^^^^^^^^^^^^^^^ | ||
397 | + | ||
398 | + Disabling EPT for virtual machines provides full mitigation for L1TF even | ||
399 | + with SMT enabled, because the effective page tables for guests are | ||
400 | + managed and sanitized by the hypervisor. Though disabling EPT has a | ||
401 | + significant performance impact especially when the Meltdown mitigation | ||
402 | + KPTI is enabled. | ||
403 | + | ||
404 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. | ||
405 | + | ||
406 | +There is ongoing research and development for new mitigation mechanisms to | ||
407 | +address the performance impact of disabling SMT or EPT. | ||
408 | + | ||
409 | +.. _mitigation_control_command_line: | ||
410 | + | ||
411 | +Mitigation control on the kernel command line | ||
412 | +--------------------------------------------- | ||
413 | + | ||
414 | +The kernel command line allows to control the L1TF mitigations at boot | ||
415 | +time with the option "l1tf=". The valid arguments for this option are: | ||
416 | + | ||
417 | + ============ ============================================================= | ||
418 | + full Provides all available mitigations for the L1TF | ||
419 | + vulnerability. Disables SMT and enables all mitigations in | ||
420 | + the hypervisors, i.e. unconditional L1D flushing | ||
421 | + | ||
422 | + SMT control and L1D flush control via the sysfs interface | ||
423 | + is still possible after boot. Hypervisors will issue a | ||
424 | + warning when the first VM is started in a potentially | ||
425 | + insecure configuration, i.e. SMT enabled or L1D flush | ||
426 | + disabled. | ||
427 | + | ||
428 | + full,force Same as 'full', but disables SMT and L1D flush runtime | ||
429 | + control. Implies the 'nosmt=force' command line option. | ||
430 | + (i.e. sysfs control of SMT is disabled.) | ||
431 | + | ||
432 | + flush Leaves SMT enabled and enables the default hypervisor | ||
433 | + mitigation, i.e. conditional L1D flushing | ||
434 | + | ||
435 | + SMT control and L1D flush control via the sysfs interface | ||
436 | + is still possible after boot. Hypervisors will issue a | ||
437 | + warning when the first VM is started in a potentially | ||
438 | + insecure configuration, i.e. SMT enabled or L1D flush | ||
439 | + disabled. | ||
440 | + | ||
441 | + flush,nosmt Disables SMT and enables the default hypervisor mitigation, | ||
442 | + i.e. conditional L1D flushing. | ||
443 | + | ||
444 | + SMT control and L1D flush control via the sysfs interface | ||
445 | + is still possible after boot. Hypervisors will issue a | ||
446 | + warning when the first VM is started in a potentially | ||
447 | + insecure configuration, i.e. SMT enabled or L1D flush | ||
448 | + disabled. | ||
449 | + | ||
450 | + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is | ||
451 | + started in a potentially insecure configuration. | ||
452 | + | ||
453 | + off Disables hypervisor mitigations and doesn't emit any | ||
454 | + warnings. | ||
455 | + It also drops the swap size and available RAM limit restrictions | ||
456 | + on both hypervisor and bare metal. | ||
457 | + | ||
458 | + ============ ============================================================= | ||
459 | + | ||
460 | +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. | ||
461 | + | ||
462 | + | ||
463 | +.. _mitigation_control_kvm: | ||
464 | + | ||
465 | +Mitigation control for KVM - module parameter | ||
466 | +------------------------------------------------------------- | ||
467 | + | ||
468 | +The KVM hypervisor mitigation mechanism, flushing the L1D cache when | ||
469 | +entering a guest, can be controlled with a module parameter. | ||
470 | + | ||
471 | +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the | ||
472 | +following arguments: | ||
473 | + | ||
474 | + ============ ============================================================== | ||
475 | + always L1D cache flush on every VMENTER. | ||
476 | + | ||
477 | + cond Flush L1D on VMENTER only when the code between VMEXIT and | ||
478 | + VMENTER can leak host memory which is considered | ||
479 | + interesting for an attacker. This still can leak host memory | ||
480 | + which allows e.g. to determine the hosts address space layout. | ||
481 | + | ||
482 | + never Disables the mitigation | ||
483 | + ============ ============================================================== | ||
484 | + | ||
485 | +The parameter can be provided on the kernel command line, as a module | ||
486 | +parameter when loading the modules and at runtime modified via the sysfs | ||
487 | +file: | ||
488 | + | ||
489 | +/sys/module/kvm_intel/parameters/vmentry_l1d_flush | ||
490 | + | ||
491 | +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command | ||
492 | +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush | ||
493 | +module parameter is ignored and writes to the sysfs file are rejected. | ||
494 | + | ||
495 | +.. _mitigation_selection: | ||
496 | + | ||
497 | +Mitigation selection guide | ||
498 | +-------------------------- | ||
499 | + | ||
500 | +1. No virtualization in use | ||
501 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
502 | + | ||
503 | + The system is protected by the kernel unconditionally and no further | ||
504 | + action is required. | ||
505 | + | ||
506 | +2. Virtualization with trusted guests | ||
507 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
508 | + | ||
509 | + If the guest comes from a trusted source and the guest OS kernel is | ||
510 | + guaranteed to have the L1TF mitigations in place the system is fully | ||
511 | + protected against L1TF and no further action is required. | ||
512 | + | ||
513 | + To avoid the overhead of the default L1D flushing on VMENTER the | ||
514 | + administrator can disable the flushing via the kernel command line and | ||
515 | + sysfs control files. See :ref:`mitigation_control_command_line` and | ||
516 | + :ref:`mitigation_control_kvm`. | ||
517 | + | ||
518 | + | ||
519 | +3. Virtualization with untrusted guests | ||
520 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
521 | + | ||
522 | +3.1. SMT not supported or disabled | ||
523 | +"""""""""""""""""""""""""""""""""" | ||
524 | + | ||
525 | + If SMT is not supported by the processor or disabled in the BIOS or by | ||
526 | + the kernel, it's only required to enforce L1D flushing on VMENTER. | ||
527 | + | ||
528 | + Conditional L1D flushing is the default behaviour and can be tuned. See | ||
529 | + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. | ||
530 | + | ||
531 | +3.2. EPT not supported or disabled | ||
532 | +"""""""""""""""""""""""""""""""""" | ||
533 | + | ||
534 | + If EPT is not supported by the processor or disabled in the hypervisor, | ||
535 | + the system is fully protected. SMT can stay enabled and L1D flushing on | ||
536 | + VMENTER is not required. | ||
537 | + | ||
538 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. | ||
539 | + | ||
540 | +3.3. SMT and EPT supported and active | ||
541 | +""""""""""""""""""""""""""""""""""""" | ||
542 | + | ||
543 | + If SMT and EPT are supported and active then various degrees of | ||
544 | + mitigations can be employed: | ||
545 | + | ||
546 | + - L1D flushing on VMENTER: | ||
547 | + | ||
548 | + L1D flushing on VMENTER is the minimal protection requirement, but it | ||
549 | + is only potent in combination with other mitigation methods. | ||
550 | + | ||
551 | + Conditional L1D flushing is the default behaviour and can be tuned. See | ||
552 | + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. | ||
553 | + | ||
554 | + - Guest confinement: | ||
555 | + | ||
556 | + Confinement of guests to a single or a group of physical cores which | ||
557 | + are not running any other processes, can reduce the attack surface | ||
558 | + significantly, but interrupts, soft interrupts and kernel threads can | ||
559 | + still expose valuable data to a potential attacker. See | ||
560 | + :ref:`guest_confinement`. | ||
561 | + | ||
562 | + - Interrupt isolation: | ||
563 | + | ||
564 | + Isolating the guest CPUs from interrupts can reduce the attack surface | ||
565 | + further, but still allows a malicious guest to explore a limited amount | ||
566 | + of host physical memory. This can at least be used to gain knowledge | ||
567 | + about the host address space layout. The interrupts which have a fixed | ||
568 | + affinity to the CPUs which run the untrusted guests can depending on | ||
569 | + the scenario still trigger soft interrupts and schedule kernel threads | ||
570 | + which might expose valuable information. See | ||
571 | + :ref:`interrupt_isolation`. | ||
572 | + | ||
573 | +The above three mitigation methods combined can provide protection to a | ||
574 | +certain degree, but the risk of the remaining attack surface has to be | ||
575 | +carefully analyzed. For full protection the following methods are | ||
576 | +available: | ||
577 | + | ||
578 | + - Disabling SMT: | ||
579 | + | ||
580 | + Disabling SMT and enforcing the L1D flushing provides the maximum | ||
581 | + amount of protection. This mitigation is not depending on any of the | ||
582 | + above mitigation methods. | ||
583 | + | ||
584 | + SMT control and L1D flushing can be tuned by the command line | ||
585 | + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run | ||
586 | + time with the matching sysfs control files. See :ref:`smt_control`, | ||
587 | + :ref:`mitigation_control_command_line` and | ||
588 | + :ref:`mitigation_control_kvm`. | ||
589 | + | ||
590 | + - Disabling EPT: | ||
591 | + | ||
592 | + Disabling EPT provides the maximum amount of protection as well. It is | ||
593 | + not depending on any of the above mitigation methods. SMT can stay | ||
594 | + enabled and L1D flushing is not required, but the performance impact is | ||
595 | + significant. | ||
596 | + | ||
597 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' | ||
598 | + parameter. | ||
599 | + | ||
600 | +3.4. Nested virtual machines | ||
601 | +"""""""""""""""""""""""""""" | ||
602 | + | ||
603 | +When nested virtualization is in use, three operating systems are involved: | ||
604 | +the bare metal hypervisor, the nested hypervisor and the nested virtual | ||
605 | +machine. VMENTER operations from the nested hypervisor into the nested | ||
606 | +guest will always be processed by the bare metal hypervisor. If KVM is the | ||
607 | +bare metal hypervisor it will: | ||
608 | + | ||
609 | + - Flush the L1D cache on every switch from the nested hypervisor to the | ||
610 | + nested virtual machine, so that the nested hypervisor's secrets are not | ||
611 | + exposed to the nested virtual machine; | ||
612 | + | ||
613 | + - Flush the L1D cache on every switch from the nested virtual machine to | ||
614 | + the nested hypervisor; this is a complex operation, and flushing the L1D | ||
615 | + cache avoids that the bare metal hypervisor's secrets are exposed to the | ||
616 | + nested virtual machine; | ||
617 | + | ||
618 | + - Instruct the nested hypervisor to not perform any L1D cache flush. This | ||
619 | + is an optimization to avoid double L1D flushing. | ||
620 | + | ||
621 | + | ||
622 | +.. _default_mitigations: | ||
623 | + | ||
624 | +Default mitigations | ||
625 | +------------------- | ||
626 | + | ||
627 | + The kernel default mitigations for vulnerable processors are: | ||
628 | + | ||
629 | + - PTE inversion to protect against malicious user space. This is done | ||
630 | + unconditionally and cannot be controlled. The swap storage is limited | ||
631 | + to ~16TB. | ||
632 | + | ||
633 | + - L1D conditional flushing on VMENTER when EPT is enabled for | ||
634 | + a guest. | ||
635 | + | ||
636 | + The kernel does not by default enforce the disabling of SMT, which leaves | ||
637 | + SMT systems vulnerable when running untrusted guests with EPT enabled. | ||
638 | + | ||
639 | + The rationale for this choice is: | ||
640 | + | ||
641 | + - Force disabling SMT can break existing setups, especially with | ||
642 | + unattended updates. | ||
643 | + | ||
644 | + - If regular users run untrusted guests on their machine, then L1TF is | ||
645 | + just an add on to other malware which might be embedded in an untrusted | ||
646 | + guest, e.g. spam-bots or attacks on the local network. | ||
647 | + | ||
648 | + There is no technical way to prevent a user from running untrusted code | ||
649 | + on their machines blindly. | ||
650 | + | ||
651 | + - It's technically extremely unlikely and from today's knowledge even | ||
652 | + impossible that L1TF can be exploited via the most popular attack | ||
653 | + mechanisms like JavaScript because these mechanisms have no way to | ||
654 | + control PTEs. If this would be possible and not other mitigation would | ||
655 | + be possible, then the default might be different. | ||
656 | + | ||
657 | + - The administrators of cloud and hosting setups have to carefully | ||
658 | + analyze the risk for their scenarios and make the appropriate | ||
659 | + mitigation choices, which might even vary across their deployed | ||
660 | + machines and also result in other changes of their overall setup. | ||
661 | + There is no way for the kernel to provide a sensible default for this | ||
662 | + kind of scenarios. | ||
663 | diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst | ||
664 | new file mode 100644 | ||
665 | index 000000000000..e3a796c0d3a2 | ||
666 | --- /dev/null | ||
667 | +++ b/Documentation/admin-guide/hw-vuln/mds.rst | ||
668 | @@ -0,0 +1,308 @@ | ||
669 | +MDS - Microarchitectural Data Sampling | ||
670 | +====================================== | ||
671 | + | ||
672 | +Microarchitectural Data Sampling is a hardware vulnerability which allows | ||
673 | +unprivileged speculative access to data which is available in various CPU | ||
674 | +internal buffers. | ||
675 | + | ||
676 | +Affected processors | ||
677 | +------------------- | ||
678 | + | ||
679 | +This vulnerability affects a wide range of Intel processors. The | ||
680 | +vulnerability is not present on: | ||
681 | + | ||
682 | + - Processors from AMD, Centaur and other non Intel vendors | ||
683 | + | ||
684 | + - Older processor models, where the CPU family is < 6 | ||
685 | + | ||
686 | + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus) | ||
687 | + | ||
688 | + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the | ||
689 | + IA32_ARCH_CAPABILITIES MSR. | ||
690 | + | ||
691 | +Whether a processor is affected or not can be read out from the MDS | ||
692 | +vulnerability file in sysfs. See :ref:`mds_sys_info`. | ||
693 | + | ||
694 | +Not all processors are affected by all variants of MDS, but the mitigation | ||
695 | +is identical for all of them so the kernel treats them as a single | ||
696 | +vulnerability. | ||
697 | + | ||
698 | +Related CVEs | ||
699 | +------------ | ||
700 | + | ||
701 | +The following CVE entries are related to the MDS vulnerability: | ||
702 | + | ||
703 | + ============== ===== =================================================== | ||
704 | + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling | ||
705 | + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling | ||
706 | + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling | ||
707 | + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory | ||
708 | + ============== ===== =================================================== | ||
709 | + | ||
710 | +Problem | ||
711 | +------- | ||
712 | + | ||
713 | +When performing store, load, L1 refill operations, processors write data | ||
714 | +into temporary microarchitectural structures (buffers). The data in the | ||
715 | +buffer can be forwarded to load operations as an optimization. | ||
716 | + | ||
717 | +Under certain conditions, usually a fault/assist caused by a load | ||
718 | +operation, data unrelated to the load memory address can be speculatively | ||
719 | +forwarded from the buffers. Because the load operation causes a fault or | ||
720 | +assist and its result will be discarded, the forwarded data will not cause | ||
721 | +incorrect program execution or state changes. But a malicious operation | ||
722 | +may be able to forward this speculative data to a disclosure gadget which | ||
723 | +allows in turn to infer the value via a cache side channel attack. | ||
724 | + | ||
725 | +Because the buffers are potentially shared between Hyper-Threads, cross | ||
726 | +Hyper-Thread attacks are possible. | ||
727 | + | ||
728 | +Deeper technical information is available in the MDS specific x86 | ||
729 | +architecture section: :ref:`Documentation/x86/mds.rst <mds>`. | ||
730 | + | ||
731 | + | ||
732 | +Attack scenarios | ||
733 | +---------------- | ||
734 | + | ||
735 | +Attacks against the MDS vulnerabilities can be mounted from malicious non | ||
736 | +privileged user space applications running on hosts or guests. Malicious | ||
737 | +guest OSes can obviously mount attacks as well. | ||
738 | + | ||
739 | +Contrary to other speculation based vulnerabilities the MDS vulnerability | ||
740 | +does not allow the attacker to control the memory target address. As a | ||
741 | +consequence the attacks are purely sampling based, but as demonstrated with | ||
742 | +the TLBleed attack samples can be postprocessed successfully. | ||
743 | + | ||
744 | +Web-Browsers | ||
745 | +^^^^^^^^^^^^ | ||
746 | + | ||
747 | + It's unclear whether attacks through Web-Browsers are possible at | ||
748 | + all. The exploitation through Java-Script is considered very unlikely, | ||
749 | + but other widely used web technologies like Webassembly could possibly be | ||
750 | + abused. | ||
751 | + | ||
752 | + | ||
753 | +.. _mds_sys_info: | ||
754 | + | ||
755 | +MDS system information | ||
756 | +----------------------- | ||
757 | + | ||
758 | +The Linux kernel provides a sysfs interface to enumerate the current MDS | ||
759 | +status of the system: whether the system is vulnerable, and which | ||
760 | +mitigations are active. The relevant sysfs file is: | ||
761 | + | ||
762 | +/sys/devices/system/cpu/vulnerabilities/mds | ||
763 | + | ||
764 | +The possible values in this file are: | ||
765 | + | ||
766 | + .. list-table:: | ||
767 | + | ||
768 | + * - 'Not affected' | ||
769 | + - The processor is not vulnerable | ||
770 | + * - 'Vulnerable' | ||
771 | + - The processor is vulnerable, but no mitigation enabled | ||
772 | + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' | ||
773 | + - The processor is vulnerable but microcode is not updated. | ||
774 | + | ||
775 | + The mitigation is enabled on a best effort basis. See :ref:`vmwerv` | ||
776 | + * - 'Mitigation: Clear CPU buffers' | ||
777 | + - The processor is vulnerable and the CPU buffer clearing mitigation is | ||
778 | + enabled. | ||
779 | + | ||
780 | +If the processor is vulnerable then the following information is appended | ||
781 | +to the above information: | ||
782 | + | ||
783 | + ======================== ============================================ | ||
784 | + 'SMT vulnerable' SMT is enabled | ||
785 | + 'SMT mitigated' SMT is enabled and mitigated | ||
786 | + 'SMT disabled' SMT is disabled | ||
787 | + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown | ||
788 | + ======================== ============================================ | ||
789 | + | ||
790 | +.. _vmwerv: | ||
791 | + | ||
792 | +Best effort mitigation mode | ||
793 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
794 | + | ||
795 | + If the processor is vulnerable, but the availability of the microcode based | ||
796 | + mitigation mechanism is not advertised via CPUID the kernel selects a best | ||
797 | + effort mitigation mode. This mode invokes the mitigation instructions | ||
798 | + without a guarantee that they clear the CPU buffers. | ||
799 | + | ||
800 | + This is done to address virtualization scenarios where the host has the | ||
801 | + microcode update applied, but the hypervisor is not yet updated to expose | ||
802 | + the CPUID to the guest. If the host has updated microcode the protection | ||
803 | + takes effect; otherwise a few CPU cycles are wasted pointlessly. | ||
804 | + | ||
805 | + The state in the mds sysfs file reflects this situation accordingly. | ||
806 | + | ||
807 | + | ||
808 | +Mitigation mechanism | ||
809 | +------------------------- | ||
810 | + | ||
811 | +The kernel detects the affected CPUs and the presence of the microcode | ||
812 | +which is required. | ||
813 | + | ||
814 | +If a CPU is affected and the microcode is available, then the kernel | ||
815 | +enables the mitigation by default. The mitigation can be controlled at boot | ||
816 | +time via a kernel command line option. See | ||
817 | +:ref:`mds_mitigation_control_command_line`. | ||
818 | + | ||
819 | +.. _cpu_buffer_clear: | ||
820 | + | ||
821 | +CPU buffer clearing | ||
822 | +^^^^^^^^^^^^^^^^^^^ | ||
823 | + | ||
824 | + The mitigation for MDS clears the affected CPU buffers on return to user | ||
825 | + space and when entering a guest. | ||
826 | + | ||
827 | + If SMT is enabled it also clears the buffers on idle entry when the CPU | ||
828 | + is only affected by MSBDS and not any other MDS variant, because the | ||
829 | + other variants cannot be protected against cross Hyper-Thread attacks. | ||
830 | + | ||
831 | + For CPUs which are only affected by MSBDS the user space, guest and idle | ||
832 | + transition mitigations are sufficient and SMT is not affected. | ||
833 | + | ||
834 | +.. _virt_mechanism: | ||
835 | + | ||
836 | +Virtualization mitigation | ||
837 | +^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
838 | + | ||
839 | + The protection for host to guest transition depends on the L1TF | ||
840 | + vulnerability of the CPU: | ||
841 | + | ||
842 | + - CPU is affected by L1TF: | ||
843 | + | ||
844 | + If the L1D flush mitigation is enabled and up to date microcode is | ||
845 | + available, the L1D flush mitigation is automatically protecting the | ||
846 | + guest transition. | ||
847 | + | ||
848 | + If the L1D flush mitigation is disabled then the MDS mitigation is | ||
849 | + invoked explicitly when the host MDS mitigation is enabled. | ||
850 | + | ||
851 | + For details on L1TF and virtualization see: | ||
852 | + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <mitigation_control_kvm>`. | ||
853 | + | ||
854 | + - CPU is not affected by L1TF: | ||
855 | + | ||
856 | + CPU buffers are flushed before entering the guest when the host MDS | ||
857 | + mitigation is enabled. | ||
858 | + | ||
859 | + The resulting MDS protection matrix for the host to guest transition: | ||
860 | + | ||
861 | + ============ ===== ============= ============ ================= | ||
862 | + L1TF MDS VMX-L1FLUSH Host MDS MDS-State | ||
863 | + | ||
864 | + Don't care No Don't care N/A Not affected | ||
865 | + | ||
866 | + Yes Yes Disabled Off Vulnerable | ||
867 | + | ||
868 | + Yes Yes Disabled Full Mitigated | ||
869 | + | ||
870 | + Yes Yes Enabled Don't care Mitigated | ||
871 | + | ||
872 | + No Yes N/A Off Vulnerable | ||
873 | + | ||
874 | + No Yes N/A Full Mitigated | ||
875 | + ============ ===== ============= ============ ================= | ||
876 | + | ||
877 | + This only covers the host to guest transition, i.e. prevents leakage from | ||
878 | + host to guest, but does not protect the guest internally. Guests need to | ||
879 | + have their own protections. | ||
880 | + | ||
881 | +.. _xeon_phi: | ||
882 | + | ||
883 | +XEON PHI specific considerations | ||
884 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
885 | + | ||
886 | + The XEON PHI processor family is affected by MSBDS which can be exploited | ||
887 | + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow | ||
888 | + to use MWAIT in user space (Ring 3) which opens a potential attack vector | ||
889 | + for malicious user space. The exposure can be disabled on the kernel | ||
890 | + command line with the 'ring3mwait=disable' command line option. | ||
891 | + | ||
892 | + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated | ||
893 | + before the CPU enters an idle state. As XEON PHI is not affected by L1TF | ||
894 | + either disabling SMT is not required for full protection. | ||
895 | + | ||
896 | +.. _mds_smt_control: | ||
897 | + | ||
898 | +SMT control | ||
899 | +^^^^^^^^^^^ | ||
900 | + | ||
901 | + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That | ||
902 | + means on CPUs which are affected by MFBDS or MLPDS it is necessary to | ||
903 | + disable SMT for full protection. These are most of the affected CPUs; the | ||
904 | + exception is XEON PHI, see :ref:`xeon_phi`. | ||
905 | + | ||
906 | + Disabling SMT can have a significant performance impact, but the impact | ||
907 | + depends on the type of workloads. | ||
908 | + | ||
909 | + See the relevant chapter in the L1TF mitigation documentation for details: | ||
910 | + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`. | ||
911 | + | ||
912 | + | ||
913 | +.. _mds_mitigation_control_command_line: | ||
914 | + | ||
915 | +Mitigation control on the kernel command line | ||
916 | +--------------------------------------------- | ||
917 | + | ||
918 | +The kernel command line allows to control the MDS mitigations at boot | ||
919 | +time with the option "mds=". The valid arguments for this option are: | ||
920 | + | ||
921 | + ============ ============================================================= | ||
922 | + full If the CPU is vulnerable, enable all available mitigations | ||
923 | + for the MDS vulnerability, CPU buffer clearing on exit to | ||
924 | + userspace and when entering a VM. Idle transitions are | ||
925 | + protected as well if SMT is enabled. | ||
926 | + | ||
927 | + It does not automatically disable SMT. | ||
928 | + | ||
929 | + full,nosmt The same as mds=full, with SMT disabled on vulnerable | ||
930 | + CPUs. This is the complete mitigation. | ||
931 | + | ||
932 | + off Disables MDS mitigations completely. | ||
933 | + | ||
934 | + ============ ============================================================= | ||
935 | + | ||
936 | +Not specifying this option is equivalent to "mds=full". | ||
937 | + | ||
938 | + | ||
939 | +Mitigation selection guide | ||
940 | +-------------------------- | ||
941 | + | ||
942 | +1. Trusted userspace | ||
943 | +^^^^^^^^^^^^^^^^^^^^ | ||
944 | + | ||
945 | + If all userspace applications are from a trusted source and do not | ||
946 | + execute untrusted code which is supplied externally, then the mitigation | ||
947 | + can be disabled. | ||
948 | + | ||
949 | + | ||
950 | +2. Virtualization with trusted guests | ||
951 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
952 | + | ||
953 | + The same considerations as above versus trusted user space apply. | ||
954 | + | ||
955 | +3. Virtualization with untrusted guests | ||
956 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
957 | + | ||
958 | + The protection depends on the state of the L1TF mitigations. | ||
959 | + See :ref:`virt_mechanism`. | ||
960 | + | ||
961 | + If the MDS mitigation is enabled and SMT is disabled, guest to host and | ||
962 | + guest to guest attacks are prevented. | ||
963 | + | ||
964 | +.. _mds_default_mitigations: | ||
965 | + | ||
966 | +Default mitigations | ||
967 | +------------------- | ||
968 | + | ||
969 | + The kernel default mitigations for vulnerable processors are: | ||
970 | + | ||
971 | + - Enable CPU buffer clearing | ||
972 | + | ||
973 | + The kernel does not by default enforce the disabling of SMT, which leaves | ||
974 | + SMT systems vulnerable when running untrusted code. The same rationale as | ||
975 | + for L1TF applies. | ||
976 | + See :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <default_mitigations>`. | ||
977 | diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst | ||
978 | index 0873685bab0f..89abc5057349 100644 | ||
979 | --- a/Documentation/admin-guide/index.rst | ||
980 | +++ b/Documentation/admin-guide/index.rst | ||
981 | @@ -17,14 +17,12 @@ etc. | ||
982 | kernel-parameters | ||
983 | devices | ||
984 | |||
985 | -This section describes CPU vulnerabilities and provides an overview of the | ||
986 | -possible mitigations along with guidance for selecting mitigations if they | ||
987 | -are configurable at compile, boot or run time. | ||
988 | +This section describes CPU vulnerabilities and their mitigations. | ||
989 | |||
990 | .. toctree:: | ||
991 | :maxdepth: 1 | ||
992 | |||
993 | - l1tf | ||
994 | + hw-vuln/index | ||
995 | |||
996 | Here is a set of documents aimed at users who are trying to track down | ||
997 | problems and bugs in particular. | ||
998 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt | ||
999 | index 8b6567f7cb9b..a29301d6e6c6 100644 | ||
1000 | --- a/Documentation/admin-guide/kernel-parameters.txt | ||
1001 | +++ b/Documentation/admin-guide/kernel-parameters.txt | ||
1002 | @@ -2079,7 +2079,7 @@ | ||
1003 | |||
1004 | Default is 'flush'. | ||
1005 | |||
1006 | - For details see: Documentation/admin-guide/l1tf.rst | ||
1007 | + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst | ||
1008 | |||
1009 | l2cr= [PPC] | ||
1010 | |||
1011 | @@ -2319,6 +2319,32 @@ | ||
1012 | Format: <first>,<last> | ||
1013 | Specifies range of consoles to be captured by the MDA. | ||
1014 | |||
1015 | + mds= [X86,INTEL] | ||
1016 | + Control mitigation for the Micro-architectural Data | ||
1017 | + Sampling (MDS) vulnerability. | ||
1018 | + | ||
1019 | + Certain CPUs are vulnerable to an exploit against CPU | ||
1020 | + internal buffers which can forward information to a | ||
1021 | + disclosure gadget under certain conditions. | ||
1022 | + | ||
1023 | + In vulnerable processors, the speculatively | ||
1024 | + forwarded data can be used in a cache side channel | ||
1025 | + attack, to access data to which the attacker does | ||
1026 | + not have direct access. | ||
1027 | + | ||
1028 | + This parameter controls the MDS mitigation. The | ||
1029 | + options are: | ||
1030 | + | ||
1031 | + full - Enable MDS mitigation on vulnerable CPUs | ||
1032 | + full,nosmt - Enable MDS mitigation and disable | ||
1033 | + SMT on vulnerable CPUs | ||
1034 | + off - Unconditionally disable MDS mitigation | ||
1035 | + | ||
1036 | + Not specifying this option is equivalent to | ||
1037 | + mds=full. | ||
1038 | + | ||
1039 | + For details see: Documentation/admin-guide/hw-vuln/mds.rst | ||
1040 | + | ||
1041 | mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory | ||
1042 | Amount of memory to be used when the kernel is not able | ||
1043 | to see the whole system memory or for test. | ||
1044 | @@ -2476,6 +2502,40 @@ | ||
1045 | in the "bleeding edge" mini2440 support kernel at | ||
1046 | http://repo.or.cz/w/linux-2.6/mini2440.git | ||
1047 | |||
1048 | + mitigations= | ||
1049 | + [X86,PPC,S390] Control optional mitigations for CPU | ||
1050 | + vulnerabilities. This is a set of curated, | ||
1051 | + arch-independent options, each of which is an | ||
1052 | + aggregation of existing arch-specific options. | ||
1053 | + | ||
1054 | + off | ||
1055 | + Disable all optional CPU mitigations. This | ||
1056 | + improves system performance, but it may also | ||
1057 | + expose users to several CPU vulnerabilities. | ||
1058 | + Equivalent to: nopti [X86,PPC] | ||
1059 | + nospectre_v1 [PPC] | ||
1060 | + nobp=0 [S390] | ||
1061 | + nospectre_v2 [X86,PPC,S390] | ||
1062 | + spectre_v2_user=off [X86] | ||
1063 | + spec_store_bypass_disable=off [X86,PPC] | ||
1064 | + l1tf=off [X86] | ||
1065 | + mds=off [X86] | ||
1066 | + | ||
1067 | + auto (default) | ||
1068 | + Mitigate all CPU vulnerabilities, but leave SMT | ||
1069 | + enabled, even if it's vulnerable. This is for | ||
1070 | + users who don't want to be surprised by SMT | ||
1071 | + getting disabled across kernel upgrades, or who | ||
1072 | + have other ways of avoiding SMT-based attacks. | ||
1073 | + Equivalent to: (default behavior) | ||
1074 | + | ||
1075 | + auto,nosmt | ||
1076 | + Mitigate all CPU vulnerabilities, disabling SMT | ||
1077 | + if needed. This is for users who always want to | ||
1078 | + be fully mitigated, even if it means losing SMT. | ||
1079 | + Equivalent to: l1tf=flush,nosmt [X86] | ||
1080 | + mds=full,nosmt [X86] | ||
1081 | + | ||
1082 | mminit_loglevel= | ||
1083 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this | ||
1084 | parameter allows control of the logging verbosity for | ||
1085 | diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst | ||
1086 | deleted file mode 100644 | ||
1087 | index 9f5924f81f89..000000000000 | ||
1088 | --- a/Documentation/admin-guide/l1tf.rst | ||
1089 | +++ /dev/null | ||
1090 | @@ -1,614 +0,0 @@ | ||
1091 | -L1TF - L1 Terminal Fault | ||
1092 | -======================== | ||
1093 | - | ||
1094 | -L1 Terminal Fault is a hardware vulnerability which allows unprivileged | ||
1095 | -speculative access to data which is available in the Level 1 Data Cache | ||
1096 | -when the page table entry controlling the virtual address, which is used | ||
1097 | -for the access, has the Present bit cleared or other reserved bits set. | ||
1098 | - | ||
1099 | -Affected processors | ||
1100 | -------------------- | ||
1101 | - | ||
1102 | -This vulnerability affects a wide range of Intel processors. The | ||
1103 | -vulnerability is not present on: | ||
1104 | - | ||
1105 | - - Processors from AMD, Centaur and other non Intel vendors | ||
1106 | - | ||
1107 | - - Older processor models, where the CPU family is < 6 | ||
1108 | - | ||
1109 | - - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, | ||
1110 | - Penwell, Pineview, Silvermont, Airmont, Merrifield) | ||
1111 | - | ||
1112 | - - The Intel XEON PHI family | ||
1113 | - | ||
1114 | - - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the | ||
1115 | - IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected | ||
1116 | - by the Meltdown vulnerability either. These CPUs should become | ||
1117 | - available by end of 2018. | ||
1118 | - | ||
1119 | -Whether a processor is affected or not can be read out from the L1TF | ||
1120 | -vulnerability file in sysfs. See :ref:`l1tf_sys_info`. | ||
1121 | - | ||
1122 | -Related CVEs | ||
1123 | ------------- | ||
1124 | - | ||
1125 | -The following CVE entries are related to the L1TF vulnerability: | ||
1126 | - | ||
1127 | - ============= ================= ============================== | ||
1128 | - CVE-2018-3615 L1 Terminal Fault SGX related aspects | ||
1129 | - CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects | ||
1130 | - CVE-2018-3646 L1 Terminal Fault Virtualization related aspects | ||
1131 | - ============= ================= ============================== | ||
1132 | - | ||
1133 | -Problem | ||
1134 | -------- | ||
1135 | - | ||
1136 | -If an instruction accesses a virtual address for which the relevant page | ||
1137 | -table entry (PTE) has the Present bit cleared or other reserved bits set, | ||
1138 | -then speculative execution ignores the invalid PTE and loads the referenced | ||
1139 | -data if it is present in the Level 1 Data Cache, as if the page referenced | ||
1140 | -by the address bits in the PTE was still present and accessible. | ||
1141 | - | ||
1142 | -While this is a purely speculative mechanism and the instruction will raise | ||
1143 | -a page fault when it is retired eventually, the pure act of loading the | ||
1144 | -data and making it available to other speculative instructions opens up the | ||
1145 | -opportunity for side channel attacks to unprivileged malicious code, | ||
1146 | -similar to the Meltdown attack. | ||
1147 | - | ||
1148 | -While Meltdown breaks the user space to kernel space protection, L1TF | ||
1149 | -allows to attack any physical memory address in the system and the attack | ||
1150 | -works across all protection domains. It allows an attack of SGX and also | ||
1151 | -works from inside virtual machines because the speculation bypasses the | ||
1152 | -extended page table (EPT) protection mechanism. | ||
1153 | - | ||
1154 | - | ||
1155 | -Attack scenarios | ||
1156 | ----------------- | ||
1157 | - | ||
1158 | -1. Malicious user space | ||
1159 | -^^^^^^^^^^^^^^^^^^^^^^^ | ||
1160 | - | ||
1161 | - Operating Systems store arbitrary information in the address bits of a | ||
1162 | - PTE which is marked non present. This allows a malicious user space | ||
1163 | - application to attack the physical memory to which these PTEs resolve. | ||
1164 | - In some cases user-space can maliciously influence the information | ||
1165 | - encoded in the address bits of the PTE, thus making attacks more | ||
1166 | - deterministic and more practical. | ||
1167 | - | ||
1168 | - The Linux kernel contains a mitigation for this attack vector, PTE | ||
1169 | - inversion, which is permanently enabled and has no performance | ||
1170 | - impact. The kernel ensures that the address bits of PTEs, which are not | ||
1171 | - marked present, never point to cacheable physical memory space. | ||
1172 | - | ||
1173 | - A system with an up to date kernel is protected against attacks from | ||
1174 | - malicious user space applications. | ||
1175 | - | ||
1176 | -2. Malicious guest in a virtual machine | ||
1177 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1178 | - | ||
1179 | - The fact that L1TF breaks all domain protections allows malicious guest | ||
1180 | - OSes, which can control the PTEs directly, and malicious guest user | ||
1181 | - space applications, which run on an unprotected guest kernel lacking the | ||
1182 | - PTE inversion mitigation for L1TF, to attack physical host memory. | ||
1183 | - | ||
1184 | - A special aspect of L1TF in the context of virtualization is symmetric | ||
1185 | - multi threading (SMT). The Intel implementation of SMT is called | ||
1186 | - HyperThreading. The fact that Hyperthreads on the affected processors | ||
1187 | - share the L1 Data Cache (L1D) is important for this. As the flaw allows | ||
1188 | - only to attack data which is present in L1D, a malicious guest running | ||
1189 | - on one Hyperthread can attack the data which is brought into the L1D by | ||
1190 | - the context which runs on the sibling Hyperthread of the same physical | ||
1191 | - core. This context can be host OS, host user space or a different guest. | ||
1192 | - | ||
1193 | - If the processor does not support Extended Page Tables, the attack is | ||
1194 | - only possible, when the hypervisor does not sanitize the content of the | ||
1195 | - effective (shadow) page tables. | ||
1196 | - | ||
1197 | - While solutions exist to mitigate these attack vectors fully, these | ||
1198 | - mitigations are not enabled by default in the Linux kernel because they | ||
1199 | - can affect performance significantly. The kernel provides several | ||
1200 | - mechanisms which can be utilized to address the problem depending on the | ||
1201 | - deployment scenario. The mitigations, their protection scope and impact | ||
1202 | - are described in the next sections. | ||
1203 | - | ||
1204 | - The default mitigations and the rationale for choosing them are explained | ||
1205 | - at the end of this document. See :ref:`default_mitigations`. | ||
1206 | - | ||
1207 | -.. _l1tf_sys_info: | ||
1208 | - | ||
1209 | -L1TF system information | ||
1210 | ------------------------ | ||
1211 | - | ||
1212 | -The Linux kernel provides a sysfs interface to enumerate the current L1TF | ||
1213 | -status of the system: whether the system is vulnerable, and which | ||
1214 | -mitigations are active. The relevant sysfs file is: | ||
1215 | - | ||
1216 | -/sys/devices/system/cpu/vulnerabilities/l1tf | ||
1217 | - | ||
1218 | -The possible values in this file are: | ||
1219 | - | ||
1220 | - =========================== =============================== | ||
1221 | - 'Not affected' The processor is not vulnerable | ||
1222 | - 'Mitigation: PTE Inversion' The host protection is active | ||
1223 | - =========================== =============================== | ||
1224 | - | ||
1225 | -If KVM/VMX is enabled and the processor is vulnerable then the following | ||
1226 | -information is appended to the 'Mitigation: PTE Inversion' part: | ||
1227 | - | ||
1228 | - - SMT status: | ||
1229 | - | ||
1230 | - ===================== ================ | ||
1231 | - 'VMX: SMT vulnerable' SMT is enabled | ||
1232 | - 'VMX: SMT disabled' SMT is disabled | ||
1233 | - ===================== ================ | ||
1234 | - | ||
1235 | - - L1D Flush mode: | ||
1236 | - | ||
1237 | - ================================ ==================================== | ||
1238 | - 'L1D vulnerable' L1D flushing is disabled | ||
1239 | - | ||
1240 | - 'L1D conditional cache flushes' L1D flush is conditionally enabled | ||
1241 | - | ||
1242 | - 'L1D cache flushes' L1D flush is unconditionally enabled | ||
1243 | - ================================ ==================================== | ||
1244 | - | ||
1245 | -The resulting grade of protection is discussed in the following sections. | ||
1246 | - | ||
1247 | - | ||
1248 | -Host mitigation mechanism | ||
1249 | -------------------------- | ||
1250 | - | ||
1251 | -The kernel is unconditionally protected against L1TF attacks from malicious | ||
1252 | -user space running on the host. | ||
1253 | - | ||
1254 | - | ||
1255 | -Guest mitigation mechanisms | ||
1256 | ---------------------------- | ||
1257 | - | ||
1258 | -.. _l1d_flush: | ||
1259 | - | ||
1260 | -1. L1D flush on VMENTER | ||
1261 | -^^^^^^^^^^^^^^^^^^^^^^^ | ||
1262 | - | ||
1263 | - To make sure that a guest cannot attack data which is present in the L1D | ||
1264 | - the hypervisor flushes the L1D before entering the guest. | ||
1265 | - | ||
1266 | - Flushing the L1D evicts not only the data which should not be accessed | ||
1267 | - by a potentially malicious guest, it also flushes the guest | ||
1268 | - data. Flushing the L1D has a performance impact as the processor has to | ||
1269 | - bring the flushed guest data back into the L1D. Depending on the | ||
1270 | - frequency of VMEXIT/VMENTER and the type of computations in the guest | ||
1271 | - performance degradation in the range of 1% to 50% has been observed. For | ||
1272 | - scenarios where guest VMEXIT/VMENTER are rare the performance impact is | ||
1273 | - minimal. Virtio and mechanisms like posted interrupts are designed to | ||
1274 | - confine the VMEXITs to a bare minimum, but specific configurations and | ||
1275 | - application scenarios might still suffer from a high VMEXIT rate. | ||
1276 | - | ||
1277 | - The kernel provides two L1D flush modes: | ||
1278 | - - conditional ('cond') | ||
1279 | - - unconditional ('always') | ||
1280 | - | ||
1281 | - The conditional mode avoids L1D flushing after VMEXITs which execute | ||
1282 | - only audited code paths before the corresponding VMENTER. These code | ||
1283 | - paths have been verified that they cannot expose secrets or other | ||
1284 | - interesting data to an attacker, but they can leak information about the | ||
1285 | - address space layout of the hypervisor. | ||
1286 | - | ||
1287 | - Unconditional mode flushes L1D on all VMENTER invocations and provides | ||
1288 | - maximum protection. It has a higher overhead than the conditional | ||
1289 | - mode. The overhead cannot be quantified correctly as it depends on the | ||
1290 | - workload scenario and the resulting number of VMEXITs. | ||
1291 | - | ||
1292 | - The general recommendation is to enable L1D flush on VMENTER. The kernel | ||
1293 | - defaults to conditional mode on affected processors. | ||
1294 | - | ||
1295 | - **Note**, that L1D flush does not prevent the SMT problem because the | ||
1296 | - sibling thread will also bring back its data into the L1D which makes it | ||
1297 | - attackable again. | ||
1298 | - | ||
1299 | - L1D flush can be controlled by the administrator via the kernel command | ||
1300 | - line and sysfs control files. See :ref:`mitigation_control_command_line` | ||
1301 | - and :ref:`mitigation_control_kvm`. | ||
1302 | - | ||
1303 | -.. _guest_confinement: | ||
1304 | - | ||
1305 | -2. Guest VCPU confinement to dedicated physical cores | ||
1306 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1307 | - | ||
1308 | - To address the SMT problem, it is possible to make a guest or a group of | ||
1309 | - guests affine to one or more physical cores. The proper mechanism for | ||
1310 | - that is to utilize exclusive cpusets to ensure that no other guest or | ||
1311 | - host tasks can run on these cores. | ||
1312 | - | ||
1313 | - If only a single guest or related guests run on sibling SMT threads on | ||
1314 | - the same physical core then they can only attack their own memory and | ||
1315 | - restricted parts of the host memory. | ||
1316 | - | ||
1317 | - Host memory is attackable, when one of the sibling SMT threads runs in | ||
1318 | - host OS (hypervisor) context and the other in guest context. The amount | ||
1319 | - of valuable information from the host OS context depends on the context | ||
1320 | - which the host OS executes, i.e. interrupts, soft interrupts and kernel | ||
1321 | - threads. The amount of valuable data from these contexts cannot be | ||
1322 | - declared as non-interesting for an attacker without deep inspection of | ||
1323 | - the code. | ||
1324 | - | ||
1325 | - **Note**, that assigning guests to a fixed set of physical cores affects | ||
1326 | - the ability of the scheduler to do load balancing and might have | ||
1327 | - negative effects on CPU utilization depending on the hosting | ||
1328 | - scenario. Disabling SMT might be a viable alternative for particular | ||
1329 | - scenarios. | ||
1330 | - | ||
1331 | - For further information about confining guests to a single or to a group | ||
1332 | - of cores consult the cpusets documentation: | ||
1333 | - | ||
1334 | - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt | ||
1335 | - | ||
1336 | -.. _interrupt_isolation: | ||
1337 | - | ||
1338 | -3. Interrupt affinity | ||
1339 | -^^^^^^^^^^^^^^^^^^^^^ | ||
1340 | - | ||
1341 | - Interrupts can be made affine to logical CPUs. This is not universally | ||
1342 | - true because there are types of interrupts which are truly per CPU | ||
1343 | - interrupts, e.g. the local timer interrupt. Aside of that multi queue | ||
1344 | - devices affine their interrupts to single CPUs or groups of CPUs per | ||
1345 | - queue without allowing the administrator to control the affinities. | ||
1346 | - | ||
1347 | - Moving the interrupts, which can be affinity controlled, away from CPUs | ||
1348 | - which run untrusted guests, reduces the attack vector space. | ||
1349 | - | ||
1350 | - Whether the interrupts with are affine to CPUs, which run untrusted | ||
1351 | - guests, provide interesting data for an attacker depends on the system | ||
1352 | - configuration and the scenarios which run on the system. While for some | ||
1353 | - of the interrupts it can be assumed that they won't expose interesting | ||
1354 | - information beyond exposing hints about the host OS memory layout, there | ||
1355 | - is no way to make general assumptions. | ||
1356 | - | ||
1357 | - Interrupt affinity can be controlled by the administrator via the | ||
1358 | - /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is | ||
1359 | - available at: | ||
1360 | - | ||
1361 | - https://www.kernel.org/doc/Documentation/IRQ-affinity.txt | ||
1362 | - | ||
1363 | -.. _smt_control: | ||
1364 | - | ||
1365 | -4. SMT control | ||
1366 | -^^^^^^^^^^^^^^ | ||
1367 | - | ||
1368 | - To prevent the SMT issues of L1TF it might be necessary to disable SMT | ||
1369 | - completely. Disabling SMT can have a significant performance impact, but | ||
1370 | - the impact depends on the hosting scenario and the type of workloads. | ||
1371 | - The impact of disabling SMT needs also to be weighted against the impact | ||
1372 | - of other mitigation solutions like confining guests to dedicated cores. | ||
1373 | - | ||
1374 | - The kernel provides a sysfs interface to retrieve the status of SMT and | ||
1375 | - to control it. It also provides a kernel command line interface to | ||
1376 | - control SMT. | ||
1377 | - | ||
1378 | - The kernel command line interface consists of the following options: | ||
1379 | - | ||
1380 | - =========== ========================================================== | ||
1381 | - nosmt Affects the bring up of the secondary CPUs during boot. The | ||
1382 | - kernel tries to bring all present CPUs online during the | ||
1383 | - boot process. "nosmt" makes sure that from each physical | ||
1384 | - core only one - the so called primary (hyper) thread is | ||
1385 | - activated. Due to a design flaw of Intel processors related | ||
1386 | - to Machine Check Exceptions the non primary siblings have | ||
1387 | - to be brought up at least partially and are then shut down | ||
1388 | - again. "nosmt" can be undone via the sysfs interface. | ||
1389 | - | ||
1390 | - nosmt=force Has the same effect as "nosmt" but it does not allow to | ||
1391 | - undo the SMT disable via the sysfs interface. | ||
1392 | - =========== ========================================================== | ||
1393 | - | ||
1394 | - The sysfs interface provides two files: | ||
1395 | - | ||
1396 | - - /sys/devices/system/cpu/smt/control | ||
1397 | - - /sys/devices/system/cpu/smt/active | ||
1398 | - | ||
1399 | - /sys/devices/system/cpu/smt/control: | ||
1400 | - | ||
1401 | - This file allows to read out the SMT control state and provides the | ||
1402 | - ability to disable or (re)enable SMT. The possible states are: | ||
1403 | - | ||
1404 | - ============== =================================================== | ||
1405 | - on SMT is supported by the CPU and enabled. All | ||
1406 | - logical CPUs can be onlined and offlined without | ||
1407 | - restrictions. | ||
1408 | - | ||
1409 | - off SMT is supported by the CPU and disabled. Only | ||
1410 | - the so called primary SMT threads can be onlined | ||
1411 | - and offlined without restrictions. An attempt to | ||
1412 | - online a non-primary sibling is rejected | ||
1413 | - | ||
1414 | - forceoff Same as 'off' but the state cannot be controlled. | ||
1415 | - Attempts to write to the control file are rejected. | ||
1416 | - | ||
1417 | - notsupported The processor does not support SMT. It's therefore | ||
1418 | - not affected by the SMT implications of L1TF. | ||
1419 | - Attempts to write to the control file are rejected. | ||
1420 | - ============== =================================================== | ||
1421 | - | ||
1422 | - The possible states which can be written into this file to control SMT | ||
1423 | - state are: | ||
1424 | - | ||
1425 | - - on | ||
1426 | - - off | ||
1427 | - - forceoff | ||
1428 | - | ||
1429 | - /sys/devices/system/cpu/smt/active: | ||
1430 | - | ||
1431 | - This file reports whether SMT is enabled and active, i.e. if on any | ||
1432 | - physical core two or more sibling threads are online. | ||
1433 | - | ||
1434 | - SMT control is also possible at boot time via the l1tf kernel command | ||
1435 | - line parameter in combination with L1D flush control. See | ||
1436 | - :ref:`mitigation_control_command_line`. | ||
1437 | - | ||
1438 | -5. Disabling EPT | ||
1439 | -^^^^^^^^^^^^^^^^ | ||
1440 | - | ||
1441 | - Disabling EPT for virtual machines provides full mitigation for L1TF even | ||
1442 | - with SMT enabled, because the effective page tables for guests are | ||
1443 | - managed and sanitized by the hypervisor. Though disabling EPT has a | ||
1444 | - significant performance impact especially when the Meltdown mitigation | ||
1445 | - KPTI is enabled. | ||
1446 | - | ||
1447 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. | ||
1448 | - | ||
1449 | -There is ongoing research and development for new mitigation mechanisms to | ||
1450 | -address the performance impact of disabling SMT or EPT. | ||
1451 | - | ||
1452 | -.. _mitigation_control_command_line: | ||
1453 | - | ||
1454 | -Mitigation control on the kernel command line | ||
1455 | ---------------------------------------------- | ||
1456 | - | ||
1457 | -The kernel command line allows to control the L1TF mitigations at boot | ||
1458 | -time with the option "l1tf=". The valid arguments for this option are: | ||
1459 | - | ||
1460 | - ============ ============================================================= | ||
1461 | - full Provides all available mitigations for the L1TF | ||
1462 | - vulnerability. Disables SMT and enables all mitigations in | ||
1463 | - the hypervisors, i.e. unconditional L1D flushing | ||
1464 | - | ||
1465 | - SMT control and L1D flush control via the sysfs interface | ||
1466 | - is still possible after boot. Hypervisors will issue a | ||
1467 | - warning when the first VM is started in a potentially | ||
1468 | - insecure configuration, i.e. SMT enabled or L1D flush | ||
1469 | - disabled. | ||
1470 | - | ||
1471 | - full,force Same as 'full', but disables SMT and L1D flush runtime | ||
1472 | - control. Implies the 'nosmt=force' command line option. | ||
1473 | - (i.e. sysfs control of SMT is disabled.) | ||
1474 | - | ||
1475 | - flush Leaves SMT enabled and enables the default hypervisor | ||
1476 | - mitigation, i.e. conditional L1D flushing | ||
1477 | - | ||
1478 | - SMT control and L1D flush control via the sysfs interface | ||
1479 | - is still possible after boot. Hypervisors will issue a | ||
1480 | - warning when the first VM is started in a potentially | ||
1481 | - insecure configuration, i.e. SMT enabled or L1D flush | ||
1482 | - disabled. | ||
1483 | - | ||
1484 | - flush,nosmt Disables SMT and enables the default hypervisor mitigation, | ||
1485 | - i.e. conditional L1D flushing. | ||
1486 | - | ||
1487 | - SMT control and L1D flush control via the sysfs interface | ||
1488 | - is still possible after boot. Hypervisors will issue a | ||
1489 | - warning when the first VM is started in a potentially | ||
1490 | - insecure configuration, i.e. SMT enabled or L1D flush | ||
1491 | - disabled. | ||
1492 | - | ||
1493 | - flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is | ||
1494 | - started in a potentially insecure configuration. | ||
1495 | - | ||
1496 | - off Disables hypervisor mitigations and doesn't emit any | ||
1497 | - warnings. | ||
1498 | - It also drops the swap size and available RAM limit restrictions | ||
1499 | - on both hypervisor and bare metal. | ||
1500 | - | ||
1501 | - ============ ============================================================= | ||
1502 | - | ||
1503 | -The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. | ||
1504 | - | ||
1505 | - | ||
1506 | -.. _mitigation_control_kvm: | ||
1507 | - | ||
1508 | -Mitigation control for KVM - module parameter | ||
1509 | -------------------------------------------------------------- | ||
1510 | - | ||
1511 | -The KVM hypervisor mitigation mechanism, flushing the L1D cache when | ||
1512 | -entering a guest, can be controlled with a module parameter. | ||
1513 | - | ||
1514 | -The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the | ||
1515 | -following arguments: | ||
1516 | - | ||
1517 | - ============ ============================================================== | ||
1518 | - always L1D cache flush on every VMENTER. | ||
1519 | - | ||
1520 | - cond Flush L1D on VMENTER only when the code between VMEXIT and | ||
1521 | - VMENTER can leak host memory which is considered | ||
1522 | - interesting for an attacker. This still can leak host memory | ||
1523 | - which allows e.g. to determine the hosts address space layout. | ||
1524 | - | ||
1525 | - never Disables the mitigation | ||
1526 | - ============ ============================================================== | ||
1527 | - | ||
1528 | -The parameter can be provided on the kernel command line, as a module | ||
1529 | -parameter when loading the modules and at runtime modified via the sysfs | ||
1530 | -file: | ||
1531 | - | ||
1532 | -/sys/module/kvm_intel/parameters/vmentry_l1d_flush | ||
1533 | - | ||
1534 | -The default is 'cond'. If 'l1tf=full,force' is given on the kernel command | ||
1535 | -line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush | ||
1536 | -module parameter is ignored and writes to the sysfs file are rejected. | ||
1537 | - | ||
1538 | - | ||
1539 | -Mitigation selection guide | ||
1540 | --------------------------- | ||
1541 | - | ||
1542 | -1. No virtualization in use | ||
1543 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1544 | - | ||
1545 | - The system is protected by the kernel unconditionally and no further | ||
1546 | - action is required. | ||
1547 | - | ||
1548 | -2. Virtualization with trusted guests | ||
1549 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1550 | - | ||
1551 | - If the guest comes from a trusted source and the guest OS kernel is | ||
1552 | - guaranteed to have the L1TF mitigations in place the system is fully | ||
1553 | - protected against L1TF and no further action is required. | ||
1554 | - | ||
1555 | - To avoid the overhead of the default L1D flushing on VMENTER the | ||
1556 | - administrator can disable the flushing via the kernel command line and | ||
1557 | - sysfs control files. See :ref:`mitigation_control_command_line` and | ||
1558 | - :ref:`mitigation_control_kvm`. | ||
1559 | - | ||
1560 | - | ||
1561 | -3. Virtualization with untrusted guests | ||
1562 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
1563 | - | ||
1564 | -3.1. SMT not supported or disabled | ||
1565 | -"""""""""""""""""""""""""""""""""" | ||
1566 | - | ||
1567 | - If SMT is not supported by the processor or disabled in the BIOS or by | ||
1568 | - the kernel, it's only required to enforce L1D flushing on VMENTER. | ||
1569 | - | ||
1570 | - Conditional L1D flushing is the default behaviour and can be tuned. See | ||
1571 | - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. | ||
1572 | - | ||
1573 | -3.2. EPT not supported or disabled | ||
1574 | -"""""""""""""""""""""""""""""""""" | ||
1575 | - | ||
1576 | - If EPT is not supported by the processor or disabled in the hypervisor, | ||
1577 | - the system is fully protected. SMT can stay enabled and L1D flushing on | ||
1578 | - VMENTER is not required. | ||
1579 | - | ||
1580 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. | ||
1581 | - | ||
1582 | -3.3. SMT and EPT supported and active | ||
1583 | -""""""""""""""""""""""""""""""""""""" | ||
1584 | - | ||
1585 | - If SMT and EPT are supported and active then various degrees of | ||
1586 | - mitigations can be employed: | ||
1587 | - | ||
1588 | - - L1D flushing on VMENTER: | ||
1589 | - | ||
1590 | - L1D flushing on VMENTER is the minimal protection requirement, but it | ||
1591 | - is only potent in combination with other mitigation methods. | ||
1592 | - | ||
1593 | - Conditional L1D flushing is the default behaviour and can be tuned. See | ||
1594 | - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. | ||
1595 | - | ||
1596 | - - Guest confinement: | ||
1597 | - | ||
1598 | - Confinement of guests to a single or a group of physical cores which | ||
1599 | - are not running any other processes, can reduce the attack surface | ||
1600 | - significantly, but interrupts, soft interrupts and kernel threads can | ||
1601 | - still expose valuable data to a potential attacker. See | ||
1602 | - :ref:`guest_confinement`. | ||
1603 | - | ||
1604 | - - Interrupt isolation: | ||
1605 | - | ||
1606 | - Isolating the guest CPUs from interrupts can reduce the attack surface | ||
1607 | - further, but still allows a malicious guest to explore a limited amount | ||
1608 | - of host physical memory. This can at least be used to gain knowledge | ||
1609 | - about the host address space layout. The interrupts which have a fixed | ||
1610 | - affinity to the CPUs which run the untrusted guests can depending on | ||
1611 | - the scenario still trigger soft interrupts and schedule kernel threads | ||
1612 | - which might expose valuable information. See | ||
1613 | - :ref:`interrupt_isolation`. | ||
1614 | - | ||
1615 | -The above three mitigation methods combined can provide protection to a | ||
1616 | -certain degree, but the risk of the remaining attack surface has to be | ||
1617 | -carefully analyzed. For full protection the following methods are | ||
1618 | -available: | ||
1619 | - | ||
1620 | - - Disabling SMT: | ||
1621 | - | ||
1622 | - Disabling SMT and enforcing the L1D flushing provides the maximum | ||
1623 | - amount of protection. This mitigation is not depending on any of the | ||
1624 | - above mitigation methods. | ||
1625 | - | ||
1626 | - SMT control and L1D flushing can be tuned by the command line | ||
1627 | - parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run | ||
1628 | - time with the matching sysfs control files. See :ref:`smt_control`, | ||
1629 | - :ref:`mitigation_control_command_line` and | ||
1630 | - :ref:`mitigation_control_kvm`. | ||
1631 | - | ||
1632 | - - Disabling EPT: | ||
1633 | - | ||
1634 | - Disabling EPT provides the maximum amount of protection as well. It is | ||
1635 | - not depending on any of the above mitigation methods. SMT can stay | ||
1636 | - enabled and L1D flushing is not required, but the performance impact is | ||
1637 | - significant. | ||
1638 | - | ||
1639 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' | ||
1640 | - parameter. | ||
1641 | - | ||
1642 | -3.4. Nested virtual machines | ||
1643 | -"""""""""""""""""""""""""""" | ||
1644 | - | ||
1645 | -When nested virtualization is in use, three operating systems are involved: | ||
1646 | -the bare metal hypervisor, the nested hypervisor and the nested virtual | ||
1647 | -machine. VMENTER operations from the nested hypervisor into the nested | ||
1648 | -guest will always be processed by the bare metal hypervisor. If KVM is the | ||
1649 | -bare metal hypervisor it wiil: | ||
1650 | - | ||
1651 | - - Flush the L1D cache on every switch from the nested hypervisor to the | ||
1652 | - nested virtual machine, so that the nested hypervisor's secrets are not | ||
1653 | - exposed to the nested virtual machine; | ||
1654 | - | ||
1655 | - - Flush the L1D cache on every switch from the nested virtual machine to | ||
1656 | - the nested hypervisor; this is a complex operation, and flushing the L1D | ||
1657 | - cache avoids that the bare metal hypervisor's secrets are exposed to the | ||
1658 | - nested virtual machine; | ||
1659 | - | ||
1660 | - - Instruct the nested hypervisor to not perform any L1D cache flush. This | ||
1661 | - is an optimization to avoid double L1D flushing. | ||
1662 | - | ||
1663 | - | ||
1664 | -.. _default_mitigations: | ||
1665 | - | ||
1666 | -Default mitigations | ||
1667 | -------------------- | ||
1668 | - | ||
1669 | - The kernel default mitigations for vulnerable processors are: | ||
1670 | - | ||
1671 | - - PTE inversion to protect against malicious user space. This is done | ||
1672 | - unconditionally and cannot be controlled. The swap storage is limited | ||
1673 | - to ~16TB. | ||
1674 | - | ||
1675 | - - L1D conditional flushing on VMENTER when EPT is enabled for | ||
1676 | - a guest. | ||
1677 | - | ||
1678 | - The kernel does not by default enforce the disabling of SMT, which leaves | ||
1679 | - SMT systems vulnerable when running untrusted guests with EPT enabled. | ||
1680 | - | ||
1681 | - The rationale for this choice is: | ||
1682 | - | ||
1683 | - - Force disabling SMT can break existing setups, especially with | ||
1684 | - unattended updates. | ||
1685 | - | ||
1686 | - - If regular users run untrusted guests on their machine, then L1TF is | ||
1687 | - just an add on to other malware which might be embedded in an untrusted | ||
1688 | - guest, e.g. spam-bots or attacks on the local network. | ||
1689 | - | ||
1690 | - There is no technical way to prevent a user from running untrusted code | ||
1691 | - on their machines blindly. | ||
1692 | - | ||
1693 | - - It's technically extremely unlikely and from today's knowledge even | ||
1694 | - impossible that L1TF can be exploited via the most popular attack | ||
1695 | - mechanisms like JavaScript because these mechanisms have no way to | ||
1696 | - control PTEs. If this would be possible and not other mitigation would | ||
1697 | - be possible, then the default might be different. | ||
1698 | - | ||
1699 | - - The administrators of cloud and hosting setups have to carefully | ||
1700 | - analyze the risk for their scenarios and make the appropriate | ||
1701 | - mitigation choices, which might even vary across their deployed | ||
1702 | - machines and also result in other changes of their overall setup. | ||
1703 | - There is no way for the kernel to provide a sensible default for this | ||
1704 | - kind of scenarios. | ||
1705 | diff --git a/Documentation/index.rst b/Documentation/index.rst | ||
1706 | index 5db7e87c7cb1..1cdc139adb40 100644 | ||
1707 | --- a/Documentation/index.rst | ||
1708 | +++ b/Documentation/index.rst | ||
1709 | @@ -104,6 +104,7 @@ implementation. | ||
1710 | :maxdepth: 2 | ||
1711 | |||
1712 | sh/index | ||
1713 | + x86/index | ||
1714 | |||
1715 | Filesystem Documentation | ||
1716 | ------------------------ | ||
1717 | diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py | ||
1718 | new file mode 100644 | ||
1719 | index 000000000000..33c5c3142e20 | ||
1720 | --- /dev/null | ||
1721 | +++ b/Documentation/x86/conf.py | ||
1722 | @@ -0,0 +1,10 @@ | ||
1723 | +# -*- coding: utf-8; mode: python -*- | ||
1724 | + | ||
1725 | +project = "X86 architecture specific documentation" | ||
1726 | + | ||
1727 | +tags.add("subproject") | ||
1728 | + | ||
1729 | +latex_documents = [ | ||
1730 | + ('index', 'x86.tex', project, | ||
1731 | + 'The kernel development community', 'manual'), | ||
1732 | +] | ||
1733 | diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst | ||
1734 | new file mode 100644 | ||
1735 | index 000000000000..ef389dcf1b1d | ||
1736 | --- /dev/null | ||
1737 | +++ b/Documentation/x86/index.rst | ||
1738 | @@ -0,0 +1,8 @@ | ||
1739 | +========================== | ||
1740 | +x86 architecture specifics | ||
1741 | +========================== | ||
1742 | + | ||
1743 | +.. toctree:: | ||
1744 | + :maxdepth: 1 | ||
1745 | + | ||
1746 | + mds | ||
1747 | diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst | ||
1748 | new file mode 100644 | ||
1749 | index 000000000000..534e9baa4e1d | ||
1750 | --- /dev/null | ||
1751 | +++ b/Documentation/x86/mds.rst | ||
1752 | @@ -0,0 +1,225 @@ | ||
1753 | +Microarchitectural Data Sampling (MDS) mitigation | ||
1754 | +================================================= | ||
1755 | + | ||
1756 | +.. _mds: | ||
1757 | + | ||
1758 | +Overview | ||
1759 | +-------- | ||
1760 | + | ||
1761 | +Microarchitectural Data Sampling (MDS) is a family of side channel attacks | ||
1762 | +on internal buffers in Intel CPUs. The variants are: | ||
1763 | + | ||
1764 | + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) | ||
1765 | + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) | ||
1766 | + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) | ||
1767 | + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091) | ||
1768 | + | ||
1769 | +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a | ||
1770 | +dependent load (store-to-load forwarding) as an optimization. The forward | ||
1771 | +can also happen to a faulting or assisting load operation for a different | ||
1772 | +memory address, which can be exploited under certain conditions. Store | ||
1773 | +buffers are partitioned between Hyper-Threads so cross thread forwarding is | ||
1774 | +not possible. But if a thread enters or exits a sleep state the store | ||
1775 | +buffer is repartitioned which can expose data from one thread to the other. | ||
1776 | + | ||
1777 | +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage | ||
1778 | +L1 miss situations and to hold data which is returned or sent in response | ||
1779 | +to a memory or I/O operation. Fill buffers can forward data to a load | ||
1780 | +operation and also write data to the cache. When the fill buffer is | ||
1781 | +deallocated it can retain the stale data of the preceding operations which | ||
1782 | +can then be forwarded to a faulting or assisting load operation, which can | ||
1783 | +be exploited under certain conditions. Fill buffers are shared between | ||
1784 | +Hyper-Threads so cross thread leakage is possible. | ||
1785 | + | ||
1786 | +MLPDS leaks Load Port Data. Load ports are used to perform load operations | ||
1787 | +from memory or I/O. The received data is then forwarded to the register | ||
1788 | +file or a subsequent operation. In some implementations the Load Port can | ||
1789 | +contain stale data from a previous operation which can be forwarded to | ||
1790 | +faulting or assisting loads under certain conditions, which again can be | ||
1791 | +exploited eventually. Load ports are shared between Hyper-Threads so cross | ||
1792 | +thread leakage is possible. | ||
1793 | + | ||
1794 | +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from | ||
1795 | +memory that takes a fault or assist can leave data in a microarchitectural | ||
1796 | +structure that may later be observed using one of the same methods used by | ||
1797 | +MSBDS, MFBDS or MLPDS. | ||
1798 | + | ||
1799 | +Exposure assumptions | ||
1800 | +-------------------- | ||
1801 | + | ||
1802 | +It is assumed that attack code resides in user space or in a guest with one | ||
1803 | +exception. The rationale behind this assumption is that the code construct | ||
1804 | +needed for exploiting MDS requires: | ||
1805 | + | ||
1806 | + - to control the load to trigger a fault or assist | ||
1807 | + | ||
1808 | + - to have a disclosure gadget which exposes the speculatively accessed | ||
1809 | + data for consumption through a side channel. | ||
1810 | + | ||
1811 | + - to control the pointer through which the disclosure gadget exposes the | ||
1812 | + data | ||
1813 | + | ||
1814 | +The existence of such a construct in the kernel cannot be excluded with | ||
1815 | +100% certainty, but the complexity involved makes it extremly unlikely. | ||
1816 | + | ||
1817 | +There is one exception, which is untrusted BPF. The functionality of | ||
1818 | +untrusted BPF is limited, but it needs to be thoroughly investigated | ||
1819 | +whether it can be used to create such a construct. | ||
1820 | + | ||
1821 | + | ||
1822 | +Mitigation strategy | ||
1823 | +------------------- | ||
1824 | + | ||
1825 | +All variants have the same mitigation strategy at least for the single CPU | ||
1826 | +thread case (SMT off): Force the CPU to clear the affected buffers. | ||
1827 | + | ||
1828 | +This is achieved by using the otherwise unused and obsolete VERW | ||
1829 | +instruction in combination with a microcode update. The microcode clears | ||
1830 | +the affected CPU buffers when the VERW instruction is executed. | ||
1831 | + | ||
1832 | +For virtualization there are two ways to achieve CPU buffer | ||
1833 | +clearing. Either the modified VERW instruction or via the L1D Flush | ||
1834 | +command. The latter is issued when L1TF mitigation is enabled so the extra | ||
1835 | +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to | ||
1836 | +be issued. | ||
1837 | + | ||
1838 | +If the VERW instruction with the supplied segment selector argument is | ||
1839 | +executed on a CPU without the microcode update there is no side effect | ||
1840 | +other than a small number of pointlessly wasted CPU cycles. | ||
1841 | + | ||
1842 | +This does not protect against cross Hyper-Thread attacks except for MSBDS | ||
1843 | +which is only exploitable cross Hyper-thread when one of the Hyper-Threads | ||
1844 | +enters a C-state. | ||
1845 | + | ||
1846 | +The kernel provides a function to invoke the buffer clearing: | ||
1847 | + | ||
1848 | + mds_clear_cpu_buffers() | ||
1849 | + | ||
1850 | +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state | ||
1851 | +(idle) transitions. | ||
1852 | + | ||
1853 | +As a special quirk to address virtualization scenarios where the host has | ||
1854 | +the microcode updated, but the hypervisor does not (yet) expose the | ||
1855 | +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the | ||
1856 | +hope that it might actually clear the buffers. The state is reflected | ||
1857 | +accordingly. | ||
1858 | + | ||
1859 | +According to current knowledge additional mitigations inside the kernel | ||
1860 | +itself are not required because the necessary gadgets to expose the leaked | ||
1861 | +data cannot be controlled in a way which allows exploitation from malicious | ||
1862 | +user space or VM guests. | ||
1863 | + | ||
1864 | +Kernel internal mitigation modes | ||
1865 | +-------------------------------- | ||
1866 | + | ||
1867 | + ======= ============================================================ | ||
1868 | + off Mitigation is disabled. Either the CPU is not affected or | ||
1869 | + mds=off is supplied on the kernel command line | ||
1870 | + | ||
1871 | + full Mitigation is enabled. CPU is affected and MD_CLEAR is | ||
1872 | + advertised in CPUID. | ||
1873 | + | ||
1874 | + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not | ||
1875 | + advertised in CPUID. That is mainly for virtualization | ||
1876 | + scenarios where the host has the updated microcode but the | ||
1877 | + hypervisor does not expose MD_CLEAR in CPUID. It's a best | ||
1878 | + effort approach without guarantee. | ||
1879 | + ======= ============================================================ | ||
1880 | + | ||
1881 | +If the CPU is affected and mds=off is not supplied on the kernel command | ||
1882 | +line then the kernel selects the appropriate mitigation mode depending on | ||
1883 | +the availability of the MD_CLEAR CPUID bit. | ||
1884 | + | ||
1885 | +Mitigation points | ||
1886 | +----------------- | ||
1887 | + | ||
1888 | +1. Return to user space | ||
1889 | +^^^^^^^^^^^^^^^^^^^^^^^ | ||
1890 | + | ||
1891 | + When transitioning from kernel to user space the CPU buffers are flushed | ||
1892 | + on affected CPUs when the mitigation is not disabled on the kernel | ||
1893 | + command line. The migitation is enabled through the static key | ||
1894 | + mds_user_clear. | ||
1895 | + | ||
1896 | + The mitigation is invoked in prepare_exit_to_usermode() which covers | ||
1897 | + most of the kernel to user space transitions. There are a few exceptions | ||
1898 | + which are not invoking prepare_exit_to_usermode() on return to user | ||
1899 | + space. These exceptions use the paranoid exit code. | ||
1900 | + | ||
1901 | + - Non Maskable Interrupt (NMI): | ||
1902 | + | ||
1903 | + Access to sensitive data like keys, credentials in the NMI context is | ||
1904 | + mostly theoretical: The CPU can do prefetching or execute a | ||
1905 | + misspeculated code path and thereby fetching data which might end up | ||
1906 | + leaking through a buffer. | ||
1907 | + | ||
1908 | + But for mounting other attacks the kernel stack address of the task is | ||
1909 | + already valuable information. So in full mitigation mode, the NMI is | ||
1910 | + mitigated on the return from do_nmi() to provide almost complete | ||
1911 | + coverage. | ||
1912 | + | ||
1913 | + - Double fault (#DF): | ||
1914 | + | ||
1915 | + A double fault is usually fatal, but the ESPFIX workaround, which can | ||
1916 | + be triggered from user space through modify_ldt(2) is a recoverable | ||
1917 | + double fault. #DF uses the paranoid exit path, so explicit mitigation | ||
1918 | + in the double fault handler is required. | ||
1919 | + | ||
1920 | + - Machine Check Exception (#MC): | ||
1921 | + | ||
1922 | + Another corner case is a #MC which hits between the CPU buffer clear | ||
1923 | + invocation and the actual return to user. As this still is in kernel | ||
1924 | + space it takes the paranoid exit path which does not clear the CPU | ||
1925 | + buffers. So the #MC handler repopulates the buffers to some | ||
1926 | + extent. Machine checks are not reliably controllable and the window is | ||
1927 | + extremely small so mitigation would just tick a checkbox that this | ||
1928 | + theoretical corner case is covered. To keep the amount of special | ||
1929 | + cases small, ignore #MC. | ||
1930 | + | ||
1931 | + - Debug Exception (#DB): | ||
1932 | + | ||
1933 | + This takes the paranoid exit path only when the INT1 breakpoint is in | ||
1934 | + kernel space. #DB on a user space address takes the regular exit path, | ||
1935 | + so no extra mitigation required. | ||
1936 | + | ||
1937 | + | ||
1938 | +2. C-State transition | ||
1939 | +^^^^^^^^^^^^^^^^^^^^^ | ||
1940 | + | ||
1941 | + When a CPU goes idle and enters a C-State the CPU buffers need to be | ||
1942 | + cleared on affected CPUs when SMT is active. This addresses the | ||
1943 | + repartitioning of the store buffer when one of the Hyper-Threads enters | ||
1944 | + a C-State. | ||
1945 | + | ||
1946 | + When SMT is inactive, i.e. either the CPU does not support it or all | ||
1947 | + sibling threads are offline CPU buffer clearing is not required. | ||
1948 | + | ||
1949 | + The idle clearing is enabled on CPUs which are only affected by MSBDS | ||
1950 | + and not by any other MDS variant. The other MDS variants cannot be | ||
1951 | + protected against cross Hyper-Thread attacks because the Fill Buffer and | ||
1952 | + the Load Ports are shared. So on CPUs affected by other variants, the | ||
1953 | + idle clearing would be a window dressing exercise and is therefore not | ||
1954 | + activated. | ||
1955 | + | ||
1956 | + The invocation is controlled by the static key mds_idle_clear which is | ||
1957 | + switched depending on the chosen mitigation mode and the SMT state of | ||
1958 | + the system. | ||
1959 | + | ||
1960 | + The buffer clear is only invoked before entering the C-State to prevent | ||
1961 | + stale data from the idling CPU from spilling to the Hyper-Thread | ||
1962 | + sibling after the store buffer got repartitioned and all entries are | ||
1963 | + available to the non idle sibling. | ||
1964 | + | ||
1965 | + When coming out of idle the store buffer is partitioned again so each | ||
1966 | + sibling has half of it available. The back from idle CPU could be then | ||
1967 | + speculatively exposed to contents of the sibling. The buffers are | ||
1968 | + flushed either on exit to user space or on VMENTER so malicious code | ||
1969 | + in user space or the guest cannot speculatively access them. | ||
1970 | + | ||
1971 | + The mitigation is hooked into all variants of halt()/mwait(), but does | ||
1972 | + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver | ||
1973 | + has been superseded by the intel_idle driver around 2010 and is | ||
1974 | + preferred on all affected CPUs which are expected to gain the MD_CLEAR | ||
1975 | + functionality in microcode. Aside from that, the IO-Port mechanism is a | ||
1976 | + legacy interface which is only used on older systems which are either | ||
1977 | + not affected or do not receive microcode updates anymore. | ||
1978 | diff --git a/Makefile b/Makefile | ||
1979 | index 914d69b9e3fd..be894b3a97d5 100644 | ||
1980 | --- a/Makefile | ||
1981 | +++ b/Makefile | ||
1982 | @@ -1,7 +1,7 @@ | ||
1983 | # SPDX-License-Identifier: GPL-2.0 | ||
1984 | VERSION = 4 | ||
1985 | PATCHLEVEL = 19 | ||
1986 | -SUBLEVEL = 42 | ||
1987 | +SUBLEVEL = 43 | ||
1988 | EXTRAVERSION = | ||
1989 | NAME = "People's Front" | ||
1990 | |||
1991 | diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c | ||
1992 | index 1341325599a7..4ccbf611a3c5 100644 | ||
1993 | --- a/arch/powerpc/kernel/security.c | ||
1994 | +++ b/arch/powerpc/kernel/security.c | ||
1995 | @@ -56,7 +56,7 @@ void setup_barrier_nospec(void) | ||
1996 | enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && | ||
1997 | security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); | ||
1998 | |||
1999 | - if (!no_nospec) | ||
2000 | + if (!no_nospec && !cpu_mitigations_off()) | ||
2001 | enable_barrier_nospec(enable); | ||
2002 | } | ||
2003 | |||
2004 | @@ -115,7 +115,7 @@ static int __init handle_nospectre_v2(char *p) | ||
2005 | early_param("nospectre_v2", handle_nospectre_v2); | ||
2006 | void setup_spectre_v2(void) | ||
2007 | { | ||
2008 | - if (no_spectrev2) | ||
2009 | + if (no_spectrev2 || cpu_mitigations_off()) | ||
2010 | do_btb_flush_fixups(); | ||
2011 | else | ||
2012 | btb_flush_enabled = true; | ||
2013 | @@ -299,7 +299,7 @@ void setup_stf_barrier(void) | ||
2014 | |||
2015 | stf_enabled_flush_types = type; | ||
2016 | |||
2017 | - if (!no_stf_barrier) | ||
2018 | + if (!no_stf_barrier && !cpu_mitigations_off()) | ||
2019 | stf_barrier_enable(enable); | ||
2020 | } | ||
2021 | |||
2022 | diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c | ||
2023 | index faf00222b324..eaf7300be5ab 100644 | ||
2024 | --- a/arch/powerpc/kernel/setup_64.c | ||
2025 | +++ b/arch/powerpc/kernel/setup_64.c | ||
2026 | @@ -955,7 +955,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) | ||
2027 | |||
2028 | enabled_flush_types = types; | ||
2029 | |||
2030 | - if (!no_rfi_flush) | ||
2031 | + if (!no_rfi_flush && !cpu_mitigations_off()) | ||
2032 | rfi_flush_enable(enable); | ||
2033 | } | ||
2034 | |||
2035 | diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c | ||
2036 | index bdddaae96559..649135cbedd5 100644 | ||
2037 | --- a/arch/s390/kernel/nospec-branch.c | ||
2038 | +++ b/arch/s390/kernel/nospec-branch.c | ||
2039 | @@ -1,6 +1,7 @@ | ||
2040 | // SPDX-License-Identifier: GPL-2.0 | ||
2041 | #include <linux/module.h> | ||
2042 | #include <linux/device.h> | ||
2043 | +#include <linux/cpu.h> | ||
2044 | #include <asm/nospec-branch.h> | ||
2045 | |||
2046 | static int __init nobp_setup_early(char *str) | ||
2047 | @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early); | ||
2048 | |||
2049 | void __init nospec_auto_detect(void) | ||
2050 | { | ||
2051 | - if (test_facility(156)) { | ||
2052 | + if (test_facility(156) || cpu_mitigations_off()) { | ||
2053 | /* | ||
2054 | * The machine supports etokens. | ||
2055 | * Disable expolines and disable nobp. | ||
2056 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | ||
2057 | index 3b2490b81918..8353348ddeaf 100644 | ||
2058 | --- a/arch/x86/entry/common.c | ||
2059 | +++ b/arch/x86/entry/common.c | ||
2060 | @@ -31,6 +31,7 @@ | ||
2061 | #include <asm/vdso.h> | ||
2062 | #include <linux/uaccess.h> | ||
2063 | #include <asm/cpufeature.h> | ||
2064 | +#include <asm/nospec-branch.h> | ||
2065 | |||
2066 | #define CREATE_TRACE_POINTS | ||
2067 | #include <trace/events/syscalls.h> | ||
2068 | @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) | ||
2069 | #endif | ||
2070 | |||
2071 | user_enter_irqoff(); | ||
2072 | + | ||
2073 | + mds_user_clear_cpu_buffers(); | ||
2074 | } | ||
2075 | |||
2076 | #define SYSCALL_EXIT_WORK_FLAGS \ | ||
2077 | diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c | ||
2078 | index f9958ad4d335..a759e59990fb 100644 | ||
2079 | --- a/arch/x86/events/intel/core.c | ||
2080 | +++ b/arch/x86/events/intel/core.c | ||
2081 | @@ -4132,11 +4132,11 @@ __init int intel_pmu_init(void) | ||
2082 | name = "nehalem"; | ||
2083 | break; | ||
2084 | |||
2085 | - case INTEL_FAM6_ATOM_PINEVIEW: | ||
2086 | - case INTEL_FAM6_ATOM_LINCROFT: | ||
2087 | - case INTEL_FAM6_ATOM_PENWELL: | ||
2088 | - case INTEL_FAM6_ATOM_CLOVERVIEW: | ||
2089 | - case INTEL_FAM6_ATOM_CEDARVIEW: | ||
2090 | + case INTEL_FAM6_ATOM_BONNELL: | ||
2091 | + case INTEL_FAM6_ATOM_BONNELL_MID: | ||
2092 | + case INTEL_FAM6_ATOM_SALTWELL: | ||
2093 | + case INTEL_FAM6_ATOM_SALTWELL_MID: | ||
2094 | + case INTEL_FAM6_ATOM_SALTWELL_TABLET: | ||
2095 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2096 | sizeof(hw_cache_event_ids)); | ||
2097 | |||
2098 | @@ -4149,9 +4149,11 @@ __init int intel_pmu_init(void) | ||
2099 | name = "bonnell"; | ||
2100 | break; | ||
2101 | |||
2102 | - case INTEL_FAM6_ATOM_SILVERMONT1: | ||
2103 | - case INTEL_FAM6_ATOM_SILVERMONT2: | ||
2104 | + case INTEL_FAM6_ATOM_SILVERMONT: | ||
2105 | + case INTEL_FAM6_ATOM_SILVERMONT_X: | ||
2106 | + case INTEL_FAM6_ATOM_SILVERMONT_MID: | ||
2107 | case INTEL_FAM6_ATOM_AIRMONT: | ||
2108 | + case INTEL_FAM6_ATOM_AIRMONT_MID: | ||
2109 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, | ||
2110 | sizeof(hw_cache_event_ids)); | ||
2111 | memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, | ||
2112 | @@ -4170,7 +4172,7 @@ __init int intel_pmu_init(void) | ||
2113 | break; | ||
2114 | |||
2115 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
2116 | - case INTEL_FAM6_ATOM_DENVERTON: | ||
2117 | + case INTEL_FAM6_ATOM_GOLDMONT_X: | ||
2118 | memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, | ||
2119 | sizeof(hw_cache_event_ids)); | ||
2120 | memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, | ||
2121 | @@ -4196,7 +4198,7 @@ __init int intel_pmu_init(void) | ||
2122 | name = "goldmont"; | ||
2123 | break; | ||
2124 | |||
2125 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
2126 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
2127 | memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, | ||
2128 | sizeof(hw_cache_event_ids)); | ||
2129 | memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, | ||
2130 | diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c | ||
2131 | index 6eb76106c469..56194c571299 100644 | ||
2132 | --- a/arch/x86/events/intel/cstate.c | ||
2133 | +++ b/arch/x86/events/intel/cstate.c | ||
2134 | @@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { | ||
2135 | |||
2136 | X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), | ||
2137 | |||
2138 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), | ||
2139 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), | ||
2140 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), | ||
2141 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates), | ||
2142 | X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), | ||
2143 | |||
2144 | X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), | ||
2145 | @@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { | ||
2146 | X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), | ||
2147 | |||
2148 | X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), | ||
2149 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates), | ||
2150 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), | ||
2151 | |||
2152 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates), | ||
2153 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), | ||
2154 | { }, | ||
2155 | }; | ||
2156 | MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); | ||
2157 | diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c | ||
2158 | index 32f3e9423e99..91039ffed633 100644 | ||
2159 | --- a/arch/x86/events/intel/rapl.c | ||
2160 | +++ b/arch/x86/events/intel/rapl.c | ||
2161 | @@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { | ||
2162 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), | ||
2163 | |||
2164 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), | ||
2165 | - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), | ||
2166 | + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), | ||
2167 | |||
2168 | - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init), | ||
2169 | + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), | ||
2170 | {}, | ||
2171 | }; | ||
2172 | |||
2173 | diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c | ||
2174 | index b4771a6ddbc1..1b9f85abf9bc 100644 | ||
2175 | --- a/arch/x86/events/msr.c | ||
2176 | +++ b/arch/x86/events/msr.c | ||
2177 | @@ -69,14 +69,14 @@ static bool test_intel(int idx) | ||
2178 | case INTEL_FAM6_BROADWELL_GT3E: | ||
2179 | case INTEL_FAM6_BROADWELL_X: | ||
2180 | |||
2181 | - case INTEL_FAM6_ATOM_SILVERMONT1: | ||
2182 | - case INTEL_FAM6_ATOM_SILVERMONT2: | ||
2183 | + case INTEL_FAM6_ATOM_SILVERMONT: | ||
2184 | + case INTEL_FAM6_ATOM_SILVERMONT_X: | ||
2185 | case INTEL_FAM6_ATOM_AIRMONT: | ||
2186 | |||
2187 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
2188 | - case INTEL_FAM6_ATOM_DENVERTON: | ||
2189 | + case INTEL_FAM6_ATOM_GOLDMONT_X: | ||
2190 | |||
2191 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
2192 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
2193 | |||
2194 | case INTEL_FAM6_XEON_PHI_KNL: | ||
2195 | case INTEL_FAM6_XEON_PHI_KNM: | ||
2196 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h | ||
2197 | index 7b31ee5223fc..69037da75ea0 100644 | ||
2198 | --- a/arch/x86/include/asm/cpufeatures.h | ||
2199 | +++ b/arch/x86/include/asm/cpufeatures.h | ||
2200 | @@ -341,6 +341,7 @@ | ||
2201 | #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ | ||
2202 | #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ | ||
2203 | #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ | ||
2204 | +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ | ||
2205 | #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ | ||
2206 | #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ | ||
2207 | #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ | ||
2208 | @@ -378,5 +379,7 @@ | ||
2209 | #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ | ||
2210 | #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ | ||
2211 | #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ | ||
2212 | +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ | ||
2213 | +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ | ||
2214 | |||
2215 | #endif /* _ASM_X86_CPUFEATURES_H */ | ||
2216 | diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h | ||
2217 | index 0ad25cc895ae..058b1a1994c4 100644 | ||
2218 | --- a/arch/x86/include/asm/intel-family.h | ||
2219 | +++ b/arch/x86/include/asm/intel-family.h | ||
2220 | @@ -8,9 +8,6 @@ | ||
2221 | * The "_X" parts are generally the EP and EX Xeons, or the | ||
2222 | * "Extreme" ones, like Broadwell-E. | ||
2223 | * | ||
2224 | - * Things ending in "2" are usually because we have no better | ||
2225 | - * name for them. There's no processor called "SILVERMONT2". | ||
2226 | - * | ||
2227 | * While adding a new CPUID for a new microarchitecture, add a new | ||
2228 | * group to keep logically sorted out in chronological order. Within | ||
2229 | * that group keep the CPUID for the variants sorted by model number. | ||
2230 | @@ -59,19 +56,23 @@ | ||
2231 | |||
2232 | /* "Small Core" Processors (Atom) */ | ||
2233 | |||
2234 | -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C | ||
2235 | -#define INTEL_FAM6_ATOM_LINCROFT 0x26 | ||
2236 | -#define INTEL_FAM6_ATOM_PENWELL 0x27 | ||
2237 | -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 | ||
2238 | -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 | ||
2239 | -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ | ||
2240 | -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ | ||
2241 | -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ | ||
2242 | -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ | ||
2243 | -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ | ||
2244 | -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C | ||
2245 | -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ | ||
2246 | -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A | ||
2247 | +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ | ||
2248 | +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ | ||
2249 | + | ||
2250 | +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ | ||
2251 | +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ | ||
2252 | +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ | ||
2253 | + | ||
2254 | +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ | ||
2255 | +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ | ||
2256 | +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ | ||
2257 | + | ||
2258 | +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ | ||
2259 | +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ | ||
2260 | + | ||
2261 | +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ | ||
2262 | +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ | ||
2263 | +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ | ||
2264 | |||
2265 | /* Xeon Phi */ | ||
2266 | |||
2267 | diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h | ||
2268 | index 15450a675031..c99c66b41e53 100644 | ||
2269 | --- a/arch/x86/include/asm/irqflags.h | ||
2270 | +++ b/arch/x86/include/asm/irqflags.h | ||
2271 | @@ -6,6 +6,8 @@ | ||
2272 | |||
2273 | #ifndef __ASSEMBLY__ | ||
2274 | |||
2275 | +#include <asm/nospec-branch.h> | ||
2276 | + | ||
2277 | /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ | ||
2278 | #define __cpuidle __attribute__((__section__(".cpuidle.text"))) | ||
2279 | |||
2280 | @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) | ||
2281 | |||
2282 | static inline __cpuidle void native_safe_halt(void) | ||
2283 | { | ||
2284 | + mds_idle_clear_cpu_buffers(); | ||
2285 | asm volatile("sti; hlt": : :"memory"); | ||
2286 | } | ||
2287 | |||
2288 | static inline __cpuidle void native_halt(void) | ||
2289 | { | ||
2290 | + mds_idle_clear_cpu_buffers(); | ||
2291 | asm volatile("hlt": : :"memory"); | ||
2292 | } | ||
2293 | |||
2294 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h | ||
2295 | index f14ca0be1e3f..f85f43db9225 100644 | ||
2296 | --- a/arch/x86/include/asm/msr-index.h | ||
2297 | +++ b/arch/x86/include/asm/msr-index.h | ||
2298 | @@ -2,6 +2,8 @@ | ||
2299 | #ifndef _ASM_X86_MSR_INDEX_H | ||
2300 | #define _ASM_X86_MSR_INDEX_H | ||
2301 | |||
2302 | +#include <linux/bits.h> | ||
2303 | + | ||
2304 | /* | ||
2305 | * CPU model specific register (MSR) numbers. | ||
2306 | * | ||
2307 | @@ -40,14 +42,14 @@ | ||
2308 | /* Intel MSRs. Some also available on other CPUs */ | ||
2309 | |||
2310 | #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ | ||
2311 | -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ | ||
2312 | +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ | ||
2313 | #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ | ||
2314 | -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ | ||
2315 | +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ | ||
2316 | #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ | ||
2317 | -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ | ||
2318 | +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ | ||
2319 | |||
2320 | #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ | ||
2321 | -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ | ||
2322 | +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ | ||
2323 | |||
2324 | #define MSR_PPIN_CTL 0x0000004e | ||
2325 | #define MSR_PPIN 0x0000004f | ||
2326 | @@ -69,20 +71,25 @@ | ||
2327 | #define MSR_MTRRcap 0x000000fe | ||
2328 | |||
2329 | #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a | ||
2330 | -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ | ||
2331 | -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ | ||
2332 | -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ | ||
2333 | -#define ARCH_CAP_SSB_NO (1 << 4) /* | ||
2334 | - * Not susceptible to Speculative Store Bypass | ||
2335 | - * attack, so no Speculative Store Bypass | ||
2336 | - * control required. | ||
2337 | - */ | ||
2338 | +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ | ||
2339 | +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ | ||
2340 | +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ | ||
2341 | +#define ARCH_CAP_SSB_NO BIT(4) /* | ||
2342 | + * Not susceptible to Speculative Store Bypass | ||
2343 | + * attack, so no Speculative Store Bypass | ||
2344 | + * control required. | ||
2345 | + */ | ||
2346 | +#define ARCH_CAP_MDS_NO BIT(5) /* | ||
2347 | + * Not susceptible to | ||
2348 | + * Microarchitectural Data | ||
2349 | + * Sampling (MDS) vulnerabilities. | ||
2350 | + */ | ||
2351 | |||
2352 | #define MSR_IA32_FLUSH_CMD 0x0000010b | ||
2353 | -#define L1D_FLUSH (1 << 0) /* | ||
2354 | - * Writeback and invalidate the | ||
2355 | - * L1 data cache. | ||
2356 | - */ | ||
2357 | +#define L1D_FLUSH BIT(0) /* | ||
2358 | + * Writeback and invalidate the | ||
2359 | + * L1 data cache. | ||
2360 | + */ | ||
2361 | |||
2362 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | ||
2363 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
2364 | diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h | ||
2365 | index 39a2fb29378a..eb0f80ce8524 100644 | ||
2366 | --- a/arch/x86/include/asm/mwait.h | ||
2367 | +++ b/arch/x86/include/asm/mwait.h | ||
2368 | @@ -6,6 +6,7 @@ | ||
2369 | #include <linux/sched/idle.h> | ||
2370 | |||
2371 | #include <asm/cpufeature.h> | ||
2372 | +#include <asm/nospec-branch.h> | ||
2373 | |||
2374 | #define MWAIT_SUBSTATE_MASK 0xf | ||
2375 | #define MWAIT_CSTATE_MASK 0xf | ||
2376 | @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, | ||
2377 | |||
2378 | static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
2379 | { | ||
2380 | + mds_idle_clear_cpu_buffers(); | ||
2381 | + | ||
2382 | /* "mwait %eax, %ecx;" */ | ||
2383 | asm volatile(".byte 0x0f, 0x01, 0xc9;" | ||
2384 | :: "a" (eax), "c" (ecx)); | ||
2385 | @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
2386 | static inline void __mwaitx(unsigned long eax, unsigned long ebx, | ||
2387 | unsigned long ecx) | ||
2388 | { | ||
2389 | + /* No MDS buffer clear as this is AMD/HYGON only */ | ||
2390 | + | ||
2391 | /* "mwaitx %eax, %ebx, %ecx;" */ | ||
2392 | asm volatile(".byte 0x0f, 0x01, 0xfb;" | ||
2393 | :: "a" (eax), "b" (ebx), "c" (ecx)); | ||
2394 | @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, | ||
2395 | |||
2396 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
2397 | { | ||
2398 | + mds_idle_clear_cpu_buffers(); | ||
2399 | + | ||
2400 | trace_hardirqs_on(); | ||
2401 | /* "mwait %eax, %ecx;" */ | ||
2402 | asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" | ||
2403 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h | ||
2404 | index 032b6009baab..599c273f5d00 100644 | ||
2405 | --- a/arch/x86/include/asm/nospec-branch.h | ||
2406 | +++ b/arch/x86/include/asm/nospec-branch.h | ||
2407 | @@ -317,6 +317,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); | ||
2408 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); | ||
2409 | DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); | ||
2410 | |||
2411 | +DECLARE_STATIC_KEY_FALSE(mds_user_clear); | ||
2412 | +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); | ||
2413 | + | ||
2414 | +#include <asm/segment.h> | ||
2415 | + | ||
2416 | +/** | ||
2417 | + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability | ||
2418 | + * | ||
2419 | + * This uses the otherwise unused and obsolete VERW instruction in | ||
2420 | + * combination with microcode which triggers a CPU buffer flush when the | ||
2421 | + * instruction is executed. | ||
2422 | + */ | ||
2423 | +static inline void mds_clear_cpu_buffers(void) | ||
2424 | +{ | ||
2425 | + static const u16 ds = __KERNEL_DS; | ||
2426 | + | ||
2427 | + /* | ||
2428 | + * Has to be the memory-operand variant because only that | ||
2429 | + * guarantees the CPU buffer flush functionality according to | ||
2430 | + * documentation. The register-operand variant does not. | ||
2431 | + * Works with any segment selector, but a valid writable | ||
2432 | + * data segment is the fastest variant. | ||
2433 | + * | ||
2434 | + * "cc" clobber is required because VERW modifies ZF. | ||
2435 | + */ | ||
2436 | + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); | ||
2437 | +} | ||
2438 | + | ||
2439 | +/** | ||
2440 | + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability | ||
2441 | + * | ||
2442 | + * Clear CPU buffers if the corresponding static key is enabled | ||
2443 | + */ | ||
2444 | +static inline void mds_user_clear_cpu_buffers(void) | ||
2445 | +{ | ||
2446 | + if (static_branch_likely(&mds_user_clear)) | ||
2447 | + mds_clear_cpu_buffers(); | ||
2448 | +} | ||
2449 | + | ||
2450 | +/** | ||
2451 | + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability | ||
2452 | + * | ||
2453 | + * Clear CPU buffers if the corresponding static key is enabled | ||
2454 | + */ | ||
2455 | +static inline void mds_idle_clear_cpu_buffers(void) | ||
2456 | +{ | ||
2457 | + if (static_branch_likely(&mds_idle_clear)) | ||
2458 | + mds_clear_cpu_buffers(); | ||
2459 | +} | ||
2460 | + | ||
2461 | #endif /* __ASSEMBLY__ */ | ||
2462 | |||
2463 | /* | ||
2464 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h | ||
2465 | index d53c54b842da..b54f25697beb 100644 | ||
2466 | --- a/arch/x86/include/asm/processor.h | ||
2467 | +++ b/arch/x86/include/asm/processor.h | ||
2468 | @@ -997,4 +997,10 @@ enum l1tf_mitigations { | ||
2469 | |||
2470 | extern enum l1tf_mitigations l1tf_mitigation; | ||
2471 | |||
2472 | +enum mds_mitigations { | ||
2473 | + MDS_MITIGATION_OFF, | ||
2474 | + MDS_MITIGATION_FULL, | ||
2475 | + MDS_MITIGATION_VMWERV, | ||
2476 | +}; | ||
2477 | + | ||
2478 | #endif /* _ASM_X86_PROCESSOR_H */ | ||
2479 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c | ||
2480 | index e5258bd64200..9b096f26d1c8 100644 | ||
2481 | --- a/arch/x86/kernel/cpu/bugs.c | ||
2482 | +++ b/arch/x86/kernel/cpu/bugs.c | ||
2483 | @@ -35,6 +35,7 @@ | ||
2484 | static void __init spectre_v2_select_mitigation(void); | ||
2485 | static void __init ssb_select_mitigation(void); | ||
2486 | static void __init l1tf_select_mitigation(void); | ||
2487 | +static void __init mds_select_mitigation(void); | ||
2488 | |||
2489 | /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ | ||
2490 | u64 x86_spec_ctrl_base; | ||
2491 | @@ -61,6 +62,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); | ||
2492 | /* Control unconditional IBPB in switch_mm() */ | ||
2493 | DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); | ||
2494 | |||
2495 | +/* Control MDS CPU buffer clear before returning to user space */ | ||
2496 | +DEFINE_STATIC_KEY_FALSE(mds_user_clear); | ||
2497 | +EXPORT_SYMBOL_GPL(mds_user_clear); | ||
2498 | +/* Control MDS CPU buffer clear before idling (halt, mwait) */ | ||
2499 | +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); | ||
2500 | +EXPORT_SYMBOL_GPL(mds_idle_clear); | ||
2501 | + | ||
2502 | void __init check_bugs(void) | ||
2503 | { | ||
2504 | identify_boot_cpu(); | ||
2505 | @@ -99,6 +107,10 @@ void __init check_bugs(void) | ||
2506 | |||
2507 | l1tf_select_mitigation(); | ||
2508 | |||
2509 | + mds_select_mitigation(); | ||
2510 | + | ||
2511 | + arch_smt_update(); | ||
2512 | + | ||
2513 | #ifdef CONFIG_X86_32 | ||
2514 | /* | ||
2515 | * Check whether we are able to run this kernel safely on SMP. | ||
2516 | @@ -204,6 +216,61 @@ static void x86_amd_ssb_disable(void) | ||
2517 | wrmsrl(MSR_AMD64_LS_CFG, msrval); | ||
2518 | } | ||
2519 | |||
2520 | +#undef pr_fmt | ||
2521 | +#define pr_fmt(fmt) "MDS: " fmt | ||
2522 | + | ||
2523 | +/* Default mitigation for MDS-affected CPUs */ | ||
2524 | +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; | ||
2525 | +static bool mds_nosmt __ro_after_init = false; | ||
2526 | + | ||
2527 | +static const char * const mds_strings[] = { | ||
2528 | + [MDS_MITIGATION_OFF] = "Vulnerable", | ||
2529 | + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", | ||
2530 | + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", | ||
2531 | +}; | ||
2532 | + | ||
2533 | +static void __init mds_select_mitigation(void) | ||
2534 | +{ | ||
2535 | + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { | ||
2536 | + mds_mitigation = MDS_MITIGATION_OFF; | ||
2537 | + return; | ||
2538 | + } | ||
2539 | + | ||
2540 | + if (mds_mitigation == MDS_MITIGATION_FULL) { | ||
2541 | + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) | ||
2542 | + mds_mitigation = MDS_MITIGATION_VMWERV; | ||
2543 | + | ||
2544 | + static_branch_enable(&mds_user_clear); | ||
2545 | + | ||
2546 | + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && | ||
2547 | + (mds_nosmt || cpu_mitigations_auto_nosmt())) | ||
2548 | + cpu_smt_disable(false); | ||
2549 | + } | ||
2550 | + | ||
2551 | + pr_info("%s\n", mds_strings[mds_mitigation]); | ||
2552 | +} | ||
2553 | + | ||
2554 | +static int __init mds_cmdline(char *str) | ||
2555 | +{ | ||
2556 | + if (!boot_cpu_has_bug(X86_BUG_MDS)) | ||
2557 | + return 0; | ||
2558 | + | ||
2559 | + if (!str) | ||
2560 | + return -EINVAL; | ||
2561 | + | ||
2562 | + if (!strcmp(str, "off")) | ||
2563 | + mds_mitigation = MDS_MITIGATION_OFF; | ||
2564 | + else if (!strcmp(str, "full")) | ||
2565 | + mds_mitigation = MDS_MITIGATION_FULL; | ||
2566 | + else if (!strcmp(str, "full,nosmt")) { | ||
2567 | + mds_mitigation = MDS_MITIGATION_FULL; | ||
2568 | + mds_nosmt = true; | ||
2569 | + } | ||
2570 | + | ||
2571 | + return 0; | ||
2572 | +} | ||
2573 | +early_param("mds", mds_cmdline); | ||
2574 | + | ||
2575 | #undef pr_fmt | ||
2576 | #define pr_fmt(fmt) "Spectre V2 : " fmt | ||
2577 | |||
2578 | @@ -428,7 +495,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) | ||
2579 | char arg[20]; | ||
2580 | int ret, i; | ||
2581 | |||
2582 | - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) | ||
2583 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || | ||
2584 | + cpu_mitigations_off()) | ||
2585 | return SPECTRE_V2_CMD_NONE; | ||
2586 | |||
2587 | ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); | ||
2588 | @@ -560,9 +628,6 @@ specv2_set_mode: | ||
2589 | |||
2590 | /* Set up IBPB and STIBP depending on the general spectre V2 command */ | ||
2591 | spectre_v2_user_select_mitigation(cmd); | ||
2592 | - | ||
2593 | - /* Enable STIBP if appropriate */ | ||
2594 | - arch_smt_update(); | ||
2595 | } | ||
2596 | |||
2597 | static void update_stibp_msr(void * __unused) | ||
2598 | @@ -596,6 +661,31 @@ static void update_indir_branch_cond(void) | ||
2599 | static_branch_disable(&switch_to_cond_stibp); | ||
2600 | } | ||
2601 | |||
2602 | +#undef pr_fmt | ||
2603 | +#define pr_fmt(fmt) fmt | ||
2604 | + | ||
2605 | +/* Update the static key controlling the MDS CPU buffer clear in idle */ | ||
2606 | +static void update_mds_branch_idle(void) | ||
2607 | +{ | ||
2608 | + /* | ||
2609 | + * Enable the idle clearing if SMT is active on CPUs which are | ||
2610 | + * affected only by MSBDS and not any other MDS variant. | ||
2611 | + * | ||
2612 | + * The other variants cannot be mitigated when SMT is enabled, so | ||
2613 | + * clearing the buffers on idle just to prevent the Store Buffer | ||
2614 | + * repartitioning leak would be a window dressing exercise. | ||
2615 | + */ | ||
2616 | + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) | ||
2617 | + return; | ||
2618 | + | ||
2619 | + if (sched_smt_active()) | ||
2620 | + static_branch_enable(&mds_idle_clear); | ||
2621 | + else | ||
2622 | + static_branch_disable(&mds_idle_clear); | ||
2623 | +} | ||
2624 | + | ||
2625 | +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" | ||
2626 | + | ||
2627 | void arch_smt_update(void) | ||
2628 | { | ||
2629 | /* Enhanced IBRS implies STIBP. No update required. */ | ||
2630 | @@ -616,6 +706,17 @@ void arch_smt_update(void) | ||
2631 | break; | ||
2632 | } | ||
2633 | |||
2634 | + switch (mds_mitigation) { | ||
2635 | + case MDS_MITIGATION_FULL: | ||
2636 | + case MDS_MITIGATION_VMWERV: | ||
2637 | + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) | ||
2638 | + pr_warn_once(MDS_MSG_SMT); | ||
2639 | + update_mds_branch_idle(); | ||
2640 | + break; | ||
2641 | + case MDS_MITIGATION_OFF: | ||
2642 | + break; | ||
2643 | + } | ||
2644 | + | ||
2645 | mutex_unlock(&spec_ctrl_mutex); | ||
2646 | } | ||
2647 | |||
2648 | @@ -657,7 +758,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) | ||
2649 | char arg[20]; | ||
2650 | int ret, i; | ||
2651 | |||
2652 | - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { | ||
2653 | + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || | ||
2654 | + cpu_mitigations_off()) { | ||
2655 | return SPEC_STORE_BYPASS_CMD_NONE; | ||
2656 | } else { | ||
2657 | ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", | ||
2658 | @@ -978,6 +1080,11 @@ static void __init l1tf_select_mitigation(void) | ||
2659 | if (!boot_cpu_has_bug(X86_BUG_L1TF)) | ||
2660 | return; | ||
2661 | |||
2662 | + if (cpu_mitigations_off()) | ||
2663 | + l1tf_mitigation = L1TF_MITIGATION_OFF; | ||
2664 | + else if (cpu_mitigations_auto_nosmt()) | ||
2665 | + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; | ||
2666 | + | ||
2667 | override_cache_bits(&boot_cpu_data); | ||
2668 | |||
2669 | switch (l1tf_mitigation) { | ||
2670 | @@ -1006,7 +1113,7 @@ static void __init l1tf_select_mitigation(void) | ||
2671 | pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", | ||
2672 | half_pa); | ||
2673 | pr_info("However, doing so will make a part of your RAM unusable.\n"); | ||
2674 | - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); | ||
2675 | + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); | ||
2676 | return; | ||
2677 | } | ||
2678 | |||
2679 | @@ -1039,6 +1146,7 @@ static int __init l1tf_cmdline(char *str) | ||
2680 | early_param("l1tf", l1tf_cmdline); | ||
2681 | |||
2682 | #undef pr_fmt | ||
2683 | +#define pr_fmt(fmt) fmt | ||
2684 | |||
2685 | #ifdef CONFIG_SYSFS | ||
2686 | |||
2687 | @@ -1077,6 +1185,23 @@ static ssize_t l1tf_show_state(char *buf) | ||
2688 | } | ||
2689 | #endif | ||
2690 | |||
2691 | +static ssize_t mds_show_state(char *buf) | ||
2692 | +{ | ||
2693 | + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { | ||
2694 | + return sprintf(buf, "%s; SMT Host state unknown\n", | ||
2695 | + mds_strings[mds_mitigation]); | ||
2696 | + } | ||
2697 | + | ||
2698 | + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { | ||
2699 | + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], | ||
2700 | + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : | ||
2701 | + sched_smt_active() ? "mitigated" : "disabled")); | ||
2702 | + } | ||
2703 | + | ||
2704 | + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], | ||
2705 | + sched_smt_active() ? "vulnerable" : "disabled"); | ||
2706 | +} | ||
2707 | + | ||
2708 | static char *stibp_state(void) | ||
2709 | { | ||
2710 | if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) | ||
2711 | @@ -1141,6 +1266,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr | ||
2712 | if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) | ||
2713 | return l1tf_show_state(buf); | ||
2714 | break; | ||
2715 | + | ||
2716 | + case X86_BUG_MDS: | ||
2717 | + return mds_show_state(buf); | ||
2718 | + | ||
2719 | default: | ||
2720 | break; | ||
2721 | } | ||
2722 | @@ -1172,4 +1301,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b | ||
2723 | { | ||
2724 | return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); | ||
2725 | } | ||
2726 | + | ||
2727 | +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) | ||
2728 | +{ | ||
2729 | + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); | ||
2730 | +} | ||
2731 | #endif | ||
2732 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | ||
2733 | index 44c4ef3d989b..1073118b9bf0 100644 | ||
2734 | --- a/arch/x86/kernel/cpu/common.c | ||
2735 | +++ b/arch/x86/kernel/cpu/common.c | ||
2736 | @@ -948,60 +948,73 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | ||
2737 | #endif | ||
2738 | } | ||
2739 | |||
2740 | -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { | ||
2741 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, | ||
2742 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, | ||
2743 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, | ||
2744 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, | ||
2745 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, | ||
2746 | - { X86_VENDOR_CENTAUR, 5 }, | ||
2747 | - { X86_VENDOR_INTEL, 5 }, | ||
2748 | - { X86_VENDOR_NSC, 5 }, | ||
2749 | - { X86_VENDOR_ANY, 4 }, | ||
2750 | +#define NO_SPECULATION BIT(0) | ||
2751 | +#define NO_MELTDOWN BIT(1) | ||
2752 | +#define NO_SSB BIT(2) | ||
2753 | +#define NO_L1TF BIT(3) | ||
2754 | +#define NO_MDS BIT(4) | ||
2755 | +#define MSBDS_ONLY BIT(5) | ||
2756 | + | ||
2757 | +#define VULNWL(_vendor, _family, _model, _whitelist) \ | ||
2758 | + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } | ||
2759 | + | ||
2760 | +#define VULNWL_INTEL(model, whitelist) \ | ||
2761 | + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) | ||
2762 | + | ||
2763 | +#define VULNWL_AMD(family, whitelist) \ | ||
2764 | + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) | ||
2765 | + | ||
2766 | +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { | ||
2767 | + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), | ||
2768 | + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), | ||
2769 | + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), | ||
2770 | + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), | ||
2771 | + | ||
2772 | + /* Intel Family 6 */ | ||
2773 | + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), | ||
2774 | + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), | ||
2775 | + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), | ||
2776 | + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), | ||
2777 | + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), | ||
2778 | + | ||
2779 | + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2780 | + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2781 | + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2782 | + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2783 | + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2784 | + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), | ||
2785 | + | ||
2786 | + VULNWL_INTEL(CORE_YONAH, NO_SSB), | ||
2787 | + | ||
2788 | + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), | ||
2789 | + | ||
2790 | + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), | ||
2791 | + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), | ||
2792 | + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), | ||
2793 | + | ||
2794 | + /* AMD Family 0xf - 0x12 */ | ||
2795 | + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), | ||
2796 | + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), | ||
2797 | + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), | ||
2798 | + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), | ||
2799 | + | ||
2800 | + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ | ||
2801 | + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), | ||
2802 | {} | ||
2803 | }; | ||
2804 | |||
2805 | -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { | ||
2806 | - { X86_VENDOR_AMD }, | ||
2807 | - {} | ||
2808 | -}; | ||
2809 | - | ||
2810 | -/* Only list CPUs which speculate but are non susceptible to SSB */ | ||
2811 | -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { | ||
2812 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, | ||
2813 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, | ||
2814 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, | ||
2815 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, | ||
2816 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, | ||
2817 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, | ||
2818 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, | ||
2819 | - { X86_VENDOR_AMD, 0x12, }, | ||
2820 | - { X86_VENDOR_AMD, 0x11, }, | ||
2821 | - { X86_VENDOR_AMD, 0x10, }, | ||
2822 | - { X86_VENDOR_AMD, 0xf, }, | ||
2823 | - {} | ||
2824 | -}; | ||
2825 | +static bool __init cpu_matches(unsigned long which) | ||
2826 | +{ | ||
2827 | + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); | ||
2828 | |||
2829 | -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { | ||
2830 | - /* in addition to cpu_no_speculation */ | ||
2831 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, | ||
2832 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, | ||
2833 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, | ||
2834 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, | ||
2835 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, | ||
2836 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, | ||
2837 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, | ||
2838 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, | ||
2839 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, | ||
2840 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, | ||
2841 | - {} | ||
2842 | -}; | ||
2843 | + return m && !!(m->driver_data & which); | ||
2844 | +} | ||
2845 | |||
2846 | static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | ||
2847 | { | ||
2848 | u64 ia32_cap = 0; | ||
2849 | |||
2850 | - if (x86_match_cpu(cpu_no_speculation)) | ||
2851 | + if (cpu_matches(NO_SPECULATION)) | ||
2852 | return; | ||
2853 | |||
2854 | setup_force_cpu_bug(X86_BUG_SPECTRE_V1); | ||
2855 | @@ -1010,15 +1023,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | ||
2856 | if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) | ||
2857 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); | ||
2858 | |||
2859 | - if (!x86_match_cpu(cpu_no_spec_store_bypass) && | ||
2860 | - !(ia32_cap & ARCH_CAP_SSB_NO) && | ||
2861 | + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && | ||
2862 | !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) | ||
2863 | setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); | ||
2864 | |||
2865 | if (ia32_cap & ARCH_CAP_IBRS_ALL) | ||
2866 | setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); | ||
2867 | |||
2868 | - if (x86_match_cpu(cpu_no_meltdown)) | ||
2869 | + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { | ||
2870 | + setup_force_cpu_bug(X86_BUG_MDS); | ||
2871 | + if (cpu_matches(MSBDS_ONLY)) | ||
2872 | + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); | ||
2873 | + } | ||
2874 | + | ||
2875 | + if (cpu_matches(NO_MELTDOWN)) | ||
2876 | return; | ||
2877 | |||
2878 | /* Rogue Data Cache Load? No! */ | ||
2879 | @@ -1027,7 +1045,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) | ||
2880 | |||
2881 | setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); | ||
2882 | |||
2883 | - if (x86_match_cpu(cpu_no_l1tf)) | ||
2884 | + if (cpu_matches(NO_L1TF)) | ||
2885 | return; | ||
2886 | |||
2887 | setup_force_cpu_bug(X86_BUG_L1TF); | ||
2888 | diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | ||
2889 | index f8c260d522ca..912d53939f4f 100644 | ||
2890 | --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | ||
2891 | +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | ||
2892 | @@ -91,7 +91,7 @@ static u64 get_prefetch_disable_bits(void) | ||
2893 | */ | ||
2894 | return 0xF; | ||
2895 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
2896 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
2897 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
2898 | /* | ||
2899 | * SDM defines bits of MSR_MISC_FEATURE_CONTROL register | ||
2900 | * as: | ||
2901 | @@ -995,7 +995,7 @@ static int measure_cycles_perf_fn(void *_plr) | ||
2902 | |||
2903 | switch (boot_cpu_data.x86_model) { | ||
2904 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
2905 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
2906 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
2907 | l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1; | ||
2908 | l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1; | ||
2909 | break; | ||
2910 | diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c | ||
2911 | index 18bc9b51ac9b..086cf1d1d71d 100644 | ||
2912 | --- a/arch/x86/kernel/nmi.c | ||
2913 | +++ b/arch/x86/kernel/nmi.c | ||
2914 | @@ -34,6 +34,7 @@ | ||
2915 | #include <asm/x86_init.h> | ||
2916 | #include <asm/reboot.h> | ||
2917 | #include <asm/cache.h> | ||
2918 | +#include <asm/nospec-branch.h> | ||
2919 | |||
2920 | #define CREATE_TRACE_POINTS | ||
2921 | #include <trace/events/nmi.h> | ||
2922 | @@ -533,6 +534,9 @@ nmi_restart: | ||
2923 | write_cr2(this_cpu_read(nmi_cr2)); | ||
2924 | if (this_cpu_dec_return(nmi_state)) | ||
2925 | goto nmi_restart; | ||
2926 | + | ||
2927 | + if (user_mode(regs)) | ||
2928 | + mds_user_clear_cpu_buffers(); | ||
2929 | } | ||
2930 | NOKPROBE_SYMBOL(do_nmi); | ||
2931 | |||
2932 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c | ||
2933 | index e6db475164ed..0a5efd764914 100644 | ||
2934 | --- a/arch/x86/kernel/traps.c | ||
2935 | +++ b/arch/x86/kernel/traps.c | ||
2936 | @@ -58,6 +58,7 @@ | ||
2937 | #include <asm/alternative.h> | ||
2938 | #include <asm/fpu/xstate.h> | ||
2939 | #include <asm/trace/mpx.h> | ||
2940 | +#include <asm/nospec-branch.h> | ||
2941 | #include <asm/mpx.h> | ||
2942 | #include <asm/vm86.h> | ||
2943 | #include <asm/umip.h> | ||
2944 | @@ -387,6 +388,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
2945 | regs->ip = (unsigned long)general_protection; | ||
2946 | regs->sp = (unsigned long)&gpregs->orig_ax; | ||
2947 | |||
2948 | + /* | ||
2949 | + * This situation can be triggered by userspace via | ||
2950 | + * modify_ldt(2) and the return does not take the regular | ||
2951 | + * user space exit, so a CPU buffer clear is required when | ||
2952 | + * MDS mitigation is enabled. | ||
2953 | + */ | ||
2954 | + mds_user_clear_cpu_buffers(); | ||
2955 | return; | ||
2956 | } | ||
2957 | #endif | ||
2958 | diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c | ||
2959 | index 6d5dc5dabfd7..03b7529333a6 100644 | ||
2960 | --- a/arch/x86/kernel/tsc.c | ||
2961 | +++ b/arch/x86/kernel/tsc.c | ||
2962 | @@ -636,7 +636,7 @@ unsigned long native_calibrate_tsc(void) | ||
2963 | case INTEL_FAM6_KABYLAKE_DESKTOP: | ||
2964 | crystal_khz = 24000; /* 24.0 MHz */ | ||
2965 | break; | ||
2966 | - case INTEL_FAM6_ATOM_DENVERTON: | ||
2967 | + case INTEL_FAM6_ATOM_GOLDMONT_X: | ||
2968 | crystal_khz = 25000; /* 25.0 MHz */ | ||
2969 | break; | ||
2970 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
2971 | diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c | ||
2972 | index 27ef714d886c..3d0e9aeea7c8 100644 | ||
2973 | --- a/arch/x86/kernel/tsc_msr.c | ||
2974 | +++ b/arch/x86/kernel/tsc_msr.c | ||
2975 | @@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = { | ||
2976 | }; | ||
2977 | |||
2978 | static const struct x86_cpu_id tsc_msr_cpu_ids[] = { | ||
2979 | - INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw), | ||
2980 | - INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv), | ||
2981 | - INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt), | ||
2982 | + INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw), | ||
2983 | + INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv), | ||
2984 | + INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt), | ||
2985 | + INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng), | ||
2986 | INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), | ||
2987 | - INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng), | ||
2988 | - INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann), | ||
2989 | + INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann), | ||
2990 | {} | ||
2991 | }; | ||
2992 | |||
2993 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c | ||
2994 | index 98d13c6a64be..b810102a9cfa 100644 | ||
2995 | --- a/arch/x86/kvm/cpuid.c | ||
2996 | +++ b/arch/x86/kvm/cpuid.c | ||
2997 | @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
2998 | /* cpuid 0x80000008.ebx */ | ||
2999 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = | ||
3000 | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | | ||
3001 | - F(AMD_SSB_NO); | ||
3002 | + F(AMD_SSB_NO) | F(AMD_STIBP); | ||
3003 | |||
3004 | /* cpuid 0xC0000001.edx */ | ||
3005 | const u32 kvm_cpuid_C000_0001_edx_x86_features = | ||
3006 | @@ -412,7 +412,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
3007 | /* cpuid 7.0.edx*/ | ||
3008 | const u32 kvm_cpuid_7_0_edx_x86_features = | ||
3009 | F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | | ||
3010 | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); | ||
3011 | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | | ||
3012 | + F(MD_CLEAR); | ||
3013 | |||
3014 | /* all calls to cpuid_count() should be made on the same cpu */ | ||
3015 | get_cpu(); | ||
3016 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c | ||
3017 | index 215339c7d161..73d6d585dd66 100644 | ||
3018 | --- a/arch/x86/kvm/vmx.c | ||
3019 | +++ b/arch/x86/kvm/vmx.c | ||
3020 | @@ -10765,8 +10765,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | ||
3021 | evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? | ||
3022 | (unsigned long)¤t_evmcs->host_rsp : 0; | ||
3023 | |||
3024 | + /* L1D Flush includes CPU buffer clear to mitigate MDS */ | ||
3025 | if (static_branch_unlikely(&vmx_l1d_should_flush)) | ||
3026 | vmx_l1d_flush(vcpu); | ||
3027 | + else if (static_branch_unlikely(&mds_user_clear)) | ||
3028 | + mds_clear_cpu_buffers(); | ||
3029 | |||
3030 | asm( | ||
3031 | /* Store host registers */ | ||
3032 | @@ -11127,8 +11130,8 @@ free_vcpu: | ||
3033 | return ERR_PTR(err); | ||
3034 | } | ||
3035 | |||
3036 | -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" | ||
3037 | -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" | ||
3038 | +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" | ||
3039 | +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" | ||
3040 | |||
3041 | static int vmx_vm_init(struct kvm *kvm) | ||
3042 | { | ||
3043 | diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c | ||
3044 | index c1fc1ae6b429..4df3e5c89d57 100644 | ||
3045 | --- a/arch/x86/mm/pti.c | ||
3046 | +++ b/arch/x86/mm/pti.c | ||
3047 | @@ -35,6 +35,7 @@ | ||
3048 | #include <linux/spinlock.h> | ||
3049 | #include <linux/mm.h> | ||
3050 | #include <linux/uaccess.h> | ||
3051 | +#include <linux/cpu.h> | ||
3052 | |||
3053 | #include <asm/cpufeature.h> | ||
3054 | #include <asm/hypervisor.h> | ||
3055 | @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void) | ||
3056 | } | ||
3057 | } | ||
3058 | |||
3059 | - if (cmdline_find_option_bool(boot_command_line, "nopti")) { | ||
3060 | + if (cmdline_find_option_bool(boot_command_line, "nopti") || | ||
3061 | + cpu_mitigations_off()) { | ||
3062 | pti_mode = PTI_FORCE_OFF; | ||
3063 | pti_print_if_insecure("disabled on command line."); | ||
3064 | return; | ||
3065 | diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c | ||
3066 | index 034813d4ab1e..41dae0f0d898 100644 | ||
3067 | --- a/arch/x86/platform/atom/punit_atom_debug.c | ||
3068 | +++ b/arch/x86/platform/atom/punit_atom_debug.c | ||
3069 | @@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void) | ||
3070 | (kernel_ulong_t)&drv_data } | ||
3071 | |||
3072 | static const struct x86_cpu_id intel_punit_cpu_ids[] = { | ||
3073 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), | ||
3074 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng), | ||
3075 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt), | ||
3076 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng), | ||
3077 | ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), | ||
3078 | {} | ||
3079 | }; | ||
3080 | diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c | ||
3081 | index 5a0483e7bf66..31dce781364c 100644 | ||
3082 | --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c | ||
3083 | +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c | ||
3084 | @@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = { | ||
3085 | { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } | ||
3086 | |||
3087 | static const struct x86_cpu_id bt_sfi_cpu_ids[] = { | ||
3088 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data), | ||
3089 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data), | ||
3090 | {} | ||
3091 | }; | ||
3092 | |||
3093 | diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c | ||
3094 | index 969bf8d515c0..c651e206d796 100644 | ||
3095 | --- a/drivers/acpi/acpi_lpss.c | ||
3096 | +++ b/drivers/acpi/acpi_lpss.c | ||
3097 | @@ -292,7 +292,7 @@ static const struct lpss_device_desc bsw_spi_dev_desc = { | ||
3098 | #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } | ||
3099 | |||
3100 | static const struct x86_cpu_id lpss_cpu_ids[] = { | ||
3101 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */ | ||
3102 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */ | ||
3103 | ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ | ||
3104 | {} | ||
3105 | }; | ||
3106 | diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c | ||
3107 | index 06c31ec3cc70..9a8e286dd86f 100644 | ||
3108 | --- a/drivers/acpi/x86/utils.c | ||
3109 | +++ b/drivers/acpi/x86/utils.c | ||
3110 | @@ -54,7 +54,7 @@ static const struct always_present_id always_present_ids[] = { | ||
3111 | * Bay / Cherry Trail PWM directly poked by GPU driver in win10, | ||
3112 | * but Linux uses a separate PWM driver, harmless if not used. | ||
3113 | */ | ||
3114 | - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1), {}), | ||
3115 | + ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}), | ||
3116 | ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), | ||
3117 | /* | ||
3118 | * The INT0002 device is necessary to clear wakeup interrupt sources | ||
3119 | diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c | ||
3120 | index eb9443d5bae1..2fd6ca1021c2 100644 | ||
3121 | --- a/drivers/base/cpu.c | ||
3122 | +++ b/drivers/base/cpu.c | ||
3123 | @@ -546,11 +546,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev, | ||
3124 | return sprintf(buf, "Not affected\n"); | ||
3125 | } | ||
3126 | |||
3127 | +ssize_t __weak cpu_show_mds(struct device *dev, | ||
3128 | + struct device_attribute *attr, char *buf) | ||
3129 | +{ | ||
3130 | + return sprintf(buf, "Not affected\n"); | ||
3131 | +} | ||
3132 | + | ||
3133 | static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); | ||
3134 | static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); | ||
3135 | static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); | ||
3136 | static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); | ||
3137 | static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); | ||
3138 | +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); | ||
3139 | |||
3140 | static struct attribute *cpu_root_vulnerabilities_attrs[] = { | ||
3141 | &dev_attr_meltdown.attr, | ||
3142 | @@ -558,6 +565,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { | ||
3143 | &dev_attr_spectre_v2.attr, | ||
3144 | &dev_attr_spec_store_bypass.attr, | ||
3145 | &dev_attr_l1tf.attr, | ||
3146 | + &dev_attr_mds.attr, | ||
3147 | NULL | ||
3148 | }; | ||
3149 | |||
3150 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c | ||
3151 | index a005711f909e..29f25d5d65e0 100644 | ||
3152 | --- a/drivers/cpufreq/intel_pstate.c | ||
3153 | +++ b/drivers/cpufreq/intel_pstate.c | ||
3154 | @@ -1779,7 +1779,7 @@ static const struct pstate_funcs knl_funcs = { | ||
3155 | static const struct x86_cpu_id intel_pstate_cpu_ids[] = { | ||
3156 | ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), | ||
3157 | ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), | ||
3158 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs), | ||
3159 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs), | ||
3160 | ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), | ||
3161 | ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs), | ||
3162 | ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs), | ||
3163 | @@ -1796,7 +1796,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { | ||
3164 | ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), | ||
3165 | ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), | ||
3166 | ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), | ||
3167 | - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs), | ||
3168 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs), | ||
3169 | ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), | ||
3170 | {} | ||
3171 | }; | ||
3172 | diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c | ||
3173 | index df28b65358d2..903a4f1fadcc 100644 | ||
3174 | --- a/drivers/edac/pnd2_edac.c | ||
3175 | +++ b/drivers/edac/pnd2_edac.c | ||
3176 | @@ -1541,7 +1541,7 @@ static struct dunit_ops dnv_ops = { | ||
3177 | |||
3178 | static const struct x86_cpu_id pnd2_cpuids[] = { | ||
3179 | { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, | ||
3180 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops }, | ||
3181 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X, 0, (kernel_ulong_t)&dnv_ops }, | ||
3182 | { } | ||
3183 | }; | ||
3184 | MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); | ||
3185 | diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c | ||
3186 | index b2ccce5fb071..c4bb67ed8da3 100644 | ||
3187 | --- a/drivers/idle/intel_idle.c | ||
3188 | +++ b/drivers/idle/intel_idle.c | ||
3189 | @@ -1076,14 +1076,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { | ||
3190 | ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), | ||
3191 | ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), | ||
3192 | ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), | ||
3193 | - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), | ||
3194 | - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), | ||
3195 | + ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom), | ||
3196 | + ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft), | ||
3197 | ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), | ||
3198 | ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), | ||
3199 | ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), | ||
3200 | - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), | ||
3201 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), | ||
3202 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), | ||
3203 | + ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom), | ||
3204 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt), | ||
3205 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier), | ||
3206 | ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), | ||
3207 | ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), | ||
3208 | ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), | ||
3209 | @@ -1091,7 +1091,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { | ||
3210 | ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), | ||
3211 | ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), | ||
3212 | ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), | ||
3213 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), | ||
3214 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn), | ||
3215 | ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), | ||
3216 | ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), | ||
3217 | ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), | ||
3218 | @@ -1104,8 +1104,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { | ||
3219 | ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), | ||
3220 | ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), | ||
3221 | ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), | ||
3222 | - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), | ||
3223 | - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), | ||
3224 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, idle_cpu_bxt), | ||
3225 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv), | ||
3226 | {} | ||
3227 | }; | ||
3228 | |||
3229 | @@ -1322,7 +1322,7 @@ static void intel_idle_state_table_update(void) | ||
3230 | ivt_idle_state_table_update(); | ||
3231 | break; | ||
3232 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
3233 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3234 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3235 | bxt_idle_state_table_update(); | ||
3236 | break; | ||
3237 | case INTEL_FAM6_SKYLAKE_DESKTOP: | ||
3238 | diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c | ||
3239 | index c61109f7b793..57c1ec322e42 100644 | ||
3240 | --- a/drivers/mmc/host/sdhci-acpi.c | ||
3241 | +++ b/drivers/mmc/host/sdhci-acpi.c | ||
3242 | @@ -247,7 +247,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { | ||
3243 | static bool sdhci_acpi_byt(void) | ||
3244 | { | ||
3245 | static const struct x86_cpu_id byt[] = { | ||
3246 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, | ||
3247 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, | ||
3248 | {} | ||
3249 | }; | ||
3250 | |||
3251 | diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c | ||
3252 | index 314e135014dc..30fbe2ea6eab 100644 | ||
3253 | --- a/drivers/pci/pci-mid.c | ||
3254 | +++ b/drivers/pci/pci-mid.c | ||
3255 | @@ -62,8 +62,8 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = { | ||
3256 | * arch/x86/platform/intel-mid/pwr.c. | ||
3257 | */ | ||
3258 | static const struct x86_cpu_id lpss_cpu_ids[] = { | ||
3259 | - ICPU(INTEL_FAM6_ATOM_PENWELL), | ||
3260 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD), | ||
3261 | + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID), | ||
3262 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID), | ||
3263 | {} | ||
3264 | }; | ||
3265 | |||
3266 | diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c | ||
3267 | index a473dc51b18d..e89ad4964dc1 100644 | ||
3268 | --- a/drivers/platform/x86/intel_int0002_vgpio.c | ||
3269 | +++ b/drivers/platform/x86/intel_int0002_vgpio.c | ||
3270 | @@ -60,7 +60,7 @@ static const struct x86_cpu_id int0002_cpu_ids[] = { | ||
3271 | /* | ||
3272 | * Limit ourselves to Cherry Trail for now, until testing shows we | ||
3273 | * need to handle the INT0002 device on Baytrail too. | ||
3274 | - * ICPU(INTEL_FAM6_ATOM_SILVERMONT1), * Valleyview, Bay Trail * | ||
3275 | + * ICPU(INTEL_FAM6_ATOM_SILVERMONT), * Valleyview, Bay Trail * | ||
3276 | */ | ||
3277 | ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ | ||
3278 | {} | ||
3279 | diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c | ||
3280 | index d79fbf924b13..5ad44204a9c3 100644 | ||
3281 | --- a/drivers/platform/x86/intel_mid_powerbtn.c | ||
3282 | +++ b/drivers/platform/x86/intel_mid_powerbtn.c | ||
3283 | @@ -125,8 +125,8 @@ static const struct mid_pb_ddata mrfld_ddata = { | ||
3284 | { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } | ||
3285 | |||
3286 | static const struct x86_cpu_id mid_pb_cpu_ids[] = { | ||
3287 | - ICPU(INTEL_FAM6_ATOM_PENWELL, mfld_ddata), | ||
3288 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, mrfld_ddata), | ||
3289 | + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID, mfld_ddata), | ||
3290 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, mrfld_ddata), | ||
3291 | {} | ||
3292 | }; | ||
3293 | |||
3294 | diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c | ||
3295 | index 1423fa8710fd..b998d7da97fb 100644 | ||
3296 | --- a/drivers/platform/x86/intel_telemetry_debugfs.c | ||
3297 | +++ b/drivers/platform/x86/intel_telemetry_debugfs.c | ||
3298 | @@ -320,7 +320,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = { | ||
3299 | |||
3300 | static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = { | ||
3301 | TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf), | ||
3302 | - TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf), | ||
3303 | + TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf), | ||
3304 | {} | ||
3305 | }; | ||
3306 | |||
3307 | diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c | ||
3308 | index 2f889d6c270e..fcc6bee51a42 100644 | ||
3309 | --- a/drivers/platform/x86/intel_telemetry_pltdrv.c | ||
3310 | +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c | ||
3311 | @@ -192,7 +192,7 @@ static struct telemetry_plt_config telem_glk_config = { | ||
3312 | |||
3313 | static const struct x86_cpu_id telemetry_cpu_ids[] = { | ||
3314 | TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config), | ||
3315 | - TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config), | ||
3316 | + TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config), | ||
3317 | {} | ||
3318 | }; | ||
3319 | |||
3320 | diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c | ||
3321 | index 295d8dcba48c..8cbfcce57a06 100644 | ||
3322 | --- a/drivers/powercap/intel_rapl.c | ||
3323 | +++ b/drivers/powercap/intel_rapl.c | ||
3324 | @@ -1164,13 +1164,13 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { | ||
3325 | RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core), | ||
3326 | RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE, rapl_defaults_core), | ||
3327 | |||
3328 | - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt), | ||
3329 | + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT, rapl_defaults_byt), | ||
3330 | RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht), | ||
3331 | - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng), | ||
3332 | - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann), | ||
3333 | + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT_MID, rapl_defaults_tng), | ||
3334 | + RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT_MID, rapl_defaults_ann), | ||
3335 | RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core), | ||
3336 | - RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, rapl_defaults_core), | ||
3337 | - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core), | ||
3338 | + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, rapl_defaults_core), | ||
3339 | + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_X, rapl_defaults_core), | ||
3340 | |||
3341 | RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server), | ||
3342 | RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM, rapl_defaults_hsw_server), | ||
3343 | diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c | ||
3344 | index 1e47511a6bd5..d748527d7a38 100644 | ||
3345 | --- a/drivers/thermal/intel_soc_dts_thermal.c | ||
3346 | +++ b/drivers/thermal/intel_soc_dts_thermal.c | ||
3347 | @@ -45,7 +45,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) | ||
3348 | } | ||
3349 | |||
3350 | static const struct x86_cpu_id soc_thermal_ids[] = { | ||
3351 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1, 0, | ||
3352 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0, | ||
3353 | BYT_SOC_DTS_APIC_IRQ}, | ||
3354 | {} | ||
3355 | }; | ||
3356 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | ||
3357 | index 5041357d0297..57ae83c4d5f4 100644 | ||
3358 | --- a/include/linux/cpu.h | ||
3359 | +++ b/include/linux/cpu.h | ||
3360 | @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, | ||
3361 | struct device_attribute *attr, char *buf); | ||
3362 | extern ssize_t cpu_show_l1tf(struct device *dev, | ||
3363 | struct device_attribute *attr, char *buf); | ||
3364 | +extern ssize_t cpu_show_mds(struct device *dev, | ||
3365 | + struct device_attribute *attr, char *buf); | ||
3366 | |||
3367 | extern __printf(4, 5) | ||
3368 | struct device *cpu_device_create(struct device *parent, void *drvdata, | ||
3369 | @@ -187,4 +189,28 @@ static inline void cpu_smt_disable(bool force) { } | ||
3370 | static inline void cpu_smt_check_topology(void) { } | ||
3371 | #endif | ||
3372 | |||
3373 | +/* | ||
3374 | + * These are used for a global "mitigations=" cmdline option for toggling | ||
3375 | + * optional CPU mitigations. | ||
3376 | + */ | ||
3377 | +enum cpu_mitigations { | ||
3378 | + CPU_MITIGATIONS_OFF, | ||
3379 | + CPU_MITIGATIONS_AUTO, | ||
3380 | + CPU_MITIGATIONS_AUTO_NOSMT, | ||
3381 | +}; | ||
3382 | + | ||
3383 | +extern enum cpu_mitigations cpu_mitigations; | ||
3384 | + | ||
3385 | +/* mitigations=off */ | ||
3386 | +static inline bool cpu_mitigations_off(void) | ||
3387 | +{ | ||
3388 | + return cpu_mitigations == CPU_MITIGATIONS_OFF; | ||
3389 | +} | ||
3390 | + | ||
3391 | +/* mitigations=auto,nosmt */ | ||
3392 | +static inline bool cpu_mitigations_auto_nosmt(void) | ||
3393 | +{ | ||
3394 | + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; | ||
3395 | +} | ||
3396 | + | ||
3397 | #endif /* _LINUX_CPU_H_ */ | ||
3398 | diff --git a/kernel/cpu.c b/kernel/cpu.c | ||
3399 | index dc250ec2c096..bc6c880a093f 100644 | ||
3400 | --- a/kernel/cpu.c | ||
3401 | +++ b/kernel/cpu.c | ||
3402 | @@ -2278,3 +2278,18 @@ void __init boot_cpu_hotplug_init(void) | ||
3403 | #endif | ||
3404 | this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); | ||
3405 | } | ||
3406 | + | ||
3407 | +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; | ||
3408 | + | ||
3409 | +static int __init mitigations_parse_cmdline(char *arg) | ||
3410 | +{ | ||
3411 | + if (!strcmp(arg, "off")) | ||
3412 | + cpu_mitigations = CPU_MITIGATIONS_OFF; | ||
3413 | + else if (!strcmp(arg, "auto")) | ||
3414 | + cpu_mitigations = CPU_MITIGATIONS_AUTO; | ||
3415 | + else if (!strcmp(arg, "auto,nosmt")) | ||
3416 | + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; | ||
3417 | + | ||
3418 | + return 0; | ||
3419 | +} | ||
3420 | +early_param("mitigations", mitigations_parse_cmdline); | ||
3421 | diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c | ||
3422 | index b74bbee111c6..c6c8d20be1d2 100644 | ||
3423 | --- a/sound/soc/intel/boards/bytcr_rt5651.c | ||
3424 | +++ b/sound/soc/intel/boards/bytcr_rt5651.c | ||
3425 | @@ -787,7 +787,7 @@ static struct snd_soc_card byt_rt5651_card = { | ||
3426 | }; | ||
3427 | |||
3428 | static const struct x86_cpu_id baytrail_cpu_ids[] = { | ||
3429 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, /* Valleyview */ | ||
3430 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, /* Valleyview */ | ||
3431 | {} | ||
3432 | }; | ||
3433 | |||
3434 | diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile | ||
3435 | index 2ab25aa38263..ff058bfbca3e 100644 | ||
3436 | --- a/tools/power/x86/turbostat/Makefile | ||
3437 | +++ b/tools/power/x86/turbostat/Makefile | ||
3438 | @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") | ||
3439 | endif | ||
3440 | |||
3441 | turbostat : turbostat.c | ||
3442 | -CFLAGS += -Wall | ||
3443 | +CFLAGS += -Wall -I../../../include | ||
3444 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' | ||
3445 | CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' | ||
3446 | |||
3447 | diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c | ||
3448 | index 83964f796edb..fbb53c952b73 100644 | ||
3449 | --- a/tools/power/x86/turbostat/turbostat.c | ||
3450 | +++ b/tools/power/x86/turbostat/turbostat.c | ||
3451 | @@ -2082,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model) | ||
3452 | switch (model) { | ||
3453 | case INTEL_FAM6_ATOM_GOLDMONT: | ||
3454 | case INTEL_FAM6_SKYLAKE_X: | ||
3455 | - case INTEL_FAM6_ATOM_DENVERTON: | ||
3456 | + case INTEL_FAM6_ATOM_GOLDMONT_X: | ||
3457 | return 1; | ||
3458 | } | ||
3459 | return 0; | ||
3460 | @@ -3149,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | ||
3461 | pkg_cstate_limits = skx_pkg_cstate_limits; | ||
3462 | has_misc_feature_control = 1; | ||
3463 | break; | ||
3464 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ | ||
3465 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ | ||
3466 | no_MSR_MISC_PWR_MGMT = 1; | ||
3467 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ | ||
3468 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ | ||
3469 | pkg_cstate_limits = slv_pkg_cstate_limits; | ||
3470 | break; | ||
3471 | case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ | ||
3472 | @@ -3163,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | ||
3473 | pkg_cstate_limits = phi_pkg_cstate_limits; | ||
3474 | break; | ||
3475 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | ||
3476 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3477 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ | ||
3478 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3479 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ | ||
3480 | pkg_cstate_limits = bxt_pkg_cstate_limits; | ||
3481 | break; | ||
3482 | default: | ||
3483 | @@ -3193,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model) | ||
3484 | return 0; | ||
3485 | |||
3486 | switch (model) { | ||
3487 | - case INTEL_FAM6_ATOM_SILVERMONT1: | ||
3488 | - case INTEL_FAM6_ATOM_MERRIFIELD: | ||
3489 | - case INTEL_FAM6_ATOM_MOOREFIELD: | ||
3490 | + case INTEL_FAM6_ATOM_SILVERMONT: | ||
3491 | + case INTEL_FAM6_ATOM_SILVERMONT_MID: | ||
3492 | + case INTEL_FAM6_ATOM_AIRMONT_MID: | ||
3493 | return 1; | ||
3494 | } | ||
3495 | return 0; | ||
3496 | @@ -3207,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model) | ||
3497 | return 0; | ||
3498 | |||
3499 | switch (model) { | ||
3500 | - case INTEL_FAM6_ATOM_DENVERTON: | ||
3501 | + case INTEL_FAM6_ATOM_GOLDMONT_X: | ||
3502 | return 1; | ||
3503 | } | ||
3504 | return 0; | ||
3505 | @@ -3724,8 +3724,8 @@ double get_tdp(unsigned int model) | ||
3506 | return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; | ||
3507 | |||
3508 | switch (model) { | ||
3509 | - case INTEL_FAM6_ATOM_SILVERMONT1: | ||
3510 | - case INTEL_FAM6_ATOM_SILVERMONT2: | ||
3511 | + case INTEL_FAM6_ATOM_SILVERMONT: | ||
3512 | + case INTEL_FAM6_ATOM_SILVERMONT_X: | ||
3513 | return 30.0; | ||
3514 | default: | ||
3515 | return 135.0; | ||
3516 | @@ -3791,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model) | ||
3517 | } | ||
3518 | break; | ||
3519 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | ||
3520 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3521 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3522 | do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; | ||
3523 | if (rapl_joules) | ||
3524 | BIC_PRESENT(BIC_Pkg_J); | ||
3525 | @@ -3850,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model) | ||
3526 | BIC_PRESENT(BIC_RAMWatt); | ||
3527 | } | ||
3528 | break; | ||
3529 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ | ||
3530 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ | ||
3531 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ | ||
3532 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ | ||
3533 | do_rapl = RAPL_PKG | RAPL_CORES; | ||
3534 | if (rapl_joules) { | ||
3535 | BIC_PRESENT(BIC_Pkg_J); | ||
3536 | @@ -3861,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model) | ||
3537 | BIC_PRESENT(BIC_CorWatt); | ||
3538 | } | ||
3539 | break; | ||
3540 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ | ||
3541 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ | ||
3542 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; | ||
3543 | BIC_PRESENT(BIC_PKG__); | ||
3544 | BIC_PRESENT(BIC_RAM__); | ||
3545 | @@ -3884,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model) | ||
3546 | return; | ||
3547 | |||
3548 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
3549 | - if (model == INTEL_FAM6_ATOM_SILVERMONT1) | ||
3550 | + if (model == INTEL_FAM6_ATOM_SILVERMONT) | ||
3551 | rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; | ||
3552 | else | ||
3553 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
3554 | @@ -4141,8 +4141,8 @@ int has_snb_msrs(unsigned int family, unsigned int model) | ||
3555 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3556 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ | ||
3557 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | ||
3558 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3559 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ | ||
3560 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3561 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ | ||
3562 | return 1; | ||
3563 | } | ||
3564 | return 0; | ||
3565 | @@ -4174,7 +4174,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) | ||
3566 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | ||
3567 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ | ||
3568 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | ||
3569 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3570 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3571 | return 1; | ||
3572 | } | ||
3573 | return 0; | ||
3574 | @@ -4209,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model) | ||
3575 | if (!genuine_intel) | ||
3576 | return 0; | ||
3577 | switch (model) { | ||
3578 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ | ||
3579 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ | ||
3580 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ | ||
3581 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ | ||
3582 | return 1; | ||
3583 | } | ||
3584 | return 0; | ||
3585 | @@ -4581,11 +4581,11 @@ void process_cpuid() | ||
3586 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ | ||
3587 | crystal_hz = 24000000; /* 24.0 MHz */ | ||
3588 | break; | ||
3589 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ | ||
3590 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ | ||
3591 | crystal_hz = 25000000; /* 25.0 MHz */ | ||
3592 | break; | ||
3593 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ | ||
3594 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: | ||
3595 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: | ||
3596 | crystal_hz = 19200000; /* 19.2 MHz */ | ||
3597 | break; | ||
3598 | default: | ||
3599 | diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile | ||
3600 | index f4534fb8b951..da781b430937 100644 | ||
3601 | --- a/tools/power/x86/x86_energy_perf_policy/Makefile | ||
3602 | +++ b/tools/power/x86/x86_energy_perf_policy/Makefile | ||
3603 | @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") | ||
3604 | endif | ||
3605 | |||
3606 | x86_energy_perf_policy : x86_energy_perf_policy.c | ||
3607 | -CFLAGS += -Wall | ||
3608 | +CFLAGS += -Wall -I../../../include | ||
3609 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' | ||
3610 | |||
3611 | %: %.c |