Contents of /trunk/kernel-alx/patches-4.19/0142-4.19.43-all-fixes.patch
Parent Directory | Revision Log
Revision 3421 -
(show annotations)
(download)
Fri Aug 2 11:47:50 2019 UTC (5 years, 1 month ago) by niro
File size: 140299 byte(s)
-linux-4.19.43
1 | diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu |
2 | index 73318225a368..8718d4ad227b 100644 |
3 | --- a/Documentation/ABI/testing/sysfs-devices-system-cpu |
4 | +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu |
5 | @@ -477,6 +477,7 @@ What: /sys/devices/system/cpu/vulnerabilities |
6 | /sys/devices/system/cpu/vulnerabilities/spectre_v2 |
7 | /sys/devices/system/cpu/vulnerabilities/spec_store_bypass |
8 | /sys/devices/system/cpu/vulnerabilities/l1tf |
9 | + /sys/devices/system/cpu/vulnerabilities/mds |
10 | Date: January 2018 |
11 | Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> |
12 | Description: Information about CPU vulnerabilities |
13 | @@ -489,8 +490,7 @@ Description: Information about CPU vulnerabilities |
14 | "Vulnerable" CPU is affected and no mitigation in effect |
15 | "Mitigation: $M" CPU is affected and mitigation $M is in effect |
16 | |
17 | - Details about the l1tf file can be found in |
18 | - Documentation/admin-guide/l1tf.rst |
19 | + See also: Documentation/admin-guide/hw-vuln/index.rst |
20 | |
21 | What: /sys/devices/system/cpu/smt |
22 | /sys/devices/system/cpu/smt/active |
23 | diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst |
24 | new file mode 100644 |
25 | index 000000000000..ffc064c1ec68 |
26 | --- /dev/null |
27 | +++ b/Documentation/admin-guide/hw-vuln/index.rst |
28 | @@ -0,0 +1,13 @@ |
29 | +======================== |
30 | +Hardware vulnerabilities |
31 | +======================== |
32 | + |
33 | +This section describes CPU vulnerabilities and provides an overview of the |
34 | +possible mitigations along with guidance for selecting mitigations if they |
35 | +are configurable at compile, boot or run time. |
36 | + |
37 | +.. toctree:: |
38 | + :maxdepth: 1 |
39 | + |
40 | + l1tf |
41 | + mds |
42 | diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst |
43 | new file mode 100644 |
44 | index 000000000000..31653a9f0e1b |
45 | --- /dev/null |
46 | +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst |
47 | @@ -0,0 +1,615 @@ |
48 | +L1TF - L1 Terminal Fault |
49 | +======================== |
50 | + |
51 | +L1 Terminal Fault is a hardware vulnerability which allows unprivileged |
52 | +speculative access to data which is available in the Level 1 Data Cache |
53 | +when the page table entry controlling the virtual address, which is used |
54 | +for the access, has the Present bit cleared or other reserved bits set. |
55 | + |
56 | +Affected processors |
57 | +------------------- |
58 | + |
59 | +This vulnerability affects a wide range of Intel processors. The |
60 | +vulnerability is not present on: |
61 | + |
62 | + - Processors from AMD, Centaur and other non Intel vendors |
63 | + |
64 | + - Older processor models, where the CPU family is < 6 |
65 | + |
66 | + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, |
67 | + Penwell, Pineview, Silvermont, Airmont, Merrifield) |
68 | + |
69 | + - The Intel XEON PHI family |
70 | + |
71 | + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the |
72 | + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected |
73 | + by the Meltdown vulnerability either. These CPUs should become |
74 | + available by end of 2018. |
75 | + |
76 | +Whether a processor is affected or not can be read out from the L1TF |
77 | +vulnerability file in sysfs. See :ref:`l1tf_sys_info`. |
78 | + |
79 | +Related CVEs |
80 | +------------ |
81 | + |
82 | +The following CVE entries are related to the L1TF vulnerability: |
83 | + |
84 | + ============= ================= ============================== |
85 | + CVE-2018-3615 L1 Terminal Fault SGX related aspects |
86 | + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects |
87 | + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects |
88 | + ============= ================= ============================== |
89 | + |
90 | +Problem |
91 | +------- |
92 | + |
93 | +If an instruction accesses a virtual address for which the relevant page |
94 | +table entry (PTE) has the Present bit cleared or other reserved bits set, |
95 | +then speculative execution ignores the invalid PTE and loads the referenced |
96 | +data if it is present in the Level 1 Data Cache, as if the page referenced |
97 | +by the address bits in the PTE was still present and accessible. |
98 | + |
99 | +While this is a purely speculative mechanism and the instruction will raise |
100 | +a page fault when it is retired eventually, the pure act of loading the |
101 | +data and making it available to other speculative instructions opens up the |
102 | +opportunity for side channel attacks to unprivileged malicious code, |
103 | +similar to the Meltdown attack. |
104 | + |
105 | +While Meltdown breaks the user space to kernel space protection, L1TF |
106 | +allows to attack any physical memory address in the system and the attack |
107 | +works across all protection domains. It allows an attack of SGX and also |
108 | +works from inside virtual machines because the speculation bypasses the |
109 | +extended page table (EPT) protection mechanism. |
110 | + |
111 | + |
112 | +Attack scenarios |
113 | +---------------- |
114 | + |
115 | +1. Malicious user space |
116 | +^^^^^^^^^^^^^^^^^^^^^^^ |
117 | + |
118 | + Operating Systems store arbitrary information in the address bits of a |
119 | + PTE which is marked non present. This allows a malicious user space |
120 | + application to attack the physical memory to which these PTEs resolve. |
121 | + In some cases user-space can maliciously influence the information |
122 | + encoded in the address bits of the PTE, thus making attacks more |
123 | + deterministic and more practical. |
124 | + |
125 | + The Linux kernel contains a mitigation for this attack vector, PTE |
126 | + inversion, which is permanently enabled and has no performance |
127 | + impact. The kernel ensures that the address bits of PTEs, which are not |
128 | + marked present, never point to cacheable physical memory space. |
129 | + |
130 | + A system with an up to date kernel is protected against attacks from |
131 | + malicious user space applications. |
132 | + |
133 | +2. Malicious guest in a virtual machine |
134 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
135 | + |
136 | + The fact that L1TF breaks all domain protections allows malicious guest |
137 | + OSes, which can control the PTEs directly, and malicious guest user |
138 | + space applications, which run on an unprotected guest kernel lacking the |
139 | + PTE inversion mitigation for L1TF, to attack physical host memory. |
140 | + |
141 | + A special aspect of L1TF in the context of virtualization is symmetric |
142 | + multi threading (SMT). The Intel implementation of SMT is called |
143 | + HyperThreading. The fact that Hyperthreads on the affected processors |
144 | + share the L1 Data Cache (L1D) is important for this. As the flaw allows |
145 | + only to attack data which is present in L1D, a malicious guest running |
146 | + on one Hyperthread can attack the data which is brought into the L1D by |
147 | + the context which runs on the sibling Hyperthread of the same physical |
148 | + core. This context can be host OS, host user space or a different guest. |
149 | + |
150 | + If the processor does not support Extended Page Tables, the attack is |
151 | + only possible, when the hypervisor does not sanitize the content of the |
152 | + effective (shadow) page tables. |
153 | + |
154 | + While solutions exist to mitigate these attack vectors fully, these |
155 | + mitigations are not enabled by default in the Linux kernel because they |
156 | + can affect performance significantly. The kernel provides several |
157 | + mechanisms which can be utilized to address the problem depending on the |
158 | + deployment scenario. The mitigations, their protection scope and impact |
159 | + are described in the next sections. |
160 | + |
161 | + The default mitigations and the rationale for choosing them are explained |
162 | + at the end of this document. See :ref:`default_mitigations`. |
163 | + |
164 | +.. _l1tf_sys_info: |
165 | + |
166 | +L1TF system information |
167 | +----------------------- |
168 | + |
169 | +The Linux kernel provides a sysfs interface to enumerate the current L1TF |
170 | +status of the system: whether the system is vulnerable, and which |
171 | +mitigations are active. The relevant sysfs file is: |
172 | + |
173 | +/sys/devices/system/cpu/vulnerabilities/l1tf |
174 | + |
175 | +The possible values in this file are: |
176 | + |
177 | + =========================== =============================== |
178 | + 'Not affected' The processor is not vulnerable |
179 | + 'Mitigation: PTE Inversion' The host protection is active |
180 | + =========================== =============================== |
181 | + |
182 | +If KVM/VMX is enabled and the processor is vulnerable then the following |
183 | +information is appended to the 'Mitigation: PTE Inversion' part: |
184 | + |
185 | + - SMT status: |
186 | + |
187 | + ===================== ================ |
188 | + 'VMX: SMT vulnerable' SMT is enabled |
189 | + 'VMX: SMT disabled' SMT is disabled |
190 | + ===================== ================ |
191 | + |
192 | + - L1D Flush mode: |
193 | + |
194 | + ================================ ==================================== |
195 | + 'L1D vulnerable' L1D flushing is disabled |
196 | + |
197 | + 'L1D conditional cache flushes' L1D flush is conditionally enabled |
198 | + |
199 | + 'L1D cache flushes' L1D flush is unconditionally enabled |
200 | + ================================ ==================================== |
201 | + |
202 | +The resulting grade of protection is discussed in the following sections. |
203 | + |
204 | + |
205 | +Host mitigation mechanism |
206 | +------------------------- |
207 | + |
208 | +The kernel is unconditionally protected against L1TF attacks from malicious |
209 | +user space running on the host. |
210 | + |
211 | + |
212 | +Guest mitigation mechanisms |
213 | +--------------------------- |
214 | + |
215 | +.. _l1d_flush: |
216 | + |
217 | +1. L1D flush on VMENTER |
218 | +^^^^^^^^^^^^^^^^^^^^^^^ |
219 | + |
220 | + To make sure that a guest cannot attack data which is present in the L1D |
221 | + the hypervisor flushes the L1D before entering the guest. |
222 | + |
223 | + Flushing the L1D evicts not only the data which should not be accessed |
224 | + by a potentially malicious guest, it also flushes the guest |
225 | + data. Flushing the L1D has a performance impact as the processor has to |
226 | + bring the flushed guest data back into the L1D. Depending on the |
227 | + frequency of VMEXIT/VMENTER and the type of computations in the guest |
228 | + performance degradation in the range of 1% to 50% has been observed. For |
229 | + scenarios where guest VMEXIT/VMENTER are rare the performance impact is |
230 | + minimal. Virtio and mechanisms like posted interrupts are designed to |
231 | + confine the VMEXITs to a bare minimum, but specific configurations and |
232 | + application scenarios might still suffer from a high VMEXIT rate. |
233 | + |
234 | + The kernel provides two L1D flush modes: |
235 | + - conditional ('cond') |
236 | + - unconditional ('always') |
237 | + |
238 | + The conditional mode avoids L1D flushing after VMEXITs which execute |
239 | + only audited code paths before the corresponding VMENTER. These code |
240 | + paths have been verified that they cannot expose secrets or other |
241 | + interesting data to an attacker, but they can leak information about the |
242 | + address space layout of the hypervisor. |
243 | + |
244 | + Unconditional mode flushes L1D on all VMENTER invocations and provides |
245 | + maximum protection. It has a higher overhead than the conditional |
246 | + mode. The overhead cannot be quantified correctly as it depends on the |
247 | + workload scenario and the resulting number of VMEXITs. |
248 | + |
249 | + The general recommendation is to enable L1D flush on VMENTER. The kernel |
250 | + defaults to conditional mode on affected processors. |
251 | + |
252 | + **Note**, that L1D flush does not prevent the SMT problem because the |
253 | + sibling thread will also bring back its data into the L1D which makes it |
254 | + attackable again. |
255 | + |
256 | + L1D flush can be controlled by the administrator via the kernel command |
257 | + line and sysfs control files. See :ref:`mitigation_control_command_line` |
258 | + and :ref:`mitigation_control_kvm`. |
259 | + |
260 | +.. _guest_confinement: |
261 | + |
262 | +2. Guest VCPU confinement to dedicated physical cores |
263 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
264 | + |
265 | + To address the SMT problem, it is possible to make a guest or a group of |
266 | + guests affine to one or more physical cores. The proper mechanism for |
267 | + that is to utilize exclusive cpusets to ensure that no other guest or |
268 | + host tasks can run on these cores. |
269 | + |
270 | + If only a single guest or related guests run on sibling SMT threads on |
271 | + the same physical core then they can only attack their own memory and |
272 | + restricted parts of the host memory. |
273 | + |
274 | + Host memory is attackable, when one of the sibling SMT threads runs in |
275 | + host OS (hypervisor) context and the other in guest context. The amount |
276 | + of valuable information from the host OS context depends on the context |
277 | + which the host OS executes, i.e. interrupts, soft interrupts and kernel |
278 | + threads. The amount of valuable data from these contexts cannot be |
279 | + declared as non-interesting for an attacker without deep inspection of |
280 | + the code. |
281 | + |
282 | + **Note**, that assigning guests to a fixed set of physical cores affects |
283 | + the ability of the scheduler to do load balancing and might have |
284 | + negative effects on CPU utilization depending on the hosting |
285 | + scenario. Disabling SMT might be a viable alternative for particular |
286 | + scenarios. |
287 | + |
288 | + For further information about confining guests to a single or to a group |
289 | + of cores consult the cpusets documentation: |
290 | + |
291 | + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt |
292 | + |
293 | +.. _interrupt_isolation: |
294 | + |
295 | +3. Interrupt affinity |
296 | +^^^^^^^^^^^^^^^^^^^^^ |
297 | + |
298 | + Interrupts can be made affine to logical CPUs. This is not universally |
299 | + true because there are types of interrupts which are truly per CPU |
300 | + interrupts, e.g. the local timer interrupt. Aside of that multi queue |
301 | + devices affine their interrupts to single CPUs or groups of CPUs per |
302 | + queue without allowing the administrator to control the affinities. |
303 | + |
304 | + Moving the interrupts, which can be affinity controlled, away from CPUs |
305 | + which run untrusted guests, reduces the attack vector space. |
306 | + |
307 | + Whether the interrupts which are affine to CPUs, which run untrusted |
308 | + guests, provide interesting data for an attacker depends on the system |
309 | + configuration and the scenarios which run on the system. While for some |
310 | + of the interrupts it can be assumed that they won't expose interesting |
311 | + information beyond exposing hints about the host OS memory layout, there |
312 | + is no way to make general assumptions. |
313 | + |
314 | + Interrupt affinity can be controlled by the administrator via the |
315 | + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is |
316 | + available at: |
317 | + |
318 | + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt |
319 | + |
320 | +.. _smt_control: |
321 | + |
322 | +4. SMT control |
323 | +^^^^^^^^^^^^^^ |
324 | + |
325 | + To prevent the SMT issues of L1TF it might be necessary to disable SMT |
326 | + completely. Disabling SMT can have a significant performance impact, but |
327 | + the impact depends on the hosting scenario and the type of workloads. |
328 | + The impact of disabling SMT needs also to be weighed against the impact |
329 | + of other mitigation solutions like confining guests to dedicated cores. |
330 | + |
331 | + The kernel provides a sysfs interface to retrieve the status of SMT and |
332 | + to control it. It also provides a kernel command line interface to |
333 | + control SMT. |
334 | + |
335 | + The kernel command line interface consists of the following options: |
336 | + |
337 | + =========== ========================================================== |
338 | + nosmt Affects the bring up of the secondary CPUs during boot. The |
339 | + kernel tries to bring all present CPUs online during the |
340 | + boot process. "nosmt" makes sure that from each physical |
341 | + core only one - the so called primary (hyper) thread is |
342 | + activated. Due to a design flaw of Intel processors related |
343 | + to Machine Check Exceptions the non primary siblings have |
344 | + to be brought up at least partially and are then shut down |
345 | + again. "nosmt" can be undone via the sysfs interface. |
346 | + |
347 | + nosmt=force Has the same effect as "nosmt" but it does not allow to |
348 | + undo the SMT disable via the sysfs interface. |
349 | + =========== ========================================================== |
350 | + |
351 | + The sysfs interface provides two files: |
352 | + |
353 | + - /sys/devices/system/cpu/smt/control |
354 | + - /sys/devices/system/cpu/smt/active |
355 | + |
356 | + /sys/devices/system/cpu/smt/control: |
357 | + |
358 | + This file allows to read out the SMT control state and provides the |
359 | + ability to disable or (re)enable SMT. The possible states are: |
360 | + |
361 | + ============== =================================================== |
362 | + on SMT is supported by the CPU and enabled. All |
363 | + logical CPUs can be onlined and offlined without |
364 | + restrictions. |
365 | + |
366 | + off SMT is supported by the CPU and disabled. Only |
367 | + the so called primary SMT threads can be onlined |
368 | + and offlined without restrictions. An attempt to |
369 | + online a non-primary sibling is rejected |
370 | + |
371 | + forceoff Same as 'off' but the state cannot be controlled. |
372 | + Attempts to write to the control file are rejected. |
373 | + |
374 | + notsupported The processor does not support SMT. It's therefore |
375 | + not affected by the SMT implications of L1TF. |
376 | + Attempts to write to the control file are rejected. |
377 | + ============== =================================================== |
378 | + |
379 | + The possible states which can be written into this file to control SMT |
380 | + state are: |
381 | + |
382 | + - on |
383 | + - off |
384 | + - forceoff |
385 | + |
386 | + /sys/devices/system/cpu/smt/active: |
387 | + |
388 | + This file reports whether SMT is enabled and active, i.e. if on any |
389 | + physical core two or more sibling threads are online. |
390 | + |
391 | + SMT control is also possible at boot time via the l1tf kernel command |
392 | + line parameter in combination with L1D flush control. See |
393 | + :ref:`mitigation_control_command_line`. |
394 | + |
395 | +5. Disabling EPT |
396 | +^^^^^^^^^^^^^^^^ |
397 | + |
398 | + Disabling EPT for virtual machines provides full mitigation for L1TF even |
399 | + with SMT enabled, because the effective page tables for guests are |
400 | + managed and sanitized by the hypervisor. Though disabling EPT has a |
401 | + significant performance impact especially when the Meltdown mitigation |
402 | + KPTI is enabled. |
403 | + |
404 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. |
405 | + |
406 | +There is ongoing research and development for new mitigation mechanisms to |
407 | +address the performance impact of disabling SMT or EPT. |
408 | + |
409 | +.. _mitigation_control_command_line: |
410 | + |
411 | +Mitigation control on the kernel command line |
412 | +--------------------------------------------- |
413 | + |
414 | +The kernel command line allows to control the L1TF mitigations at boot |
415 | +time with the option "l1tf=". The valid arguments for this option are: |
416 | + |
417 | + ============ ============================================================= |
418 | + full Provides all available mitigations for the L1TF |
419 | + vulnerability. Disables SMT and enables all mitigations in |
420 | + the hypervisors, i.e. unconditional L1D flushing |
421 | + |
422 | + SMT control and L1D flush control via the sysfs interface |
423 | + is still possible after boot. Hypervisors will issue a |
424 | + warning when the first VM is started in a potentially |
425 | + insecure configuration, i.e. SMT enabled or L1D flush |
426 | + disabled. |
427 | + |
428 | + full,force Same as 'full', but disables SMT and L1D flush runtime |
429 | + control. Implies the 'nosmt=force' command line option. |
430 | + (i.e. sysfs control of SMT is disabled.) |
431 | + |
432 | + flush Leaves SMT enabled and enables the default hypervisor |
433 | + mitigation, i.e. conditional L1D flushing |
434 | + |
435 | + SMT control and L1D flush control via the sysfs interface |
436 | + is still possible after boot. Hypervisors will issue a |
437 | + warning when the first VM is started in a potentially |
438 | + insecure configuration, i.e. SMT enabled or L1D flush |
439 | + disabled. |
440 | + |
441 | + flush,nosmt Disables SMT and enables the default hypervisor mitigation, |
442 | + i.e. conditional L1D flushing. |
443 | + |
444 | + SMT control and L1D flush control via the sysfs interface |
445 | + is still possible after boot. Hypervisors will issue a |
446 | + warning when the first VM is started in a potentially |
447 | + insecure configuration, i.e. SMT enabled or L1D flush |
448 | + disabled. |
449 | + |
450 | + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is |
451 | + started in a potentially insecure configuration. |
452 | + |
453 | + off Disables hypervisor mitigations and doesn't emit any |
454 | + warnings. |
455 | + It also drops the swap size and available RAM limit restrictions |
456 | + on both hypervisor and bare metal. |
457 | + |
458 | + ============ ============================================================= |
459 | + |
460 | +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. |
461 | + |
462 | + |
463 | +.. _mitigation_control_kvm: |
464 | + |
465 | +Mitigation control for KVM - module parameter |
466 | +------------------------------------------------------------- |
467 | + |
468 | +The KVM hypervisor mitigation mechanism, flushing the L1D cache when |
469 | +entering a guest, can be controlled with a module parameter. |
470 | + |
471 | +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the |
472 | +following arguments: |
473 | + |
474 | + ============ ============================================================== |
475 | + always L1D cache flush on every VMENTER. |
476 | + |
477 | + cond Flush L1D on VMENTER only when the code between VMEXIT and |
478 | + VMENTER can leak host memory which is considered |
479 | + interesting for an attacker. This still can leak host memory |
480 | + which allows e.g. to determine the hosts address space layout. |
481 | + |
482 | + never Disables the mitigation |
483 | + ============ ============================================================== |
484 | + |
485 | +The parameter can be provided on the kernel command line, as a module |
486 | +parameter when loading the modules and at runtime modified via the sysfs |
487 | +file: |
488 | + |
489 | +/sys/module/kvm_intel/parameters/vmentry_l1d_flush |
490 | + |
491 | +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command |
492 | +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush |
493 | +module parameter is ignored and writes to the sysfs file are rejected. |
494 | + |
495 | +.. _mitigation_selection: |
496 | + |
497 | +Mitigation selection guide |
498 | +-------------------------- |
499 | + |
500 | +1. No virtualization in use |
501 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
502 | + |
503 | + The system is protected by the kernel unconditionally and no further |
504 | + action is required. |
505 | + |
506 | +2. Virtualization with trusted guests |
507 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
508 | + |
509 | + If the guest comes from a trusted source and the guest OS kernel is |
510 | + guaranteed to have the L1TF mitigations in place the system is fully |
511 | + protected against L1TF and no further action is required. |
512 | + |
513 | + To avoid the overhead of the default L1D flushing on VMENTER the |
514 | + administrator can disable the flushing via the kernel command line and |
515 | + sysfs control files. See :ref:`mitigation_control_command_line` and |
516 | + :ref:`mitigation_control_kvm`. |
517 | + |
518 | + |
519 | +3. Virtualization with untrusted guests |
520 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
521 | + |
522 | +3.1. SMT not supported or disabled |
523 | +"""""""""""""""""""""""""""""""""" |
524 | + |
525 | + If SMT is not supported by the processor or disabled in the BIOS or by |
526 | + the kernel, it's only required to enforce L1D flushing on VMENTER. |
527 | + |
528 | + Conditional L1D flushing is the default behaviour and can be tuned. See |
529 | + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. |
530 | + |
531 | +3.2. EPT not supported or disabled |
532 | +"""""""""""""""""""""""""""""""""" |
533 | + |
534 | + If EPT is not supported by the processor or disabled in the hypervisor, |
535 | + the system is fully protected. SMT can stay enabled and L1D flushing on |
536 | + VMENTER is not required. |
537 | + |
538 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. |
539 | + |
540 | +3.3. SMT and EPT supported and active |
541 | +""""""""""""""""""""""""""""""""""""" |
542 | + |
543 | + If SMT and EPT are supported and active then various degrees of |
544 | + mitigations can be employed: |
545 | + |
546 | + - L1D flushing on VMENTER: |
547 | + |
548 | + L1D flushing on VMENTER is the minimal protection requirement, but it |
549 | + is only potent in combination with other mitigation methods. |
550 | + |
551 | + Conditional L1D flushing is the default behaviour and can be tuned. See |
552 | + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. |
553 | + |
554 | + - Guest confinement: |
555 | + |
556 | + Confinement of guests to a single or a group of physical cores which |
557 | + are not running any other processes, can reduce the attack surface |
558 | + significantly, but interrupts, soft interrupts and kernel threads can |
559 | + still expose valuable data to a potential attacker. See |
560 | + :ref:`guest_confinement`. |
561 | + |
562 | + - Interrupt isolation: |
563 | + |
564 | + Isolating the guest CPUs from interrupts can reduce the attack surface |
565 | + further, but still allows a malicious guest to explore a limited amount |
566 | + of host physical memory. This can at least be used to gain knowledge |
567 | + about the host address space layout. The interrupts which have a fixed |
568 | + affinity to the CPUs which run the untrusted guests can depending on |
569 | + the scenario still trigger soft interrupts and schedule kernel threads |
570 | + which might expose valuable information. See |
571 | + :ref:`interrupt_isolation`. |
572 | + |
573 | +The above three mitigation methods combined can provide protection to a |
574 | +certain degree, but the risk of the remaining attack surface has to be |
575 | +carefully analyzed. For full protection the following methods are |
576 | +available: |
577 | + |
578 | + - Disabling SMT: |
579 | + |
580 | + Disabling SMT and enforcing the L1D flushing provides the maximum |
581 | + amount of protection. This mitigation is not depending on any of the |
582 | + above mitigation methods. |
583 | + |
584 | + SMT control and L1D flushing can be tuned by the command line |
585 | + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run |
586 | + time with the matching sysfs control files. See :ref:`smt_control`, |
587 | + :ref:`mitigation_control_command_line` and |
588 | + :ref:`mitigation_control_kvm`. |
589 | + |
590 | + - Disabling EPT: |
591 | + |
592 | + Disabling EPT provides the maximum amount of protection as well. It is |
593 | + not depending on any of the above mitigation methods. SMT can stay |
594 | + enabled and L1D flushing is not required, but the performance impact is |
595 | + significant. |
596 | + |
597 | + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' |
598 | + parameter. |
599 | + |
600 | +3.4. Nested virtual machines |
601 | +"""""""""""""""""""""""""""" |
602 | + |
603 | +When nested virtualization is in use, three operating systems are involved: |
604 | +the bare metal hypervisor, the nested hypervisor and the nested virtual |
605 | +machine. VMENTER operations from the nested hypervisor into the nested |
606 | +guest will always be processed by the bare metal hypervisor. If KVM is the |
607 | +bare metal hypervisor it will: |
608 | + |
609 | + - Flush the L1D cache on every switch from the nested hypervisor to the |
610 | + nested virtual machine, so that the nested hypervisor's secrets are not |
611 | + exposed to the nested virtual machine; |
612 | + |
613 | + - Flush the L1D cache on every switch from the nested virtual machine to |
614 | + the nested hypervisor; this is a complex operation, and flushing the L1D |
615 | + cache avoids that the bare metal hypervisor's secrets are exposed to the |
616 | + nested virtual machine; |
617 | + |
618 | + - Instruct the nested hypervisor to not perform any L1D cache flush. This |
619 | + is an optimization to avoid double L1D flushing. |
620 | + |
621 | + |
622 | +.. _default_mitigations: |
623 | + |
624 | +Default mitigations |
625 | +------------------- |
626 | + |
627 | + The kernel default mitigations for vulnerable processors are: |
628 | + |
629 | + - PTE inversion to protect against malicious user space. This is done |
630 | + unconditionally and cannot be controlled. The swap storage is limited |
631 | + to ~16TB. |
632 | + |
633 | + - L1D conditional flushing on VMENTER when EPT is enabled for |
634 | + a guest. |
635 | + |
636 | + The kernel does not by default enforce the disabling of SMT, which leaves |
637 | + SMT systems vulnerable when running untrusted guests with EPT enabled. |
638 | + |
639 | + The rationale for this choice is: |
640 | + |
641 | + - Force disabling SMT can break existing setups, especially with |
642 | + unattended updates. |
643 | + |
644 | + - If regular users run untrusted guests on their machine, then L1TF is |
645 | + just an add on to other malware which might be embedded in an untrusted |
646 | + guest, e.g. spam-bots or attacks on the local network. |
647 | + |
648 | + There is no technical way to prevent a user from running untrusted code |
649 | + on their machines blindly. |
650 | + |
651 | + - It's technically extremely unlikely and from today's knowledge even |
652 | + impossible that L1TF can be exploited via the most popular attack |
653 | + mechanisms like JavaScript because these mechanisms have no way to |
654 | + control PTEs. If this would be possible and not other mitigation would |
655 | + be possible, then the default might be different. |
656 | + |
657 | + - The administrators of cloud and hosting setups have to carefully |
658 | + analyze the risk for their scenarios and make the appropriate |
659 | + mitigation choices, which might even vary across their deployed |
660 | + machines and also result in other changes of their overall setup. |
661 | + There is no way for the kernel to provide a sensible default for this |
662 | + kind of scenarios. |
663 | diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst |
664 | new file mode 100644 |
665 | index 000000000000..e3a796c0d3a2 |
666 | --- /dev/null |
667 | +++ b/Documentation/admin-guide/hw-vuln/mds.rst |
668 | @@ -0,0 +1,308 @@ |
669 | +MDS - Microarchitectural Data Sampling |
670 | +====================================== |
671 | + |
672 | +Microarchitectural Data Sampling is a hardware vulnerability which allows |
673 | +unprivileged speculative access to data which is available in various CPU |
674 | +internal buffers. |
675 | + |
676 | +Affected processors |
677 | +------------------- |
678 | + |
679 | +This vulnerability affects a wide range of Intel processors. The |
680 | +vulnerability is not present on: |
681 | + |
682 | + - Processors from AMD, Centaur and other non Intel vendors |
683 | + |
684 | + - Older processor models, where the CPU family is < 6 |
685 | + |
686 | + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus) |
687 | + |
688 | + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the |
689 | + IA32_ARCH_CAPABILITIES MSR. |
690 | + |
691 | +Whether a processor is affected or not can be read out from the MDS |
692 | +vulnerability file in sysfs. See :ref:`mds_sys_info`. |
693 | + |
694 | +Not all processors are affected by all variants of MDS, but the mitigation |
695 | +is identical for all of them so the kernel treats them as a single |
696 | +vulnerability. |
697 | + |
698 | +Related CVEs |
699 | +------------ |
700 | + |
701 | +The following CVE entries are related to the MDS vulnerability: |
702 | + |
703 | + ============== ===== =================================================== |
704 | + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling |
705 | + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling |
706 | + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling |
707 | + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory |
708 | + ============== ===== =================================================== |
709 | + |
710 | +Problem |
711 | +------- |
712 | + |
713 | +When performing store, load, L1 refill operations, processors write data |
714 | +into temporary microarchitectural structures (buffers). The data in the |
715 | +buffer can be forwarded to load operations as an optimization. |
716 | + |
717 | +Under certain conditions, usually a fault/assist caused by a load |
718 | +operation, data unrelated to the load memory address can be speculatively |
719 | +forwarded from the buffers. Because the load operation causes a fault or |
720 | +assist and its result will be discarded, the forwarded data will not cause |
721 | +incorrect program execution or state changes. But a malicious operation |
722 | +may be able to forward this speculative data to a disclosure gadget which |
723 | +allows in turn to infer the value via a cache side channel attack. |
724 | + |
725 | +Because the buffers are potentially shared between Hyper-Threads, cross |
726 | +Hyper-Thread attacks are possible. |
727 | + |
728 | +Deeper technical information is available in the MDS specific x86 |
729 | +architecture section: :ref:`Documentation/x86/mds.rst <mds>`. |
730 | + |
731 | + |
732 | +Attack scenarios |
733 | +---------------- |
734 | + |
735 | +Attacks against the MDS vulnerabilities can be mounted from malicious non |
736 | +privileged user space applications running on hosts or guests. Malicious |
737 | +guest OSes can obviously mount attacks as well. |
738 | + |
739 | +Contrary to other speculation based vulnerabilities the MDS vulnerability |
740 | +does not allow the attacker to control the memory target address. As a |
741 | +consequence the attacks are purely sampling based, but as demonstrated with |
742 | +the TLBleed attack samples can be postprocessed successfully. |
743 | + |
744 | +Web-Browsers |
745 | +^^^^^^^^^^^^ |
746 | + |
747 | + It's unclear whether attacks through Web-Browsers are possible at |
748 | + all. The exploitation through Java-Script is considered very unlikely, |
749 | + but other widely used web technologies like WebAssembly could possibly be |
750 | + abused. |
751 | + |
752 | + |
753 | +.. _mds_sys_info: |
754 | + |
755 | +MDS system information |
756 | +----------------------- |
757 | + |
758 | +The Linux kernel provides a sysfs interface to enumerate the current MDS |
759 | +status of the system: whether the system is vulnerable, and which |
760 | +mitigations are active. The relevant sysfs file is: |
761 | + |
762 | +/sys/devices/system/cpu/vulnerabilities/mds |
763 | + |
764 | +The possible values in this file are: |
765 | + |
766 | + .. list-table:: |
767 | + |
768 | + * - 'Not affected' |
769 | + - The processor is not vulnerable |
770 | + * - 'Vulnerable' |
771 | + - The processor is vulnerable, but no mitigation enabled |
772 | + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' |
773 | + - The processor is vulnerable but microcode is not updated. |
774 | + |
775 | + The mitigation is enabled on a best effort basis. See :ref:`vmwerv` |
776 | + * - 'Mitigation: Clear CPU buffers' |
777 | + - The processor is vulnerable and the CPU buffer clearing mitigation is |
778 | + enabled. |
779 | + |
780 | +If the processor is vulnerable then the following information is appended |
781 | +to the above information: |
782 | + |
783 | + ======================== ============================================ |
784 | + 'SMT vulnerable' SMT is enabled |
785 | + 'SMT mitigated' SMT is enabled and mitigated |
786 | + 'SMT disabled' SMT is disabled |
787 | + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown |
788 | + ======================== ============================================ |
789 | + |
790 | +.. _vmwerv: |
791 | + |
792 | +Best effort mitigation mode |
793 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
794 | + |
795 | + If the processor is vulnerable, but the availability of the microcode based |
796 | + mitigation mechanism is not advertised via CPUID the kernel selects a best |
797 | + effort mitigation mode. This mode invokes the mitigation instructions |
798 | + without a guarantee that they clear the CPU buffers. |
799 | + |
800 | + This is done to address virtualization scenarios where the host has the |
801 | + microcode update applied, but the hypervisor is not yet updated to expose |
802 | + the CPUID to the guest. If the host has updated microcode the protection |
803 | + takes effect; otherwise a few CPU cycles are wasted pointlessly. |
804 | + |
805 | + The state in the mds sysfs file reflects this situation accordingly. |
806 | + |
807 | + |
808 | +Mitigation mechanism |
809 | +------------------------- |
810 | + |
811 | +The kernel detects the affected CPUs and the presence of the microcode |
812 | +which is required. |
813 | + |
814 | +If a CPU is affected and the microcode is available, then the kernel |
815 | +enables the mitigation by default. The mitigation can be controlled at boot |
816 | +time via a kernel command line option. See |
817 | +:ref:`mds_mitigation_control_command_line`. |
818 | + |
819 | +.. _cpu_buffer_clear: |
820 | + |
821 | +CPU buffer clearing |
822 | +^^^^^^^^^^^^^^^^^^^ |
823 | + |
824 | + The mitigation for MDS clears the affected CPU buffers on return to user |
825 | + space and when entering a guest. |
826 | + |
827 | + If SMT is enabled it also clears the buffers on idle entry when the CPU |
828 | + is only affected by MSBDS and not any other MDS variant, because the |
829 | + other variants cannot be protected against cross Hyper-Thread attacks. |
830 | + |
831 | + For CPUs which are only affected by MSBDS the user space, guest and idle |
832 | + transition mitigations are sufficient and SMT is not affected. |
833 | + |
834 | +.. _virt_mechanism: |
835 | + |
836 | +Virtualization mitigation |
837 | +^^^^^^^^^^^^^^^^^^^^^^^^^ |
838 | + |
839 | + The protection for host to guest transition depends on the L1TF |
840 | + vulnerability of the CPU: |
841 | + |
842 | + - CPU is affected by L1TF: |
843 | + |
844 | + If the L1D flush mitigation is enabled and up to date microcode is |
845 | + available, the L1D flush mitigation is automatically protecting the |
846 | + guest transition. |
847 | + |
848 | + If the L1D flush mitigation is disabled then the MDS mitigation is |
849 | + invoked explicitly when the host MDS mitigation is enabled. |
850 | + |
851 | + For details on L1TF and virtualization see: |
852 | + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <mitigation_control_kvm>`. |
853 | + |
854 | + - CPU is not affected by L1TF: |
855 | + |
856 | + CPU buffers are flushed before entering the guest when the host MDS |
857 | + mitigation is enabled. |
858 | + |
859 | + The resulting MDS protection matrix for the host to guest transition: |
860 | + |
861 | + ============ ===== ============= ============ ================= |
862 | + L1TF MDS VMX-L1FLUSH Host MDS MDS-State |
863 | + |
864 | + Don't care No Don't care N/A Not affected |
865 | + |
866 | + Yes Yes Disabled Off Vulnerable |
867 | + |
868 | + Yes Yes Disabled Full Mitigated |
869 | + |
870 | + Yes Yes Enabled Don't care Mitigated |
871 | + |
872 | + No Yes N/A Off Vulnerable |
873 | + |
874 | + No Yes N/A Full Mitigated |
875 | + ============ ===== ============= ============ ================= |
876 | + |
877 | + This only covers the host to guest transition, i.e. prevents leakage from |
878 | + host to guest, but does not protect the guest internally. Guests need to |
879 | + have their own protections. |
880 | + |
881 | +.. _xeon_phi: |
882 | + |
883 | +XEON PHI specific considerations |
884 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
885 | + |
886 | + The XEON PHI processor family is affected by MSBDS which can be exploited |
887 | + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow |
888 | + to use MWAIT in user space (Ring 3) which opens a potential attack vector |
889 | + for malicious user space. The exposure can be disabled on the kernel |
890 | + command line with the 'ring3mwait=disable' command line option. |
891 | + |
892 | + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated |
893 | + before the CPU enters an idle state. As XEON PHI is not affected by L1TF |
894 | + either disabling SMT is not required for full protection. |
895 | + |
896 | +.. _mds_smt_control: |
897 | + |
898 | +SMT control |
899 | +^^^^^^^^^^^ |
900 | + |
901 | + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That |
902 | + means on CPUs which are affected by MFBDS or MLPDS it is necessary to |
903 | + disable SMT for full protection. These are most of the affected CPUs; the |
904 | + exception is XEON PHI, see :ref:`xeon_phi`. |
905 | + |
906 | + Disabling SMT can have a significant performance impact, but the impact |
907 | + depends on the type of workloads. |
908 | + |
909 | + See the relevant chapter in the L1TF mitigation documentation for details: |
910 | + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`. |
911 | + |
912 | + |
913 | +.. _mds_mitigation_control_command_line: |
914 | + |
915 | +Mitigation control on the kernel command line |
916 | +--------------------------------------------- |
917 | + |
918 | +The kernel command line allows to control the MDS mitigations at boot |
919 | +time with the option "mds=". The valid arguments for this option are: |
920 | + |
921 | + ============ ============================================================= |
922 | + full If the CPU is vulnerable, enable all available mitigations |
923 | + for the MDS vulnerability, CPU buffer clearing on exit to |
924 | + userspace and when entering a VM. Idle transitions are |
925 | + protected as well if SMT is enabled. |
926 | + |
927 | + It does not automatically disable SMT. |
928 | + |
929 | + full,nosmt The same as mds=full, with SMT disabled on vulnerable |
930 | + CPUs. This is the complete mitigation. |
931 | + |
932 | + off Disables MDS mitigations completely. |
933 | + |
934 | + ============ ============================================================= |
935 | + |
936 | +Not specifying this option is equivalent to "mds=full". |
937 | + |
938 | + |
939 | +Mitigation selection guide |
940 | +-------------------------- |
941 | + |
942 | +1. Trusted userspace |
943 | +^^^^^^^^^^^^^^^^^^^^ |
944 | + |
945 | + If all userspace applications are from a trusted source and do not |
946 | + execute untrusted code which is supplied externally, then the mitigation |
947 | + can be disabled. |
948 | + |
949 | + |
950 | +2. Virtualization with trusted guests |
951 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
952 | + |
953 | + The same considerations as above versus trusted user space apply. |
954 | + |
955 | +3. Virtualization with untrusted guests |
956 | +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
957 | + |
958 | + The protection depends on the state of the L1TF mitigations. |
959 | + See :ref:`virt_mechanism`. |
960 | + |
961 | + If the MDS mitigation is enabled and SMT is disabled, guest to host and |
962 | + guest to guest attacks are prevented. |
963 | + |
964 | +.. _mds_default_mitigations: |
965 | + |
966 | +Default mitigations |
967 | +------------------- |
968 | + |
969 | + The kernel default mitigations for vulnerable processors are: |
970 | + |
971 | + - Enable CPU buffer clearing |
972 | + |
973 | + The kernel does not by default enforce the disabling of SMT, which leaves |
974 | + SMT systems vulnerable when running untrusted code. The same rationale as |
975 | + for L1TF applies. |
976 | + See :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <default_mitigations>`. |
977 | diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst |
978 | index 0873685bab0f..89abc5057349 100644 |
979 | --- a/Documentation/admin-guide/index.rst |
980 | +++ b/Documentation/admin-guide/index.rst |
981 | @@ -17,14 +17,12 @@ etc. |
982 | kernel-parameters |
983 | devices |
984 | |
985 | -This section describes CPU vulnerabilities and provides an overview of the |
986 | -possible mitigations along with guidance for selecting mitigations if they |
987 | -are configurable at compile, boot or run time. |
988 | +This section describes CPU vulnerabilities and their mitigations. |
989 | |
990 | .. toctree:: |
991 | :maxdepth: 1 |
992 | |
993 | - l1tf |
994 | + hw-vuln/index |
995 | |
996 | Here is a set of documents aimed at users who are trying to track down |
997 | problems and bugs in particular. |
998 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt |
999 | index 8b6567f7cb9b..a29301d6e6c6 100644 |
1000 | --- a/Documentation/admin-guide/kernel-parameters.txt |
1001 | +++ b/Documentation/admin-guide/kernel-parameters.txt |
1002 | @@ -2079,7 +2079,7 @@ |
1003 | |
1004 | Default is 'flush'. |
1005 | |
1006 | - For details see: Documentation/admin-guide/l1tf.rst |
1007 | + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst |
1008 | |
1009 | l2cr= [PPC] |
1010 | |
1011 | @@ -2319,6 +2319,32 @@ |
1012 | Format: <first>,<last> |
1013 | Specifies range of consoles to be captured by the MDA. |
1014 | |
1015 | + mds= [X86,INTEL] |
1016 | + Control mitigation for the Micro-architectural Data |
1017 | + Sampling (MDS) vulnerability. |
1018 | + |
1019 | + Certain CPUs are vulnerable to an exploit against CPU |
1020 | + internal buffers which can forward information to a |
1021 | + disclosure gadget under certain conditions. |
1022 | + |
1023 | + In vulnerable processors, the speculatively |
1024 | + forwarded data can be used in a cache side channel |
1025 | + attack, to access data to which the attacker does |
1026 | + not have direct access. |
1027 | + |
1028 | + This parameter controls the MDS mitigation. The |
1029 | + options are: |
1030 | + |
1031 | + full - Enable MDS mitigation on vulnerable CPUs |
1032 | + full,nosmt - Enable MDS mitigation and disable |
1033 | + SMT on vulnerable CPUs |
1034 | + off - Unconditionally disable MDS mitigation |
1035 | + |
1036 | + Not specifying this option is equivalent to |
1037 | + mds=full. |
1038 | + |
1039 | + For details see: Documentation/admin-guide/hw-vuln/mds.rst |
1040 | + |
1041 | mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory |
1042 | Amount of memory to be used when the kernel is not able |
1043 | to see the whole system memory or for test. |
1044 | @@ -2476,6 +2502,40 @@ |
1045 | in the "bleeding edge" mini2440 support kernel at |
1046 | http://repo.or.cz/w/linux-2.6/mini2440.git |
1047 | |
1048 | + mitigations= |
1049 | + [X86,PPC,S390] Control optional mitigations for CPU |
1050 | + vulnerabilities. This is a set of curated, |
1051 | + arch-independent options, each of which is an |
1052 | + aggregation of existing arch-specific options. |
1053 | + |
1054 | + off |
1055 | + Disable all optional CPU mitigations. This |
1056 | + improves system performance, but it may also |
1057 | + expose users to several CPU vulnerabilities. |
1058 | + Equivalent to: nopti [X86,PPC] |
1059 | + nospectre_v1 [PPC] |
1060 | + nobp=0 [S390] |
1061 | + nospectre_v2 [X86,PPC,S390] |
1062 | + spectre_v2_user=off [X86] |
1063 | + spec_store_bypass_disable=off [X86,PPC] |
1064 | + l1tf=off [X86] |
1065 | + mds=off [X86] |
1066 | + |
1067 | + auto (default) |
1068 | + Mitigate all CPU vulnerabilities, but leave SMT |
1069 | + enabled, even if it's vulnerable. This is for |
1070 | + users who don't want to be surprised by SMT |
1071 | + getting disabled across kernel upgrades, or who |
1072 | + have other ways of avoiding SMT-based attacks. |
1073 | + Equivalent to: (default behavior) |
1074 | + |
1075 | + auto,nosmt |
1076 | + Mitigate all CPU vulnerabilities, disabling SMT |
1077 | + if needed. This is for users who always want to |
1078 | + be fully mitigated, even if it means losing SMT. |
1079 | + Equivalent to: l1tf=flush,nosmt [X86] |
1080 | + mds=full,nosmt [X86] |
1081 | + |
1082 | mminit_loglevel= |
1083 | [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this |
1084 | parameter allows control of the logging verbosity for |
1085 | diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst |
1086 | deleted file mode 100644 |
1087 | index 9f5924f81f89..000000000000 |
1088 | --- a/Documentation/admin-guide/l1tf.rst |
1089 | +++ /dev/null |
1090 | @@ -1,614 +0,0 @@ |
1091 | -L1TF - L1 Terminal Fault |
1092 | -======================== |
1093 | - |
1094 | -L1 Terminal Fault is a hardware vulnerability which allows unprivileged |
1095 | -speculative access to data which is available in the Level 1 Data Cache |
1096 | -when the page table entry controlling the virtual address, which is used |
1097 | -for the access, has the Present bit cleared or other reserved bits set. |
1098 | - |
1099 | -Affected processors |
1100 | -------------------- |
1101 | - |
1102 | -This vulnerability affects a wide range of Intel processors. The |
1103 | -vulnerability is not present on: |
1104 | - |
1105 | - - Processors from AMD, Centaur and other non Intel vendors |
1106 | - |
1107 | - - Older processor models, where the CPU family is < 6 |
1108 | - |
1109 | - - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft, |
1110 | - Penwell, Pineview, Silvermont, Airmont, Merrifield) |
1111 | - |
1112 | - - The Intel XEON PHI family |
1113 | - |
1114 | - - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the |
1115 | - IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected |
1116 | - by the Meltdown vulnerability either. These CPUs should become |
1117 | - available by end of 2018. |
1118 | - |
1119 | -Whether a processor is affected or not can be read out from the L1TF |
1120 | -vulnerability file in sysfs. See :ref:`l1tf_sys_info`. |
1121 | - |
1122 | -Related CVEs |
1123 | ------------- |
1124 | - |
1125 | -The following CVE entries are related to the L1TF vulnerability: |
1126 | - |
1127 | - ============= ================= ============================== |
1128 | - CVE-2018-3615 L1 Terminal Fault SGX related aspects |
1129 | - CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects |
1130 | - CVE-2018-3646 L1 Terminal Fault Virtualization related aspects |
1131 | - ============= ================= ============================== |
1132 | - |
1133 | -Problem |
1134 | -------- |
1135 | - |
1136 | -If an instruction accesses a virtual address for which the relevant page |
1137 | -table entry (PTE) has the Present bit cleared or other reserved bits set, |
1138 | -then speculative execution ignores the invalid PTE and loads the referenced |
1139 | -data if it is present in the Level 1 Data Cache, as if the page referenced |
1140 | -by the address bits in the PTE was still present and accessible. |
1141 | - |
1142 | -While this is a purely speculative mechanism and the instruction will raise |
1143 | -a page fault when it is retired eventually, the pure act of loading the |
1144 | -data and making it available to other speculative instructions opens up the |
1145 | -opportunity for side channel attacks to unprivileged malicious code, |
1146 | -similar to the Meltdown attack. |
1147 | - |
1148 | -While Meltdown breaks the user space to kernel space protection, L1TF |
1149 | -allows to attack any physical memory address in the system and the attack |
1150 | -works across all protection domains. It allows an attack of SGX and also |
1151 | -works from inside virtual machines because the speculation bypasses the |
1152 | -extended page table (EPT) protection mechanism. |
1153 | - |
1154 | - |
1155 | -Attack scenarios |
1156 | ----------------- |
1157 | - |
1158 | -1. Malicious user space |
1159 | -^^^^^^^^^^^^^^^^^^^^^^^ |
1160 | - |
1161 | - Operating Systems store arbitrary information in the address bits of a |
1162 | - PTE which is marked non present. This allows a malicious user space |
1163 | - application to attack the physical memory to which these PTEs resolve. |
1164 | - In some cases user-space can maliciously influence the information |
1165 | - encoded in the address bits of the PTE, thus making attacks more |
1166 | - deterministic and more practical. |
1167 | - |
1168 | - The Linux kernel contains a mitigation for this attack vector, PTE |
1169 | - inversion, which is permanently enabled and has no performance |
1170 | - impact. The kernel ensures that the address bits of PTEs, which are not |
1171 | - marked present, never point to cacheable physical memory space. |
1172 | - |
1173 | - A system with an up to date kernel is protected against attacks from |
1174 | - malicious user space applications. |
1175 | - |
1176 | -2. Malicious guest in a virtual machine |
1177 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1178 | - |
1179 | - The fact that L1TF breaks all domain protections allows malicious guest |
1180 | - OSes, which can control the PTEs directly, and malicious guest user |
1181 | - space applications, which run on an unprotected guest kernel lacking the |
1182 | - PTE inversion mitigation for L1TF, to attack physical host memory. |
1183 | - |
1184 | - A special aspect of L1TF in the context of virtualization is symmetric |
1185 | - multi threading (SMT). The Intel implementation of SMT is called |
1186 | - HyperThreading. The fact that Hyperthreads on the affected processors |
1187 | - share the L1 Data Cache (L1D) is important for this. As the flaw allows |
1188 | - only to attack data which is present in L1D, a malicious guest running |
1189 | - on one Hyperthread can attack the data which is brought into the L1D by |
1190 | - the context which runs on the sibling Hyperthread of the same physical |
1191 | - core. This context can be host OS, host user space or a different guest. |
1192 | - |
1193 | - If the processor does not support Extended Page Tables, the attack is |
1194 | - only possible, when the hypervisor does not sanitize the content of the |
1195 | - effective (shadow) page tables. |
1196 | - |
1197 | - While solutions exist to mitigate these attack vectors fully, these |
1198 | - mitigations are not enabled by default in the Linux kernel because they |
1199 | - can affect performance significantly. The kernel provides several |
1200 | - mechanisms which can be utilized to address the problem depending on the |
1201 | - deployment scenario. The mitigations, their protection scope and impact |
1202 | - are described in the next sections. |
1203 | - |
1204 | - The default mitigations and the rationale for choosing them are explained |
1205 | - at the end of this document. See :ref:`default_mitigations`. |
1206 | - |
1207 | -.. _l1tf_sys_info: |
1208 | - |
1209 | -L1TF system information |
1210 | ------------------------ |
1211 | - |
1212 | -The Linux kernel provides a sysfs interface to enumerate the current L1TF |
1213 | -status of the system: whether the system is vulnerable, and which |
1214 | -mitigations are active. The relevant sysfs file is: |
1215 | - |
1216 | -/sys/devices/system/cpu/vulnerabilities/l1tf |
1217 | - |
1218 | -The possible values in this file are: |
1219 | - |
1220 | - =========================== =============================== |
1221 | - 'Not affected' The processor is not vulnerable |
1222 | - 'Mitigation: PTE Inversion' The host protection is active |
1223 | - =========================== =============================== |
1224 | - |
1225 | -If KVM/VMX is enabled and the processor is vulnerable then the following |
1226 | -information is appended to the 'Mitigation: PTE Inversion' part: |
1227 | - |
1228 | - - SMT status: |
1229 | - |
1230 | - ===================== ================ |
1231 | - 'VMX: SMT vulnerable' SMT is enabled |
1232 | - 'VMX: SMT disabled' SMT is disabled |
1233 | - ===================== ================ |
1234 | - |
1235 | - - L1D Flush mode: |
1236 | - |
1237 | - ================================ ==================================== |
1238 | - 'L1D vulnerable' L1D flushing is disabled |
1239 | - |
1240 | - 'L1D conditional cache flushes' L1D flush is conditionally enabled |
1241 | - |
1242 | - 'L1D cache flushes' L1D flush is unconditionally enabled |
1243 | - ================================ ==================================== |
1244 | - |
1245 | -The resulting grade of protection is discussed in the following sections. |
1246 | - |
1247 | - |
1248 | -Host mitigation mechanism |
1249 | -------------------------- |
1250 | - |
1251 | -The kernel is unconditionally protected against L1TF attacks from malicious |
1252 | -user space running on the host. |
1253 | - |
1254 | - |
1255 | -Guest mitigation mechanisms |
1256 | ---------------------------- |
1257 | - |
1258 | -.. _l1d_flush: |
1259 | - |
1260 | -1. L1D flush on VMENTER |
1261 | -^^^^^^^^^^^^^^^^^^^^^^^ |
1262 | - |
1263 | - To make sure that a guest cannot attack data which is present in the L1D |
1264 | - the hypervisor flushes the L1D before entering the guest. |
1265 | - |
1266 | - Flushing the L1D evicts not only the data which should not be accessed |
1267 | - by a potentially malicious guest, it also flushes the guest |
1268 | - data. Flushing the L1D has a performance impact as the processor has to |
1269 | - bring the flushed guest data back into the L1D. Depending on the |
1270 | - frequency of VMEXIT/VMENTER and the type of computations in the guest |
1271 | - performance degradation in the range of 1% to 50% has been observed. For |
1272 | - scenarios where guest VMEXIT/VMENTER are rare the performance impact is |
1273 | - minimal. Virtio and mechanisms like posted interrupts are designed to |
1274 | - confine the VMEXITs to a bare minimum, but specific configurations and |
1275 | - application scenarios might still suffer from a high VMEXIT rate. |
1276 | - |
1277 | - The kernel provides two L1D flush modes: |
1278 | - - conditional ('cond') |
1279 | - - unconditional ('always') |
1280 | - |
1281 | - The conditional mode avoids L1D flushing after VMEXITs which execute |
1282 | - only audited code paths before the corresponding VMENTER. These code |
1283 | - paths have been verified that they cannot expose secrets or other |
1284 | - interesting data to an attacker, but they can leak information about the |
1285 | - address space layout of the hypervisor. |
1286 | - |
1287 | - Unconditional mode flushes L1D on all VMENTER invocations and provides |
1288 | - maximum protection. It has a higher overhead than the conditional |
1289 | - mode. The overhead cannot be quantified correctly as it depends on the |
1290 | - workload scenario and the resulting number of VMEXITs. |
1291 | - |
1292 | - The general recommendation is to enable L1D flush on VMENTER. The kernel |
1293 | - defaults to conditional mode on affected processors. |
1294 | - |
1295 | - **Note**, that L1D flush does not prevent the SMT problem because the |
1296 | - sibling thread will also bring back its data into the L1D which makes it |
1297 | - attackable again. |
1298 | - |
1299 | - L1D flush can be controlled by the administrator via the kernel command |
1300 | - line and sysfs control files. See :ref:`mitigation_control_command_line` |
1301 | - and :ref:`mitigation_control_kvm`. |
1302 | - |
1303 | -.. _guest_confinement: |
1304 | - |
1305 | -2. Guest VCPU confinement to dedicated physical cores |
1306 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1307 | - |
1308 | - To address the SMT problem, it is possible to make a guest or a group of |
1309 | - guests affine to one or more physical cores. The proper mechanism for |
1310 | - that is to utilize exclusive cpusets to ensure that no other guest or |
1311 | - host tasks can run on these cores. |
1312 | - |
1313 | - If only a single guest or related guests run on sibling SMT threads on |
1314 | - the same physical core then they can only attack their own memory and |
1315 | - restricted parts of the host memory. |
1316 | - |
1317 | - Host memory is attackable, when one of the sibling SMT threads runs in |
1318 | - host OS (hypervisor) context and the other in guest context. The amount |
1319 | - of valuable information from the host OS context depends on the context |
1320 | - which the host OS executes, i.e. interrupts, soft interrupts and kernel |
1321 | - threads. The amount of valuable data from these contexts cannot be |
1322 | - declared as non-interesting for an attacker without deep inspection of |
1323 | - the code. |
1324 | - |
1325 | - **Note**, that assigning guests to a fixed set of physical cores affects |
1326 | - the ability of the scheduler to do load balancing and might have |
1327 | - negative effects on CPU utilization depending on the hosting |
1328 | - scenario. Disabling SMT might be a viable alternative for particular |
1329 | - scenarios. |
1330 | - |
1331 | - For further information about confining guests to a single or to a group |
1332 | - of cores consult the cpusets documentation: |
1333 | - |
1334 | - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt |
1335 | - |
1336 | -.. _interrupt_isolation: |
1337 | - |
1338 | -3. Interrupt affinity |
1339 | -^^^^^^^^^^^^^^^^^^^^^ |
1340 | - |
1341 | - Interrupts can be made affine to logical CPUs. This is not universally |
1342 | - true because there are types of interrupts which are truly per CPU |
1343 | - interrupts, e.g. the local timer interrupt. Aside of that multi queue |
1344 | - devices affine their interrupts to single CPUs or groups of CPUs per |
1345 | - queue without allowing the administrator to control the affinities. |
1346 | - |
1347 | - Moving the interrupts, which can be affinity controlled, away from CPUs |
1348 | - which run untrusted guests, reduces the attack vector space. |
1349 | - |
1350 | - Whether the interrupts with are affine to CPUs, which run untrusted |
1351 | - guests, provide interesting data for an attacker depends on the system |
1352 | - configuration and the scenarios which run on the system. While for some |
1353 | - of the interrupts it can be assumed that they won't expose interesting |
1354 | - information beyond exposing hints about the host OS memory layout, there |
1355 | - is no way to make general assumptions. |
1356 | - |
1357 | - Interrupt affinity can be controlled by the administrator via the |
1358 | - /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is |
1359 | - available at: |
1360 | - |
1361 | - https://www.kernel.org/doc/Documentation/IRQ-affinity.txt |
1362 | - |
1363 | -.. _smt_control: |
1364 | - |
1365 | -4. SMT control |
1366 | -^^^^^^^^^^^^^^ |
1367 | - |
1368 | - To prevent the SMT issues of L1TF it might be necessary to disable SMT |
1369 | - completely. Disabling SMT can have a significant performance impact, but |
1370 | - the impact depends on the hosting scenario and the type of workloads. |
1371 | - The impact of disabling SMT needs also to be weighted against the impact |
1372 | - of other mitigation solutions like confining guests to dedicated cores. |
1373 | - |
1374 | - The kernel provides a sysfs interface to retrieve the status of SMT and |
1375 | - to control it. It also provides a kernel command line interface to |
1376 | - control SMT. |
1377 | - |
1378 | - The kernel command line interface consists of the following options: |
1379 | - |
1380 | - =========== ========================================================== |
1381 | - nosmt Affects the bring up of the secondary CPUs during boot. The |
1382 | - kernel tries to bring all present CPUs online during the |
1383 | - boot process. "nosmt" makes sure that from each physical |
1384 | - core only one - the so called primary (hyper) thread is |
1385 | - activated. Due to a design flaw of Intel processors related |
1386 | - to Machine Check Exceptions the non primary siblings have |
1387 | - to be brought up at least partially and are then shut down |
1388 | - again. "nosmt" can be undone via the sysfs interface. |
1389 | - |
1390 | - nosmt=force Has the same effect as "nosmt" but it does not allow to |
1391 | - undo the SMT disable via the sysfs interface. |
1392 | - =========== ========================================================== |
1393 | - |
1394 | - The sysfs interface provides two files: |
1395 | - |
1396 | - - /sys/devices/system/cpu/smt/control |
1397 | - - /sys/devices/system/cpu/smt/active |
1398 | - |
1399 | - /sys/devices/system/cpu/smt/control: |
1400 | - |
1401 | - This file allows to read out the SMT control state and provides the |
1402 | - ability to disable or (re)enable SMT. The possible states are: |
1403 | - |
1404 | - ============== =================================================== |
1405 | - on SMT is supported by the CPU and enabled. All |
1406 | - logical CPUs can be onlined and offlined without |
1407 | - restrictions. |
1408 | - |
1409 | - off SMT is supported by the CPU and disabled. Only |
1410 | - the so called primary SMT threads can be onlined |
1411 | - and offlined without restrictions. An attempt to |
1412 | - online a non-primary sibling is rejected |
1413 | - |
1414 | - forceoff Same as 'off' but the state cannot be controlled. |
1415 | - Attempts to write to the control file are rejected. |
1416 | - |
1417 | - notsupported The processor does not support SMT. It's therefore |
1418 | - not affected by the SMT implications of L1TF. |
1419 | - Attempts to write to the control file are rejected. |
1420 | - ============== =================================================== |
1421 | - |
1422 | - The possible states which can be written into this file to control SMT |
1423 | - state are: |
1424 | - |
1425 | - - on |
1426 | - - off |
1427 | - - forceoff |
1428 | - |
1429 | - /sys/devices/system/cpu/smt/active: |
1430 | - |
1431 | - This file reports whether SMT is enabled and active, i.e. if on any |
1432 | - physical core two or more sibling threads are online. |
1433 | - |
1434 | - SMT control is also possible at boot time via the l1tf kernel command |
1435 | - line parameter in combination with L1D flush control. See |
1436 | - :ref:`mitigation_control_command_line`. |
1437 | - |
1438 | -5. Disabling EPT |
1439 | -^^^^^^^^^^^^^^^^ |
1440 | - |
1441 | - Disabling EPT for virtual machines provides full mitigation for L1TF even |
1442 | - with SMT enabled, because the effective page tables for guests are |
1443 | - managed and sanitized by the hypervisor. Though disabling EPT has a |
1444 | - significant performance impact especially when the Meltdown mitigation |
1445 | - KPTI is enabled. |
1446 | - |
1447 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. |
1448 | - |
1449 | -There is ongoing research and development for new mitigation mechanisms to |
1450 | -address the performance impact of disabling SMT or EPT. |
1451 | - |
1452 | -.. _mitigation_control_command_line: |
1453 | - |
1454 | -Mitigation control on the kernel command line |
1455 | ---------------------------------------------- |
1456 | - |
1457 | -The kernel command line allows to control the L1TF mitigations at boot |
1458 | -time with the option "l1tf=". The valid arguments for this option are: |
1459 | - |
1460 | - ============ ============================================================= |
1461 | - full Provides all available mitigations for the L1TF |
1462 | - vulnerability. Disables SMT and enables all mitigations in |
1463 | - the hypervisors, i.e. unconditional L1D flushing |
1464 | - |
1465 | - SMT control and L1D flush control via the sysfs interface |
1466 | - is still possible after boot. Hypervisors will issue a |
1467 | - warning when the first VM is started in a potentially |
1468 | - insecure configuration, i.e. SMT enabled or L1D flush |
1469 | - disabled. |
1470 | - |
1471 | - full,force Same as 'full', but disables SMT and L1D flush runtime |
1472 | - control. Implies the 'nosmt=force' command line option. |
1473 | - (i.e. sysfs control of SMT is disabled.) |
1474 | - |
1475 | - flush Leaves SMT enabled and enables the default hypervisor |
1476 | - mitigation, i.e. conditional L1D flushing |
1477 | - |
1478 | - SMT control and L1D flush control via the sysfs interface |
1479 | - is still possible after boot. Hypervisors will issue a |
1480 | - warning when the first VM is started in a potentially |
1481 | - insecure configuration, i.e. SMT enabled or L1D flush |
1482 | - disabled. |
1483 | - |
1484 | - flush,nosmt Disables SMT and enables the default hypervisor mitigation, |
1485 | - i.e. conditional L1D flushing. |
1486 | - |
1487 | - SMT control and L1D flush control via the sysfs interface |
1488 | - is still possible after boot. Hypervisors will issue a |
1489 | - warning when the first VM is started in a potentially |
1490 | - insecure configuration, i.e. SMT enabled or L1D flush |
1491 | - disabled. |
1492 | - |
1493 | - flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is |
1494 | - started in a potentially insecure configuration. |
1495 | - |
1496 | - off Disables hypervisor mitigations and doesn't emit any |
1497 | - warnings. |
1498 | - It also drops the swap size and available RAM limit restrictions |
1499 | - on both hypervisor and bare metal. |
1500 | - |
1501 | - ============ ============================================================= |
1502 | - |
1503 | -The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`. |
1504 | - |
1505 | - |
1506 | -.. _mitigation_control_kvm: |
1507 | - |
1508 | -Mitigation control for KVM - module parameter |
1509 | -------------------------------------------------------------- |
1510 | - |
1511 | -The KVM hypervisor mitigation mechanism, flushing the L1D cache when |
1512 | -entering a guest, can be controlled with a module parameter. |
1513 | - |
1514 | -The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the |
1515 | -following arguments: |
1516 | - |
1517 | - ============ ============================================================== |
1518 | - always L1D cache flush on every VMENTER. |
1519 | - |
1520 | - cond Flush L1D on VMENTER only when the code between VMEXIT and |
1521 | - VMENTER can leak host memory which is considered |
1522 | - interesting for an attacker. This still can leak host memory |
1523 | - which allows e.g. to determine the hosts address space layout. |
1524 | - |
1525 | - never Disables the mitigation |
1526 | - ============ ============================================================== |
1527 | - |
1528 | -The parameter can be provided on the kernel command line, as a module |
1529 | -parameter when loading the modules and at runtime modified via the sysfs |
1530 | -file: |
1531 | - |
1532 | -/sys/module/kvm_intel/parameters/vmentry_l1d_flush |
1533 | - |
1534 | -The default is 'cond'. If 'l1tf=full,force' is given on the kernel command |
1535 | -line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush |
1536 | -module parameter is ignored and writes to the sysfs file are rejected. |
1537 | - |
1538 | - |
1539 | -Mitigation selection guide |
1540 | --------------------------- |
1541 | - |
1542 | -1. No virtualization in use |
1543 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1544 | - |
1545 | - The system is protected by the kernel unconditionally and no further |
1546 | - action is required. |
1547 | - |
1548 | -2. Virtualization with trusted guests |
1549 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1550 | - |
1551 | - If the guest comes from a trusted source and the guest OS kernel is |
1552 | - guaranteed to have the L1TF mitigations in place the system is fully |
1553 | - protected against L1TF and no further action is required. |
1554 | - |
1555 | - To avoid the overhead of the default L1D flushing on VMENTER the |
1556 | - administrator can disable the flushing via the kernel command line and |
1557 | - sysfs control files. See :ref:`mitigation_control_command_line` and |
1558 | - :ref:`mitigation_control_kvm`. |
1559 | - |
1560 | - |
1561 | -3. Virtualization with untrusted guests |
1562 | -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
1563 | - |
1564 | -3.1. SMT not supported or disabled |
1565 | -"""""""""""""""""""""""""""""""""" |
1566 | - |
1567 | - If SMT is not supported by the processor or disabled in the BIOS or by |
1568 | - the kernel, it's only required to enforce L1D flushing on VMENTER. |
1569 | - |
1570 | - Conditional L1D flushing is the default behaviour and can be tuned. See |
1571 | - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. |
1572 | - |
1573 | -3.2. EPT not supported or disabled |
1574 | -"""""""""""""""""""""""""""""""""" |
1575 | - |
1576 | - If EPT is not supported by the processor or disabled in the hypervisor, |
1577 | - the system is fully protected. SMT can stay enabled and L1D flushing on |
1578 | - VMENTER is not required. |
1579 | - |
1580 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter. |
1581 | - |
1582 | -3.3. SMT and EPT supported and active |
1583 | -""""""""""""""""""""""""""""""""""""" |
1584 | - |
1585 | - If SMT and EPT are supported and active then various degrees of |
1586 | - mitigations can be employed: |
1587 | - |
1588 | - - L1D flushing on VMENTER: |
1589 | - |
1590 | - L1D flushing on VMENTER is the minimal protection requirement, but it |
1591 | - is only potent in combination with other mitigation methods. |
1592 | - |
1593 | - Conditional L1D flushing is the default behaviour and can be tuned. See |
1594 | - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`. |
1595 | - |
1596 | - - Guest confinement: |
1597 | - |
1598 | - Confinement of guests to a single or a group of physical cores which |
1599 | - are not running any other processes, can reduce the attack surface |
1600 | - significantly, but interrupts, soft interrupts and kernel threads can |
1601 | - still expose valuable data to a potential attacker. See |
1602 | - :ref:`guest_confinement`. |
1603 | - |
1604 | - - Interrupt isolation: |
1605 | - |
1606 | - Isolating the guest CPUs from interrupts can reduce the attack surface |
1607 | - further, but still allows a malicious guest to explore a limited amount |
1608 | - of host physical memory. This can at least be used to gain knowledge |
1609 | - about the host address space layout. The interrupts which have a fixed |
1610 | - affinity to the CPUs which run the untrusted guests can depending on |
1611 | - the scenario still trigger soft interrupts and schedule kernel threads |
1612 | - which might expose valuable information. See |
1613 | - :ref:`interrupt_isolation`. |
1614 | - |
1615 | -The above three mitigation methods combined can provide protection to a |
1616 | -certain degree, but the risk of the remaining attack surface has to be |
1617 | -carefully analyzed. For full protection the following methods are |
1618 | -available: |
1619 | - |
1620 | - - Disabling SMT: |
1621 | - |
1622 | - Disabling SMT and enforcing the L1D flushing provides the maximum |
1623 | - amount of protection. This mitigation is not depending on any of the |
1624 | - above mitigation methods. |
1625 | - |
1626 | - SMT control and L1D flushing can be tuned by the command line |
1627 | - parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run |
1628 | - time with the matching sysfs control files. See :ref:`smt_control`, |
1629 | - :ref:`mitigation_control_command_line` and |
1630 | - :ref:`mitigation_control_kvm`. |
1631 | - |
1632 | - - Disabling EPT: |
1633 | - |
1634 | - Disabling EPT provides the maximum amount of protection as well. It is |
1635 | - not depending on any of the above mitigation methods. SMT can stay |
1636 | - enabled and L1D flushing is not required, but the performance impact is |
1637 | - significant. |
1638 | - |
1639 | - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' |
1640 | - parameter. |
1641 | - |
1642 | -3.4. Nested virtual machines |
1643 | -"""""""""""""""""""""""""""" |
1644 | - |
1645 | -When nested virtualization is in use, three operating systems are involved: |
1646 | -the bare metal hypervisor, the nested hypervisor and the nested virtual |
1647 | -machine. VMENTER operations from the nested hypervisor into the nested |
1648 | -guest will always be processed by the bare metal hypervisor. If KVM is the |
1649 | -bare metal hypervisor it wiil: |
1650 | - |
1651 | - - Flush the L1D cache on every switch from the nested hypervisor to the |
1652 | - nested virtual machine, so that the nested hypervisor's secrets are not |
1653 | - exposed to the nested virtual machine; |
1654 | - |
1655 | - - Flush the L1D cache on every switch from the nested virtual machine to |
1656 | - the nested hypervisor; this is a complex operation, and flushing the L1D |
1657 | - cache avoids that the bare metal hypervisor's secrets are exposed to the |
1658 | - nested virtual machine; |
1659 | - |
1660 | - - Instruct the nested hypervisor to not perform any L1D cache flush. This |
1661 | - is an optimization to avoid double L1D flushing. |
1662 | - |
1663 | - |
1664 | -.. _default_mitigations: |
1665 | - |
1666 | -Default mitigations |
1667 | -------------------- |
1668 | - |
1669 | - The kernel default mitigations for vulnerable processors are: |
1670 | - |
1671 | - - PTE inversion to protect against malicious user space. This is done |
1672 | - unconditionally and cannot be controlled. The swap storage is limited |
1673 | - to ~16TB. |
1674 | - |
1675 | - - L1D conditional flushing on VMENTER when EPT is enabled for |
1676 | - a guest. |
1677 | - |
1678 | - The kernel does not by default enforce the disabling of SMT, which leaves |
1679 | - SMT systems vulnerable when running untrusted guests with EPT enabled. |
1680 | - |
1681 | - The rationale for this choice is: |
1682 | - |
1683 | - - Force disabling SMT can break existing setups, especially with |
1684 | - unattended updates. |
1685 | - |
1686 | - - If regular users run untrusted guests on their machine, then L1TF is |
1687 | - just an add on to other malware which might be embedded in an untrusted |
1688 | - guest, e.g. spam-bots or attacks on the local network. |
1689 | - |
1690 | - There is no technical way to prevent a user from running untrusted code |
1691 | - on their machines blindly. |
1692 | - |
1693 | - - It's technically extremely unlikely and from today's knowledge even |
1694 | - impossible that L1TF can be exploited via the most popular attack |
1695 | - mechanisms like JavaScript because these mechanisms have no way to |
1696 | - control PTEs. If this would be possible and not other mitigation would |
1697 | - be possible, then the default might be different. |
1698 | - |
1699 | - - The administrators of cloud and hosting setups have to carefully |
1700 | - analyze the risk for their scenarios and make the appropriate |
1701 | - mitigation choices, which might even vary across their deployed |
1702 | - machines and also result in other changes of their overall setup. |
1703 | - There is no way for the kernel to provide a sensible default for this |
1704 | - kind of scenarios. |
1705 | diff --git a/Documentation/index.rst b/Documentation/index.rst |
1706 | index 5db7e87c7cb1..1cdc139adb40 100644 |
1707 | --- a/Documentation/index.rst |
1708 | +++ b/Documentation/index.rst |
1709 | @@ -104,6 +104,7 @@ implementation. |
1710 | :maxdepth: 2 |
1711 | |
1712 | sh/index |
1713 | + x86/index |
1714 | |
1715 | Filesystem Documentation |
1716 | ------------------------ |
1717 | diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py |
1718 | new file mode 100644 |
1719 | index 000000000000..33c5c3142e20 |
1720 | --- /dev/null |
1721 | +++ b/Documentation/x86/conf.py |
1722 | @@ -0,0 +1,10 @@ |
1723 | +# -*- coding: utf-8; mode: python -*- |
1724 | + |
1725 | +project = "X86 architecture specific documentation" |
1726 | + |
1727 | +tags.add("subproject") |
1728 | + |
1729 | +latex_documents = [ |
1730 | + ('index', 'x86.tex', project, |
1731 | + 'The kernel development community', 'manual'), |
1732 | +] |
1733 | diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst |
1734 | new file mode 100644 |
1735 | index 000000000000..ef389dcf1b1d |
1736 | --- /dev/null |
1737 | +++ b/Documentation/x86/index.rst |
1738 | @@ -0,0 +1,8 @@ |
1739 | +========================== |
1740 | +x86 architecture specifics |
1741 | +========================== |
1742 | + |
1743 | +.. toctree:: |
1744 | + :maxdepth: 1 |
1745 | + |
1746 | + mds |
1747 | diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst |
1748 | new file mode 100644 |
1749 | index 000000000000..534e9baa4e1d |
1750 | --- /dev/null |
1751 | +++ b/Documentation/x86/mds.rst |
1752 | @@ -0,0 +1,225 @@ |
1753 | +Microarchitectural Data Sampling (MDS) mitigation |
1754 | +================================================= |
1755 | + |
1756 | +.. _mds: |
1757 | + |
1758 | +Overview |
1759 | +-------- |
1760 | + |
1761 | +Microarchitectural Data Sampling (MDS) is a family of side channel attacks |
1762 | +on internal buffers in Intel CPUs. The variants are: |
1763 | + |
1764 | + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126) |
1765 | + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130) |
1766 | + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127) |
1767 | + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091) |
1768 | + |
1769 | +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a |
1770 | +dependent load (store-to-load forwarding) as an optimization. The forward |
1771 | +can also happen to a faulting or assisting load operation for a different |
1772 | +memory address, which can be exploited under certain conditions. Store |
1773 | +buffers are partitioned between Hyper-Threads so cross thread forwarding is |
1774 | +not possible. But if a thread enters or exits a sleep state the store |
1775 | +buffer is repartitioned which can expose data from one thread to the other. |
1776 | + |
1777 | +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage |
1778 | +L1 miss situations and to hold data which is returned or sent in response |
1779 | +to a memory or I/O operation. Fill buffers can forward data to a load |
1780 | +operation and also write data to the cache. When the fill buffer is |
1781 | +deallocated it can retain the stale data of the preceding operations which |
1782 | +can then be forwarded to a faulting or assisting load operation, which can |
1783 | +be exploited under certain conditions. Fill buffers are shared between |
1784 | +Hyper-Threads so cross thread leakage is possible. |
1785 | + |
1786 | +MLPDS leaks Load Port Data. Load ports are used to perform load operations |
1787 | +from memory or I/O. The received data is then forwarded to the register |
1788 | +file or a subsequent operation. In some implementations the Load Port can |
1789 | +contain stale data from a previous operation which can be forwarded to |
1790 | +faulting or assisting loads under certain conditions, which again can be |
1791 | +exploited eventually. Load ports are shared between Hyper-Threads so cross |
1792 | +thread leakage is possible. |
1793 | + |
1794 | +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from |
1795 | +memory that takes a fault or assist can leave data in a microarchitectural |
1796 | +structure that may later be observed using one of the same methods used by |
1797 | +MSBDS, MFBDS or MLPDS. |
1798 | + |
1799 | +Exposure assumptions |
1800 | +-------------------- |
1801 | + |
1802 | +It is assumed that attack code resides in user space or in a guest with one |
1803 | +exception. The rationale behind this assumption is that the code construct |
1804 | +needed for exploiting MDS requires: |
1805 | + |
1806 | + - to control the load to trigger a fault or assist |
1807 | + |
1808 | + - to have a disclosure gadget which exposes the speculatively accessed |
1809 | + data for consumption through a side channel. |
1810 | + |
1811 | + - to control the pointer through which the disclosure gadget exposes the |
1812 | + data |
1813 | + |
1814 | +The existence of such a construct in the kernel cannot be excluded with |
1815 | +100% certainty, but the complexity involved makes it extremely unlikely. |
1816 | + |
1817 | +There is one exception, which is untrusted BPF. The functionality of |
1818 | +untrusted BPF is limited, but it needs to be thoroughly investigated |
1819 | +whether it can be used to create such a construct. |
1820 | + |
1821 | + |
1822 | +Mitigation strategy |
1823 | +------------------- |
1824 | + |
1825 | +All variants have the same mitigation strategy at least for the single CPU |
1826 | +thread case (SMT off): Force the CPU to clear the affected buffers. |
1827 | + |
1828 | +This is achieved by using the otherwise unused and obsolete VERW |
1829 | +instruction in combination with a microcode update. The microcode clears |
1830 | +the affected CPU buffers when the VERW instruction is executed. |
1831 | + |
1832 | +For virtualization there are two ways to achieve CPU buffer |
1833 | +clearing. Either the modified VERW instruction or via the L1D Flush |
1834 | +command. The latter is issued when L1TF mitigation is enabled so the extra |
1835 | +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to |
1836 | +be issued. |
1837 | + |
1838 | +If the VERW instruction with the supplied segment selector argument is |
1839 | +executed on a CPU without the microcode update there is no side effect |
1840 | +other than a small number of pointlessly wasted CPU cycles. |
1841 | + |
1842 | +This does not protect against cross Hyper-Thread attacks except for MSBDS |
1843 | +which is only exploitable cross Hyper-thread when one of the Hyper-Threads |
1844 | +enters a C-state. |
1845 | + |
1846 | +The kernel provides a function to invoke the buffer clearing: |
1847 | + |
1848 | + mds_clear_cpu_buffers() |
1849 | + |
1850 | +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state |
1851 | +(idle) transitions. |
1852 | + |
1853 | +As a special quirk to address virtualization scenarios where the host has |
1854 | +the microcode updated, but the hypervisor does not (yet) expose the |
1855 | +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the |
1856 | +hope that it might actually clear the buffers. The state is reflected |
1857 | +accordingly. |
1858 | + |
1859 | +According to current knowledge additional mitigations inside the kernel |
1860 | +itself are not required because the necessary gadgets to expose the leaked |
1861 | +data cannot be controlled in a way which allows exploitation from malicious |
1862 | +user space or VM guests. |
1863 | + |
1864 | +Kernel internal mitigation modes |
1865 | +-------------------------------- |
1866 | + |
1867 | + ======= ============================================================ |
1868 | + off Mitigation is disabled. Either the CPU is not affected or |
1869 | + mds=off is supplied on the kernel command line |
1870 | + |
1871 | + full Mitigation is enabled. CPU is affected and MD_CLEAR is |
1872 | + advertised in CPUID. |
1873 | + |
1874 | + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not |
1875 | + advertised in CPUID. That is mainly for virtualization |
1876 | + scenarios where the host has the updated microcode but the |
1877 | + hypervisor does not expose MD_CLEAR in CPUID. It's a best |
1878 | + effort approach without guarantee. |
1879 | + ======= ============================================================ |
1880 | + |
1881 | +If the CPU is affected and mds=off is not supplied on the kernel command |
1882 | +line then the kernel selects the appropriate mitigation mode depending on |
1883 | +the availability of the MD_CLEAR CPUID bit. |
1884 | + |
1885 | +Mitigation points |
1886 | +----------------- |
1887 | + |
1888 | +1. Return to user space |
1889 | +^^^^^^^^^^^^^^^^^^^^^^^ |
1890 | + |
1891 | + When transitioning from kernel to user space the CPU buffers are flushed |
1892 | + on affected CPUs when the mitigation is not disabled on the kernel |
1893 | + command line. The mitigation is enabled through the static key |
1894 | + mds_user_clear. |
1895 | + |
1896 | + The mitigation is invoked in prepare_exit_to_usermode() which covers |
1897 | + most of the kernel to user space transitions. There are a few exceptions |
1898 | + which are not invoking prepare_exit_to_usermode() on return to user |
1899 | + space. These exceptions use the paranoid exit code. |
1900 | + |
1901 | + - Non Maskable Interrupt (NMI): |
1902 | + |
1903 | + Access to sensible data like keys, credentials in the NMI context is |
1904 | + mostly theoretical: The CPU can do prefetching or execute a |
1905 | + misspeculated code path and thereby fetching data which might end up |
1906 | + leaking through a buffer. |
1907 | + |
1908 | + But for mounting other attacks the kernel stack address of the task is |
1909 | + already valuable information. So in full mitigation mode, the NMI is |
1910 | + mitigated on the return from do_nmi() to provide almost complete |
1911 | + coverage. |
1912 | + |
1913 | + - Double fault (#DF): |
1914 | + |
1915 | + A double fault is usually fatal, but the ESPFIX workaround, which can |
1916 | + be triggered from user space through modify_ldt(2) is a recoverable |
1917 | + double fault. #DF uses the paranoid exit path, so explicit mitigation |
1918 | + in the double fault handler is required. |
1919 | + |
1920 | + - Machine Check Exception (#MC): |
1921 | + |
1922 | + Another corner case is a #MC which hits between the CPU buffer clear |
1923 | + invocation and the actual return to user. As this still is in kernel |
1924 | + space it takes the paranoid exit path which does not clear the CPU |
1925 | + buffers. So the #MC handler repopulates the buffers to some |
1926 | + extent. Machine checks are not reliably controllable and the window is |
1927 | + extremly small so mitigation would just tick a checkbox that this |
1928 | + theoretical corner case is covered. To keep the amount of special |
1929 | + cases small, ignore #MC. |
1930 | + |
1931 | + - Debug Exception (#DB): |
1932 | + |
1933 | + This takes the paranoid exit path only when the INT1 breakpoint is in |
1934 | + kernel space. #DB on a user space address takes the regular exit path, |
1935 | + so no extra mitigation required. |
1936 | + |
1937 | + |
1938 | +2. C-State transition |
1939 | +^^^^^^^^^^^^^^^^^^^^^ |
1940 | + |
1941 | + When a CPU goes idle and enters a C-State the CPU buffers need to be |
1942 | + cleared on affected CPUs when SMT is active. This addresses the |
1943 | + repartitioning of the store buffer when one of the Hyper-Threads enters |
1944 | + a C-State. |
1945 | + |
1946 | + When SMT is inactive, i.e. either the CPU does not support it or all |
1947 | + sibling threads are offline CPU buffer clearing is not required. |
1948 | + |
1949 | + The idle clearing is enabled on CPUs which are only affected by MSBDS |
1950 | + and not by any other MDS variant. The other MDS variants cannot be |
1951 | + protected against cross Hyper-Thread attacks because the Fill Buffer and |
1952 | + the Load Ports are shared. So on CPUs affected by other variants, the |
1953 | + idle clearing would be a window dressing exercise and is therefore not |
1954 | + activated. |
1955 | + |
1956 | + The invocation is controlled by the static key mds_idle_clear which is |
1957 | + switched depending on the chosen mitigation mode and the SMT state of |
1958 | + the system. |
1959 | + |
1960 | + The buffer clear is only invoked before entering the C-State to prevent |
1961 | + that stale data from the idling CPU from spilling to the Hyper-Thread |
1962 | + sibling after the store buffer got repartitioned and all entries are |
1963 | + available to the non idle sibling. |
1964 | + |
1965 | + When coming out of idle the store buffer is partitioned again so each |
1966 | + sibling has half of it available. The back from idle CPU could be then |
1967 | + speculatively exposed to contents of the sibling. The buffers are |
1968 | + flushed either on exit to user space or on VMENTER so malicious code |
1969 | + in user space or the guest cannot speculatively access them. |
1970 | + |
1971 | + The mitigation is hooked into all variants of halt()/mwait(), but does |
1972 | + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver |
1973 | + has been superseded by the intel_idle driver around 2010 and is |
1974 | + preferred on all affected CPUs which are expected to gain the MD_CLEAR |
1975 | + functionality in microcode. Aside of that the IO-Port mechanism is a |
1976 | + legacy interface which is only used on older systems which are either |
1977 | + not affected or do not receive microcode updates anymore. |
1978 | diff --git a/Makefile b/Makefile |
1979 | index 914d69b9e3fd..be894b3a97d5 100644 |
1980 | --- a/Makefile |
1981 | +++ b/Makefile |
1982 | @@ -1,7 +1,7 @@ |
1983 | # SPDX-License-Identifier: GPL-2.0 |
1984 | VERSION = 4 |
1985 | PATCHLEVEL = 19 |
1986 | -SUBLEVEL = 42 |
1987 | +SUBLEVEL = 43 |
1988 | EXTRAVERSION = |
1989 | NAME = "People's Front" |
1990 | |
1991 | diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c |
1992 | index 1341325599a7..4ccbf611a3c5 100644 |
1993 | --- a/arch/powerpc/kernel/security.c |
1994 | +++ b/arch/powerpc/kernel/security.c |
1995 | @@ -56,7 +56,7 @@ void setup_barrier_nospec(void) |
1996 | enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && |
1997 | security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR); |
1998 | |
1999 | - if (!no_nospec) |
2000 | + if (!no_nospec && !cpu_mitigations_off()) |
2001 | enable_barrier_nospec(enable); |
2002 | } |
2003 | |
2004 | @@ -115,7 +115,7 @@ static int __init handle_nospectre_v2(char *p) |
2005 | early_param("nospectre_v2", handle_nospectre_v2); |
2006 | void setup_spectre_v2(void) |
2007 | { |
2008 | - if (no_spectrev2) |
2009 | + if (no_spectrev2 || cpu_mitigations_off()) |
2010 | do_btb_flush_fixups(); |
2011 | else |
2012 | btb_flush_enabled = true; |
2013 | @@ -299,7 +299,7 @@ void setup_stf_barrier(void) |
2014 | |
2015 | stf_enabled_flush_types = type; |
2016 | |
2017 | - if (!no_stf_barrier) |
2018 | + if (!no_stf_barrier && !cpu_mitigations_off()) |
2019 | stf_barrier_enable(enable); |
2020 | } |
2021 | |
2022 | diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c |
2023 | index faf00222b324..eaf7300be5ab 100644 |
2024 | --- a/arch/powerpc/kernel/setup_64.c |
2025 | +++ b/arch/powerpc/kernel/setup_64.c |
2026 | @@ -955,7 +955,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) |
2027 | |
2028 | enabled_flush_types = types; |
2029 | |
2030 | - if (!no_rfi_flush) |
2031 | + if (!no_rfi_flush && !cpu_mitigations_off()) |
2032 | rfi_flush_enable(enable); |
2033 | } |
2034 | |
2035 | diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c |
2036 | index bdddaae96559..649135cbedd5 100644 |
2037 | --- a/arch/s390/kernel/nospec-branch.c |
2038 | +++ b/arch/s390/kernel/nospec-branch.c |
2039 | @@ -1,6 +1,7 @@ |
2040 | // SPDX-License-Identifier: GPL-2.0 |
2041 | #include <linux/module.h> |
2042 | #include <linux/device.h> |
2043 | +#include <linux/cpu.h> |
2044 | #include <asm/nospec-branch.h> |
2045 | |
2046 | static int __init nobp_setup_early(char *str) |
2047 | @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early); |
2048 | |
2049 | void __init nospec_auto_detect(void) |
2050 | { |
2051 | - if (test_facility(156)) { |
2052 | + if (test_facility(156) || cpu_mitigations_off()) { |
2053 | /* |
2054 | * The machine supports etokens. |
2055 | * Disable expolines and disable nobp. |
2056 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c |
2057 | index 3b2490b81918..8353348ddeaf 100644 |
2058 | --- a/arch/x86/entry/common.c |
2059 | +++ b/arch/x86/entry/common.c |
2060 | @@ -31,6 +31,7 @@ |
2061 | #include <asm/vdso.h> |
2062 | #include <linux/uaccess.h> |
2063 | #include <asm/cpufeature.h> |
2064 | +#include <asm/nospec-branch.h> |
2065 | |
2066 | #define CREATE_TRACE_POINTS |
2067 | #include <trace/events/syscalls.h> |
2068 | @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) |
2069 | #endif |
2070 | |
2071 | user_enter_irqoff(); |
2072 | + |
2073 | + mds_user_clear_cpu_buffers(); |
2074 | } |
2075 | |
2076 | #define SYSCALL_EXIT_WORK_FLAGS \ |
2077 | diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c |
2078 | index f9958ad4d335..a759e59990fb 100644 |
2079 | --- a/arch/x86/events/intel/core.c |
2080 | +++ b/arch/x86/events/intel/core.c |
2081 | @@ -4132,11 +4132,11 @@ __init int intel_pmu_init(void) |
2082 | name = "nehalem"; |
2083 | break; |
2084 | |
2085 | - case INTEL_FAM6_ATOM_PINEVIEW: |
2086 | - case INTEL_FAM6_ATOM_LINCROFT: |
2087 | - case INTEL_FAM6_ATOM_PENWELL: |
2088 | - case INTEL_FAM6_ATOM_CLOVERVIEW: |
2089 | - case INTEL_FAM6_ATOM_CEDARVIEW: |
2090 | + case INTEL_FAM6_ATOM_BONNELL: |
2091 | + case INTEL_FAM6_ATOM_BONNELL_MID: |
2092 | + case INTEL_FAM6_ATOM_SALTWELL: |
2093 | + case INTEL_FAM6_ATOM_SALTWELL_MID: |
2094 | + case INTEL_FAM6_ATOM_SALTWELL_TABLET: |
2095 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
2096 | sizeof(hw_cache_event_ids)); |
2097 | |
2098 | @@ -4149,9 +4149,11 @@ __init int intel_pmu_init(void) |
2099 | name = "bonnell"; |
2100 | break; |
2101 | |
2102 | - case INTEL_FAM6_ATOM_SILVERMONT1: |
2103 | - case INTEL_FAM6_ATOM_SILVERMONT2: |
2104 | + case INTEL_FAM6_ATOM_SILVERMONT: |
2105 | + case INTEL_FAM6_ATOM_SILVERMONT_X: |
2106 | + case INTEL_FAM6_ATOM_SILVERMONT_MID: |
2107 | case INTEL_FAM6_ATOM_AIRMONT: |
2108 | + case INTEL_FAM6_ATOM_AIRMONT_MID: |
2109 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, |
2110 | sizeof(hw_cache_event_ids)); |
2111 | memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, |
2112 | @@ -4170,7 +4172,7 @@ __init int intel_pmu_init(void) |
2113 | break; |
2114 | |
2115 | case INTEL_FAM6_ATOM_GOLDMONT: |
2116 | - case INTEL_FAM6_ATOM_DENVERTON: |
2117 | + case INTEL_FAM6_ATOM_GOLDMONT_X: |
2118 | memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, |
2119 | sizeof(hw_cache_event_ids)); |
2120 | memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, |
2121 | @@ -4196,7 +4198,7 @@ __init int intel_pmu_init(void) |
2122 | name = "goldmont"; |
2123 | break; |
2124 | |
2125 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
2126 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
2127 | memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, |
2128 | sizeof(hw_cache_event_ids)); |
2129 | memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, |
2130 | diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c |
2131 | index 6eb76106c469..56194c571299 100644 |
2132 | --- a/arch/x86/events/intel/cstate.c |
2133 | +++ b/arch/x86/events/intel/cstate.c |
2134 | @@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { |
2135 | |
2136 | X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates), |
2137 | |
2138 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates), |
2139 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates), |
2140 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates), |
2141 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates), |
2142 | X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates), |
2143 | |
2144 | X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates), |
2145 | @@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { |
2146 | X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), |
2147 | |
2148 | X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), |
2149 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates), |
2150 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), |
2151 | |
2152 | - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates), |
2153 | + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), |
2154 | { }, |
2155 | }; |
2156 | MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); |
2157 | diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c |
2158 | index 32f3e9423e99..91039ffed633 100644 |
2159 | --- a/arch/x86/events/intel/rapl.c |
2160 | +++ b/arch/x86/events/intel/rapl.c |
2161 | @@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { |
2162 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), |
2163 | |
2164 | X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), |
2165 | - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), |
2166 | + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), |
2167 | |
2168 | - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init), |
2169 | + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), |
2170 | {}, |
2171 | }; |
2172 | |
2173 | diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c |
2174 | index b4771a6ddbc1..1b9f85abf9bc 100644 |
2175 | --- a/arch/x86/events/msr.c |
2176 | +++ b/arch/x86/events/msr.c |
2177 | @@ -69,14 +69,14 @@ static bool test_intel(int idx) |
2178 | case INTEL_FAM6_BROADWELL_GT3E: |
2179 | case INTEL_FAM6_BROADWELL_X: |
2180 | |
2181 | - case INTEL_FAM6_ATOM_SILVERMONT1: |
2182 | - case INTEL_FAM6_ATOM_SILVERMONT2: |
2183 | + case INTEL_FAM6_ATOM_SILVERMONT: |
2184 | + case INTEL_FAM6_ATOM_SILVERMONT_X: |
2185 | case INTEL_FAM6_ATOM_AIRMONT: |
2186 | |
2187 | case INTEL_FAM6_ATOM_GOLDMONT: |
2188 | - case INTEL_FAM6_ATOM_DENVERTON: |
2189 | + case INTEL_FAM6_ATOM_GOLDMONT_X: |
2190 | |
2191 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
2192 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
2193 | |
2194 | case INTEL_FAM6_XEON_PHI_KNL: |
2195 | case INTEL_FAM6_XEON_PHI_KNM: |
2196 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
2197 | index 7b31ee5223fc..69037da75ea0 100644 |
2198 | --- a/arch/x86/include/asm/cpufeatures.h |
2199 | +++ b/arch/x86/include/asm/cpufeatures.h |
2200 | @@ -341,6 +341,7 @@ |
2201 | #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ |
2202 | #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ |
2203 | #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ |
2204 | +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ |
2205 | #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ |
2206 | #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ |
2207 | #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ |
2208 | @@ -378,5 +379,7 @@ |
2209 | #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ |
2210 | #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ |
2211 | #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ |
2212 | +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ |
2213 | +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ |
2214 | |
2215 | #endif /* _ASM_X86_CPUFEATURES_H */ |
2216 | diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h |
2217 | index 0ad25cc895ae..058b1a1994c4 100644 |
2218 | --- a/arch/x86/include/asm/intel-family.h |
2219 | +++ b/arch/x86/include/asm/intel-family.h |
2220 | @@ -8,9 +8,6 @@ |
2221 | * The "_X" parts are generally the EP and EX Xeons, or the |
2222 | * "Extreme" ones, like Broadwell-E. |
2223 | * |
2224 | - * Things ending in "2" are usually because we have no better |
2225 | - * name for them. There's no processor called "SILVERMONT2". |
2226 | - * |
2227 | * While adding a new CPUID for a new microarchitecture, add a new |
2228 | * group to keep logically sorted out in chronological order. Within |
2229 | * that group keep the CPUID for the variants sorted by model number. |
2230 | @@ -59,19 +56,23 @@ |
2231 | |
2232 | /* "Small Core" Processors (Atom) */ |
2233 | |
2234 | -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C |
2235 | -#define INTEL_FAM6_ATOM_LINCROFT 0x26 |
2236 | -#define INTEL_FAM6_ATOM_PENWELL 0x27 |
2237 | -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 |
2238 | -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 |
2239 | -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ |
2240 | -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ |
2241 | -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ |
2242 | -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ |
2243 | -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ |
2244 | -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C |
2245 | -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ |
2246 | -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A |
2247 | +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ |
2248 | +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ |
2249 | + |
2250 | +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ |
2251 | +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ |
2252 | +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ |
2253 | + |
2254 | +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ |
2255 | +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */ |
2256 | +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ |
2257 | + |
2258 | +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ |
2259 | +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ |
2260 | + |
2261 | +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ |
2262 | +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ |
2263 | +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ |
2264 | |
2265 | /* Xeon Phi */ |
2266 | |
2267 | diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h |
2268 | index 15450a675031..c99c66b41e53 100644 |
2269 | --- a/arch/x86/include/asm/irqflags.h |
2270 | +++ b/arch/x86/include/asm/irqflags.h |
2271 | @@ -6,6 +6,8 @@ |
2272 | |
2273 | #ifndef __ASSEMBLY__ |
2274 | |
2275 | +#include <asm/nospec-branch.h> |
2276 | + |
2277 | /* Provide __cpuidle; we can't safely include <linux/cpu.h> */ |
2278 | #define __cpuidle __attribute__((__section__(".cpuidle.text"))) |
2279 | |
2280 | @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) |
2281 | |
2282 | static inline __cpuidle void native_safe_halt(void) |
2283 | { |
2284 | + mds_idle_clear_cpu_buffers(); |
2285 | asm volatile("sti; hlt": : :"memory"); |
2286 | } |
2287 | |
2288 | static inline __cpuidle void native_halt(void) |
2289 | { |
2290 | + mds_idle_clear_cpu_buffers(); |
2291 | asm volatile("hlt": : :"memory"); |
2292 | } |
2293 | |
2294 | diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h |
2295 | index f14ca0be1e3f..f85f43db9225 100644 |
2296 | --- a/arch/x86/include/asm/msr-index.h |
2297 | +++ b/arch/x86/include/asm/msr-index.h |
2298 | @@ -2,6 +2,8 @@ |
2299 | #ifndef _ASM_X86_MSR_INDEX_H |
2300 | #define _ASM_X86_MSR_INDEX_H |
2301 | |
2302 | +#include <linux/bits.h> |
2303 | + |
2304 | /* |
2305 | * CPU model specific register (MSR) numbers. |
2306 | * |
2307 | @@ -40,14 +42,14 @@ |
2308 | /* Intel MSRs. Some also available on other CPUs */ |
2309 | |
2310 | #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ |
2311 | -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ |
2312 | +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ |
2313 | #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ |
2314 | -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ |
2315 | +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ |
2316 | #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ |
2317 | -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ |
2318 | +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ |
2319 | |
2320 | #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ |
2321 | -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ |
2322 | +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ |
2323 | |
2324 | #define MSR_PPIN_CTL 0x0000004e |
2325 | #define MSR_PPIN 0x0000004f |
2326 | @@ -69,20 +71,25 @@ |
2327 | #define MSR_MTRRcap 0x000000fe |
2328 | |
2329 | #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a |
2330 | -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ |
2331 | -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ |
2332 | -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ |
2333 | -#define ARCH_CAP_SSB_NO (1 << 4) /* |
2334 | - * Not susceptible to Speculative Store Bypass |
2335 | - * attack, so no Speculative Store Bypass |
2336 | - * control required. |
2337 | - */ |
2338 | +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ |
2339 | +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ |
2340 | +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ |
2341 | +#define ARCH_CAP_SSB_NO BIT(4) /* |
2342 | + * Not susceptible to Speculative Store Bypass |
2343 | + * attack, so no Speculative Store Bypass |
2344 | + * control required. |
2345 | + */ |
2346 | +#define ARCH_CAP_MDS_NO BIT(5) /* |
2347 | + * Not susceptible to |
2348 | + * Microarchitectural Data |
2349 | + * Sampling (MDS) vulnerabilities. |
2350 | + */ |
2351 | |
2352 | #define MSR_IA32_FLUSH_CMD 0x0000010b |
2353 | -#define L1D_FLUSH (1 << 0) /* |
2354 | - * Writeback and invalidate the |
2355 | - * L1 data cache. |
2356 | - */ |
2357 | +#define L1D_FLUSH BIT(0) /* |
2358 | + * Writeback and invalidate the |
2359 | + * L1 data cache. |
2360 | + */ |
2361 | |
2362 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
2363 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e |
2364 | diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h |
2365 | index 39a2fb29378a..eb0f80ce8524 100644 |
2366 | --- a/arch/x86/include/asm/mwait.h |
2367 | +++ b/arch/x86/include/asm/mwait.h |
2368 | @@ -6,6 +6,7 @@ |
2369 | #include <linux/sched/idle.h> |
2370 | |
2371 | #include <asm/cpufeature.h> |
2372 | +#include <asm/nospec-branch.h> |
2373 | |
2374 | #define MWAIT_SUBSTATE_MASK 0xf |
2375 | #define MWAIT_CSTATE_MASK 0xf |
2376 | @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, |
2377 | |
2378 | static inline void __mwait(unsigned long eax, unsigned long ecx) |
2379 | { |
2380 | + mds_idle_clear_cpu_buffers(); |
2381 | + |
2382 | /* "mwait %eax, %ecx;" */ |
2383 | asm volatile(".byte 0x0f, 0x01, 0xc9;" |
2384 | :: "a" (eax), "c" (ecx)); |
2385 | @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) |
2386 | static inline void __mwaitx(unsigned long eax, unsigned long ebx, |
2387 | unsigned long ecx) |
2388 | { |
2389 | + /* No MDS buffer clear as this is AMD/HYGON only */ |
2390 | + |
2391 | /* "mwaitx %eax, %ebx, %ecx;" */ |
2392 | asm volatile(".byte 0x0f, 0x01, 0xfb;" |
2393 | :: "a" (eax), "b" (ebx), "c" (ecx)); |
2394 | @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, |
2395 | |
2396 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) |
2397 | { |
2398 | + mds_idle_clear_cpu_buffers(); |
2399 | + |
2400 | trace_hardirqs_on(); |
2401 | /* "mwait %eax, %ecx;" */ |
2402 | asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" |
2403 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
2404 | index 032b6009baab..599c273f5d00 100644 |
2405 | --- a/arch/x86/include/asm/nospec-branch.h |
2406 | +++ b/arch/x86/include/asm/nospec-branch.h |
2407 | @@ -317,6 +317,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); |
2408 | DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); |
2409 | DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); |
2410 | |
2411 | +DECLARE_STATIC_KEY_FALSE(mds_user_clear); |
2412 | +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); |
2413 | + |
2414 | +#include <asm/segment.h> |
2415 | + |
2416 | +/** |
2417 | + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability |
2418 | + * |
2419 | + * This uses the otherwise unused and obsolete VERW instruction in |
2420 | + * combination with microcode which triggers a CPU buffer flush when the |
2421 | + * instruction is executed. |
2422 | + */ |
2423 | +static inline void mds_clear_cpu_buffers(void) |
2424 | +{ |
2425 | + static const u16 ds = __KERNEL_DS; |
2426 | + |
2427 | + /* |
2428 | + * Has to be the memory-operand variant because only that |
2429 | + * guarantees the CPU buffer flush functionality according to |
2430 | + * documentation. The register-operand variant does not. |
2431 | + * Works with any segment selector, but a valid writable |
2432 | + * data segment is the fastest variant. |
2433 | + * |
2434 | + * "cc" clobber is required because VERW modifies ZF. |
2435 | + */ |
2436 | + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); |
2437 | +} |
2438 | + |
2439 | +/** |
2440 | + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability |
2441 | + * |
2442 | + * Clear CPU buffers if the corresponding static key is enabled |
2443 | + */ |
2444 | +static inline void mds_user_clear_cpu_buffers(void) |
2445 | +{ |
2446 | + if (static_branch_likely(&mds_user_clear)) |
2447 | + mds_clear_cpu_buffers(); |
2448 | +} |
2449 | + |
2450 | +/** |
2451 | + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability |
2452 | + * |
2453 | + * Clear CPU buffers if the corresponding static key is enabled |
2454 | + */ |
2455 | +static inline void mds_idle_clear_cpu_buffers(void) |
2456 | +{ |
2457 | + if (static_branch_likely(&mds_idle_clear)) |
2458 | + mds_clear_cpu_buffers(); |
2459 | +} |
2460 | + |
2461 | #endif /* __ASSEMBLY__ */ |
2462 | |
2463 | /* |
2464 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
2465 | index d53c54b842da..b54f25697beb 100644 |
2466 | --- a/arch/x86/include/asm/processor.h |
2467 | +++ b/arch/x86/include/asm/processor.h |
2468 | @@ -997,4 +997,10 @@ enum l1tf_mitigations { |
2469 | |
2470 | extern enum l1tf_mitigations l1tf_mitigation; |
2471 | |
2472 | +enum mds_mitigations { |
2473 | + MDS_MITIGATION_OFF, |
2474 | + MDS_MITIGATION_FULL, |
2475 | + MDS_MITIGATION_VMWERV, |
2476 | +}; |
2477 | + |
2478 | #endif /* _ASM_X86_PROCESSOR_H */ |
2479 | diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c |
2480 | index e5258bd64200..9b096f26d1c8 100644 |
2481 | --- a/arch/x86/kernel/cpu/bugs.c |
2482 | +++ b/arch/x86/kernel/cpu/bugs.c |
2483 | @@ -35,6 +35,7 @@ |
2484 | static void __init spectre_v2_select_mitigation(void); |
2485 | static void __init ssb_select_mitigation(void); |
2486 | static void __init l1tf_select_mitigation(void); |
2487 | +static void __init mds_select_mitigation(void); |
2488 | |
2489 | /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ |
2490 | u64 x86_spec_ctrl_base; |
2491 | @@ -61,6 +62,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); |
2492 | /* Control unconditional IBPB in switch_mm() */ |
2493 | DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); |
2494 | |
2495 | +/* Control MDS CPU buffer clear before returning to user space */ |
2496 | +DEFINE_STATIC_KEY_FALSE(mds_user_clear); |
2497 | +EXPORT_SYMBOL_GPL(mds_user_clear); |
2498 | +/* Control MDS CPU buffer clear before idling (halt, mwait) */ |
2499 | +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); |
2500 | +EXPORT_SYMBOL_GPL(mds_idle_clear); |
2501 | + |
2502 | void __init check_bugs(void) |
2503 | { |
2504 | identify_boot_cpu(); |
2505 | @@ -99,6 +107,10 @@ void __init check_bugs(void) |
2506 | |
2507 | l1tf_select_mitigation(); |
2508 | |
2509 | + mds_select_mitigation(); |
2510 | + |
2511 | + arch_smt_update(); |
2512 | + |
2513 | #ifdef CONFIG_X86_32 |
2514 | /* |
2515 | * Check whether we are able to run this kernel safely on SMP. |
2516 | @@ -204,6 +216,61 @@ static void x86_amd_ssb_disable(void) |
2517 | wrmsrl(MSR_AMD64_LS_CFG, msrval); |
2518 | } |
2519 | |
2520 | +#undef pr_fmt |
2521 | +#define pr_fmt(fmt) "MDS: " fmt |
2522 | + |
2523 | +/* Default mitigation for MDS-affected CPUs */ |
2524 | +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; |
2525 | +static bool mds_nosmt __ro_after_init = false; |
2526 | + |
2527 | +static const char * const mds_strings[] = { |
2528 | + [MDS_MITIGATION_OFF] = "Vulnerable", |
2529 | + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", |
2530 | + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", |
2531 | +}; |
2532 | + |
2533 | +static void __init mds_select_mitigation(void) |
2534 | +{ |
2535 | + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { |
2536 | + mds_mitigation = MDS_MITIGATION_OFF; |
2537 | + return; |
2538 | + } |
2539 | + |
2540 | + if (mds_mitigation == MDS_MITIGATION_FULL) { |
2541 | + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) |
2542 | + mds_mitigation = MDS_MITIGATION_VMWERV; |
2543 | + |
2544 | + static_branch_enable(&mds_user_clear); |
2545 | + |
2546 | + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && |
2547 | + (mds_nosmt || cpu_mitigations_auto_nosmt())) |
2548 | + cpu_smt_disable(false); |
2549 | + } |
2550 | + |
2551 | + pr_info("%s\n", mds_strings[mds_mitigation]); |
2552 | +} |
2553 | + |
2554 | +static int __init mds_cmdline(char *str) |
2555 | +{ |
2556 | + if (!boot_cpu_has_bug(X86_BUG_MDS)) |
2557 | + return 0; |
2558 | + |
2559 | + if (!str) |
2560 | + return -EINVAL; |
2561 | + |
2562 | + if (!strcmp(str, "off")) |
2563 | + mds_mitigation = MDS_MITIGATION_OFF; |
2564 | + else if (!strcmp(str, "full")) |
2565 | + mds_mitigation = MDS_MITIGATION_FULL; |
2566 | + else if (!strcmp(str, "full,nosmt")) { |
2567 | + mds_mitigation = MDS_MITIGATION_FULL; |
2568 | + mds_nosmt = true; |
2569 | + } |
2570 | + |
2571 | + return 0; |
2572 | +} |
2573 | +early_param("mds", mds_cmdline); |
2574 | + |
2575 | #undef pr_fmt |
2576 | #define pr_fmt(fmt) "Spectre V2 : " fmt |
2577 | |
2578 | @@ -428,7 +495,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) |
2579 | char arg[20]; |
2580 | int ret, i; |
2581 | |
2582 | - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) |
2583 | + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") || |
2584 | + cpu_mitigations_off()) |
2585 | return SPECTRE_V2_CMD_NONE; |
2586 | |
2587 | ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); |
2588 | @@ -560,9 +628,6 @@ specv2_set_mode: |
2589 | |
2590 | /* Set up IBPB and STIBP depending on the general spectre V2 command */ |
2591 | spectre_v2_user_select_mitigation(cmd); |
2592 | - |
2593 | - /* Enable STIBP if appropriate */ |
2594 | - arch_smt_update(); |
2595 | } |
2596 | |
2597 | static void update_stibp_msr(void * __unused) |
2598 | @@ -596,6 +661,31 @@ static void update_indir_branch_cond(void) |
2599 | static_branch_disable(&switch_to_cond_stibp); |
2600 | } |
2601 | |
2602 | +#undef pr_fmt |
2603 | +#define pr_fmt(fmt) fmt |
2604 | + |
2605 | +/* Update the static key controlling the MDS CPU buffer clear in idle */ |
2606 | +static void update_mds_branch_idle(void) |
2607 | +{ |
2608 | + /* |
2609 | + * Enable the idle clearing if SMT is active on CPUs which are |
2610 | + * affected only by MSBDS and not any other MDS variant. |
2611 | + * |
2612 | + * The other variants cannot be mitigated when SMT is enabled, so |
2613 | + * clearing the buffers on idle just to prevent the Store Buffer |
2614 | + * repartitioning leak would be a window dressing exercise. |
2615 | + */ |
2616 | + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) |
2617 | + return; |
2618 | + |
2619 | + if (sched_smt_active()) |
2620 | + static_branch_enable(&mds_idle_clear); |
2621 | + else |
2622 | + static_branch_disable(&mds_idle_clear); |
2623 | +} |
2624 | + |
2625 | +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" |
2626 | + |
2627 | void arch_smt_update(void) |
2628 | { |
2629 | /* Enhanced IBRS implies STIBP. No update required. */ |
2630 | @@ -616,6 +706,17 @@ void arch_smt_update(void) |
2631 | break; |
2632 | } |
2633 | |
2634 | + switch (mds_mitigation) { |
2635 | + case MDS_MITIGATION_FULL: |
2636 | + case MDS_MITIGATION_VMWERV: |
2637 | + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) |
2638 | + pr_warn_once(MDS_MSG_SMT); |
2639 | + update_mds_branch_idle(); |
2640 | + break; |
2641 | + case MDS_MITIGATION_OFF: |
2642 | + break; |
2643 | + } |
2644 | + |
2645 | mutex_unlock(&spec_ctrl_mutex); |
2646 | } |
2647 | |
2648 | @@ -657,7 +758,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) |
2649 | char arg[20]; |
2650 | int ret, i; |
2651 | |
2652 | - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { |
2653 | + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") || |
2654 | + cpu_mitigations_off()) { |
2655 | return SPEC_STORE_BYPASS_CMD_NONE; |
2656 | } else { |
2657 | ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", |
2658 | @@ -978,6 +1080,11 @@ static void __init l1tf_select_mitigation(void) |
2659 | if (!boot_cpu_has_bug(X86_BUG_L1TF)) |
2660 | return; |
2661 | |
2662 | + if (cpu_mitigations_off()) |
2663 | + l1tf_mitigation = L1TF_MITIGATION_OFF; |
2664 | + else if (cpu_mitigations_auto_nosmt()) |
2665 | + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT; |
2666 | + |
2667 | override_cache_bits(&boot_cpu_data); |
2668 | |
2669 | switch (l1tf_mitigation) { |
2670 | @@ -1006,7 +1113,7 @@ static void __init l1tf_select_mitigation(void) |
2671 | pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", |
2672 | half_pa); |
2673 | pr_info("However, doing so will make a part of your RAM unusable.\n"); |
2674 | - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); |
2675 | + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); |
2676 | return; |
2677 | } |
2678 | |
2679 | @@ -1039,6 +1146,7 @@ static int __init l1tf_cmdline(char *str) |
2680 | early_param("l1tf", l1tf_cmdline); |
2681 | |
2682 | #undef pr_fmt |
2683 | +#define pr_fmt(fmt) fmt |
2684 | |
2685 | #ifdef CONFIG_SYSFS |
2686 | |
2687 | @@ -1077,6 +1185,23 @@ static ssize_t l1tf_show_state(char *buf) |
2688 | } |
2689 | #endif |
2690 | |
2691 | +static ssize_t mds_show_state(char *buf) |
2692 | +{ |
2693 | + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { |
2694 | + return sprintf(buf, "%s; SMT Host state unknown\n", |
2695 | + mds_strings[mds_mitigation]); |
2696 | + } |
2697 | + |
2698 | + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { |
2699 | + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], |
2700 | + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : |
2701 | + sched_smt_active() ? "mitigated" : "disabled")); |
2702 | + } |
2703 | + |
2704 | + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], |
2705 | + sched_smt_active() ? "vulnerable" : "disabled"); |
2706 | +} |
2707 | + |
2708 | static char *stibp_state(void) |
2709 | { |
2710 | if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) |
2711 | @@ -1141,6 +1266,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr |
2712 | if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) |
2713 | return l1tf_show_state(buf); |
2714 | break; |
2715 | + |
2716 | + case X86_BUG_MDS: |
2717 | + return mds_show_state(buf); |
2718 | + |
2719 | default: |
2720 | break; |
2721 | } |
2722 | @@ -1172,4 +1301,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b |
2723 | { |
2724 | return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); |
2725 | } |
2726 | + |
2727 | +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) |
2728 | +{ |
2729 | + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); |
2730 | +} |
2731 | #endif |
2732 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
2733 | index 44c4ef3d989b..1073118b9bf0 100644 |
2734 | --- a/arch/x86/kernel/cpu/common.c |
2735 | +++ b/arch/x86/kernel/cpu/common.c |
2736 | @@ -948,60 +948,73 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
2737 | #endif |
2738 | } |
2739 | |
2740 | -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { |
2741 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY }, |
2742 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY }, |
2743 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY }, |
2744 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY }, |
2745 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY }, |
2746 | - { X86_VENDOR_CENTAUR, 5 }, |
2747 | - { X86_VENDOR_INTEL, 5 }, |
2748 | - { X86_VENDOR_NSC, 5 }, |
2749 | - { X86_VENDOR_ANY, 4 }, |
2750 | +#define NO_SPECULATION BIT(0) |
2751 | +#define NO_MELTDOWN BIT(1) |
2752 | +#define NO_SSB BIT(2) |
2753 | +#define NO_L1TF BIT(3) |
2754 | +#define NO_MDS BIT(4) |
2755 | +#define MSBDS_ONLY BIT(5) |
2756 | + |
2757 | +#define VULNWL(_vendor, _family, _model, _whitelist) \ |
2758 | + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } |
2759 | + |
2760 | +#define VULNWL_INTEL(model, whitelist) \ |
2761 | + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) |
2762 | + |
2763 | +#define VULNWL_AMD(family, whitelist) \ |
2764 | + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) |
2765 | + |
2766 | +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { |
2767 | + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), |
2768 | + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), |
2769 | + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), |
2770 | + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), |
2771 | + |
2772 | + /* Intel Family 6 */ |
2773 | + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), |
2774 | + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), |
2775 | + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), |
2776 | + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), |
2777 | + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), |
2778 | + |
2779 | + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2780 | + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2781 | + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2782 | + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2783 | + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2784 | + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY), |
2785 | + |
2786 | + VULNWL_INTEL(CORE_YONAH, NO_SSB), |
2787 | + |
2788 | + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), |
2789 | + |
2790 | + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), |
2791 | + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), |
2792 | + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), |
2793 | + |
2794 | + /* AMD Family 0xf - 0x12 */ |
2795 | + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), |
2796 | + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), |
2797 | + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), |
2798 | + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), |
2799 | + |
2800 | + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ |
2801 | + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), |
2802 | {} |
2803 | }; |
2804 | |
2805 | -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { |
2806 | - { X86_VENDOR_AMD }, |
2807 | - {} |
2808 | -}; |
2809 | - |
2810 | -/* Only list CPUs which speculate but are non susceptible to SSB */ |
2811 | -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { |
2812 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, |
2813 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, |
2814 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, |
2815 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, |
2816 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, |
2817 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, |
2818 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, |
2819 | - { X86_VENDOR_AMD, 0x12, }, |
2820 | - { X86_VENDOR_AMD, 0x11, }, |
2821 | - { X86_VENDOR_AMD, 0x10, }, |
2822 | - { X86_VENDOR_AMD, 0xf, }, |
2823 | - {} |
2824 | -}; |
2825 | +static bool __init cpu_matches(unsigned long which) |
2826 | +{ |
2827 | + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); |
2828 | |
2829 | -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { |
2830 | - /* in addition to cpu_no_speculation */ |
2831 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, |
2832 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, |
2833 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, |
2834 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, |
2835 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD }, |
2836 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, |
2837 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON }, |
2838 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE }, |
2839 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, |
2840 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, |
2841 | - {} |
2842 | -}; |
2843 | + return m && !!(m->driver_data & which); |
2844 | +} |
2845 | |
2846 | static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
2847 | { |
2848 | u64 ia32_cap = 0; |
2849 | |
2850 | - if (x86_match_cpu(cpu_no_speculation)) |
2851 | + if (cpu_matches(NO_SPECULATION)) |
2852 | return; |
2853 | |
2854 | setup_force_cpu_bug(X86_BUG_SPECTRE_V1); |
2855 | @@ -1010,15 +1023,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
2856 | if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) |
2857 | rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); |
2858 | |
2859 | - if (!x86_match_cpu(cpu_no_spec_store_bypass) && |
2860 | - !(ia32_cap & ARCH_CAP_SSB_NO) && |
2861 | + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && |
2862 | !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) |
2863 | setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); |
2864 | |
2865 | if (ia32_cap & ARCH_CAP_IBRS_ALL) |
2866 | setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); |
2867 | |
2868 | - if (x86_match_cpu(cpu_no_meltdown)) |
2869 | + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { |
2870 | + setup_force_cpu_bug(X86_BUG_MDS); |
2871 | + if (cpu_matches(MSBDS_ONLY)) |
2872 | + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); |
2873 | + } |
2874 | + |
2875 | + if (cpu_matches(NO_MELTDOWN)) |
2876 | return; |
2877 | |
2878 | /* Rogue Data Cache Load? No! */ |
2879 | @@ -1027,7 +1045,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) |
2880 | |
2881 | setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); |
2882 | |
2883 | - if (x86_match_cpu(cpu_no_l1tf)) |
2884 | + if (cpu_matches(NO_L1TF)) |
2885 | return; |
2886 | |
2887 | setup_force_cpu_bug(X86_BUG_L1TF); |
2888 | diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c |
2889 | index f8c260d522ca..912d53939f4f 100644 |
2890 | --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c |
2891 | +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c |
2892 | @@ -91,7 +91,7 @@ static u64 get_prefetch_disable_bits(void) |
2893 | */ |
2894 | return 0xF; |
2895 | case INTEL_FAM6_ATOM_GOLDMONT: |
2896 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
2897 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
2898 | /* |
2899 | * SDM defines bits of MSR_MISC_FEATURE_CONTROL register |
2900 | * as: |
2901 | @@ -995,7 +995,7 @@ static int measure_cycles_perf_fn(void *_plr) |
2902 | |
2903 | switch (boot_cpu_data.x86_model) { |
2904 | case INTEL_FAM6_ATOM_GOLDMONT: |
2905 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
2906 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
2907 | l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1; |
2908 | l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1; |
2909 | break; |
2910 | diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c |
2911 | index 18bc9b51ac9b..086cf1d1d71d 100644 |
2912 | --- a/arch/x86/kernel/nmi.c |
2913 | +++ b/arch/x86/kernel/nmi.c |
2914 | @@ -34,6 +34,7 @@ |
2915 | #include <asm/x86_init.h> |
2916 | #include <asm/reboot.h> |
2917 | #include <asm/cache.h> |
2918 | +#include <asm/nospec-branch.h> |
2919 | |
2920 | #define CREATE_TRACE_POINTS |
2921 | #include <trace/events/nmi.h> |
2922 | @@ -533,6 +534,9 @@ nmi_restart: |
2923 | write_cr2(this_cpu_read(nmi_cr2)); |
2924 | if (this_cpu_dec_return(nmi_state)) |
2925 | goto nmi_restart; |
2926 | + |
2927 | + if (user_mode(regs)) |
2928 | + mds_user_clear_cpu_buffers(); |
2929 | } |
2930 | NOKPROBE_SYMBOL(do_nmi); |
2931 | |
2932 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c |
2933 | index e6db475164ed..0a5efd764914 100644 |
2934 | --- a/arch/x86/kernel/traps.c |
2935 | +++ b/arch/x86/kernel/traps.c |
2936 | @@ -58,6 +58,7 @@ |
2937 | #include <asm/alternative.h> |
2938 | #include <asm/fpu/xstate.h> |
2939 | #include <asm/trace/mpx.h> |
2940 | +#include <asm/nospec-branch.h> |
2941 | #include <asm/mpx.h> |
2942 | #include <asm/vm86.h> |
2943 | #include <asm/umip.h> |
2944 | @@ -387,6 +388,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
2945 | regs->ip = (unsigned long)general_protection; |
2946 | regs->sp = (unsigned long)&gpregs->orig_ax; |
2947 | |
2948 | + /* |
2949 | + * This situation can be triggered by userspace via |
2950 | + * modify_ldt(2) and the return does not take the regular |
2951 | + * user space exit, so a CPU buffer clear is required when |
2952 | + * MDS mitigation is enabled. |
2953 | + */ |
2954 | + mds_user_clear_cpu_buffers(); |
2955 | return; |
2956 | } |
2957 | #endif |
2958 | diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c |
2959 | index 6d5dc5dabfd7..03b7529333a6 100644 |
2960 | --- a/arch/x86/kernel/tsc.c |
2961 | +++ b/arch/x86/kernel/tsc.c |
2962 | @@ -636,7 +636,7 @@ unsigned long native_calibrate_tsc(void) |
2963 | case INTEL_FAM6_KABYLAKE_DESKTOP: |
2964 | crystal_khz = 24000; /* 24.0 MHz */ |
2965 | break; |
2966 | - case INTEL_FAM6_ATOM_DENVERTON: |
2967 | + case INTEL_FAM6_ATOM_GOLDMONT_X: |
2968 | crystal_khz = 25000; /* 25.0 MHz */ |
2969 | break; |
2970 | case INTEL_FAM6_ATOM_GOLDMONT: |
2971 | diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c |
2972 | index 27ef714d886c..3d0e9aeea7c8 100644 |
2973 | --- a/arch/x86/kernel/tsc_msr.c |
2974 | +++ b/arch/x86/kernel/tsc_msr.c |
2975 | @@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = { |
2976 | }; |
2977 | |
2978 | static const struct x86_cpu_id tsc_msr_cpu_ids[] = { |
2979 | - INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw), |
2980 | - INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv), |
2981 | - INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt), |
2982 | + INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw), |
2983 | + INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv), |
2984 | + INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt), |
2985 | + INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng), |
2986 | INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), |
2987 | - INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng), |
2988 | - INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann), |
2989 | + INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann), |
2990 | {} |
2991 | }; |
2992 | |
2993 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
2994 | index 98d13c6a64be..b810102a9cfa 100644 |
2995 | --- a/arch/x86/kvm/cpuid.c |
2996 | +++ b/arch/x86/kvm/cpuid.c |
2997 | @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
2998 | /* cpuid 0x80000008.ebx */ |
2999 | const u32 kvm_cpuid_8000_0008_ebx_x86_features = |
3000 | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | |
3001 | - F(AMD_SSB_NO); |
3002 | + F(AMD_SSB_NO) | F(AMD_STIBP); |
3003 | |
3004 | /* cpuid 0xC0000001.edx */ |
3005 | const u32 kvm_cpuid_C000_0001_edx_x86_features = |
3006 | @@ -412,7 +412,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
3007 | /* cpuid 7.0.edx*/ |
3008 | const u32 kvm_cpuid_7_0_edx_x86_features = |
3009 | F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | |
3010 | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); |
3011 | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | |
3012 | + F(MD_CLEAR); |
3013 | |
3014 | /* all calls to cpuid_count() should be made on the same cpu */ |
3015 | get_cpu(); |
3016 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
3017 | index 215339c7d161..73d6d585dd66 100644 |
3018 | --- a/arch/x86/kvm/vmx.c |
3019 | +++ b/arch/x86/kvm/vmx.c |
3020 | @@ -10765,8 +10765,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3021 | evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? |
3022 | (unsigned long)¤t_evmcs->host_rsp : 0; |
3023 | |
3024 | + /* L1D Flush includes CPU buffer clear to mitigate MDS */ |
3025 | if (static_branch_unlikely(&vmx_l1d_should_flush)) |
3026 | vmx_l1d_flush(vcpu); |
3027 | + else if (static_branch_unlikely(&mds_user_clear)) |
3028 | + mds_clear_cpu_buffers(); |
3029 | |
3030 | asm( |
3031 | /* Store host registers */ |
3032 | @@ -11127,8 +11130,8 @@ free_vcpu: |
3033 | return ERR_PTR(err); |
3034 | } |
3035 | |
3036 | -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" |
3037 | -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" |
3038 | +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" |
3039 | +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" |
3040 | |
3041 | static int vmx_vm_init(struct kvm *kvm) |
3042 | { |
3043 | diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c |
3044 | index c1fc1ae6b429..4df3e5c89d57 100644 |
3045 | --- a/arch/x86/mm/pti.c |
3046 | +++ b/arch/x86/mm/pti.c |
3047 | @@ -35,6 +35,7 @@ |
3048 | #include <linux/spinlock.h> |
3049 | #include <linux/mm.h> |
3050 | #include <linux/uaccess.h> |
3051 | +#include <linux/cpu.h> |
3052 | |
3053 | #include <asm/cpufeature.h> |
3054 | #include <asm/hypervisor.h> |
3055 | @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void) |
3056 | } |
3057 | } |
3058 | |
3059 | - if (cmdline_find_option_bool(boot_command_line, "nopti")) { |
3060 | + if (cmdline_find_option_bool(boot_command_line, "nopti") || |
3061 | + cpu_mitigations_off()) { |
3062 | pti_mode = PTI_FORCE_OFF; |
3063 | pti_print_if_insecure("disabled on command line."); |
3064 | return; |
3065 | diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c |
3066 | index 034813d4ab1e..41dae0f0d898 100644 |
3067 | --- a/arch/x86/platform/atom/punit_atom_debug.c |
3068 | +++ b/arch/x86/platform/atom/punit_atom_debug.c |
3069 | @@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void) |
3070 | (kernel_ulong_t)&drv_data } |
3071 | |
3072 | static const struct x86_cpu_id intel_punit_cpu_ids[] = { |
3073 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt), |
3074 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng), |
3075 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt), |
3076 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng), |
3077 | ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht), |
3078 | {} |
3079 | }; |
3080 | diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c |
3081 | index 5a0483e7bf66..31dce781364c 100644 |
3082 | --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c |
3083 | +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c |
3084 | @@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = { |
3085 | { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } |
3086 | |
3087 | static const struct x86_cpu_id bt_sfi_cpu_ids[] = { |
3088 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data), |
3089 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data), |
3090 | {} |
3091 | }; |
3092 | |
3093 | diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c |
3094 | index 969bf8d515c0..c651e206d796 100644 |
3095 | --- a/drivers/acpi/acpi_lpss.c |
3096 | +++ b/drivers/acpi/acpi_lpss.c |
3097 | @@ -292,7 +292,7 @@ static const struct lpss_device_desc bsw_spi_dev_desc = { |
3098 | #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, } |
3099 | |
3100 | static const struct x86_cpu_id lpss_cpu_ids[] = { |
3101 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */ |
3102 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */ |
3103 | ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ |
3104 | {} |
3105 | }; |
3106 | diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c |
3107 | index 06c31ec3cc70..9a8e286dd86f 100644 |
3108 | --- a/drivers/acpi/x86/utils.c |
3109 | +++ b/drivers/acpi/x86/utils.c |
3110 | @@ -54,7 +54,7 @@ static const struct always_present_id always_present_ids[] = { |
3111 | * Bay / Cherry Trail PWM directly poked by GPU driver in win10, |
3112 | * but Linux uses a separate PWM driver, harmless if not used. |
3113 | */ |
3114 | - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1), {}), |
3115 | + ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}), |
3116 | ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}), |
3117 | /* |
3118 | * The INT0002 device is necessary to clear wakeup interrupt sources |
3119 | diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c |
3120 | index eb9443d5bae1..2fd6ca1021c2 100644 |
3121 | --- a/drivers/base/cpu.c |
3122 | +++ b/drivers/base/cpu.c |
3123 | @@ -546,11 +546,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev, |
3124 | return sprintf(buf, "Not affected\n"); |
3125 | } |
3126 | |
3127 | +ssize_t __weak cpu_show_mds(struct device *dev, |
3128 | + struct device_attribute *attr, char *buf) |
3129 | +{ |
3130 | + return sprintf(buf, "Not affected\n"); |
3131 | +} |
3132 | + |
3133 | static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); |
3134 | static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); |
3135 | static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); |
3136 | static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); |
3137 | static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); |
3138 | +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); |
3139 | |
3140 | static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
3141 | &dev_attr_meltdown.attr, |
3142 | @@ -558,6 +565,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { |
3143 | &dev_attr_spectre_v2.attr, |
3144 | &dev_attr_spec_store_bypass.attr, |
3145 | &dev_attr_l1tf.attr, |
3146 | + &dev_attr_mds.attr, |
3147 | NULL |
3148 | }; |
3149 | |
3150 | diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c |
3151 | index a005711f909e..29f25d5d65e0 100644 |
3152 | --- a/drivers/cpufreq/intel_pstate.c |
3153 | +++ b/drivers/cpufreq/intel_pstate.c |
3154 | @@ -1779,7 +1779,7 @@ static const struct pstate_funcs knl_funcs = { |
3155 | static const struct x86_cpu_id intel_pstate_cpu_ids[] = { |
3156 | ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs), |
3157 | ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs), |
3158 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs), |
3159 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs), |
3160 | ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs), |
3161 | ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs), |
3162 | ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs), |
3163 | @@ -1796,7 +1796,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { |
3164 | ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs), |
3165 | ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs), |
3166 | ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs), |
3167 | - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs), |
3168 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs), |
3169 | ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), |
3170 | {} |
3171 | }; |
3172 | diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c |
3173 | index df28b65358d2..903a4f1fadcc 100644 |
3174 | --- a/drivers/edac/pnd2_edac.c |
3175 | +++ b/drivers/edac/pnd2_edac.c |
3176 | @@ -1541,7 +1541,7 @@ static struct dunit_ops dnv_ops = { |
3177 | |
3178 | static const struct x86_cpu_id pnd2_cpuids[] = { |
3179 | { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, |
3180 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops }, |
3181 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X, 0, (kernel_ulong_t)&dnv_ops }, |
3182 | { } |
3183 | }; |
3184 | MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); |
3185 | diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c |
3186 | index b2ccce5fb071..c4bb67ed8da3 100644 |
3187 | --- a/drivers/idle/intel_idle.c |
3188 | +++ b/drivers/idle/intel_idle.c |
3189 | @@ -1076,14 +1076,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { |
3190 | ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem), |
3191 | ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem), |
3192 | ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem), |
3193 | - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom), |
3194 | - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft), |
3195 | + ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom), |
3196 | + ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft), |
3197 | ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem), |
3198 | ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb), |
3199 | ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb), |
3200 | - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom), |
3201 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt), |
3202 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier), |
3203 | + ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom), |
3204 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt), |
3205 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier), |
3206 | ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht), |
3207 | ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb), |
3208 | ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt), |
3209 | @@ -1091,7 +1091,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { |
3210 | ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw), |
3211 | ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw), |
3212 | ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw), |
3213 | - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn), |
3214 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn), |
3215 | ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw), |
3216 | ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw), |
3217 | ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw), |
3218 | @@ -1104,8 +1104,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { |
3219 | ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), |
3220 | ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl), |
3221 | ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), |
3222 | - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt), |
3223 | - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), |
3224 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, idle_cpu_bxt), |
3225 | + ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv), |
3226 | {} |
3227 | }; |
3228 | |
3229 | @@ -1322,7 +1322,7 @@ static void intel_idle_state_table_update(void) |
3230 | ivt_idle_state_table_update(); |
3231 | break; |
3232 | case INTEL_FAM6_ATOM_GOLDMONT: |
3233 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3234 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3235 | bxt_idle_state_table_update(); |
3236 | break; |
3237 | case INTEL_FAM6_SKYLAKE_DESKTOP: |
3238 | diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c |
3239 | index c61109f7b793..57c1ec322e42 100644 |
3240 | --- a/drivers/mmc/host/sdhci-acpi.c |
3241 | +++ b/drivers/mmc/host/sdhci-acpi.c |
3242 | @@ -247,7 +247,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { |
3243 | static bool sdhci_acpi_byt(void) |
3244 | { |
3245 | static const struct x86_cpu_id byt[] = { |
3246 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, |
3247 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, |
3248 | {} |
3249 | }; |
3250 | |
3251 | diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c |
3252 | index 314e135014dc..30fbe2ea6eab 100644 |
3253 | --- a/drivers/pci/pci-mid.c |
3254 | +++ b/drivers/pci/pci-mid.c |
3255 | @@ -62,8 +62,8 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = { |
3256 | * arch/x86/platform/intel-mid/pwr.c. |
3257 | */ |
3258 | static const struct x86_cpu_id lpss_cpu_ids[] = { |
3259 | - ICPU(INTEL_FAM6_ATOM_PENWELL), |
3260 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD), |
3261 | + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID), |
3262 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID), |
3263 | {} |
3264 | }; |
3265 | |
3266 | diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c |
3267 | index a473dc51b18d..e89ad4964dc1 100644 |
3268 | --- a/drivers/platform/x86/intel_int0002_vgpio.c |
3269 | +++ b/drivers/platform/x86/intel_int0002_vgpio.c |
3270 | @@ -60,7 +60,7 @@ static const struct x86_cpu_id int0002_cpu_ids[] = { |
3271 | /* |
3272 | * Limit ourselves to Cherry Trail for now, until testing shows we |
3273 | * need to handle the INT0002 device on Baytrail too. |
3274 | - * ICPU(INTEL_FAM6_ATOM_SILVERMONT1), * Valleyview, Bay Trail * |
3275 | + * ICPU(INTEL_FAM6_ATOM_SILVERMONT), * Valleyview, Bay Trail * |
3276 | */ |
3277 | ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */ |
3278 | {} |
3279 | diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c |
3280 | index d79fbf924b13..5ad44204a9c3 100644 |
3281 | --- a/drivers/platform/x86/intel_mid_powerbtn.c |
3282 | +++ b/drivers/platform/x86/intel_mid_powerbtn.c |
3283 | @@ -125,8 +125,8 @@ static const struct mid_pb_ddata mrfld_ddata = { |
3284 | { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata } |
3285 | |
3286 | static const struct x86_cpu_id mid_pb_cpu_ids[] = { |
3287 | - ICPU(INTEL_FAM6_ATOM_PENWELL, mfld_ddata), |
3288 | - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, mrfld_ddata), |
3289 | + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID, mfld_ddata), |
3290 | + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, mrfld_ddata), |
3291 | {} |
3292 | }; |
3293 | |
3294 | diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c |
3295 | index 1423fa8710fd..b998d7da97fb 100644 |
3296 | --- a/drivers/platform/x86/intel_telemetry_debugfs.c |
3297 | +++ b/drivers/platform/x86/intel_telemetry_debugfs.c |
3298 | @@ -320,7 +320,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = { |
3299 | |
3300 | static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = { |
3301 | TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf), |
3302 | - TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf), |
3303 | + TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf), |
3304 | {} |
3305 | }; |
3306 | |
3307 | diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c |
3308 | index 2f889d6c270e..fcc6bee51a42 100644 |
3309 | --- a/drivers/platform/x86/intel_telemetry_pltdrv.c |
3310 | +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c |
3311 | @@ -192,7 +192,7 @@ static struct telemetry_plt_config telem_glk_config = { |
3312 | |
3313 | static const struct x86_cpu_id telemetry_cpu_ids[] = { |
3314 | TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config), |
3315 | - TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config), |
3316 | + TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config), |
3317 | {} |
3318 | }; |
3319 | |
3320 | diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c |
3321 | index 295d8dcba48c..8cbfcce57a06 100644 |
3322 | --- a/drivers/powercap/intel_rapl.c |
3323 | +++ b/drivers/powercap/intel_rapl.c |
3324 | @@ -1164,13 +1164,13 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { |
3325 | RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core), |
3326 | RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE, rapl_defaults_core), |
3327 | |
3328 | - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt), |
3329 | + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT, rapl_defaults_byt), |
3330 | RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht), |
3331 | - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng), |
3332 | - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann), |
3333 | + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT_MID, rapl_defaults_tng), |
3334 | + RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT_MID, rapl_defaults_ann), |
3335 | RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core), |
3336 | - RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, rapl_defaults_core), |
3337 | - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core), |
3338 | + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, rapl_defaults_core), |
3339 | + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_X, rapl_defaults_core), |
3340 | |
3341 | RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server), |
3342 | RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM, rapl_defaults_hsw_server), |
3343 | diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c |
3344 | index 1e47511a6bd5..d748527d7a38 100644 |
3345 | --- a/drivers/thermal/intel_soc_dts_thermal.c |
3346 | +++ b/drivers/thermal/intel_soc_dts_thermal.c |
3347 | @@ -45,7 +45,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) |
3348 | } |
3349 | |
3350 | static const struct x86_cpu_id soc_thermal_ids[] = { |
3351 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1, 0, |
3352 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0, |
3353 | BYT_SOC_DTS_APIC_IRQ}, |
3354 | {} |
3355 | }; |
3356 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h |
3357 | index 5041357d0297..57ae83c4d5f4 100644 |
3358 | --- a/include/linux/cpu.h |
3359 | +++ b/include/linux/cpu.h |
3360 | @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev, |
3361 | struct device_attribute *attr, char *buf); |
3362 | extern ssize_t cpu_show_l1tf(struct device *dev, |
3363 | struct device_attribute *attr, char *buf); |
3364 | +extern ssize_t cpu_show_mds(struct device *dev, |
3365 | + struct device_attribute *attr, char *buf); |
3366 | |
3367 | extern __printf(4, 5) |
3368 | struct device *cpu_device_create(struct device *parent, void *drvdata, |
3369 | @@ -187,4 +189,28 @@ static inline void cpu_smt_disable(bool force) { } |
3370 | static inline void cpu_smt_check_topology(void) { } |
3371 | #endif |
3372 | |
3373 | +/* |
3374 | + * These are used for a global "mitigations=" cmdline option for toggling |
3375 | + * optional CPU mitigations. |
3376 | + */ |
3377 | +enum cpu_mitigations { |
3378 | + CPU_MITIGATIONS_OFF, |
3379 | + CPU_MITIGATIONS_AUTO, |
3380 | + CPU_MITIGATIONS_AUTO_NOSMT, |
3381 | +}; |
3382 | + |
3383 | +extern enum cpu_mitigations cpu_mitigations; |
3384 | + |
3385 | +/* mitigations=off */ |
3386 | +static inline bool cpu_mitigations_off(void) |
3387 | +{ |
3388 | + return cpu_mitigations == CPU_MITIGATIONS_OFF; |
3389 | +} |
3390 | + |
3391 | +/* mitigations=auto,nosmt */ |
3392 | +static inline bool cpu_mitigations_auto_nosmt(void) |
3393 | +{ |
3394 | + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; |
3395 | +} |
3396 | + |
3397 | #endif /* _LINUX_CPU_H_ */ |
3398 | diff --git a/kernel/cpu.c b/kernel/cpu.c |
3399 | index dc250ec2c096..bc6c880a093f 100644 |
3400 | --- a/kernel/cpu.c |
3401 | +++ b/kernel/cpu.c |
3402 | @@ -2278,3 +2278,18 @@ void __init boot_cpu_hotplug_init(void) |
3403 | #endif |
3404 | this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); |
3405 | } |
3406 | + |
3407 | +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; |
3408 | + |
3409 | +static int __init mitigations_parse_cmdline(char *arg) |
3410 | +{ |
3411 | + if (!strcmp(arg, "off")) |
3412 | + cpu_mitigations = CPU_MITIGATIONS_OFF; |
3413 | + else if (!strcmp(arg, "auto")) |
3414 | + cpu_mitigations = CPU_MITIGATIONS_AUTO; |
3415 | + else if (!strcmp(arg, "auto,nosmt")) |
3416 | + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; |
3417 | + |
3418 | + return 0; |
3419 | +} |
3420 | +early_param("mitigations", mitigations_parse_cmdline); |
3421 | diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c |
3422 | index b74bbee111c6..c6c8d20be1d2 100644 |
3423 | --- a/sound/soc/intel/boards/bytcr_rt5651.c |
3424 | +++ b/sound/soc/intel/boards/bytcr_rt5651.c |
3425 | @@ -787,7 +787,7 @@ static struct snd_soc_card byt_rt5651_card = { |
3426 | }; |
3427 | |
3428 | static const struct x86_cpu_id baytrail_cpu_ids[] = { |
3429 | - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, /* Valleyview */ |
3430 | + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, /* Valleyview */ |
3431 | {} |
3432 | }; |
3433 | |
3434 | diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile |
3435 | index 2ab25aa38263..ff058bfbca3e 100644 |
3436 | --- a/tools/power/x86/turbostat/Makefile |
3437 | +++ b/tools/power/x86/turbostat/Makefile |
3438 | @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") |
3439 | endif |
3440 | |
3441 | turbostat : turbostat.c |
3442 | -CFLAGS += -Wall |
3443 | +CFLAGS += -Wall -I../../../include |
3444 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' |
3445 | CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' |
3446 | |
3447 | diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c |
3448 | index 83964f796edb..fbb53c952b73 100644 |
3449 | --- a/tools/power/x86/turbostat/turbostat.c |
3450 | +++ b/tools/power/x86/turbostat/turbostat.c |
3451 | @@ -2082,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model) |
3452 | switch (model) { |
3453 | case INTEL_FAM6_ATOM_GOLDMONT: |
3454 | case INTEL_FAM6_SKYLAKE_X: |
3455 | - case INTEL_FAM6_ATOM_DENVERTON: |
3456 | + case INTEL_FAM6_ATOM_GOLDMONT_X: |
3457 | return 1; |
3458 | } |
3459 | return 0; |
3460 | @@ -3149,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) |
3461 | pkg_cstate_limits = skx_pkg_cstate_limits; |
3462 | has_misc_feature_control = 1; |
3463 | break; |
3464 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ |
3465 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ |
3466 | no_MSR_MISC_PWR_MGMT = 1; |
3467 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ |
3468 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ |
3469 | pkg_cstate_limits = slv_pkg_cstate_limits; |
3470 | break; |
3471 | case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ |
3472 | @@ -3163,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) |
3473 | pkg_cstate_limits = phi_pkg_cstate_limits; |
3474 | break; |
3475 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3476 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3477 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ |
3478 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3479 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ |
3480 | pkg_cstate_limits = bxt_pkg_cstate_limits; |
3481 | break; |
3482 | default: |
3483 | @@ -3193,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model) |
3484 | return 0; |
3485 | |
3486 | switch (model) { |
3487 | - case INTEL_FAM6_ATOM_SILVERMONT1: |
3488 | - case INTEL_FAM6_ATOM_MERRIFIELD: |
3489 | - case INTEL_FAM6_ATOM_MOOREFIELD: |
3490 | + case INTEL_FAM6_ATOM_SILVERMONT: |
3491 | + case INTEL_FAM6_ATOM_SILVERMONT_MID: |
3492 | + case INTEL_FAM6_ATOM_AIRMONT_MID: |
3493 | return 1; |
3494 | } |
3495 | return 0; |
3496 | @@ -3207,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model) |
3497 | return 0; |
3498 | |
3499 | switch (model) { |
3500 | - case INTEL_FAM6_ATOM_DENVERTON: |
3501 | + case INTEL_FAM6_ATOM_GOLDMONT_X: |
3502 | return 1; |
3503 | } |
3504 | return 0; |
3505 | @@ -3724,8 +3724,8 @@ double get_tdp(unsigned int model) |
3506 | return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; |
3507 | |
3508 | switch (model) { |
3509 | - case INTEL_FAM6_ATOM_SILVERMONT1: |
3510 | - case INTEL_FAM6_ATOM_SILVERMONT2: |
3511 | + case INTEL_FAM6_ATOM_SILVERMONT: |
3512 | + case INTEL_FAM6_ATOM_SILVERMONT_X: |
3513 | return 30.0; |
3514 | default: |
3515 | return 135.0; |
3516 | @@ -3791,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model) |
3517 | } |
3518 | break; |
3519 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3520 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3521 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3522 | do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; |
3523 | if (rapl_joules) |
3524 | BIC_PRESENT(BIC_Pkg_J); |
3525 | @@ -3850,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model) |
3526 | BIC_PRESENT(BIC_RAMWatt); |
3527 | } |
3528 | break; |
3529 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ |
3530 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ |
3531 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ |
3532 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ |
3533 | do_rapl = RAPL_PKG | RAPL_CORES; |
3534 | if (rapl_joules) { |
3535 | BIC_PRESENT(BIC_Pkg_J); |
3536 | @@ -3861,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model) |
3537 | BIC_PRESENT(BIC_CorWatt); |
3538 | } |
3539 | break; |
3540 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ |
3541 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ |
3542 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; |
3543 | BIC_PRESENT(BIC_PKG__); |
3544 | BIC_PRESENT(BIC_RAM__); |
3545 | @@ -3884,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model) |
3546 | return; |
3547 | |
3548 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); |
3549 | - if (model == INTEL_FAM6_ATOM_SILVERMONT1) |
3550 | + if (model == INTEL_FAM6_ATOM_SILVERMONT) |
3551 | rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; |
3552 | else |
3553 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); |
3554 | @@ -4141,8 +4141,8 @@ int has_snb_msrs(unsigned int family, unsigned int model) |
3555 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ |
3556 | case INTEL_FAM6_SKYLAKE_X: /* SKX */ |
3557 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3558 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3559 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ |
3560 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3561 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ |
3562 | return 1; |
3563 | } |
3564 | return 0; |
3565 | @@ -4174,7 +4174,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model) |
3566 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
3567 | case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */ |
3568 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3569 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3570 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3571 | return 1; |
3572 | } |
3573 | return 0; |
3574 | @@ -4209,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model) |
3575 | if (!genuine_intel) |
3576 | return 0; |
3577 | switch (model) { |
3578 | - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */ |
3579 | - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */ |
3580 | + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ |
3581 | + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */ |
3582 | return 1; |
3583 | } |
3584 | return 0; |
3585 | @@ -4581,11 +4581,11 @@ void process_cpuid() |
3586 | case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */ |
3587 | crystal_hz = 24000000; /* 24.0 MHz */ |
3588 | break; |
3589 | - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */ |
3590 | + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */ |
3591 | crystal_hz = 25000000; /* 25.0 MHz */ |
3592 | break; |
3593 | case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ |
3594 | - case INTEL_FAM6_ATOM_GEMINI_LAKE: |
3595 | + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: |
3596 | crystal_hz = 19200000; /* 19.2 MHz */ |
3597 | break; |
3598 | default: |
3599 | diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile |
3600 | index f4534fb8b951..da781b430937 100644 |
3601 | --- a/tools/power/x86/x86_energy_perf_policy/Makefile |
3602 | +++ b/tools/power/x86/x86_energy_perf_policy/Makefile |
3603 | @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line") |
3604 | endif |
3605 | |
3606 | x86_energy_perf_policy : x86_energy_perf_policy.c |
3607 | -CFLAGS += -Wall |
3608 | +CFLAGS += -Wall -I../../../include |
3609 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' |
3610 | |
3611 | %: %.c |