Magellan Linux

Contents of /trunk/kernel-alx/patches-4.19/0142-4.19.43-all-fixes.patch



Revision 3421
Fri Aug 2 11:47:50 2019 UTC by niro
File size: 140299 byte(s)
-linux-4.19.43
1 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2 index 73318225a368..8718d4ad227b 100644
3 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4 +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5 @@ -477,6 +477,7 @@ What: /sys/devices/system/cpu/vulnerabilities
6 /sys/devices/system/cpu/vulnerabilities/spectre_v2
7 /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
8 /sys/devices/system/cpu/vulnerabilities/l1tf
9 + /sys/devices/system/cpu/vulnerabilities/mds
10 Date: January 2018
11 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
12 Description: Information about CPU vulnerabilities
13 @@ -489,8 +490,7 @@ Description: Information about CPU vulnerabilities
14 "Vulnerable" CPU is affected and no mitigation in effect
15 "Mitigation: $M" CPU is affected and mitigation $M is in effect
16
17 - Details about the l1tf file can be found in
18 - Documentation/admin-guide/l1tf.rst
19 + See also: Documentation/admin-guide/hw-vuln/index.rst
20
21 What: /sys/devices/system/cpu/smt
22 /sys/devices/system/cpu/smt/active
23 diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
24 new file mode 100644
25 index 000000000000..ffc064c1ec68
26 --- /dev/null
27 +++ b/Documentation/admin-guide/hw-vuln/index.rst
28 @@ -0,0 +1,13 @@
29 +========================
30 +Hardware vulnerabilities
31 +========================
32 +
33 +This section describes CPU vulnerabilities and provides an overview of the
34 +possible mitigations along with guidance for selecting mitigations if they
35 +are configurable at compile, boot or run time.
36 +
37 +.. toctree::
38 + :maxdepth: 1
39 +
40 + l1tf
41 + mds
42 diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
43 new file mode 100644
44 index 000000000000..31653a9f0e1b
45 --- /dev/null
46 +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
47 @@ -0,0 +1,615 @@
48 +L1TF - L1 Terminal Fault
49 +========================
50 +
51 +L1 Terminal Fault is a hardware vulnerability which allows unprivileged
52 +speculative access to data which is available in the Level 1 Data Cache
53 +when the page table entry controlling the virtual address, which is used
54 +for the access, has the Present bit cleared or other reserved bits set.
55 +
56 +Affected processors
57 +-------------------
58 +
59 +This vulnerability affects a wide range of Intel processors. The
60 +vulnerability is not present on:
61 +
62 + - Processors from AMD, Centaur and other non Intel vendors
63 +
64 + - Older processor models, where the CPU family is < 6
65 +
66 + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
67 + Penwell, Pineview, Silvermont, Airmont, Merrifield)
68 +
69 + - The Intel XEON PHI family
70 +
71 + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
72 + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
73 + by the Meltdown vulnerability either. These CPUs should become
74 + available by the end of 2018.
75 +
76 +Whether a processor is affected or not can be read out from the L1TF
77 +vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
78 +
79 +Related CVEs
80 +------------
81 +
82 +The following CVE entries are related to the L1TF vulnerability:
83 +
84 + ============= ================= ==============================
85 + CVE-2018-3615 L1 Terminal Fault SGX related aspects
86 + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
87 + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
88 + ============= ================= ==============================
89 +
90 +Problem
91 +-------
92 +
93 +If an instruction accesses a virtual address for which the relevant page
94 +table entry (PTE) has the Present bit cleared or other reserved bits set,
95 +then speculative execution ignores the invalid PTE and loads the referenced
96 +data if it is present in the Level 1 Data Cache, as if the page referenced
97 +by the address bits in the PTE was still present and accessible.
98 +
99 +While this is a purely speculative mechanism and the instruction will
100 +eventually raise a page fault when it is retired, the mere act of loading
101 +the data and making it available to other speculative instructions opens up
102 +the opportunity for side channel attacks by unprivileged malicious code,
103 +similar to the Meltdown attack.
104 +
105 +While Meltdown breaks the user space to kernel space protection, L1TF
106 +allows attacking any physical memory address in the system and the attack
107 +works across all protection domains. It allows an attack on SGX and also
108 +works from inside virtual machines because the speculation bypasses the
109 +extended page table (EPT) protection mechanism.
110 +
111 +
112 +Attack scenarios
113 +----------------
114 +
115 +1. Malicious user space
116 +^^^^^^^^^^^^^^^^^^^^^^^
117 +
118 + Operating Systems store arbitrary information in the address bits of a
119 + PTE which is marked non-present. This allows a malicious user space
120 + application to attack the physical memory to which these PTEs resolve.
121 + In some cases user-space can maliciously influence the information
122 + encoded in the address bits of the PTE, thus making attacks more
123 + deterministic and more practical.
124 +
125 + The Linux kernel contains a mitigation for this attack vector, PTE
126 + inversion, which is permanently enabled and has no performance
127 + impact. The kernel ensures that the address bits of PTEs which are not
128 + marked present never point to cacheable physical memory space.
129 +
130 + A system with an up to date kernel is protected against attacks from
131 + malicious user space applications.
132 +
133 +2. Malicious guest in a virtual machine
134 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
135 +
136 + The fact that L1TF breaks all domain protections allows malicious guest
137 + OSes, which can control the PTEs directly, and malicious guest user
138 + space applications, which run on an unprotected guest kernel lacking the
139 + PTE inversion mitigation for L1TF, to attack physical host memory.
140 +
141 + A special aspect of L1TF in the context of virtualization is simultaneous
142 + multi-threading (SMT). The Intel implementation of SMT is called
143 + HyperThreading. The fact that Hyperthreads on the affected processors
144 + share the L1 Data Cache (L1D) is important for this. As the flaw only
145 + allows attacking data which is present in the L1D, a malicious guest
146 + running on one Hyperthread can attack the data which is brought into
147 + the L1D by the context which runs on the sibling Hyperthread of the same
148 + physical core. This context can be host OS, host user space or a different guest.
149 +
150 + If the processor does not support Extended Page Tables, the attack is
151 + only possible when the hypervisor does not sanitize the content of the
152 + effective (shadow) page tables.
153 +
154 + While solutions exist to mitigate these attack vectors fully, these
155 + mitigations are not enabled by default in the Linux kernel because they
156 + can affect performance significantly. The kernel provides several
157 + mechanisms which can be utilized to address the problem depending on the
158 + deployment scenario. The mitigations, their protection scope and impact
159 + are described in the next sections.
160 +
161 + The default mitigations and the rationale for choosing them are explained
162 + at the end of this document. See :ref:`default_mitigations`.
163 +
164 +.. _l1tf_sys_info:
165 +
166 +L1TF system information
167 +-----------------------
168 +
169 +The Linux kernel provides a sysfs interface to enumerate the current L1TF
170 +status of the system: whether the system is vulnerable, and which
171 +mitigations are active. The relevant sysfs file is:
172 +
173 +/sys/devices/system/cpu/vulnerabilities/l1tf
174 +
175 +The possible values in this file are:
176 +
177 + =========================== ===============================
178 + 'Not affected' The processor is not vulnerable
179 + 'Mitigation: PTE Inversion' The host protection is active
180 + =========================== ===============================
181 +
182 +If KVM/VMX is enabled and the processor is vulnerable then the following
183 +information is appended to the 'Mitigation: PTE Inversion' part:
184 +
185 + - SMT status:
186 +
187 + ===================== ================
188 + 'VMX: SMT vulnerable' SMT is enabled
189 + 'VMX: SMT disabled' SMT is disabled
190 + ===================== ================
191 +
192 + - L1D Flush mode:
193 +
194 + ================================ ====================================
195 + 'L1D vulnerable' L1D flushing is disabled
196 +
197 + 'L1D conditional cache flushes' L1D flush is conditionally enabled
198 +
199 + 'L1D cache flushes' L1D flush is unconditionally enabled
200 + ================================ ====================================
201 +
202 +The resulting grade of protection is discussed in the following sections.
203 +
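For quick inspection, a minimal Python sketch can read and decompose this file; splitting on ';' between the base state and the appended VMX details is an assumption based on the format shown above:

    from pathlib import Path

    PATH = Path("/sys/devices/system/cpu/vulnerabilities/l1tf")

    def l1tf_status():
        try:
            raw = PATH.read_text().strip()
        except FileNotFoundError:
            return None   # kernel does not report L1TF status
        base, _, vmx = raw.partition(";")
        return base.strip(), (vmx.strip() or None)

    print(l1tf_status())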
204 +
205 +Host mitigation mechanism
206 +-------------------------
207 +
208 +The kernel is unconditionally protected against L1TF attacks from malicious
209 +user space running on the host.
210 +
211 +
212 +Guest mitigation mechanisms
213 +---------------------------
214 +
215 +.. _l1d_flush:
216 +
217 +1. L1D flush on VMENTER
218 +^^^^^^^^^^^^^^^^^^^^^^^
219 +
220 + To make sure that a guest cannot attack data which is present in the L1D,
221 + the hypervisor flushes the L1D before entering the guest.
222 +
223 + Flushing the L1D evicts not only the data which should not be accessed
224 + by a potentially malicious guest, it also flushes the guest
225 + data. Flushing the L1D has a performance impact as the processor has to
226 + bring the flushed guest data back into the L1D. Depending on the
227 + frequency of VMEXIT/VMENTER and the type of computations in the guest,
228 + performance degradation in the range of 1% to 50% has been observed. For
229 + scenarios where guest VMEXIT/VMENTER are rare, the performance impact is
230 + minimal. Virtio and mechanisms like posted interrupts are designed to
231 + confine the VMEXITs to a bare minimum, but specific configurations and
232 + application scenarios might still suffer from a high VMEXIT rate.
233 +
234 + The kernel provides two L1D flush modes:
235 + - conditional ('cond')
236 + - unconditional ('always')
237 +
238 + The conditional mode avoids L1D flushing after VMEXITs which execute
239 + only audited code paths before the corresponding VMENTER. These code
240 + paths have been verified not to expose secrets or other
241 + interesting data to an attacker, but they can leak information about the
242 + address space layout of the hypervisor.
243 +
244 + Unconditional mode flushes L1D on all VMENTER invocations and provides
245 + maximum protection. It has a higher overhead than the conditional
246 + mode. The overhead cannot be quantified correctly as it depends on the
247 + workload scenario and the resulting number of VMEXITs.
248 +
249 + The general recommendation is to enable L1D flush on VMENTER. The kernel
250 + defaults to conditional mode on affected processors.
251 +
252 + **Note** that L1D flush does not prevent the SMT problem because the
253 + sibling thread will also bring back its data into the L1D which makes it
254 + attackable again.
255 +
256 + L1D flush can be controlled by the administrator via the kernel command
257 + line and sysfs control files. See :ref:`mitigation_control_command_line`
258 + and :ref:`mitigation_control_kvm`.
259 +
260 +.. _guest_confinement:
261 +
262 +2. Guest VCPU confinement to dedicated physical cores
263 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
264 +
265 + To address the SMT problem, it is possible to make a guest or a group of
266 + guests affine to one or more physical cores. The proper mechanism for
267 + that is to utilize exclusive cpusets to ensure that no other guest or
268 + host tasks can run on these cores.
269 +
270 + If only a single guest or related guests run on sibling SMT threads on
271 + the same physical core then they can only attack their own memory and
272 + restricted parts of the host memory.
273 +
274 + Host memory is attackable when one of the sibling SMT threads runs in
275 + host OS (hypervisor) context and the other in guest context. The amount
276 + of valuable information from the host OS context depends on the context
277 + which the host OS executes, i.e. interrupts, soft interrupts and kernel
278 + threads. The amount of valuable data from these contexts cannot be
279 + declared as non-interesting for an attacker without deep inspection of
280 + the code.
281 +
282 + **Note** that assigning guests to a fixed set of physical cores affects
283 + the ability of the scheduler to do load balancing and might have
284 + negative effects on CPU utilization depending on the hosting
285 + scenario. Disabling SMT might be a viable alternative for particular
286 + scenarios.
287 +
288 + For further information about confining guests to a single or to a group
289 + of cores consult the cpusets documentation:
290 +
291 + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
292 +
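As an illustrative sketch of this approach (not a definitive recipe): the mount point, group name and core list below are assumptions, a cgroup-v1 style cpuset mount is assumed, and root privileges are required:

    import os

    CPUSET_ROOT = "/sys/fs/cgroup/cpuset"            # assumed mount point
    GROUP = os.path.join(CPUSET_ROOT, "guest0")      # hypothetical group
    GUEST_CPUS = "2-3"   # assumed: both SMT siblings of one physical core

    def confine(vcpu_tids):
        # Create the cpuset and make its CPUs exclusive so no other
        # guest or host task can be scheduled on these cores.
        os.makedirs(GROUP, exist_ok=True)
        with open(os.path.join(GROUP, "cpuset.cpus"), "w") as f:
            f.write(GUEST_CPUS)
        with open(os.path.join(GROUP, "cpuset.mems"), "w") as f:
            f.write("0")                             # single memory node
        with open(os.path.join(GROUP, "cpuset.cpu_exclusive"), "w") as f:
            f.write("1")
        # Move each VCPU thread into the cpuset, one write per thread.
        for tid in vcpu_tids:
            with open(os.path.join(GROUP, "tasks"), "w") as f:
                f.write(str(tid))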
293 +.. _interrupt_isolation:
294 +
295 +3. Interrupt affinity
296 +^^^^^^^^^^^^^^^^^^^^^
297 +
298 + Interrupts can be made affine to logical CPUs. This is not universally
299 + true because there are types of interrupts which are truly per-CPU
300 + interrupts, e.g. the local timer interrupt. Aside from that, multi-queue
301 + devices affine their interrupts to single CPUs or groups of CPUs per
302 + queue without allowing the administrator to control the affinities.
303 +
304 + Moving the interrupts which can be affinity controlled away from CPUs
305 + which run untrusted guests reduces the attack vector space.
306 +
307 + Whether the interrupts which are affine to CPUs running untrusted
308 + guests provide interesting data for an attacker depends on the system
309 + configuration and the scenarios which run on the system. While for some
310 + of the interrupts it can be assumed that they won't expose interesting
311 + information beyond exposing hints about the host OS memory layout, there
312 + is no way to make general assumptions.
313 +
314 + Interrupt affinity can be controlled by the administrator via the
315 + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
316 + available at:
317 +
318 + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
319 +
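A small sketch of that repinning, assuming root privileges and an illustrative list of CPUs reserved for the host; per-CPU and managed interrupts reject the write and are skipped:

    import os

    HOST_CPUS = "0-1"   # assumed: CPUs that do not run untrusted guests

    def repin_irqs():
        for entry in os.listdir("/proc/irq"):
            if not entry.isdigit():
                continue
            path = "/proc/irq/%s/smp_affinity_list" % entry
            try:
                with open(path, "w") as f:
                    f.write(HOST_CPUS)
            except OSError:
                pass   # truly per-CPU or managed interrupt; skip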
320 +.. _smt_control:
321 +
322 +4. SMT control
323 +^^^^^^^^^^^^^^
324 +
325 + To prevent the SMT issues of L1TF it might be necessary to disable SMT
326 + completely. Disabling SMT can have a significant performance impact, but
327 + the impact depends on the hosting scenario and the type of workloads.
328 + The impact of disabling SMT also needs to be weighed against the impact
329 + of other mitigation solutions like confining guests to dedicated cores.
330 +
331 + The kernel provides a sysfs interface to retrieve the status of SMT and
332 + to control it. It also provides a kernel command line interface to
333 + control SMT.
334 +
335 + The kernel command line interface consists of the following options:
336 +
337 + =========== ==========================================================
338 + nosmt Affects the bring-up of the secondary CPUs during boot. The
339 + kernel tries to bring all present CPUs online during the
340 + boot process. "nosmt" makes sure that from each physical
341 + core only one - the so-called primary (hyper) thread - is
342 + activated. Due to a design flaw of Intel processors related
343 + to Machine Check Exceptions, the non-primary siblings have
344 + to be brought up at least partially and are then shut down
345 + again. "nosmt" can be undone via the sysfs interface.
346 +
347 + nosmt=force Has the same effect as "nosmt" but it does not allow
348 + undoing the SMT disable via the sysfs interface.
349 + =========== ==========================================================
350 +
351 + The sysfs interface provides two files:
352 +
353 + - /sys/devices/system/cpu/smt/control
354 + - /sys/devices/system/cpu/smt/active
355 +
356 + /sys/devices/system/cpu/smt/control:
357 +
358 + This file allows reading out the SMT control state and provides the
359 + ability to disable or (re)enable SMT. The possible states are:
360 +
361 + ============== ===================================================
362 + on SMT is supported by the CPU and enabled. All
363 + logical CPUs can be onlined and offlined without
364 + restrictions.
365 +
366 + off SMT is supported by the CPU and disabled. Only
367 + the so called primary SMT threads can be onlined
368 + and offlined without restrictions. An attempt to
369 + online a non-primary sibling is rejected.
370 +
371 + forceoff Same as 'off' but the state cannot be controlled.
372 + Attempts to write to the control file are rejected.
373 +
374 + notsupported The processor does not support SMT. It's therefore
375 + not affected by the SMT implications of L1TF.
376 + Attempts to write to the control file are rejected.
377 + ============== ===================================================
378 +
379 + The possible states which can be written into this file to control SMT
380 + state are:
381 +
382 + - on
383 + - off
384 + - forceoff
385 +
386 + /sys/devices/system/cpu/smt/active:
387 +
388 + This file reports whether SMT is enabled and active, i.e. if on any
389 + physical core two or more sibling threads are online.
390 +
391 + SMT control is also possible at boot time via the l1tf kernel command
392 + line parameter in combination with L1D flush control. See
393 + :ref:`mitigation_control_command_line`.
394 +
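A minimal sketch of driving these two files from Python; writes require root and fail in the 'forceoff' and 'notsupported' states, as described above:

    from pathlib import Path

    SMT = Path("/sys/devices/system/cpu/smt")

    def smt_state():
        # Returns e.g. ('on', '1') on an SMT-enabled machine.
        return ((SMT / "control").read_text().strip(),
                (SMT / "active").read_text().strip())

    def disable_smt():
        control, _active = smt_state()
        if control in ("off", "forceoff", "notsupported"):
            return control        # already off or not controllable
        (SMT / "control").write_text("off")
        return (SMT / "control").read_text().strip()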
395 +5. Disabling EPT
396 +^^^^^^^^^^^^^^^^
397 +
398 + Disabling EPT for virtual machines provides full mitigation for L1TF even
399 + with SMT enabled, because the effective page tables for guests are
400 + managed and sanitized by the hypervisor. However, disabling EPT has a
401 + significant performance impact, especially when the Meltdown mitigation
402 + KPTI is enabled.
403 +
404 + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
405 +
406 +There is ongoing research and development for new mitigation mechanisms to
407 +address the performance impact of disabling SMT or EPT.
408 +
409 +.. _mitigation_control_command_line:
410 +
411 +Mitigation control on the kernel command line
412 +---------------------------------------------
413 +
414 +The kernel command line allows controlling the L1TF mitigations at boot
415 +time with the option "l1tf=". The valid arguments for this option are:
416 +
417 + ============ =============================================================
418 + full Provides all available mitigations for the L1TF
419 + vulnerability. Disables SMT and enables all mitigations in
420 + the hypervisors, i.e. unconditional L1D flushing
421 +
422 + SMT control and L1D flush control via the sysfs interface
423 + is still possible after boot. Hypervisors will issue a
424 + warning when the first VM is started in a potentially
425 + insecure configuration, i.e. SMT enabled or L1D flush
426 + disabled.
427 +
428 + full,force Same as 'full', but disables SMT and L1D flush runtime
429 + control. Implies the 'nosmt=force' command line option.
430 + (i.e. sysfs control of SMT is disabled.)
431 +
432 + flush Leaves SMT enabled and enables the default hypervisor
433 + mitigation, i.e. conditional L1D flushing
434 +
435 + SMT control and L1D flush control via the sysfs interface
436 + is still possible after boot. Hypervisors will issue a
437 + warning when the first VM is started in a potentially
438 + insecure configuration, i.e. SMT enabled or L1D flush
439 + disabled.
440 +
441 + flush,nosmt Disables SMT and enables the default hypervisor mitigation,
442 + i.e. conditional L1D flushing.
443 +
444 + SMT control and L1D flush control via the sysfs interface
445 + is still possible after boot. Hypervisors will issue a
446 + warning when the first VM is started in a potentially
447 + insecure configuration, i.e. SMT enabled or L1D flush
448 + disabled.
449 +
450 + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
451 + started in a potentially insecure configuration.
452 +
453 + off Disables hypervisor mitigations and doesn't emit any
454 + warnings.
455 + It also drops the swap size and available RAM limit restrictions
456 + on both hypervisor and bare metal.
457 +
458 + ============ =============================================================
459 +
460 +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
461 +
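A small sketch for reporting which 'l1tf=' mode the system was booted with, falling back to the documented 'flush' default when the option is absent:

    def l1tf_cmdline():
        with open("/proc/cmdline") as f:
            for opt in f.read().split():
                if opt.startswith("l1tf="):
                    return opt[len("l1tf="):]
        return "flush"   # kernel default when the option is not given

    print("l1tf mode:", l1tf_cmdline())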
462 +
463 +.. _mitigation_control_kvm:
464 +
465 +Mitigation control for KVM - module parameter
466 +-------------------------------------------------------------
467 +
468 +The KVM hypervisor mitigation mechanism, flushing the L1D cache when
469 +entering a guest, can be controlled with a module parameter.
470 +
471 +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
472 +following arguments:
473 +
474 + ============ ==============================================================
475 + always L1D cache flush on every VMENTER.
476 +
477 + cond Flush L1D on VMENTER only when the code between VMEXIT and
478 + VMENTER can leak host memory which is considered
479 + interesting for an attacker. This can still leak host memory
480 + which allows e.g. determining the host's address space layout.
481 +
482 + never Disables the mitigation
483 + ============ ==============================================================
484 +
485 +The parameter can be provided on the kernel command line, as a module
486 +parameter when loading the module, and modified at runtime via the sysfs
487 +file:
488 +
489 +/sys/module/kvm_intel/parameters/vmentry_l1d_flush
490 +
491 +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
492 +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
493 +module parameter is ignored and writes to the sysfs file are rejected.
494 +
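A minimal sketch for reading and switching the mode at runtime; the parameter file only exists once the kvm_intel module is loaded, and the write requires root (and is rejected under 'l1tf=full,force'):

    PARAM = "/sys/module/kvm_intel/parameters/vmentry_l1d_flush"

    def get_flush_mode():
        with open(PARAM) as f:
            return f.read().strip()

    def set_flush_mode(mode):
        if mode not in ("always", "cond", "never"):
            raise ValueError("invalid vmentry_l1d_flush mode: %s" % mode)
        with open(PARAM, "w") as f:
            f.write(mode)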
495 +.. _mitigation_selection:
496 +
497 +Mitigation selection guide
498 +--------------------------
499 +
500 +1. No virtualization in use
501 +^^^^^^^^^^^^^^^^^^^^^^^^^^^
502 +
503 + The system is protected by the kernel unconditionally and no further
504 + action is required.
505 +
506 +2. Virtualization with trusted guests
507 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
508 +
509 + If the guest comes from a trusted source and the guest OS kernel is
510 + guaranteed to have the L1TF mitigations in place the system is fully
511 + protected against L1TF and no further action is required.
512 +
513 + To avoid the overhead of the default L1D flushing on VMENTER the
514 + administrator can disable the flushing via the kernel command line and
515 + sysfs control files. See :ref:`mitigation_control_command_line` and
516 + :ref:`mitigation_control_kvm`.
517 +
518 +
519 +3. Virtualization with untrusted guests
520 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
521 +
522 +3.1. SMT not supported or disabled
523 +""""""""""""""""""""""""""""""""""
524 +
525 + If SMT is not supported by the processor or disabled in the BIOS or by
526 + the kernel, it's only required to enforce L1D flushing on VMENTER.
527 +
528 + Conditional L1D flushing is the default behaviour and can be tuned. See
529 + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
530 +
531 +3.2. EPT not supported or disabled
532 +""""""""""""""""""""""""""""""""""
533 +
534 + If EPT is not supported by the processor or disabled in the hypervisor,
535 + the system is fully protected. SMT can stay enabled and L1D flushing on
536 + VMENTER is not required.
537 +
538 + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
539 +
540 +3.3. SMT and EPT supported and active
541 +"""""""""""""""""""""""""""""""""""""
542 +
543 + If SMT and EPT are supported and active then various degrees of
544 + mitigations can be employed:
545 +
546 + - L1D flushing on VMENTER:
547 +
548 + L1D flushing on VMENTER is the minimal protection requirement, but it
549 + is only potent in combination with other mitigation methods.
550 +
551 + Conditional L1D flushing is the default behaviour and can be tuned. See
552 + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
553 +
554 + - Guest confinement:
555 +
556 + Confinement of guests to a single or a group of physical cores which
557 + are not running any other processes can reduce the attack surface
558 + significantly, but interrupts, soft interrupts and kernel threads can
559 + still expose valuable data to a potential attacker. See
560 + :ref:`guest_confinement`.
561 +
562 + - Interrupt isolation:
563 +
564 + Isolating the guest CPUs from interrupts can reduce the attack surface
565 + further, but still allows a malicious guest to explore a limited amount
566 + of host physical memory. This can at least be used to gain knowledge
567 + about the host address space layout. The interrupts which have a fixed
568 + affinity to the CPUs which run the untrusted guests can, depending on
569 + the scenario, still trigger soft interrupts and schedule kernel threads
570 + which might expose valuable information. See
571 + :ref:`interrupt_isolation`.
572 +
573 +The above three mitigation methods combined can provide protection to a
574 +certain degree, but the risk of the remaining attack surface has to be
575 +carefully analyzed. For full protection the following methods are
576 +available:
577 +
578 + - Disabling SMT:
579 +
580 + Disabling SMT and enforcing the L1D flushing provides the maximum
581 + amount of protection. This mitigation does not depend on any of the
582 + above mitigation methods.
583 +
584 + SMT control and L1D flushing can be tuned by the command line
585 + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
586 + time with the matching sysfs control files. See :ref:`smt_control`,
587 + :ref:`mitigation_control_command_line` and
588 + :ref:`mitigation_control_kvm`.
589 +
590 + - Disabling EPT:
591 +
592 + Disabling EPT provides the maximum amount of protection as well. It
593 + does not depend on any of the above mitigation methods. SMT can stay
594 + enabled and L1D flushing is not required, but the performance impact is
595 + significant.
596 +
597 + EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
598 + parameter.
599 +
600 +3.4. Nested virtual machines
601 +""""""""""""""""""""""""""""
602 +
603 +When nested virtualization is in use, three operating systems are involved:
604 +the bare metal hypervisor, the nested hypervisor and the nested virtual
605 +machine. VMENTER operations from the nested hypervisor into the nested
606 +guest will always be processed by the bare metal hypervisor. If KVM is the
607 +bare metal hypervisor it will:
608 +
609 + - Flush the L1D cache on every switch from the nested hypervisor to the
610 + nested virtual machine, so that the nested hypervisor's secrets are not
611 + exposed to the nested virtual machine;
612 +
613 + - Flush the L1D cache on every switch from the nested virtual machine to
614 + the nested hypervisor; this is a complex operation, and flushing the L1D
615 + cache prevents the bare metal hypervisor's secrets from being exposed
616 + to the nested virtual machine;
617 +
618 + - Instruct the nested hypervisor to not perform any L1D cache flush. This
619 + is an optimization to avoid double L1D flushing.
620 +
621 +
622 +.. _default_mitigations:
623 +
624 +Default mitigations
625 +-------------------
626 +
627 + The kernel default mitigations for vulnerable processors are:
628 +
629 + - PTE inversion to protect against malicious user space. This is done
630 + unconditionally and cannot be controlled. The swap storage is limited
631 + to ~16TB.
632 +
633 + - L1D conditional flushing on VMENTER when EPT is enabled for
634 + a guest.
635 +
636 + The kernel does not by default enforce the disabling of SMT, which leaves
637 + SMT systems vulnerable when running untrusted guests with EPT enabled.
638 +
639 + The rationale for this choice is:
640 +
641 + - Force disabling SMT can break existing setups, especially with
642 + unattended updates.
643 +
644 + - If regular users run untrusted guests on their machine, then L1TF is
645 + just an add-on to other malware which might be embedded in an untrusted
646 + guest, e.g. spam-bots or attacks on the local network.
647 +
648 + There is no technical way to prevent a user from running untrusted code
649 + on their machines blindly.
650 +
651 + - It's technically extremely unlikely and from today's knowledge even
652 + impossible that L1TF can be exploited via the most popular attack
653 + mechanisms like JavaScript because these mechanisms have no way to
654 + control PTEs. If this were possible and no other mitigation were
655 + available, then the default might be different.
656 +
657 + - The administrators of cloud and hosting setups have to carefully
658 + analyze the risk for their scenarios and make the appropriate
659 + mitigation choices, which might even vary across their deployed
660 + machines and also result in other changes of their overall setup.
661 + There is no way for the kernel to provide a sensible default for this
662 + kind of scenario.
663 diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
664 new file mode 100644
665 index 000000000000..e3a796c0d3a2
666 --- /dev/null
667 +++ b/Documentation/admin-guide/hw-vuln/mds.rst
668 @@ -0,0 +1,308 @@
669 +MDS - Microarchitectural Data Sampling
670 +======================================
671 +
672 +Microarchitectural Data Sampling is a hardware vulnerability which allows
673 +unprivileged speculative access to data which is available in various CPU
674 +internal buffers.
675 +
676 +Affected processors
677 +-------------------
678 +
679 +This vulnerability affects a wide range of Intel processors. The
680 +vulnerability is not present on:
681 +
682 + - Processors from AMD, Centaur and other non Intel vendors
683 +
684 + - Older processor models, where the CPU family is < 6
685 +
686 + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
687 +
688 + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the
689 + IA32_ARCH_CAPABILITIES MSR.
690 +
691 +Whether a processor is affected or not can be read out from the MDS
692 +vulnerability file in sysfs. See :ref:`mds_sys_info`.
693 +
694 +Not all processors are affected by all variants of MDS, but the mitigation
695 +is identical for all of them, so the kernel treats them as a single
696 +vulnerability.
697 +
698 +Related CVEs
699 +------------
700 +
701 +The following CVE entries are related to the MDS vulnerability:
702 +
703 + ============== ===== ===================================================
704 + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
705 + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
706 + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
707 + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
708 + ============== ===== ===================================================
709 +
710 +Problem
711 +-------
712 +
713 +When performing store, load or L1 refill operations, processors write data
714 +into temporary microarchitectural structures (buffers). The data in the
715 +buffer can be forwarded to load operations as an optimization.
716 +
717 +Under certain conditions, usually a fault/assist caused by a load
718 +operation, data unrelated to the load memory address can be speculatively
719 +forwarded from the buffers. Because the load operation causes a fault or
720 +assist and its result will be discarded, the forwarded data will not cause
721 +incorrect program execution or state changes. But a malicious operation
722 +may be able to forward this speculative data to a disclosure gadget which
723 +allows in turn to infer the value via a cache side channel attack.
724 +
725 +Because the buffers are potentially shared between Hyper-Threads, cross
726 +Hyper-Thread attacks are possible.
727 +
728 +Deeper technical information is available in the MDS specific x86
729 +architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
730 +
731 +
732 +Attack scenarios
733 +----------------
734 +
735 +Attacks against the MDS vulnerabilities can be mounted from malicious
736 +non-privileged user space applications running on hosts or guests.
737 +Malicious guest OSes can obviously mount attacks as well.
738 +
739 +Contrary to other speculation based vulnerabilities, the MDS vulnerability
740 +does not allow the attacker to control the memory target address. As a
741 +consequence the attacks are purely sampling based but, as demonstrated with
742 +the TLBleed attack, samples can be postprocessed successfully.
743 +
744 +Web-Browsers
745 +^^^^^^^^^^^^
746 +
747 + It's unclear whether attacks through Web-Browsers are possible at
748 + all. Exploitation through JavaScript is considered very unlikely,
749 + but other widely used web technologies like WebAssembly could possibly
750 + be abused.
751 +
752 +
753 +.. _mds_sys_info:
754 +
755 +MDS system information
756 +-----------------------
757 +
758 +The Linux kernel provides a sysfs interface to enumerate the current MDS
759 +status of the system: whether the system is vulnerable, and which
760 +mitigations are active. The relevant sysfs file is:
761 +
762 +/sys/devices/system/cpu/vulnerabilities/mds
763 +
764 +The possible values in this file are:
765 +
766 + .. list-table::
767 +
768 + * - 'Not affected'
769 + - The processor is not vulnerable
770 + * - 'Vulnerable'
771 + - The processor is vulnerable, but no mitigation enabled
772 + * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
773 + - The processor is vulnerable but microcode is not updated.
774 +
775 + The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
776 + * - 'Mitigation: Clear CPU buffers'
777 + - The processor is vulnerable and the CPU buffer clearing mitigation is
778 + enabled.
779 +
780 +If the processor is vulnerable then the following information is appended
781 +to the above information:
782 +
783 + ======================== ============================================
784 + 'SMT vulnerable' SMT is enabled
785 + 'SMT mitigated' SMT is enabled and mitigated
786 + 'SMT disabled' SMT is disabled
787 + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
788 + ======================== ============================================
789 +
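A minimal Python sketch for reading this file; splitting on ';' to separate the mitigation state from the appended SMT qualifier is an assumption based on the format described here:

    from pathlib import Path

    PATH = Path("/sys/devices/system/cpu/vulnerabilities/mds")

    def mds_status():
        try:
            raw = PATH.read_text().strip()
        except FileNotFoundError:
            return None, None   # kernel without MDS reporting
        state, _, smt = raw.partition(";")
        return state.strip(), (smt.strip() or None)

    state, smt = mds_status()
    print("state:", state, "| SMT:", smt)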
790 +.. _vmwerv:
791 +
792 +Best effort mitigation mode
793 +^^^^^^^^^^^^^^^^^^^^^^^^^^^
794 +
795 + If the processor is vulnerable, but the availability of the microcode based
796 + mitigation mechanism is not advertised via CPUID, the kernel selects a best
797 + effort mitigation mode. This mode invokes the mitigation instructions
798 + without a guarantee that they clear the CPU buffers.
799 +
800 + This is done to address virtualization scenarios where the host has the
801 + microcode update applied, but the hypervisor is not yet updated to expose
802 + the CPUID to the guest. If the host has updated microcode, the protection
803 + takes effect; otherwise a few CPU cycles are wasted pointlessly.
804 +
805 + The state in the mds sysfs file reflects this situation accordingly.
806 +
807 +
808 +Mitigation mechanism
809 +-------------------------
810 +
811 +The kernel detects the affected CPUs and the presence of the microcode
812 +which is required.
813 +
814 +If a CPU is affected and the microcode is available, then the kernel
815 +enables the mitigation by default. The mitigation can be controlled at boot
816 +time via a kernel command line option. See
817 +:ref:`mds_mitigation_control_command_line`.
818 +
819 +.. _cpu_buffer_clear:
820 +
821 +CPU buffer clearing
822 +^^^^^^^^^^^^^^^^^^^
823 +
824 + The mitigation for MDS clears the affected CPU buffers on return to user
825 + space and when entering a guest.
826 +
827 + If SMT is enabled it also clears the buffers on idle entry when the CPU
828 + is only affected by MSBDS and not any other MDS variant, because the
829 + other variants cannot be protected against cross Hyper-Thread attacks.
830 +
831 + For CPUs which are only affected by MSBDS, the user space, guest and idle
832 + transition mitigations are sufficient and SMT is not affected.
833 +
834 +.. _virt_mechanism:
835 +
836 +Virtualization mitigation
837 +^^^^^^^^^^^^^^^^^^^^^^^^^
838 +
839 + The protection for host to guest transition depends on the L1TF
840 + vulnerability of the CPU:
841 +
842 + - CPU is affected by L1TF:
843 +
844 + If the L1D flush mitigation is enabled and up-to-date microcode is
845 + available, the L1D flush mitigation automatically protects the
846 + guest transition.
847 +
848 + If the L1D flush mitigation is disabled, then the MDS mitigation is
849 + invoked explicitly when the host MDS mitigation is enabled.
850 +
851 + For details on L1TF and virtualization see:
852 + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <mitigation_control_kvm>`.
853 +
854 + - CPU is not affected by L1TF:
855 +
856 + CPU buffers are flushed before entering the guest when the host MDS
857 + mitigation is enabled.
858 +
859 + The resulting MDS protection matrix for the host to guest transition:
860 +
861 + ============ ===== ============= ============ =================
862 + L1TF MDS VMX-L1FLUSH Host MDS MDS-State
863 +
864 + Don't care No Don't care N/A Not affected
865 +
866 + Yes Yes Disabled Off Vulnerable
867 +
868 + Yes Yes Disabled Full Mitigated
869 +
870 + Yes Yes Enabled Don't care Mitigated
871 +
872 + No Yes N/A Off Vulnerable
873 +
874 + No Yes N/A Full Mitigated
875 + ============ ===== ============= ============ =================
876 +
877 + This only covers the host to guest transition, i.e. prevents leakage from
878 + host to guest, but does not protect the guest internally. Guests need to
879 + have their own protections.
880 +
881 +.. _xeon_phi:
882 +
883 +XEON PHI specific considerations
884 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
885 +
886 + The XEON PHI processor family is affected by MSBDS which can be exploited
887 + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
888 + the use of MWAIT in user space (Ring 3), which opens a potential attack
889 + vector for malicious user space. The exposure can be disabled on the
890 + kernel command line with the 'ring3mwait=disable' command line option.
891 +
892 + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
893 + before the CPU enters an idle state. As XEON PHI is not affected by L1TF
894 + either, disabling SMT is not required for full protection.
895 +
896 +.. _mds_smt_control:
897 +
898 +SMT control
899 +^^^^^^^^^^^
900 +
901 + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
902 + means on CPUs which are affected by MFBDS or MLPDS, it is necessary to
903 + disable SMT for full protection. These are most of the affected CPUs; the
904 + exception is XEON PHI, see :ref:`xeon_phi`.
905 +
906 + Disabling SMT can have a significant performance impact, but the impact
907 + depends on the type of workloads.
908 +
909 + See the relevant chapter in the L1TF mitigation documentation for details:
910 + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
911 +
912 +
913 +.. _mds_mitigation_control_command_line:
914 +
915 +Mitigation control on the kernel command line
916 +---------------------------------------------
917 +
918 +The kernel command line allows controlling the MDS mitigations at boot
919 +time with the option "mds=". The valid arguments for this option are:
920 +
921 + ============ =============================================================
922 + full If the CPU is vulnerable, enable all available mitigations
923 + for the MDS vulnerability: CPU buffer clearing on exit to
924 + userspace and when entering a VM. Idle transitions are
925 + protected as well if SMT is enabled.
926 +
927 + It does not automatically disable SMT.
928 +
929 + full,nosmt The same as mds=full, with SMT disabled on vulnerable
930 + CPUs. This is the complete mitigation.
931 +
932 + off Disables MDS mitigations completely.
933 +
934 + ============ =============================================================
935 +
936 +Not specifying this option is equivalent to "mds=full".
937 +
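A small sketch that reports the effective 'mds=' boot option next to the resulting sysfs state (paths as documented above):

    def mds_cmdline():
        with open("/proc/cmdline") as f:
            for opt in f.read().split():
                if opt.startswith("mds="):
                    return opt[len("mds="):]
        return "full"   # default when the option is not specified

    def mds_sysfs():
        with open("/sys/devices/system/cpu/vulnerabilities/mds") as f:
            return f.read().strip()

    print("mds= option:", mds_cmdline())
    print("sysfs state:", mds_sysfs())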
938 +
939 +Mitigation selection guide
940 +--------------------------
941 +
942 +1. Trusted userspace
943 +^^^^^^^^^^^^^^^^^^^^
944 +
945 + If all userspace applications are from a trusted source and do not
946 + execute untrusted code which is supplied externally, then the mitigation
947 + can be disabled.
948 +
949 +
950 +2. Virtualization with trusted guests
951 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
952 +
953 + The same considerations as for trusted user space above apply.
954 +
955 +3. Virtualization with untrusted guests
956 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
957 +
958 + The protection depends on the state of the L1TF mitigations.
959 + See :ref:`virt_mechanism`.
960 +
961 + If the MDS mitigation is enabled and SMT is disabled, guest to host and
962 + guest to guest attacks are prevented.
963 +
964 +.. _mds_default_mitigations:
965 +
966 +Default mitigations
967 +-------------------
968 +
969 + The kernel default mitigations for vulnerable processors are:
970 +
971 + - Enable CPU buffer clearing
972 +
973 + The kernel does not by default enforce the disabling of SMT, which leaves
974 + SMT systems vulnerable when running untrusted code. The same rationale as
975 + for L1TF applies.
976 + See :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <default_mitigations>`.
977 diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
978 index 0873685bab0f..89abc5057349 100644
979 --- a/Documentation/admin-guide/index.rst
980 +++ b/Documentation/admin-guide/index.rst
981 @@ -17,14 +17,12 @@ etc.
982 kernel-parameters
983 devices
984
985 -This section describes CPU vulnerabilities and provides an overview of the
986 -possible mitigations along with guidance for selecting mitigations if they
987 -are configurable at compile, boot or run time.
988 +This section describes CPU vulnerabilities and their mitigations.
989
990 .. toctree::
991 :maxdepth: 1
992
993 - l1tf
994 + hw-vuln/index
995
996 Here is a set of documents aimed at users who are trying to track down
997 problems and bugs in particular.
998 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
999 index 8b6567f7cb9b..a29301d6e6c6 100644
1000 --- a/Documentation/admin-guide/kernel-parameters.txt
1001 +++ b/Documentation/admin-guide/kernel-parameters.txt
1002 @@ -2079,7 +2079,7 @@
1003
1004 Default is 'flush'.
1005
1006 - For details see: Documentation/admin-guide/l1tf.rst
1007 + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst
1008
1009 l2cr= [PPC]
1010
1011 @@ -2319,6 +2319,32 @@
1012 Format: <first>,<last>
1013 Specifies range of consoles to be captured by the MDA.
1014
1015 + mds= [X86,INTEL]
1016 + Control mitigation for the Micro-architectural Data
1017 + Sampling (MDS) vulnerability.
1018 +
1019 + Certain CPUs are vulnerable to an exploit against CPU
1020 + internal buffers which can forward information to a
1021 + disclosure gadget under certain conditions.
1022 +
1023 + In vulnerable processors, the speculatively
1024 + forwarded data can be used in a cache side channel
1025 + attack to access data to which the attacker does
1026 + not have direct access.
1027 +
1028 + This parameter controls the MDS mitigation. The
1029 + options are:
1030 +
1031 + full - Enable MDS mitigation on vulnerable CPUs
1032 + full,nosmt - Enable MDS mitigation and disable
1033 + SMT on vulnerable CPUs
1034 + off - Unconditionally disable MDS mitigation
1035 +
1036 + Not specifying this option is equivalent to
1037 + mds=full.
1038 +
1039 + For details see: Documentation/admin-guide/hw-vuln/mds.rst
1040 +
1041 mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
1042 Amount of memory to be used when the kernel is not able
1043 to see the whole system memory or for test.
1044 @@ -2476,6 +2502,40 @@
1045 in the "bleeding edge" mini2440 support kernel at
1046 http://repo.or.cz/w/linux-2.6/mini2440.git
1047
1048 + mitigations=
1049 + [X86,PPC,S390] Control optional mitigations for CPU
1050 + vulnerabilities. This is a set of curated,
1051 + arch-independent options, each of which is an
1052 + aggregation of existing arch-specific options.
1053 +
1054 + off
1055 + Disable all optional CPU mitigations. This
1056 + improves system performance, but it may also
1057 + expose users to several CPU vulnerabilities.
1058 + Equivalent to: nopti [X86,PPC]
1059 + nospectre_v1 [PPC]
1060 + nobp=0 [S390]
1061 + nospectre_v2 [X86,PPC,S390]
1062 + spectre_v2_user=off [X86]
1063 + spec_store_bypass_disable=off [X86,PPC]
1064 + l1tf=off [X86]
1065 + mds=off [X86]
1066 +
1067 + auto (default)
1068 + Mitigate all CPU vulnerabilities, but leave SMT
1069 + enabled, even if it's vulnerable. This is for
1070 + users who don't want to be surprised by SMT
1071 + getting disabled across kernel upgrades, or who
1072 + have other ways of avoiding SMT-based attacks.
1073 + Equivalent to: (default behavior)
1074 +
1075 + auto,nosmt
1076 + Mitigate all CPU vulnerabilities, disabling SMT
1077 + if needed. This is for users who always want to
1078 + be fully mitigated, even if it means losing SMT.
1079 + Equivalent to: l1tf=flush,nosmt [X86]
1080 + mds=full,nosmt [X86]
1081 +
1082 mminit_loglevel=
1083 [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
1084 parameter allows control of the logging verbosity for
1085 diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst
1086 deleted file mode 100644
1087 index 9f5924f81f89..000000000000
1088 --- a/Documentation/admin-guide/l1tf.rst
1089 +++ /dev/null
1090 @@ -1,614 +0,0 @@
1091 -L1TF - L1 Terminal Fault
1092 -========================
1093 -
1094 -L1 Terminal Fault is a hardware vulnerability which allows unprivileged
1095 -speculative access to data which is available in the Level 1 Data Cache
1096 -when the page table entry controlling the virtual address, which is used
1097 -for the access, has the Present bit cleared or other reserved bits set.
1098 -
1099 -Affected processors
1100 --------------------
1101 -
1102 -This vulnerability affects a wide range of Intel processors. The
1103 -vulnerability is not present on:
1104 -
1105 - - Processors from AMD, Centaur and other non Intel vendors
1106 -
1107 - - Older processor models, where the CPU family is < 6
1108 -
1109 - - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
1110 - Penwell, Pineview, Silvermont, Airmont, Merrifield)
1111 -
1112 - - The Intel XEON PHI family
1113 -
1114 - - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
1115 - IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
1116 - by the Meltdown vulnerability either. These CPUs should become
1117 - available by end of 2018.
1118 -
1119 -Whether a processor is affected or not can be read out from the L1TF
1120 -vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
1121 -
1122 -Related CVEs
1123 -------------
1124 -
1125 -The following CVE entries are related to the L1TF vulnerability:
1126 -
1127 - ============= ================= ==============================
1128 - CVE-2018-3615 L1 Terminal Fault SGX related aspects
1129 - CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
1130 - CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
1131 - ============= ================= ==============================
1132 -
1133 -Problem
1134 --------
1135 -
1136 -If an instruction accesses a virtual address for which the relevant page
1137 -table entry (PTE) has the Present bit cleared or other reserved bits set,
1138 -then speculative execution ignores the invalid PTE and loads the referenced
1139 -data if it is present in the Level 1 Data Cache, as if the page referenced
1140 -by the address bits in the PTE was still present and accessible.
1141 -
1142 -While this is a purely speculative mechanism and the instruction will raise
1143 -a page fault when it is retired eventually, the pure act of loading the
1144 -data and making it available to other speculative instructions opens up the
1145 -opportunity for side channel attacks to unprivileged malicious code,
1146 -similar to the Meltdown attack.
1147 -
1148 -While Meltdown breaks the user space to kernel space protection, L1TF
1149 -allows to attack any physical memory address in the system and the attack
1150 -works across all protection domains. It allows an attack of SGX and also
1151 -works from inside virtual machines because the speculation bypasses the
1152 -extended page table (EPT) protection mechanism.
1153 -
1154 -
1155 -Attack scenarios
1156 -----------------
1157 -
1158 -1. Malicious user space
1159 -^^^^^^^^^^^^^^^^^^^^^^^
1160 -
1161 - Operating Systems store arbitrary information in the address bits of a
1162 - PTE which is marked non present. This allows a malicious user space
1163 - application to attack the physical memory to which these PTEs resolve.
1164 - In some cases user-space can maliciously influence the information
1165 - encoded in the address bits of the PTE, thus making attacks more
1166 - deterministic and more practical.
1167 -
1168 - The Linux kernel contains a mitigation for this attack vector, PTE
1169 - inversion, which is permanently enabled and has no performance
1170 - impact. The kernel ensures that the address bits of PTEs, which are not
1171 - marked present, never point to cacheable physical memory space.
1172 -
1173 - A system with an up to date kernel is protected against attacks from
1174 - malicious user space applications.
1175 -
1176 -2. Malicious guest in a virtual machine
1177 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1178 -
1179 - The fact that L1TF breaks all domain protections allows malicious guest
1180 - OSes, which can control the PTEs directly, and malicious guest user
1181 - space applications, which run on an unprotected guest kernel lacking the
1182 - PTE inversion mitigation for L1TF, to attack physical host memory.
1183 -
1184 - A special aspect of L1TF in the context of virtualization is symmetric
1185 - multi threading (SMT). The Intel implementation of SMT is called
1186 - HyperThreading. The fact that Hyperthreads on the affected processors
1187 - share the L1 Data Cache (L1D) is important for this. As the flaw allows
1188 - only to attack data which is present in L1D, a malicious guest running
1189 - on one Hyperthread can attack the data which is brought into the L1D by
1190 - the context which runs on the sibling Hyperthread of the same physical
1191 - core. This context can be host OS, host user space or a different guest.
1192 -
1193 - If the processor does not support Extended Page Tables, the attack is
1194 - only possible, when the hypervisor does not sanitize the content of the
1195 - effective (shadow) page tables.
1196 -
1197 - While solutions exist to mitigate these attack vectors fully, these
1198 - mitigations are not enabled by default in the Linux kernel because they
1199 - can affect performance significantly. The kernel provides several
1200 - mechanisms which can be utilized to address the problem depending on the
1201 - deployment scenario. The mitigations, their protection scope and impact
1202 - are described in the next sections.
1203 -
1204 - The default mitigations and the rationale for choosing them are explained
1205 - at the end of this document. See :ref:`default_mitigations`.
1206 -
1207 -.. _l1tf_sys_info:
1208 -
1209 -L1TF system information
1210 ------------------------
1211 -
1212 -The Linux kernel provides a sysfs interface to enumerate the current L1TF
1213 -status of the system: whether the system is vulnerable, and which
1214 -mitigations are active. The relevant sysfs file is:
1215 -
1216 -/sys/devices/system/cpu/vulnerabilities/l1tf
1217 -
1218 -The possible values in this file are:
1219 -
1220 - =========================== ===============================
1221 - 'Not affected' The processor is not vulnerable
1222 - 'Mitigation: PTE Inversion' The host protection is active
1223 - =========================== ===============================
1224 -
1225 -If KVM/VMX is enabled and the processor is vulnerable then the following
1226 -information is appended to the 'Mitigation: PTE Inversion' part:
1227 -
1228 - - SMT status:
1229 -
1230 - ===================== ================
1231 - 'VMX: SMT vulnerable' SMT is enabled
1232 - 'VMX: SMT disabled' SMT is disabled
1233 - ===================== ================
1234 -
1235 - - L1D Flush mode:
1236 -
1237 - ================================ ====================================
1238 - 'L1D vulnerable' L1D flushing is disabled
1239 -
1240 - 'L1D conditional cache flushes' L1D flush is conditionally enabled
1241 -
1242 - 'L1D cache flushes' L1D flush is unconditionally enabled
1243 - ================================ ====================================
1244 -
1245 -The resulting grade of protection is discussed in the following sections.
1246 -
1247 -
1248 -Host mitigation mechanism
1249 --------------------------
1250 -
1251 -The kernel is unconditionally protected against L1TF attacks from malicious
1252 -user space running on the host.
1253 -
1254 -
1255 -Guest mitigation mechanisms
1256 ----------------------------
1257 -
1258 -.. _l1d_flush:
1259 -
1260 -1. L1D flush on VMENTER
1261 -^^^^^^^^^^^^^^^^^^^^^^^
1262 -
1263 - To make sure that a guest cannot attack data which is present in the L1D
1264 - the hypervisor flushes the L1D before entering the guest.
1265 -
1266 - Flushing the L1D evicts not only the data which should not be accessed
1267 - by a potentially malicious guest, it also flushes the guest
1268 - data. Flushing the L1D has a performance impact as the processor has to
1269 - bring the flushed guest data back into the L1D. Depending on the
1270 - frequency of VMEXIT/VMENTER and the type of computations in the guest
1271 - performance degradation in the range of 1% to 50% has been observed. For
1272 - scenarios where guest VMEXIT/VMENTER are rare the performance impact is
1273 - minimal. Virtio and mechanisms like posted interrupts are designed to
1274 - confine the VMEXITs to a bare minimum, but specific configurations and
1275 - application scenarios might still suffer from a high VMEXIT rate.
1276 -
1277 - The kernel provides two L1D flush modes:
1278 - - conditional ('cond')
1279 - - unconditional ('always')
1280 -
1281 - The conditional mode avoids L1D flushing after VMEXITs which execute
1282 - only audited code paths before the corresponding VMENTER. These code
1283 - paths have been verified that they cannot expose secrets or other
1284 - interesting data to an attacker, but they can leak information about the
1285 - address space layout of the hypervisor.
1286 -
1287 - Unconditional mode flushes L1D on all VMENTER invocations and provides
1288 - maximum protection. It has a higher overhead than the conditional
1289 - mode. The overhead cannot be quantified correctly as it depends on the
1290 - workload scenario and the resulting number of VMEXITs.
1291 -
1292 - The general recommendation is to enable L1D flush on VMENTER. The kernel
1293 - defaults to conditional mode on affected processors.
1294 -
1295 - **Note**, that L1D flush does not prevent the SMT problem because the
1296 - sibling thread will also bring back its data into the L1D which makes it
1297 - attackable again.
1298 -
1299 - L1D flush can be controlled by the administrator via the kernel command
1300 - line and sysfs control files. See :ref:`mitigation_control_command_line`
1301 - and :ref:`mitigation_control_kvm`.
1302 -
1303 -.. _guest_confinement:
1304 -
1305 -2. Guest VCPU confinement to dedicated physical cores
1306 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1307 -
1308 - To address the SMT problem, it is possible to make a guest or a group of
1309 - guests affine to one or more physical cores. The proper mechanism for
1310 - that is to utilize exclusive cpusets to ensure that no other guest or
1311 - host tasks can run on these cores.
1312 -
1313 - If only a single guest or related guests run on sibling SMT threads on
1314 - the same physical core then they can only attack their own memory and
1315 - restricted parts of the host memory.
1316 -
1317 - Host memory is attackable when one of the sibling SMT threads runs in
1318 - host OS (hypervisor) context and the other in guest context. The amount
1319 - of valuable information from the host OS context depends on the context
1320 - in which the host OS executes, i.e. interrupts, soft interrupts and kernel
1321 - threads. The amount of valuable data from these contexts cannot be
1322 - declared as non-interesting for an attacker without deep inspection of
1323 - the code.
1324 -
1325 - **Note** that assigning guests to a fixed set of physical cores affects
1326 - the ability of the scheduler to do load balancing and might have
1327 - negative effects on CPU utilization depending on the hosting
1328 - scenario. Disabling SMT might be a viable alternative for particular
1329 - scenarios.
1330 -
1331 - For further information about confining guests to a single or to a group
1332 - of cores consult the cpusets documentation:
1333 -
1334 - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
1335 -
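A sketch of the cpuset-based confinement described above (illustrative only; it assumes the cgroup-v1 cpuset controller is mounted at /sys/fs/cgroup/cpuset, and the cpuset name "guest0" and CPU range 2-3 are made-up examples):

    /* Confine a guest's threads to an exclusive cpuset. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/stat.h>

    static void write_file(const char *path, const char *val)
    {
        FILE *f = fopen(path, "w");

        if (!f || fputs(val, f) == EOF) {
            perror(path);
            exit(1);
        }
        fclose(f);
    }

    int main(int argc, char **argv)
    {
        if (argc != 2) {
            fprintf(stderr, "usage: %s <guest-pid>\n", argv[0]);
            return 1;
        }
        /* Create the cpuset; EEXIST is harmless and ignored here. */
        mkdir("/sys/fs/cgroup/cpuset/guest0", 0755);
        write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.cpus", "2-3");
        write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.mems", "0");
        /* Make the CPUs exclusive so no other cpuset can use them. */
        write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.cpu_exclusive", "1");
        /* Move the guest process into the cpuset. */
        write_file("/sys/fs/cgroup/cpuset/guest0/tasks", argv[1]);
        return 0;
    }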
1336 -.. _interrupt_isolation:
1337 -
1338 -3. Interrupt affinity
1339 -^^^^^^^^^^^^^^^^^^^^^
1340 -
1341 - Interrupts can be made affine to logical CPUs. This is not universally
1342 - true because there are types of interrupts which are truly per CPU
1343 - interrupts, e.g. the local timer interrupt. Aside from that, multi-queue
1344 - devices affine their interrupts to single CPUs or groups of CPUs per
1345 - queue without allowing the administrator to control the affinities.
1346 -
1347 - Moving the interrupts, which can be affinity controlled, away from CPUs
1348 - which run untrusted guests, reduces the attack vector space.
1349 -
1350 - Whether the interrupts which are affine to CPUs running untrusted
1351 - guests provide interesting data for an attacker depends on the system
1352 - configuration and the scenarios which run on the system. While for some
1353 - of the interrupts it can be assumed that they won't expose interesting
1354 - information beyond exposing hints about the host OS memory layout, there
1355 - is no way to make general assumptions.
1356 -
1357 - Interrupt affinity can be controlled by the administrator via the
1358 - /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
1359 - available at:
1360 -
1361 - https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
1362 -
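A minimal sketch of steering a controllable interrupt away from guest CPUs via the /proc/irq interface named above (illustrative only; IRQ number 42 and the housekeeping CPU list are made-up examples):

    /* Restrict one interrupt to housekeeping CPUs. */
    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/irq/42/smp_affinity_list", "w");

        if (!f) {
            perror("fopen");
            return 1;
        }
        /* Keep IRQ 42 on CPUs 0-1. The write fails for interrupts
         * whose affinity cannot be changed. */
        if (fputs("0-1", f) == EOF)
            perror("fputs");
        fclose(f);
        return 0;
    }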
1363 -.. _smt_control:
1364 -
1365 -4. SMT control
1366 -^^^^^^^^^^^^^^
1367 -
1368 - To prevent the SMT issues of L1TF it might be necessary to disable SMT
1369 - completely. Disabling SMT can have a significant performance impact, but
1370 - the impact depends on the hosting scenario and the type of workloads.
1371 - The impact of disabling SMT also needs to be weighed against the impact
1372 - of other mitigation solutions like confining guests to dedicated cores.
1373 -
1374 - The kernel provides a sysfs interface to retrieve the status of SMT and
1375 - to control it. It also provides a kernel command line interface to
1376 - control SMT.
1377 -
1378 - The kernel command line interface consists of the following options:
1379 -
1380 - =========== ==========================================================
1381 - nosmt Affects the bring-up of the secondary CPUs during boot. The
1382 - kernel tries to bring all present CPUs online during the
1383 - boot process. "nosmt" makes sure that from each physical
1384 - core only one - the so-called primary (hyper) thread - is
1385 - activated. Due to a design flaw of Intel processors related
1386 - to Machine Check Exceptions, the non-primary siblings have
1387 - to be brought up at least partially and are then shut down
1388 - again. "nosmt" can be undone via the sysfs interface.
1389 -
1390 - nosmt=force Has the same effect as "nosmt" but it does not allow
1391 - undoing the SMT disable via the sysfs interface.
1392 - =========== ==========================================================
1393 -
1394 - The sysfs interface provides two files:
1395 -
1396 - - /sys/devices/system/cpu/smt/control
1397 - - /sys/devices/system/cpu/smt/active
1398 -
1399 - /sys/devices/system/cpu/smt/control:
1400 -
1401 - This file allows reading out the SMT control state and provides the
1402 - ability to disable or (re)enable SMT. The possible states are:
1403 -
1404 - ============== ===================================================
1405 - on SMT is supported by the CPU and enabled. All
1406 - logical CPUs can be onlined and offlined without
1407 - restrictions.
1408 -
1409 - off SMT is supported by the CPU and disabled. Only
1410 - the so called primary SMT threads can be onlined
1411 - and offlined without restrictions. An attempt to
1412 - online a non-primary sibling is rejected.
1413 -
1414 - forceoff Same as 'off' but the state cannot be controlled.
1415 - Attempts to write to the control file are rejected.
1416 -
1417 - notsupported The processor does not support SMT. It's therefore
1418 - not affected by the SMT implications of L1TF.
1419 - Attempts to write to the control file are rejected.
1420 - ============== ===================================================
1421 -
1422 - The possible states which can be written into this file to control SMT
1423 - state are:
1424 -
1425 - - on
1426 - - off
1427 - - forceoff
1428 -
1429 - /sys/devices/system/cpu/smt/active:
1430 -
1431 - This file reports whether SMT is enabled and active, i.e. if on any
1432 - physical core two or more sibling threads are online.
1433 -
1434 - SMT control is also possible at boot time via the l1tf kernel command
1435 - line parameter in combination with L1D flush control. See
1436 - :ref:`mitigation_control_command_line`.
1437 -
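For illustration (not part of this patch), the two sysfs files can be driven from a small C program:

    /* Query the SMT state and disable SMT at run time. */
    #include <stdio.h>

    int main(void)
    {
        char state[32] = "";
        FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");

        if (!f) {
            perror("smt/control");
            return 1;
        }
        if (fgets(state, sizeof(state), f))
            printf("SMT control: %s", state);
        fclose(f);

        /* Writing "off" offlines all non-primary siblings; this is
         * rejected when the state is "forceoff" or "notsupported". */
        f = fopen("/sys/devices/system/cpu/smt/control", "w");
        if (f) {
            if (fputs("off", f) == EOF)
                perror("write smt/control");
            fclose(f);
        }
        return 0;
    }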
1438 -5. Disabling EPT
1439 -^^^^^^^^^^^^^^^^
1440 -
1441 - Disabling EPT for virtual machines provides full mitigation for L1TF even
1442 - with SMT enabled, because the effective page tables for guests are
1443 - managed and sanitized by the hypervisor. However, disabling EPT has a
1444 - significant performance impact, especially when the Meltdown mitigation
1445 - KPTI is enabled.
1446 -
1447 - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
1448 -
1449 -There is ongoing research and development for new mitigation mechanisms to
1450 -address the performance impact of disabling SMT or EPT.
1451 -
1452 -.. _mitigation_control_command_line:
1453 -
1454 -Mitigation control on the kernel command line
1455 ----------------------------------------------
1456 -
1457 -The kernel command line allows controlling the L1TF mitigations at boot
1458 -time with the option "l1tf=". The valid arguments for this option are:
1459 -
1460 - ============ =============================================================
1461 - full Provides all available mitigations for the L1TF
1462 - vulnerability. Disables SMT and enables all mitigations in
1463 - the hypervisors, i.e. unconditional L1D flushing
1464 -
1465 - SMT control and L1D flush control via the sysfs interface
1466 - is still possible after boot. Hypervisors will issue a
1467 - warning when the first VM is started in a potentially
1468 - insecure configuration, i.e. SMT enabled or L1D flush
1469 - disabled.
1470 -
1471 - full,force Same as 'full', but disables SMT and L1D flush runtime
1472 - control. Implies the 'nosmt=force' command line option.
1473 - (i.e. sysfs control of SMT is disabled.)
1474 -
1475 - flush Leaves SMT enabled and enables the default hypervisor
1476 - mitigation, i.e. conditional L1D flushing
1477 -
1478 - SMT control and L1D flush control via the sysfs interface
1479 - is still possible after boot. Hypervisors will issue a
1480 - warning when the first VM is started in a potentially
1481 - insecure configuration, i.e. SMT enabled or L1D flush
1482 - disabled.
1483 -
1484 - flush,nosmt Disables SMT and enables the default hypervisor mitigation,
1485 - i.e. conditional L1D flushing.
1486 -
1487 - SMT control and L1D flush control via the sysfs interface
1488 - is still possible after boot. Hypervisors will issue a
1489 - warning when the first VM is started in a potentially
1490 - insecure configuration, i.e. SMT enabled or L1D flush
1491 - disabled.
1492 -
1493 - flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
1494 - started in a potentially insecure configuration.
1495 -
1496 - off Disables hypervisor mitigations and doesn't emit any
1497 - warnings.
1498 - It also drops the swap size and available RAM limit restrictions
1499 - on both hypervisor and bare metal.
1500 -
1501 - ============ =============================================================
1502 -
1503 -The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
1504 -
1505 -
1506 -.. _mitigation_control_kvm:
1507 -
1508 -Mitigation control for KVM - module parameter
1509 --------------------------------------------------------------
1510 -
1511 -The KVM hypervisor mitigation mechanism, flushing the L1D cache when
1512 -entering a guest, can be controlled with a module parameter.
1513 -
1514 -The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
1515 -following arguments:
1516 -
1517 - ============ ==============================================================
1518 - always L1D cache flush on every VMENTER.
1519 -
1520 - cond Flush L1D on VMENTER only when the code between VMEXIT and
1521 - VMENTER can leak host memory which is considered
1522 - interesting for an attacker. This still can leak host memory
1523 - which allows e.g. determining the host's address space layout.
1524 -
1525 - never Disables the mitigation
1526 - ============ ==============================================================
1527 -
1528 -The parameter can be provided on the kernel command line, as a module
1529 -parameter when loading the module, and modified at runtime via the sysfs
1530 -file:
1531 -
1532 -/sys/module/kvm_intel/parameters/vmentry_l1d_flush
1533 -
1534 -The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
1535 -line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
1536 -module parameter is ignored and writes to the sysfs file are rejected.
1537 -
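A small sketch of switching the flush mode at run time through this file (illustrative only, not part of this patch):

    /* Switch the KVM L1D flush mode via the module parameter file.
     * The write fails when 'l1tf=full,force' was given on the kernel
     * command line, since 'always' is then enforced. */
    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/module/kvm_intel/parameters/vmentry_l1d_flush";
        FILE *f = fopen(path, "w");

        if (!f) {
            perror(path);
            return 1;
        }
        if (fputs("always", f) == EOF)   /* or "cond" / "never" */
            perror("fputs");
        return fclose(f) == EOF;
    }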
1538 -
1539 -Mitigation selection guide
1540 ---------------------------
1541 -
1542 -1. No virtualization in use
1543 -^^^^^^^^^^^^^^^^^^^^^^^^^^^
1544 -
1545 - The system is protected by the kernel unconditionally and no further
1546 - action is required.
1547 -
1548 -2. Virtualization with trusted guests
1549 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1550 -
1551 - If the guest comes from a trusted source and the guest OS kernel is
1552 - guaranteed to have the L1TF mitigations in place, the system is fully
1553 - protected against L1TF and no further action is required.
1554 -
1555 - To avoid the overhead of the default L1D flushing on VMENTER the
1556 - administrator can disable the flushing via the kernel command line and
1557 - sysfs control files. See :ref:`mitigation_control_command_line` and
1558 - :ref:`mitigation_control_kvm`.
1559 -
1560 -
1561 -3. Virtualization with untrusted guests
1562 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1563 -
1564 -3.1. SMT not supported or disabled
1565 -""""""""""""""""""""""""""""""""""
1566 -
1567 - If SMT is not supported by the processor or disabled in the BIOS or by
1568 - the kernel, it's only required to enforce L1D flushing on VMENTER.
1569 -
1570 - Conditional L1D flushing is the default behaviour and can be tuned. See
1571 - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
1572 -
1573 -3.2. EPT not supported or disabled
1574 -""""""""""""""""""""""""""""""""""
1575 -
1576 - If EPT is not supported by the processor or disabled in the hypervisor,
1577 - the system is fully protected. SMT can stay enabled and L1D flushing on
1578 - VMENTER is not required.
1579 -
1580 - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
1581 -
1582 -3.3. SMT and EPT supported and active
1583 -"""""""""""""""""""""""""""""""""""""
1584 -
1585 - If SMT and EPT are supported and active then various degrees of
1586 - mitigations can be employed:
1587 -
1588 - - L1D flushing on VMENTER:
1589 -
1590 - L1D flushing on VMENTER is the minimal protection requirement, but it
1591 - is only potent in combination with other mitigation methods.
1592 -
1593 - Conditional L1D flushing is the default behaviour and can be tuned. See
1594 - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
1595 -
1596 - - Guest confinement:
1597 -
1598 - Confinement of guests to a single or a group of physical cores which
1599 - are not running any other processes can reduce the attack surface
1600 - significantly, but interrupts, soft interrupts and kernel threads can
1601 - still expose valuable data to a potential attacker. See
1602 - :ref:`guest_confinement`.
1603 -
1604 - - Interrupt isolation:
1605 -
1606 - Isolating the guest CPUs from interrupts can reduce the attack surface
1607 - further, but still allows a malicious guest to explore a limited amount
1608 - of host physical memory. This can at least be used to gain knowledge
1609 - about the host address space layout. The interrupts which have a fixed
1610 - affinity to the CPUs running the untrusted guests can, depending on
1611 - the scenario, still trigger soft interrupts and schedule kernel threads
1612 - which might expose valuable information. See
1613 - :ref:`interrupt_isolation`.
1614 -
1615 -The above three mitigation methods combined can provide protection to a
1616 -certain degree, but the risk of the remaining attack surface has to be
1617 -carefully analyzed. For full protection the following methods are
1618 -available:
1619 -
1620 - - Disabling SMT:
1621 -
1622 - Disabling SMT and enforcing the L1D flushing provides the maximum
1623 - amount of protection. This mitigation does not depend on any of the
1624 - above mitigation methods.
1625 -
1626 - SMT control and L1D flushing can be tuned by the command line
1627 - parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
1628 - time with the matching sysfs control files. See :ref:`smt_control`,
1629 - :ref:`mitigation_control_command_line` and
1630 - :ref:`mitigation_control_kvm`.
1631 -
1632 - - Disabling EPT:
1633 -
1634 - Disabling EPT provides the maximum amount of protection as well. It
1635 - does not depend on any of the above mitigation methods. SMT can stay
1636 - enabled and L1D flushing is not required, but the performance impact is
1637 - significant.
1638 -
1639 - EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
1640 - parameter.
1641 -
1642 -3.4. Nested virtual machines
1643 -""""""""""""""""""""""""""""
1644 -
1645 -When nested virtualization is in use, three operating systems are involved:
1646 -the bare metal hypervisor, the nested hypervisor and the nested virtual
1647 -machine. VMENTER operations from the nested hypervisor into the nested
1648 -guest will always be processed by the bare metal hypervisor. If KVM is the
1649 -bare metal hypervisor it will:
1650 -
1651 - - Flush the L1D cache on every switch from the nested hypervisor to the
1652 - nested virtual machine, so that the nested hypervisor's secrets are not
1653 - exposed to the nested virtual machine;
1654 -
1655 - - Flush the L1D cache on every switch from the nested virtual machine to
1656 - the nested hypervisor; this is a complex operation, and flushing the L1D
1657 - cache prevents the bare metal hypervisor's secrets from being exposed
1658 - to the nested virtual machine;
1659 -
1660 - - Instruct the nested hypervisor to not perform any L1D cache flush. This
1661 - is an optimization to avoid double L1D flushing.
1662 -
1663 -
1664 -.. _default_mitigations:
1665 -
1666 -Default mitigations
1667 --------------------
1668 -
1669 - The kernel default mitigations for vulnerable processors are:
1670 -
1671 - - PTE inversion to protect against malicious user space. This is done
1672 - unconditionally and cannot be controlled. The swap storage is limited
1673 - to ~16TB.
1674 -
1675 - - L1D conditional flushing on VMENTER when EPT is enabled for
1676 - a guest.
1677 -
1678 - The kernel does not by default enforce the disabling of SMT, which leaves
1679 - SMT systems vulnerable when running untrusted guests with EPT enabled.
1680 -
1681 - The rationale for this choice is:
1682 -
1683 - - Force disabling SMT can break existing setups, especially with
1684 - unattended updates.
1685 -
1686 - - If regular users run untrusted guests on their machine, then L1TF is
1687 - just an add-on to other malware which might be embedded in an untrusted
1688 - guest, e.g. spam-bots or attacks on the local network.
1689 -
1690 - There is no technical way to prevent a user from running untrusted code
1691 - on their machines blindly.
1692 -
1693 - - It's technically extremely unlikely and from today's knowledge even
1694 - impossible that L1TF can be exploited via the most popular attack
1695 - mechanisms like JavaScript because these mechanisms have no way to
1696 - control PTEs. If this were possible and no other mitigation were
1697 - available, then the default might be different.
1698 -
1699 - - The administrators of cloud and hosting setups have to carefully
1700 - analyze the risk for their scenarios and make the appropriate
1701 - mitigation choices, which might even vary across their deployed
1702 - machines and also result in other changes of their overall setup.
1703 - There is no way for the kernel to provide a sensible default for this
1704 - kind of scenario.
1705 diff --git a/Documentation/index.rst b/Documentation/index.rst
1706 index 5db7e87c7cb1..1cdc139adb40 100644
1707 --- a/Documentation/index.rst
1708 +++ b/Documentation/index.rst
1709 @@ -104,6 +104,7 @@ implementation.
1710 :maxdepth: 2
1711
1712 sh/index
1713 + x86/index
1714
1715 Filesystem Documentation
1716 ------------------------
1717 diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
1718 new file mode 100644
1719 index 000000000000..33c5c3142e20
1720 --- /dev/null
1721 +++ b/Documentation/x86/conf.py
1722 @@ -0,0 +1,10 @@
1723 +# -*- coding: utf-8; mode: python -*-
1724 +
1725 +project = "X86 architecture specific documentation"
1726 +
1727 +tags.add("subproject")
1728 +
1729 +latex_documents = [
1730 + ('index', 'x86.tex', project,
1731 + 'The kernel development community', 'manual'),
1732 +]
1733 diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
1734 new file mode 100644
1735 index 000000000000..ef389dcf1b1d
1736 --- /dev/null
1737 +++ b/Documentation/x86/index.rst
1738 @@ -0,0 +1,8 @@
1739 +==========================
1740 +x86 architecture specifics
1741 +==========================
1742 +
1743 +.. toctree::
1744 + :maxdepth: 1
1745 +
1746 + mds
1747 diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
1748 new file mode 100644
1749 index 000000000000..534e9baa4e1d
1750 --- /dev/null
1751 +++ b/Documentation/x86/mds.rst
1752 @@ -0,0 +1,225 @@
1753 +Microarchitectural Data Sampling (MDS) mitigation
1754 +=================================================
1755 +
1756 +.. _mds:
1757 +
1758 +Overview
1759 +--------
1760 +
1761 +Microarchitectural Data Sampling (MDS) is a family of side channel attacks
1762 +on internal buffers in Intel CPUs. The variants are:
1763 +
1764 + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
1765 + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
1766 + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
1767 + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
1768 +
1769 +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
1770 +dependent load (store-to-load forwarding) as an optimization. The forward
1771 +can also happen to a faulting or assisting load operation for a different
1772 +memory address, which can be exploited under certain conditions. Store
1773 +buffers are partitioned between Hyper-Threads so cross thread forwarding is
1774 +not possible. But if a thread enters or exits a sleep state the store
1775 +buffer is repartitioned which can expose data from one thread to the other.
1776 +
1777 +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
1778 +L1 miss situations and to hold data which is returned or sent in response
1779 +to a memory or I/O operation. Fill buffers can forward data to a load
1780 +operation and also write data to the cache. When the fill buffer is
1781 +deallocated it can retain the stale data of the preceding operations which
1782 +can then be forwarded to a faulting or assisting load operation, which can
1783 +be exploited under certain conditions. Fill buffers are shared between
1784 +Hyper-Threads so cross thread leakage is possible.
1785 +
1786 +MLPDS leaks Load Port Data. Load ports are used to perform load operations
1787 +from memory or I/O. The received data is then forwarded to the register
1788 +file or a subsequent operation. In some implementations the Load Port can
1789 +contain stale data from a previous operation which can be forwarded to
1790 +faulting or assisting loads under certain conditions, which again can be
1791 +exploited eventually. Load ports are shared between Hyper-Threads so cross
1792 +thread leakage is possible.
1793 +
1794 +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
1795 +memory that takes a fault or assist can leave data in a microarchitectural
1796 +structure that may later be observed using one of the same methods used by
1797 +MSBDS, MFBDS or MLPDS.
1798 +
1799 +Exposure assumptions
1800 +--------------------
1801 +
1802 +It is assumed that attack code resides in user space or in a guest with one
1803 +exception. The rationale behind this assumption is that the code construct
1804 +needed for exploiting MDS requires:
1805 +
1806 + - to control the load to trigger a fault or assist
1807 +
1808 + - to have a disclosure gadget which exposes the speculatively accessed
1809 + data for consumption through a side channel.
1810 +
1811 + - to control the pointer through which the disclosure gadget exposes the
1812 + data
1813 +
1814 +The existence of such a construct in the kernel cannot be excluded with
1815 +100% certainty, but the complexity involved makes it extremely unlikely.
1816 +
1817 +There is one exception, which is untrusted BPF. The functionality of
1818 +untrusted BPF is limited, but it needs to be thoroughly investigated
1819 +whether it can be used to create such a construct.
1820 +
1821 +
1822 +Mitigation strategy
1823 +-------------------
1824 +
1825 +All variants have the same mitigation strategy at least for the single CPU
1826 +thread case (SMT off): Force the CPU to clear the affected buffers.
1827 +
1828 +This is achieved by using the otherwise unused and obsolete VERW
1829 +instruction in combination with a microcode update. The microcode clears
1830 +the affected CPU buffers when the VERW instruction is executed.
1831 +
1832 +For virtualization there are two ways to achieve CPU buffer
1833 +clearing: either via the modified VERW instruction or via the L1D Flush
1834 +command. The latter is issued when L1TF mitigation is enabled so the extra
1835 +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
1836 +be issued.
1837 +
1838 +If the VERW instruction with the supplied segment selector argument is
1839 +executed on a CPU without the microcode update there is no side effect
1840 +other than a small number of pointlessly wasted CPU cycles.
1841 +
1842 +This does not protect against cross Hyper-Thread attacks except for MSBDS
1843 +which is only exploitable cross Hyper-Thread when one of the Hyper-Threads
1844 +enters a C-state.
1845 +
1846 +The kernel provides a function to invoke the buffer clearing:
1847 +
1848 + mds_clear_cpu_buffers()
1849 +
1850 +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
1851 +(idle) transitions.
1852 +
1853 +As a special quirk to address virtualization scenarios where the host has
1854 +the microcode updated, but the hypervisor does not (yet) expose the
1855 +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
1856 +hope that it might actually clear the buffers. The state is reflected
1857 +accordingly.
1858 +
1859 +According to current knowledge additional mitigations inside the kernel
1860 +itself are not required because the necessary gadgets to expose the leaked
1861 +data cannot be controlled in a way which allows exploitation from malicious
1862 +user space or VM guests.
1863 +
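Whether the microcode side of this mitigation is present can be checked from user space, since MD_CLEAR is enumerated as CPUID.(EAX=7,ECX=0):EDX[10], the same bit the cpufeatures.h hunk below encodes as X86_FEATURE_MD_CLEAR (18*32+10). An illustrative sketch (not part of this patch):

    /* Check whether the microcode advertises VERW-based buffer
     * clearing via CPUID leaf 7, subleaf 0, EDX bit 10. */
    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
            fprintf(stderr, "CPUID leaf 7 not supported\n");
            return 1;
        }
        printf("MD_CLEAR: %s\n", (edx & (1u << 10)) ? "yes" : "no");
        return 0;
    }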
1864 +Kernel internal mitigation modes
1865 +--------------------------------
1866 +
1867 + ======= ============================================================
1868 + off Mitigation is disabled. Either the CPU is not affected or
1869 + mds=off is supplied on the kernel command line
1870 +
1871 + full Mitigation is enabled. CPU is affected and MD_CLEAR is
1872 + advertised in CPUID.
1873 +
1874 + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not
1875 + advertised in CPUID. That is mainly for virtualization
1876 + scenarios where the host has the updated microcode but the
1877 + hypervisor does not expose MD_CLEAR in CPUID. It's a best
1878 + effort approach without guarantee.
1879 + ======= ============================================================
1880 +
1881 +If the CPU is affected and mds=off is not supplied on the kernel command
1882 +line then the kernel selects the appropriate mitigation mode depending on
1883 +the availability of the MD_CLEAR CPUID bit.
1884 +
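Expressed as code, the selection boils down to the following condensed, user-space rendering (illustrative only; the real implementation is the mds_select_mitigation() hunk for arch/x86/kernel/cpu/bugs.c later in this patch):

    #include <stdio.h>
    #include <stdbool.h>

    enum mds_mitigations {
        MDS_MITIGATION_OFF, MDS_MITIGATION_FULL, MDS_MITIGATION_VMWERV
    };

    static enum mds_mitigations
    select_mode(bool cpu_affected, bool md_clear, bool cmdline_off)
    {
        if (!cpu_affected || cmdline_off)
            return MDS_MITIGATION_OFF;
        /* Affected CPU: full if the microcode advertises MD_CLEAR,
         * otherwise best-effort VERW ("vmwerv"). */
        return md_clear ? MDS_MITIGATION_FULL : MDS_MITIGATION_VMWERV;
    }

    int main(void)
    {
        static const char * const names[] = { "off", "full", "vmwerv" };

        /* Affected CPU, no MD_CLEAR microcode -> "vmwerv" */
        printf("%s\n", names[select_mode(true, false, false)]);
        return 0;
    }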
1885 +Mitigation points
1886 +-----------------
1887 +
1888 +1. Return to user space
1889 +^^^^^^^^^^^^^^^^^^^^^^^
1890 +
1891 + When transitioning from kernel to user space the CPU buffers are flushed
1892 + on affected CPUs when the mitigation is not disabled on the kernel
1893 + command line. The mitigation is enabled through the static key
1894 + mds_user_clear.
1895 +
1896 + The mitigation is invoked in prepare_exit_to_usermode() which covers
1897 + most of the kernel to user space transitions. There are a few exceptions
1898 + which are not invoking prepare_exit_to_usermode() on return to user
1899 + space. These exceptions use the paranoid exit code.
1900 +
1901 + - Non Maskable Interrupt (NMI):
1902 +
1903 + Access to sensitive data like keys or credentials in the NMI context is
1904 + mostly theoretical: The CPU can do prefetching or execute a
1905 + misspeculated code path and thereby fetch data which might end up
1906 + leaking through a buffer.
1907 +
1908 + But for mounting other attacks the kernel stack address of the task is
1909 + already valuable information. So in full mitigation mode, the NMI is
1910 + mitigated on the return from do_nmi() to provide almost complete
1911 + coverage.
1912 +
1913 + - Double fault (#DF):
1914 +
1915 + A double fault is usually fatal, but the ESPFIX workaround, which can
1916 + be triggered from user space through modify_ldt(2), is a recoverable
1917 + double fault. #DF uses the paranoid exit path, so explicit mitigation
1918 + in the double fault handler is required.
1919 +
1920 + - Machine Check Exception (#MC):
1921 +
1922 + Another corner case is a #MC which hits between the CPU buffer clear
1923 + invocation and the actual return to user. As this still is in kernel
1924 + space it takes the paranoid exit path which does not clear the CPU
1925 + buffers. So the #MC handler repopulates the buffers to some
1926 + extent. Machine checks are not reliably controllable and the window is
1927 + extremely small so mitigation would just tick a checkbox that this
1928 + theoretical corner case is covered. To keep the amount of special
1929 + cases small, ignore #MC.
1930 +
1931 + - Debug Exception (#DB):
1932 +
1933 + This takes the paranoid exit path only when the INT1 breakpoint is in
1934 + kernel space. #DB on a user space address takes the regular exit path,
1935 + so no extra mitigation required.
1936 +
1937 +
1938 +2. C-State transition
1939 +^^^^^^^^^^^^^^^^^^^^^
1940 +
1941 + When a CPU goes idle and enters a C-State the CPU buffers need to be
1942 + cleared on affected CPUs when SMT is active. This addresses the
1943 + repartitioning of the store buffer when one of the Hyper-Threads enters
1944 + a C-State.
1945 +
1946 + When SMT is inactive, i.e. either the CPU does not support it or all
1947 + sibling threads are offline, CPU buffer clearing is not required.
1948 +
1949 + The idle clearing is enabled on CPUs which are only affected by MSBDS
1950 + and not by any other MDS variant. The other MDS variants cannot be
1951 + protected against cross Hyper-Thread attacks because the Fill Buffer and
1952 + the Load Ports are shared. So on CPUs affected by other variants, the
1953 + idle clearing would be a window dressing exercise and is therefore not
1954 + activated.
1955 +
1956 + The invocation is controlled by the static key mds_idle_clear which is
1957 + switched depending on the chosen mitigation mode and the SMT state of
1958 + the system.
1959 +
1960 + The buffer clear is only invoked before entering the C-State to prevent
1961 + stale data from the idling CPU from spilling to the Hyper-Thread
1962 + sibling after the store buffer is repartitioned and all entries become
1963 + available to the non-idle sibling.
1964 +
1965 + When coming out of idle the store buffer is partitioned again so each
1966 + sibling has half of it available. The CPU coming back from idle could
1967 + then be speculatively exposed to contents of the sibling. The buffers are
1968 + flushed either on exit to user space or on VMENTER so malicious code
1969 + in user space or the guest cannot speculatively access them.
1970 +
1971 + The mitigation is hooked into all variants of halt()/mwait(), but does
1972 + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
1973 + has been superseded by the intel_idle driver around 2010 and is
1974 + preferred on all affected CPUs which are expected to gain the MD_CLEAR
1975 + functionality in microcode. Aside from that, the IO-Port mechanism is a
1976 + legacy interface which is only used on older systems which are either
1977 + not affected or do not receive microcode updates anymore.
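The resulting state is exported through the sysfs vulnerabilities interface wired up by the cpu_show_mds() hunk below; a minimal user-space sketch to read it (illustrative only, not part of this patch):

    /* Read the MDS mitigation state reported via sysfs. */
    #include <stdio.h>

    int main(void)
    {
        char line[256];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

        if (!f) {
            perror("fopen");
            return 1;
        }
        /* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
        if (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);
        return 0;
    }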
1978 diff --git a/Makefile b/Makefile
1979 index 914d69b9e3fd..be894b3a97d5 100644
1980 --- a/Makefile
1981 +++ b/Makefile
1982 @@ -1,7 +1,7 @@
1983 # SPDX-License-Identifier: GPL-2.0
1984 VERSION = 4
1985 PATCHLEVEL = 19
1986 -SUBLEVEL = 42
1987 +SUBLEVEL = 43
1988 EXTRAVERSION =
1989 NAME = "People's Front"
1990
1991 diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
1992 index 1341325599a7..4ccbf611a3c5 100644
1993 --- a/arch/powerpc/kernel/security.c
1994 +++ b/arch/powerpc/kernel/security.c
1995 @@ -56,7 +56,7 @@ void setup_barrier_nospec(void)
1996 enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
1997 security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
1998
1999 - if (!no_nospec)
2000 + if (!no_nospec && !cpu_mitigations_off())
2001 enable_barrier_nospec(enable);
2002 }
2003
2004 @@ -115,7 +115,7 @@ static int __init handle_nospectre_v2(char *p)
2005 early_param("nospectre_v2", handle_nospectre_v2);
2006 void setup_spectre_v2(void)
2007 {
2008 - if (no_spectrev2)
2009 + if (no_spectrev2 || cpu_mitigations_off())
2010 do_btb_flush_fixups();
2011 else
2012 btb_flush_enabled = true;
2013 @@ -299,7 +299,7 @@ void setup_stf_barrier(void)
2014
2015 stf_enabled_flush_types = type;
2016
2017 - if (!no_stf_barrier)
2018 + if (!no_stf_barrier && !cpu_mitigations_off())
2019 stf_barrier_enable(enable);
2020 }
2021
2022 diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
2023 index faf00222b324..eaf7300be5ab 100644
2024 --- a/arch/powerpc/kernel/setup_64.c
2025 +++ b/arch/powerpc/kernel/setup_64.c
2026 @@ -955,7 +955,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
2027
2028 enabled_flush_types = types;
2029
2030 - if (!no_rfi_flush)
2031 + if (!no_rfi_flush && !cpu_mitigations_off())
2032 rfi_flush_enable(enable);
2033 }
2034
2035 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
2036 index bdddaae96559..649135cbedd5 100644
2037 --- a/arch/s390/kernel/nospec-branch.c
2038 +++ b/arch/s390/kernel/nospec-branch.c
2039 @@ -1,6 +1,7 @@
2040 // SPDX-License-Identifier: GPL-2.0
2041 #include <linux/module.h>
2042 #include <linux/device.h>
2043 +#include <linux/cpu.h>
2044 #include <asm/nospec-branch.h>
2045
2046 static int __init nobp_setup_early(char *str)
2047 @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
2048
2049 void __init nospec_auto_detect(void)
2050 {
2051 - if (test_facility(156)) {
2052 + if (test_facility(156) || cpu_mitigations_off()) {
2053 /*
2054 * The machine supports etokens.
2055 * Disable expolines and disable nobp.
2056 diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
2057 index 3b2490b81918..8353348ddeaf 100644
2058 --- a/arch/x86/entry/common.c
2059 +++ b/arch/x86/entry/common.c
2060 @@ -31,6 +31,7 @@
2061 #include <asm/vdso.h>
2062 #include <linux/uaccess.h>
2063 #include <asm/cpufeature.h>
2064 +#include <asm/nospec-branch.h>
2065
2066 #define CREATE_TRACE_POINTS
2067 #include <trace/events/syscalls.h>
2068 @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
2069 #endif
2070
2071 user_enter_irqoff();
2072 +
2073 + mds_user_clear_cpu_buffers();
2074 }
2075
2076 #define SYSCALL_EXIT_WORK_FLAGS \
2077 diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
2078 index f9958ad4d335..a759e59990fb 100644
2079 --- a/arch/x86/events/intel/core.c
2080 +++ b/arch/x86/events/intel/core.c
2081 @@ -4132,11 +4132,11 @@ __init int intel_pmu_init(void)
2082 name = "nehalem";
2083 break;
2084
2085 - case INTEL_FAM6_ATOM_PINEVIEW:
2086 - case INTEL_FAM6_ATOM_LINCROFT:
2087 - case INTEL_FAM6_ATOM_PENWELL:
2088 - case INTEL_FAM6_ATOM_CLOVERVIEW:
2089 - case INTEL_FAM6_ATOM_CEDARVIEW:
2090 + case INTEL_FAM6_ATOM_BONNELL:
2091 + case INTEL_FAM6_ATOM_BONNELL_MID:
2092 + case INTEL_FAM6_ATOM_SALTWELL:
2093 + case INTEL_FAM6_ATOM_SALTWELL_MID:
2094 + case INTEL_FAM6_ATOM_SALTWELL_TABLET:
2095 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2096 sizeof(hw_cache_event_ids));
2097
2098 @@ -4149,9 +4149,11 @@ __init int intel_pmu_init(void)
2099 name = "bonnell";
2100 break;
2101
2102 - case INTEL_FAM6_ATOM_SILVERMONT1:
2103 - case INTEL_FAM6_ATOM_SILVERMONT2:
2104 + case INTEL_FAM6_ATOM_SILVERMONT:
2105 + case INTEL_FAM6_ATOM_SILVERMONT_X:
2106 + case INTEL_FAM6_ATOM_SILVERMONT_MID:
2107 case INTEL_FAM6_ATOM_AIRMONT:
2108 + case INTEL_FAM6_ATOM_AIRMONT_MID:
2109 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
2110 sizeof(hw_cache_event_ids));
2111 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
2112 @@ -4170,7 +4172,7 @@ __init int intel_pmu_init(void)
2113 break;
2114
2115 case INTEL_FAM6_ATOM_GOLDMONT:
2116 - case INTEL_FAM6_ATOM_DENVERTON:
2117 + case INTEL_FAM6_ATOM_GOLDMONT_X:
2118 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
2119 sizeof(hw_cache_event_ids));
2120 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
2121 @@ -4196,7 +4198,7 @@ __init int intel_pmu_init(void)
2122 name = "goldmont";
2123 break;
2124
2125 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2126 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2127 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
2128 sizeof(hw_cache_event_ids));
2129 memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
2130 diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
2131 index 6eb76106c469..56194c571299 100644
2132 --- a/arch/x86/events/intel/cstate.c
2133 +++ b/arch/x86/events/intel/cstate.c
2134 @@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
2135
2136 X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
2137
2138 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
2139 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
2140 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
2141 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
2142 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
2143
2144 X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
2145 @@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
2146 X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
2147
2148 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
2149 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
2150 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
2151
2152 - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
2153 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
2154 { },
2155 };
2156 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
2157 diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
2158 index 32f3e9423e99..91039ffed633 100644
2159 --- a/arch/x86/events/intel/rapl.c
2160 +++ b/arch/x86/events/intel/rapl.c
2161 @@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
2162 X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
2163
2164 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
2165 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
2166 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
2167
2168 - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
2169 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
2170 {},
2171 };
2172
2173 diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
2174 index b4771a6ddbc1..1b9f85abf9bc 100644
2175 --- a/arch/x86/events/msr.c
2176 +++ b/arch/x86/events/msr.c
2177 @@ -69,14 +69,14 @@ static bool test_intel(int idx)
2178 case INTEL_FAM6_BROADWELL_GT3E:
2179 case INTEL_FAM6_BROADWELL_X:
2180
2181 - case INTEL_FAM6_ATOM_SILVERMONT1:
2182 - case INTEL_FAM6_ATOM_SILVERMONT2:
2183 + case INTEL_FAM6_ATOM_SILVERMONT:
2184 + case INTEL_FAM6_ATOM_SILVERMONT_X:
2185 case INTEL_FAM6_ATOM_AIRMONT:
2186
2187 case INTEL_FAM6_ATOM_GOLDMONT:
2188 - case INTEL_FAM6_ATOM_DENVERTON:
2189 + case INTEL_FAM6_ATOM_GOLDMONT_X:
2190
2191 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2192 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2193
2194 case INTEL_FAM6_XEON_PHI_KNL:
2195 case INTEL_FAM6_XEON_PHI_KNM:
2196 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
2197 index 7b31ee5223fc..69037da75ea0 100644
2198 --- a/arch/x86/include/asm/cpufeatures.h
2199 +++ b/arch/x86/include/asm/cpufeatures.h
2200 @@ -341,6 +341,7 @@
2201 #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
2202 #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
2203 #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
2204 +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
2205 #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
2206 #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
2207 #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
2208 @@ -378,5 +379,7 @@
2209 #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
2210 #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
2211 #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
2212 +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
2213 +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSBDS variant of BUG_MDS */
2214
2215 #endif /* _ASM_X86_CPUFEATURES_H */
2216 diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
2217 index 0ad25cc895ae..058b1a1994c4 100644
2218 --- a/arch/x86/include/asm/intel-family.h
2219 +++ b/arch/x86/include/asm/intel-family.h
2220 @@ -8,9 +8,6 @@
2221 * The "_X" parts are generally the EP and EX Xeons, or the
2222 * "Extreme" ones, like Broadwell-E.
2223 *
2224 - * Things ending in "2" are usually because we have no better
2225 - * name for them. There's no processor called "SILVERMONT2".
2226 - *
2227 * While adding a new CPUID for a new microarchitecture, add a new
2228 * group to keep logically sorted out in chronological order. Within
2229 * that group keep the CPUID for the variants sorted by model number.
2230 @@ -59,19 +56,23 @@
2231
2232 /* "Small Core" Processors (Atom) */
2233
2234 -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
2235 -#define INTEL_FAM6_ATOM_LINCROFT 0x26
2236 -#define INTEL_FAM6_ATOM_PENWELL 0x27
2237 -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
2238 -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
2239 -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
2240 -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
2241 -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
2242 -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
2243 -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
2244 -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
2245 -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
2246 -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
2247 +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
2248 +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
2249 +
2250 +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
2251 +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
2252 +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
2253 +
2254 +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
2255 +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
2256 +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
2257 +
2258 +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
2259 +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
2260 +
2261 +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
2262 +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
2263 +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
2264
2265 /* Xeon Phi */
2266
2267 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
2268 index 15450a675031..c99c66b41e53 100644
2269 --- a/arch/x86/include/asm/irqflags.h
2270 +++ b/arch/x86/include/asm/irqflags.h
2271 @@ -6,6 +6,8 @@
2272
2273 #ifndef __ASSEMBLY__
2274
2275 +#include <asm/nospec-branch.h>
2276 +
2277 /* Provide __cpuidle; we can't safely include <linux/cpu.h> */
2278 #define __cpuidle __attribute__((__section__(".cpuidle.text")))
2279
2280 @@ -54,11 +56,13 @@ static inline void native_irq_enable(void)
2281
2282 static inline __cpuidle void native_safe_halt(void)
2283 {
2284 + mds_idle_clear_cpu_buffers();
2285 asm volatile("sti; hlt": : :"memory");
2286 }
2287
2288 static inline __cpuidle void native_halt(void)
2289 {
2290 + mds_idle_clear_cpu_buffers();
2291 asm volatile("hlt": : :"memory");
2292 }
2293
2294 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
2295 index f14ca0be1e3f..f85f43db9225 100644
2296 --- a/arch/x86/include/asm/msr-index.h
2297 +++ b/arch/x86/include/asm/msr-index.h
2298 @@ -2,6 +2,8 @@
2299 #ifndef _ASM_X86_MSR_INDEX_H
2300 #define _ASM_X86_MSR_INDEX_H
2301
2302 +#include <linux/bits.h>
2303 +
2304 /*
2305 * CPU model specific register (MSR) numbers.
2306 *
2307 @@ -40,14 +42,14 @@
2308 /* Intel MSRs. Some also available on other CPUs */
2309
2310 #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
2311 -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
2312 +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
2313 #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
2314 -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
2315 +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
2316 #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
2317 -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
2318 +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
2319
2320 #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
2321 -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
2322 +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
2323
2324 #define MSR_PPIN_CTL 0x0000004e
2325 #define MSR_PPIN 0x0000004f
2326 @@ -69,20 +71,25 @@
2327 #define MSR_MTRRcap 0x000000fe
2328
2329 #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
2330 -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
2331 -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
2332 -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */
2333 -#define ARCH_CAP_SSB_NO (1 << 4) /*
2334 - * Not susceptible to Speculative Store Bypass
2335 - * attack, so no Speculative Store Bypass
2336 - * control required.
2337 - */
2338 +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
2339 +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
2340 +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
2341 +#define ARCH_CAP_SSB_NO BIT(4) /*
2342 + * Not susceptible to Speculative Store Bypass
2343 + * attack, so no Speculative Store Bypass
2344 + * control required.
2345 + */
2346 +#define ARCH_CAP_MDS_NO BIT(5) /*
2347 + * Not susceptible to
2348 + * Microarchitectural Data
2349 + * Sampling (MDS) vulnerabilities.
2350 + */
2351
2352 #define MSR_IA32_FLUSH_CMD 0x0000010b
2353 -#define L1D_FLUSH (1 << 0) /*
2354 - * Writeback and invalidate the
2355 - * L1 data cache.
2356 - */
2357 +#define L1D_FLUSH BIT(0) /*
2358 + * Writeback and invalidate the
2359 + * L1 data cache.
2360 + */
2361
2362 #define MSR_IA32_BBL_CR_CTL 0x00000119
2363 #define MSR_IA32_BBL_CR_CTL3 0x0000011e
2364 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
2365 index 39a2fb29378a..eb0f80ce8524 100644
2366 --- a/arch/x86/include/asm/mwait.h
2367 +++ b/arch/x86/include/asm/mwait.h
2368 @@ -6,6 +6,7 @@
2369 #include <linux/sched/idle.h>
2370
2371 #include <asm/cpufeature.h>
2372 +#include <asm/nospec-branch.h>
2373
2374 #define MWAIT_SUBSTATE_MASK 0xf
2375 #define MWAIT_CSTATE_MASK 0xf
2376 @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx,
2377
2378 static inline void __mwait(unsigned long eax, unsigned long ecx)
2379 {
2380 + mds_idle_clear_cpu_buffers();
2381 +
2382 /* "mwait %eax, %ecx;" */
2383 asm volatile(".byte 0x0f, 0x01, 0xc9;"
2384 :: "a" (eax), "c" (ecx));
2385 @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
2386 static inline void __mwaitx(unsigned long eax, unsigned long ebx,
2387 unsigned long ecx)
2388 {
2389 + /* No MDS buffer clear as this is AMD/HYGON only */
2390 +
2391 /* "mwaitx %eax, %ebx, %ecx;" */
2392 asm volatile(".byte 0x0f, 0x01, 0xfb;"
2393 :: "a" (eax), "b" (ebx), "c" (ecx));
2394 @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
2395
2396 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
2397 {
2398 + mds_idle_clear_cpu_buffers();
2399 +
2400 trace_hardirqs_on();
2401 /* "mwait %eax, %ecx;" */
2402 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
2403 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
2404 index 032b6009baab..599c273f5d00 100644
2405 --- a/arch/x86/include/asm/nospec-branch.h
2406 +++ b/arch/x86/include/asm/nospec-branch.h
2407 @@ -317,6 +317,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
2408 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
2409 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
2410
2411 +DECLARE_STATIC_KEY_FALSE(mds_user_clear);
2412 +DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
2413 +
2414 +#include <asm/segment.h>
2415 +
2416 +/**
2417 + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
2418 + *
2419 + * This uses the otherwise unused and obsolete VERW instruction in
2420 + * combination with microcode which triggers a CPU buffer flush when the
2421 + * instruction is executed.
2422 + */
2423 +static inline void mds_clear_cpu_buffers(void)
2424 +{
2425 + static const u16 ds = __KERNEL_DS;
2426 +
2427 + /*
2428 + * Has to be the memory-operand variant because only that
2429 + * guarantees the CPU buffer flush functionality according to
2430 + * documentation. The register-operand variant does not.
2431 + * Works with any segment selector, but a valid writable
2432 + * data segment is the fastest variant.
2433 + *
2434 + * "cc" clobber is required because VERW modifies ZF.
2435 + */
2436 + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
2437 +}
2438 +
2439 +/**
2440 + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
2441 + *
2442 + * Clear CPU buffers if the corresponding static key is enabled
2443 + */
2444 +static inline void mds_user_clear_cpu_buffers(void)
2445 +{
2446 + if (static_branch_likely(&mds_user_clear))
2447 + mds_clear_cpu_buffers();
2448 +}
2449 +
2450 +/**
2451 + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
2452 + *
2453 + * Clear CPU buffers if the corresponding static key is enabled
2454 + */
2455 +static inline void mds_idle_clear_cpu_buffers(void)
2456 +{
2457 + if (static_branch_likely(&mds_idle_clear))
2458 + mds_clear_cpu_buffers();
2459 +}
2460 +
2461 #endif /* __ASSEMBLY__ */
2462
2463 /*
2464 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
2465 index d53c54b842da..b54f25697beb 100644
2466 --- a/arch/x86/include/asm/processor.h
2467 +++ b/arch/x86/include/asm/processor.h
2468 @@ -997,4 +997,10 @@ enum l1tf_mitigations {
2469
2470 extern enum l1tf_mitigations l1tf_mitigation;
2471
2472 +enum mds_mitigations {
2473 + MDS_MITIGATION_OFF,
2474 + MDS_MITIGATION_FULL,
2475 + MDS_MITIGATION_VMWERV,
2476 +};
2477 +
2478 #endif /* _ASM_X86_PROCESSOR_H */
2479 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
2480 index e5258bd64200..9b096f26d1c8 100644
2481 --- a/arch/x86/kernel/cpu/bugs.c
2482 +++ b/arch/x86/kernel/cpu/bugs.c
2483 @@ -35,6 +35,7 @@
2484 static void __init spectre_v2_select_mitigation(void);
2485 static void __init ssb_select_mitigation(void);
2486 static void __init l1tf_select_mitigation(void);
2487 +static void __init mds_select_mitigation(void);
2488
2489 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
2490 u64 x86_spec_ctrl_base;
2491 @@ -61,6 +62,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
2492 /* Control unconditional IBPB in switch_mm() */
2493 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
2494
2495 +/* Control MDS CPU buffer clear before returning to user space */
2496 +DEFINE_STATIC_KEY_FALSE(mds_user_clear);
2497 +EXPORT_SYMBOL_GPL(mds_user_clear);
2498 +/* Control MDS CPU buffer clear before idling (halt, mwait) */
2499 +DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
2500 +EXPORT_SYMBOL_GPL(mds_idle_clear);
2501 +
2502 void __init check_bugs(void)
2503 {
2504 identify_boot_cpu();
2505 @@ -99,6 +107,10 @@ void __init check_bugs(void)
2506
2507 l1tf_select_mitigation();
2508
2509 + mds_select_mitigation();
2510 +
2511 + arch_smt_update();
2512 +
2513 #ifdef CONFIG_X86_32
2514 /*
2515 * Check whether we are able to run this kernel safely on SMP.
2516 @@ -204,6 +216,61 @@ static void x86_amd_ssb_disable(void)
2517 wrmsrl(MSR_AMD64_LS_CFG, msrval);
2518 }
2519
2520 +#undef pr_fmt
2521 +#define pr_fmt(fmt) "MDS: " fmt
2522 +
2523 +/* Default mitigation for MDS-affected CPUs */
2524 +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL;
2525 +static bool mds_nosmt __ro_after_init = false;
2526 +
2527 +static const char * const mds_strings[] = {
2528 + [MDS_MITIGATION_OFF] = "Vulnerable",
2529 + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
2530 + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode",
2531 +};
2532 +
2533 +static void __init mds_select_mitigation(void)
2534 +{
2535 + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
2536 + mds_mitigation = MDS_MITIGATION_OFF;
2537 + return;
2538 + }
2539 +
2540 + if (mds_mitigation == MDS_MITIGATION_FULL) {
2541 + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
2542 + mds_mitigation = MDS_MITIGATION_VMWERV;
2543 +
2544 + static_branch_enable(&mds_user_clear);
2545 +
2546 + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
2547 + (mds_nosmt || cpu_mitigations_auto_nosmt()))
2548 + cpu_smt_disable(false);
2549 + }
2550 +
2551 + pr_info("%s\n", mds_strings[mds_mitigation]);
2552 +}
2553 +
2554 +static int __init mds_cmdline(char *str)
2555 +{
2556 + if (!boot_cpu_has_bug(X86_BUG_MDS))
2557 + return 0;
2558 +
2559 + if (!str)
2560 + return -EINVAL;
2561 +
2562 + if (!strcmp(str, "off"))
2563 + mds_mitigation = MDS_MITIGATION_OFF;
2564 + else if (!strcmp(str, "full"))
2565 + mds_mitigation = MDS_MITIGATION_FULL;
2566 + else if (!strcmp(str, "full,nosmt")) {
2567 + mds_mitigation = MDS_MITIGATION_FULL;
2568 + mds_nosmt = true;
2569 + }
2570 +
2571 + return 0;
2572 +}
2573 +early_param("mds", mds_cmdline);
2574 +
2575 #undef pr_fmt
2576 #define pr_fmt(fmt) "Spectre V2 : " fmt
2577
2578 @@ -428,7 +495,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
2579 char arg[20];
2580 int ret, i;
2581
2582 - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
2583 + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
2584 + cpu_mitigations_off())
2585 return SPECTRE_V2_CMD_NONE;
2586
2587 ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
2588 @@ -560,9 +628,6 @@ specv2_set_mode:
2589
2590 /* Set up IBPB and STIBP depending on the general spectre V2 command */
2591 spectre_v2_user_select_mitigation(cmd);
2592 -
2593 - /* Enable STIBP if appropriate */
2594 - arch_smt_update();
2595 }
2596
2597 static void update_stibp_msr(void * __unused)
2598 @@ -596,6 +661,31 @@ static void update_indir_branch_cond(void)
2599 static_branch_disable(&switch_to_cond_stibp);
2600 }
2601
2602 +#undef pr_fmt
2603 +#define pr_fmt(fmt) fmt
2604 +
2605 +/* Update the static key controlling the MDS CPU buffer clear in idle */
2606 +static void update_mds_branch_idle(void)
2607 +{
2608 + /*
2609 + * Enable the idle clearing if SMT is active on CPUs which are
2610 + * affected only by MSBDS and not any other MDS variant.
2611 + *
2612 + * The other variants cannot be mitigated when SMT is enabled, so
2613 + * clearing the buffers on idle just to prevent the Store Buffer
2614 + * repartitioning leak would be a window dressing exercise.
2615 + */
2616 + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
2617 + return;
2618 +
2619 + if (sched_smt_active())
2620 + static_branch_enable(&mds_idle_clear);
2621 + else
2622 + static_branch_disable(&mds_idle_clear);
2623 +}
2624 +
2625 +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
2626 +
2627 void arch_smt_update(void)
2628 {
2629 /* Enhanced IBRS implies STIBP. No update required. */
2630 @@ -616,6 +706,17 @@ void arch_smt_update(void)
2631 break;
2632 }
2633
2634 + switch (mds_mitigation) {
2635 + case MDS_MITIGATION_FULL:
2636 + case MDS_MITIGATION_VMWERV:
2637 + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
2638 + pr_warn_once(MDS_MSG_SMT);
2639 + update_mds_branch_idle();
2640 + break;
2641 + case MDS_MITIGATION_OFF:
2642 + break;
2643 + }
2644 +
2645 mutex_unlock(&spec_ctrl_mutex);
2646 }
2647
2648 @@ -657,7 +758,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
2649 char arg[20];
2650 int ret, i;
2651
2652 - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
2653 + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
2654 + cpu_mitigations_off()) {
2655 return SPEC_STORE_BYPASS_CMD_NONE;
2656 } else {
2657 ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
2658 @@ -978,6 +1080,11 @@ static void __init l1tf_select_mitigation(void)
2659 if (!boot_cpu_has_bug(X86_BUG_L1TF))
2660 return;
2661
2662 + if (cpu_mitigations_off())
2663 + l1tf_mitigation = L1TF_MITIGATION_OFF;
2664 + else if (cpu_mitigations_auto_nosmt())
2665 + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
2666 +
2667 override_cache_bits(&boot_cpu_data);
2668
2669 switch (l1tf_mitigation) {
2670 @@ -1006,7 +1113,7 @@ static void __init l1tf_select_mitigation(void)
2671 pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
2672 half_pa);
2673 pr_info("However, doing so will make a part of your RAM unusable.\n");
2674 - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
2675 + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n");
2676 return;
2677 }
2678
2679 @@ -1039,6 +1146,7 @@ static int __init l1tf_cmdline(char *str)
2680 early_param("l1tf", l1tf_cmdline);
2681
2682 #undef pr_fmt
2683 +#define pr_fmt(fmt) fmt
2684
2685 #ifdef CONFIG_SYSFS
2686
2687 @@ -1077,6 +1185,23 @@ static ssize_t l1tf_show_state(char *buf)
2688 }
2689 #endif
2690
2691 +static ssize_t mds_show_state(char *buf)
2692 +{
2693 + if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
2694 + return sprintf(buf, "%s; SMT Host state unknown\n",
2695 + mds_strings[mds_mitigation]);
2696 + }
2697 +
2698 + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
2699 + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
2700 + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
2701 + sched_smt_active() ? "mitigated" : "disabled"));
2702 + }
2703 +
2704 + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
2705 + sched_smt_active() ? "vulnerable" : "disabled");
2706 +}
2707 +
2708 static char *stibp_state(void)
2709 {
2710 if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
2711 @@ -1141,6 +1266,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
2712 if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
2713 return l1tf_show_state(buf);
2714 break;
2715 +
2716 + case X86_BUG_MDS:
2717 + return mds_show_state(buf);
2718 +
2719 default:
2720 break;
2721 }
2722 @@ -1172,4 +1301,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b
2723 {
2724 return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
2725 }
2726 +
2727 +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf)
2728 +{
2729 + return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
2730 +}
2731 #endif
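
For context, the mds_idle_clear static key toggled by update_mds_branch_idle() above is consumed on the idle entry paths (the mwait-based idle routines). A sketch of the consumer helper, as added earlier in this series in arch/x86/include/asm/nospec-branch.h; this is reproduced from the upstream MDS patches, so the exact 4.19.43 wording may differ:

DECLARE_STATIC_KEY_FALSE(mds_idle_clear);

/*
 * Clear CPU buffers before entering idle. Per update_mds_branch_idle()
 * above, the key is only enabled on MSBDS-only parts with SMT active;
 * on all other affected CPUs it stays disabled, because idle clearing
 * would not close the remaining SMT leaks anyway.
 */
static inline void mds_idle_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_idle_clear))
		mds_clear_cpu_buffers();
}
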
2732 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2733 index 44c4ef3d989b..1073118b9bf0 100644
2734 --- a/arch/x86/kernel/cpu/common.c
2735 +++ b/arch/x86/kernel/cpu/common.c
2736 @@ -948,60 +948,73 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
2737 #endif
2738 }
2739
2740 -static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
2741 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
2742 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
2743 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
2744 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
2745 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
2746 - { X86_VENDOR_CENTAUR, 5 },
2747 - { X86_VENDOR_INTEL, 5 },
2748 - { X86_VENDOR_NSC, 5 },
2749 - { X86_VENDOR_ANY, 4 },
2750 +#define NO_SPECULATION BIT(0)
2751 +#define NO_MELTDOWN BIT(1)
2752 +#define NO_SSB BIT(2)
2753 +#define NO_L1TF BIT(3)
2754 +#define NO_MDS BIT(4)
2755 +#define MSBDS_ONLY BIT(5)
2756 +
2757 +#define VULNWL(_vendor, _family, _model, _whitelist) \
2758 + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
2759 +
2760 +#define VULNWL_INTEL(model, whitelist) \
2761 + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
2762 +
2763 +#define VULNWL_AMD(family, whitelist) \
2764 + VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
2765 +
2766 +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
2767 + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
2768 + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
2769 + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
2770 + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
2771 +
2772 + /* Intel Family 6 */
2773 + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
2774 + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
2775 + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
2776 + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
2777 + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
2778 +
2779 + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
2780 + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
2781 + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
2782 + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
2783 + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
2784 + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
2785 +
2786 + VULNWL_INTEL(CORE_YONAH, NO_SSB),
2787 +
2788 + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
2789 +
2790 + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
2791 + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
2792 + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
2793 +
2794 + /* AMD Family 0xf - 0x12 */
2795 + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2796 + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2797 + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2798 + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2799 +
2800 + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
2801 + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
2802 {}
2803 };
2804
2805 -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
2806 - { X86_VENDOR_AMD },
2807 - {}
2808 -};
2809 -
2810 -/* Only list CPUs which speculate but are non susceptible to SSB */
2811 -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
2812 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
2813 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
2814 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
2815 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
2816 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
2817 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
2818 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
2819 - { X86_VENDOR_AMD, 0x12, },
2820 - { X86_VENDOR_AMD, 0x11, },
2821 - { X86_VENDOR_AMD, 0x10, },
2822 - { X86_VENDOR_AMD, 0xf, },
2823 - {}
2824 -};
2825 +static bool __init cpu_matches(unsigned long which)
2826 +{
2827 + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
2828
2829 -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
2830 - /* in addition to cpu_no_speculation */
2831 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
2832 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
2833 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
2834 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
2835 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
2836 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
2837 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
2838 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
2839 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
2840 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
2841 - {}
2842 -};
2843 + return m && !!(m->driver_data & which);
2844 +}
2845
2846 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2847 {
2848 u64 ia32_cap = 0;
2849
2850 - if (x86_match_cpu(cpu_no_speculation))
2851 + if (cpu_matches(NO_SPECULATION))
2852 return;
2853
2854 setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
2855 @@ -1010,15 +1023,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2856 if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
2857 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
2858
2859 - if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
2860 - !(ia32_cap & ARCH_CAP_SSB_NO) &&
2861 + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
2862 !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
2863 setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
2864
2865 if (ia32_cap & ARCH_CAP_IBRS_ALL)
2866 setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
2867
2868 - if (x86_match_cpu(cpu_no_meltdown))
2869 + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
2870 + setup_force_cpu_bug(X86_BUG_MDS);
2871 + if (cpu_matches(MSBDS_ONLY))
2872 + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
2873 + }
2874 +
2875 + if (cpu_matches(NO_MELTDOWN))
2876 return;
2877
2878 /* Rogue Data Cache Load? No! */
2879 @@ -1027,7 +1045,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2880
2881 setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
2882
2883 - if (x86_match_cpu(cpu_no_l1tf))
2884 + if (cpu_matches(NO_L1TF))
2885 return;
2886
2887 setup_force_cpu_bug(X86_BUG_L1TF);
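
The cpu_vuln_whitelist rework above folds four per-issue match tables into a single table whose driver_data field carries a bitmask of "not affected" flags, queried through cpu_matches(). A minimal self-contained analogue of the pattern, with illustrative names (NO_FOO/NO_BAR are not kernel flags):

#include <stdbool.h>
#include <stdio.h>

#define NO_FOO (1UL << 0)
#define NO_BAR (1UL << 1)

struct id {
	int family, model;
	unsigned long flags;		/* plays the role of driver_data */
};

static const struct id whitelist[] = {
	{ 6, 0x37, NO_FOO },		/* immune to FOO only */
	{ 6, 0x5c, NO_FOO | NO_BAR },	/* immune to both */
	{ 0, 0, 0 }			/* terminator */
};

static bool cpu_matches(int family, int model, unsigned long which)
{
	const struct id *m;

	for (m = whitelist; m->family; m++)
		if (m->family == family && m->model == model)
			return !!(m->flags & which);
	return false;			/* not listed: assume affected */
}

int main(void)
{
	printf("6/0x37 NO_FOO=%d NO_BAR=%d\n",
	       cpu_matches(6, 0x37, NO_FOO),
	       cpu_matches(6, 0x37, NO_BAR));
	return 0;
}
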
2888 diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2889 index f8c260d522ca..912d53939f4f 100644
2890 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2891 +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2892 @@ -91,7 +91,7 @@ static u64 get_prefetch_disable_bits(void)
2893 */
2894 return 0xF;
2895 case INTEL_FAM6_ATOM_GOLDMONT:
2896 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2897 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2898 /*
2899 * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
2900 * as:
2901 @@ -995,7 +995,7 @@ static int measure_cycles_perf_fn(void *_plr)
2902
2903 switch (boot_cpu_data.x86_model) {
2904 case INTEL_FAM6_ATOM_GOLDMONT:
2905 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2906 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2907 l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
2908 l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
2909 break;
2910 diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
2911 index 18bc9b51ac9b..086cf1d1d71d 100644
2912 --- a/arch/x86/kernel/nmi.c
2913 +++ b/arch/x86/kernel/nmi.c
2914 @@ -34,6 +34,7 @@
2915 #include <asm/x86_init.h>
2916 #include <asm/reboot.h>
2917 #include <asm/cache.h>
2918 +#include <asm/nospec-branch.h>
2919
2920 #define CREATE_TRACE_POINTS
2921 #include <trace/events/nmi.h>
2922 @@ -533,6 +534,9 @@ nmi_restart:
2923 write_cr2(this_cpu_read(nmi_cr2));
2924 if (this_cpu_dec_return(nmi_state))
2925 goto nmi_restart;
2926 +
2927 + if (user_mode(regs))
2928 + mds_user_clear_cpu_buffers();
2929 }
2930 NOKPROBE_SYMBOL(do_nmi);
2931
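
mds_user_clear_cpu_buffers(), called above before the NMI returns to user mode, is defined earlier in this series in arch/x86/include/asm/nospec-branch.h. A sketch from the upstream MDS patches (details may differ from the exact 4.19.43 text):

static inline void mds_clear_cpu_buffers(void)
{
	static const u16 ds = __KERNEL_DS;

	/*
	 * VERW with a memory operand flushes the affected CPU buffers
	 * on microcode that enumerates MD_CLEAR. It has to be the
	 * memory form; the register form does not trigger the flush.
	 */
	asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

static inline void mds_user_clear_cpu_buffers(void)
{
	if (static_branch_likely(&mds_user_clear))
		mds_clear_cpu_buffers();
}
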
2932 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
2933 index e6db475164ed..0a5efd764914 100644
2934 --- a/arch/x86/kernel/traps.c
2935 +++ b/arch/x86/kernel/traps.c
2936 @@ -58,6 +58,7 @@
2937 #include <asm/alternative.h>
2938 #include <asm/fpu/xstate.h>
2939 #include <asm/trace/mpx.h>
2940 +#include <asm/nospec-branch.h>
2941 #include <asm/mpx.h>
2942 #include <asm/vm86.h>
2943 #include <asm/umip.h>
2944 @@ -387,6 +388,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
2945 regs->ip = (unsigned long)general_protection;
2946 regs->sp = (unsigned long)&gpregs->orig_ax;
2947
2948 + /*
2949 + * This situation can be triggered by userspace via
2950 + * modify_ldt(2) and the return does not take the regular
2951 + * user space exit, so a CPU buffer clear is required when
2952 + * MDS mitigation is enabled.
2953 + */
2954 + mds_user_clear_cpu_buffers();
2955 return;
2956 }
2957 #endif
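
The comment above names modify_ldt(2) as the user-reachable trigger for this #DF recovery path: a 16-bit stack segment installed via the LDT forces the espfix return, which does not pass through the regular exit-to-user buffer clear. A minimal illustration of the syscall the comment refers to (userspace, illustrative only; installing the segment by itself does not fault):

#include <asm/ldt.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc = {
		.entry_number	= 0,
		.base_addr	= 0,
		.limit		= 0xffff,
		.seg_32bit	= 0,	/* 16-bit segment */
		.contents	= 0,	/* read/write data */
		.useable	= 1,
	};

	/* func == 1: write an LDT entry */
	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt");
		return 1;
	}
	puts("16-bit LDT data segment installed");
	return 0;
}
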
2958 diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
2959 index 6d5dc5dabfd7..03b7529333a6 100644
2960 --- a/arch/x86/kernel/tsc.c
2961 +++ b/arch/x86/kernel/tsc.c
2962 @@ -636,7 +636,7 @@ unsigned long native_calibrate_tsc(void)
2963 case INTEL_FAM6_KABYLAKE_DESKTOP:
2964 crystal_khz = 24000; /* 24.0 MHz */
2965 break;
2966 - case INTEL_FAM6_ATOM_DENVERTON:
2967 + case INTEL_FAM6_ATOM_GOLDMONT_X:
2968 crystal_khz = 25000; /* 25.0 MHz */
2969 break;
2970 case INTEL_FAM6_ATOM_GOLDMONT:
2971 diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
2972 index 27ef714d886c..3d0e9aeea7c8 100644
2973 --- a/arch/x86/kernel/tsc_msr.c
2974 +++ b/arch/x86/kernel/tsc_msr.c
2975 @@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = {
2976 };
2977
2978 static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
2979 - INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
2980 - INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
2981 - INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
2982 + INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
2983 + INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
2984 + INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
2985 + INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
2986 INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
2987 - INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
2988 - INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
2989 + INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
2990 {}
2991 };
2992
2993 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
2994 index 98d13c6a64be..b810102a9cfa 100644
2995 --- a/arch/x86/kvm/cpuid.c
2996 +++ b/arch/x86/kvm/cpuid.c
2997 @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2998 /* cpuid 0x80000008.ebx */
2999 const u32 kvm_cpuid_8000_0008_ebx_x86_features =
3000 F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
3001 - F(AMD_SSB_NO);
3002 + F(AMD_SSB_NO) | F(AMD_STIBP);
3003
3004 /* cpuid 0xC0000001.edx */
3005 const u32 kvm_cpuid_C000_0001_edx_x86_features =
3006 @@ -412,7 +412,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
3007 /* cpuid 7.0.edx*/
3008 const u32 kvm_cpuid_7_0_edx_x86_features =
3009 F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
3010 - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
3011 + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
3012 + F(MD_CLEAR);
3013
3014 /* all calls to cpuid_count() should be made on the same cpu */
3015 get_cpu();
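
With F(MD_CLEAR) added to the 7.0.edx mask above, guests can enumerate the buffer-clearing capability in CPUID.(EAX=7,ECX=0):EDX bit 10 and arm their own VERW-based mitigation. A quick userspace check (illustrative, not part of the patch):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;	/* leaf 7 not supported */
	printf("MD_CLEAR: %s\n", (edx & (1u << 10)) ? "yes" : "no");
	return 0;
}
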
3016 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
3017 index 215339c7d161..73d6d585dd66 100644
3018 --- a/arch/x86/kvm/vmx.c
3019 +++ b/arch/x86/kvm/vmx.c
3020 @@ -10765,8 +10765,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
3021 evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
3022 (unsigned long)&current_evmcs->host_rsp : 0;
3023
3024 + /* L1D Flush includes CPU buffer clear to mitigate MDS */
3025 if (static_branch_unlikely(&vmx_l1d_should_flush))
3026 vmx_l1d_flush(vcpu);
3027 + else if (static_branch_unlikely(&mds_user_clear))
3028 + mds_clear_cpu_buffers();
3029
3030 asm(
3031 /* Store host registers */
3032 @@ -11127,8 +11130,8 @@ free_vcpu:
3033 return ERR_PTR(err);
3034 }
3035
3036 -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
3037 -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
3038 +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
3039 +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
3040
3041 static int vmx_vm_init(struct kvm *kvm)
3042 {
3043 diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
3044 index c1fc1ae6b429..4df3e5c89d57 100644
3045 --- a/arch/x86/mm/pti.c
3046 +++ b/arch/x86/mm/pti.c
3047 @@ -35,6 +35,7 @@
3048 #include <linux/spinlock.h>
3049 #include <linux/mm.h>
3050 #include <linux/uaccess.h>
3051 +#include <linux/cpu.h>
3052
3053 #include <asm/cpufeature.h>
3054 #include <asm/hypervisor.h>
3055 @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void)
3056 }
3057 }
3058
3059 - if (cmdline_find_option_bool(boot_command_line, "nopti")) {
3060 + if (cmdline_find_option_bool(boot_command_line, "nopti") ||
3061 + cpu_mitigations_off()) {
3062 pti_mode = PTI_FORCE_OFF;
3063 pti_print_if_insecure("disabled on command line.");
3064 return;
3065 diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
3066 index 034813d4ab1e..41dae0f0d898 100644
3067 --- a/arch/x86/platform/atom/punit_atom_debug.c
3068 +++ b/arch/x86/platform/atom/punit_atom_debug.c
3069 @@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void)
3070 (kernel_ulong_t)&drv_data }
3071
3072 static const struct x86_cpu_id intel_punit_cpu_ids[] = {
3073 - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
3074 - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng),
3075 + ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt),
3076 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng),
3077 ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
3078 {}
3079 };
3080 diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3081 index 5a0483e7bf66..31dce781364c 100644
3082 --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3083 +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3084 @@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = {
3085 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
3086
3087 static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
3088 - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data),
3089 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data),
3090 {}
3091 };
3092
3093 diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
3094 index 969bf8d515c0..c651e206d796 100644
3095 --- a/drivers/acpi/acpi_lpss.c
3096 +++ b/drivers/acpi/acpi_lpss.c
3097 @@ -292,7 +292,7 @@ static const struct lpss_device_desc bsw_spi_dev_desc = {
3098 #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
3099
3100 static const struct x86_cpu_id lpss_cpu_ids[] = {
3101 - ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */
3102 + ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */
3103 ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */
3104 {}
3105 };
3106 diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
3107 index 06c31ec3cc70..9a8e286dd86f 100644
3108 --- a/drivers/acpi/x86/utils.c
3109 +++ b/drivers/acpi/x86/utils.c
3110 @@ -54,7 +54,7 @@ static const struct always_present_id always_present_ids[] = {
3111 * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
3112 * but Linux uses a separate PWM driver, harmless if not used.
3113 */
3114 - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1), {}),
3115 + ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}),
3116 ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}),
3117 /*
3118 * The INT0002 device is necessary to clear wakeup interrupt sources
3119 diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
3120 index eb9443d5bae1..2fd6ca1021c2 100644
3121 --- a/drivers/base/cpu.c
3122 +++ b/drivers/base/cpu.c
3123 @@ -546,11 +546,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev,
3124 return sprintf(buf, "Not affected\n");
3125 }
3126
3127 +ssize_t __weak cpu_show_mds(struct device *dev,
3128 + struct device_attribute *attr, char *buf)
3129 +{
3130 + return sprintf(buf, "Not affected\n");
3131 +}
3132 +
3133 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
3134 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
3135 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
3136 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
3137 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
3138 +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
3139
3140 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
3141 &dev_attr_meltdown.attr,
3142 @@ -558,6 +565,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
3143 &dev_attr_spectre_v2.attr,
3144 &dev_attr_spec_store_bypass.attr,
3145 &dev_attr_l1tf.attr,
3146 + &dev_attr_mds.attr,
3147 NULL
3148 };
3149
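
With the weak default above and the x86 override in bugs.c, the MDS mitigation state becomes readable from userspace once the attribute is registered. A minimal reader (illustrative):

#include <stdio.h>

int main(void)
{
	char buf[128];
	FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/mds", "r");

	if (!f)
		return 1;	/* file absent on pre-MDS kernels */
	if (fgets(buf, sizeof(buf), f))
		fputs(buf, stdout);  /* e.g. "Mitigation: Clear CPU buffers; SMT vulnerable" */
	fclose(f);
	return 0;
}
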
3150 diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
3151 index a005711f909e..29f25d5d65e0 100644
3152 --- a/drivers/cpufreq/intel_pstate.c
3153 +++ b/drivers/cpufreq/intel_pstate.c
3154 @@ -1779,7 +1779,7 @@ static const struct pstate_funcs knl_funcs = {
3155 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
3156 ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs),
3157 ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs),
3158 - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs),
3159 + ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs),
3160 ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs),
3161 ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs),
3162 ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs),
3163 @@ -1796,7 +1796,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
3164 ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs),
3165 ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs),
3166 ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs),
3167 - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs),
3168 + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs),
3169 ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
3170 {}
3171 };
3172 diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
3173 index df28b65358d2..903a4f1fadcc 100644
3174 --- a/drivers/edac/pnd2_edac.c
3175 +++ b/drivers/edac/pnd2_edac.c
3176 @@ -1541,7 +1541,7 @@ static struct dunit_ops dnv_ops = {
3177
3178 static const struct x86_cpu_id pnd2_cpuids[] = {
3179 { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops },
3180 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops },
3181 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X, 0, (kernel_ulong_t)&dnv_ops },
3182 { }
3183 };
3184 MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);
3185 diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
3186 index b2ccce5fb071..c4bb67ed8da3 100644
3187 --- a/drivers/idle/intel_idle.c
3188 +++ b/drivers/idle/intel_idle.c
3189 @@ -1076,14 +1076,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3190 ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem),
3191 ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem),
3192 ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem),
3193 - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom),
3194 - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft),
3195 + ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom),
3196 + ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft),
3197 ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem),
3198 ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb),
3199 ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb),
3200 - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom),
3201 - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt),
3202 - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier),
3203 + ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom),
3204 + ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt),
3205 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier),
3206 ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht),
3207 ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb),
3208 ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt),
3209 @@ -1091,7 +1091,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3210 ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw),
3211 ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw),
3212 ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw),
3213 - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn),
3214 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn),
3215 ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw),
3216 ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw),
3217 ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw),
3218 @@ -1104,8 +1104,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3219 ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl),
3220 ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl),
3221 ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt),
3222 - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt),
3223 - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv),
3224 + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
3225 + ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv),
3226 {}
3227 };
3228
3229 @@ -1322,7 +1322,7 @@ static void intel_idle_state_table_update(void)
3230 ivt_idle_state_table_update();
3231 break;
3232 case INTEL_FAM6_ATOM_GOLDMONT:
3233 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3234 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3235 bxt_idle_state_table_update();
3236 break;
3237 case INTEL_FAM6_SKYLAKE_DESKTOP:
3238 diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
3239 index c61109f7b793..57c1ec322e42 100644
3240 --- a/drivers/mmc/host/sdhci-acpi.c
3241 +++ b/drivers/mmc/host/sdhci-acpi.c
3242 @@ -247,7 +247,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
3243 static bool sdhci_acpi_byt(void)
3244 {
3245 static const struct x86_cpu_id byt[] = {
3246 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
3247 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
3248 {}
3249 };
3250
3251 diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
3252 index 314e135014dc..30fbe2ea6eab 100644
3253 --- a/drivers/pci/pci-mid.c
3254 +++ b/drivers/pci/pci-mid.c
3255 @@ -62,8 +62,8 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = {
3256 * arch/x86/platform/intel-mid/pwr.c.
3257 */
3258 static const struct x86_cpu_id lpss_cpu_ids[] = {
3259 - ICPU(INTEL_FAM6_ATOM_PENWELL),
3260 - ICPU(INTEL_FAM6_ATOM_MERRIFIELD),
3261 + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID),
3262 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID),
3263 {}
3264 };
3265
3266 diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
3267 index a473dc51b18d..e89ad4964dc1 100644
3268 --- a/drivers/platform/x86/intel_int0002_vgpio.c
3269 +++ b/drivers/platform/x86/intel_int0002_vgpio.c
3270 @@ -60,7 +60,7 @@ static const struct x86_cpu_id int0002_cpu_ids[] = {
3271 /*
3272 * Limit ourselves to Cherry Trail for now, until testing shows we
3273 * need to handle the INT0002 device on Baytrail too.
3274 - * ICPU(INTEL_FAM6_ATOM_SILVERMONT1), * Valleyview, Bay Trail *
3275 + * ICPU(INTEL_FAM6_ATOM_SILVERMONT), * Valleyview, Bay Trail *
3276 */
3277 ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */
3278 {}
3279 diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
3280 index d79fbf924b13..5ad44204a9c3 100644
3281 --- a/drivers/platform/x86/intel_mid_powerbtn.c
3282 +++ b/drivers/platform/x86/intel_mid_powerbtn.c
3283 @@ -125,8 +125,8 @@ static const struct mid_pb_ddata mrfld_ddata = {
3284 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
3285
3286 static const struct x86_cpu_id mid_pb_cpu_ids[] = {
3287 - ICPU(INTEL_FAM6_ATOM_PENWELL, mfld_ddata),
3288 - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, mrfld_ddata),
3289 + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID, mfld_ddata),
3290 + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, mrfld_ddata),
3291 {}
3292 };
3293
3294 diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
3295 index 1423fa8710fd..b998d7da97fb 100644
3296 --- a/drivers/platform/x86/intel_telemetry_debugfs.c
3297 +++ b/drivers/platform/x86/intel_telemetry_debugfs.c
3298 @@ -320,7 +320,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = {
3299
3300 static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = {
3301 TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf),
3302 - TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf),
3303 + TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
3304 {}
3305 };
3306
3307 diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
3308 index 2f889d6c270e..fcc6bee51a42 100644
3309 --- a/drivers/platform/x86/intel_telemetry_pltdrv.c
3310 +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
3311 @@ -192,7 +192,7 @@ static struct telemetry_plt_config telem_glk_config = {
3312
3313 static const struct x86_cpu_id telemetry_cpu_ids[] = {
3314 TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config),
3315 - TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config),
3316 + TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config),
3317 {}
3318 };
3319
3320 diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
3321 index 295d8dcba48c..8cbfcce57a06 100644
3322 --- a/drivers/powercap/intel_rapl.c
3323 +++ b/drivers/powercap/intel_rapl.c
3324 @@ -1164,13 +1164,13 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
3325 RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core),
3326 RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE, rapl_defaults_core),
3327
3328 - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt),
3329 + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT, rapl_defaults_byt),
3330 RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht),
3331 - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng),
3332 - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann),
3333 + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT_MID, rapl_defaults_tng),
3334 + RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT_MID, rapl_defaults_ann),
3335 RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core),
3336 - RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, rapl_defaults_core),
3337 - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core),
3338 + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, rapl_defaults_core),
3339 + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_X, rapl_defaults_core),
3340
3341 RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server),
3342 RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM, rapl_defaults_hsw_server),
3343 diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
3344 index 1e47511a6bd5..d748527d7a38 100644
3345 --- a/drivers/thermal/intel_soc_dts_thermal.c
3346 +++ b/drivers/thermal/intel_soc_dts_thermal.c
3347 @@ -45,7 +45,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data)
3348 }
3349
3350 static const struct x86_cpu_id soc_thermal_ids[] = {
3351 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1, 0,
3352 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0,
3353 BYT_SOC_DTS_APIC_IRQ},
3354 {}
3355 };
3356 diff --git a/include/linux/cpu.h b/include/linux/cpu.h
3357 index 5041357d0297..57ae83c4d5f4 100644
3358 --- a/include/linux/cpu.h
3359 +++ b/include/linux/cpu.h
3360 @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
3361 struct device_attribute *attr, char *buf);
3362 extern ssize_t cpu_show_l1tf(struct device *dev,
3363 struct device_attribute *attr, char *buf);
3364 +extern ssize_t cpu_show_mds(struct device *dev,
3365 + struct device_attribute *attr, char *buf);
3366
3367 extern __printf(4, 5)
3368 struct device *cpu_device_create(struct device *parent, void *drvdata,
3369 @@ -187,4 +189,28 @@ static inline void cpu_smt_disable(bool force) { }
3370 static inline void cpu_smt_check_topology(void) { }
3371 #endif
3372
3373 +/*
3374 + * These are used for a global "mitigations=" cmdline option for toggling
3375 + * optional CPU mitigations.
3376 + */
3377 +enum cpu_mitigations {
3378 + CPU_MITIGATIONS_OFF,
3379 + CPU_MITIGATIONS_AUTO,
3380 + CPU_MITIGATIONS_AUTO_NOSMT,
3381 +};
3382 +
3383 +extern enum cpu_mitigations cpu_mitigations;
3384 +
3385 +/* mitigations=off */
3386 +static inline bool cpu_mitigations_off(void)
3387 +{
3388 + return cpu_mitigations == CPU_MITIGATIONS_OFF;
3389 +}
3390 +
3391 +/* mitigations=auto,nosmt */
3392 +static inline bool cpu_mitigations_auto_nosmt(void)
3393 +{
3394 + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3395 +}
3396 +
3397 #endif /* _LINUX_CPU_H_ */
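
Mitigation selection code consumes these helpers as shown in the bugs.c and pti.c hunks above: cpu_mitigations_off() short-circuits the per-issue command line handling, while cpu_mitigations_auto_nosmt() selects the SMT-disabling variant where one exists. A condensed, hypothetical example of the pattern (the foo/FOO names are illustrative only, not from this patch):

static void __init foo_select_mitigation(void)
{
	if (!boot_cpu_has_bug(X86_BUG_FOO))	/* hypothetical bug bit */
		return;

	if (cmdline_find_option_bool(boot_command_line, "nofoo") ||
	    cpu_mitigations_off())
		foo_mitigation = FOO_MITIGATION_OFF;	/* mitigations=off */
	else if (cpu_mitigations_auto_nosmt())
		foo_mitigation = FOO_MITIGATION_NOSMT;	/* mitigations=auto,nosmt */
}
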
3398 diff --git a/kernel/cpu.c b/kernel/cpu.c
3399 index dc250ec2c096..bc6c880a093f 100644
3400 --- a/kernel/cpu.c
3401 +++ b/kernel/cpu.c
3402 @@ -2278,3 +2278,18 @@ void __init boot_cpu_hotplug_init(void)
3403 #endif
3404 this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
3405 }
3406 +
3407 +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
3408 +
3409 +static int __init mitigations_parse_cmdline(char *arg)
3410 +{
3411 + if (!strcmp(arg, "off"))
3412 + cpu_mitigations = CPU_MITIGATIONS_OFF;
3413 + else if (!strcmp(arg, "auto"))
3414 + cpu_mitigations = CPU_MITIGATIONS_AUTO;
3415 + else if (!strcmp(arg, "auto,nosmt"))
3416 + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
3417 +
3418 + return 0;
3419 +}
3420 +early_param("mitigations", mitigations_parse_cmdline);
3421 diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
3422 index b74bbee111c6..c6c8d20be1d2 100644
3423 --- a/sound/soc/intel/boards/bytcr_rt5651.c
3424 +++ b/sound/soc/intel/boards/bytcr_rt5651.c
3425 @@ -787,7 +787,7 @@ static struct snd_soc_card byt_rt5651_card = {
3426 };
3427
3428 static const struct x86_cpu_id baytrail_cpu_ids[] = {
3429 - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, /* Valleyview */
3430 + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, /* Valleyview */
3431 {}
3432 };
3433
3434 diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
3435 index 2ab25aa38263..ff058bfbca3e 100644
3436 --- a/tools/power/x86/turbostat/Makefile
3437 +++ b/tools/power/x86/turbostat/Makefile
3438 @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
3439 endif
3440
3441 turbostat : turbostat.c
3442 -CFLAGS += -Wall
3443 +CFLAGS += -Wall -I../../../include
3444 CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
3445 CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
3446
3447 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
3448 index 83964f796edb..fbb53c952b73 100644
3449 --- a/tools/power/x86/turbostat/turbostat.c
3450 +++ b/tools/power/x86/turbostat/turbostat.c
3451 @@ -2082,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model)
3452 switch (model) {
3453 case INTEL_FAM6_ATOM_GOLDMONT:
3454 case INTEL_FAM6_SKYLAKE_X:
3455 - case INTEL_FAM6_ATOM_DENVERTON:
3456 + case INTEL_FAM6_ATOM_GOLDMONT_X:
3457 return 1;
3458 }
3459 return 0;
3460 @@ -3149,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
3461 pkg_cstate_limits = skx_pkg_cstate_limits;
3462 has_misc_feature_control = 1;
3463 break;
3464 - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3465 + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3466 no_MSR_MISC_PWR_MGMT = 1;
3467 - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3468 + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3469 pkg_cstate_limits = slv_pkg_cstate_limits;
3470 break;
3471 case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
3472 @@ -3163,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
3473 pkg_cstate_limits = phi_pkg_cstate_limits;
3474 break;
3475 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3476 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3477 - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3478 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3479 + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3480 pkg_cstate_limits = bxt_pkg_cstate_limits;
3481 break;
3482 default:
3483 @@ -3193,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
3484 return 0;
3485
3486 switch (model) {
3487 - case INTEL_FAM6_ATOM_SILVERMONT1:
3488 - case INTEL_FAM6_ATOM_MERRIFIELD:
3489 - case INTEL_FAM6_ATOM_MOOREFIELD:
3490 + case INTEL_FAM6_ATOM_SILVERMONT:
3491 + case INTEL_FAM6_ATOM_SILVERMONT_MID:
3492 + case INTEL_FAM6_ATOM_AIRMONT_MID:
3493 return 1;
3494 }
3495 return 0;
3496 @@ -3207,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model)
3497 return 0;
3498
3499 switch (model) {
3500 - case INTEL_FAM6_ATOM_DENVERTON:
3501 + case INTEL_FAM6_ATOM_GOLDMONT_X:
3502 return 1;
3503 }
3504 return 0;
3505 @@ -3724,8 +3724,8 @@ double get_tdp(unsigned int model)
3506 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3507
3508 switch (model) {
3509 - case INTEL_FAM6_ATOM_SILVERMONT1:
3510 - case INTEL_FAM6_ATOM_SILVERMONT2:
3511 + case INTEL_FAM6_ATOM_SILVERMONT:
3512 + case INTEL_FAM6_ATOM_SILVERMONT_X:
3513 return 30.0;
3514 default:
3515 return 135.0;
3516 @@ -3791,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3517 }
3518 break;
3519 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3520 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3521 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3522 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3523 if (rapl_joules)
3524 BIC_PRESENT(BIC_Pkg_J);
3525 @@ -3850,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model)
3526 BIC_PRESENT(BIC_RAMWatt);
3527 }
3528 break;
3529 - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3530 - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3531 + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3532 + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3533 do_rapl = RAPL_PKG | RAPL_CORES;
3534 if (rapl_joules) {
3535 BIC_PRESENT(BIC_Pkg_J);
3536 @@ -3861,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3537 BIC_PRESENT(BIC_CorWatt);
3538 }
3539 break;
3540 - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3541 + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3542 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3543 BIC_PRESENT(BIC_PKG__);
3544 BIC_PRESENT(BIC_RAM__);
3545 @@ -3884,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3546 return;
3547
3548 rapl_power_units = 1.0 / (1 << (msr & 0xF));
3549 - if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3550 + if (model == INTEL_FAM6_ATOM_SILVERMONT)
3551 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3552 else
3553 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3554 @@ -4141,8 +4141,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
3555 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3556 case INTEL_FAM6_SKYLAKE_X: /* SKX */
3557 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3558 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3559 - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3560 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3561 + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3562 return 1;
3563 }
3564 return 0;
3565 @@ -4174,7 +4174,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
3566 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3567 case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3568 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3569 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3570 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3571 return 1;
3572 }
3573 return 0;
3574 @@ -4209,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model)
3575 if (!genuine_intel)
3576 return 0;
3577 switch (model) {
3578 - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3579 - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3580 + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3581 + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3582 return 1;
3583 }
3584 return 0;
3585 @@ -4581,11 +4581,11 @@ void process_cpuid()
3586 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3587 crystal_hz = 24000000; /* 24.0 MHz */
3588 break;
3589 - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3590 + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3591 crystal_hz = 25000000; /* 25.0 MHz */
3592 break;
3593 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3594 - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3595 + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3596 crystal_hz = 19200000; /* 19.2 MHz */
3597 break;
3598 default:
3599 diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
3600 index f4534fb8b951..da781b430937 100644
3601 --- a/tools/power/x86/x86_energy_perf_policy/Makefile
3602 +++ b/tools/power/x86/x86_energy_perf_policy/Makefile
3603 @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
3604 endif
3605
3606 x86_energy_perf_policy : x86_energy_perf_policy.c
3607 -CFLAGS += -Wall
3608 +CFLAGS += -Wall -I../../../include
3609 CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
3610
3611 %: %.c