Magellan Linux

Annotation of /trunk/kernel-alx/patches-4.19/0142-4.19.43-all-fixes.patch



Revision 3421
Fri Aug 2 11:47:50 2019 UTC by niro
File size: 140299 bytes
-linux-4.19.43
1 niro 3421 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2     index 73318225a368..8718d4ad227b 100644
3     --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4     +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5     @@ -477,6 +477,7 @@ What: /sys/devices/system/cpu/vulnerabilities
6     /sys/devices/system/cpu/vulnerabilities/spectre_v2
7     /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
8     /sys/devices/system/cpu/vulnerabilities/l1tf
9     + /sys/devices/system/cpu/vulnerabilities/mds
10     Date: January 2018
11     Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
12     Description: Information about CPU vulnerabilities
13     @@ -489,8 +490,7 @@ Description: Information about CPU vulnerabilities
14     "Vulnerable" CPU is affected and no mitigation in effect
15     "Mitigation: $M" CPU is affected and mitigation $M is in effect
16    
17     - Details about the l1tf file can be found in
18     - Documentation/admin-guide/l1tf.rst
19     + See also: Documentation/admin-guide/hw-vuln/index.rst
20    
21     What: /sys/devices/system/cpu/smt
22     /sys/devices/system/cpu/smt/active
23     diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
24     new file mode 100644
25     index 000000000000..ffc064c1ec68
26     --- /dev/null
27     +++ b/Documentation/admin-guide/hw-vuln/index.rst
28     @@ -0,0 +1,13 @@
29     +========================
30     +Hardware vulnerabilities
31     +========================
32     +
33     +This section describes CPU vulnerabilities and provides an overview of the
34     +possible mitigations along with guidance for selecting mitigations if they
35     +are configurable at compile, boot or run time.
36     +
37     +.. toctree::
38     + :maxdepth: 1
39     +
40     + l1tf
41     + mds
42     diff --git a/Documentation/admin-guide/hw-vuln/l1tf.rst b/Documentation/admin-guide/hw-vuln/l1tf.rst
43     new file mode 100644
44     index 000000000000..31653a9f0e1b
45     --- /dev/null
46     +++ b/Documentation/admin-guide/hw-vuln/l1tf.rst
47     @@ -0,0 +1,615 @@
48     +L1TF - L1 Terminal Fault
49     +========================
50     +
51     +L1 Terminal Fault is a hardware vulnerability which allows unprivileged
52     +speculative access to data which is available in the Level 1 Data Cache
53     +when the page table entry controlling the virtual address, which is used
54     +for the access, has the Present bit cleared or other reserved bits set.
55     +
56     +Affected processors
57     +-------------------
58     +
59     +This vulnerability affects a wide range of Intel processors. The
60     +vulnerability is not present on:
61     +
62     + - Processors from AMD, Centaur and other non-Intel vendors
63     +
64     + - Older processor models, where the CPU family is < 6
65     +
66     + - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
67     + Penwell, Pineview, Silvermont, Airmont, Merrifield)
68     +
69     + - The Intel XEON PHI family
70     +
71     + - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
72     + IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
73     + by the Meltdown vulnerability either. These CPUs should become
74     + available by end of 2018.
75     +
76     +Whether a processor is affected or not can be read out from the L1TF
77     +vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
78     +
79     +Related CVEs
80     +------------
81     +
82     +The following CVE entries are related to the L1TF vulnerability:
83     +
84     + ============= ================= ==============================
85     + CVE-2018-3615 L1 Terminal Fault SGX related aspects
86     + CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
87     + CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
88     + ============= ================= ==============================
89     +
90     +Problem
91     +-------
92     +
93     +If an instruction accesses a virtual address for which the relevant page
94     +table entry (PTE) has the Present bit cleared or other reserved bits set,
95     +then speculative execution ignores the invalid PTE and loads the referenced
96     +data if it is present in the Level 1 Data Cache, as if the page referenced
97     +by the address bits in the PTE was still present and accessible.
98     +
99     +While this is a purely speculative mechanism and the instruction will raise
100     +a page fault when it is retired eventually, the pure act of loading the
101     +data and making it available to other speculative instructions opens up the
102     +opportunity for side channel attacks to unprivileged malicious code,
103     +similar to the Meltdown attack.
104     +
105     +While Meltdown breaks the user space to kernel space protection, L1TF
106     +allows attacks on any physical memory address in the system and the attack
107     +works across all protection domains. It allows attacks on SGX and also
108     +works from inside virtual machines because the speculation bypasses the
109     +extended page table (EPT) protection mechanism.
110     +
111     +
112     +Attack scenarios
113     +----------------
114     +
115     +1. Malicious user space
116     +^^^^^^^^^^^^^^^^^^^^^^^
117     +
118     + Operating Systems store arbitrary information in the address bits of a
119     + PTE which is marked non present. This allows a malicious user space
120     + application to attack the physical memory to which these PTEs resolve.
121     + In some cases user-space can maliciously influence the information
122     + encoded in the address bits of the PTE, thus making attacks more
123     + deterministic and more practical.
124     +
125     + The Linux kernel contains a mitigation for this attack vector, PTE
126     + inversion, which is permanently enabled and has no performance
127     + impact. The kernel ensures that the address bits of PTEs, which are not
128     + marked present, never point to cacheable physical memory space.
129     +
130     + A system with an up to date kernel is protected against attacks from
131     + malicious user space applications.
132     +
133     +2. Malicious guest in a virtual machine
134     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
135     +
136     + The fact that L1TF breaks all domain protections allows malicious guest
137     + OSes, which can control the PTEs directly, and malicious guest user
138     + space applications, which run on an unprotected guest kernel lacking the
139     + PTE inversion mitigation for L1TF, to attack physical host memory.
140     +
141     + A special aspect of L1TF in the context of virtualization is symmetric
142     + multi threading (SMT). The Intel implementation of SMT is called
143     + HyperThreading. The fact that Hyperthreads on the affected processors
144     + share the L1 Data Cache (L1D) is important for this. As the flaw only
145     + allows attacking data which is present in L1D, a malicious guest running
146     + on one Hyperthread can attack the data which is brought into the L1D by
147     + the context which runs on the sibling Hyperthread of the same physical
148     + core. This context can be host OS, host user space or a different guest.
149     +
150     + If the processor does not support Extended Page Tables, the attack is
151     + only possible when the hypervisor does not sanitize the content of the
152     + effective (shadow) page tables.
153     +
154     + While solutions exist to mitigate these attack vectors fully, these
155     + mitigations are not enabled by default in the Linux kernel because they
156     + can affect performance significantly. The kernel provides several
157     + mechanisms which can be utilized to address the problem depending on the
158     + deployment scenario. The mitigations, their protection scope and impact
159     + are described in the next sections.
160     +
161     + The default mitigations and the rationale for choosing them are explained
162     + at the end of this document. See :ref:`default_mitigations`.
163     +
164     +.. _l1tf_sys_info:
165     +
166     +L1TF system information
167     +-----------------------
168     +
169     +The Linux kernel provides a sysfs interface to enumerate the current L1TF
170     +status of the system: whether the system is vulnerable, and which
171     +mitigations are active. The relevant sysfs file is:
172     +
173     +/sys/devices/system/cpu/vulnerabilities/l1tf
174     +
175     +The possible values in this file are:
176     +
177     + =========================== ===============================
178     + 'Not affected' The processor is not vulnerable
179     + 'Mitigation: PTE Inversion' The host protection is active
180     + =========================== ===============================
181     +
182     +If KVM/VMX is enabled and the processor is vulnerable then the following
183     +information is appended to the 'Mitigation: PTE Inversion' part:
184     +
185     + - SMT status:
186     +
187     + ===================== ================
188     + 'VMX: SMT vulnerable' SMT is enabled
189     + 'VMX: SMT disabled' SMT is disabled
190     + ===================== ================
191     +
192     + - L1D Flush mode:
193     +
194     + ================================ ====================================
195     + 'L1D vulnerable' L1D flushing is disabled
196     +
197     + 'L1D conditional cache flushes' L1D flush is conditionally enabled
198     +
199     + 'L1D cache flushes' L1D flush is unconditionally enabled
200     + ================================ ====================================
201     +
202     +The resulting grade of protection is discussed in the following sections.
203     +
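+The file can be read like any regular file. As a hedged illustration (not
+part of the original ABI text), a minimal C sketch which prints the status,
+assuming only the sysfs path quoted above::
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        char buf[256];
+        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/l1tf", "r");
+
+        if (!f)
+            return 1;
+        if (fgets(buf, sizeof(buf), f))
+            printf("L1TF status: %s", buf);
+        fclose(f);
+        return 0;
+    }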
204     +
205     +Host mitigation mechanism
206     +-------------------------
207     +
208     +The kernel is unconditionally protected against L1TF attacks from malicious
209     +user space running on the host.
210     +
211     +
212     +Guest mitigation mechanisms
213     +---------------------------
214     +
215     +.. _l1d_flush:
216     +
217     +1. L1D flush on VMENTER
218     +^^^^^^^^^^^^^^^^^^^^^^^
219     +
220     + To make sure that a guest cannot attack data which is present in the L1D,
221     + the hypervisor flushes the L1D before entering the guest.
222     +
223     + Flushing the L1D evicts not only the data which should not be accessed
224     + by a potentially malicious guest, it also flushes the guest
225     + data. Flushing the L1D has a performance impact as the processor has to
226     + bring the flushed guest data back into the L1D. Depending on the
227     + frequency of VMEXIT/VMENTER and the type of computations in the guest,
228     + performance degradation in the range of 1% to 50% has been observed. For
229     + scenarios where guest VMEXIT/VMENTER are rare the performance impact is
230     + minimal. Virtio and mechanisms like posted interrupts are designed to
231     + confine the VMEXITs to a bare minimum, but specific configurations and
232     + application scenarios might still suffer from a high VMEXIT rate.
233     +
234     + The kernel provides two L1D flush modes:
235     + - conditional ('cond')
236     + - unconditional ('always')
237     +
238     + The conditional mode avoids L1D flushing after VMEXITs which execute
239     + only audited code paths before the corresponding VMENTER. These code
240     + paths have been verified not to expose secrets or other
241     + interesting data to an attacker, but they can leak information about the
242     + address space layout of the hypervisor.
243     +
244     + Unconditional mode flushes L1D on all VMENTER invocations and provides
245     + maximum protection. It has a higher overhead than the conditional
246     + mode. The overhead cannot be quantified correctly as it depends on the
247     + workload scenario and the resulting number of VMEXITs.
248     +
249     + The general recommendation is to enable L1D flush on VMENTER. The kernel
250     + defaults to conditional mode on affected processors.
251     +
252     + **Note** that L1D flush does not prevent the SMT problem because the
253     + sibling thread will also bring back its data into the L1D which makes it
254     + attackable again.
255     +
256     + L1D flush can be controlled by the administrator via the kernel command
257     + line and sysfs control files. See :ref:`mitigation_control_command_line`
258     + and :ref:`mitigation_control_kvm`.
259     +
260     +.. _guest_confinement:
261     +
262     +2. Guest VCPU confinement to dedicated physical cores
263     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
264     +
265     + To address the SMT problem, it is possible to make a guest or a group of
266     + guests affine to one or more physical cores. The proper mechanism for
267     + that is to utilize exclusive cpusets to ensure that no other guest or
268     + host tasks can run on these cores.
269     +
270     + If only a single guest or related guests run on sibling SMT threads on
271     + the same physical core then they can only attack their own memory and
272     + restricted parts of the host memory.
273     +
274     + Host memory is attackable when one of the sibling SMT threads runs in
275     + host OS (hypervisor) context and the other in guest context. The amount
276     + of valuable information from the host OS context depends on the context
277     + which the host OS executes, i.e. interrupts, soft interrupts and kernel
278     + threads. The amount of valuable data from these contexts cannot be
279     + declared as non-interesting for an attacker without deep inspection of
280     + the code.
281     +
282     + **Note** that assigning guests to a fixed set of physical cores affects
283     + the ability of the scheduler to do load balancing and might have
284     + negative effects on CPU utilization depending on the hosting
285     + scenario. Disabling SMT might be a viable alternative for particular
286     + scenarios.
287     +
288     + For further information about confining guests to a single or to a group
289     + of cores consult the cpusets documentation:
290     +
291     + https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
292     +
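+ As an illustration only, a minimal C sketch of such an exclusive cpuset
+ setup; the group name 'guest1', the CPU range, the memory node and the
+ task PID are hypothetical, and the cgroup-v1 cpuset controller is assumed
+ to be mounted at /sys/fs/cgroup/cpuset::
+
+    #include <stdio.h>
+    #include <sys/stat.h>
+
+    /* Illustrative helper: write a value to a cpuset control file. */
+    static int put(const char *path, const char *val)
+    {
+        FILE *f = fopen(path, "w");
+
+        if (!f)
+            return -1;
+        fprintf(f, "%s", val);
+        return fclose(f);
+    }
+
+    int main(void)
+    {
+        mkdir("/sys/fs/cgroup/cpuset/guest1", 0755);
+        /* One physical core, i.e. both siblings, exclusively. */
+        put("/sys/fs/cgroup/cpuset/guest1/cpuset.cpus", "2-3");
+        put("/sys/fs/cgroup/cpuset/guest1/cpuset.mems", "0");
+        put("/sys/fs/cgroup/cpuset/guest1/cpuset.cpu_exclusive", "1");
+        /* Attach the guest's VCPU threads, e.g. PID 4711. */
+        return put("/sys/fs/cgroup/cpuset/guest1/tasks", "4711");
+    }
+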
293     +.. _interrupt_isolation:
294     +
295     +3. Interrupt affinity
296     +^^^^^^^^^^^^^^^^^^^^^
297     +
298     + Interrupts can be made affine to logical CPUs. This is not universally
299     + true because there are types of interrupts which are truly per CPU
300     + interrupts, e.g. the local timer interrupt. Aside from that, multi-queue
301     + devices affine their interrupts to single CPUs or groups of CPUs per
302     + queue without allowing the administrator to control the affinities.
303     +
304     + Moving the interrupts, which can be affinity controlled, away from CPUs
305     + which run untrusted guests, reduces the attack vector space.
306     +
307     + Whether the interrupts which are affine to CPUs running untrusted
308     + guests provide interesting data for an attacker depends on the system
309     + configuration and the scenarios which run on the system. While for some
310     + of the interrupts it can be assumed that they won't expose interesting
311     + information beyond exposing hints about the host OS memory layout, there
312     + is no way to make general assumptions.
313     +
314     + Interrupt affinity can be controlled by the administrator via the
315     + /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
316     + available at:
317     +
318     + https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
319     +
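+ For illustration, a short C sketch which applies such a restriction; the
+ IRQ number 24 and the housekeeping CPU range 0-1 are hypothetical and
+ have to be adapted to the actual system::
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        /* Keep IRQ 24 away from the CPUs which run untrusted guests. */
+        FILE *f = fopen("/proc/irq/24/smp_affinity_list", "w");
+
+        if (!f)
+            return 1;
+        fprintf(f, "0-1\n");
+        return fclose(f) ? 1 : 0;
+    }
+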
320     +.. _smt_control:
321     +
322     +4. SMT control
323     +^^^^^^^^^^^^^^
324     +
325     + To prevent the SMT issues of L1TF it might be necessary to disable SMT
326     + completely. Disabling SMT can have a significant performance impact, but
327     + the impact depends on the hosting scenario and the type of workloads.
328     + The impact of disabling SMT also needs to be weighed against the impact
329     + of other mitigation solutions like confining guests to dedicated cores.
330     +
331     + The kernel provides a sysfs interface to retrieve the status of SMT and
332     + to control it. It also provides a kernel command line interface to
333     + control SMT.
334     +
335     + The kernel command line interface consists of the following options:
336     +
337     + =========== ==========================================================
338     + nosmt Affects the bring up of the secondary CPUs during boot. The
339     + kernel tries to bring all present CPUs online during the
340     + boot process. "nosmt" makes sure that from each physical
341     + core only one, the so-called primary (hyper) thread, is
342     + activated. Due to a design flaw of Intel processors related
343     + to Machine Check Exceptions, the non-primary siblings have
344     + to be brought up at least partially and are then shut down
345     + again. "nosmt" can be undone via the sysfs interface.
346     +
347     + nosmt=force Has the same effect as "nosmt" but it does not allow the
348     + SMT disable to be undone via the sysfs interface.
349     + =========== ==========================================================
350     +
351     + The sysfs interface provides two files:
352     +
353     + - /sys/devices/system/cpu/smt/control
354     + - /sys/devices/system/cpu/smt/active
355     +
356     + /sys/devices/system/cpu/smt/control:
357     +
358     + This file shows the SMT control state and allows SMT to be
359     + disabled or (re)enabled. The possible states are:
360     +
361     + ============== ===================================================
362     + on SMT is supported by the CPU and enabled. All
363     + logical CPUs can be onlined and offlined without
364     + restrictions.
365     +
366     + off SMT is supported by the CPU and disabled. Only
367     + the so-called primary SMT threads can be onlined
368     + and offlined without restrictions. An attempt to
369     + online a non-primary sibling is rejected.
370     +
371     + forceoff Same as 'off' but the state cannot be controlled.
372     + Attempts to write to the control file are rejected.
373     +
374     + notsupported The processor does not support SMT. It's therefore
375     + not affected by the SMT implications of L1TF.
376     + Attempts to write to the control file are rejected.
377     + ============== ===================================================
378     +
379     + The possible states which can be written into this file to control SMT
380     + state are:
381     +
382     + - on
383     + - off
384     + - forceoff
385     +
386     + /sys/devices/system/cpu/smt/active:
387     +
388     + This file reports whether SMT is enabled and active, i.e. if on any
389     + physical core two or more sibling threads are online.
390     +
391     + SMT control is also possible at boot time via the l1tf kernel command
392     + line parameter in combination with L1D flush control. See
393     + :ref:`mitigation_control_command_line`.
394     +
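+ As a hedged illustration of the control files described above (root
+ privileges assumed), a C sketch which disables SMT at runtime and reads
+ back the active state::
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        char buf[16];
+        FILE *f = fopen("/sys/devices/system/cpu/smt/control", "w");
+
+        if (!f)
+            return 1;
+        fprintf(f, "off\n");
+        if (fclose(f))  /* rejected e.g. when the state is 'forceoff' */
+            return 1;
+
+        f = fopen("/sys/devices/system/cpu/smt/active", "r");
+        if (f && fgets(buf, sizeof(buf), f))
+            printf("SMT active: %s", buf);  /* expected to print 0 */
+        if (f)
+            fclose(f);
+        return 0;
+    }
+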
395     +5. Disabling EPT
396     +^^^^^^^^^^^^^^^^
397     +
398     + Disabling EPT for virtual machines provides full mitigation for L1TF even
399     + with SMT enabled, because the effective page tables for guests are
400     + managed and sanitized by the hypervisor. However, disabling EPT has a
401     + significant performance impact, especially when the Meltdown mitigation
402     + KPTI is enabled.
403     +
404     + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
405     +
406     +There is ongoing research and development for new mitigation mechanisms to
407     +address the performance impact of disabling SMT or EPT.
408     +
409     +.. _mitigation_control_command_line:
410     +
411     +Mitigation control on the kernel command line
412     +---------------------------------------------
413     +
414     +The kernel command line allows control of the L1TF mitigations at boot
415     +time with the option "l1tf=". The valid arguments for this option are:
416     +
417     + ============ =============================================================
418     + full Provides all available mitigations for the L1TF
419     + vulnerability. Disables SMT and enables all mitigations in
420     + the hypervisors, i.e. unconditional L1D flushing
421     +
422     + SMT control and L1D flush control via the sysfs interface
423     + is still possible after boot. Hypervisors will issue a
424     + warning when the first VM is started in a potentially
425     + insecure configuration, i.e. SMT enabled or L1D flush
426     + disabled.
427     +
428     + full,force Same as 'full', but disables SMT and L1D flush runtime
429     + control. Implies the 'nosmt=force' command line option.
430     + (i.e. sysfs control of SMT is disabled.)
431     +
432     + flush Leaves SMT enabled and enables the default hypervisor
433     + mitigation, i.e. conditional L1D flushing
434     +
435     + SMT control and L1D flush control via the sysfs interface
436     + is still possible after boot. Hypervisors will issue a
437     + warning when the first VM is started in a potentially
438     + insecure configuration, i.e. SMT enabled or L1D flush
439     + disabled.
440     +
441     + flush,nosmt Disables SMT and enables the default hypervisor mitigation,
442     + i.e. conditional L1D flushing.
443     +
444     + SMT control and L1D flush control via the sysfs interface
445     + is still possible after boot. Hypervisors will issue a
446     + warning when the first VM is started in a potentially
447     + insecure configuration, i.e. SMT enabled or L1D flush
448     + disabled.
449     +
450     + flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
451     + started in a potentially insecure configuration.
452     +
453     + off Disables hypervisor mitigations and doesn't emit any
454     + warnings.
455     + It also drops the swap size and available RAM limit restrictions
456     + on both hypervisor and bare metal.
457     +
458     + ============ =============================================================
459     +
460     +The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
461     +
462     +
463     +.. _mitigation_control_kvm:
464     +
465     +Mitigation control for KVM - module parameter
466     +-------------------------------------------------------------
467     +
468     +The KVM hypervisor mitigation mechanism, flushing the L1D cache when
469     +entering a guest, can be controlled with a module parameter.
470     +
471     +The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
472     +following arguments:
473     +
474     + ============ ==============================================================
475     + always L1D cache flush on every VMENTER.
476     +
477     + cond Flush L1D on VMENTER only when the code between VMEXIT and
478     + VMENTER can leak host memory which is considered
479     + interesting for an attacker. This still can leak host memory
480     + which allows e.g. the host's address space layout to be determined.
481     +
482     + never Disables the mitigation
483     + ============ ==============================================================
484     +
485     +The parameter can be provided on the kernel command line, as a module
486     +parameter when loading the module, and modified at runtime via the sysfs
487     +file:
488     +
489     +/sys/module/kvm_intel/parameters/vmentry_l1d_flush
490     +
491     +The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
492     +line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
493     +module parameter is ignored and writes to the sysfs file are rejected.
494     +
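+As an illustration (assuming the kvm_intel module is loaded and the caller
+has root privileges), a C sketch which switches the runtime mode via the
+sysfs file named above and reads back the result::
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        const char *p = "/sys/module/kvm_intel/parameters/vmentry_l1d_flush";
+        char buf[16];
+        FILE *f = fopen(p, "w");
+
+        if (!f)
+            return 1;          /* module not loaded or no permission */
+        fprintf(f, "always\n");
+        if (fclose(f))         /* rejected when 'l1tf=full,force' was booted */
+            return 1;
+
+        f = fopen(p, "r");
+        if (f && fgets(buf, sizeof(buf), f))
+            printf("vmentry_l1d_flush: %s", buf);
+        if (f)
+            fclose(f);
+        return 0;
+    }
+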
495     +.. _mitigation_selection:
496     +
497     +Mitigation selection guide
498     +--------------------------
499     +
500     +1. No virtualization in use
501     +^^^^^^^^^^^^^^^^^^^^^^^^^^^
502     +
503     + The system is protected by the kernel unconditionally and no further
504     + action is required.
505     +
506     +2. Virtualization with trusted guests
507     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
508     +
509     + If the guest comes from a trusted source and the guest OS kernel is
510     + guaranteed to have the L1TF mitigations in place, the system is fully
511     + protected against L1TF and no further action is required.
512     +
513     + To avoid the overhead of the default L1D flushing on VMENTER the
514     + administrator can disable the flushing via the kernel command line and
515     + sysfs control files. See :ref:`mitigation_control_command_line` and
516     + :ref:`mitigation_control_kvm`.
517     +
518     +
519     +3. Virtualization with untrusted guests
520     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
521     +
522     +3.1. SMT not supported or disabled
523     +""""""""""""""""""""""""""""""""""
524     +
525     + If SMT is not supported by the processor or disabled in the BIOS or by
526     + the kernel, it's only required to enforce L1D flushing on VMENTER.
527     +
528     + Conditional L1D flushing is the default behaviour and can be tuned. See
529     + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
530     +
531     +3.2. EPT not supported or disabled
532     +""""""""""""""""""""""""""""""""""
533     +
534     + If EPT is not supported by the processor or disabled in the hypervisor,
535     + the system is fully protected. SMT can stay enabled and L1D flushing on
536     + VMENTER is not required.
537     +
538     + EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
539     +
540     +3.3. SMT and EPT supported and active
541     +"""""""""""""""""""""""""""""""""""""
542     +
543     + If SMT and EPT are supported and active then various degrees of
544     + mitigations can be employed:
545     +
546     + - L1D flushing on VMENTER:
547     +
548     + L1D flushing on VMENTER is the minimal protection requirement, but it
549     + is only potent in combination with other mitigation methods.
550     +
551     + Conditional L1D flushing is the default behaviour and can be tuned. See
552     + :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
553     +
554     + - Guest confinement:
555     +
556     + Confinement of guests to a single or a group of physical cores which
557     + are not running any other processes can reduce the attack surface
558     + significantly, but interrupts, soft interrupts and kernel threads can
559     + still expose valuable data to a potential attacker. See
560     + :ref:`guest_confinement`.
561     +
562     + - Interrupt isolation:
563     +
564     + Isolating the guest CPUs from interrupts can reduce the attack surface
565     + further, but still allows a malicious guest to explore a limited amount
566     + of host physical memory. This can at least be used to gain knowledge
567     + about the host address space layout. The interrupts which have a fixed
568     + affinity to the CPUs which run the untrusted guests can, depending on
569     + the scenario, still trigger soft interrupts and schedule kernel threads
570     + which might expose valuable information. See
571     + :ref:`interrupt_isolation`.
572     +
573     +The above three mitigation methods combined can provide protection to a
574     +certain degree, but the risk of the remaining attack surface has to be
575     +carefully analyzed. For full protection the following methods are
576     +available:
577     +
578     + - Disabling SMT:
579     +
580     + Disabling SMT and enforcing the L1D flushing provides the maximum
581     + amount of protection. This mitigation does not depend on any of the
582     + above mitigation methods.
583     +
584     + SMT control and L1D flushing can be tuned by the command line
585     + parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
586     + time with the matching sysfs control files. See :ref:`smt_control`,
587     + :ref:`mitigation_control_command_line` and
588     + :ref:`mitigation_control_kvm`.
589     +
590     + - Disabling EPT:
591     +
592     + Disabling EPT provides the maximum amount of protection as well. It
593     + does not depend on any of the above mitigation methods. SMT can stay
594     + enabled and L1D flushing is not required, but the performance impact is
595     + significant.
596     +
597     + EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
598     + parameter.
599     +
600     +3.4. Nested virtual machines
601     +""""""""""""""""""""""""""""
602     +
603     +When nested virtualization is in use, three operating systems are involved:
604     +the bare metal hypervisor, the nested hypervisor and the nested virtual
605     +machine. VMENTER operations from the nested hypervisor into the nested
606     +guest will always be processed by the bare metal hypervisor. If KVM is the
607     +bare metal hypervisor it will:
608     +
609     + - Flush the L1D cache on every switch from the nested hypervisor to the
610     + nested virtual machine, so that the nested hypervisor's secrets are not
611     + exposed to the nested virtual machine;
612     +
613     + - Flush the L1D cache on every switch from the nested virtual machine to
614     + the nested hypervisor; this is a complex operation, and flushing the L1D
615     + cache prevents the bare metal hypervisor's secrets from being exposed to the
616     + nested virtual machine;
617     +
618     + - Instruct the nested hypervisor to not perform any L1D cache flush. This
619     + is an optimization to avoid double L1D flushing.
620     +
621     +
622     +.. _default_mitigations:
623     +
624     +Default mitigations
625     +-------------------
626     +
627     + The kernel default mitigations for vulnerable processors are:
628     +
629     + - PTE inversion to protect against malicious user space. This is done
630     + unconditionally and cannot be controlled. The swap storage is limited
631     + to ~16TB.
632     +
633     + - L1D conditional flushing on VMENTER when EPT is enabled for
634     + a guest.
635     +
636     + The kernel does not by default enforce the disabling of SMT, which leaves
637     + SMT systems vulnerable when running untrusted guests with EPT enabled.
638     +
639     + The rationale for this choice is:
640     +
641     + - Force disabling SMT can break existing setups, especially with
642     + unattended updates.
643     +
644     + - If regular users run untrusted guests on their machine, then L1TF is
645     + just an add-on to other malware which might be embedded in an untrusted
646     + guest, e.g. spam-bots or attacks on the local network.
647     +
648     + There is no technical way to prevent a user from running untrusted code
649     + on their machines blindly.
650     +
651     + - It's technically extremely unlikely and from today's knowledge even
652     + impossible that L1TF can be exploited via the most popular attack
653     + mechanisms like JavaScript because these mechanisms have no way to
654     + control PTEs. If this were possible, and no other mitigation were
655     + available, then the default might be different.
656     +
657     + - The administrators of cloud and hosting setups have to carefully
658     + analyze the risk for their scenarios and make the appropriate
659     + mitigation choices, which might even vary across their deployed
660     + machines and also result in other changes of their overall setup.
661     + There is no way for the kernel to provide a sensible default for this
662     + kind of scenario.
663     diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst
664     new file mode 100644
665     index 000000000000..e3a796c0d3a2
666     --- /dev/null
667     +++ b/Documentation/admin-guide/hw-vuln/mds.rst
668     @@ -0,0 +1,308 @@
669     +MDS - Microarchitectural Data Sampling
670     +======================================
671     +
672     +Microarchitectural Data Sampling is a hardware vulnerability which allows
673     +unprivileged speculative access to data which is available in various CPU
674     +internal buffers.
675     +
676     +Affected processors
677     +-------------------
678     +
679     +This vulnerability affects a wide range of Intel processors. The
680     +vulnerability is not present on:
681     +
682     + - Processors from AMD, Centaur and other non-Intel vendors
683     +
684     + - Older processor models, where the CPU family is < 6
685     +
686     + - Some Atoms (Bonnell, Saltwell, Goldmont, GoldmontPlus)
687     +
688     + - Intel processors which have the ARCH_CAP_MDS_NO bit set in the
689     + IA32_ARCH_CAPABILITIES MSR.
690     +
691     +Whether a processor is affected or not can be read out from the MDS
692     +vulnerability file in sysfs. See :ref:`mds_sys_info`.
693     +
694     +Not all processors are affected by all variants of MDS, but the mitigation
695     +is identical for all of them so the kernel treats them as a single
696     +vulnerability.
697     +
698     +Related CVEs
699     +------------
700     +
701     +The following CVE entries are related to the MDS vulnerability:
702     +
703     + ============== ===== ===================================================
704     + CVE-2018-12126 MSBDS Microarchitectural Store Buffer Data Sampling
705     + CVE-2018-12130 MFBDS Microarchitectural Fill Buffer Data Sampling
706     + CVE-2018-12127 MLPDS Microarchitectural Load Port Data Sampling
707     + CVE-2019-11091 MDSUM Microarchitectural Data Sampling Uncacheable Memory
708     + ============== ===== ===================================================
709     +
710     +Problem
711     +-------
712     +
713     +When performing store, load, or L1 refill operations, processors write data
714     +into temporary microarchitectural structures (buffers). The data in the
715     +buffer can be forwarded to load operations as an optimization.
716     +
717     +Under certain conditions, usually a fault/assist caused by a load
718     +operation, data unrelated to the load memory address can be speculatively
719     +forwarded from the buffers. Because the load operation causes a fault or
720     +assist and its result will be discarded, the forwarded data will not cause
721     +incorrect program execution or state changes. But a malicious operation
722     +may be able to forward this speculative data to a disclosure gadget which
723     +in turn allows the value to be inferred via a cache side channel attack.
724     +
725     +Because the buffers are potentially shared between Hyper-Threads, cross
726     +Hyper-Thread attacks are possible.
727     +
728     +Deeper technical information is available in the MDS specific x86
729     +architecture section: :ref:`Documentation/x86/mds.rst <mds>`.
730     +
731     +
732     +Attack scenarios
733     +----------------
734     +
735     +Attacks against the MDS vulnerabilities can be mounted from malicious,
736     +non-privileged user space applications running on hosts or guests. Malicious
737     +guest OSes can obviously mount attacks as well.
738     +
739     +Contrary to other speculation based vulnerabilities the MDS vulnerability
740     +does not allow the attacker to control the memory target address. As a
741     +consequence the attacks are purely sampling based, but as demonstrated with
742     +the TLBleed attack, samples can be post-processed successfully.
743     +
744     +Web-Browsers
745     +^^^^^^^^^^^^
746     +
747     + It's unclear whether attacks through Web-Browsers are possible at
748     + all. Exploitation through JavaScript is considered very unlikely,
749     + but other widely used web technologies like WebAssembly could possibly be
750     + abused.
751     +
752     +
753     +.. _mds_sys_info:
754     +
755     +MDS system information
756     +-----------------------
757     +
758     +The Linux kernel provides a sysfs interface to enumerate the current MDS
759     +status of the system: whether the system is vulnerable, and which
760     +mitigations are active. The relevant sysfs file is:
761     +
762     +/sys/devices/system/cpu/vulnerabilities/mds
763     +
764     +The possible values in this file are:
765     +
766     + .. list-table::
767     +
768     + * - 'Not affected'
769     + - The processor is not vulnerable
770     + * - 'Vulnerable'
771     + - The processor is vulnerable, but no mitigation enabled
772     + * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
773     + - The processor is vulnerable but microcode is not updated.
774     +
775     + The mitigation is enabled on a best effort basis. See :ref:`vmwerv`
776     + * - 'Mitigation: Clear CPU buffers'
777     + - The processor is vulnerable and the CPU buffer clearing mitigation is
778     + enabled.
779     +
780     +If the processor is vulnerable then the following information is appended
781     +to the above information:
782     +
783     + ======================== ============================================
784     + 'SMT vulnerable' SMT is enabled
785     + 'SMT mitigated' SMT is enabled and mitigated
786     + 'SMT disabled' SMT is disabled
787     + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
788     + ======================== ============================================
789     +
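+The mds file can be read like any other sysfs attribute. As an
+illustration only, a C sketch which dumps the whole vulnerabilities
+directory, including mds::
+
+    #include <dirent.h>
+    #include <stdio.h>
+
+    int main(void)
+    {
+        const char *dir = "/sys/devices/system/cpu/vulnerabilities";
+        char path[512], buf[256];
+        struct dirent *d;
+        DIR *vd = opendir(dir);
+
+        if (!vd)
+            return 1;
+        while ((d = readdir(vd))) {
+            FILE *f;
+
+            if (d->d_name[0] == '.')
+                continue;
+            snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
+            f = fopen(path, "r");
+            if (f && fgets(buf, sizeof(buf), f))
+                printf("%-20s %s", d->d_name, buf);
+            if (f)
+                fclose(f);
+        }
+        closedir(vd);
+        return 0;
+    }
+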
790     +.. _vmwerv:
791     +
792     +Best effort mitigation mode
793     +^^^^^^^^^^^^^^^^^^^^^^^^^^^
794     +
795     + If the processor is vulnerable, but the availability of the microcode based
796     + mitigation mechanism is not advertised via CPUID, the kernel selects a best
797     + effort mitigation mode. This mode invokes the mitigation instructions
798     + without a guarantee that they clear the CPU buffers.
799     +
800     + This is done to address virtualization scenarios where the host has the
801     + microcode update applied, but the hypervisor is not yet updated to expose
802     + the CPUID to the guest. If the host has updated microcode, the protection
803     + takes effect; otherwise a few CPU cycles are wasted pointlessly.
804     +
805     + The state in the mds sysfs file reflects this situation accordingly.
806     +
807     +
808     +Mitigation mechanism
809     +-------------------------
810     +
811     +The kernel detects the affected CPUs and the presence of the microcode
812     +which is required.
813     +
814     +If a CPU is affected and the microcode is available, then the kernel
815     +enables the mitigation by default. The mitigation can be controlled at boot
816     +time via a kernel command line option. See
817     +:ref:`mds_mitigation_control_command_line`.
818     +
819     +.. _cpu_buffer_clear:
820     +
821     +CPU buffer clearing
822     +^^^^^^^^^^^^^^^^^^^
823     +
824     + The mitigation for MDS clears the affected CPU buffers on return to user
825     + space and when entering a guest.
826     +
827     + If SMT is enabled it also clears the buffers on idle entry when the CPU
828     + is only affected by MSBDS and not any other MDS variant, because the
829     + other variants cannot be protected against cross Hyper-Thread attacks.
830     +
831     + For CPUs which are only affected by MSBDS the user space, guest and idle
832     + transition mitigations are sufficient and SMT is not affected.
833     +
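+ On CPUs with updated microcode, the buffer clearing is performed with the
+ VERW instruction. A sketch of how such a helper can look in C with inline
+ assembly; the actual kernel implementation lives in the x86 tree and may
+ differ in detail::
+
+    /* Illustrative only: VERW with a valid selector as memory operand. */
+    static inline void clear_cpu_buffers(void)
+    {
+        static const unsigned short ds = 0x18;  /* hypothetical selector */
+
+        /* Updated microcode clears the affected buffers as a side
+         * effect of VERW; ZF is modified, hence the "cc" clobber. */
+        asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
+    }
+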
834     +.. _virt_mechanism:
835     +
836     +Virtualization mitigation
837     +^^^^^^^^^^^^^^^^^^^^^^^^^
838     +
839     + The protection for host to guest transition depends on the L1TF
840     + vulnerability of the CPU:
841     +
842     + - CPU is affected by L1TF:
843     +
844     + If the L1D flush mitigation is enabled and up to date microcode is
845     + available, the L1D flush mitigation is automatically protecting the
846     + guest transition.
847     +
848     + If the L1D flush mitigation is disabled then the MDS mitigation is
849     + invoked explicitly when the host MDS mitigation is enabled.
850     +
851     + For details on L1TF and virtualization see:
852     + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <mitigation_control_kvm>`.
853     +
854     + - CPU is not affected by L1TF:
855     +
856     + CPU buffers are flushed before entering the guest when the host MDS
857     + mitigation is enabled.
858     +
859     + The resulting MDS protection matrix for the host to guest transition:
860     +
861     + ============ ===== ============= ============ =================
862     + L1TF MDS VMX-L1FLUSH Host MDS MDS-State
863     +
864     + Don't care No Don't care N/A Not affected
865     +
866     + Yes Yes Disabled Off Vulnerable
867     +
868     + Yes Yes Disabled Full Mitigated
869     +
870     + Yes Yes Enabled Don't care Mitigated
871     +
872     + No Yes N/A Off Vulnerable
873     +
874     + No Yes N/A Full Mitigated
875     + ============ ===== ============= ============ =================
876     +
877     + This only covers the host to guest transition, i.e. prevents leakage from
878     + host to guest, but does not protect the guest internally. Guests need to
879     + have their own protections.
880     +
881     +.. _xeon_phi:
882     +
883     +XEON PHI specific considerations
884     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
885     +
886     + The XEON PHI processor family is affected by MSBDS which can be exploited
887     + cross Hyper-Threads when entering idle states. Some XEON PHI variants allow
888     + the use of MWAIT in user space (Ring 3), which opens a potential attack vector
889     + for malicious user space. The exposure can be disabled on the kernel
890     + command line with the 'ring3mwait=disable' command line option.
891     +
892     + XEON PHI is not affected by the other MDS variants and MSBDS is mitigated
893     + before the CPU enters an idle state. As XEON PHI is not affected by L1TF
894     + either, disabling SMT is not required for full protection.
895     +
896     +.. _mds_smt_control:
897     +
898     +SMT control
899     +^^^^^^^^^^^
900     +
901     + All MDS variants except MSBDS can be attacked cross Hyper-Threads. That
902     + means on CPUs which are affected by MFBDS or MLPDS, it is necessary to
903     + disable SMT for full protection. These are most of the affected CPUs; the
904     + exception is XEON PHI, see :ref:`xeon_phi`.
905     +
906     + Disabling SMT can have a significant performance impact, but the impact
907     + depends on the type of workloads.
908     +
909     + See the relevant chapter in the L1TF mitigation documentation for details:
910     + :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <smt_control>`.
911     +
912     +
913     +.. _mds_mitigation_control_command_line:
914     +
915     +Mitigation control on the kernel command line
916     +---------------------------------------------
917     +
918     +The kernel command line allows control of the MDS mitigations at boot
919     +time with the option "mds=". The valid arguments for this option are:
920     +
921     + ============ =============================================================
922     + full If the CPU is vulnerable, enable all available mitigations
923     + for the MDS vulnerability, CPU buffer clearing on exit to
924     + userspace and when entering a VM. Idle transitions are
925     + protected as well if SMT is enabled.
926     +
927     + It does not automatically disable SMT.
928     +
929     + full,nosmt The same as mds=full, with SMT disabled on vulnerable
930     + CPUs. This is the complete mitigation.
931     +
932     + off Disables MDS mitigations completely.
933     +
934     + ============ =============================================================
935     +
936     +Not specifying this option is equivalent to "mds=full".
937     +
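+To check at runtime which variant is in effect, the boot command line can
+be inspected; a small C sketch, for illustration only::
+
+    #include <stdio.h>
+    #include <string.h>
+
+    int main(void)
+    {
+        char buf[4096], *opt;
+        FILE *f = fopen("/proc/cmdline", "r");
+
+        if (!f || !fgets(buf, sizeof(buf), f))
+            return 1;
+        /* Prints the command line from "mds=" onward, if present. */
+        opt = strstr(buf, "mds=");
+        printf("%s\n", opt ? opt : "mds= not set (equivalent to mds=full)");
+        fclose(f);
+        return 0;
+    }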
938     +
939     +Mitigation selection guide
940     +--------------------------
941     +
942     +1. Trusted userspace
943     +^^^^^^^^^^^^^^^^^^^^
944     +
945     + If all userspace applications are from a trusted source and do not
946     + execute untrusted code which is supplied externally, then the mitigation
947     + can be disabled.
948     +
949     +
950     +2. Virtualization with trusted guests
951     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
952     +
953     + The same considerations as for trusted user space above apply.
954     +
955     +3. Virtualization with untrusted guests
956     +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
957     +
958     + The protection depends on the state of the L1TF mitigations.
959     + See :ref:`virt_mechanism`.
960     +
961     + If the MDS mitigation is enabled and SMT is disabled, guest to host and
962     + guest to guest attacks are prevented.
963     +
964     +.. _mds_default_mitigations:
965     +
966     +Default mitigations
967     +-------------------
968     +
969     + The kernel default mitigations for vulnerable processors are:
970     +
971     + - Enable CPU buffer clearing
972     +
973     + The kernel does not by default enforce the disabling of SMT, which leaves
974     + SMT systems vulnerable when running untrusted code. The same rationale as
975     + for L1TF applies.
976     + See :ref:`Documentation/admin-guide/hw-vuln/l1tf.rst <default_mitigations>`.
977     diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
978     index 0873685bab0f..89abc5057349 100644
979     --- a/Documentation/admin-guide/index.rst
980     +++ b/Documentation/admin-guide/index.rst
981     @@ -17,14 +17,12 @@ etc.
982     kernel-parameters
983     devices
984    
985     -This section describes CPU vulnerabilities and provides an overview of the
986     -possible mitigations along with guidance for selecting mitigations if they
987     -are configurable at compile, boot or run time.
988     +This section describes CPU vulnerabilities and their mitigations.
989    
990     .. toctree::
991     :maxdepth: 1
992    
993     - l1tf
994     + hw-vuln/index
995    
996     Here is a set of documents aimed at users who are trying to track down
997     problems and bugs in particular.
998     diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
999     index 8b6567f7cb9b..a29301d6e6c6 100644
1000     --- a/Documentation/admin-guide/kernel-parameters.txt
1001     +++ b/Documentation/admin-guide/kernel-parameters.txt
1002     @@ -2079,7 +2079,7 @@
1003    
1004     Default is 'flush'.
1005    
1006     - For details see: Documentation/admin-guide/l1tf.rst
1007     + For details see: Documentation/admin-guide/hw-vuln/l1tf.rst
1008    
1009     l2cr= [PPC]
1010    
1011     @@ -2319,6 +2319,32 @@
1012     Format: <first>,<last>
1013     Specifies range of consoles to be captured by the MDA.
1014    
1015     + mds= [X86,INTEL]
1016     + Control mitigation for the Micro-architectural Data
1017     + Sampling (MDS) vulnerability.
1018     +
1019     + Certain CPUs are vulnerable to an exploit against CPU
1020     + internal buffers which can forward information to a
1021     + disclosure gadget under certain conditions.
1022     +
1023     + In vulnerable processors, the speculatively
1024     + forwarded data can be used in a cache side channel
1025     + attack, to access data to which the attacker does
1026     + not have direct access.
1027     +
1028     + This parameter controls the MDS mitigation. The
1029     + options are:
1030     +
1031     + full - Enable MDS mitigation on vulnerable CPUs
1032     + full,nosmt - Enable MDS mitigation and disable
1033     + SMT on vulnerable CPUs
1034     + off - Unconditionally disable MDS mitigation
1035     +
1036     + Not specifying this option is equivalent to
1037     + mds=full.
1038     +
1039     + For details see: Documentation/admin-guide/hw-vuln/mds.rst
1040     +
1041     mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
1042     Amount of memory to be used when the kernel is not able
1043     to see the whole system memory or for test.
1044     @@ -2476,6 +2502,40 @@
1045     in the "bleeding edge" mini2440 support kernel at
1046     http://repo.or.cz/w/linux-2.6/mini2440.git
1047    
1048     + mitigations=
1049     + [X86,PPC,S390] Control optional mitigations for CPU
1050     + vulnerabilities. This is a set of curated,
1051     + arch-independent options, each of which is an
1052     + aggregation of existing arch-specific options.
1053     +
1054     + off
1055     + Disable all optional CPU mitigations. This
1056     + improves system performance, but it may also
1057     + expose users to several CPU vulnerabilities.
1058     + Equivalent to: nopti [X86,PPC]
1059     + nospectre_v1 [PPC]
1060     + nobp=0 [S390]
1061     + nospectre_v2 [X86,PPC,S390]
1062     + spectre_v2_user=off [X86]
1063     + spec_store_bypass_disable=off [X86,PPC]
1064     + l1tf=off [X86]
1065     + mds=off [X86]
1066     +
1067     + auto (default)
1068     + Mitigate all CPU vulnerabilities, but leave SMT
1069     + enabled, even if it's vulnerable. This is for
1070     + users who don't want to be surprised by SMT
1071     + getting disabled across kernel upgrades, or who
1072     + have other ways of avoiding SMT-based attacks.
1073     + Equivalent to: (default behavior)
1074     +
1075     + auto,nosmt
1076     + Mitigate all CPU vulnerabilities, disabling SMT
1077     + if needed. This is for users who always want to
1078     + be fully mitigated, even if it means losing SMT.
1079     + Equivalent to: l1tf=flush,nosmt [X86]
1080     + mds=full,nosmt [X86]
1081     +
1082     mminit_loglevel=
1083     [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
1084     parameter allows control of the logging verbosity for
1085     diff --git a/Documentation/admin-guide/l1tf.rst b/Documentation/admin-guide/l1tf.rst
1086     deleted file mode 100644
1087     index 9f5924f81f89..000000000000
1088     --- a/Documentation/admin-guide/l1tf.rst
1089     +++ /dev/null
1090     @@ -1,614 +0,0 @@
1091     -L1TF - L1 Terminal Fault
1092     -========================
1093     -
1094     -L1 Terminal Fault is a hardware vulnerability which allows unprivileged
1095     -speculative access to data which is available in the Level 1 Data Cache
1096     -when the page table entry controlling the virtual address, which is used
1097     -for the access, has the Present bit cleared or other reserved bits set.
1098     -
1099     -Affected processors
1100     --------------------
1101     -
1102     -This vulnerability affects a wide range of Intel processors. The
1103     -vulnerability is not present on:
1104     -
1105     - - Processors from AMD, Centaur and other non Intel vendors
1106     -
1107     - - Older processor models, where the CPU family is < 6
1108     -
1109     - - A range of Intel ATOM processors (Cedarview, Cloverview, Lincroft,
1110     - Penwell, Pineview, Silvermont, Airmont, Merrifield)
1111     -
1112     - - The Intel XEON PHI family
1113     -
1114     - - Intel processors which have the ARCH_CAP_RDCL_NO bit set in the
1115     - IA32_ARCH_CAPABILITIES MSR. If the bit is set the CPU is not affected
1116     - by the Meltdown vulnerability either. These CPUs should become
1117     - available by end of 2018.
1118     -
1119     -Whether a processor is affected or not can be read out from the L1TF
1120     -vulnerability file in sysfs. See :ref:`l1tf_sys_info`.
1121     -
1122     -Related CVEs
1123     -------------
1124     -
1125     -The following CVE entries are related to the L1TF vulnerability:
1126     -
1127     - ============= ================= ==============================
1128     - CVE-2018-3615 L1 Terminal Fault SGX related aspects
1129     - CVE-2018-3620 L1 Terminal Fault OS, SMM related aspects
1130     - CVE-2018-3646 L1 Terminal Fault Virtualization related aspects
1131     - ============= ================= ==============================
1132     -
1133     -Problem
1134     --------
1135     -
1136     -If an instruction accesses a virtual address for which the relevant page
1137     -table entry (PTE) has the Present bit cleared or other reserved bits set,
1138     -then speculative execution ignores the invalid PTE and loads the referenced
1139     -data if it is present in the Level 1 Data Cache, as if the page referenced
1140     -by the address bits in the PTE was still present and accessible.
1141     -
1142     -While this is a purely speculative mechanism and the instruction will raise
1143     -a page fault when it is retired eventually, the pure act of loading the
1144     -data and making it available to other speculative instructions opens up the
1145     -opportunity for side channel attacks to unprivileged malicious code,
1146     -similar to the Meltdown attack.
1147     -
1148     -While Meltdown breaks the user space to kernel space protection, L1TF
1149     -allows to attack any physical memory address in the system and the attack
1150     -works across all protection domains. It allows an attack of SGX and also
1151     -works from inside virtual machines because the speculation bypasses the
1152     -extended page table (EPT) protection mechanism.
1153     -
1154     -
1155     -Attack scenarios
1156     -----------------
1157     -
1158     -1. Malicious user space
1159     -^^^^^^^^^^^^^^^^^^^^^^^
1160     -
1161     - Operating Systems store arbitrary information in the address bits of a
1162     - PTE which is marked non present. This allows a malicious user space
1163     - application to attack the physical memory to which these PTEs resolve.
1164     - In some cases user-space can maliciously influence the information
1165     - encoded in the address bits of the PTE, thus making attacks more
1166     - deterministic and more practical.
1167     -
1168     - The Linux kernel contains a mitigation for this attack vector, PTE
1169     - inversion, which is permanently enabled and has no performance
1170     - impact. The kernel ensures that the address bits of PTEs, which are not
1171     - marked present, never point to cacheable physical memory space.
1172     -
1173     - A system with an up to date kernel is protected against attacks from
1174     - malicious user space applications.
1175     -
1176     -2. Malicious guest in a virtual machine
1177     -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1178     -
1179     - The fact that L1TF breaks all domain protections allows malicious guest
1180     - OSes, which can control the PTEs directly, and malicious guest user
1181     - space applications, which run on an unprotected guest kernel lacking the
1182     - PTE inversion mitigation for L1TF, to attack physical host memory.
1183     -
1184     - A special aspect of L1TF in the context of virtualization is symmetric
1185     - multi threading (SMT). The Intel implementation of SMT is called
1186     - HyperThreading. The fact that Hyperthreads on the affected processors
1187     - share the L1 Data Cache (L1D) is important for this. As the flaw allows
1188     - only to attack data which is present in L1D, a malicious guest running
1189     - on one Hyperthread can attack the data which is brought into the L1D by
1190     - the context which runs on the sibling Hyperthread of the same physical
1191     - core. This context can be host OS, host user space or a different guest.
1192     -
1193     - If the processor does not support Extended Page Tables, the attack is
1194     - only possible, when the hypervisor does not sanitize the content of the
1195     - effective (shadow) page tables.
1196     -
1197     - While solutions exist to mitigate these attack vectors fully, these
1198     - mitigations are not enabled by default in the Linux kernel because they
1199     - can affect performance significantly. The kernel provides several
1200     - mechanisms which can be utilized to address the problem depending on the
1201     - deployment scenario. The mitigations, their protection scope and impact
1202     - are described in the next sections.
1203     -
1204     - The default mitigations and the rationale for choosing them are explained
1205     - at the end of this document. See :ref:`default_mitigations`.
1206     -
1207     -.. _l1tf_sys_info:
1208     -
1209     -L1TF system information
1210     ------------------------
1211     -
1212     -The Linux kernel provides a sysfs interface to enumerate the current L1TF
1213     -status of the system: whether the system is vulnerable, and which
1214     -mitigations are active. The relevant sysfs file is:
1215     -
1216     -/sys/devices/system/cpu/vulnerabilities/l1tf
1217     -
1218     -The possible values in this file are:
1219     -
1220     - =========================== ===============================
1221     - 'Not affected' The processor is not vulnerable
1222     - 'Mitigation: PTE Inversion' The host protection is active
1223     - =========================== ===============================
1224     -
1225     -If KVM/VMX is enabled and the processor is vulnerable then the following
1226     -information is appended to the 'Mitigation: PTE Inversion' part:
1227     -
1228     - - SMT status:
1229     -
1230     - ===================== ================
1231     - 'VMX: SMT vulnerable' SMT is enabled
1232     - 'VMX: SMT disabled' SMT is disabled
1233     - ===================== ================
1234     -
1235     - - L1D Flush mode:
1236     -
1237     - ================================ ====================================
1238     - 'L1D vulnerable' L1D flushing is disabled
1239     -
1240     - 'L1D conditional cache flushes' L1D flush is conditionally enabled
1241     -
1242     - 'L1D cache flushes' L1D flush is unconditionally enabled
1243     - ================================ ====================================
1244     -
1245     -The resulting grade of protection is discussed in the following sections.
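
As an illustration only (this program is not part of the kernel sources),
the status can be consumed from user space with a few lines of C::

    #include <stdio.h>

    int main(void)
    {
            char buf[256];
            FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/l1tf", "r");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* The file contains a single human readable status line. */
            if (fgets(buf, sizeof(buf), f))
                    printf("L1TF status: %s", buf);
            fclose(f);
            return 0;
    }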
1246     -
1247     -
1248     -Host mitigation mechanism
1249     --------------------------
1250     -
1251     -The kernel is unconditionally protected against L1TF attacks from malicious
1252     -user space running on the host.
1253     -
1254     -
1255     -Guest mitigation mechanisms
1256     ----------------------------
1257     -
1258     -.. _l1d_flush:
1259     -
1260     -1. L1D flush on VMENTER
1261     -^^^^^^^^^^^^^^^^^^^^^^^
1262     -
1263     - To make sure that a guest cannot attack data which is present in the L1D,
1264     - the hypervisor flushes the L1D before entering the guest.
1265     -
1266     - Flushing the L1D evicts not only the data which should not be accessed
1267     - by a potentially malicious guest, but also the guest's
1268     - data. Flushing the L1D has a performance impact as the processor has to
1269     - bring the flushed guest data back into the L1D. Depending on the
1270     - frequency of VMEXIT/VMENTER and the type of computations in the guest,
1271     - performance degradation in the range of 1% to 50% has been observed. For
1272     - scenarios where guest VMEXIT/VMENTER are rare the performance impact is
1273     - minimal. Virtio and mechanisms like posted interrupts are designed to
1274     - confine the VMEXITs to a bare minimum, but specific configurations and
1275     - application scenarios might still suffer from a high VMEXIT rate.
1276     -
1277     - The kernel provides two L1D flush modes:
1278     - - conditional ('cond')
1279     - - unconditional ('always')
1280     -
1281     - The conditional mode avoids L1D flushing after VMEXITs which execute
1282     - only audited code paths before the corresponding VMENTER. These code
1283     - paths have been verified not to expose secrets or other interesting
1284     - data to an attacker, but they can leak information about the address
1285     - space layout of the hypervisor.
1286     -
1287     - Unconditional mode flushes L1D on all VMENTER invocations and provides
1288     - maximum protection. It has a higher overhead than the conditional
1289     - mode. The overhead cannot be quantified correctly as it depends on the
1290     - workload scenario and the resulting number of VMEXITs.
1291     -
1292     - The general recommendation is to enable L1D flush on VMENTER. The kernel
1293     - defaults to conditional mode on affected processors.
1294     -
1295     - **Note** that L1D flush does not prevent the SMT problem because the
1296     - sibling thread will also bring back its data into the L1D which makes it
1297     - attackable again.
1298     -
1299     - L1D flush can be controlled by the administrator via the kernel command
1300     - line and sysfs control files. See :ref:`mitigation_control_command_line`
1301     - and :ref:`mitigation_control_kvm`.
1302     -
1303     -.. _guest_confinement:
1304     -
1305     -2. Guest VCPU confinement to dedicated physical cores
1306     -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1307     -
1308     - To address the SMT problem, it is possible to make a guest or a group of
1309     - guests affine to one or more physical cores. The proper mechanism for
1310     - that is to utilize exclusive cpusets to ensure that no other guest or
1311     - host tasks can run on these cores.
1312     -
1313     - If only a single guest or related guests run on sibling SMT threads on
1314     - the same physical core then they can only attack their own memory and
1315     - restricted parts of the host memory.
1316     -
1317     - Host memory is attackable when one of the sibling SMT threads runs in
1318     - host OS (hypervisor) context and the other in guest context. The amount
1319     - of valuable information from the host OS context depends on what the
1320     - host OS executes, i.e. interrupts, soft interrupts and kernel
1321     - threads. The amount of valuable data from these contexts cannot be
1322     - declared as non-interesting for an attacker without deep inspection of
1323     - the code.
1324     -
1325     - **Note** that assigning guests to a fixed set of physical cores affects
1326     - the ability of the scheduler to do load balancing and might have
1327     - negative effects on CPU utilization depending on the hosting
1328     - scenario. Disabling SMT might be a viable alternative for particular
1329     - scenarios.
1330     -
1331     - For further information about confining guests to a single or to a group
1332     - of cores consult the cpusets documentation:
1333     -
1334     - https://www.kernel.org/doc/Documentation/cgroup-v1/cpusets.txt
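
A hedged sketch of this approach in C follows; the cgroup-v1 mount point
(/sys/fs/cgroup/cpuset), the set name "guest0", the CPU list, the memory
node and the PID 1234 are illustrative assumptions, not prescribed values::

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/stat.h>

    /* Write a single value to a cpuset control file. */
    static void write_file(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror(path);
                    exit(1);
            }
            fputs(val, f);
            fclose(f);
    }

    int main(void)
    {
            /* Create an exclusive cpuset holding CPUs 2-3 (one physical
             * core plus its SMT sibling in this hypothetical topology). */
            mkdir("/sys/fs/cgroup/cpuset/guest0", 0755);
            write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.cpus", "2-3");
            write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.mems", "0");
            write_file("/sys/fs/cgroup/cpuset/guest0/cpuset.cpu_exclusive", "1");
            /* Attach the guest's main process, e.g. QEMU (hypothetical PID). */
            write_file("/sys/fs/cgroup/cpuset/guest0/tasks", "1234");
            return 0;
    }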
1335     -
1336     -.. _interrupt_isolation:
1337     -
1338     -3. Interrupt affinity
1339     -^^^^^^^^^^^^^^^^^^^^^
1340     -
1341     - Interrupts can be made affine to logical CPUs. This is not universally
1342     - true because there are types of interrupts which are truly per CPU
1343     - interrupts, e.g. the local timer interrupt. Apart from that, multi queue
1344     - devices affine their interrupts to single CPUs or groups of CPUs per
1345     - queue without allowing the administrator to control the affinities.
1346     -
1347     - Moving the interrupts, which can be affinity controlled, away from CPUs
1348     - which run untrusted guests, reduces the attack vector space.
1349     -
1350     - Whether the interrupts which are affine to CPUs running untrusted
1351     - guests provide interesting data for an attacker depends on the system
1352     - configuration and the scenarios which run on the system. While for some
1353     - of the interrupts it can be assumed that they won't expose interesting
1354     - information beyond exposing hints about the host OS memory layout, there
1355     - is no way to make general assumptions.
1356     -
1357     - Interrupt affinity can be controlled by the administrator via the
1358     - /proc/irq/$NR/smp_affinity[_list] files. Limited documentation is
1359     - available at:
1360     -
1361     - https://www.kernel.org/doc/Documentation/IRQ-affinity.txt
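
As a sketch (IRQ number 42 is a hypothetical device interrupt, and the
mask value is an assumption for a topology where CPUs 2-3 run guests),
moving an interrupt to CPUs 0-1 can be done from C like this::

    #include <stdio.h>

    int main(void)
    {
            /* Bitmask of allowed CPUs: bits 0 and 1 select CPUs 0-1. */
            unsigned long mask = (1UL << 0) | (1UL << 1);
            FILE *f = fopen("/proc/irq/42/smp_affinity", "w");

            if (!f) {
                    perror("smp_affinity");
                    return 1;
            }
            fprintf(f, "%lx\n", mask);
            fclose(f);
            return 0;
    }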
1362     -
1363     -.. _smt_control:
1364     -
1365     -4. SMT control
1366     -^^^^^^^^^^^^^^
1367     -
1368     - To prevent the SMT issues of L1TF it might be necessary to disable SMT
1369     - completely. Disabling SMT can have a significant performance impact, but
1370     - the impact depends on the hosting scenario and the type of workloads.
1371     - The impact of disabling SMT also needs to be weighed against the impact
1372     - of other mitigation solutions like confining guests to dedicated cores.
1373     -
1374     - The kernel provides a sysfs interface to retrieve the status of SMT and
1375     - to control it. It also provides a kernel command line interface to
1376     - control SMT.
1377     -
1378     - The kernel command line interface consists of the following options:
1379     -
1380     - =========== ==========================================================
1381     - nosmt Affects the bring up of the secondary CPUs during boot. The
1382     - kernel tries to bring all present CPUs online during the
1383     - boot process. "nosmt" makes sure that from each physical
1384     - core only one - the so called primary (hyper) thread is
1385     - activated. Due to a design flaw of Intel processors related
1386     - to Machine Check Exceptions the non primary siblings have
1387     - to be brought up at least partially and are then shut down
1388     - again. "nosmt" can be undone via the sysfs interface.
1389     -
1390     - nosmt=force Has the same effect as "nosmt" but does not allow
1391     - undoing the SMT disable via the sysfs interface.
1392     - =========== ==========================================================
1393     -
1394     - The sysfs interface provides two files:
1395     -
1396     - - /sys/devices/system/cpu/smt/control
1397     - - /sys/devices/system/cpu/smt/active
1398     -
1399     - /sys/devices/system/cpu/smt/control:
1400     -
1401     - This file allows reading out the SMT control state and provides the
1402     - ability to disable or (re)enable SMT. The possible states are:
1403     -
1404     - ============== ===================================================
1405     - on SMT is supported by the CPU and enabled. All
1406     - logical CPUs can be onlined and offlined without
1407     - restrictions.
1408     -
1409     - off SMT is supported by the CPU and disabled. Only
1410     - the so called primary SMT threads can be onlined
1411     - and offlined without restrictions. An attempt to
1412     - online a non-primary sibling is rejected.
1413     -
1414     - forceoff Same as 'off' but the state cannot be controlled.
1415     - Attempts to write to the control file are rejected.
1416     -
1417     - notsupported The processor does not support SMT. It's therefore
1418     - not affected by the SMT implications of L1TF.
1419     - Attempts to write to the control file are rejected.
1420     - ============== ===================================================
1421     -
1422     - The possible states which can be written into this file to control SMT
1423     - state are:
1424     -
1425     - - on
1426     - - off
1427     - - forceoff
1428     -
1429     - /sys/devices/system/cpu/smt/active:
1430     -
1431     - This file reports whether SMT is enabled and active, i.e. if on any
1432     - physical core two or more sibling threads are online.
1433     -
1434     - SMT control is also possible at boot time via the l1tf kernel command
1435     - line parameter in combination with L1D flush control. See
1436     - :ref:`mitigation_control_command_line`.
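
For illustration, a minimal C sketch which disables SMT at run time via
the control file and then reads back the active state (the write fails
when the state is 'forceoff' or 'notsupported')::

    #include <stdio.h>

    int main(void)
    {
            char buf[16];
            FILE *f = fopen("/sys/devices/system/cpu/smt/control", "w");

            if (!f) {
                    perror("smt/control");
                    return 1;
            }
            fputs("off", f);
            if (fclose(f))                  /* write errors surface here */
                    perror("smt/control");

            f = fopen("/sys/devices/system/cpu/smt/active", "r");
            if (f && fgets(buf, sizeof(buf), f))
                    printf("SMT active: %s", buf);    /* expect "0" */
            if (f)
                    fclose(f);
            return 0;
    }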
1437     -
1438     -5. Disabling EPT
1439     -^^^^^^^^^^^^^^^^
1440     -
1441     - Disabling EPT for virtual machines provides full mitigation for L1TF even
1442     - with SMT enabled, because the effective page tables for guests are
1443     - managed and sanitized by the hypervisor. However, disabling EPT has a
1444     - significant performance impact, especially when the Meltdown mitigation
1445     - KPTI is enabled.
1446     -
1447     - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
1448     -
1449     -There is ongoing research and development for new mitigation mechanisms to
1450     -address the performance impact of disabling SMT or EPT.
1451     -
1452     -.. _mitigation_control_command_line:
1453     -
1454     -Mitigation control on the kernel command line
1455     ----------------------------------------------
1456     -
1457     -The kernel command line allows controlling the L1TF mitigations at boot
1458     -time with the option "l1tf=". The valid arguments for this option are:
1459     -
1460     - ============ =============================================================
1461     - full Provides all available mitigations for the L1TF
1462     - vulnerability. Disables SMT and enables all mitigations in
1463     - the hypervisors, i.e. unconditional L1D flushing
1464     -
1465     - SMT control and L1D flush control via the sysfs interface
1466     - is still possible after boot. Hypervisors will issue a
1467     - warning when the first VM is started in a potentially
1468     - insecure configuration, i.e. SMT enabled or L1D flush
1469     - disabled.
1470     -
1471     - full,force Same as 'full', but disables SMT and L1D flush runtime
1472     - control. Implies the 'nosmt=force' command line option.
1473     - (i.e. sysfs control of SMT is disabled.)
1474     -
1475     - flush Leaves SMT enabled and enables the default hypervisor
1476     - mitigation, i.e. conditional L1D flushing
1477     -
1478     - SMT control and L1D flush control via the sysfs interface
1479     - is still possible after boot. Hypervisors will issue a
1480     - warning when the first VM is started in a potentially
1481     - insecure configuration, i.e. SMT enabled or L1D flush
1482     - disabled.
1483     -
1484     - flush,nosmt Disables SMT and enables the default hypervisor mitigation,
1485     - i.e. conditional L1D flushing.
1486     -
1487     - SMT control and L1D flush control via the sysfs interface
1488     - is still possible after boot. Hypervisors will issue a
1489     - warning when the first VM is started in a potentially
1490     - insecure configuration, i.e. SMT enabled or L1D flush
1491     - disabled.
1492     -
1493     - flush,nowarn Same as 'flush', but hypervisors will not warn when a VM is
1494     - started in a potentially insecure configuration.
1495     -
1496     - off Disables hypervisor mitigations and doesn't emit any
1497     - warnings.
1498     - It also drops the swap size and available RAM limit restrictions
1499     - on both hypervisor and bare metal.
1500     -
1501     - ============ =============================================================
1502     -
1503     -The default is 'flush'. For details about L1D flushing see :ref:`l1d_flush`.
1504     -
1505     -
1506     -.. _mitigation_control_kvm:
1507     -
1508     -Mitigation control for KVM - module parameter
1509     --------------------------------------------------------------
1510     -
1511     -The KVM hypervisor mitigation mechanism, flushing the L1D cache when
1512     -entering a guest, can be controlled with a module parameter.
1513     -
1514     -The option/parameter is "kvm-intel.vmentry_l1d_flush=". It takes the
1515     -following arguments:
1516     -
1517     - ============ ==============================================================
1518     - always L1D cache flush on every VMENTER.
1519     -
1520     - cond Flush L1D on VMENTER only when the code between VMEXIT and
1521     - VMENTER can leak host memory which is considered
1522     - interesting for an attacker. This still can leak host memory
1523     - which allows e.g. determining the host's address space layout.
1524     -
1525     - never Disables the mitigation
1526     - ============ ==============================================================
1527     -
1528     -The parameter can be provided on the kernel command line, as a module
1529     -parameter when loading the module, and modified at runtime via the sysfs
1530     -file:
1531     -
1532     -/sys/module/kvm_intel/parameters/vmentry_l1d_flush
1533     -
1534     -The default is 'cond'. If 'l1tf=full,force' is given on the kernel command
1535     -line, then 'always' is enforced and the kvm-intel.vmentry_l1d_flush
1536     -module parameter is ignored and writes to the sysfs file are rejected.
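
For example, switching to unconditional flushing at run time can be
sketched in C as follows (the write is rejected when 'l1tf=full,force'
was given, as described above)::

    #include <stdio.h>

    int main(void)
    {
            const char *p = "/sys/module/kvm_intel/parameters/vmentry_l1d_flush";
            FILE *f = fopen(p, "w");

            if (!f) {
                    perror(p);
                    return 1;
            }
            fputs("always", f);
            if (fclose(f))          /* a rejected write surfaces here */
                    perror(p);
            return 0;
    }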
1537     -
1538     -
1539     -Mitigation selection guide
1540     ---------------------------
1541     -
1542     -1. No virtualization in use
1543     -^^^^^^^^^^^^^^^^^^^^^^^^^^^
1544     -
1545     - The system is protected by the kernel unconditionally and no further
1546     - action is required.
1547     -
1548     -2. Virtualization with trusted guests
1549     -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1550     -
1551     - If the guest comes from a trusted source and the guest OS kernel is
1552     - guaranteed to have the L1TF mitigations in place the system is fully
1553     - protected against L1TF and no further action is required.
1554     -
1555     - To avoid the overhead of the default L1D flushing on VMENTER the
1556     - administrator can disable the flushing via the kernel command line and
1557     - sysfs control files. See :ref:`mitigation_control_command_line` and
1558     - :ref:`mitigation_control_kvm`.
1559     -
1560     -
1561     -3. Virtualization with untrusted guests
1562     -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1563     -
1564     -3.1. SMT not supported or disabled
1565     -""""""""""""""""""""""""""""""""""
1566     -
1567     - If SMT is not supported by the processor or disabled in the BIOS or by
1568     - the kernel, it's only required to enforce L1D flushing on VMENTER.
1569     -
1570     - Conditional L1D flushing is the default behaviour and can be tuned. See
1571     - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
1572     -
1573     -3.2. EPT not supported or disabled
1574     -""""""""""""""""""""""""""""""""""
1575     -
1576     - If EPT is not supported by the processor or disabled in the hypervisor,
1577     - the system is fully protected. SMT can stay enabled and L1D flushing on
1578     - VMENTER is not required.
1579     -
1580     - EPT can be disabled in the hypervisor via the 'kvm-intel.ept' parameter.
1581     -
1582     -3.3. SMT and EPT supported and active
1583     -"""""""""""""""""""""""""""""""""""""
1584     -
1585     - If SMT and EPT are supported and active then various degrees of
1586     - mitigations can be employed:
1587     -
1588     - - L1D flushing on VMENTER:
1589     -
1590     - L1D flushing on VMENTER is the minimal protection requirement, but it
1591     - is only potent in combination with other mitigation methods.
1592     -
1593     - Conditional L1D flushing is the default behaviour and can be tuned. See
1594     - :ref:`mitigation_control_command_line` and :ref:`mitigation_control_kvm`.
1595     -
1596     - - Guest confinement:
1597     -
1598     - Confinement of guests to a single or a group of physical cores which
1599     - are not running any other processes, can reduce the attack surface
1600     - significantly, but interrupts, soft interrupts and kernel threads can
1601     - still expose valuable data to a potential attacker. See
1602     - :ref:`guest_confinement`.
1603     -
1604     - - Interrupt isolation:
1605     -
1606     - Isolating the guest CPUs from interrupts can reduce the attack surface
1607     - further, but still allows a malicious guest to explore a limited amount
1608     - of host physical memory. This can at least be used to gain knowledge
1609     - about the host address space layout. The interrupts which have a fixed
1610     - affinity to the CPUs which run the untrusted guests can, depending on
1611     - the scenario, still trigger soft interrupts and schedule kernel threads
1612     - which might expose valuable information. See
1613     - :ref:`interrupt_isolation`.
1614     -
1615     -The above three mitigation methods combined can provide protection to a
1616     -certain degree, but the risk of the remaining attack surface has to be
1617     -carefully analyzed. For full protection the following methods are
1618     -available:
1619     -
1620     - - Disabling SMT:
1621     -
1622     - Disabling SMT and enforcing the L1D flushing provides the maximum
1623     - amount of protection. This mitigation does not depend on any of the
1624     - above mitigation methods.
1625     -
1626     - SMT control and L1D flushing can be tuned by the command line
1627     - parameters 'nosmt', 'l1tf', 'kvm-intel.vmentry_l1d_flush' and at run
1628     - time with the matching sysfs control files. See :ref:`smt_control`,
1629     - :ref:`mitigation_control_command_line` and
1630     - :ref:`mitigation_control_kvm`.
1631     -
1632     - - Disabling EPT:
1633     -
1634     - Disabling EPT provides the maximum amount of protection as well. It is
1635     - not dependent on any of the above mitigation methods. SMT can stay
1636     - enabled and L1D flushing is not required, but the performance impact is
1637     - significant.
1638     -
1639     - EPT can be disabled in the hypervisor via the 'kvm-intel.ept'
1640     - parameter.
1641     -
1642     -3.4. Nested virtual machines
1643     -""""""""""""""""""""""""""""
1644     -
1645     -When nested virtualization is in use, three operating systems are involved:
1646     -the bare metal hypervisor, the nested hypervisor and the nested virtual
1647     -machine. VMENTER operations from the nested hypervisor into the nested
1648     -guest will always be processed by the bare metal hypervisor. If KVM is the
1649     -bare metal hypervisor it will:
1650     -
1651     - - Flush the L1D cache on every switch from the nested hypervisor to the
1652     - nested virtual machine, so that the nested hypervisor's secrets are not
1653     - exposed to the nested virtual machine;
1654     -
1655     - - Flush the L1D cache on every switch from the nested virtual machine to
1656     - the nested hypervisor; this is a complex operation, and flushing the L1D
1657     - cache prevents the bare metal hypervisor's secrets from being exposed
1658     - to the nested virtual machine;
1659     -
1660     - - Instruct the nested hypervisor to not perform any L1D cache flush. This
1661     - is an optimization to avoid double L1D flushing.
1662     -
1663     -
1664     -.. _default_mitigations:
1665     -
1666     -Default mitigations
1667     --------------------
1668     -
1669     - The kernel default mitigations for vulnerable processors are:
1670     -
1671     - - PTE inversion to protect against malicious user space. This is done
1672     - unconditionally and cannot be controlled. The swap storage is limited
1673     - to ~16TB.
1674     -
1675     - - L1D conditional flushing on VMENTER when EPT is enabled for
1676     - a guest.
1677     -
1678     - The kernel does not by default enforce the disabling of SMT, which leaves
1679     - SMT systems vulnerable when running untrusted guests with EPT enabled.
1680     -
1681     - The rationale for this choice is:
1682     -
1683     - - Force disabling SMT can break existing setups, especially with
1684     - unattended updates.
1685     -
1686     - - If regular users run untrusted guests on their machine, then L1TF is
1687     - just an add-on to other malware which might be embedded in an untrusted
1688     - guest, e.g. spam-bots or attacks on the local network.
1689     -
1690     - There is no technical way to prevent a user from running untrusted code
1691     - on their machines blindly.
1692     -
1693     - - It's technically extremely unlikely and from today's knowledge even
1694     - impossible that L1TF can be exploited via the most popular attack
1695     - mechanisms like JavaScript because these mechanisms have no way to
1696     - control PTEs. If this were possible and no other mitigation were
1697     - available, then the default might be different.
1698     -
1699     - - The administrators of cloud and hosting setups have to carefully
1700     - analyze the risk for their scenarios and make the appropriate
1701     - mitigation choices, which might even vary across their deployed
1702     - machines and also result in other changes of their overall setup.
1703     - There is no way for the kernel to provide a sensible default for this
1704     - kind of scenario.
1705     diff --git a/Documentation/index.rst b/Documentation/index.rst
1706     index 5db7e87c7cb1..1cdc139adb40 100644
1707     --- a/Documentation/index.rst
1708     +++ b/Documentation/index.rst
1709     @@ -104,6 +104,7 @@ implementation.
1710     :maxdepth: 2
1711    
1712     sh/index
1713     + x86/index
1714    
1715     Filesystem Documentation
1716     ------------------------
1717     diff --git a/Documentation/x86/conf.py b/Documentation/x86/conf.py
1718     new file mode 100644
1719     index 000000000000..33c5c3142e20
1720     --- /dev/null
1721     +++ b/Documentation/x86/conf.py
1722     @@ -0,0 +1,10 @@
1723     +# -*- coding: utf-8; mode: python -*-
1724     +
1725     +project = "X86 architecture specific documentation"
1726     +
1727     +tags.add("subproject")
1728     +
1729     +latex_documents = [
1730     + ('index', 'x86.tex', project,
1731     + 'The kernel development community', 'manual'),
1732     +]
1733     diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
1734     new file mode 100644
1735     index 000000000000..ef389dcf1b1d
1736     --- /dev/null
1737     +++ b/Documentation/x86/index.rst
1738     @@ -0,0 +1,8 @@
1739     +==========================
1740     +x86 architecture specifics
1741     +==========================
1742     +
1743     +.. toctree::
1744     + :maxdepth: 1
1745     +
1746     + mds
1747     diff --git a/Documentation/x86/mds.rst b/Documentation/x86/mds.rst
1748     new file mode 100644
1749     index 000000000000..534e9baa4e1d
1750     --- /dev/null
1751     +++ b/Documentation/x86/mds.rst
1752     @@ -0,0 +1,225 @@
1753     +Microarchitectural Data Sampling (MDS) mitigation
1754     +=================================================
1755     +
1756     +.. _mds:
1757     +
1758     +Overview
1759     +--------
1760     +
1761     +Microarchitectural Data Sampling (MDS) is a family of side channel attacks
1762     +on internal buffers in Intel CPUs. The variants are:
1763     +
1764     + - Microarchitectural Store Buffer Data Sampling (MSBDS) (CVE-2018-12126)
1765     + - Microarchitectural Fill Buffer Data Sampling (MFBDS) (CVE-2018-12130)
1766     + - Microarchitectural Load Port Data Sampling (MLPDS) (CVE-2018-12127)
1767     + - Microarchitectural Data Sampling Uncacheable Memory (MDSUM) (CVE-2019-11091)
1768     +
1769     +MSBDS leaks Store Buffer Entries which can be speculatively forwarded to a
1770     +dependent load (store-to-load forwarding) as an optimization. The forward
1771     +can also happen to a faulting or assisting load operation for a different
1772     +memory address, which can be exploited under certain conditions. Store
1773     +buffers are partitioned between Hyper-Threads so cross thread forwarding is
1774     +not possible. But if a thread enters or exits a sleep state the store
1775     +buffer is repartitioned which can expose data from one thread to the other.
1776     +
1777     +MFBDS leaks Fill Buffer Entries. Fill buffers are used internally to manage
1778     +L1 miss situations and to hold data which is returned or sent in response
1779     +to a memory or I/O operation. Fill buffers can forward data to a load
1780     +operation and also write data to the cache. When the fill buffer is
1781     +deallocated it can retain the stale data of the preceding operations which
1782     +can then be forwarded to a faulting or assisting load operation, which can
1783     +be exploited under certain conditions. Fill buffers are shared between
1784     +Hyper-Threads so cross thread leakage is possible.
1785     +
1786     +MLPDS leaks Load Port Data. Load ports are used to perform load operations
1787     +from memory or I/O. The received data is then forwarded to the register
1788     +file or a subsequent operation. In some implementations the Load Port can
1789     +contain stale data from a previous operation which can be forwarded to
1790     +faulting or assisting loads under certain conditions, which again can be
1791     +exploited eventually. Load ports are shared between Hyper-Threads so cross
1792     +thread leakage is possible.
1793     +
1794     +MDSUM is a special case of MSBDS, MFBDS and MLPDS. An uncacheable load from
1795     +memory that takes a fault or assist can leave data in a microarchitectural
1796     +structure that may later be observed using one of the same methods used by
1797     +MSBDS, MFBDS or MLPDS.
1798     +
1799     +Exposure assumptions
1800     +--------------------
1801     +
1802     +It is assumed that attack code resides in user space or in a guest with one
1803     +exception. The rationale behind this assumption is that the code construct
1804     +needed for exploiting MDS requires:
1805     +
1806     + - to control the load to trigger a fault or assist
1807     +
1808     + - to have a disclosure gadget which exposes the speculatively accessed
1809     + data for consumption through a side channel.
1810     +
1811     + - to control the pointer through which the disclosure gadget exposes the
1812     + data
1813     +
1814     +The existence of such a construct in the kernel cannot be excluded with
1815     +100% certainty, but the complexity involved makes it extremely unlikely.
1816     +
1817     +There is one exception, which is untrusted BPF. The functionality of
1818     +untrusted BPF is limited, but it needs to be thoroughly investigated
1819     +whether it can be used to create such a construct.
1820     +
1821     +
1822     +Mitigation strategy
1823     +-------------------
1824     +
1825     +All variants have the same mitigation strategy at least for the single CPU
1826     +thread case (SMT off): Force the CPU to clear the affected buffers.
1827     +
1828     +This is achieved by using the otherwise unused and obsolete VERW
1829     +instruction in combination with a microcode update. The microcode clears
1830     +the affected CPU buffers when the VERW instruction is executed.
1831     +
1832     +For virtualization there are two ways to achieve CPU buffer
1833     +clearing: either the modified VERW instruction or the L1D Flush
1834     +command. The latter is issued when L1TF mitigation is enabled so the extra
1835     +VERW can be avoided. If the CPU is not affected by L1TF then VERW needs to
1836     +be issued.
1837     +
1838     +If the VERW instruction with the supplied segment selector argument is
1839     +executed on a CPU without the microcode update there is no side effect
1840     +other than a small number of pointlessly wasted CPU cycles.
1841     +
1842     +This does not protect against cross Hyper-Thread attacks except for MSBDS
1843     +which is only exploitable cross Hyper-Thread when one of the Hyper-Threads
1844     +enters a C-state.
1845     +
1846     +The kernel provides a function to invoke the buffer clearing:
1847     +
1848     + mds_clear_cpu_buffers()
1849     +
1850     +The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
1851     +(idle) transitions.
1852     +
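The concrete helper added by this patch (see the nospec-branch.h hunk
below) is a single VERW with a memory operand::

    static inline void mds_clear_cpu_buffers(void)
    {
            static const u16 ds = __KERNEL_DS;

            /*
             * The memory-operand variant is required; only that form
             * guarantees the buffer flush with the updated microcode.
             * VERW modifies ZF, hence the "cc" clobber.
             */
            asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
    }
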
1853     +As a special quirk to address virtualization scenarios where the host has
1854     +the microcode updated, but the hypervisor does not (yet) expose the
1855     +MD_CLEAR CPUID bit to guests, the kernel issues the VERW instruction in the
1856     +hope that it might actually clear the buffers. The state is reflected
1857     +accordingly.
1858     +
1859     +According to current knowledge additional mitigations inside the kernel
1860     +itself are not required because the necessary gadgets to expose the leaked
1861     +data cannot be controlled in a way which allows exploitation from malicious
1862     +user space or VM guests.
1863     +
1864     +Kernel internal mitigation modes
1865     +--------------------------------
1866     +
1867     + ======= ============================================================
1868     + off Mitigation is disabled. Either the CPU is not affected or
1869     + mds=off is supplied on the kernel command line
1870     +
1871     + full Mitigation is enabled. CPU is affected and MD_CLEAR is
1872     + advertised in CPUID.
1873     +
1874     + vmwerv Mitigation is enabled. CPU is affected and MD_CLEAR is not
1875     + advertised in CPUID. That is mainly for virtualization
1876     + scenarios where the host has the updated microcode but the
1877     + hypervisor does not expose MD_CLEAR in CPUID. It's a best
1878     + effort approach without guarantee.
1879     + ======= ============================================================
1880     +
1881     +If the CPU is affected and mds=off is not supplied on the kernel command
1882     +line then the kernel selects the appropriate mitigation mode depending on
1883     +the availability of the MD_CLEAR CPUID bit.
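
Using the enum, bug and feature bits introduced by this patch, the
selection can be sketched as follows (condensed for illustration, not
the literal bugs.c code)::

    static enum mds_mitigations mds_mitigation = MDS_MITIGATION_FULL;

    static void __init mds_select_mitigation(void)
    {
            /* Not affected, or mitigation disabled on the command line. */
            if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
                    mds_mitigation = MDS_MITIGATION_OFF;
                    return;
            }

            /* Affected, but no MD_CLEAR microcode: best effort mode. */
            if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
                    mds_mitigation = MDS_MITIGATION_VMWERV;

            static_branch_enable(&mds_user_clear);
    }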
1884     +
1885     +Mitigation points
1886     +-----------------
1887     +
1888     +1. Return to user space
1889     +^^^^^^^^^^^^^^^^^^^^^^^
1890     +
1891     + When transitioning from kernel to user space the CPU buffers are flushed
1892     + on affected CPUs when the mitigation is not disabled on the kernel
1893     + command line. The mitigation is enabled through the static key
1894     + mds_user_clear.
1895     +
1896     + The mitigation is invoked in prepare_exit_to_usermode() which covers
1897     + most of the kernel to user space transitions. There are a few exceptions
1898     + which do not invoke prepare_exit_to_usermode() on return to user
1899     + space. These exceptions use the paranoid exit code.
1900     +
1901     + - Non Maskable Interrupt (NMI):
1902     +
1903     + Access to sensitive data like keys or credentials in the NMI context is
1904     + mostly theoretical: The CPU can do prefetching or execute a
1905     + misspeculated code path and thereby fetch data which might end up
1906     + leaking through a buffer.
1907     +
1908     + But for mounting other attacks the kernel stack address of the task is
1909     + already valuable information. So in full mitigation mode, the NMI is
1910     + mitigated on the return from do_nmi() to provide almost complete
1911     + coverage.
1912     +
1913     + - Double fault (#DF):
1914     +
1915     + A double fault is usually fatal, but the ESPFIX workaround, which can
1916     + be triggered from user space through modify_ldt(2), is a recoverable
1917     + double fault. #DF uses the paranoid exit path, so explicit mitigation
1918     + in the double fault handler is required.
1919     +
1920     + - Machine Check Exception (#MC):
1921     +
1922     + Another corner case is a #MC which hits between the CPU buffer clear
1923     + invocation and the actual return to user. As this still is in kernel
1924     + space it takes the paranoid exit path which does not clear the CPU
1925     + buffers. So the #MC handler repopulates the buffers to some
1926     + extent. Machine checks are not reliably controllable and the window is
1927     + extremely small so mitigation would just tick a checkbox that this
1928     + theoretical corner case is covered. To keep the amount of special
1929     + cases small, ignore #MC.
1930     +
1931     + - Debug Exception (#DB):
1932     +
1933     + This takes the paranoid exit path only when the INT1 breakpoint is in
1934     + kernel space. #DB on a user space address takes the regular exit path,
1935     + so no extra mitigation required.
1936     +
1937     +
1938     +2. C-State transition
1939     +^^^^^^^^^^^^^^^^^^^^^
1940     +
1941     + When a CPU goes idle and enters a C-State the CPU buffers need to be
1942     + cleared on affected CPUs when SMT is active. This addresses the
1943     + repartitioning of the store buffer when one of the Hyper-Threads enters
1944     + a C-State.
1945     +
1946     + When SMT is inactive, i.e. either the CPU does not support it or all
1947     + sibling threads are offline, CPU buffer clearing is not required.
1948     +
1949     + The idle clearing is enabled on CPUs which are only affected by MSBDS
1950     + and not by any other MDS variant. The other MDS variants cannot be
1951     + protected against cross Hyper-Thread attacks because the Fill Buffer and
1952     + the Load Ports are shared. So on CPUs affected by other variants, the
1953     + idle clearing would be a window dressing exercise and is therefore not
1954     + activated.
1955     +
1956     + The invocation is controlled by the static key mds_idle_clear which is
1957     + switched depending on the chosen mitigation mode and the SMT state of
1958     + the system.
1959     +
1960     + The buffer clear is only invoked before entering the C-State to prevent
1961     + stale data from the idling CPU from spilling to the Hyper-Thread
1962     + sibling after the store buffer is repartitioned and all entries become
1963     + available to the non-idle sibling.
1964     +
1965     + When coming out of idle the store buffer is partitioned again so each
1966     + sibling has half of it available. The CPU coming back from idle could
1967     + then be speculatively exposed to contents of the sibling. The buffers are
1968     + flushed either on exit to user space or on VMENTER so malicious code
1969     + in user space or the guest cannot speculatively access them.
1970     +
1971     + The mitigation is hooked into all variants of halt()/mwait(), but does
1972     + not cover the legacy ACPI IO-Port mechanism because the ACPI idle driver
1973     + has been superseded by the intel_idle driver around 2010 and is
1974     + preferred on all affected CPUs which are expected to gain the MD_CLEAR
1975     + functionality in microcode. Apart from that, the IO-Port mechanism is a
1976     + legacy interface which is only used on older systems which are either
1977     + not affected or do not receive microcode updates anymore.
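
The hook into the idle entry paths can be seen in the mwait/halt changes
below; for example the mwait based variant from this patch simply gains
the conditional buffer clear before going idle::

    static inline void __mwait(unsigned long eax, unsigned long ecx)
    {
            /* Clear CPU buffers when the mds_idle_clear key is enabled. */
            mds_idle_clear_cpu_buffers();

            /* "mwait %eax, %ecx;" */
            asm volatile(".byte 0x0f, 0x01, 0xc9;"
                         :: "a" (eax), "c" (ecx));
    }
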
1978     diff --git a/Makefile b/Makefile
1979     index 914d69b9e3fd..be894b3a97d5 100644
1980     --- a/Makefile
1981     +++ b/Makefile
1982     @@ -1,7 +1,7 @@
1983     # SPDX-License-Identifier: GPL-2.0
1984     VERSION = 4
1985     PATCHLEVEL = 19
1986     -SUBLEVEL = 42
1987     +SUBLEVEL = 43
1988     EXTRAVERSION =
1989     NAME = "People's Front"
1990    
1991     diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
1992     index 1341325599a7..4ccbf611a3c5 100644
1993     --- a/arch/powerpc/kernel/security.c
1994     +++ b/arch/powerpc/kernel/security.c
1995     @@ -56,7 +56,7 @@ void setup_barrier_nospec(void)
1996     enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
1997     security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
1998    
1999     - if (!no_nospec)
2000     + if (!no_nospec && !cpu_mitigations_off())
2001     enable_barrier_nospec(enable);
2002     }
2003    
2004     @@ -115,7 +115,7 @@ static int __init handle_nospectre_v2(char *p)
2005     early_param("nospectre_v2", handle_nospectre_v2);
2006     void setup_spectre_v2(void)
2007     {
2008     - if (no_spectrev2)
2009     + if (no_spectrev2 || cpu_mitigations_off())
2010     do_btb_flush_fixups();
2011     else
2012     btb_flush_enabled = true;
2013     @@ -299,7 +299,7 @@ void setup_stf_barrier(void)
2014    
2015     stf_enabled_flush_types = type;
2016    
2017     - if (!no_stf_barrier)
2018     + if (!no_stf_barrier && !cpu_mitigations_off())
2019     stf_barrier_enable(enable);
2020     }
2021    
2022     diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
2023     index faf00222b324..eaf7300be5ab 100644
2024     --- a/arch/powerpc/kernel/setup_64.c
2025     +++ b/arch/powerpc/kernel/setup_64.c
2026     @@ -955,7 +955,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
2027    
2028     enabled_flush_types = types;
2029    
2030     - if (!no_rfi_flush)
2031     + if (!no_rfi_flush && !cpu_mitigations_off())
2032     rfi_flush_enable(enable);
2033     }
2034    
2035     diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
2036     index bdddaae96559..649135cbedd5 100644
2037     --- a/arch/s390/kernel/nospec-branch.c
2038     +++ b/arch/s390/kernel/nospec-branch.c
2039     @@ -1,6 +1,7 @@
2040     // SPDX-License-Identifier: GPL-2.0
2041     #include <linux/module.h>
2042     #include <linux/device.h>
2043     +#include <linux/cpu.h>
2044     #include <asm/nospec-branch.h>
2045    
2046     static int __init nobp_setup_early(char *str)
2047     @@ -58,7 +59,7 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
2048    
2049     void __init nospec_auto_detect(void)
2050     {
2051     - if (test_facility(156)) {
2052     + if (test_facility(156) || cpu_mitigations_off()) {
2053     /*
2054     * The machine supports etokens.
2055     * Disable expolines and disable nobp.
2056     diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
2057     index 3b2490b81918..8353348ddeaf 100644
2058     --- a/arch/x86/entry/common.c
2059     +++ b/arch/x86/entry/common.c
2060     @@ -31,6 +31,7 @@
2061     #include <asm/vdso.h>
2062     #include <linux/uaccess.h>
2063     #include <asm/cpufeature.h>
2064     +#include <asm/nospec-branch.h>
2065    
2066     #define CREATE_TRACE_POINTS
2067     #include <trace/events/syscalls.h>
2068     @@ -212,6 +213,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
2069     #endif
2070    
2071     user_enter_irqoff();
2072     +
2073     + mds_user_clear_cpu_buffers();
2074     }
2075    
2076     #define SYSCALL_EXIT_WORK_FLAGS \
2077     diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
2078     index f9958ad4d335..a759e59990fb 100644
2079     --- a/arch/x86/events/intel/core.c
2080     +++ b/arch/x86/events/intel/core.c
2081     @@ -4132,11 +4132,11 @@ __init int intel_pmu_init(void)
2082     name = "nehalem";
2083     break;
2084    
2085     - case INTEL_FAM6_ATOM_PINEVIEW:
2086     - case INTEL_FAM6_ATOM_LINCROFT:
2087     - case INTEL_FAM6_ATOM_PENWELL:
2088     - case INTEL_FAM6_ATOM_CLOVERVIEW:
2089     - case INTEL_FAM6_ATOM_CEDARVIEW:
2090     + case INTEL_FAM6_ATOM_BONNELL:
2091     + case INTEL_FAM6_ATOM_BONNELL_MID:
2092     + case INTEL_FAM6_ATOM_SALTWELL:
2093     + case INTEL_FAM6_ATOM_SALTWELL_MID:
2094     + case INTEL_FAM6_ATOM_SALTWELL_TABLET:
2095     memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2096     sizeof(hw_cache_event_ids));
2097    
2098     @@ -4149,9 +4149,11 @@ __init int intel_pmu_init(void)
2099     name = "bonnell";
2100     break;
2101    
2102     - case INTEL_FAM6_ATOM_SILVERMONT1:
2103     - case INTEL_FAM6_ATOM_SILVERMONT2:
2104     + case INTEL_FAM6_ATOM_SILVERMONT:
2105     + case INTEL_FAM6_ATOM_SILVERMONT_X:
2106     + case INTEL_FAM6_ATOM_SILVERMONT_MID:
2107     case INTEL_FAM6_ATOM_AIRMONT:
2108     + case INTEL_FAM6_ATOM_AIRMONT_MID:
2109     memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
2110     sizeof(hw_cache_event_ids));
2111     memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
2112     @@ -4170,7 +4172,7 @@ __init int intel_pmu_init(void)
2113     break;
2114    
2115     case INTEL_FAM6_ATOM_GOLDMONT:
2116     - case INTEL_FAM6_ATOM_DENVERTON:
2117     + case INTEL_FAM6_ATOM_GOLDMONT_X:
2118     memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
2119     sizeof(hw_cache_event_ids));
2120     memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
2121     @@ -4196,7 +4198,7 @@ __init int intel_pmu_init(void)
2122     name = "goldmont";
2123     break;
2124    
2125     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2126     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2127     memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
2128     sizeof(hw_cache_event_ids));
2129     memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
2130     diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
2131     index 6eb76106c469..56194c571299 100644
2132     --- a/arch/x86/events/intel/cstate.c
2133     +++ b/arch/x86/events/intel/cstate.c
2134     @@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
2135    
2136     X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
2137    
2138     - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
2139     - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
2140     + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
2141     + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
2142     X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
2143    
2144     X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
2145     @@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
2146     X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
2147    
2148     X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
2149     - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
2150     + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
2151    
2152     - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
2153     + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
2154     { },
2155     };
2156     MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
2157     diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
2158     index 32f3e9423e99..91039ffed633 100644
2159     --- a/arch/x86/events/intel/rapl.c
2160     +++ b/arch/x86/events/intel/rapl.c
2161     @@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
2162     X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
2163    
2164     X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
2165     - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
2166     + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
2167    
2168     - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
2169     + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
2170     {},
2171     };
2172    
2173     diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
2174     index b4771a6ddbc1..1b9f85abf9bc 100644
2175     --- a/arch/x86/events/msr.c
2176     +++ b/arch/x86/events/msr.c
2177     @@ -69,14 +69,14 @@ static bool test_intel(int idx)
2178     case INTEL_FAM6_BROADWELL_GT3E:
2179     case INTEL_FAM6_BROADWELL_X:
2180    
2181     - case INTEL_FAM6_ATOM_SILVERMONT1:
2182     - case INTEL_FAM6_ATOM_SILVERMONT2:
2183     + case INTEL_FAM6_ATOM_SILVERMONT:
2184     + case INTEL_FAM6_ATOM_SILVERMONT_X:
2185     case INTEL_FAM6_ATOM_AIRMONT:
2186    
2187     case INTEL_FAM6_ATOM_GOLDMONT:
2188     - case INTEL_FAM6_ATOM_DENVERTON:
2189     + case INTEL_FAM6_ATOM_GOLDMONT_X:
2190    
2191     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2192     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2193    
2194     case INTEL_FAM6_XEON_PHI_KNL:
2195     case INTEL_FAM6_XEON_PHI_KNM:
2196     diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
2197     index 7b31ee5223fc..69037da75ea0 100644
2198     --- a/arch/x86/include/asm/cpufeatures.h
2199     +++ b/arch/x86/include/asm/cpufeatures.h
2200     @@ -341,6 +341,7 @@
2201     #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
2202     #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
2203     #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
2204     +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
2205     #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
2206     #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
2207     #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
2208     @@ -378,5 +379,7 @@
2209     #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
2210     #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
2211     #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
2212     +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
2213     +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSBDS variant of BUG_MDS */
2214    
2215     #endif /* _ASM_X86_CPUFEATURES_H */
2216     diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
2217     index 0ad25cc895ae..058b1a1994c4 100644
2218     --- a/arch/x86/include/asm/intel-family.h
2219     +++ b/arch/x86/include/asm/intel-family.h
2220     @@ -8,9 +8,6 @@
2221     * The "_X" parts are generally the EP and EX Xeons, or the
2222     * "Extreme" ones, like Broadwell-E.
2223     *
2224     - * Things ending in "2" are usually because we have no better
2225     - * name for them. There's no processor called "SILVERMONT2".
2226     - *
2227     * While adding a new CPUID for a new microarchitecture, add a new
2228     * group to keep logically sorted out in chronological order. Within
2229     * that group keep the CPUID for the variants sorted by model number.
2230     @@ -59,19 +56,23 @@
2231    
2232     /* "Small Core" Processors (Atom) */
2233    
2234     -#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
2235     -#define INTEL_FAM6_ATOM_LINCROFT 0x26
2236     -#define INTEL_FAM6_ATOM_PENWELL 0x27
2237     -#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
2238     -#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
2239     -#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
2240     -#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
2241     -#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
2242     -#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
2243     -#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
2244     -#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
2245     -#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
2246     -#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
2247     +#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
2248     +#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
2249     +
2250     +#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
2251     +#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
2252     +#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
2253     +
2254     +#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
2255     +#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
2256     +#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
2257     +
2258     +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
2259     +#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
2260     +
2261     +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
2262     +#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
2263     +#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
2264    
2265     /* Xeon Phi */
2266    
2267     diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
2268     index 15450a675031..c99c66b41e53 100644
2269     --- a/arch/x86/include/asm/irqflags.h
2270     +++ b/arch/x86/include/asm/irqflags.h
2271     @@ -6,6 +6,8 @@
2272    
2273     #ifndef __ASSEMBLY__
2274    
2275     +#include <asm/nospec-branch.h>
2276     +
2277     /* Provide __cpuidle; we can't safely include <linux/cpu.h> */
2278     #define __cpuidle __attribute__((__section__(".cpuidle.text")))
2279    
2280     @@ -54,11 +56,13 @@ static inline void native_irq_enable(void)
2281    
2282     static inline __cpuidle void native_safe_halt(void)
2283     {
2284     + mds_idle_clear_cpu_buffers();
2285     asm volatile("sti; hlt": : :"memory");
2286     }
2287    
2288     static inline __cpuidle void native_halt(void)
2289     {
2290     + mds_idle_clear_cpu_buffers();
2291     asm volatile("hlt": : :"memory");
2292     }
2293    
2294     diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
2295     index f14ca0be1e3f..f85f43db9225 100644
2296     --- a/arch/x86/include/asm/msr-index.h
2297     +++ b/arch/x86/include/asm/msr-index.h
2298     @@ -2,6 +2,8 @@
2299     #ifndef _ASM_X86_MSR_INDEX_H
2300     #define _ASM_X86_MSR_INDEX_H
2301    
2302     +#include <linux/bits.h>
2303     +
2304     /*
2305     * CPU model specific register (MSR) numbers.
2306     *
2307     @@ -40,14 +42,14 @@
2308     /* Intel MSRs. Some also available on other CPUs */
2309    
2310     #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
2311     -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
2312     +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
2313     #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
2314     -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
2315     +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
2316     #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
2317     -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
2318     +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
2319    
2320     #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
2321     -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
2322     +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
2323    
2324     #define MSR_PPIN_CTL 0x0000004e
2325     #define MSR_PPIN 0x0000004f
2326     @@ -69,20 +71,25 @@
2327     #define MSR_MTRRcap 0x000000fe
2328    
2329     #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
2330     -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
2331     -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
2332     -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */
2333     -#define ARCH_CAP_SSB_NO (1 << 4) /*
2334     - * Not susceptible to Speculative Store Bypass
2335     - * attack, so no Speculative Store Bypass
2336     - * control required.
2337     - */
2338     +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
2339     +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
2340     +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
2341     +#define ARCH_CAP_SSB_NO BIT(4) /*
2342     + * Not susceptible to Speculative Store Bypass
2343     + * attack, so no Speculative Store Bypass
2344     + * control required.
2345     + */
2346     +#define ARCH_CAP_MDS_NO BIT(5) /*
2347     + * Not susceptible to
2348     + * Microarchitectural Data
2349     + * Sampling (MDS) vulnerabilities.
2350     + */
2351    
2352     #define MSR_IA32_FLUSH_CMD 0x0000010b
2353     -#define L1D_FLUSH (1 << 0) /*
2354     - * Writeback and invalidate the
2355     - * L1 data cache.
2356     - */
2357     +#define L1D_FLUSH BIT(0) /*
2358     + * Writeback and invalidate the
2359     + * L1 data cache.
2360     + */
2361    
2362     #define MSR_IA32_BBL_CR_CTL 0x00000119
2363     #define MSR_IA32_BBL_CR_CTL3 0x0000011e
2364     diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
2365     index 39a2fb29378a..eb0f80ce8524 100644
2366     --- a/arch/x86/include/asm/mwait.h
2367     +++ b/arch/x86/include/asm/mwait.h
2368     @@ -6,6 +6,7 @@
2369     #include <linux/sched/idle.h>
2370    
2371     #include <asm/cpufeature.h>
2372     +#include <asm/nospec-branch.h>
2373    
2374     #define MWAIT_SUBSTATE_MASK 0xf
2375     #define MWAIT_CSTATE_MASK 0xf
2376     @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx,
2377    
2378     static inline void __mwait(unsigned long eax, unsigned long ecx)
2379     {
2380     + mds_idle_clear_cpu_buffers();
2381     +
2382     /* "mwait %eax, %ecx;" */
2383     asm volatile(".byte 0x0f, 0x01, 0xc9;"
2384     :: "a" (eax), "c" (ecx));
2385     @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
2386     static inline void __mwaitx(unsigned long eax, unsigned long ebx,
2387     unsigned long ecx)
2388     {
2389     + /* No MDS buffer clear as this is AMD/HYGON only */
2390     +
2391     /* "mwaitx %eax, %ebx, %ecx;" */
2392     asm volatile(".byte 0x0f, 0x01, 0xfb;"
2393     :: "a" (eax), "b" (ebx), "c" (ecx));
2394     @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
2395    
2396     static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
2397     {
2398     + mds_idle_clear_cpu_buffers();
2399     +
2400     trace_hardirqs_on();
2401     /* "mwait %eax, %ecx;" */
2402     asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
2403     diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
2404     index 032b6009baab..599c273f5d00 100644
2405     --- a/arch/x86/include/asm/nospec-branch.h
2406     +++ b/arch/x86/include/asm/nospec-branch.h
2407     @@ -317,6 +317,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
2408     DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
2409     DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
2410    
2411     +DECLARE_STATIC_KEY_FALSE(mds_user_clear);
2412     +DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
2413     +
2414     +#include <asm/segment.h>
2415     +
2416     +/**
2417     + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
2418     + *
2419     + * This uses the otherwise unused and obsolete VERW instruction in
2420     + * combination with microcode which triggers a CPU buffer flush when the
2421     + * instruction is executed.
2422     + */
2423     +static inline void mds_clear_cpu_buffers(void)
2424     +{
2425     + static const u16 ds = __KERNEL_DS;
2426     +
2427     + /*
2428     + * Has to be the memory-operand variant because only that
2429     + * guarantees the CPU buffer flush functionality according to
2430     + * documentation. The register-operand variant does not.
2431     + * Works with any segment selector, but a valid writable
2432     + * data segment is the fastest variant.
2433     + *
2434     + * "cc" clobber is required because VERW modifies ZF.
2435     + */
2436     + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
2437     +}
2438     +
2439     +/**
2440     + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
2441     + *
2442     + * Clear CPU buffers if the corresponding static key is enabled
2443     + */
2444     +static inline void mds_user_clear_cpu_buffers(void)
2445     +{
2446     + if (static_branch_likely(&mds_user_clear))
2447     + mds_clear_cpu_buffers();
2448     +}
2449     +
2450     +/**
2451     + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
2452     + *
2453     + * Clear CPU buffers if the corresponding static key is enabled
2454     + */
2455     +static inline void mds_idle_clear_cpu_buffers(void)
2456     +{
2457     + if (static_branch_likely(&mds_idle_clear))
2458     + mds_clear_cpu_buffers();
2459     +}
2460     +
2461     #endif /* __ASSEMBLY__ */
2462    
2463     /*
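
As an aside, a minimal user-space sketch of the VERW sequence used by mds_clear_cpu_buffers() above. Since __KERNEL_DS is meaningless outside the kernel, the selector is read from %ds at run time; on CPUs without the MD_CLEAR microcode the instruction is just the legacy segment-writability check, so this demonstrates the encoding, not the mitigation (x86, GCC/Clang inline asm assumed):

#include <stdint.h>
#include <stdio.h>

static inline void clear_cpu_buffers(void)
{
    uint16_t ds;

    /* Grab a valid data segment selector; stands in for __KERNEL_DS. */
    asm volatile("mov %%ds, %0" : "=r" (ds));

    /*
     * Memory-operand form, as in the hunk above: only this variant is
     * documented to trigger the MD_CLEAR buffer flush. "cc" because
     * VERW writes ZF.
     */
    asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
}

int main(void)
{
    clear_cpu_buffers();
    puts("verw executed");
    return 0;
}
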
2464     diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
2465     index d53c54b842da..b54f25697beb 100644
2466     --- a/arch/x86/include/asm/processor.h
2467     +++ b/arch/x86/include/asm/processor.h
2468     @@ -997,4 +997,10 @@ enum l1tf_mitigations {
2469    
2470     extern enum l1tf_mitigations l1tf_mitigation;
2471    
2472     +enum mds_mitigations {
2473     + MDS_MITIGATION_OFF,
2474     + MDS_MITIGATION_FULL,
2475     + MDS_MITIGATION_VMWERV,
2476     +};
2477     +
2478     #endif /* _ASM_X86_PROCESSOR_H */
2479     diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
2480     index e5258bd64200..9b096f26d1c8 100644
2481     --- a/arch/x86/kernel/cpu/bugs.c
2482     +++ b/arch/x86/kernel/cpu/bugs.c
2483     @@ -35,6 +35,7 @@
2484     static void __init spectre_v2_select_mitigation(void);
2485     static void __init ssb_select_mitigation(void);
2486     static void __init l1tf_select_mitigation(void);
2487     +static void __init mds_select_mitigation(void);
2488    
2489     /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
2490     u64 x86_spec_ctrl_base;
2491     @@ -61,6 +62,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
2492     /* Control unconditional IBPB in switch_mm() */
2493     DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
2494    
2495     +/* Control MDS CPU buffer clear before returning to user space */
2496     +DEFINE_STATIC_KEY_FALSE(mds_user_clear);
2497     +EXPORT_SYMBOL_GPL(mds_user_clear);
2498     +/* Control MDS CPU buffer clear before idling (halt, mwait) */
2499     +DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
2500     +EXPORT_SYMBOL_GPL(mds_idle_clear);
2501     +
2502     void __init check_bugs(void)
2503     {
2504     identify_boot_cpu();
2505     @@ -99,6 +107,10 @@ void __init check_bugs(void)
2506    
2507     l1tf_select_mitigation();
2508    
2509     + mds_select_mitigation();
2510     +
2511     + arch_smt_update();
2512     +
2513     #ifdef CONFIG_X86_32
2514     /*
2515     * Check whether we are able to run this kernel safely on SMP.
2516     @@ -204,6 +216,61 @@ static void x86_amd_ssb_disable(void)
2517     wrmsrl(MSR_AMD64_LS_CFG, msrval);
2518     }
2519    
2520     +#undef pr_fmt
2521     +#define pr_fmt(fmt) "MDS: " fmt
2522     +
2523     +/* Default mitigation for MDS-affected CPUs */
2524     +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL;
2525     +static bool mds_nosmt __ro_after_init = false;
2526     +
2527     +static const char * const mds_strings[] = {
2528     + [MDS_MITIGATION_OFF] = "Vulnerable",
2529     + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
2530     + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode",
2531     +};
2532     +
2533     +static void __init mds_select_mitigation(void)
2534     +{
2535     + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) {
2536     + mds_mitigation = MDS_MITIGATION_OFF;
2537     + return;
2538     + }
2539     +
2540     + if (mds_mitigation == MDS_MITIGATION_FULL) {
2541     + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
2542     + mds_mitigation = MDS_MITIGATION_VMWERV;
2543     +
2544     + static_branch_enable(&mds_user_clear);
2545     +
2546     + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
2547     + (mds_nosmt || cpu_mitigations_auto_nosmt()))
2548     + cpu_smt_disable(false);
2549     + }
2550     +
2551     + pr_info("%s\n", mds_strings[mds_mitigation]);
2552     +}
2553     +
2554     +static int __init mds_cmdline(char *str)
2555     +{
2556     + if (!boot_cpu_has_bug(X86_BUG_MDS))
2557     + return 0;
2558     +
2559     + if (!str)
2560     + return -EINVAL;
2561     +
2562     + if (!strcmp(str, "off"))
2563     + mds_mitigation = MDS_MITIGATION_OFF;
2564     + else if (!strcmp(str, "full"))
2565     + mds_mitigation = MDS_MITIGATION_FULL;
2566     + else if (!strcmp(str, "full,nosmt")) {
2567     + mds_mitigation = MDS_MITIGATION_FULL;
2568     + mds_nosmt = true;
2569     + }
2570     +
2571     + return 0;
2572     +}
2573     +early_param("mds", mds_cmdline);
2574     +
2575     #undef pr_fmt
2576     #define pr_fmt(fmt) "Spectre V2 : " fmt
2577    
2578     @@ -428,7 +495,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
2579     char arg[20];
2580     int ret, i;
2581    
2582     - if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
2583     + if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
2584     + cpu_mitigations_off())
2585     return SPECTRE_V2_CMD_NONE;
2586    
2587     ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
2588     @@ -560,9 +628,6 @@ specv2_set_mode:
2589    
2590     /* Set up IBPB and STIBP depending on the general spectre V2 command */
2591     spectre_v2_user_select_mitigation(cmd);
2592     -
2593     - /* Enable STIBP if appropriate */
2594     - arch_smt_update();
2595     }
2596    
2597     static void update_stibp_msr(void * __unused)
2598     @@ -596,6 +661,31 @@ static void update_indir_branch_cond(void)
2599     static_branch_disable(&switch_to_cond_stibp);
2600     }
2601    
2602     +#undef pr_fmt
2603     +#define pr_fmt(fmt) fmt
2604     +
2605     +/* Update the static key controlling the MDS CPU buffer clear in idle */
2606     +static void update_mds_branch_idle(void)
2607     +{
2608     + /*
2609     + * Enable the idle clearing if SMT is active on CPUs which are
2610     + * affected only by MSBDS and not any other MDS variant.
2611     + *
2612     + * The other variants cannot be mitigated when SMT is enabled, so
2613     + * clearing the buffers on idle just to prevent the Store Buffer
2614     + * repartitioning leak would be a window dressing exercise.
2615     + */
2616     + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
2617     + return;
2618     +
2619     + if (sched_smt_active())
2620     + static_branch_enable(&mds_idle_clear);
2621     + else
2622     + static_branch_disable(&mds_idle_clear);
2623     +}
2624     +
2625     +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
2626     +
2627     void arch_smt_update(void)
2628     {
2629     /* Enhanced IBRS implies STIBP. No update required. */
2630     @@ -616,6 +706,17 @@ void arch_smt_update(void)
2631     break;
2632     }
2633    
2634     + switch (mds_mitigation) {
2635     + case MDS_MITIGATION_FULL:
2636     + case MDS_MITIGATION_VMWERV:
2637     + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY))
2638     + pr_warn_once(MDS_MSG_SMT);
2639     + update_mds_branch_idle();
2640     + break;
2641     + case MDS_MITIGATION_OFF:
2642     + break;
2643     + }
2644     +
2645     mutex_unlock(&spec_ctrl_mutex);
2646     }
2647    
2648     @@ -657,7 +758,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
2649     char arg[20];
2650     int ret, i;
2651    
2652     - if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
2653     + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
2654     + cpu_mitigations_off()) {
2655     return SPEC_STORE_BYPASS_CMD_NONE;
2656     } else {
2657     ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
2658     @@ -978,6 +1080,11 @@ static void __init l1tf_select_mitigation(void)
2659     if (!boot_cpu_has_bug(X86_BUG_L1TF))
2660     return;
2661    
2662     + if (cpu_mitigations_off())
2663     + l1tf_mitigation = L1TF_MITIGATION_OFF;
2664     + else if (cpu_mitigations_auto_nosmt())
2665     + l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
2666     +
2667     override_cache_bits(&boot_cpu_data);
2668    
2669     switch (l1tf_mitigation) {
2670     @@ -1006,7 +1113,7 @@ static void __init l1tf_select_mitigation(void)
2671     pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
2672     half_pa);
2673     pr_info("However, doing so will make a part of your RAM unusable.\n");
2674     - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
2675     + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n");
2676     return;
2677     }
2678    
2679     @@ -1039,6 +1146,7 @@ static int __init l1tf_cmdline(char *str)
2680     early_param("l1tf", l1tf_cmdline);
2681    
2682     #undef pr_fmt
2683     +#define pr_fmt(fmt) fmt
2684    
2685     #ifdef CONFIG_SYSFS
2686    
2687     @@ -1077,6 +1185,23 @@ static ssize_t l1tf_show_state(char *buf)
2688     }
2689     #endif
2690    
2691     +static ssize_t mds_show_state(char *buf)
2692     +{
2693     + if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
2694     + return sprintf(buf, "%s; SMT Host state unknown\n",
2695     + mds_strings[mds_mitigation]);
2696     + }
2697     +
2698     + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) {
2699     + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
2700     + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" :
2701     + sched_smt_active() ? "mitigated" : "disabled"));
2702     + }
2703     +
2704     + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation],
2705     + sched_smt_active() ? "vulnerable" : "disabled");
2706     +}
2707     +
2708     static char *stibp_state(void)
2709     {
2710     if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
2711     @@ -1141,6 +1266,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
2712     if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
2713     return l1tf_show_state(buf);
2714     break;
2715     +
2716     + case X86_BUG_MDS:
2717     + return mds_show_state(buf);
2718     +
2719     default:
2720     break;
2721     }
2722     @@ -1172,4 +1301,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b
2723     {
2724     return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
2725     }
2726     +
2727     +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf)
2728     +{
2729     + return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
2730     +}
2731     #endif
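
To illustrate, the mds= selection logic from the bugs.c hunk above reduced to a stand-alone function. cpu_has_bug and cpu_has_md_clear are hypothetical stand-ins for boot_cpu_has_bug(X86_BUG_MDS) and boot_cpu_has(X86_FEATURE_MD_CLEAR); note how "full" without the MD_CLEAR microcode degrades to the best-effort VMWERV state:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum mds_mitigations {
    MDS_MITIGATION_OFF,
    MDS_MITIGATION_FULL,
    MDS_MITIGATION_VMWERV,
};

static const char * const mds_strings[] = {
    [MDS_MITIGATION_OFF]    = "Vulnerable",
    [MDS_MITIGATION_FULL]   = "Mitigation: Clear CPU buffers",
    [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode",
};

static enum mds_mitigations select_mds(const char *cmdline, bool cpu_has_bug,
                                       bool cpu_has_md_clear)
{
    enum mds_mitigations m = MDS_MITIGATION_FULL;   /* default */

    if (!cpu_has_bug)
        return MDS_MITIGATION_OFF;

    if (cmdline && !strcmp(cmdline, "off"))
        m = MDS_MITIGATION_OFF;
    /* "full" and "full,nosmt" both keep MDS_MITIGATION_FULL. */

    if (m == MDS_MITIGATION_FULL && !cpu_has_md_clear)
        m = MDS_MITIGATION_VMWERV;  /* no microcode: best effort only */

    return m;
}

int main(void)
{
    /* Affected CPU, mds=full requested, but no MD_CLEAR microcode. */
    printf("%s\n", mds_strings[select_mds("full", true, false)]);
    return 0;
}
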
2732     diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2733     index 44c4ef3d989b..1073118b9bf0 100644
2734     --- a/arch/x86/kernel/cpu/common.c
2735     +++ b/arch/x86/kernel/cpu/common.c
2736     @@ -948,60 +948,73 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
2737     #endif
2738     }
2739    
2740     -static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
2741     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
2742     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
2743     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
2744     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
2745     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
2746     - { X86_VENDOR_CENTAUR, 5 },
2747     - { X86_VENDOR_INTEL, 5 },
2748     - { X86_VENDOR_NSC, 5 },
2749     - { X86_VENDOR_ANY, 4 },
2750     +#define NO_SPECULATION BIT(0)
2751     +#define NO_MELTDOWN BIT(1)
2752     +#define NO_SSB BIT(2)
2753     +#define NO_L1TF BIT(3)
2754     +#define NO_MDS BIT(4)
2755     +#define MSBDS_ONLY BIT(5)
2756     +
2757     +#define VULNWL(_vendor, _family, _model, _whitelist) \
2758     + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
2759     +
2760     +#define VULNWL_INTEL(model, whitelist) \
2761     + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist)
2762     +
2763     +#define VULNWL_AMD(family, whitelist) \
2764     + VULNWL(AMD, family, X86_MODEL_ANY, whitelist)
2765     +
2766     +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
2767     + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION),
2768     + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
2769     + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
2770     + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
2771     +
2772     + /* Intel Family 6 */
2773     + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
2774     + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
2775     + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
2776     + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
2777     + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
2778     +
2779     + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
2780     + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
2781     + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
2782     + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
2783     + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
2784     + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
2785     +
2786     + VULNWL_INTEL(CORE_YONAH, NO_SSB),
2787     +
2788     + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
2789     +
2790     + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
2791     + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
2792     + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
2793     +
2794     + /* AMD Family 0xf - 0x12 */
2795     + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2796     + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2797     + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2798     + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
2799     +
2800     + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
2801     + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
2802     {}
2803     };
2804    
2805     -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
2806     - { X86_VENDOR_AMD },
2807     - {}
2808     -};
2809     -
2810     -/* Only list CPUs which speculate but are non susceptible to SSB */
2811     -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
2812     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
2813     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
2814     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
2815     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
2816     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
2817     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
2818     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
2819     - { X86_VENDOR_AMD, 0x12, },
2820     - { X86_VENDOR_AMD, 0x11, },
2821     - { X86_VENDOR_AMD, 0x10, },
2822     - { X86_VENDOR_AMD, 0xf, },
2823     - {}
2824     -};
2825     +static bool __init cpu_matches(unsigned long which)
2826     +{
2827     + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist);
2828    
2829     -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
2830     - /* in addition to cpu_no_speculation */
2831     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
2832     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
2833     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
2834     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
2835     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
2836     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
2837     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
2838     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
2839     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
2840     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
2841     - {}
2842     -};
2843     + return m && !!(m->driver_data & which);
2844     +}
2845    
2846     static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2847     {
2848     u64 ia32_cap = 0;
2849    
2850     - if (x86_match_cpu(cpu_no_speculation))
2851     + if (cpu_matches(NO_SPECULATION))
2852     return;
2853    
2854     setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
2855     @@ -1010,15 +1023,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2856     if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
2857     rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
2858    
2859     - if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
2860     - !(ia32_cap & ARCH_CAP_SSB_NO) &&
2861     + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
2862     !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
2863     setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
2864    
2865     if (ia32_cap & ARCH_CAP_IBRS_ALL)
2866     setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
2867    
2868     - if (x86_match_cpu(cpu_no_meltdown))
2869     + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) {
2870     + setup_force_cpu_bug(X86_BUG_MDS);
2871     + if (cpu_matches(MSBDS_ONLY))
2872     + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
2873     + }
2874     +
2875     + if (cpu_matches(NO_MELTDOWN))
2876     return;
2877    
2878     /* Rogue Data Cache Load? No! */
2879     @@ -1027,7 +1045,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
2880    
2881     setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
2882    
2883     - if (x86_match_cpu(cpu_no_l1tf))
2884     + if (cpu_matches(NO_L1TF))
2885     return;
2886    
2887     setup_force_cpu_bug(X86_BUG_L1TF);
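
For reference, a stand-alone sketch of the cpu_matches() pattern introduced above: one whitelist entry carries a bitmask of NO_* flags in driver_data and callers test single bits. The two-entry table and the model numbers are illustrative only; the real table is cpu_vuln_whitelist:

#include <stdbool.h>
#include <stdio.h>

#define BIT(n)          (1UL << (n))
#define NO_SPECULATION  BIT(0)
#define NO_MDS          BIT(4)
#define MSBDS_ONLY      BIT(5)

struct cpu_id { int family; int model; unsigned long flags; };

/* Hypothetical two-entry whitelist; NULL-terminated like the real one. */
static const struct cpu_id whitelist[] = {
    { 6, 0x5c, NO_MDS },        /* Goldmont-like entry */
    { 6, 0x37, MSBDS_ONLY },    /* Silvermont-like entry */
    { 0 }
};

static bool cpu_matches(int family, int model, unsigned long which)
{
    const struct cpu_id *m;

    for (m = whitelist; m->family; m++)
        if (m->family == family && m->model == model)
            return m->flags & which;    /* test one flag bit */
    return false;
}

int main(void)
{
    printf("model 0x5c NO_MDS:     %d\n", cpu_matches(6, 0x5c, NO_MDS));
    printf("model 0x5c MSBDS_ONLY: %d\n", cpu_matches(6, 0x5c, MSBDS_ONLY));
    return 0;
}
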
2888     diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2889     index f8c260d522ca..912d53939f4f 100644
2890     --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2891     +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
2892     @@ -91,7 +91,7 @@ static u64 get_prefetch_disable_bits(void)
2893     */
2894     return 0xF;
2895     case INTEL_FAM6_ATOM_GOLDMONT:
2896     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2897     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2898     /*
2899     * SDM defines bits of MSR_MISC_FEATURE_CONTROL register
2900     * as:
2901     @@ -995,7 +995,7 @@ static int measure_cycles_perf_fn(void *_plr)
2902    
2903     switch (boot_cpu_data.x86_model) {
2904     case INTEL_FAM6_ATOM_GOLDMONT:
2905     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
2906     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
2907     l2_hit_bits = (0x52ULL << 16) | (0x2 << 8) | 0xd1;
2908     l2_miss_bits = (0x52ULL << 16) | (0x10 << 8) | 0xd1;
2909     break;
2910     diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
2911     index 18bc9b51ac9b..086cf1d1d71d 100644
2912     --- a/arch/x86/kernel/nmi.c
2913     +++ b/arch/x86/kernel/nmi.c
2914     @@ -34,6 +34,7 @@
2915     #include <asm/x86_init.h>
2916     #include <asm/reboot.h>
2917     #include <asm/cache.h>
2918     +#include <asm/nospec-branch.h>
2919    
2920     #define CREATE_TRACE_POINTS
2921     #include <trace/events/nmi.h>
2922     @@ -533,6 +534,9 @@ nmi_restart:
2923     write_cr2(this_cpu_read(nmi_cr2));
2924     if (this_cpu_dec_return(nmi_state))
2925     goto nmi_restart;
2926     +
2927     + if (user_mode(regs))
2928     + mds_user_clear_cpu_buffers();
2929     }
2930     NOKPROBE_SYMBOL(do_nmi);
2931    
2932     diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
2933     index e6db475164ed..0a5efd764914 100644
2934     --- a/arch/x86/kernel/traps.c
2935     +++ b/arch/x86/kernel/traps.c
2936     @@ -58,6 +58,7 @@
2937     #include <asm/alternative.h>
2938     #include <asm/fpu/xstate.h>
2939     #include <asm/trace/mpx.h>
2940     +#include <asm/nospec-branch.h>
2941     #include <asm/mpx.h>
2942     #include <asm/vm86.h>
2943     #include <asm/umip.h>
2944     @@ -387,6 +388,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
2945     regs->ip = (unsigned long)general_protection;
2946     regs->sp = (unsigned long)&gpregs->orig_ax;
2947    
2948     + /*
2949     + * This situation can be triggered by userspace via
2950     + * modify_ldt(2) and the return does not take the regular
2951     + * user space exit, so a CPU buffer clear is required when
2952     + * MDS mitigation is enabled.
2953     + */
2954     + mds_user_clear_cpu_buffers();
2955     return;
2956     }
2957     #endif
2958     diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
2959     index 6d5dc5dabfd7..03b7529333a6 100644
2960     --- a/arch/x86/kernel/tsc.c
2961     +++ b/arch/x86/kernel/tsc.c
2962     @@ -636,7 +636,7 @@ unsigned long native_calibrate_tsc(void)
2963     case INTEL_FAM6_KABYLAKE_DESKTOP:
2964     crystal_khz = 24000; /* 24.0 MHz */
2965     break;
2966     - case INTEL_FAM6_ATOM_DENVERTON:
2967     + case INTEL_FAM6_ATOM_GOLDMONT_X:
2968     crystal_khz = 25000; /* 25.0 MHz */
2969     break;
2970     case INTEL_FAM6_ATOM_GOLDMONT:
2971     diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
2972     index 27ef714d886c..3d0e9aeea7c8 100644
2973     --- a/arch/x86/kernel/tsc_msr.c
2974     +++ b/arch/x86/kernel/tsc_msr.c
2975     @@ -59,12 +59,12 @@ static const struct freq_desc freq_desc_ann = {
2976     };
2977    
2978     static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
2979     - INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
2980     - INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
2981     - INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
2982     + INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
2983     + INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
2984     + INTEL_CPU_FAM6(ATOM_SILVERMONT, freq_desc_byt),
2985     + INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
2986     INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
2987     - INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
2988     - INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
2989     + INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
2990     {}
2991     };
2992    
2993     diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
2994     index 98d13c6a64be..b810102a9cfa 100644
2995     --- a/arch/x86/kvm/cpuid.c
2996     +++ b/arch/x86/kvm/cpuid.c
2997     @@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
2998     /* cpuid 0x80000008.ebx */
2999     const u32 kvm_cpuid_8000_0008_ebx_x86_features =
3000     F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
3001     - F(AMD_SSB_NO);
3002     + F(AMD_SSB_NO) | F(AMD_STIBP);
3003    
3004     /* cpuid 0xC0000001.edx */
3005     const u32 kvm_cpuid_C000_0001_edx_x86_features =
3006     @@ -412,7 +412,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
3007     /* cpuid 7.0.edx*/
3008     const u32 kvm_cpuid_7_0_edx_x86_features =
3009     F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
3010     - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
3011     + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
3012     + F(MD_CLEAR);
3013    
3014     /* all calls to cpuid_count() should be made on the same cpu */
3015     get_cpu();
3016     diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
3017     index 215339c7d161..73d6d585dd66 100644
3018     --- a/arch/x86/kvm/vmx.c
3019     +++ b/arch/x86/kvm/vmx.c
3020     @@ -10765,8 +10765,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
3021     evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
3022     (unsigned long)&current_evmcs->host_rsp : 0;
3023    
3024     + /* L1D Flush includes CPU buffer clear to mitigate MDS */
3025     if (static_branch_unlikely(&vmx_l1d_should_flush))
3026     vmx_l1d_flush(vcpu);
3027     + else if (static_branch_unlikely(&mds_user_clear))
3028     + mds_clear_cpu_buffers();
3029    
3030     asm(
3031     /* Store host registers */
3032     @@ -11127,8 +11130,8 @@ free_vcpu:
3033     return ERR_PTR(err);
3034     }
3035    
3036     -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
3037     -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n"
3038     +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
3039     +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
3040    
3041     static int vmx_vm_init(struct kvm *kvm)
3042     {
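
To illustrate the guest-entry ordering above as a stand-alone sketch: the L1D flush sequence already overwrites the affected buffers, so VERW is issued only when the flush is skipped. The two booleans and the stub functions are hypothetical stand-ins for the vmx_l1d_should_flush and mds_user_clear static keys and the real flush routines:

#include <stdbool.h>
#include <stdio.h>

static void vmx_l1d_flush(void)         { puts("L1D flush (covers MDS too)"); }
static void mds_clear_cpu_buffers(void) { puts("VERW buffer clear"); }

static void pre_vmenter(bool l1d_should_flush, bool mds_user_clear)
{
    /* The L1D flush overwrites the same buffers, so VERW would be redundant. */
    if (l1d_should_flush)
        vmx_l1d_flush();
    else if (mds_user_clear)
        mds_clear_cpu_buffers();
}

int main(void)
{
    pre_vmenter(false, true);   /* no L1D flush configured, MDS clear on */
    return 0;
}
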
3043     diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
3044     index c1fc1ae6b429..4df3e5c89d57 100644
3045     --- a/arch/x86/mm/pti.c
3046     +++ b/arch/x86/mm/pti.c
3047     @@ -35,6 +35,7 @@
3048     #include <linux/spinlock.h>
3049     #include <linux/mm.h>
3050     #include <linux/uaccess.h>
3051     +#include <linux/cpu.h>
3052    
3053     #include <asm/cpufeature.h>
3054     #include <asm/hypervisor.h>
3055     @@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void)
3056     }
3057     }
3058    
3059     - if (cmdline_find_option_bool(boot_command_line, "nopti")) {
3060     + if (cmdline_find_option_bool(boot_command_line, "nopti") ||
3061     + cpu_mitigations_off()) {
3062     pti_mode = PTI_FORCE_OFF;
3063     pti_print_if_insecure("disabled on command line.");
3064     return;
3065     diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
3066     index 034813d4ab1e..41dae0f0d898 100644
3067     --- a/arch/x86/platform/atom/punit_atom_debug.c
3068     +++ b/arch/x86/platform/atom/punit_atom_debug.c
3069     @@ -143,8 +143,8 @@ static void punit_dbgfs_unregister(void)
3070     (kernel_ulong_t)&drv_data }
3071    
3072     static const struct x86_cpu_id intel_punit_cpu_ids[] = {
3073     - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
3074     - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, punit_device_tng),
3075     + ICPU(INTEL_FAM6_ATOM_SILVERMONT, punit_device_byt),
3076     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, punit_device_tng),
3077     ICPU(INTEL_FAM6_ATOM_AIRMONT, punit_device_cht),
3078     {}
3079     };
3080     diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3081     index 5a0483e7bf66..31dce781364c 100644
3082     --- a/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3083     +++ b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
3084     @@ -68,7 +68,7 @@ static struct bt_sfi_data tng_bt_sfi_data __initdata = {
3085     { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
3086    
3087     static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
3088     - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, tng_bt_sfi_data),
3089     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, tng_bt_sfi_data),
3090     {}
3091     };
3092    
3093     diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
3094     index 969bf8d515c0..c651e206d796 100644
3095     --- a/drivers/acpi/acpi_lpss.c
3096     +++ b/drivers/acpi/acpi_lpss.c
3097     @@ -292,7 +292,7 @@ static const struct lpss_device_desc bsw_spi_dev_desc = {
3098     #define ICPU(model) { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
3099    
3100     static const struct x86_cpu_id lpss_cpu_ids[] = {
3101     - ICPU(INTEL_FAM6_ATOM_SILVERMONT1), /* Valleyview, Bay Trail */
3102     + ICPU(INTEL_FAM6_ATOM_SILVERMONT), /* Valleyview, Bay Trail */
3103     ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */
3104     {}
3105     };
3106     diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
3107     index 06c31ec3cc70..9a8e286dd86f 100644
3108     --- a/drivers/acpi/x86/utils.c
3109     +++ b/drivers/acpi/x86/utils.c
3110     @@ -54,7 +54,7 @@ static const struct always_present_id always_present_ids[] = {
3111     * Bay / Cherry Trail PWM directly poked by GPU driver in win10,
3112     * but Linux uses a separate PWM driver, harmless if not used.
3113     */
3114     - ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT1), {}),
3115     + ENTRY("80860F09", "1", ICPU(INTEL_FAM6_ATOM_SILVERMONT), {}),
3116     ENTRY("80862288", "1", ICPU(INTEL_FAM6_ATOM_AIRMONT), {}),
3117     /*
3118     * The INT0002 device is necessary to clear wakeup interrupt sources
3119     diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
3120     index eb9443d5bae1..2fd6ca1021c2 100644
3121     --- a/drivers/base/cpu.c
3122     +++ b/drivers/base/cpu.c
3123     @@ -546,11 +546,18 @@ ssize_t __weak cpu_show_l1tf(struct device *dev,
3124     return sprintf(buf, "Not affected\n");
3125     }
3126    
3127     +ssize_t __weak cpu_show_mds(struct device *dev,
3128     + struct device_attribute *attr, char *buf)
3129     +{
3130     + return sprintf(buf, "Not affected\n");
3131     +}
3132     +
3133     static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
3134     static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
3135     static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
3136     static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
3137     static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
3138     +static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
3139    
3140     static struct attribute *cpu_root_vulnerabilities_attrs[] = {
3141     &dev_attr_meltdown.attr,
3142     @@ -558,6 +565,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
3143     &dev_attr_spectre_v2.attr,
3144     &dev_attr_spec_store_bypass.attr,
3145     &dev_attr_l1tf.attr,
3146     + &dev_attr_mds.attr,
3147     NULL
3148     };
3149    
3150     diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
3151     index a005711f909e..29f25d5d65e0 100644
3152     --- a/drivers/cpufreq/intel_pstate.c
3153     +++ b/drivers/cpufreq/intel_pstate.c
3154     @@ -1779,7 +1779,7 @@ static const struct pstate_funcs knl_funcs = {
3155     static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
3156     ICPU(INTEL_FAM6_SANDYBRIDGE, core_funcs),
3157     ICPU(INTEL_FAM6_SANDYBRIDGE_X, core_funcs),
3158     - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, silvermont_funcs),
3159     + ICPU(INTEL_FAM6_ATOM_SILVERMONT, silvermont_funcs),
3160     ICPU(INTEL_FAM6_IVYBRIDGE, core_funcs),
3161     ICPU(INTEL_FAM6_HASWELL_CORE, core_funcs),
3162     ICPU(INTEL_FAM6_BROADWELL_CORE, core_funcs),
3163     @@ -1796,7 +1796,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
3164     ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_funcs),
3165     ICPU(INTEL_FAM6_XEON_PHI_KNM, knl_funcs),
3166     ICPU(INTEL_FAM6_ATOM_GOLDMONT, core_funcs),
3167     - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, core_funcs),
3168     + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, core_funcs),
3169     ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
3170     {}
3171     };
3172     diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
3173     index df28b65358d2..903a4f1fadcc 100644
3174     --- a/drivers/edac/pnd2_edac.c
3175     +++ b/drivers/edac/pnd2_edac.c
3176     @@ -1541,7 +1541,7 @@ static struct dunit_ops dnv_ops = {
3177    
3178     static const struct x86_cpu_id pnd2_cpuids[] = {
3179     { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops },
3180     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops },
3181     + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X, 0, (kernel_ulong_t)&dnv_ops },
3182     { }
3183     };
3184     MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids);
3185     diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
3186     index b2ccce5fb071..c4bb67ed8da3 100644
3187     --- a/drivers/idle/intel_idle.c
3188     +++ b/drivers/idle/intel_idle.c
3189     @@ -1076,14 +1076,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3190     ICPU(INTEL_FAM6_WESTMERE, idle_cpu_nehalem),
3191     ICPU(INTEL_FAM6_WESTMERE_EP, idle_cpu_nehalem),
3192     ICPU(INTEL_FAM6_NEHALEM_EX, idle_cpu_nehalem),
3193     - ICPU(INTEL_FAM6_ATOM_PINEVIEW, idle_cpu_atom),
3194     - ICPU(INTEL_FAM6_ATOM_LINCROFT, idle_cpu_lincroft),
3195     + ICPU(INTEL_FAM6_ATOM_BONNELL, idle_cpu_atom),
3196     + ICPU(INTEL_FAM6_ATOM_BONNELL_MID, idle_cpu_lincroft),
3197     ICPU(INTEL_FAM6_WESTMERE_EX, idle_cpu_nehalem),
3198     ICPU(INTEL_FAM6_SANDYBRIDGE, idle_cpu_snb),
3199     ICPU(INTEL_FAM6_SANDYBRIDGE_X, idle_cpu_snb),
3200     - ICPU(INTEL_FAM6_ATOM_CEDARVIEW, idle_cpu_atom),
3201     - ICPU(INTEL_FAM6_ATOM_SILVERMONT1, idle_cpu_byt),
3202     - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, idle_cpu_tangier),
3203     + ICPU(INTEL_FAM6_ATOM_SALTWELL, idle_cpu_atom),
3204     + ICPU(INTEL_FAM6_ATOM_SILVERMONT, idle_cpu_byt),
3205     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, idle_cpu_tangier),
3206     ICPU(INTEL_FAM6_ATOM_AIRMONT, idle_cpu_cht),
3207     ICPU(INTEL_FAM6_IVYBRIDGE, idle_cpu_ivb),
3208     ICPU(INTEL_FAM6_IVYBRIDGE_X, idle_cpu_ivt),
3209     @@ -1091,7 +1091,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3210     ICPU(INTEL_FAM6_HASWELL_X, idle_cpu_hsw),
3211     ICPU(INTEL_FAM6_HASWELL_ULT, idle_cpu_hsw),
3212     ICPU(INTEL_FAM6_HASWELL_GT3E, idle_cpu_hsw),
3213     - ICPU(INTEL_FAM6_ATOM_SILVERMONT2, idle_cpu_avn),
3214     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_X, idle_cpu_avn),
3215     ICPU(INTEL_FAM6_BROADWELL_CORE, idle_cpu_bdw),
3216     ICPU(INTEL_FAM6_BROADWELL_GT3E, idle_cpu_bdw),
3217     ICPU(INTEL_FAM6_BROADWELL_X, idle_cpu_bdw),
3218     @@ -1104,8 +1104,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
3219     ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl),
3220     ICPU(INTEL_FAM6_XEON_PHI_KNM, idle_cpu_knl),
3221     ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt),
3222     - ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE, idle_cpu_bxt),
3223     - ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv),
3224     + ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, idle_cpu_bxt),
3225     + ICPU(INTEL_FAM6_ATOM_GOLDMONT_X, idle_cpu_dnv),
3226     {}
3227     };
3228    
3229     @@ -1322,7 +1322,7 @@ static void intel_idle_state_table_update(void)
3230     ivt_idle_state_table_update();
3231     break;
3232     case INTEL_FAM6_ATOM_GOLDMONT:
3233     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3234     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3235     bxt_idle_state_table_update();
3236     break;
3237     case INTEL_FAM6_SKYLAKE_DESKTOP:
3238     diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
3239     index c61109f7b793..57c1ec322e42 100644
3240     --- a/drivers/mmc/host/sdhci-acpi.c
3241     +++ b/drivers/mmc/host/sdhci-acpi.c
3242     @@ -247,7 +247,7 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
3243     static bool sdhci_acpi_byt(void)
3244     {
3245     static const struct x86_cpu_id byt[] = {
3246     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
3247     + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
3248     {}
3249     };
3250    
3251     diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
3252     index 314e135014dc..30fbe2ea6eab 100644
3253     --- a/drivers/pci/pci-mid.c
3254     +++ b/drivers/pci/pci-mid.c
3255     @@ -62,8 +62,8 @@ static const struct pci_platform_pm_ops mid_pci_platform_pm = {
3256     * arch/x86/platform/intel-mid/pwr.c.
3257     */
3258     static const struct x86_cpu_id lpss_cpu_ids[] = {
3259     - ICPU(INTEL_FAM6_ATOM_PENWELL),
3260     - ICPU(INTEL_FAM6_ATOM_MERRIFIELD),
3261     + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID),
3262     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID),
3263     {}
3264     };
3265    
3266     diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
3267     index a473dc51b18d..e89ad4964dc1 100644
3268     --- a/drivers/platform/x86/intel_int0002_vgpio.c
3269     +++ b/drivers/platform/x86/intel_int0002_vgpio.c
3270     @@ -60,7 +60,7 @@ static const struct x86_cpu_id int0002_cpu_ids[] = {
3271     /*
3272     * Limit ourselves to Cherry Trail for now, until testing shows we
3273     * need to handle the INT0002 device on Baytrail too.
3274     - * ICPU(INTEL_FAM6_ATOM_SILVERMONT1), * Valleyview, Bay Trail *
3275     + * ICPU(INTEL_FAM6_ATOM_SILVERMONT), * Valleyview, Bay Trail *
3276     */
3277     ICPU(INTEL_FAM6_ATOM_AIRMONT), /* Braswell, Cherry Trail */
3278     {}
3279     diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c
3280     index d79fbf924b13..5ad44204a9c3 100644
3281     --- a/drivers/platform/x86/intel_mid_powerbtn.c
3282     +++ b/drivers/platform/x86/intel_mid_powerbtn.c
3283     @@ -125,8 +125,8 @@ static const struct mid_pb_ddata mrfld_ddata = {
3284     { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (kernel_ulong_t)&ddata }
3285    
3286     static const struct x86_cpu_id mid_pb_cpu_ids[] = {
3287     - ICPU(INTEL_FAM6_ATOM_PENWELL, mfld_ddata),
3288     - ICPU(INTEL_FAM6_ATOM_MERRIFIELD, mrfld_ddata),
3289     + ICPU(INTEL_FAM6_ATOM_SALTWELL_MID, mfld_ddata),
3290     + ICPU(INTEL_FAM6_ATOM_SILVERMONT_MID, mrfld_ddata),
3291     {}
3292     };
3293    
3294     diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
3295     index 1423fa8710fd..b998d7da97fb 100644
3296     --- a/drivers/platform/x86/intel_telemetry_debugfs.c
3297     +++ b/drivers/platform/x86/intel_telemetry_debugfs.c
3298     @@ -320,7 +320,7 @@ static struct telemetry_debugfs_conf telem_apl_debugfs_conf = {
3299    
3300     static const struct x86_cpu_id telemetry_debugfs_cpu_ids[] = {
3301     TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_debugfs_conf),
3302     - TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_apl_debugfs_conf),
3303     + TELEM_DEBUGFS_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_apl_debugfs_conf),
3304     {}
3305     };
3306    
3307     diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
3308     index 2f889d6c270e..fcc6bee51a42 100644
3309     --- a/drivers/platform/x86/intel_telemetry_pltdrv.c
3310     +++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
3311     @@ -192,7 +192,7 @@ static struct telemetry_plt_config telem_glk_config = {
3312    
3313     static const struct x86_cpu_id telemetry_cpu_ids[] = {
3314     TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT, telem_apl_config),
3315     - TELEM_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, telem_glk_config),
3316     + TELEM_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, telem_glk_config),
3317     {}
3318     };
3319    
3320     diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
3321     index 295d8dcba48c..8cbfcce57a06 100644
3322     --- a/drivers/powercap/intel_rapl.c
3323     +++ b/drivers/powercap/intel_rapl.c
3324     @@ -1164,13 +1164,13 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
3325     RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP, rapl_defaults_core),
3326     RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE, rapl_defaults_core),
3327    
3328     - RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1, rapl_defaults_byt),
3329     + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT, rapl_defaults_byt),
3330     RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT, rapl_defaults_cht),
3331     - RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD, rapl_defaults_tng),
3332     - RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD, rapl_defaults_ann),
3333     + RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT_MID, rapl_defaults_tng),
3334     + RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT_MID, rapl_defaults_ann),
3335     RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core),
3336     - RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE, rapl_defaults_core),
3337     - RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core),
3338     + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS, rapl_defaults_core),
3339     + RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT_X, rapl_defaults_core),
3340    
3341     RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server),
3342     RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM, rapl_defaults_hsw_server),
3343     diff --git a/drivers/thermal/intel_soc_dts_thermal.c b/drivers/thermal/intel_soc_dts_thermal.c
3344     index 1e47511a6bd5..d748527d7a38 100644
3345     --- a/drivers/thermal/intel_soc_dts_thermal.c
3346     +++ b/drivers/thermal/intel_soc_dts_thermal.c
3347     @@ -45,7 +45,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data)
3348     }
3349    
3350     static const struct x86_cpu_id soc_thermal_ids[] = {
3351     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1, 0,
3352     + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT, 0,
3353     BYT_SOC_DTS_APIC_IRQ},
3354     {}
3355     };
3356     diff --git a/include/linux/cpu.h b/include/linux/cpu.h
3357     index 5041357d0297..57ae83c4d5f4 100644
3358     --- a/include/linux/cpu.h
3359     +++ b/include/linux/cpu.h
3360     @@ -57,6 +57,8 @@ extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
3361     struct device_attribute *attr, char *buf);
3362     extern ssize_t cpu_show_l1tf(struct device *dev,
3363     struct device_attribute *attr, char *buf);
3364     +extern ssize_t cpu_show_mds(struct device *dev,
3365     + struct device_attribute *attr, char *buf);
3366    
3367     extern __printf(4, 5)
3368     struct device *cpu_device_create(struct device *parent, void *drvdata,
3369     @@ -187,4 +189,28 @@ static inline void cpu_smt_disable(bool force) { }
3370     static inline void cpu_smt_check_topology(void) { }
3371     #endif
3372    
3373     +/*
3374     + * These are used for a global "mitigations=" cmdline option for toggling
3375     + * optional CPU mitigations.
3376     + */
3377     +enum cpu_mitigations {
3378     + CPU_MITIGATIONS_OFF,
3379     + CPU_MITIGATIONS_AUTO,
3380     + CPU_MITIGATIONS_AUTO_NOSMT,
3381     +};
3382     +
3383     +extern enum cpu_mitigations cpu_mitigations;
3384     +
3385     +/* mitigations=off */
3386     +static inline bool cpu_mitigations_off(void)
3387     +{
3388     + return cpu_mitigations == CPU_MITIGATIONS_OFF;
3389     +}
3390     +
3391     +/* mitigations=auto,nosmt */
3392     +static inline bool cpu_mitigations_auto_nosmt(void)
3393     +{
3394     + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3395     +}
3396     +
3397     #endif /* _LINUX_CPU_H_ */
3398     diff --git a/kernel/cpu.c b/kernel/cpu.c
3399     index dc250ec2c096..bc6c880a093f 100644
3400     --- a/kernel/cpu.c
3401     +++ b/kernel/cpu.c
3402     @@ -2278,3 +2278,18 @@ void __init boot_cpu_hotplug_init(void)
3403     #endif
3404     this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
3405     }
3406     +
3407     +enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
3408     +
3409     +static int __init mitigations_parse_cmdline(char *arg)
3410     +{
3411     + if (!strcmp(arg, "off"))
3412     + cpu_mitigations = CPU_MITIGATIONS_OFF;
3413     + else if (!strcmp(arg, "auto"))
3414     + cpu_mitigations = CPU_MITIGATIONS_AUTO;
3415     + else if (!strcmp(arg, "auto,nosmt"))
3416     + cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
3417     +
3418     + return 0;
3419     +}
3420     +early_param("mitigations", mitigations_parse_cmdline);
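
For reference, the mitigations= handling from the two hunks above (the enum and helpers from include/linux/cpu.h, the parser from kernel/cpu.c) as a stand-alone program. Unrecognized values simply leave the CPU_MITIGATIONS_AUTO default in place, exactly like the early_param handler:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum cpu_mitigations {
    CPU_MITIGATIONS_OFF,
    CPU_MITIGATIONS_AUTO,
    CPU_MITIGATIONS_AUTO_NOSMT,
};

static enum cpu_mitigations cpu_mitigations = CPU_MITIGATIONS_AUTO;

static void mitigations_parse_cmdline(const char *arg)
{
    if (!strcmp(arg, "off"))
        cpu_mitigations = CPU_MITIGATIONS_OFF;
    else if (!strcmp(arg, "auto"))
        cpu_mitigations = CPU_MITIGATIONS_AUTO;
    else if (!strcmp(arg, "auto,nosmt"))
        cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
    /* anything else: keep the AUTO default */
}

/* mitigations=off */
static bool cpu_mitigations_off(void)
{
    return cpu_mitigations == CPU_MITIGATIONS_OFF;
}

/* mitigations=auto,nosmt */
static bool cpu_mitigations_auto_nosmt(void)
{
    return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}

int main(void)
{
    mitigations_parse_cmdline("auto,nosmt");
    printf("off=%d auto_nosmt=%d\n",
           cpu_mitigations_off(), cpu_mitigations_auto_nosmt());
    return 0;
}
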
3421     diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
3422     index b74bbee111c6..c6c8d20be1d2 100644
3423     --- a/sound/soc/intel/boards/bytcr_rt5651.c
3424     +++ b/sound/soc/intel/boards/bytcr_rt5651.c
3425     @@ -787,7 +787,7 @@ static struct snd_soc_card byt_rt5651_card = {
3426     };
3427    
3428     static const struct x86_cpu_id baytrail_cpu_ids[] = {
3429     - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, /* Valleyview */
3430     + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, /* Valleyview */
3431     {}
3432     };
3433    
3434     diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
3435     index 2ab25aa38263..ff058bfbca3e 100644
3436     --- a/tools/power/x86/turbostat/Makefile
3437     +++ b/tools/power/x86/turbostat/Makefile
3438     @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
3439     endif
3440    
3441     turbostat : turbostat.c
3442     -CFLAGS += -Wall
3443     +CFLAGS += -Wall -I../../../include
3444     CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
3445     CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
3446    
3447     diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
3448     index 83964f796edb..fbb53c952b73 100644
3449     --- a/tools/power/x86/turbostat/turbostat.c
3450     +++ b/tools/power/x86/turbostat/turbostat.c
3451     @@ -2082,7 +2082,7 @@ int has_turbo_ratio_group_limits(int family, int model)
3452     switch (model) {
3453     case INTEL_FAM6_ATOM_GOLDMONT:
3454     case INTEL_FAM6_SKYLAKE_X:
3455     - case INTEL_FAM6_ATOM_DENVERTON:
3456     + case INTEL_FAM6_ATOM_GOLDMONT_X:
3457     return 1;
3458     }
3459     return 0;
3460     @@ -3149,9 +3149,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
3461     pkg_cstate_limits = skx_pkg_cstate_limits;
3462     has_misc_feature_control = 1;
3463     break;
3464     - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3465     + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3466     no_MSR_MISC_PWR_MGMT = 1;
3467     - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3468     + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3469     pkg_cstate_limits = slv_pkg_cstate_limits;
3470     break;
3471     case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
3472     @@ -3163,8 +3163,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
3473     pkg_cstate_limits = phi_pkg_cstate_limits;
3474     break;
3475     case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3476     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3477     - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3478     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3479     + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3480     pkg_cstate_limits = bxt_pkg_cstate_limits;
3481     break;
3482     default:
3483     @@ -3193,9 +3193,9 @@ int has_slv_msrs(unsigned int family, unsigned int model)
3484     return 0;
3485    
3486     switch (model) {
3487     - case INTEL_FAM6_ATOM_SILVERMONT1:
3488     - case INTEL_FAM6_ATOM_MERRIFIELD:
3489     - case INTEL_FAM6_ATOM_MOOREFIELD:
3490     + case INTEL_FAM6_ATOM_SILVERMONT:
3491     + case INTEL_FAM6_ATOM_SILVERMONT_MID:
3492     + case INTEL_FAM6_ATOM_AIRMONT_MID:
3493     return 1;
3494     }
3495     return 0;
3496     @@ -3207,7 +3207,7 @@ int is_dnv(unsigned int family, unsigned int model)
3497     return 0;
3498    
3499     switch (model) {
3500     - case INTEL_FAM6_ATOM_DENVERTON:
3501     + case INTEL_FAM6_ATOM_GOLDMONT_X:
3502     return 1;
3503     }
3504     return 0;
3505     @@ -3724,8 +3724,8 @@ double get_tdp(unsigned int model)
3506     return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
3507    
3508     switch (model) {
3509     - case INTEL_FAM6_ATOM_SILVERMONT1:
3510     - case INTEL_FAM6_ATOM_SILVERMONT2:
3511     + case INTEL_FAM6_ATOM_SILVERMONT:
3512     + case INTEL_FAM6_ATOM_SILVERMONT_X:
3513     return 30.0;
3514     default:
3515     return 135.0;
3516     @@ -3791,7 +3791,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3517     }
3518     break;
3519     case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3520     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3521     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3522     do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
3523     if (rapl_joules)
3524     BIC_PRESENT(BIC_Pkg_J);
3525     @@ -3850,8 +3850,8 @@ void rapl_probe(unsigned int family, unsigned int model)
3526     BIC_PRESENT(BIC_RAMWatt);
3527     }
3528     break;
3529     - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3530     - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3531     + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3532     + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3533     do_rapl = RAPL_PKG | RAPL_CORES;
3534     if (rapl_joules) {
3535     BIC_PRESENT(BIC_Pkg_J);
3536     @@ -3861,7 +3861,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3537     BIC_PRESENT(BIC_CorWatt);
3538     }
3539     break;
3540     - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3541     + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3542     do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
3543     BIC_PRESENT(BIC_PKG__);
3544     BIC_PRESENT(BIC_RAM__);
3545     @@ -3884,7 +3884,7 @@ void rapl_probe(unsigned int family, unsigned int model)
3546     return;
3547    
3548     rapl_power_units = 1.0 / (1 << (msr & 0xF));
3549     - if (model == INTEL_FAM6_ATOM_SILVERMONT1)
3550     + if (model == INTEL_FAM6_ATOM_SILVERMONT)
3551     rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
3552     else
3553     rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
3554     @@ -4141,8 +4141,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
3555     case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3556     case INTEL_FAM6_SKYLAKE_X: /* SKX */
3557     case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3558     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3559     - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3560     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3561     + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3562     return 1;
3563     }
3564     return 0;
3565     @@ -4174,7 +4174,7 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
3566     case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3567     case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
3568     case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3569     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3570     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3571     return 1;
3572     }
3573     return 0;
3574     @@ -4209,8 +4209,8 @@ int is_slm(unsigned int family, unsigned int model)
3575     if (!genuine_intel)
3576     return 0;
3577     switch (model) {
3578     - case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3579     - case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3580     + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
3581     + case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
3582     return 1;
3583     }
3584     return 0;
3585     @@ -4581,11 +4581,11 @@ void process_cpuid()
3586     case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3587     crystal_hz = 24000000; /* 24.0 MHz */
3588     break;
3589     - case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3590     + case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
3591     crystal_hz = 25000000; /* 25.0 MHz */
3592     break;
3593     case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3594     - case INTEL_FAM6_ATOM_GEMINI_LAKE:
3595     + case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
3596     crystal_hz = 19200000; /* 19.2 MHz */
3597     break;
3598     default:
3599     diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
3600     index f4534fb8b951..da781b430937 100644
3601     --- a/tools/power/x86/x86_energy_perf_policy/Makefile
3602     +++ b/tools/power/x86/x86_energy_perf_policy/Makefile
3603     @@ -9,7 +9,7 @@ ifeq ("$(origin O)", "command line")
3604     endif
3605    
3606     x86_energy_perf_policy : x86_energy_perf_policy.c
3607     -CFLAGS += -Wall
3608     +CFLAGS += -Wall -I../../../include
3609     CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
3610    
3611     %: %.c