Magellan Linux

Annotation of /trunk/kernel-alx/patches-4.14/0108-4.14.9-all-fixes.patch



Revision 3238
Fri Nov 9 12:14:58 2018 UTC by niro
File size: 356090 bytes
-added up to patches-4.14.79
1 niro 3238 diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
2     index af0c9a4c65a6..cd4b29be29af 100644
3     --- a/Documentation/x86/orc-unwinder.txt
4     +++ b/Documentation/x86/orc-unwinder.txt
5     @@ -4,7 +4,7 @@ ORC unwinder
6     Overview
7     --------
8    
9     -The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
10     +The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
11     similar in concept to a DWARF unwinder. The difference is that the
12     format of the ORC data is much simpler than DWARF, which in turn allows
13     the ORC unwinder to be much simpler and faster.
14     diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
15     index b0798e281aa6..3448e675b462 100644
16     --- a/Documentation/x86/x86_64/mm.txt
17     +++ b/Documentation/x86/x86_64/mm.txt
18     @@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
19     ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
20     ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
21     ... unused hole ...
22     -ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
23     +ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
24     ... unused hole ...
25     ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
26     ... unused hole ...
27     diff --git a/Makefile b/Makefile
28     index 97b5ae76ac8c..ed2132c6d286 100644
29     --- a/Makefile
30     +++ b/Makefile
31     @@ -1,7 +1,7 @@
32     # SPDX-License-Identifier: GPL-2.0
33     VERSION = 4
34     PATCHLEVEL = 14
35     -SUBLEVEL = 8
36     +SUBLEVEL = 9
37     EXTRAVERSION =
38     NAME = Petit Gorille
39    
40     @@ -935,8 +935,8 @@ ifdef CONFIG_STACK_VALIDATION
41     ifeq ($(has_libelf),1)
42     objtool_target := tools/objtool FORCE
43     else
44     - ifdef CONFIG_ORC_UNWINDER
45     - $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
46     + ifdef CONFIG_UNWINDER_ORC
47     + $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
48     else
49     $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
50     endif
51     diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
52     index 8c2a2619971b..f1d7834990ec 100644
53     --- a/arch/arm/configs/exynos_defconfig
54     +++ b/arch/arm/configs/exynos_defconfig
55     @@ -244,7 +244,7 @@ CONFIG_USB_STORAGE_ONETOUCH=m
56     CONFIG_USB_STORAGE_KARMA=m
57     CONFIG_USB_STORAGE_CYPRESS_ATACB=m
58     CONFIG_USB_STORAGE_ENE_UB6250=m
59     -CONFIG_USB_UAS=m
60     +CONFIG_USB_UAS=y
61     CONFIG_USB_DWC3=y
62     CONFIG_USB_DWC2=y
63     CONFIG_USB_HSIC_USB3503=y
64     diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
65     index e9c9a117bd25..c7cdbb43ae7c 100644
66     --- a/arch/arm/include/asm/ptrace.h
67     +++ b/arch/arm/include/asm/ptrace.h
68     @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs);
69     /*
70     * kprobe-based event tracer support
71     */
72     -#include <linux/stddef.h>
73     -#include <linux/types.h>
74     +#include <linux/compiler.h>
75     #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0))
76    
77     extern int regs_query_register_offset(const char *name);
78     diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
79     index caf86be815ba..4052ec39e8db 100644
80     --- a/arch/arm64/include/asm/fixmap.h
81     +++ b/arch/arm64/include/asm/fixmap.h
82     @@ -51,6 +51,13 @@ enum fixed_addresses {
83    
84     FIX_EARLYCON_MEM_BASE,
85     FIX_TEXT_POKE0,
86     +
87     +#ifdef CONFIG_ACPI_APEI_GHES
88     + /* Used for GHES mapping from assorted contexts */
89     + FIX_APEI_GHES_IRQ,
90     + FIX_APEI_GHES_NMI,
91     +#endif /* CONFIG_ACPI_APEI_GHES */
92     +
93     __end_of_permanent_fixed_addresses,
94    
95     /*
96     diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
97     index 57190f384f63..ce848ff84edd 100644
98     --- a/arch/powerpc/kernel/watchdog.c
99     +++ b/arch/powerpc/kernel/watchdog.c
100     @@ -276,9 +276,12 @@ void arch_touch_nmi_watchdog(void)
101     {
102     unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
103     int cpu = smp_processor_id();
104     + u64 tb = get_tb();
105    
106     - if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
107     - watchdog_timer_interrupt(cpu);
108     + if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
109     + per_cpu(wd_timer_tb, cpu) = tb;
110     + wd_smp_clear_cpu_pending(cpu, tb);
111     + }
112     }
113     EXPORT_SYMBOL(arch_touch_nmi_watchdog);
114    
115     diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
116     index a66e64b0b251..5d115bd32539 100644
117     --- a/arch/powerpc/net/bpf_jit_comp64.c
118     +++ b/arch/powerpc/net/bpf_jit_comp64.c
119     @@ -762,7 +762,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
120     func = (u8 *) __bpf_call_base + imm;
121    
122     /* Save skb pointer if we need to re-cache skb data */
123     - if (bpf_helper_changes_pkt_data(func))
124     + if ((ctx->seen & SEEN_SKB) &&
125     + bpf_helper_changes_pkt_data(func))
126     PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
127    
128     bpf_jit_emit_func_call(image, ctx, (u64)func);
129     @@ -771,7 +772,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
130     PPC_MR(b2p[BPF_REG_0], 3);
131    
132     /* refresh skb cache */
133     - if (bpf_helper_changes_pkt_data(func)) {
134     + if ((ctx->seen & SEEN_SKB) &&
135     + bpf_helper_changes_pkt_data(func)) {
136     /* reload skb pointer to r3 */
137     PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
138     bpf_jit_emit_skb_loads(image, ctx);
139     diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
140     index c008083fbc4f..2c8b325591cc 100644
141     --- a/arch/powerpc/xmon/xmon.c
142     +++ b/arch/powerpc/xmon/xmon.c
143     @@ -530,14 +530,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
144    
145     waiting:
146     secondary = 1;
147     + spin_begin();
148     while (secondary && !xmon_gate) {
149     if (in_xmon == 0) {
150     - if (fromipi)
151     + if (fromipi) {
152     + spin_end();
153     goto leave;
154     + }
155     secondary = test_and_set_bit(0, &in_xmon);
156     }
157     - barrier();
158     + spin_cpu_relax();
159     + touch_nmi_watchdog();
160     }
161     + spin_end();
162    
163     if (!secondary && !xmon_gate) {
164     /* we are the first cpu to come in */
165     @@ -568,21 +573,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
166     mb();
167     xmon_gate = 1;
168     barrier();
169     + touch_nmi_watchdog();
170     }
171    
172     cmdloop:
173     while (in_xmon) {
174     if (secondary) {
175     + spin_begin();
176     if (cpu == xmon_owner) {
177     if (!test_and_set_bit(0, &xmon_taken)) {
178     secondary = 0;
179     + spin_end();
180     continue;
181     }
182     /* missed it */
183     while (cpu == xmon_owner)
184     - barrier();
185     + spin_cpu_relax();
186     }
187     - barrier();
188     + spin_cpu_relax();
189     + touch_nmi_watchdog();
190     } else {
191     cmd = cmds(regs);
192     if (cmd != 0) {
193     diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
194     index b15cd2f0320f..33e2785f6842 100644
195     --- a/arch/s390/net/bpf_jit_comp.c
196     +++ b/arch/s390/net/bpf_jit_comp.c
197     @@ -55,8 +55,7 @@ struct bpf_jit {
198     #define SEEN_LITERAL 8 /* code uses literals */
199     #define SEEN_FUNC 16 /* calls C functions */
200     #define SEEN_TAIL_CALL 32 /* code uses tail calls */
201     -#define SEEN_SKB_CHANGE 64 /* code changes skb data */
202     -#define SEEN_REG_AX 128 /* code uses constant blinding */
203     +#define SEEN_REG_AX 64 /* code uses constant blinding */
204     #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
205    
206     /*
207     @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
208     EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
209     REG_15, 152);
210     }
211     - if (jit->seen & SEEN_SKB)
212     + if (jit->seen & SEEN_SKB) {
213     emit_load_skb_data_hlen(jit);
214     - if (jit->seen & SEEN_SKB_CHANGE)
215     /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
216     EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
217     STK_OFF_SKBP);
218     + }
219     }
220    
221     /*
222     @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
223     EMIT2(0x0d00, REG_14, REG_W1);
224     /* lgr %b0,%r2: load return value into %b0 */
225     EMIT4(0xb9040000, BPF_REG_0, REG_2);
226     - if (bpf_helper_changes_pkt_data((void *)func)) {
227     - jit->seen |= SEEN_SKB_CHANGE;
228     + if ((jit->seen & SEEN_SKB) &&
229     + bpf_helper_changes_pkt_data((void *)func)) {
230     /* lg %b1,ST_OFF_SKBP(%r15) */
231     EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
232     REG_15, STK_OFF_SKBP);
233     diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
234     index 6a339a78f4f4..71dd82b43cc5 100644
235     --- a/arch/sparc/include/asm/ptrace.h
236     +++ b/arch/sparc/include/asm/ptrace.h
237     @@ -7,6 +7,7 @@
238     #if defined(__sparc__) && defined(__arch64__)
239     #ifndef __ASSEMBLY__
240    
241     +#include <linux/compiler.h>
242     #include <linux/threads.h>
243     #include <asm/switch_to.h>
244    
245     diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
246     index 5765e7e711f7..ff5f9cb3039a 100644
247     --- a/arch/sparc/net/bpf_jit_comp_64.c
248     +++ b/arch/sparc/net/bpf_jit_comp_64.c
249     @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
250     u8 *func = ((u8 *)__bpf_call_base) + imm;
251    
252     ctx->saw_call = true;
253     + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
254     + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
255    
256     emit_call((u32 *)func, ctx);
257     emit_nop(ctx);
258    
259     emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
260    
261     - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
262     - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
263     + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
264     + load_skb_regs(ctx, L7);
265     break;
266     }
267    
268     diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
269     index 50a32c33d729..73c57f614c9e 100644
270     --- a/arch/um/include/asm/Kbuild
271     +++ b/arch/um/include/asm/Kbuild
272     @@ -1,4 +1,5 @@
273     generic-y += barrier.h
274     +generic-y += bpf_perf_event.h
275     generic-y += bug.h
276     generic-y += clkdev.h
277     generic-y += current.h
278     diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
279     index 390572daa40d..b3f5865a92c9 100644
280     --- a/arch/um/include/shared/init.h
281     +++ b/arch/um/include/shared/init.h
282     @@ -41,7 +41,7 @@
283     typedef int (*initcall_t)(void);
284     typedef void (*exitcall_t)(void);
285    
286     -#include <linux/compiler.h>
287     +#include <linux/compiler_types.h>
288    
289     /* These are for everybody (although not all archs will actually
290     discard it in modules) */
291     diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
292     index 9bceea6a5852..48646160eb83 100644
293     --- a/arch/x86/Kconfig
294     +++ b/arch/x86/Kconfig
295     @@ -108,7 +108,7 @@ config X86
296     select HAVE_ARCH_AUDITSYSCALL
297     select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
298     select HAVE_ARCH_JUMP_LABEL
299     - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
300     + select HAVE_ARCH_KASAN if X86_64
301     select HAVE_ARCH_KGDB
302     select HAVE_ARCH_KMEMCHECK
303     select HAVE_ARCH_MMAP_RND_BITS if MMU
304     @@ -171,7 +171,7 @@ config X86
305     select HAVE_PERF_USER_STACK_DUMP
306     select HAVE_RCU_TABLE_FREE
307     select HAVE_REGS_AND_STACK_ACCESS_API
308     - select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
309     + select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
310     select HAVE_STACK_VALIDATION if X86_64
311     select HAVE_SYSCALL_TRACEPOINTS
312     select HAVE_UNSTABLE_SCHED_CLOCK
313     @@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
314     config KASAN_SHADOW_OFFSET
315     hex
316     depends on KASAN
317     - default 0xdff8000000000000 if X86_5LEVEL
318     default 0xdffffc0000000000
319    
320     config HAVE_INTEL_TXT
321     diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
322     index 90b123056f4b..6293a8768a91 100644
323     --- a/arch/x86/Kconfig.debug
324     +++ b/arch/x86/Kconfig.debug
325     @@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
326    
327     choice
328     prompt "Choose kernel unwinder"
329     - default FRAME_POINTER_UNWINDER
330     + default UNWINDER_ORC if X86_64
331     + default UNWINDER_FRAME_POINTER if X86_32
332     ---help---
333     This determines which method will be used for unwinding kernel stack
334     traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
335     livepatch, lockdep, and more.
336    
337     -config FRAME_POINTER_UNWINDER
338     - bool "Frame pointer unwinder"
339     - select FRAME_POINTER
340     - ---help---
341     - This option enables the frame pointer unwinder for unwinding kernel
342     - stack traces.
343     -
344     - The unwinder itself is fast and it uses less RAM than the ORC
345     - unwinder, but the kernel text size will grow by ~3% and the kernel's
346     - overall performance will degrade by roughly 5-10%.
347     -
348     - This option is recommended if you want to use the livepatch
349     - consistency model, as this is currently the only way to get a
350     - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
351     -
352     -config ORC_UNWINDER
353     +config UNWINDER_ORC
354     bool "ORC unwinder"
355     depends on X86_64
356     select STACK_VALIDATION
357     @@ -396,7 +382,22 @@ config ORC_UNWINDER
358     Enabling this option will increase the kernel's runtime memory usage
359     by roughly 2-4MB, depending on your kernel config.
360    
361     -config GUESS_UNWINDER
362     +config UNWINDER_FRAME_POINTER
363     + bool "Frame pointer unwinder"
364     + select FRAME_POINTER
365     + ---help---
366     + This option enables the frame pointer unwinder for unwinding kernel
367     + stack traces.
368     +
369     + The unwinder itself is fast and it uses less RAM than the ORC
370     + unwinder, but the kernel text size will grow by ~3% and the kernel's
371     + overall performance will degrade by roughly 5-10%.
372     +
373     + This option is recommended if you want to use the livepatch
374     + consistency model, as this is currently the only way to get a
375     + reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
376     +
377     +config UNWINDER_GUESS
378     bool "Guess unwinder"
379     depends on EXPERT
380     ---help---
381     @@ -411,7 +412,7 @@ config GUESS_UNWINDER
382     endchoice
383    
384     config FRAME_POINTER
385     - depends on !ORC_UNWINDER && !GUESS_UNWINDER
386     + depends on !UNWINDER_ORC && !UNWINDER_GUESS
387     bool
388    
389     endmenu
390     diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
391     index 550cd5012b73..66c9e2aab16c 100644
392     --- a/arch/x86/configs/tiny.config
393     +++ b/arch/x86/configs/tiny.config
394     @@ -1,5 +1,5 @@
395     CONFIG_NOHIGHMEM=y
396     # CONFIG_HIGHMEM4G is not set
397     # CONFIG_HIGHMEM64G is not set
398     -CONFIG_GUESS_UNWINDER=y
399     -# CONFIG_FRAME_POINTER_UNWINDER is not set
400     +CONFIG_UNWINDER_GUESS=y
401     +# CONFIG_UNWINDER_FRAME_POINTER is not set
402     diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
403     index 4a4b16e56d35..e32fc1f274d8 100644
404     --- a/arch/x86/configs/x86_64_defconfig
405     +++ b/arch/x86/configs/x86_64_defconfig
406     @@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
407     # CONFIG_DEBUG_RODATA_TEST is not set
408     CONFIG_DEBUG_BOOT_PARAMS=y
409     CONFIG_OPTIMIZE_INLINING=y
410     +CONFIG_UNWINDER_ORC=y
411     CONFIG_SECURITY=y
412     CONFIG_SECURITY_NETWORK=y
413     CONFIG_SECURITY_SELINUX=y
414     diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
415     index 6e160031cfea..3fd8bc560fae 100644
416     --- a/arch/x86/entry/calling.h
417     +++ b/arch/x86/entry/calling.h
418     @@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
419     UNWIND_HINT_REGS offset=\offset
420     .endm
421    
422     - .macro RESTORE_EXTRA_REGS offset=0
423     - movq 0*8+\offset(%rsp), %r15
424     - movq 1*8+\offset(%rsp), %r14
425     - movq 2*8+\offset(%rsp), %r13
426     - movq 3*8+\offset(%rsp), %r12
427     - movq 4*8+\offset(%rsp), %rbp
428     - movq 5*8+\offset(%rsp), %rbx
429     - UNWIND_HINT_REGS offset=\offset extra=0
430     - .endm
431     -
432     - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
433     - .if \rstor_r11
434     - movq 6*8(%rsp), %r11
435     - .endif
436     - .if \rstor_r8910
437     - movq 7*8(%rsp), %r10
438     - movq 8*8(%rsp), %r9
439     - movq 9*8(%rsp), %r8
440     - .endif
441     - .if \rstor_rax
442     - movq 10*8(%rsp), %rax
443     - .endif
444     - .if \rstor_rcx
445     - movq 11*8(%rsp), %rcx
446     - .endif
447     - .if \rstor_rdx
448     - movq 12*8(%rsp), %rdx
449     - .endif
450     - movq 13*8(%rsp), %rsi
451     - movq 14*8(%rsp), %rdi
452     - UNWIND_HINT_IRET_REGS offset=16*8
453     - .endm
454     - .macro RESTORE_C_REGS
455     - RESTORE_C_REGS_HELPER 1,1,1,1,1
456     - .endm
457     - .macro RESTORE_C_REGS_EXCEPT_RAX
458     - RESTORE_C_REGS_HELPER 0,1,1,1,1
459     - .endm
460     - .macro RESTORE_C_REGS_EXCEPT_RCX
461     - RESTORE_C_REGS_HELPER 1,0,1,1,1
462     - .endm
463     - .macro RESTORE_C_REGS_EXCEPT_R11
464     - RESTORE_C_REGS_HELPER 1,1,0,1,1
465     - .endm
466     - .macro RESTORE_C_REGS_EXCEPT_RCX_R11
467     - RESTORE_C_REGS_HELPER 1,0,0,1,1
468     - .endm
469     -
470     - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
471     - subq $-(15*8+\addskip), %rsp
472     + .macro POP_EXTRA_REGS
473     + popq %r15
474     + popq %r14
475     + popq %r13
476     + popq %r12
477     + popq %rbp
478     + popq %rbx
479     + .endm
480     +
481     + .macro POP_C_REGS
482     + popq %r11
483     + popq %r10
484     + popq %r9
485     + popq %r8
486     + popq %rax
487     + popq %rcx
488     + popq %rdx
489     + popq %rsi
490     + popq %rdi
491     .endm
492    
493     .macro icebp
494     diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
495     index 4838037f97f6..bd8b57a5c874 100644
496     --- a/arch/x86/entry/entry_32.S
497     +++ b/arch/x86/entry/entry_32.S
498     @@ -941,7 +941,8 @@ ENTRY(debug)
499     movl %esp, %eax # pt_regs pointer
500    
501     /* Are we currently on the SYSENTER stack? */
502     - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
503     + movl PER_CPU_VAR(cpu_entry_area), %ecx
504     + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
505     subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
506     cmpl $SIZEOF_SYSENTER_stack, %ecx
507     jb .Ldebug_from_sysenter_stack
508     @@ -984,7 +985,8 @@ ENTRY(nmi)
509     movl %esp, %eax # pt_regs pointer
510    
511     /* Are we currently on the SYSENTER stack? */
512     - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
513     + movl PER_CPU_VAR(cpu_entry_area), %ecx
514     + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
515     subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
516     cmpl $SIZEOF_SYSENTER_stack, %ecx
517     jb .Lnmi_from_sysenter_stack
518     diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
519     index 2e956afe272c..6abe3fcaece9 100644
520     --- a/arch/x86/entry/entry_64.S
521     +++ b/arch/x86/entry/entry_64.S
522     @@ -136,6 +136,64 @@ END(native_usergs_sysret64)
523     * with them due to bugs in both AMD and Intel CPUs.
524     */
525    
526     + .pushsection .entry_trampoline, "ax"
527     +
528     +/*
529     + * The code in here gets remapped into cpu_entry_area's trampoline. This means
530     + * that the assembler and linker have the wrong idea as to where this code
531     + * lives (and, in fact, it's mapped more than once, so it's not even at a
532     + * fixed address). So we can't reference any symbols outside the entry
533     + * trampoline and expect it to work.
534     + *
535     + * Instead, we carefully abuse %rip-relative addressing.
536     + * _entry_trampoline(%rip) refers to the start of the remapped) entry
537     + * trampoline. We can thus find cpu_entry_area with this macro:
538     + */
539     +
540     +#define CPU_ENTRY_AREA \
541     + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
542     +
543     +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
544     +#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
545     + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
546     +
547     +ENTRY(entry_SYSCALL_64_trampoline)
548     + UNWIND_HINT_EMPTY
549     + swapgs
550     +
551     + /* Stash the user RSP. */
552     + movq %rsp, RSP_SCRATCH
553     +
554     + /* Load the top of the task stack into RSP */
555     + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
556     +
557     + /* Start building the simulated IRET frame. */
558     + pushq $__USER_DS /* pt_regs->ss */
559     + pushq RSP_SCRATCH /* pt_regs->sp */
560     + pushq %r11 /* pt_regs->flags */
561     + pushq $__USER_CS /* pt_regs->cs */
562     + pushq %rcx /* pt_regs->ip */
563     +
564     + /*
565     + * x86 lacks a near absolute jump, and we can't jump to the real
566     + * entry text with a relative jump. We could push the target
567     + * address and then use retq, but this destroys the pipeline on
568     + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
569     + * spill RDI and restore it in a second-stage trampoline.
570     + */
571     + pushq %rdi
572     + movq $entry_SYSCALL_64_stage2, %rdi
573     + jmp *%rdi
574     +END(entry_SYSCALL_64_trampoline)
575     +
576     + .popsection
577     +
578     +ENTRY(entry_SYSCALL_64_stage2)
579     + UNWIND_HINT_EMPTY
580     + popq %rdi
581     + jmp entry_SYSCALL_64_after_hwframe
582     +END(entry_SYSCALL_64_stage2)
583     +
584     ENTRY(entry_SYSCALL_64)
585     UNWIND_HINT_EMPTY
586     /*
587     @@ -221,10 +279,9 @@ entry_SYSCALL_64_fastpath:
588     TRACE_IRQS_ON /* user mode is traced as IRQs on */
589     movq RIP(%rsp), %rcx
590     movq EFLAGS(%rsp), %r11
591     - RESTORE_C_REGS_EXCEPT_RCX_R11
592     - movq RSP(%rsp), %rsp
593     + addq $6*8, %rsp /* skip extra regs -- they were preserved */
594     UNWIND_HINT_EMPTY
595     - USERGS_SYSRET64
596     + jmp .Lpop_c_regs_except_rcx_r11_and_sysret
597    
598     1:
599     /*
600     @@ -246,17 +303,18 @@ entry_SYSCALL64_slow_path:
601     call do_syscall_64 /* returns with IRQs disabled */
602    
603     return_from_SYSCALL_64:
604     - RESTORE_EXTRA_REGS
605     TRACE_IRQS_IRETQ /* we're about to change IF */
606    
607     /*
608     * Try to use SYSRET instead of IRET if we're returning to
609     - * a completely clean 64-bit userspace context.
610     + * a completely clean 64-bit userspace context. If we're not,
611     + * go to the slow exit path.
612     */
613     movq RCX(%rsp), %rcx
614     movq RIP(%rsp), %r11
615     - cmpq %rcx, %r11 /* RCX == RIP */
616     - jne opportunistic_sysret_failed
617     +
618     + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
619     + jne swapgs_restore_regs_and_return_to_usermode
620    
621     /*
622     * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
623     @@ -274,14 +332,14 @@ return_from_SYSCALL_64:
624    
625     /* If this changed %rcx, it was not canonical */
626     cmpq %rcx, %r11
627     - jne opportunistic_sysret_failed
628     + jne swapgs_restore_regs_and_return_to_usermode
629    
630     cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
631     - jne opportunistic_sysret_failed
632     + jne swapgs_restore_regs_and_return_to_usermode
633    
634     movq R11(%rsp), %r11
635     cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
636     - jne opportunistic_sysret_failed
637     + jne swapgs_restore_regs_and_return_to_usermode
638    
639     /*
640     * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
641     @@ -302,12 +360,12 @@ return_from_SYSCALL_64:
642     * would never get past 'stuck_here'.
643     */
644     testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
645     - jnz opportunistic_sysret_failed
646     + jnz swapgs_restore_regs_and_return_to_usermode
647    
648     /* nothing to check for RSP */
649    
650     cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
651     - jne opportunistic_sysret_failed
652     + jne swapgs_restore_regs_and_return_to_usermode
653    
654     /*
655     * We win! This label is here just for ease of understanding
656     @@ -315,14 +373,36 @@ return_from_SYSCALL_64:
657     */
658     syscall_return_via_sysret:
659     /* rcx and r11 are already restored (see code above) */
660     - RESTORE_C_REGS_EXCEPT_RCX_R11
661     - movq RSP(%rsp), %rsp
662     UNWIND_HINT_EMPTY
663     - USERGS_SYSRET64
664     + POP_EXTRA_REGS
665     +.Lpop_c_regs_except_rcx_r11_and_sysret:
666     + popq %rsi /* skip r11 */
667     + popq %r10
668     + popq %r9
669     + popq %r8
670     + popq %rax
671     + popq %rsi /* skip rcx */
672     + popq %rdx
673     + popq %rsi
674    
675     -opportunistic_sysret_failed:
676     - SWAPGS
677     - jmp restore_c_regs_and_iret
678     + /*
679     + * Now all regs are restored except RSP and RDI.
680     + * Save old stack pointer and switch to trampoline stack.
681     + */
682     + movq %rsp, %rdi
683     + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
684     +
685     + pushq RSP-RDI(%rdi) /* RSP */
686     + pushq (%rdi) /* RDI */
687     +
688     + /*
689     + * We are on the trampoline stack. All regs except RDI are live.
690     + * We can do future final exit work right here.
691     + */
692     +
693     + popq %rdi
694     + popq %rsp
695     + USERGS_SYSRET64
696     END(entry_SYSCALL_64)
697    
698     ENTRY(stub_ptregs_64)
699     @@ -423,8 +503,7 @@ ENTRY(ret_from_fork)
700     movq %rsp, %rdi
701     call syscall_return_slowpath /* returns with IRQs disabled */
702     TRACE_IRQS_ON /* user mode is traced as IRQS on */
703     - SWAPGS
704     - jmp restore_regs_and_iret
705     + jmp swapgs_restore_regs_and_return_to_usermode
706    
707     1:
708     /* kernel thread */
709     @@ -457,12 +536,13 @@ END(irq_entries_start)
710    
711     .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
712     #ifdef CONFIG_DEBUG_ENTRY
713     - pushfq
714     - testl $X86_EFLAGS_IF, (%rsp)
715     + pushq %rax
716     + SAVE_FLAGS(CLBR_RAX)
717     + testl $X86_EFLAGS_IF, %eax
718     jz .Lokay_\@
719     ud2
720     .Lokay_\@:
721     - addq $8, %rsp
722     + popq %rax
723     #endif
724     .endm
725    
726     @@ -554,6 +634,13 @@ END(irq_entries_start)
727     /* 0(%rsp): ~(interrupt number) */
728     .macro interrupt func
729     cld
730     +
731     + testb $3, CS-ORIG_RAX(%rsp)
732     + jz 1f
733     + SWAPGS
734     + call switch_to_thread_stack
735     +1:
736     +
737     ALLOC_PT_GPREGS_ON_STACK
738     SAVE_C_REGS
739     SAVE_EXTRA_REGS
740     @@ -563,12 +650,8 @@ END(irq_entries_start)
741     jz 1f
742    
743     /*
744     - * IRQ from user mode. Switch to kernel gsbase and inform context
745     - * tracking that we're in kernel mode.
746     - */
747     - SWAPGS
748     -
749     - /*
750     + * IRQ from user mode.
751     + *
752     * We need to tell lockdep that IRQs are off. We can't do this until
753     * we fix gsbase, and we should do it before enter_from_user_mode
754     * (which can take locks). Since TRACE_IRQS_OFF idempotent,
755     @@ -612,8 +695,52 @@ GLOBAL(retint_user)
756     mov %rsp,%rdi
757     call prepare_exit_to_usermode
758     TRACE_IRQS_IRETQ
759     +
760     +GLOBAL(swapgs_restore_regs_and_return_to_usermode)
761     +#ifdef CONFIG_DEBUG_ENTRY
762     + /* Assert that pt_regs indicates user mode. */
763     + testb $3, CS(%rsp)
764     + jnz 1f
765     + ud2
766     +1:
767     +#endif
768     + POP_EXTRA_REGS
769     + popq %r11
770     + popq %r10
771     + popq %r9
772     + popq %r8
773     + popq %rax
774     + popq %rcx
775     + popq %rdx
776     + popq %rsi
777     +
778     + /*
779     + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
780     + * Save old stack pointer and switch to trampoline stack.
781     + */
782     + movq %rsp, %rdi
783     + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
784     +
785     + /* Copy the IRET frame to the trampoline stack. */
786     + pushq 6*8(%rdi) /* SS */
787     + pushq 5*8(%rdi) /* RSP */
788     + pushq 4*8(%rdi) /* EFLAGS */
789     + pushq 3*8(%rdi) /* CS */
790     + pushq 2*8(%rdi) /* RIP */
791     +
792     + /* Push user RDI on the trampoline stack. */
793     + pushq (%rdi)
794     +
795     + /*
796     + * We are on the trampoline stack. All regs except RDI are live.
797     + * We can do future final exit work right here.
798     + */
799     +
800     + /* Restore RDI. */
801     + popq %rdi
802     SWAPGS
803     - jmp restore_regs_and_iret
804     + INTERRUPT_RETURN
805     +
806    
807     /* Returning to kernel space */
808     retint_kernel:
809     @@ -633,15 +760,17 @@ retint_kernel:
810     */
811     TRACE_IRQS_IRETQ
812    
813     -/*
814     - * At this label, code paths which return to kernel and to user,
815     - * which come from interrupts/exception and from syscalls, merge.
816     - */
817     -GLOBAL(restore_regs_and_iret)
818     - RESTORE_EXTRA_REGS
819     -restore_c_regs_and_iret:
820     - RESTORE_C_REGS
821     - REMOVE_PT_GPREGS_FROM_STACK 8
822     +GLOBAL(restore_regs_and_return_to_kernel)
823     +#ifdef CONFIG_DEBUG_ENTRY
824     + /* Assert that pt_regs indicates kernel mode. */
825     + testb $3, CS(%rsp)
826     + jz 1f
827     + ud2
828     +1:
829     +#endif
830     + POP_EXTRA_REGS
831     + POP_C_REGS
832     + addq $8, %rsp /* skip regs->orig_ax */
833     INTERRUPT_RETURN
834    
835     ENTRY(native_iret)
836     @@ -805,7 +934,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
837     /*
838     * Exception entry points.
839     */
840     -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
841     +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
842     +
843     +/*
844     + * Switch to the thread stack. This is called with the IRET frame and
845     + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
846     + * space has not been allocated for them.)
847     + */
848     +ENTRY(switch_to_thread_stack)
849     + UNWIND_HINT_FUNC
850     +
851     + pushq %rdi
852     + movq %rsp, %rdi
853     + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
854     + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
855     +
856     + pushq 7*8(%rdi) /* regs->ss */
857     + pushq 6*8(%rdi) /* regs->rsp */
858     + pushq 5*8(%rdi) /* regs->eflags */
859     + pushq 4*8(%rdi) /* regs->cs */
860     + pushq 3*8(%rdi) /* regs->ip */
861     + pushq 2*8(%rdi) /* regs->orig_ax */
862     + pushq 8(%rdi) /* return address */
863     + UNWIND_HINT_FUNC
864     +
865     + movq (%rdi), %rdi
866     + ret
867     +END(switch_to_thread_stack)
868    
869     .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
870     ENTRY(\sym)
871     @@ -818,17 +973,18 @@ ENTRY(\sym)
872    
873     ASM_CLAC
874    
875     - .ifeq \has_error_code
876     + .if \has_error_code == 0
877     pushq $-1 /* ORIG_RAX: no syscall to restart */
878     .endif
879    
880     ALLOC_PT_GPREGS_ON_STACK
881    
882     - .if \paranoid
883     - .if \paranoid == 1
884     + .if \paranoid < 2
885     testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
886     - jnz 1f
887     + jnz .Lfrom_usermode_switch_stack_\@
888     .endif
889     +
890     + .if \paranoid
891     call paranoid_entry
892     .else
893     call error_entry
894     @@ -870,20 +1026,15 @@ ENTRY(\sym)
895     jmp error_exit
896     .endif
897    
898     - .if \paranoid == 1
899     + .if \paranoid < 2
900     /*
901     - * Paranoid entry from userspace. Switch stacks and treat it
902     + * Entry from userspace. Switch stacks and treat it
903     * as a normal entry. This means that paranoid handlers
904     * run in real process context if user_mode(regs).
905     */
906     -1:
907     +.Lfrom_usermode_switch_stack_\@:
908     call error_entry
909    
910     -
911     - movq %rsp, %rdi /* pt_regs pointer */
912     - call sync_regs
913     - movq %rax, %rsp /* switch stack */
914     -
915     movq %rsp, %rdi /* pt_regs pointer */
916    
917     .if \has_error_code
918     @@ -1059,6 +1210,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
919     idtentry stack_segment do_stack_segment has_error_code=1
920    
921     #ifdef CONFIG_XEN
922     +idtentry xennmi do_nmi has_error_code=0
923     idtentry xendebug do_debug has_error_code=0
924     idtentry xenint3 do_int3 has_error_code=0
925     #endif
926     @@ -1112,17 +1264,14 @@ ENTRY(paranoid_exit)
927     DISABLE_INTERRUPTS(CLBR_ANY)
928     TRACE_IRQS_OFF_DEBUG
929     testl %ebx, %ebx /* swapgs needed? */
930     - jnz paranoid_exit_no_swapgs
931     + jnz .Lparanoid_exit_no_swapgs
932     TRACE_IRQS_IRETQ
933     SWAPGS_UNSAFE_STACK
934     - jmp paranoid_exit_restore
935     -paranoid_exit_no_swapgs:
936     + jmp .Lparanoid_exit_restore
937     +.Lparanoid_exit_no_swapgs:
938     TRACE_IRQS_IRETQ_DEBUG
939     -paranoid_exit_restore:
940     - RESTORE_EXTRA_REGS
941     - RESTORE_C_REGS
942     - REMOVE_PT_GPREGS_FROM_STACK 8
943     - INTERRUPT_RETURN
944     +.Lparanoid_exit_restore:
945     + jmp restore_regs_and_return_to_kernel
946     END(paranoid_exit)
947    
948     /*
949     @@ -1146,6 +1295,14 @@ ENTRY(error_entry)
950     SWAPGS
951    
952     .Lerror_entry_from_usermode_after_swapgs:
953     + /* Put us onto the real thread stack. */
954     + popq %r12 /* save return addr in %12 */
955     + movq %rsp, %rdi /* arg0 = pt_regs pointer */
956     + call sync_regs
957     + movq %rax, %rsp /* switch stack */
958     + ENCODE_FRAME_POINTER
959     + pushq %r12
960     +
961     /*
962     * We need to tell lockdep that IRQs are off. We can't do this until
963     * we fix gsbase, and we should do it before enter_from_user_mode
964     @@ -1223,10 +1380,13 @@ ENTRY(error_exit)
965     jmp retint_user
966     END(error_exit)
967    
968     -/* Runs on exception stack */
969     -/* XXX: broken on Xen PV */
970     +/*
971     + * Runs on exception stack. Xen PV does not go through this path at all,
972     + * so we can use real assembly here.
973     + */
974     ENTRY(nmi)
975     UNWIND_HINT_IRET_REGS
976     +
977     /*
978     * We allow breakpoints in NMIs. If a breakpoint occurs, then
979     * the iretq it performs will take us out of NMI context.
980     @@ -1284,7 +1444,7 @@ ENTRY(nmi)
981     * stacks lest we corrupt the "NMI executing" variable.
982     */
983    
984     - SWAPGS_UNSAFE_STACK
985     + swapgs
986     cld
987     movq %rsp, %rdx
988     movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
989     @@ -1328,8 +1488,7 @@ ENTRY(nmi)
990     * Return back to user mode. We must *not* do the normal exit
991     * work, because we don't want to enable interrupts.
992     */
993     - SWAPGS
994     - jmp restore_regs_and_iret
995     + jmp swapgs_restore_regs_and_return_to_usermode
996    
997     .Lnmi_from_kernel:
998     /*
999     @@ -1450,7 +1609,7 @@ nested_nmi_out:
1000     popq %rdx
1001    
1002     /* We are returning to kernel mode, so this cannot result in a fault. */
1003     - INTERRUPT_RETURN
1004     + iretq
1005    
1006     first_nmi:
1007     /* Restore rdx. */
1008     @@ -1481,7 +1640,7 @@ first_nmi:
1009     pushfq /* RFLAGS */
1010     pushq $__KERNEL_CS /* CS */
1011     pushq $1f /* RIP */
1012     - INTERRUPT_RETURN /* continues at repeat_nmi below */
1013     + iretq /* continues at repeat_nmi below */
1014     UNWIND_HINT_IRET_REGS
1015     1:
1016     #endif
1017     @@ -1544,29 +1703,34 @@ end_repeat_nmi:
1018     nmi_swapgs:
1019     SWAPGS_UNSAFE_STACK
1020     nmi_restore:
1021     - RESTORE_EXTRA_REGS
1022     - RESTORE_C_REGS
1023     + POP_EXTRA_REGS
1024     + POP_C_REGS
1025    
1026     - /* Point RSP at the "iret" frame. */
1027     - REMOVE_PT_GPREGS_FROM_STACK 6*8
1028     + /*
1029     + * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
1030     + * at the "iret" frame.
1031     + */
1032     + addq $6*8, %rsp
1033    
1034     /*
1035     * Clear "NMI executing". Set DF first so that we can easily
1036     * distinguish the remaining code between here and IRET from
1037     - * the SYSCALL entry and exit paths. On a native kernel, we
1038     - * could just inspect RIP, but, on paravirt kernels,
1039     - * INTERRUPT_RETURN can translate into a jump into a
1040     - * hypercall page.
1041     + * the SYSCALL entry and exit paths.
1042     + *
1043     + * We arguably should just inspect RIP instead, but I (Andy) wrote
1044     + * this code when I had the misapprehension that Xen PV supported
1045     + * NMIs, and Xen PV would break that approach.
1046     */
1047     std
1048     movq $0, 5*8(%rsp) /* clear "NMI executing" */
1049    
1050     /*
1051     - * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
1052     - * stack in a single instruction. We are returning to kernel
1053     - * mode, so this cannot result in a fault.
1054     + * iretq reads the "iret" frame and exits the NMI stack in a
1055     + * single instruction. We are returning to kernel mode, so this
1056     + * cannot result in a fault. Similarly, we don't need to worry
1057     + * about espfix64 on the way back to kernel mode.
1058     */
1059     - INTERRUPT_RETURN
1060     + iretq
1061     END(nmi)
1062    
1063     ENTRY(ignore_sysret)
1064     diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
1065     index b5c7a56ed256..95ad40eb7eff 100644
1066     --- a/arch/x86/entry/entry_64_compat.S
1067     +++ b/arch/x86/entry/entry_64_compat.S
1068     @@ -48,7 +48,7 @@
1069     */
1070     ENTRY(entry_SYSENTER_compat)
1071     /* Interrupts are off on entry. */
1072     - SWAPGS_UNSAFE_STACK
1073     + SWAPGS
1074     movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
1075    
1076     /*
1077     @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
1078     */
1079     movl %eax, %eax
1080    
1081     - /* Construct struct pt_regs on stack (iret frame is already on stack) */
1082     pushq %rax /* pt_regs->orig_ax */
1083     +
1084     + /* switch to thread stack expects orig_ax to be pushed */
1085     + call switch_to_thread_stack
1086     +
1087     pushq %rdi /* pt_regs->di */
1088     pushq %rsi /* pt_regs->si */
1089     pushq %rdx /* pt_regs->dx */
1090     @@ -337,8 +340,7 @@ ENTRY(entry_INT80_compat)
1091    
1092     /* Go back to user mode. */
1093     TRACE_IRQS_ON
1094     - SWAPGS
1095     - jmp restore_regs_and_iret
1096     + jmp swapgs_restore_regs_and_return_to_usermode
1097     END(entry_INT80_compat)
1098    
1099     ENTRY(stub32_clone)
1100     diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
1101     index 331f1dca5085..6fb9b57ed5ba 100644
1102     --- a/arch/x86/entry/syscalls/Makefile
1103     +++ b/arch/x86/entry/syscalls/Makefile
1104     @@ -1,6 +1,6 @@
1105     # SPDX-License-Identifier: GPL-2.0
1106     -out := $(obj)/../../include/generated/asm
1107     -uapi := $(obj)/../../include/generated/uapi/asm
1108     +out := arch/$(SRCARCH)/include/generated/asm
1109     +uapi := arch/$(SRCARCH)/include/generated/uapi/asm
1110    
1111     # Create output directory if not already present
1112     _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
1113     diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
1114     index 80534d3c2480..589af1eec7c1 100644
1115     --- a/arch/x86/events/core.c
1116     +++ b/arch/x86/events/core.c
1117     @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment)
1118     struct ldt_struct *ldt;
1119    
1120     /* IRQs are off, so this synchronizes with smp_store_release */
1121     - ldt = lockless_dereference(current->active_mm->context.ldt);
1122     + ldt = READ_ONCE(current->active_mm->context.ldt);
1123     if (!ldt || idx >= ldt->nr_entries)
1124     return 0;
1125    
1126     diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
1127     index f94855000d4e..09c26a4f139c 100644
1128     --- a/arch/x86/events/intel/core.c
1129     +++ b/arch/x86/events/intel/core.c
1130     @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
1131    
1132     if (event->attr.use_clockid)
1133     flags &= ~PERF_SAMPLE_TIME;
1134     + if (!event->attr.exclude_kernel)
1135     + flags &= ~PERF_SAMPLE_REGS_USER;
1136     + if (event->attr.sample_regs_user & ~PEBS_REGS)
1137     + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
1138     return flags;
1139     }
1140    
1141     diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
1142     index 4196f81ec0e1..f7aaadf9331f 100644
1143     --- a/arch/x86/events/perf_event.h
1144     +++ b/arch/x86/events/perf_event.h
1145     @@ -85,13 +85,15 @@ struct amd_nb {
1146     * Flags PEBS can handle without an PMI.
1147     *
1148     * TID can only be handled by flushing at context switch.
1149     + * REGS_USER can be handled for events limited to ring 3.
1150     *
1151     */
1152     #define PEBS_FREERUNNING_FLAGS \
1153     (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
1154     PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
1155     PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
1156     - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
1157     + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
1158     + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
1159    
1160     /*
1161     * A debug store configuration.
1162     @@ -110,6 +112,26 @@ struct debug_store {
1163     u64 pebs_event_reset[MAX_PEBS_EVENTS];
1164     };
1165    
1166     +#define PEBS_REGS \
1167     + (PERF_REG_X86_AX | \
1168     + PERF_REG_X86_BX | \
1169     + PERF_REG_X86_CX | \
1170     + PERF_REG_X86_DX | \
1171     + PERF_REG_X86_DI | \
1172     + PERF_REG_X86_SI | \
1173     + PERF_REG_X86_SP | \
1174     + PERF_REG_X86_BP | \
1175     + PERF_REG_X86_IP | \
1176     + PERF_REG_X86_FLAGS | \
1177     + PERF_REG_X86_R8 | \
1178     + PERF_REG_X86_R9 | \
1179     + PERF_REG_X86_R10 | \
1180     + PERF_REG_X86_R11 | \
1181     + PERF_REG_X86_R12 | \
1182     + PERF_REG_X86_R13 | \
1183     + PERF_REG_X86_R14 | \
1184     + PERF_REG_X86_R15)
1185     +
1186     /*
1187     * Per register state.
1188     */
1189     diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
1190     index a5db63f728a2..a0b86cf486e0 100644
1191     --- a/arch/x86/hyperv/hv_init.c
1192     +++ b/arch/x86/hyperv/hv_init.c
1193     @@ -113,7 +113,7 @@ void hyperv_init(void)
1194     u64 guest_id;
1195     union hv_x64_msr_hypercall_contents hypercall_msr;
1196    
1197     - if (x86_hyper != &x86_hyper_ms_hyperv)
1198     + if (x86_hyper_type != X86_HYPER_MS_HYPERV)
1199     return;
1200    
1201     /* Allocate percpu VP index */
1202     diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
1203     index 5b0579abb398..3ac991d81e74 100644
1204     --- a/arch/x86/include/asm/archrandom.h
1205     +++ b/arch/x86/include/asm/archrandom.h
1206     @@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
1207     bool ok;
1208     unsigned int retry = RDRAND_RETRY_LOOPS;
1209     do {
1210     - asm volatile(RDRAND_LONG "\n\t"
1211     + asm volatile(RDRAND_LONG
1212     CC_SET(c)
1213     : CC_OUT(c) (ok), "=a" (*v));
1214     if (ok)
1215     @@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
1216     bool ok;
1217     unsigned int retry = RDRAND_RETRY_LOOPS;
1218     do {
1219     - asm volatile(RDRAND_INT "\n\t"
1220     + asm volatile(RDRAND_INT
1221     CC_SET(c)
1222     : CC_OUT(c) (ok), "=a" (*v));
1223     if (ok)
1224     @@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
1225     static inline bool rdseed_long(unsigned long *v)
1226     {
1227     bool ok;
1228     - asm volatile(RDSEED_LONG "\n\t"
1229     + asm volatile(RDSEED_LONG
1230     CC_SET(c)
1231     : CC_OUT(c) (ok), "=a" (*v));
1232     return ok;
1233     @@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
1234     static inline bool rdseed_int(unsigned int *v)
1235     {
1236     bool ok;
1237     - asm volatile(RDSEED_INT "\n\t"
1238     + asm volatile(RDSEED_INT
1239     CC_SET(c)
1240     : CC_OUT(c) (ok), "=a" (*v));
1241     return ok;
1242     diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
1243     index 2bcf47314959..3fa039855b8f 100644
1244     --- a/arch/x86/include/asm/bitops.h
1245     +++ b/arch/x86/include/asm/bitops.h
1246     @@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
1247     static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
1248     {
1249     bool negative;
1250     - asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
1251     + asm volatile(LOCK_PREFIX "andb %2,%1"
1252     CC_SET(s)
1253     : CC_OUT(s) (negative), ADDR
1254     : "ir" ((char) ~(1 << nr)) : "memory");
1255     @@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
1256     {
1257     bool oldbit;
1258    
1259     - asm("bts %2,%1\n\t"
1260     + asm("bts %2,%1"
1261     CC_SET(c)
1262     : CC_OUT(c) (oldbit), ADDR
1263     : "Ir" (nr));
1264     @@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
1265     {
1266     bool oldbit;
1267    
1268     - asm volatile("btr %2,%1\n\t"
1269     + asm volatile("btr %2,%1"
1270     CC_SET(c)
1271     : CC_OUT(c) (oldbit), ADDR
1272     : "Ir" (nr));
1273     @@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
1274     {
1275     bool oldbit;
1276    
1277     - asm volatile("btc %2,%1\n\t"
1278     + asm volatile("btc %2,%1"
1279     CC_SET(c)
1280     : CC_OUT(c) (oldbit), ADDR
1281     : "Ir" (nr) : "memory");
1282     @@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
1283     {
1284     bool oldbit;
1285    
1286     - asm volatile("bt %2,%1\n\t"
1287     + asm volatile("bt %2,%1"
1288     CC_SET(c)
1289     : CC_OUT(c) (oldbit)
1290     : "m" (*(unsigned long *)addr), "Ir" (nr));
1291     diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
1292     index 70bc1df580b2..2cbd75dd2fd3 100644
1293     --- a/arch/x86/include/asm/compat.h
1294     +++ b/arch/x86/include/asm/compat.h
1295     @@ -7,6 +7,7 @@
1296     */
1297     #include <linux/types.h>
1298     #include <linux/sched.h>
1299     +#include <linux/sched/task_stack.h>
1300     #include <asm/processor.h>
1301     #include <asm/user32.h>
1302     #include <asm/unistd.h>
1303     diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
1304     index 0dfa68438e80..ea9a7dde62e5 100644
1305     --- a/arch/x86/include/asm/cpufeature.h
1306     +++ b/arch/x86/include/asm/cpufeature.h
1307     @@ -126,16 +126,17 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
1308     #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
1309    
1310     #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
1311     -#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
1312     -#define setup_clear_cpu_cap(bit) do { \
1313     - clear_cpu_cap(&boot_cpu_data, bit); \
1314     - set_bit(bit, (unsigned long *)cpu_caps_cleared); \
1315     -} while (0)
1316     +
1317     +extern void setup_clear_cpu_cap(unsigned int bit);
1318     +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
1319     +
1320     #define setup_force_cpu_cap(bit) do { \
1321     set_cpu_cap(&boot_cpu_data, bit); \
1322     set_bit(bit, (unsigned long *)cpu_caps_set); \
1323     } while (0)
1324    
1325     +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
1326     +
1327     #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
1328     /*
1329     * Static testing of CPU features. Used the same as boot_cpu_has().
1330     diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
1331     index 793690fbda36..800104c8a3ed 100644
1332     --- a/arch/x86/include/asm/cpufeatures.h
1333     +++ b/arch/x86/include/asm/cpufeatures.h
1334     @@ -13,173 +13,176 @@
1335     /*
1336     * Defines x86 CPU feature bits
1337     */
1338     -#define NCAPINTS 18 /* N 32-bit words worth of info */
1339     -#define NBUGINTS 1 /* N 32-bit bug flags */
1340     +#define NCAPINTS 18 /* N 32-bit words worth of info */
1341     +#define NBUGINTS 1 /* N 32-bit bug flags */
1342    
1343     /*
1344     * Note: If the comment begins with a quoted string, that string is used
1345     * in /proc/cpuinfo instead of the macro name. If the string is "",
1346     * this feature bit is not displayed in /proc/cpuinfo at all.
1347     + *
1348     + * When adding new features here that depend on other features,
1349     + * please update the table in kernel/cpu/cpuid-deps.c as well.
1350     */
1351    
1352     -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
1353     -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
1354     -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
1355     -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
1356     -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
1357     -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
1358     -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
1359     -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
1360     -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
1361     -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
1362     -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
1363     -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
1364     -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
1365     -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
1366     -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
1367     -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
1368     - /* (plus FCMOVcc, FCOMI with FPU) */
1369     -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
1370     -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
1371     -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
1372     -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
1373     -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
1374     -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
1375     -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
1376     -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
1377     -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
1378     -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
1379     -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
1380     -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
1381     -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
1382     -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
1383     -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
1384     +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
1385     +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
1386     +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
1387     +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
1388     +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
1389     +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
1390     +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
1391     +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
1392     +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
1393     +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
1394     +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
1395     +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
1396     +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
1397     +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
1398     +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
1399     +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
1400     +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
1401     +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
1402     +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
1403     +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
1404     +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
1405     +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
1406     +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
1407     +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
1408     +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
1409     +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
1410     +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
1411     +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
1412     +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
1413     +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
1414     +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
1415    
1416     /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
1417     /* Don't duplicate feature flags which are redundant with Intel! */
1418     -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
1419     -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
1420     -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
1421     -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
1422     -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
1423     -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
1424     -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
1425     -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
1426     -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
1427     -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
1428     +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
1429     +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
1430     +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
1431     +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
1432     +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
1433     +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
1434     +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
1435     +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
1436     +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
1437     +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
1438    
1439     /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
1440     -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
1441     -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
1442     -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
1443     +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
1444     +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
1445     +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
1446    
1447     /* Other features, Linux-defined mapping, word 3 */
1448     /* This range is used for feature bits which conflict or are synthesized */
1449     -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
1450     -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
1451     -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
1452     -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
1453     -/* cpu types for specific tunings: */
1454     -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
1455     -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
1456     -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
1457     -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
1458     -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
1459     -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
1460     -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
1461     -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
1462     -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
1463     -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
1464     -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
1465     -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
1466     -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
1467     -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
1468     -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
1469     -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
1470     -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
1471     -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
1472     -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
1473     -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
1474     -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
1475     -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
1476     -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
1477     -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
1478     -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
1479     -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
1480     -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
1481     +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
1482     +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
1483     +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
1484     +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
1485     +
1486     +/* CPU types for specific tunings: */
1487     +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
1488     +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
1489     +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
1490     +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
1491     +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
1492     +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
1493     +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
1494     +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
1495     +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
1496     +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
1497     +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
1498     +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
1499     +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
1500     +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
1501     +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
1502     +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
1503     +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
1504     +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
1505     +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
1506     +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
1507     +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
1508     +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
1509     +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
1510     +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
1511     +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
1512     +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
1513     +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
1514    
1515     -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
1516     -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
1517     -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
1518     -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
1519     -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
1520     -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
1521     -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
1522     -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
1523     -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
1524     -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
1525     -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
1526     -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
1527     -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
1528     -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
1529     -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
1530     -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
1531     -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
1532     -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
1533     -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
1534     -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
1535     -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
1536     -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
1537     -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
1538     -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
1539     -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
1540     -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
1541     -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
1542     -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
1543     -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
1544     -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
1545     -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
1546     -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
1547     +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
1548     +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
1549     +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
1550     +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
1551     +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
1552     +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
1553     +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
1554     +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
1555     +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
1556     +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
1557     +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
1558     +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
1559     +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
1560     +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
1561     +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
1562     +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
1563     +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
1564     +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
1565     +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
1566     +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
1567     +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
1568     +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
1569     +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
1570     +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
1571     +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
1572     +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
1573     +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
1574     +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
1575     +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
1576     +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
1577     +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
1578     +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
1579    
1580     /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
1581     -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
1582     -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
1583     -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
1584     -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
1585     -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
1586     -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
1587     -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
1588     -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
1589     -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
1590     -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
1591     +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
1592     +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
1593     +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
1594     +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
1595     +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
1596     +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
1597     +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
1598     +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
1599     +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
1600     +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
1601    
1602     -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
1603     -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
1604     -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
1605     -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
1606     -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
1607     -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
1608     -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
1609     -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
1610     -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
1611     -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
1612     -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
1613     -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
1614     -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
1615     -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
1616     -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
1617     -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
1618     -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
1619     -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
1620     -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
1621     -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
1622     -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
1623     -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
1624     -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
1625     -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
1626     -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
1627     -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
1628     -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
1629     +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
1630     +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
1631     +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
1632     +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
1633     +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
1634     +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
1635     +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
1636     +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
1637     +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
1638     +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
1639     +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
1640     +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
1641     +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
1642     +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
1643     +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
1644     +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
1645     +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
1646     +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
1647     +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
1648     +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
1649     +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
1650     +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
1651     +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
1652     +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
1653     +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
1654     +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
1655     +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
1656    
1657     /*
1658     * Auxiliary flags: Linux defined - For features scattered in various
1659     @@ -187,146 +190,155 @@
1660     *
1661     * Reuse free bits when adding new feature flags!
1662     */
1663     -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
1664     -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
1665     -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
1666     -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
1667     -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
1668     -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
1669     -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
1670     +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
1671     +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
1672     +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
1673     +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
1674     +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
1675     +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
1676     +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
1677    
1678     -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
1679     -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1680     -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1681     +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
1682     +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1683     +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1684    
1685     -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1686     -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1687     -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
1688     -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
1689     +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1690     +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1691     +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
1692     +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
1693    
1694     -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
1695     +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
1696    
1697     /* Virtualization flags: Linux defined, word 8 */
1698     -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
1699     -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
1700     -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
1701     -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
1702     -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
1703     +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
1704     +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
1705     +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
1706     +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
1707     +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
1708    
1709     -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
1710     -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
1711     +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
1712     +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
1713    
1714    
1715     -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
1716     -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
1717     -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
1718     -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
1719     -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
1720     -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
1721     -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
1722     -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
1723     -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
1724     -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
1725     -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
1726     -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
1727     -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
1728     -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
1729     -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
1730     -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
1731     -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
1732     -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
1733     -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
1734     -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
1735     -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
1736     -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
1737     -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
1738     -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
1739     -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
1740     -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
1741     -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
1742     -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
1743     +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
1744     +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
1745     +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
1746     +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
1747     +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
1748     +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
1749     +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
1750     +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
1751     +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
1752     +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
1753     +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
1754     +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
1755     +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
1756     +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
1757     +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
1758     +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
1759     +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
1760     +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
1761     +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
1762     +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
1763     +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
1764     +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
1765     +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
1766     +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
1767     +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
1768     +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
1769     +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
1770     +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
1771    
1772     -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
1773     -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
1774     -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
1775     -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
1776     -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
1777     +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
1778     +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
1779     +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
1780     +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
1781     +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
1782    
1783     -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
1784     -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
1785     +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
1786     +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
1787    
1788     -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
1789     -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
1790     -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
1791     -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
1792     +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
1793     +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
1794     +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
1795     +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
1796    
1797     -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
1798     -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
1799     -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
1800     +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
1801     +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
1802     +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
1803     +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
1804    
1805     -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
1806     -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
1807     -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
1808     -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
1809     -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
1810     -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
1811     -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
1812     -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
1813     -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
1814     -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
1815     -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
1816     +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
1817     +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
1818     +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
1819     +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
1820     +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
1821     +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
1822     +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
1823     +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
1824     +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
1825     +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
1826     +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
1827    
1828     -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
1829     -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
1830     -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
1831     -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
1832     -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
1833     -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
1834     -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
1835     -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
1836     -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
1837     -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
1838     -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
1839     -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
1840     -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
1841     -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
1842     +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
1843     +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
1844     +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
1845     +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
1846     +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
1847     +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
1848     +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
1849     +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
1850     +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
1851     +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
1852     +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
1853     +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
1854     +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
1855     +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
1856    
1857     -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
1858     -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
1859     -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
1860     -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
1861     -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
1862     -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
1863     -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
1864     +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
1865     +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
1866     +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
1867     +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
1868     +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
1869     +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
1870     +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
1871     +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
1872     +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
1873     +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
1874     +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
1875     +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
1876     +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
1877     +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
1878    
1879     -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
1880     -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
1881     -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
1882     -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
1883     +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
1884     +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
1885     +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
1886     +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
1887    
1888     /*
1889     * BUG word(s)
1890     */
1891     -#define X86_BUG(x) (NCAPINTS*32 + (x))
1892     +#define X86_BUG(x) (NCAPINTS*32 + (x))
1893    
1894     -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
1895     -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
1896     -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
1897     -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
1898     -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
1899     -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
1900     -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
1901     -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
1902     -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
1903     +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
1904     +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
1905     +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
1906     +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
1907     +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
1908     +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
1909     +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
1910     +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
1911     +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
1912     #ifdef CONFIG_X86_32
1913     /*
1914     * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
1915     * to avoid confusion.
1916     */
1917     -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
1918     +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
1919     #endif
1920     -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
1921     -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
1922     -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1923     -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1924     +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
1925     +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
1926     +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1927     +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1928     +
1929     #endif /* _ASM_X86_CPUFEATURES_H */
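Every X86_FEATURE_* value in this header uses the same encoding: the value is word*32 + bit, where "word" selects an element of the per-CPU x86_capability[] bitmap and "bit" selects a bit within that 32-bit word; boot_cpu_has() and friends roughly test x86_capability[feature / 32] & (1u << (feature % 32)). A minimal standalone sketch of that decomposition (plain userspace C, values copied from the definitions above; not kernel code):

#include <stdio.h>

/* Values copied from the definitions above: word*32 + bit. */
#define X86_FEATURE_PCID   ( 4*32+17)
#define X86_FEATURE_LA57   (16*32+16)
#define X86_FEATURE_SMCA   (17*32+ 3)

/* Print the (word, bit) pair a feature number decomposes into. */
static void show(const char *name, unsigned int feature)
{
	printf("%-18s word %2u, bit %2u\n", name, feature / 32, feature % 32);
}

int main(void)
{
	show("X86_FEATURE_PCID", X86_FEATURE_PCID);
	show("X86_FEATURE_LA57", X86_FEATURE_LA57);
	show("X86_FEATURE_SMCA", X86_FEATURE_SMCA);
	return 0;
}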
1930     diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
1931     index 0a3e808b9123..2ace1f90d138 100644
1932     --- a/arch/x86/include/asm/desc.h
1933     +++ b/arch/x86/include/asm/desc.h
1934     @@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
1935     return this_cpu_ptr(&gdt_page)->gdt;
1936     }
1937    
1938     -/* Get the fixmap index for a specific processor */
1939     -static inline unsigned int get_cpu_gdt_ro_index(int cpu)
1940     -{
1941     - return FIX_GDT_REMAP_BEGIN + cpu;
1942     -}
1943     -
1944     /* Provide the fixmap address of the remapped GDT */
1945     static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
1946     {
1947     - unsigned int idx = get_cpu_gdt_ro_index(cpu);
1948     - return (struct desc_struct *)__fix_to_virt(idx);
1949     + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
1950     }
1951    
1952     /* Provide the current read-only GDT */
1953     @@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
1954     #endif
1955     }
1956    
1957     -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
1958     +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
1959     {
1960     struct desc_struct *d = get_cpu_gdt_rw(cpu);
1961     tss_desc tss;
1962     diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
1963     index dcd9fb55e679..94fc4fa14127 100644
1964     --- a/arch/x86/include/asm/fixmap.h
1965     +++ b/arch/x86/include/asm/fixmap.h
1966     @@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
1967     PAGE_SIZE)
1968     #endif
1969    
1970     +/*
1971     + * cpu_entry_area is a percpu region in the fixmap that contains things
1972     + * needed by the CPU and early entry/exit code. Real types aren't used
1973     + * for all fields here to avoid circular header dependencies.
1974     + *
1975     + * Every field is a virtual alias of some other allocated backing store.
1976     + * There is no direct allocation of a struct cpu_entry_area.
1977     + */
1978     +struct cpu_entry_area {
1979     + char gdt[PAGE_SIZE];
1980     +
1981     + /*
1982     + * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
1983     + * a a read-only guard page.
1984     + */
1985     + struct SYSENTER_stack_page SYSENTER_stack_page;
1986     +
1987     + /*
1988     + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
1989     + * we need task switches to work, and task switches write to the TSS.
1990     + */
1991     + struct tss_struct tss;
1992     +
1993     + char entry_trampoline[PAGE_SIZE];
1994     +
1995     +#ifdef CONFIG_X86_64
1996     + /*
1997     + * Exception stacks used for IST entries.
1998     + *
1999     + * In the future, this should have a separate slot for each stack
2000     + * with guard pages between them.
2001     + */
2002     + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
2003     +#endif
2004     +};
2005     +
2006     +#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
2007     +
2008     +extern void setup_cpu_entry_areas(void);
2009    
2010     /*
2011     * Here we define all the compile-time 'special' virtual
2012     @@ -101,8 +140,14 @@ enum fixed_addresses {
2013     FIX_LNW_VRTC,
2014     #endif
2015     /* Fixmap entries to remap the GDTs, one per processor. */
2016     - FIX_GDT_REMAP_BEGIN,
2017     - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
2018     + FIX_CPU_ENTRY_AREA_TOP,
2019     + FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
2020     +
2021     +#ifdef CONFIG_ACPI_APEI_GHES
2022     + /* Used for GHES mapping from assorted contexts */
2023     + FIX_APEI_GHES_IRQ,
2024     + FIX_APEI_GHES_NMI,
2025     +#endif
2026    
2027     __end_of_permanent_fixed_addresses,
2028    
2029     @@ -185,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
2030     void __early_set_fixmap(enum fixed_addresses idx,
2031     phys_addr_t phys, pgprot_t flags);
2032    
2033     +static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
2034     +{
2035     + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
2036     +
2037     + return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
2038     +}
2039     +
2040     +#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
2041     + BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
2042     + __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
2043     + })
2044     +
2045     +#define get_cpu_entry_area_index(cpu, field) \
2046     + __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
2047     +
2048     +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
2049     +{
2050     + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
2051     +}
2052     +
2053     +static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
2054     +{
2055     + return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
2056     +}
2057     +
2058     #endif /* !__ASSEMBLY__ */
2059     #endif /* _ASM_X86_FIXMAP_H */
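get_cpu_entry_area() and __get_cpu_entry_area_page_index() above map each CPU's cpu_entry_area onto a run of fixmap slots. Because fixmap indices grow downward in virtual address (__fix_to_virt() subtracts the index from FIXADDR_TOP), counting down from FIX_CPU_ENTRY_AREA_BOTTOM leaves each CPU's pages contiguous and ascending in the virtual address space. A userspace sketch of the index arithmetic, with made-up constants (the FIXADDR_TOP value, page count and base index below are illustrative assumptions, not the kernel's real values):

#include <stdio.h>

/* Illustrative constants only -- not the kernel's real values. */
#define PAGE_SHIFT                12
#define PAGE_SIZE                 (1UL << PAGE_SHIFT)
#define FIXADDR_TOP               0xffffffffff5ff000UL
#define CPU_ENTRY_AREA_PAGES      4    /* pretend sizeof(struct cpu_entry_area) / PAGE_SIZE */
#define FIX_CPU_ENTRY_AREA_BOTTOM 100  /* pretend fixmap index */

/* Higher fixmap index == lower virtual address. */
static unsigned long fix_to_virt(unsigned long idx)
{
	return FIXADDR_TOP - (idx << PAGE_SHIFT);
}

/* Mirrors __get_cpu_entry_area_page_index() from the hunk above. */
static unsigned long entry_area_page_index(int cpu, int page)
{
	return FIX_CPU_ENTRY_AREA_BOTTOM - cpu * CPU_ENTRY_AREA_PAGES - page;
}

int main(void)
{
	for (int cpu = 0; cpu < 2; cpu++)
		for (int page = 0; page < CPU_ENTRY_AREA_PAGES; page++)
			printf("cpu %d page %d -> index %3lu, va %#lx\n",
			       cpu, page, entry_area_page_index(cpu, page),
			       fix_to_virt(entry_area_page_index(cpu, page)));
	return 0;
}

The output shows each CPU's pages landing at consecutive, ascending virtual addresses, which is what lets struct cpu_entry_area fields be addressed directly from get_cpu_entry_area(cpu).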
2060     diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
2061     index 0ead9dbb9130..96aa6b9884dc 100644
2062     --- a/arch/x86/include/asm/hypervisor.h
2063     +++ b/arch/x86/include/asm/hypervisor.h
2064     @@ -20,14 +20,22 @@
2065     #ifndef _ASM_X86_HYPERVISOR_H
2066     #define _ASM_X86_HYPERVISOR_H
2067    
2068     +/* x86 hypervisor types */
2069     +enum x86_hypervisor_type {
2070     + X86_HYPER_NATIVE = 0,
2071     + X86_HYPER_VMWARE,
2072     + X86_HYPER_MS_HYPERV,
2073     + X86_HYPER_XEN_PV,
2074     + X86_HYPER_XEN_HVM,
2075     + X86_HYPER_KVM,
2076     +};
2077     +
2078     #ifdef CONFIG_HYPERVISOR_GUEST
2079    
2080     #include <asm/kvm_para.h>
2081     +#include <asm/x86_init.h>
2082     #include <asm/xen/hypervisor.h>
2083    
2084     -/*
2085     - * x86 hypervisor information
2086     - */
2087     struct hypervisor_x86 {
2088     /* Hypervisor name */
2089     const char *name;
2090     @@ -35,40 +43,27 @@ struct hypervisor_x86 {
2091     /* Detection routine */
2092     uint32_t (*detect)(void);
2093    
2094     - /* Platform setup (run once per boot) */
2095     - void (*init_platform)(void);
2096     -
2097     - /* X2APIC detection (run once per boot) */
2098     - bool (*x2apic_available)(void);
2099     + /* Hypervisor type */
2100     + enum x86_hypervisor_type type;
2101    
2102     - /* pin current vcpu to specified physical cpu (run rarely) */
2103     - void (*pin_vcpu)(int);
2104     + /* init time callbacks */
2105     + struct x86_hyper_init init;
2106    
2107     - /* called during init_mem_mapping() to setup early mappings. */
2108     - void (*init_mem_mapping)(void);
2109     + /* runtime callbacks */
2110     + struct x86_hyper_runtime runtime;
2111     };
2112    
2113     -extern const struct hypervisor_x86 *x86_hyper;
2114     -
2115     -/* Recognized hypervisors */
2116     -extern const struct hypervisor_x86 x86_hyper_vmware;
2117     -extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
2118     -extern const struct hypervisor_x86 x86_hyper_xen_pv;
2119     -extern const struct hypervisor_x86 x86_hyper_xen_hvm;
2120     -extern const struct hypervisor_x86 x86_hyper_kvm;
2121     -
2122     +extern enum x86_hypervisor_type x86_hyper_type;
2123     extern void init_hypervisor_platform(void);
2124     -extern bool hypervisor_x2apic_available(void);
2125     -extern void hypervisor_pin_vcpu(int cpu);
2126     -
2127     -static inline void hypervisor_init_mem_mapping(void)
2128     +static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
2129     {
2130     - if (x86_hyper && x86_hyper->init_mem_mapping)
2131     - x86_hyper->init_mem_mapping();
2132     + return x86_hyper_type == type;
2133     }
2134     #else
2135     static inline void init_hypervisor_platform(void) { }
2136     -static inline bool hypervisor_x2apic_available(void) { return false; }
2137     -static inline void hypervisor_init_mem_mapping(void) { }
2138     +static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
2139     +{
2140     + return type == X86_HYPER_NATIVE;
2141     +}
2142     #endif /* CONFIG_HYPERVISOR_GUEST */
2143     #endif /* _ASM_X86_HYPERVISOR_H */
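With the rework above, callers stop comparing the old x86_hyper pointer against exported hypervisor_x86 structures and instead ask for the detected type via hypervisor_is_type(). Roughly, a caller looks like the following standalone sketch, which copies the enum and helper from the hunk above (the detection value is faked purely for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the enum and inline helper introduced above. */
enum x86_hypervisor_type {
	X86_HYPER_NATIVE = 0,
	X86_HYPER_VMWARE,
	X86_HYPER_MS_HYPERV,
	X86_HYPER_XEN_PV,
	X86_HYPER_XEN_HVM,
	X86_HYPER_KVM,
};

static enum x86_hypervisor_type x86_hyper_type = X86_HYPER_KVM; /* pretend detection result */

static bool hypervisor_is_type(enum x86_hypervisor_type type)
{
	return x86_hyper_type == type;
}

int main(void)
{
	/* A caller no longer compares hypervisor_x86 pointers; it asks for the type. */
	if (hypervisor_is_type(X86_HYPER_KVM))
		printf("running as a KVM guest\n");
	else if (hypervisor_is_type(X86_HYPER_NATIVE))
		printf("running on bare metal\n");
	return 0;
}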
2144     diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
2145     index c8ef23f2c28f..89f08955fff7 100644
2146     --- a/arch/x86/include/asm/irqflags.h
2147     +++ b/arch/x86/include/asm/irqflags.h
2148     @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
2149     swapgs; \
2150     sysretl
2151    
2152     +#ifdef CONFIG_DEBUG_ENTRY
2153     +#define SAVE_FLAGS(x) pushfq; popq %rax
2154     +#endif
2155     #else
2156     #define INTERRUPT_RETURN iret
2157     #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
2158     diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
2159     index f86a8caa561e..395c9631e000 100644
2160     --- a/arch/x86/include/asm/kdebug.h
2161     +++ b/arch/x86/include/asm/kdebug.h
2162     @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
2163     extern int __must_check __die(const char *, struct pt_regs *, long);
2164     extern void show_stack_regs(struct pt_regs *regs);
2165     extern void __show_regs(struct pt_regs *regs, int all);
2166     +extern void show_iret_regs(struct pt_regs *regs);
2167     extern unsigned long oops_begin(void);
2168     extern void oops_end(unsigned long, struct pt_regs *, int signr);
2169    
2170     diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
2171     index 6699fc441644..6d16d15d09a0 100644
2172     --- a/arch/x86/include/asm/mmu_context.h
2173     +++ b/arch/x86/include/asm/mmu_context.h
2174     @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
2175     #ifdef CONFIG_MODIFY_LDT_SYSCALL
2176     struct ldt_struct *ldt;
2177    
2178     - /* lockless_dereference synchronizes with smp_store_release */
2179     - ldt = lockless_dereference(mm->context.ldt);
2180     + /* READ_ONCE synchronizes with smp_store_release */
2181     + ldt = READ_ONCE(mm->context.ldt);
2182    
2183     /*
2184     * Any change to mm->context.ldt is followed by an IPI to all
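The LDT pointer read here is published elsewhere with smp_store_release(), and READ_ONCE() now carries the dependency ordering that lockless_dereference() used to add explicitly, which is why the one-line conversion is safe. A minimal userspace sketch of that publish/read pairing, using C11 atomics as conservative stand-ins for the kernel primitives (an approximation for illustration, not the kernel implementation):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ldt_struct { int nr_entries; };

static _Atomic(struct ldt_struct *) current_ldt;

/* Writer: fully initialise the LDT, then publish it with release
 * semantics (smp_store_release() in the kernel). */
static void install_ldt(struct ldt_struct *ldt)
{
	atomic_store_explicit(&current_ldt, ldt, memory_order_release);
}

/* Reader: one non-torn load of the pointer. READ_ONCE() relies on the
 * address dependency for ordering; acquire is used here as an
 * easy-to-express conservative stand-in. */
static int ldt_entries(void)
{
	struct ldt_struct *ldt =
		atomic_load_explicit(&current_ldt, memory_order_acquire);

	return ldt ? ldt->nr_entries : 0;
}

int main(void)
{
	struct ldt_struct *ldt = malloc(sizeof(*ldt));

	ldt->nr_entries = 3;        /* initialise before publishing */
	install_ldt(ldt);
	printf("nr_entries = %d\n", ldt_entries());
	free(ldt);
	return 0;
}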
2185     diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
2186     index 8546fafa21a9..7948a17febb4 100644
2187     --- a/arch/x86/include/asm/module.h
2188     +++ b/arch/x86/include/asm/module.h
2189     @@ -6,7 +6,7 @@
2190     #include <asm/orc_types.h>
2191    
2192     struct mod_arch_specific {
2193     -#ifdef CONFIG_ORC_UNWINDER
2194     +#ifdef CONFIG_UNWINDER_ORC
2195     unsigned int num_orcs;
2196     int *orc_unwind_ip;
2197     struct orc_entry *orc_unwind;
2198     diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
2199     index fd81228e8037..892df375b615 100644
2200     --- a/arch/x86/include/asm/paravirt.h
2201     +++ b/arch/x86/include/asm/paravirt.h
2202     @@ -16,10 +16,9 @@
2203     #include <linux/cpumask.h>
2204     #include <asm/frame.h>
2205    
2206     -static inline void load_sp0(struct tss_struct *tss,
2207     - struct thread_struct *thread)
2208     +static inline void load_sp0(unsigned long sp0)
2209     {
2210     - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
2211     + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
2212     }
2213    
2214     /* The paravirtualized CPUID instruction. */
2215     @@ -928,6 +927,15 @@ extern void default_banner(void);
2216     PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
2217     CLBR_NONE, \
2218     jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
2219     +
2220     +#ifdef CONFIG_DEBUG_ENTRY
2221     +#define SAVE_FLAGS(clobbers) \
2222     + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
2223     + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
2224     + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
2225     + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
2226     +#endif
2227     +
2228     #endif /* CONFIG_X86_32 */
2229    
2230     #endif /* __ASSEMBLY__ */
2231     diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
2232     index 10cc3b9709fe..6ec54d01972d 100644
2233     --- a/arch/x86/include/asm/paravirt_types.h
2234     +++ b/arch/x86/include/asm/paravirt_types.h
2235     @@ -134,7 +134,7 @@ struct pv_cpu_ops {
2236     void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
2237     void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
2238    
2239     - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
2240     + void (*load_sp0)(unsigned long sp0);
2241    
2242     void (*set_iopl_mask)(unsigned mask);
2243    
2244     diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
2245     index 377f1ffd18be..ba3c523aaf16 100644
2246     --- a/arch/x86/include/asm/percpu.h
2247     +++ b/arch/x86/include/asm/percpu.h
2248     @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
2249     {
2250     bool oldbit;
2251    
2252     - asm volatile("bt "__percpu_arg(2)",%1\n\t"
2253     + asm volatile("bt "__percpu_arg(2)",%1"
2254     CC_SET(c)
2255     : CC_OUT(c) (oldbit)
2256     : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
2257     diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
2258     index 59df7b47a434..9e9b05fc4860 100644
2259     --- a/arch/x86/include/asm/pgtable_types.h
2260     +++ b/arch/x86/include/asm/pgtable_types.h
2261     @@ -200,10 +200,9 @@ enum page_cache_mode {
2262    
2263     #define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
2264    
2265     -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
2266     - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
2267     #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
2268     _PAGE_DIRTY | _PAGE_ENC)
2269     +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
2270    
2271     #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
2272     #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
2273     diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
2274     index bdac19ab2488..da943411d3d8 100644
2275     --- a/arch/x86/include/asm/processor.h
2276     +++ b/arch/x86/include/asm/processor.h
2277     @@ -162,9 +162,9 @@ enum cpuid_regs_idx {
2278     extern struct cpuinfo_x86 boot_cpu_data;
2279     extern struct cpuinfo_x86 new_cpu_data;
2280    
2281     -extern struct tss_struct doublefault_tss;
2282     -extern __u32 cpu_caps_cleared[NCAPINTS];
2283     -extern __u32 cpu_caps_set[NCAPINTS];
2284     +extern struct x86_hw_tss doublefault_tss;
2285     +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
2286     +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
2287    
2288     #ifdef CONFIG_SMP
2289     DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
2290     @@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir)
2291     write_cr3(__sme_pa(pgdir));
2292     }
2293    
2294     +/*
2295     + * Note that while the legacy 'TSS' name comes from 'Task State Segment',
2296     + * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
2297     + * unrelated to the task-switch mechanism:
2298     + */
2299     #ifdef CONFIG_X86_32
2300     /* This is the TSS defined by the hardware. */
2301     struct x86_hw_tss {
2302     @@ -304,7 +309,13 @@ struct x86_hw_tss {
2303     struct x86_hw_tss {
2304     u32 reserved1;
2305     u64 sp0;
2306     +
2307     + /*
2308     + * We store cpu_current_top_of_stack in sp1 so it's always accessible.
2309     + * Linux does not use ring 1, so sp1 is not otherwise needed.
2310     + */
2311     u64 sp1;
2312     +
2313     u64 sp2;
2314     u64 reserved2;
2315     u64 ist[7];
2316     @@ -322,12 +333,22 @@ struct x86_hw_tss {
2317     #define IO_BITMAP_BITS 65536
2318     #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
2319     #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
2320     -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
2321     +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
2322     #define INVALID_IO_BITMAP_OFFSET 0x8000
2323    
2324     +struct SYSENTER_stack {
2325     + unsigned long words[64];
2326     +};
2327     +
2328     +struct SYSENTER_stack_page {
2329     + struct SYSENTER_stack stack;
2330     +} __aligned(PAGE_SIZE);
2331     +
2332     struct tss_struct {
2333     /*
2334     - * The hardware state:
2335     + * The fixed hardware portion. This must not cross a page boundary
2336     + * at risk of violating the SDM's advice and potentially triggering
2337     + * errata.
2338     */
2339     struct x86_hw_tss x86_tss;
2340    
2341     @@ -338,18 +359,9 @@ struct tss_struct {
2342     * be within the limit.
2343     */
2344     unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
2345     +} __aligned(PAGE_SIZE);
2346    
2347     -#ifdef CONFIG_X86_32
2348     - /*
2349     - * Space for the temporary SYSENTER stack.
2350     - */
2351     - unsigned long SYSENTER_stack_canary;
2352     - unsigned long SYSENTER_stack[64];
2353     -#endif
2354     -
2355     -} ____cacheline_aligned;
2356     -
2357     -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
2358     +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
2359    
2360     /*
2361     * sizeof(unsigned long) coming from an extra "long" at the end
2362     @@ -363,6 +375,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
2363    
2364     #ifdef CONFIG_X86_32
2365     DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
2366     +#else
2367     +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
2368     +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
2369     #endif
2370    
2371     /*
2372     @@ -431,7 +446,9 @@ typedef struct {
2373     struct thread_struct {
2374     /* Cached TLS descriptors: */
2375     struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
2376     +#ifdef CONFIG_X86_32
2377     unsigned long sp0;
2378     +#endif
2379     unsigned long sp;
2380     #ifdef CONFIG_X86_32
2381     unsigned long sysenter_cs;
2382     @@ -518,16 +535,9 @@ static inline void native_set_iopl_mask(unsigned mask)
2383     }
2384    
2385     static inline void
2386     -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
2387     +native_load_sp0(unsigned long sp0)
2388     {
2389     - tss->x86_tss.sp0 = thread->sp0;
2390     -#ifdef CONFIG_X86_32
2391     - /* Only happens when SEP is enabled, no need to test "SEP"arately: */
2392     - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
2393     - tss->x86_tss.ss1 = thread->sysenter_cs;
2394     - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
2395     - }
2396     -#endif
2397     + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
2398     }
2399    
2400     static inline void native_swapgs(void)
2401     @@ -539,12 +549,18 @@ static inline void native_swapgs(void)
2402    
2403     static inline unsigned long current_top_of_stack(void)
2404     {
2405     -#ifdef CONFIG_X86_64
2406     - return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
2407     -#else
2408     - /* sp0 on x86_32 is special in and around vm86 mode. */
2409     + /*
2410     + * We can't read directly from tss.sp0: sp0 on x86_32 is special in
2411     + * and around vm86 mode and sp0 on x86_64 is special because of the
2412     + * entry trampoline.
2413     + */
2414     return this_cpu_read_stable(cpu_current_top_of_stack);
2415     -#endif
2416     +}
2417     +
2418     +static inline bool on_thread_stack(void)
2419     +{
2420     + return (unsigned long)(current_top_of_stack() -
2421     + current_stack_pointer) < THREAD_SIZE;
2422     }
2423    
2424     #ifdef CONFIG_PARAVIRT
2425     @@ -552,10 +568,9 @@ static inline unsigned long current_top_of_stack(void)
2426     #else
2427     #define __cpuid native_cpuid
2428    
2429     -static inline void load_sp0(struct tss_struct *tss,
2430     - struct thread_struct *thread)
2431     +static inline void load_sp0(unsigned long sp0)
2432     {
2433     - native_load_sp0(tss, thread);
2434     + native_load_sp0(sp0);
2435     }
2436    
2437     #define set_iopl_mask native_set_iopl_mask
2438     @@ -804,6 +819,15 @@ static inline void spin_lock_prefetch(const void *x)
2439     #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
2440     TOP_OF_KERNEL_STACK_PADDING)
2441    
2442     +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
2443     +
2444     +#define task_pt_regs(task) \
2445     +({ \
2446     + unsigned long __ptr = (unsigned long)task_stack_page(task); \
2447     + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
2448     + ((struct pt_regs *)__ptr) - 1; \
2449     +})
2450     +
2451     #ifdef CONFIG_X86_32
2452     /*
2453     * User space process size: 3GB (default).
2454     @@ -823,23 +847,6 @@ static inline void spin_lock_prefetch(const void *x)
2455     .addr_limit = KERNEL_DS, \
2456     }
2457    
2458     -/*
2459     - * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
2460     - * This is necessary to guarantee that the entire "struct pt_regs"
2461     - * is accessible even if the CPU haven't stored the SS/ESP registers
2462     - * on the stack (interrupt gate does not save these registers
2463     - * when switching to the same priv ring).
2464     - * Therefore beware: accessing the ss/esp fields of the
2465     - * "struct pt_regs" is possible, but they may contain the
2466     - * completely wrong values.
2467     - */
2468     -#define task_pt_regs(task) \
2469     -({ \
2470     - unsigned long __ptr = (unsigned long)task_stack_page(task); \
2471     - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
2472     - ((struct pt_regs *)__ptr) - 1; \
2473     -})
2474     -
2475     #define KSTK_ESP(task) (task_pt_regs(task)->sp)
2476    
2477     #else
2478     @@ -873,11 +880,9 @@ static inline void spin_lock_prefetch(const void *x)
2479     #define STACK_TOP_MAX TASK_SIZE_MAX
2480    
2481     #define INIT_THREAD { \
2482     - .sp0 = TOP_OF_INIT_STACK, \
2483     .addr_limit = KERNEL_DS, \
2484     }
2485    
2486     -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
2487     extern unsigned long KSTK_ESP(struct task_struct *task);
2488    
2489     #endif /* CONFIG_X86_64 */
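
The on_thread_stack() helper added above leans on unsigned wrap-around: current_top_of_stack() - current_stack_pointer, taken as an unsigned value, is below THREAD_SIZE only when the stack pointer sits within the THREAD_SIZE bytes directly below the top; any pointer above the top wraps to a huge value and fails the same single comparison. A standalone user-space sketch of that range check (on_stack(), the made-up addresses and the 16 KB THREAD_SIZE are illustrative, not kernel values):

#include <stdio.h>
#include <stdbool.h>

#define THREAD_SIZE (16UL * 1024)

static bool on_stack(unsigned long top, unsigned long sp)
{
	/* One unsigned compare covers both "below the bottom" and "above the top". */
	return (top - sp) < THREAD_SIZE;
}

int main(void)
{
	unsigned long top = 0x7ffff000UL;   /* made-up stack top */

	printf("%d\n", on_stack(top, top - 64));                /* 1: inside the stack      */
	printf("%d\n", on_stack(top, top - THREAD_SIZE - 8));   /* 0: below the stack       */
	printf("%d\n", on_stack(top, top + 8));                 /* 0: above the top (wraps) */
	return 0;
}
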
2490     diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
2491     index c0e3c45cf6ab..14131dd06b29 100644
2492     --- a/arch/x86/include/asm/ptrace.h
2493     +++ b/arch/x86/include/asm/ptrace.h
2494     @@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
2495     #endif
2496     }
2497    
2498     -#ifdef CONFIG_X86_64
2499     static inline bool user_64bit_mode(struct pt_regs *regs)
2500     {
2501     +#ifdef CONFIG_X86_64
2502     #ifndef CONFIG_PARAVIRT
2503     /*
2504     * On non-paravirt systems, this is the only long mode CPL 3
2505     @@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
2506     /* Headers are too twisted for this to go in paravirt.h. */
2507     return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
2508     #endif
2509     +#else /* !CONFIG_X86_64 */
2510     + return false;
2511     +#endif
2512     }
2513    
2514     +#ifdef CONFIG_X86_64
2515     #define current_user_stack_pointer() current_pt_regs()->sp
2516     #define compat_user_stack_pointer() current_pt_regs()->sp
2517     #endif
2518     diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
2519     index d8f3a6ae9f6c..f91c365e57c3 100644
2520     --- a/arch/x86/include/asm/rmwcc.h
2521     +++ b/arch/x86/include/asm/rmwcc.h
2522     @@ -29,7 +29,7 @@ cc_label: \
2523     #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
2524     do { \
2525     bool c; \
2526     - asm volatile (fullop ";" CC_SET(cc) \
2527     + asm volatile (fullop CC_SET(cc) \
2528     : [counter] "+m" (var), CC_OUT(cc) (c) \
2529     : __VA_ARGS__ : clobbers); \
2530     return c; \
2531     diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
2532     index 8da111b3c342..f8062bfd43a0 100644
2533     --- a/arch/x86/include/asm/stacktrace.h
2534     +++ b/arch/x86/include/asm/stacktrace.h
2535     @@ -16,6 +16,7 @@ enum stack_type {
2536     STACK_TYPE_TASK,
2537     STACK_TYPE_IRQ,
2538     STACK_TYPE_SOFTIRQ,
2539     + STACK_TYPE_SYSENTER,
2540     STACK_TYPE_EXCEPTION,
2541     STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
2542     };
2543     @@ -28,6 +29,8 @@ struct stack_info {
2544     bool in_task_stack(unsigned long *stack, struct task_struct *task,
2545     struct stack_info *info);
2546    
2547     +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
2548     +
2549     int get_stack_info(unsigned long *stack, struct task_struct *task,
2550     struct stack_info *info, unsigned long *visit_mask);
2551    
2552     diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
2553     index 899084b70412..9b6df68d8fd1 100644
2554     --- a/arch/x86/include/asm/switch_to.h
2555     +++ b/arch/x86/include/asm/switch_to.h
2556     @@ -2,6 +2,8 @@
2557     #ifndef _ASM_X86_SWITCH_TO_H
2558     #define _ASM_X86_SWITCH_TO_H
2559    
2560     +#include <linux/sched/task_stack.h>
2561     +
2562     struct task_struct; /* one of the stranger aspects of C forward declarations */
2563    
2564     struct task_struct *__switch_to_asm(struct task_struct *prev,
2565     @@ -73,4 +75,28 @@ do { \
2566     ((last) = __switch_to_asm((prev), (next))); \
2567     } while (0)
2568    
2569     +#ifdef CONFIG_X86_32
2570     +static inline void refresh_sysenter_cs(struct thread_struct *thread)
2571     +{
2572     + /* Only happens when SEP is enabled, no need to test "SEP"arately: */
2573     + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
2574     + return;
2575     +
2576     + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
2577     + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
2578     +}
2579     +#endif
2580     +
2581     +/* This is used when switching tasks or entering/exiting vm86 mode. */
2582     +static inline void update_sp0(struct task_struct *task)
2583     +{
2584     + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
2585     +#ifdef CONFIG_X86_32
2586     + load_sp0(task->thread.sp0);
2587     +#else
2588     + if (static_cpu_has(X86_FEATURE_XENPV))
2589     + load_sp0(task_top_of_stack(task));
2590     +#endif
2591     +}
2592     +
2593     #endif /* _ASM_X86_SWITCH_TO_H */
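
refresh_sysenter_cs() above skips the wrmsr when the per-CPU cached copy in ss1 already holds the wanted selector, since MSR writes are comparatively expensive. A minimal sketch of that cache-before-write pattern (refresh_cs() and slow_write() are invented stand-ins, not kernel interfaces):

#include <stdio.h>

static unsigned int cached_cs;          /* last value pushed to the "MSR" */

static void slow_write(unsigned int v)  /* stands in for wrmsr() */
{
	printf("expensive write: %#x\n", v);
}

static void refresh_cs(unsigned int want)
{
	if (cached_cs == want)              /* value unchanged: skip the write */
		return;
	cached_cs = want;
	slow_write(want);
}

int main(void)
{
	refresh_cs(0x10);   /* writes  */
	refresh_cs(0x10);   /* skipped */
	refresh_cs(0x23);   /* writes  */
	return 0;
}
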
2594     diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
2595     index 70f425947dc5..00223333821a 100644
2596     --- a/arch/x86/include/asm/thread_info.h
2597     +++ b/arch/x86/include/asm/thread_info.h
2598     @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
2599     #else /* !__ASSEMBLY__ */
2600    
2601     #ifdef CONFIG_X86_64
2602     -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
2603     +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
2604     #endif
2605    
2606     #endif
2607     diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
2608     index fa60398bbc3a..069c04be1507 100644
2609     --- a/arch/x86/include/asm/trace/fpu.h
2610     +++ b/arch/x86/include/asm/trace/fpu.h
2611     @@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
2612     )
2613     );
2614    
2615     -DEFINE_EVENT(x86_fpu, x86_fpu_state,
2616     - TP_PROTO(struct fpu *fpu),
2617     - TP_ARGS(fpu)
2618     -);
2619     -
2620     DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
2621     TP_PROTO(struct fpu *fpu),
2622     TP_ARGS(fpu)
2623     @@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
2624     TP_ARGS(fpu)
2625     );
2626    
2627     -DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
2628     - TP_PROTO(struct fpu *fpu),
2629     - TP_ARGS(fpu)
2630     -);
2631     -
2632     DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
2633     TP_PROTO(struct fpu *fpu),
2634     TP_ARGS(fpu)
2635     diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
2636     index b0cced97a6ce..31051f35cbb7 100644
2637     --- a/arch/x86/include/asm/traps.h
2638     +++ b/arch/x86/include/asm/traps.h
2639     @@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
2640    
2641     #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
2642     asmlinkage void xen_divide_error(void);
2643     +asmlinkage void xen_xennmi(void);
2644     asmlinkage void xen_xendebug(void);
2645     asmlinkage void xen_xenint3(void);
2646     -asmlinkage void xen_nmi(void);
2647     asmlinkage void xen_overflow(void);
2648     asmlinkage void xen_bounds(void);
2649     asmlinkage void xen_invalid_op(void);
2650     @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
2651     dotraplinkage void do_stack_segment(struct pt_regs *, long);
2652     #ifdef CONFIG_X86_64
2653     dotraplinkage void do_double_fault(struct pt_regs *, long);
2654     -asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
2655     #endif
2656     dotraplinkage void do_general_protection(struct pt_regs *, long);
2657     dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
2658     @@ -145,4 +144,22 @@ enum {
2659     X86_TRAP_IRET = 32, /* 32, IRET Exception */
2660     };
2661    
2662     +/*
2663     + * Page fault error code bits:
2664     + *
2665     + * bit 0 == 0: no page found 1: protection fault
2666     + * bit 1 == 0: read access 1: write access
2667     + * bit 2 == 0: kernel-mode access 1: user-mode access
2668     + * bit 3 == 1: use of reserved bit detected
2669     + * bit 4 == 1: fault was an instruction fetch
2670     + * bit 5 == 1: protection keys block access
2671     + */
2672     +enum x86_pf_error_code {
2673     + X86_PF_PROT = 1 << 0,
2674     + X86_PF_WRITE = 1 << 1,
2675     + X86_PF_USER = 1 << 2,
2676     + X86_PF_RSVD = 1 << 3,
2677     + X86_PF_INSTR = 1 << 4,
2678     + X86_PF_PK = 1 << 5,
2679     +};
2680     #endif /* _ASM_X86_TRAPS_H */
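
The x86_pf_error_code enum added above turns the hardware page-fault error code into named single-bit flags that handlers test with bitwise AND. A small user-space sketch of decoding such a code (decode_pf() and its output wording are invented for the example; the bit values follow the comment above):

#include <stdio.h>

#define X86_PF_PROT  (1 << 0)   /* 0: page not present, 1: protection fault */
#define X86_PF_WRITE (1 << 1)   /* 0: read access,      1: write access     */
#define X86_PF_USER  (1 << 2)   /* 0: kernel mode,      1: user mode        */
#define X86_PF_RSVD  (1 << 3)   /* reserved bit set in a paging entry       */
#define X86_PF_INSTR (1 << 4)   /* fault was an instruction fetch           */
#define X86_PF_PK    (1 << 5)   /* access blocked by a protection key       */

static void decode_pf(unsigned long code)
{
	printf("%s %s in %s mode%s%s\n",
	       code & X86_PF_PROT  ? "protection fault on" : "missing page for",
	       code & X86_PF_INSTR ? "instruction fetch" :
	       code & X86_PF_WRITE ? "write" : "read",
	       code & X86_PF_USER  ? "user" : "kernel",
	       code & X86_PF_RSVD  ? ", reserved bit set" : "",
	       code & X86_PF_PK    ? ", protection-key denied" : "");
}

int main(void)
{
	decode_pf(X86_PF_USER | X86_PF_WRITE);    /* user write to an unmapped page     */
	decode_pf(X86_PF_PROT | X86_PF_INSTR);    /* kernel fetch from a protected page */
	return 0;
}
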
2681     diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
2682     index 87adc0d38c4a..c1688c2d0a12 100644
2683     --- a/arch/x86/include/asm/unwind.h
2684     +++ b/arch/x86/include/asm/unwind.h
2685     @@ -7,17 +7,20 @@
2686     #include <asm/ptrace.h>
2687     #include <asm/stacktrace.h>
2688    
2689     +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
2690     +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
2691     +
2692     struct unwind_state {
2693     struct stack_info stack_info;
2694     unsigned long stack_mask;
2695     struct task_struct *task;
2696     int graph_idx;
2697     bool error;
2698     -#if defined(CONFIG_ORC_UNWINDER)
2699     +#if defined(CONFIG_UNWINDER_ORC)
2700     bool signal, full_regs;
2701     unsigned long sp, bp, ip;
2702     struct pt_regs *regs;
2703     -#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
2704     +#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
2705     bool got_irq;
2706     unsigned long *bp, *orig_sp, ip;
2707     struct pt_regs *regs;
2708     @@ -51,7 +54,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
2709     __unwind_start(state, task, regs, first_frame);
2710     }
2711    
2712     -#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
2713     +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
2714     +/*
2715     + * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
2716     + * only the iret frame registers are accessible. Use with caution!
2717     + */
2718     static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
2719     {
2720     if (unwind_done(state))
2721     @@ -66,7 +73,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
2722     }
2723     #endif
2724    
2725     -#ifdef CONFIG_ORC_UNWINDER
2726     +#ifdef CONFIG_UNWINDER_ORC
2727     void unwind_init(void);
2728     void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
2729     void *orc, size_t orc_size);
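
IRET_FRAME_OFFSET and IRET_FRAME_SIZE above describe the tail of struct pt_regs that the hardware always pushes (ip, cs, flags, sp, ss); earlier members may be missing when an exception interrupts entry code before the full frame is saved, which is why the new warning says only the iret registers are safe to dereference. A sketch of the same offsetof-based tail computation over a simplified mock layout (mock_regs is not the real pt_regs):

#include <stdio.h>
#include <stddef.h>

struct mock_regs {                          /* simplified stand-in for struct pt_regs */
	unsigned long r15, r14, bp, ax;         /* general registers, possibly unsaved    */
	unsigned long ip, cs, flags, sp, ss;    /* hardware iret frame, always present    */
};

#define IRET_OFFSET offsetof(struct mock_regs, ip)
#define IRET_SIZE   (sizeof(struct mock_regs) - IRET_OFFSET)

int main(void)
{
	printf("iret frame starts at byte %zu, spans %zu bytes (%zu words)\n",
	       IRET_OFFSET, IRET_SIZE, IRET_SIZE / sizeof(unsigned long));
	return 0;
}
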
2730     diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
2731     index 8a1ebf9540dd..ad15a0fda917 100644
2732     --- a/arch/x86/include/asm/x86_init.h
2733     +++ b/arch/x86/include/asm/x86_init.h
2734     @@ -114,6 +114,18 @@ struct x86_init_pci {
2735     void (*fixup_irqs)(void);
2736     };
2737    
2738     +/**
2739     + * struct x86_hyper_init - x86 hypervisor init functions
2740     + * @init_platform: platform setup
2741     + * @x2apic_available: X2APIC detection
2742     + * @init_mem_mapping: setup early mappings during init_mem_mapping()
2743     + */
2744     +struct x86_hyper_init {
2745     + void (*init_platform)(void);
2746     + bool (*x2apic_available)(void);
2747     + void (*init_mem_mapping)(void);
2748     +};
2749     +
2750     /**
2751     * struct x86_init_ops - functions for platform specific setup
2752     *
2753     @@ -127,6 +139,7 @@ struct x86_init_ops {
2754     struct x86_init_timers timers;
2755     struct x86_init_iommu iommu;
2756     struct x86_init_pci pci;
2757     + struct x86_hyper_init hyper;
2758     };
2759    
2760     /**
2761     @@ -199,6 +212,15 @@ struct x86_legacy_features {
2762     struct x86_legacy_devices devices;
2763     };
2764    
2765     +/**
2766     + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
2767     + *
2768     + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
2769     + */
2770     +struct x86_hyper_runtime {
2771     + void (*pin_vcpu)(int cpu);
2772     +};
2773     +
2774     /**
2775     * struct x86_platform_ops - platform specific runtime functions
2776     * @calibrate_cpu: calibrate CPU
2777     @@ -218,6 +240,7 @@ struct x86_legacy_features {
2778     * possible in x86_early_init_platform_quirks() by
2779     * only using the current x86_hardware_subarch
2780     * semantics.
2781     + * @hyper: x86 hypervisor specific runtime callbacks
2782     */
2783     struct x86_platform_ops {
2784     unsigned long (*calibrate_cpu)(void);
2785     @@ -233,6 +256,7 @@ struct x86_platform_ops {
2786     void (*apic_post_init)(void);
2787     struct x86_legacy_features legacy;
2788     void (*set_legacy_features)(void);
2789     + struct x86_hyper_runtime hyper;
2790     };
2791    
2792     struct pci_dev;
2793     diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
2794     index 6f3355399665..53b4ca55ebb6 100644
2795     --- a/arch/x86/include/uapi/asm/processor-flags.h
2796     +++ b/arch/x86/include/uapi/asm/processor-flags.h
2797     @@ -152,5 +152,8 @@
2798     #define CX86_ARR_BASE 0xc4
2799     #define CX86_RCR_BASE 0xdc
2800    
2801     +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
2802     + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
2803     + X86_CR0_PG)
2804    
2805     #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
2806     diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
2807     index 5f70044340ff..295abaa58add 100644
2808     --- a/arch/x86/kernel/Makefile
2809     +++ b/arch/x86/kernel/Makefile
2810     @@ -25,9 +25,9 @@ endif
2811     KASAN_SANITIZE_head$(BITS).o := n
2812     KASAN_SANITIZE_dumpstack.o := n
2813     KASAN_SANITIZE_dumpstack_$(BITS).o := n
2814     -KASAN_SANITIZE_stacktrace.o := n
2815     +KASAN_SANITIZE_stacktrace.o := n
2816     +KASAN_SANITIZE_paravirt.o := n
2817    
2818     -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
2819     OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
2820     OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
2821     OBJECT_FILES_NON_STANDARD_test_nx.o := y
2822     @@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
2823     obj-$(CONFIG_TRACING) += tracepoint.o
2824     obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
2825    
2826     -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
2827     -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
2828     -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
2829     +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
2830     +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
2831     +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
2832    
2833     ###
2834     # 64 bit specific files
2835     diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
2836     index ff891772c9f8..89c7c8569e5e 100644
2837     --- a/arch/x86/kernel/apic/apic.c
2838     +++ b/arch/x86/kernel/apic/apic.c
2839     @@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
2840     * under KVM
2841     */
2842     if (max_physical_apicid > 255 ||
2843     - !hypervisor_x2apic_available()) {
2844     + !x86_init.hyper.x2apic_available()) {
2845     pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
2846     x2apic_disable();
2847     return;
2848     diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
2849     index 0d57bb9079c9..c0b694810ff4 100644
2850     --- a/arch/x86/kernel/apic/x2apic_uv_x.c
2851     +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
2852     @@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
2853     /*
2854     * percpu heartbeat timer
2855     */
2856     -static void uv_heartbeat(unsigned long ignored)
2857     +static void uv_heartbeat(struct timer_list *timer)
2858     {
2859     - struct timer_list *timer = &uv_scir_info->timer;
2860     unsigned char bits = uv_scir_info->state;
2861    
2862     /* Flip heartbeat bit: */
2863     @@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
2864     struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
2865    
2866     uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
2867     - setup_pinned_timer(timer, uv_heartbeat, cpu);
2868     + timer_setup(timer, uv_heartbeat, TIMER_PINNED);
2869     timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
2870     add_timer_on(timer, cpu);
2871     uv_cpu_scir_info(cpu)->enabled = 1;
2872     diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
2873     index 8ea78275480d..cd360a5e0dca 100644
2874     --- a/arch/x86/kernel/asm-offsets.c
2875     +++ b/arch/x86/kernel/asm-offsets.c
2876     @@ -93,4 +93,10 @@ void common(void) {
2877    
2878     BLANK();
2879     DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
2880     +
2881     + /* Layout info for cpu_entry_area */
2882     + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
2883     + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
2884     + OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
2885     + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
2886     }
2887     diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
2888     index dedf428b20b6..7d20d9c0b3d6 100644
2889     --- a/arch/x86/kernel/asm-offsets_32.c
2890     +++ b/arch/x86/kernel/asm-offsets_32.c
2891     @@ -47,13 +47,8 @@ void foo(void)
2892     BLANK();
2893    
2894     /* Offset from the sysenter stack to tss.sp0 */
2895     - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
2896     - offsetofend(struct tss_struct, SYSENTER_stack));
2897     -
2898     - /* Offset from cpu_tss to SYSENTER_stack */
2899     - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
2900     - /* Size of SYSENTER_stack */
2901     - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
2902     + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
2903     + offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
2904    
2905     #ifdef CONFIG_CC_STACKPROTECTOR
2906     BLANK();
2907     diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
2908     index 630212fa9b9d..bf51e51d808d 100644
2909     --- a/arch/x86/kernel/asm-offsets_64.c
2910     +++ b/arch/x86/kernel/asm-offsets_64.c
2911     @@ -23,6 +23,9 @@ int main(void)
2912     #ifdef CONFIG_PARAVIRT
2913     OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
2914     OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
2915     +#ifdef CONFIG_DEBUG_ENTRY
2916     + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
2917     +#endif
2918     BLANK();
2919     #endif
2920    
2921     @@ -63,6 +66,7 @@ int main(void)
2922    
2923     OFFSET(TSS_ist, tss_struct, x86_tss.ist);
2924     OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
2925     + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
2926     BLANK();
2927    
2928     #ifdef CONFIG_CC_STACKPROTECTOR
2929     diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
2930     index c60922a66385..90cb82dbba57 100644
2931     --- a/arch/x86/kernel/cpu/Makefile
2932     +++ b/arch/x86/kernel/cpu/Makefile
2933     @@ -23,6 +23,7 @@ obj-y += rdrand.o
2934     obj-y += match.o
2935     obj-y += bugs.o
2936     obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
2937     +obj-y += cpuid-deps.o
2938    
2939     obj-$(CONFIG_PROC_FS) += proc.o
2940     obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
2941     diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
2942     index d58184b7cd44..bcb75dc97d44 100644
2943     --- a/arch/x86/kernel/cpu/amd.c
2944     +++ b/arch/x86/kernel/cpu/amd.c
2945     @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
2946     case 0x17: init_amd_zn(c); break;
2947     }
2948    
2949     - /* Enable workaround for FXSAVE leak */
2950     - if (c->x86 >= 6)
2951     + /*
2952     + * Enable workaround for FXSAVE leak on CPUs
2953     + * without a XSaveErPtr feature
2954     + */
2955     + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
2956     set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
2957    
2958     cpu_detect_cache_sizes(c);
2959     diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2960     index c9176bae7fd8..034900623adf 100644
2961     --- a/arch/x86/kernel/cpu/common.c
2962     +++ b/arch/x86/kernel/cpu/common.c
2963     @@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
2964     return NULL; /* Not found */
2965     }
2966    
2967     -__u32 cpu_caps_cleared[NCAPINTS];
2968     -__u32 cpu_caps_set[NCAPINTS];
2969     +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
2970     +__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
2971    
2972     void load_percpu_segment(int cpu)
2973     {
2974     @@ -466,27 +466,116 @@ void load_percpu_segment(int cpu)
2975     load_stack_canary_segment();
2976     }
2977    
2978     -/* Setup the fixmap mapping only once per-processor */
2979     -static inline void setup_fixmap_gdt(int cpu)
2980     +#ifdef CONFIG_X86_32
2981     +/* The 32-bit entry code needs to find cpu_entry_area. */
2982     +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
2983     +#endif
2984     +
2985     +#ifdef CONFIG_X86_64
2986     +/*
2987     + * Special IST stacks which the CPU switches to when it calls
2988     + * an IST-marked descriptor entry. Up to 7 stacks (hardware
2989     + * limit), all of them are 4K, except the debug stack which
2990     + * is 8K.
2991     + */
2992     +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
2993     + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
2994     + [DEBUG_STACK - 1] = DEBUG_STKSZ
2995     +};
2996     +
2997     +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
2998     + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
2999     +#endif
3000     +
3001     +static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
3002     + SYSENTER_stack_storage);
3003     +
3004     +static void __init
3005     +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
3006     +{
3007     + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
3008     + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
3009     +}
3010     +
3011     +/* Setup the fixmap mappings only once per-processor */
3012     +static void __init setup_cpu_entry_area(int cpu)
3013     {
3014     #ifdef CONFIG_X86_64
3015     - /* On 64-bit systems, we use a read-only fixmap GDT. */
3016     - pgprot_t prot = PAGE_KERNEL_RO;
3017     + extern char _entry_trampoline[];
3018     +
3019     + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
3020     + pgprot_t gdt_prot = PAGE_KERNEL_RO;
3021     + pgprot_t tss_prot = PAGE_KERNEL_RO;
3022     #else
3023     /*
3024     * On native 32-bit systems, the GDT cannot be read-only because
3025     * our double fault handler uses a task gate, and entering through
3026     - * a task gate needs to change an available TSS to busy. If the GDT
3027     - * is read-only, that will triple fault.
3028     + * a task gate needs to change an available TSS to busy. If the
3029     + * GDT is read-only, that will triple fault. The TSS cannot be
3030     + * read-only because the CPU writes to it on task switches.
3031     *
3032     - * On Xen PV, the GDT must be read-only because the hypervisor requires
3033     - * it.
3034     + * On Xen PV, the GDT must be read-only because the hypervisor
3035     + * requires it.
3036     */
3037     - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
3038     + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
3039     PAGE_KERNEL_RO : PAGE_KERNEL;
3040     + pgprot_t tss_prot = PAGE_KERNEL;
3041     +#endif
3042     +
3043     + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
3044     + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
3045     + per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
3046     + PAGE_KERNEL);
3047     +
3048     + /*
3049     + * The Intel SDM says (Volume 3, 7.2.1):
3050     + *
3051     + * Avoid placing a page boundary in the part of the TSS that the
3052     + * processor reads during a task switch (the first 104 bytes). The
3053     + * processor may not correctly perform address translations if a
3054     + * boundary occurs in this area. During a task switch, the processor
3055     + * reads and writes into the first 104 bytes of each TSS (using
3056     + * contiguous physical addresses beginning with the physical address
3057     + * of the first byte of the TSS). So, after TSS access begins, if
3058     + * part of the 104 bytes is not physically contiguous, the processor
3059     + * will access incorrect information without generating a page-fault
3060     + * exception.
3061     + *
3062     + * There are also a lot of errata involving the TSS spanning a page
3063     + * boundary. Assert that we're not doing that.
3064     + */
3065     + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
3066     + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
3067     + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
3068     + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
3069     + &per_cpu(cpu_tss_rw, cpu),
3070     + sizeof(struct tss_struct) / PAGE_SIZE,
3071     + tss_prot);
3072     +
3073     +#ifdef CONFIG_X86_32
3074     + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
3075     #endif
3076    
3077     - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
3078     +#ifdef CONFIG_X86_64
3079     + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
3080     + BUILD_BUG_ON(sizeof(exception_stacks) !=
3081     + sizeof(((struct cpu_entry_area *)0)->exception_stacks));
3082     + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
3083     + &per_cpu(exception_stacks, cpu),
3084     + sizeof(exception_stacks) / PAGE_SIZE,
3085     + PAGE_KERNEL);
3086     +
3087     + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
3088     + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
3089     +#endif
3090     +}
3091     +
3092     +void __init setup_cpu_entry_areas(void)
3093     +{
3094     + unsigned int cpu;
3095     +
3096     + for_each_possible_cpu(cpu)
3097     + setup_cpu_entry_area(cpu);
3098     }
3099    
3100     /* Load the original GDT from the per-cpu structure */
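
The first BUILD_BUG_ON above asserts that the hardware TSS does not cross a page boundary by checking that the start and end offsets of x86_tss share a page number: XORing the two offsets leaves bits inside PAGE_MASK only if the offsets live on different pages. A user-space sketch of that predicate (same_page() and the 4 KB page size are assumptions for the example):

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static bool same_page(size_t a, size_t b)
{
	/* Equal page numbers <=> no differing bits above the page offset. */
	return ((a ^ b) & PAGE_MASK) == 0;
}

int main(void)
{
	printf("%d\n", same_page(0x010, 0xffc));    /* 1: both offsets on page 0 */
	printf("%d\n", same_page(0xffc, 0x1004));   /* 0: crosses into page 1    */
	return 0;
}
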
3101     @@ -723,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
3102     {
3103     int i;
3104    
3105     - for (i = 0; i < NCAPINTS; i++) {
3106     + for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
3107     c->x86_capability[i] &= ~cpu_caps_cleared[i];
3108     c->x86_capability[i] |= cpu_caps_set[i];
3109     }
3110     @@ -1225,7 +1314,7 @@ void enable_sep_cpu(void)
3111     return;
3112    
3113     cpu = get_cpu();
3114     - tss = &per_cpu(cpu_tss, cpu);
3115     + tss = &per_cpu(cpu_tss_rw, cpu);
3116    
3117     /*
3118     * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
3119     @@ -1234,11 +1323,7 @@ void enable_sep_cpu(void)
3120    
3121     tss->x86_tss.ss1 = __KERNEL_CS;
3122     wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
3123     -
3124     - wrmsr(MSR_IA32_SYSENTER_ESP,
3125     - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
3126     - 0);
3127     -
3128     + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
3129     wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
3130    
3131     put_cpu();
3132     @@ -1301,18 +1386,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
3133     pr_cont(")\n");
3134     }
3135    
3136     -static __init int setup_disablecpuid(char *arg)
3137     +/*
3138     + * clearcpuid= was already parsed in fpu__init_parse_early_param.
3139     + * But we need to keep a dummy __setup around otherwise it would
3140     + * show up as an environment variable for init.
3141     + */
3142     +static __init int setup_clearcpuid(char *arg)
3143     {
3144     - int bit;
3145     -
3146     - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
3147     - setup_clear_cpu_cap(bit);
3148     - else
3149     - return 0;
3150     -
3151     return 1;
3152     }
3153     -__setup("clearcpuid=", setup_disablecpuid);
3154     +__setup("clearcpuid=", setup_clearcpuid);
3155    
3156     #ifdef CONFIG_X86_64
3157     DEFINE_PER_CPU_FIRST(union irq_stack_union,
3158     @@ -1334,25 +1417,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
3159     DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
3160     EXPORT_PER_CPU_SYMBOL(__preempt_count);
3161    
3162     -/*
3163     - * Special IST stacks which the CPU switches to when it calls
3164     - * an IST-marked descriptor entry. Up to 7 stacks (hardware
3165     - * limit), all of them are 4K, except the debug stack which
3166     - * is 8K.
3167     - */
3168     -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
3169     - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
3170     - [DEBUG_STACK - 1] = DEBUG_STKSZ
3171     -};
3172     -
3173     -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
3174     - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
3175     -
3176     /* May not be marked __init: used by software suspend */
3177     void syscall_init(void)
3178     {
3179     + extern char _entry_trampoline[];
3180     + extern char entry_SYSCALL_64_trampoline[];
3181     +
3182     + int cpu = smp_processor_id();
3183     + unsigned long SYSCALL64_entry_trampoline =
3184     + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
3185     + (entry_SYSCALL_64_trampoline - _entry_trampoline);
3186     +
3187     wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
3188     - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
3189     + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
3190    
3191     #ifdef CONFIG_IA32_EMULATION
3192     wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
3193     @@ -1363,7 +1440,7 @@ void syscall_init(void)
3194     * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
3195     */
3196     wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
3197     - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
3198     + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
3199     wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
3200     #else
3201     wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
3202     @@ -1507,7 +1584,7 @@ void cpu_init(void)
3203     if (cpu)
3204     load_ucode_ap();
3205    
3206     - t = &per_cpu(cpu_tss, cpu);
3207     + t = &per_cpu(cpu_tss_rw, cpu);
3208     oist = &per_cpu(orig_ist, cpu);
3209    
3210     #ifdef CONFIG_NUMA
3211     @@ -1546,7 +1623,7 @@ void cpu_init(void)
3212     * set up and load the per-CPU TSS
3213     */
3214     if (!oist->ist[0]) {
3215     - char *estacks = per_cpu(exception_stacks, cpu);
3216     + char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
3217    
3218     for (v = 0; v < N_EXCEPTION_STACKS; v++) {
3219     estacks += exception_stack_sizes[v];
3220     @@ -1557,7 +1634,7 @@ void cpu_init(void)
3221     }
3222     }
3223    
3224     - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
3225     + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
3226    
3227     /*
3228     * <= is required because the CPU will access up to
3229     @@ -1572,9 +1649,14 @@ void cpu_init(void)
3230     initialize_tlbstate_and_flush();
3231     enter_lazy_tlb(&init_mm, me);
3232    
3233     - load_sp0(t, &current->thread);
3234     - set_tss_desc(cpu, t);
3235     + /*
3236     + * Initialize the TSS. sp0 points to the entry trampoline stack
3237     + * regardless of what task is running.
3238     + */
3239     + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
3240     load_TR_desc();
3241     + load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
3242     +
3243     load_mm_ldt(&init_mm);
3244    
3245     clear_all_debug_regs();
3246     @@ -1585,7 +1667,6 @@ void cpu_init(void)
3247     if (is_uv_system())
3248     uv_cpu_init();
3249    
3250     - setup_fixmap_gdt(cpu);
3251     load_fixmap_gdt(cpu);
3252     }
3253    
3254     @@ -1595,8 +1676,7 @@ void cpu_init(void)
3255     {
3256     int cpu = smp_processor_id();
3257     struct task_struct *curr = current;
3258     - struct tss_struct *t = &per_cpu(cpu_tss, cpu);
3259     - struct thread_struct *thread = &curr->thread;
3260     + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
3261    
3262     wait_for_master_cpu(cpu);
3263    
3264     @@ -1627,12 +1707,16 @@ void cpu_init(void)
3265     initialize_tlbstate_and_flush();
3266     enter_lazy_tlb(&init_mm, curr);
3267    
3268     - load_sp0(t, thread);
3269     - set_tss_desc(cpu, t);
3270     + /*
3271     + * Initialize the TSS. Don't bother initializing sp0, as the initial
3272     + * task never enters user mode.
3273     + */
3274     + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
3275     load_TR_desc();
3276     +
3277     load_mm_ldt(&init_mm);
3278    
3279     - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
3280     + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
3281    
3282     #ifdef CONFIG_DOUBLEFAULT
3283     /* Set up doublefault TSS pointer in the GDT */
3284     @@ -1644,7 +1728,6 @@ void cpu_init(void)
3285    
3286     fpu__init_cpu();
3287    
3288     - setup_fixmap_gdt(cpu);
3289     load_fixmap_gdt(cpu);
3290     }
3291     #endif
3292     diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
3293     new file mode 100644
3294     index 000000000000..904b0a3c4e53
3295     --- /dev/null
3296     +++ b/arch/x86/kernel/cpu/cpuid-deps.c
3297     @@ -0,0 +1,121 @@
3298     +/* Declare dependencies between CPUIDs */
3299     +#include <linux/kernel.h>
3300     +#include <linux/init.h>
3301     +#include <linux/module.h>
3302     +#include <asm/cpufeature.h>
3303     +
3304     +struct cpuid_dep {
3305     + unsigned int feature;
3306     + unsigned int depends;
3307     +};
3308     +
3309     +/*
3310     + * Table of CPUID features that depend on others.
3311     + *
3312     + * This only includes dependencies that can be usefully disabled, not
3313     + * features part of the base set (like FPU).
3314     + *
3315     + * Note this all is not __init / __initdata because it can be
3316     + * called from cpu hotplug. It shouldn't do anything in this case,
3317     + * but it's difficult to tell that to the init reference checker.
3318     + */
3319     +const static struct cpuid_dep cpuid_deps[] = {
3320     + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
3321     + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
3322     + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
3323     + { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
3324     + { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
3325     + { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
3326     + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
3327     + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
3328     + { X86_FEATURE_XMM, X86_FEATURE_FXSR },
3329     + { X86_FEATURE_XMM2, X86_FEATURE_XMM },
3330     + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
3331     + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
3332     + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
3333     + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
3334     + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
3335     + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
3336     + { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
3337     + { X86_FEATURE_AES, X86_FEATURE_XMM2 },
3338     + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
3339     + { X86_FEATURE_FMA, X86_FEATURE_AVX },
3340     + { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
3341     + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
3342     + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
3343     + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
3344     + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
3345     + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
3346     + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
3347     + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
3348     + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
3349     + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
3350     + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
3351     + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
3352     + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
3353     + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
3354     + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
3355     + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
3356     + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
3357     + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
3358     + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
3359     + {}
3360     +};
3361     +
3362     +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
3363     +{
3364     + /*
3365     + * Note: This could use the non atomic __*_bit() variants, but the
3366     + * rest of the cpufeature code uses atomics as well, so keep it for
3367     + * consistency. Cleanup all of it separately.
3368     + */
3369     + if (!c) {
3370     + clear_cpu_cap(&boot_cpu_data, feature);
3371     + set_bit(feature, (unsigned long *)cpu_caps_cleared);
3372     + } else {
3373     + clear_bit(feature, (unsigned long *)c->x86_capability);
3374     + }
3375     +}
3376     +
3377     +/* Take the capabilities and the BUG bits into account */
3378     +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
3379     +
3380     +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
3381     +{
3382     + DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
3383     + const struct cpuid_dep *d;
3384     + bool changed;
3385     +
3386     + if (WARN_ON(feature >= MAX_FEATURE_BITS))
3387     + return;
3388     +
3389     + clear_feature(c, feature);
3390     +
3391     + /* Collect all features to disable, handling dependencies */
3392     + memset(disable, 0, sizeof(disable));
3393     + __set_bit(feature, disable);
3394     +
3395     + /* Loop until we get a stable state. */
3396     + do {
3397     + changed = false;
3398     + for (d = cpuid_deps; d->feature; d++) {
3399     + if (!test_bit(d->depends, disable))
3400     + continue;
3401     + if (__test_and_set_bit(d->feature, disable))
3402     + continue;
3403     +
3404     + changed = true;
3405     + clear_feature(c, d->feature);
3406     + }
3407     + } while (changed);
3408     +}
3409     +
3410     +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
3411     +{
3412     + do_clear_cpu_cap(c, feature);
3413     +}
3414     +
3415     +void setup_clear_cpu_cap(unsigned int feature)
3416     +{
3417     + do_clear_cpu_cap(NULL, feature);
3418     +}
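
do_clear_cpu_cap() above computes a transitive closure: it sweeps the dependency table repeatedly, disabling every feature whose prerequisite is already in the disable set, and stops once a full pass changes nothing. A self-contained sketch of the same fixed-point loop over a toy table (the feature numbers and table entries are invented, not the kernel's CPUID bits):

#include <stdio.h>
#include <stdbool.h>
#include <string.h>

struct dep { int feature; int depends; };

/* Toy table: 1 needs 0, 2 needs 1, 3 needs 1. */
static const struct dep deps[] = {
	{ 1, 0 },
	{ 2, 1 },
	{ 3, 1 },
	{ 0, 0 },   /* terminator */
};

int main(void)
{
	bool disabled[8];
	bool changed;

	memset(disabled, 0, sizeof(disabled));
	disabled[0] = true;                     /* start by disabling feature 0 */

	do {                                    /* loop until no new feature gets disabled */
		changed = false;
		for (const struct dep *d = deps; d->feature; d++) {
			if (!disabled[d->depends] || disabled[d->feature])
				continue;
			disabled[d->feature] = true;
			changed = true;
		}
	} while (changed);

	for (int i = 0; i < 8; i++)
		if (disabled[i])
			printf("feature %d disabled\n", i);   /* 0, 1, 2 and 3 */
	return 0;
}
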
3419     diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
3420     index 4fa90006ac68..bea8d3e24f50 100644
3421     --- a/arch/x86/kernel/cpu/hypervisor.c
3422     +++ b/arch/x86/kernel/cpu/hypervisor.c
3423     @@ -26,6 +26,12 @@
3424     #include <asm/processor.h>
3425     #include <asm/hypervisor.h>
3426    
3427     +extern const struct hypervisor_x86 x86_hyper_vmware;
3428     +extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
3429     +extern const struct hypervisor_x86 x86_hyper_xen_pv;
3430     +extern const struct hypervisor_x86 x86_hyper_xen_hvm;
3431     +extern const struct hypervisor_x86 x86_hyper_kvm;
3432     +
3433     static const __initconst struct hypervisor_x86 * const hypervisors[] =
3434     {
3435     #ifdef CONFIG_XEN_PV
3436     @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
3437     #endif
3438     };
3439    
3440     -const struct hypervisor_x86 *x86_hyper;
3441     -EXPORT_SYMBOL(x86_hyper);
3442     +enum x86_hypervisor_type x86_hyper_type;
3443     +EXPORT_SYMBOL(x86_hyper_type);
3444    
3445     -static inline void __init
3446     +static inline const struct hypervisor_x86 * __init
3447     detect_hypervisor_vendor(void)
3448     {
3449     - const struct hypervisor_x86 *h, * const *p;
3450     + const struct hypervisor_x86 *h = NULL, * const *p;
3451     uint32_t pri, max_pri = 0;
3452    
3453     for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
3454     - h = *p;
3455     - pri = h->detect();
3456     - if (pri != 0 && pri > max_pri) {
3457     + pri = (*p)->detect();
3458     + if (pri > max_pri) {
3459     max_pri = pri;
3460     - x86_hyper = h;
3461     + h = *p;
3462     }
3463     }
3464    
3465     - if (max_pri)
3466     - pr_info("Hypervisor detected: %s\n", x86_hyper->name);
3467     + if (h)
3468     + pr_info("Hypervisor detected: %s\n", h->name);
3469     +
3470     + return h;
3471     }
3472    
3473     -void __init init_hypervisor_platform(void)
3474     +static void __init copy_array(const void *src, void *target, unsigned int size)
3475     {
3476     + unsigned int i, n = size / sizeof(void *);
3477     + const void * const *from = (const void * const *)src;
3478     + const void **to = (const void **)target;
3479    
3480     - detect_hypervisor_vendor();
3481     -
3482     - if (!x86_hyper)
3483     - return;
3484     -
3485     - if (x86_hyper->init_platform)
3486     - x86_hyper->init_platform();
3487     + for (i = 0; i < n; i++)
3488     + if (from[i])
3489     + to[i] = from[i];
3490     }
3491    
3492     -bool __init hypervisor_x2apic_available(void)
3493     +void __init init_hypervisor_platform(void)
3494     {
3495     - return x86_hyper &&
3496     - x86_hyper->x2apic_available &&
3497     - x86_hyper->x2apic_available();
3498     -}
3499     + const struct hypervisor_x86 *h;
3500    
3501     -void hypervisor_pin_vcpu(int cpu)
3502     -{
3503     - if (!x86_hyper)
3504     + h = detect_hypervisor_vendor();
3505     +
3506     + if (!h)
3507     return;
3508    
3509     - if (x86_hyper->pin_vcpu)
3510     - x86_hyper->pin_vcpu(cpu);
3511     - else
3512     - WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
3513     + copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
3514     + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
3515     +
3516     + x86_hyper_type = h->type;
3517     + x86_init.hyper.init_platform();
3518     }
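
copy_array() above treats the detected hypervisor's init/runtime structures as flat arrays of pointers and overlays only the non-NULL entries onto the x86_init/x86_platform defaults, so callbacks the guest does not provide keep their default implementations. A standalone sketch of that overlay (struct ops, guest_setup() and the default_* callbacks are invented; the pattern assumes the structures contain nothing but pointer-sized slots, as the hyper ops do):

#include <stdio.h>

struct ops {                        /* assumed: pointer members only */
	void (*setup)(void);
	void (*teardown)(void);
};

static void default_setup(void)    { puts("default setup"); }
static void default_teardown(void) { puts("default teardown"); }
static void guest_setup(void)      { puts("guest setup"); }

/* Copy only the slots the source actually fills in. */
static void copy_array(const void *src, void *target, unsigned int size)
{
	unsigned int i, n = size / sizeof(void *);
	const void * const *from = (const void * const *)src;
	const void **to = (const void **)target;

	for (i = 0; i < n; i++)
		if (from[i])
			to[i] = from[i];
}

int main(void)
{
	struct ops defaults = { default_setup, default_teardown };
	struct ops guest    = { guest_setup, NULL };   /* no teardown override */

	copy_array(&guest, &defaults, sizeof(guest));
	defaults.setup();      /* prints "guest setup"      */
	defaults.teardown();   /* prints "default teardown" */
	return 0;
}
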
3519     diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
3520     index 236324e83a3a..85eb5fc180c8 100644
3521     --- a/arch/x86/kernel/cpu/mshyperv.c
3522     +++ b/arch/x86/kernel/cpu/mshyperv.c
3523     @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
3524     #endif
3525     }
3526    
3527     -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
3528     +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
3529     .name = "Microsoft Hyper-V",
3530     .detect = ms_hyperv_platform,
3531     - .init_platform = ms_hyperv_init_platform,
3532     + .type = X86_HYPER_MS_HYPERV,
3533     + .init.init_platform = ms_hyperv_init_platform,
3534     };
3535     -EXPORT_SYMBOL(x86_hyper_ms_hyperv);
3536     diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
3537     index 40ed26852ebd..8e005329648b 100644
3538     --- a/arch/x86/kernel/cpu/vmware.c
3539     +++ b/arch/x86/kernel/cpu/vmware.c
3540     @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
3541     (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
3542     }
3543    
3544     -const __refconst struct hypervisor_x86 x86_hyper_vmware = {
3545     +const __initconst struct hypervisor_x86 x86_hyper_vmware = {
3546     .name = "VMware",
3547     .detect = vmware_platform,
3548     - .init_platform = vmware_platform_setup,
3549     - .x2apic_available = vmware_legacy_x2apic_available,
3550     + .type = X86_HYPER_VMWARE,
3551     + .init.init_platform = vmware_platform_setup,
3552     + .init.x2apic_available = vmware_legacy_x2apic_available,
3553     };
3554     -EXPORT_SYMBOL(x86_hyper_vmware);
3555     diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
3556     index 0e662c55ae90..0b8cedb20d6d 100644
3557     --- a/arch/x86/kernel/doublefault.c
3558     +++ b/arch/x86/kernel/doublefault.c
3559     @@ -50,25 +50,23 @@ static void doublefault_fn(void)
3560     cpu_relax();
3561     }
3562    
3563     -struct tss_struct doublefault_tss __cacheline_aligned = {
3564     - .x86_tss = {
3565     - .sp0 = STACK_START,
3566     - .ss0 = __KERNEL_DS,
3567     - .ldt = 0,
3568     - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
3569     -
3570     - .ip = (unsigned long) doublefault_fn,
3571     - /* 0x2 bit is always set */
3572     - .flags = X86_EFLAGS_SF | 0x2,
3573     - .sp = STACK_START,
3574     - .es = __USER_DS,
3575     - .cs = __KERNEL_CS,
3576     - .ss = __KERNEL_DS,
3577     - .ds = __USER_DS,
3578     - .fs = __KERNEL_PERCPU,
3579     -
3580     - .__cr3 = __pa_nodebug(swapper_pg_dir),
3581     - }
3582     +struct x86_hw_tss doublefault_tss __cacheline_aligned = {
3583     + .sp0 = STACK_START,
3584     + .ss0 = __KERNEL_DS,
3585     + .ldt = 0,
3586     + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
3587     +
3588     + .ip = (unsigned long) doublefault_fn,
3589     + /* 0x2 bit is always set */
3590     + .flags = X86_EFLAGS_SF | 0x2,
3591     + .sp = STACK_START,
3592     + .es = __USER_DS,
3593     + .cs = __KERNEL_CS,
3594     + .ss = __KERNEL_DS,
3595     + .ds = __USER_DS,
3596     + .fs = __KERNEL_PERCPU,
3597     +
3598     + .__cr3 = __pa_nodebug(swapper_pg_dir),
3599     };
3600    
3601     /* dummy for do_double_fault() call */
3602     diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
3603     index f13b4c00a5de..bbd6d986e2d0 100644
3604     --- a/arch/x86/kernel/dumpstack.c
3605     +++ b/arch/x86/kernel/dumpstack.c
3606     @@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
3607     return true;
3608     }
3609    
3610     +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
3611     +{
3612     + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
3613     +
3614     + void *begin = ss;
3615     + void *end = ss + 1;
3616     +
3617     + if ((void *)stack < begin || (void *)stack >= end)
3618     + return false;
3619     +
3620     + info->type = STACK_TYPE_SYSENTER;
3621     + info->begin = begin;
3622     + info->end = end;
3623     + info->next_sp = NULL;
3624     +
3625     + return true;
3626     +}
3627     +
3628     static void printk_stack_address(unsigned long address, int reliable,
3629     char *log_lvl)
3630     {
3631     @@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
3632     printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
3633     }
3634    
3635     +void show_iret_regs(struct pt_regs *regs)
3636     +{
3637     + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
3638     + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
3639     + regs->sp, regs->flags);
3640     +}
3641     +
3642     +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
3643     +{
3644     + if (on_stack(info, regs, sizeof(*regs)))
3645     + __show_regs(regs, 0);
3646     + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
3647     + IRET_FRAME_SIZE)) {
3648     + /*
3649     + * When an interrupt or exception occurs in entry code, the
3650     + * full pt_regs might not have been saved yet. In that case
3651     + * just print the iret frame.
3652     + */
3653     + show_iret_regs(regs);
3654     + }
3655     +}
3656     +
3657     void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3658     unsigned long *stack, char *log_lvl)
3659     {
3660     @@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3661     * - task stack
3662     * - interrupt stack
3663     * - HW exception stacks (double fault, nmi, debug, mce)
3664     + * - SYSENTER stack
3665     *
3666     - * x86-32 can have up to three stacks:
3667     + * x86-32 can have up to four stacks:
3668     * - task stack
3669     * - softirq stack
3670     * - hardirq stack
3671     + * - SYSENTER stack
3672     */
3673     for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
3674     const char *stack_name;
3675    
3676     - /*
3677     - * If we overflowed the task stack into a guard page, jump back
3678     - * to the bottom of the usable stack.
3679     - */
3680     - if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
3681     - stack = task_stack_page(task);
3682     -
3683     - if (get_stack_info(stack, task, &stack_info, &visit_mask))
3684     - break;
3685     + if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
3686     + /*
3687     + * We weren't on a valid stack. It's possible that
3688     + * we overflowed a valid stack into a guard page.
3689     + * See if the next page up is valid so that we can
3690     + * generate some kind of backtrace if this happens.
3691     + */
3692     + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
3693     + if (get_stack_info(stack, task, &stack_info, &visit_mask))
3694     + break;
3695     + }
3696    
3697     stack_name = stack_type_name(stack_info.type);
3698     if (stack_name)
3699     printk("%s <%s>\n", log_lvl, stack_name);
3700    
3701     - if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
3702     - __show_regs(regs, 0);
3703     + if (regs)
3704     + show_regs_safe(&stack_info, regs);
3705    
3706     /*
3707     * Scan the stack, printing any text addresses we find. At the
3708     @@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3709    
3710     /*
3711     * Don't print regs->ip again if it was already printed
3712     - * by __show_regs() below.
3713     + * by show_regs_safe() below.
3714     */
3715     if (regs && stack == &regs->ip)
3716     goto next;
3717     @@ -155,8 +199,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3718    
3719     /* if the frame has entry regs, print them */
3720     regs = unwind_get_entry_regs(&state);
3721     - if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
3722     - __show_regs(regs, 0);
3723     + if (regs)
3724     + show_regs_safe(&stack_info, regs);
3725     }
3726    
3727     if (stack_name)
3728     diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
3729     index daefae83a3aa..5ff13a6b3680 100644
3730     --- a/arch/x86/kernel/dumpstack_32.c
3731     +++ b/arch/x86/kernel/dumpstack_32.c
3732     @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
3733     if (type == STACK_TYPE_SOFTIRQ)
3734     return "SOFTIRQ";
3735    
3736     + if (type == STACK_TYPE_SYSENTER)
3737     + return "SYSENTER";
3738     +
3739     return NULL;
3740     }
3741    
3742     @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
3743     if (task != current)
3744     goto unknown;
3745    
3746     + if (in_sysenter_stack(stack, info))
3747     + goto recursion_check;
3748     +
3749     if (in_hardirq_stack(stack, info))
3750     goto recursion_check;
3751    
3752     diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
3753     index 88ce2ffdb110..abc828f8c297 100644
3754     --- a/arch/x86/kernel/dumpstack_64.c
3755     +++ b/arch/x86/kernel/dumpstack_64.c
3756     @@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
3757     if (type == STACK_TYPE_IRQ)
3758     return "IRQ";
3759    
3760     + if (type == STACK_TYPE_SYSENTER)
3761     + return "SYSENTER";
3762     +
3763     if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
3764     return exception_stack_names[type - STACK_TYPE_EXCEPTION];
3765    
3766     @@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
3767     if (in_irq_stack(stack, info))
3768     goto recursion_check;
3769    
3770     + if (in_sysenter_stack(stack, info))
3771     + goto recursion_check;
3772     +
3773     goto unknown;
3774    
3775     recursion_check:
3776     diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
3777     index 7affb7e3d9a5..6abd83572b01 100644
3778     --- a/arch/x86/kernel/fpu/init.c
3779     +++ b/arch/x86/kernel/fpu/init.c
3780     @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
3781     */
3782     static void __init fpu__init_parse_early_param(void)
3783     {
3784     + char arg[32];
3785     + char *argptr = arg;
3786     + int bit;
3787     +
3788     if (cmdline_find_option_bool(boot_command_line, "no387"))
3789     setup_clear_cpu_cap(X86_FEATURE_FPU);
3790    
3791     @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
3792    
3793     if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
3794     setup_clear_cpu_cap(X86_FEATURE_XSAVES);
3795     +
3796     + if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
3797     + sizeof(arg)) &&
3798     + get_option(&argptr, &bit) &&
3799     + bit >= 0 &&
3800     + bit < NCAPINTS * 32)
3801     + setup_clear_cpu_cap(bit);
3802     }
3803    
3804     /*
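
The fpu__init_parse_early_param() change above copies the value of clearcpuid= into a small buffer, converts it to an integer and range-checks the bit before clearing the capability. A plain C sketch of that parse-and-validate step (find_option() stands in for the kernel's cmdline_find_option()/get_option() helpers and is deliberately simplistic; NCAPINTS is an illustrative value):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCAPINTS 19   /* illustrative capability-word count */

/* Stand-in for cmdline_find_option(): copy the value of "name=" into buf. */
static int find_option(const char *cmdline, const char *name, char *buf, size_t len)
{
	const char *p = strstr(cmdline, name);

	if (!p || p[strlen(name)] != '=')
		return 0;
	p += strlen(name) + 1;
	snprintf(buf, len, "%.*s", (int)strcspn(p, " "), p);
	return 1;
}

int main(void)
{
	const char *cmdline = "root=/dev/sda1 clearcpuid=154 quiet";
	char arg[32];
	long bit;

	if (find_option(cmdline, "clearcpuid", arg, sizeof(arg))) {
		bit = strtol(arg, NULL, 0);
		if (bit >= 0 && bit < NCAPINTS * 32)
			printf("would clear CPU capability bit %ld\n", bit);
	}
	return 0;
}
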
3805     diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
3806     index f1d5476c9022..87a57b7642d3 100644
3807     --- a/arch/x86/kernel/fpu/xstate.c
3808     +++ b/arch/x86/kernel/fpu/xstate.c
3809     @@ -15,6 +15,7 @@
3810     #include <asm/fpu/xstate.h>
3811    
3812     #include <asm/tlbflush.h>
3813     +#include <asm/cpufeature.h>
3814    
3815     /*
3816     * Although we spell it out in here, the Processor Trace
3817     @@ -36,6 +37,19 @@ static const char *xfeature_names[] =
3818     "unknown xstate feature" ,
3819     };
3820    
3821     +static short xsave_cpuid_features[] __initdata = {
3822     + X86_FEATURE_FPU,
3823     + X86_FEATURE_XMM,
3824     + X86_FEATURE_AVX,
3825     + X86_FEATURE_MPX,
3826     + X86_FEATURE_MPX,
3827     + X86_FEATURE_AVX512F,
3828     + X86_FEATURE_AVX512F,
3829     + X86_FEATURE_AVX512F,
3830     + X86_FEATURE_INTEL_PT,
3831     + X86_FEATURE_PKU,
3832     +};
3833     +
3834     /*
3835     * Mask of xstate features supported by the CPU and the kernel:
3836     */
3837     @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
3838     void fpu__xstate_clear_all_cpu_caps(void)
3839     {
3840     setup_clear_cpu_cap(X86_FEATURE_XSAVE);
3841     - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
3842     - setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
3843     - setup_clear_cpu_cap(X86_FEATURE_XSAVES);
3844     - setup_clear_cpu_cap(X86_FEATURE_AVX);
3845     - setup_clear_cpu_cap(X86_FEATURE_AVX2);
3846     - setup_clear_cpu_cap(X86_FEATURE_AVX512F);
3847     - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
3848     - setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
3849     - setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
3850     - setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
3851     - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
3852     - setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
3853     - setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
3854     - setup_clear_cpu_cap(X86_FEATURE_MPX);
3855     - setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
3856     - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
3857     - setup_clear_cpu_cap(X86_FEATURE_PKU);
3858     - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
3859     - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
3860     - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
3861     }
3862    
3863     /*
3864     @@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
3865     unsigned int eax, ebx, ecx, edx;
3866     static int on_boot_cpu __initdata = 1;
3867     int err;
3868     + int i;
3869    
3870     WARN_ON_FPU(!on_boot_cpu);
3871     on_boot_cpu = 0;
3872     @@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
3873     goto out_disable;
3874     }
3875    
3876     + /*
3877     + * Clear XSAVE features that are disabled in the normal CPUID.
3878     + */
3879     + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
3880     + if (!boot_cpu_has(xsave_cpuid_features[i]))
3881     + xfeatures_mask &= ~BIT(i);
3882     + }
3883     +
3884     xfeatures_mask &= fpu__get_supported_xfeatures_mask();
3885    
3886     /* Enable xstate instructions to be able to continue with initialization: */
3887     diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
3888     index f1d528bb66a6..c29020907886 100644
3889     --- a/arch/x86/kernel/head_32.S
3890     +++ b/arch/x86/kernel/head_32.S
3891     @@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
3892     #endif
3893    
3894     .Ldefault_entry:
3895     -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
3896     - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
3897     - X86_CR0_PG)
3898     movl $(CR0_STATE & ~X86_CR0_PG),%eax
3899     movl %eax,%cr0
3900    
3901     @@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
3902     # 24(%rsp) error code
3903     i = 0
3904     .rept NUM_EXCEPTION_VECTORS
3905     - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
3906     + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
3907     pushl $0 # Dummy error code, to make stack frame uniform
3908     .endif
3909     pushl $i # 20(%esp) Vector number
3910     diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
3911     index 6dde3f3fc1f8..7dca675fe78d 100644
3912     --- a/arch/x86/kernel/head_64.S
3913     +++ b/arch/x86/kernel/head_64.S
3914     @@ -38,11 +38,12 @@
3915     *
3916     */
3917    
3918     -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
3919     #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
3920    
3921     +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
3922     PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
3923     PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
3924     +#endif
3925     L3_START_KERNEL = pud_index(__START_KERNEL_map)
3926    
3927     .text
3928     @@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
3929     .code64
3930     .globl startup_64
3931     startup_64:
3932     + UNWIND_HINT_EMPTY
3933     /*
3934     * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
3935     * and someone has loaded an identity mapped page table
3936     @@ -89,6 +91,7 @@ startup_64:
3937     addq $(early_top_pgt - __START_KERNEL_map), %rax
3938     jmp 1f
3939     ENTRY(secondary_startup_64)
3940     + UNWIND_HINT_EMPTY
3941     /*
3942     * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
3943     * and someone has loaded a mapped page table.
3944     @@ -133,6 +136,7 @@ ENTRY(secondary_startup_64)
3945     movq $1f, %rax
3946     jmp *%rax
3947     1:
3948     + UNWIND_HINT_EMPTY
3949    
3950     /* Check if nx is implemented */
3951     movl $0x80000001, %eax
3952     @@ -150,9 +154,6 @@ ENTRY(secondary_startup_64)
3953     1: wrmsr /* Make changes effective */
3954    
3955     /* Setup cr0 */
3956     -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
3957     - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
3958     - X86_CR0_PG)
3959     movl $CR0_STATE, %eax
3960     /* Make changes effective */
3961     movq %rax, %cr0
3962     @@ -235,7 +236,7 @@ ENTRY(secondary_startup_64)
3963     pushq %rax # target address in negative space
3964     lretq
3965     .Lafter_lret:
3966     -ENDPROC(secondary_startup_64)
3967     +END(secondary_startup_64)
3968    
3969     #include "verify_cpu.S"
3970    
3971     @@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64)
3972     */
3973     ENTRY(start_cpu0)
3974     movq initial_stack(%rip), %rsp
3975     + UNWIND_HINT_EMPTY
3976     jmp .Ljump_to_C_code
3977     ENDPROC(start_cpu0)
3978     #endif
3979     @@ -266,26 +268,24 @@ ENDPROC(start_cpu0)
3980     .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
3981     __FINITDATA
3982    
3983     -bad_address:
3984     - jmp bad_address
3985     -
3986     __INIT
3987     ENTRY(early_idt_handler_array)
3988     - # 104(%rsp) %rflags
3989     - # 96(%rsp) %cs
3990     - # 88(%rsp) %rip
3991     - # 80(%rsp) error code
3992     i = 0
3993     .rept NUM_EXCEPTION_VECTORS
3994     - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
3995     - pushq $0 # Dummy error code, to make stack frame uniform
3996     + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
3997     + UNWIND_HINT_IRET_REGS
3998     + pushq $0 # Dummy error code, to make stack frame uniform
3999     + .else
4000     + UNWIND_HINT_IRET_REGS offset=8
4001     .endif
4002     pushq $i # 72(%rsp) Vector number
4003     jmp early_idt_handler_common
4004     + UNWIND_HINT_IRET_REGS
4005     i = i + 1
4006     .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
4007     .endr
4008     -ENDPROC(early_idt_handler_array)
4009     + UNWIND_HINT_IRET_REGS offset=16
4010     +END(early_idt_handler_array)
4011    
4012     early_idt_handler_common:
4013     /*
4014     @@ -313,6 +313,7 @@ early_idt_handler_common:
4015     pushq %r13 /* pt_regs->r13 */
4016     pushq %r14 /* pt_regs->r14 */
4017     pushq %r15 /* pt_regs->r15 */
4018     + UNWIND_HINT_REGS
4019    
4020     cmpq $14,%rsi /* Page fault? */
4021     jnz 10f
4022     @@ -327,8 +328,8 @@ early_idt_handler_common:
4023    
4024     20:
4025     decl early_recursion_flag(%rip)
4026     - jmp restore_regs_and_iret
4027     -ENDPROC(early_idt_handler_common)
4028     + jmp restore_regs_and_return_to_kernel
4029     +END(early_idt_handler_common)
4030    
4031     __INITDATA
4032    
4033     @@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
4034    
4035     .data
4036    
4037     -#ifndef CONFIG_XEN
4038     -NEXT_PAGE(init_top_pgt)
4039     - .fill 512,8,0
4040     -#else
4041     +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
4042     NEXT_PAGE(init_top_pgt)
4043     .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
4044     .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
4045     @@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
4046     * Don't set NX because code runs from these pages.
4047     */
4048     PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
4049     +#else
4050     +NEXT_PAGE(init_top_pgt)
4051     + .fill 512,8,0
4052     #endif
4053    
4054     #ifdef CONFIG_X86_5LEVEL
4055     @@ -435,7 +436,7 @@ ENTRY(phys_base)
4056     EXPORT_SYMBOL(phys_base)
4057    
4058     #include "../../x86/xen/xen-head.S"
4059     -
4060     +
4061     __PAGE_ALIGNED_BSS
4062     NEXT_PAGE(empty_zero_page)
4063     .skip PAGE_SIZE
4064     diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
4065     index 3feb648781c4..2f723301eb58 100644
4066     --- a/arch/x86/kernel/ioport.c
4067     +++ b/arch/x86/kernel/ioport.c
4068     @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
4069     * because the ->io_bitmap_max value must match the bitmap
4070     * contents:
4071     */
4072     - tss = &per_cpu(cpu_tss, get_cpu());
4073     + tss = &per_cpu(cpu_tss_rw, get_cpu());
4074    
4075     if (turn_on)
4076     bitmap_clear(t->io_bitmap_ptr, from, num);
4077     diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
4078     index 52089c043160..aa9d51eea9d0 100644
4079     --- a/arch/x86/kernel/irq.c
4080     +++ b/arch/x86/kernel/irq.c
4081     @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
4082     /* high bit used in ret_from_ code */
4083     unsigned vector = ~regs->orig_ax;
4084    
4085     - /*
4086     - * NB: Unlike exception entries, IRQ entries do not reliably
4087     - * handle context tracking in the low-level entry code. This is
4088     - * because syscall entries execute briefly with IRQs on before
4089     - * updating context tracking state, so we can take an IRQ from
4090     - * kernel mode with CONTEXT_USER. The low-level entry code only
4091     - * updates the context if we came from user mode, so we won't
4092     - * switch to CONTEXT_KERNEL. We'll fix that once the syscall
4093     - * code is cleaned up enough that we can cleanly defer enabling
4094     - * IRQs.
4095     - */
4096     -
4097     entering_irq();
4098    
4099     /* entering_irq() tells RCU that we're not quiescent. Check it. */
4100     diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
4101     index 020efbf5786b..d86e344f5b3d 100644
4102     --- a/arch/x86/kernel/irq_64.c
4103     +++ b/arch/x86/kernel/irq_64.c
4104     @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
4105     if (regs->sp >= estack_top && regs->sp <= estack_bottom)
4106     return;
4107    
4108     - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
4109     + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
4110     current->comm, curbase, regs->sp,
4111     irq_stack_top, irq_stack_bottom,
4112     - estack_top, estack_bottom);
4113     + estack_top, estack_bottom, (void *)regs->ip);
4114    
4115     if (sysctl_panic_on_stackoverflow)
4116     panic("low stack detected by irq handler - check messages\n");
4117     diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
4118     index 8bb9594d0761..a94de09edbed 100644
4119     --- a/arch/x86/kernel/kvm.c
4120     +++ b/arch/x86/kernel/kvm.c
4121     @@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
4122     return kvm_cpuid_base();
4123     }
4124    
4125     -const struct hypervisor_x86 x86_hyper_kvm __refconst = {
4126     +const __initconst struct hypervisor_x86 x86_hyper_kvm = {
4127     .name = "KVM",
4128     .detect = kvm_detect,
4129     - .x2apic_available = kvm_para_available,
4130     + .type = X86_HYPER_KVM,
4131     + .init.x2apic_available = kvm_para_available,
4132     };
4133     -EXPORT_SYMBOL_GPL(x86_hyper_kvm);
4134    
4135     static __init int activate_jump_labels(void)
4136     {
4137     diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
4138     index ae5615b03def..1c1eae961340 100644
4139     --- a/arch/x86/kernel/ldt.c
4140     +++ b/arch/x86/kernel/ldt.c
4141     @@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
4142     static void install_ldt(struct mm_struct *current_mm,
4143     struct ldt_struct *ldt)
4144     {
4145     - /* Synchronizes with lockless_dereference in load_mm_ldt. */
4146     + /* Synchronizes with READ_ONCE in load_mm_ldt. */
4147     smp_store_release(&current_mm->context.ldt, ldt);
4148    
4149     /* Activate the LDT for all CPUs using current_mm. */
4150     diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
4151     index ac0be8283325..9edadabf04f6 100644
4152     --- a/arch/x86/kernel/paravirt_patch_64.c
4153     +++ b/arch/x86/kernel/paravirt_patch_64.c
4154     @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
4155     DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
4156     DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
4157     DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
4158     -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
4159     DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
4160    
4161     DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
4162     @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
4163     PATCH_SITE(pv_mmu_ops, read_cr2);
4164     PATCH_SITE(pv_mmu_ops, read_cr3);
4165     PATCH_SITE(pv_mmu_ops, write_cr3);
4166     - PATCH_SITE(pv_mmu_ops, flush_tlb_single);
4167     PATCH_SITE(pv_cpu_ops, wbinvd);
4168     #if defined(CONFIG_PARAVIRT_SPINLOCKS)
4169     case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
4170     diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
4171     index c67685337c5a..517415978409 100644
4172     --- a/arch/x86/kernel/process.c
4173     +++ b/arch/x86/kernel/process.c
4174     @@ -47,9 +47,25 @@
4175     * section. Since TSS's are completely CPU-local, we want them
4176     * on exact cacheline boundaries, to eliminate cacheline ping-pong.
4177     */
4178     -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
4179     +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
4180     .x86_tss = {
4181     - .sp0 = TOP_OF_INIT_STACK,
4182     + /*
4183     + * .sp0 is only used when entering ring 0 from a lower
4184     + * privilege level. Since the init task never runs anything
4185     + * but ring 0 code, there is no need for a valid value here.
4186     + * Poison it.
4187     + */
4188     + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
4189     +
4190     +#ifdef CONFIG_X86_64
4191     + /*
4192     + * .sp1 is cpu_current_top_of_stack. The init task never
4193     + * runs user code, but cpu_current_top_of_stack should still
4194     + * be well defined before the first context switch.
4195     + */
4196     + .sp1 = TOP_OF_INIT_STACK,
4197     +#endif
4198     +
4199     #ifdef CONFIG_X86_32
4200     .ss0 = __KERNEL_DS,
4201     .ss1 = __KERNEL_CS,
4202     @@ -65,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
4203     */
4204     .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
4205     #endif
4206     -#ifdef CONFIG_X86_32
4207     - .SYSENTER_stack_canary = STACK_END_MAGIC,
4208     -#endif
4209     };
4210     -EXPORT_PER_CPU_SYMBOL(cpu_tss);
4211     +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
4212    
4213     DEFINE_PER_CPU(bool, __tss_limit_invalid);
4214     EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
4215     @@ -98,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
4216     struct fpu *fpu = &t->fpu;
4217    
4218     if (bp) {
4219     - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
4220     + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
4221    
4222     t->io_bitmap_ptr = NULL;
4223     clear_thread_flag(TIF_IO_BITMAP);
4224     diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
4225     index 11966251cd42..5224c6099184 100644
4226     --- a/arch/x86/kernel/process_32.c
4227     +++ b/arch/x86/kernel/process_32.c
4228     @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4229     struct fpu *prev_fpu = &prev->fpu;
4230     struct fpu *next_fpu = &next->fpu;
4231     int cpu = smp_processor_id();
4232     - struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
4233     + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
4234    
4235     /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
4236    
4237     @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4238    
4239     /*
4240     * Reload esp0 and cpu_current_top_of_stack. This changes
4241     - * current_thread_info().
4242     + * current_thread_info(). Refresh the SYSENTER configuration in
4243     + * case prev or next is vm86.
4244     */
4245     - load_sp0(tss, next);
4246     + update_sp0(next_p);
4247     + refresh_sysenter_cs(next);
4248     this_cpu_write(cpu_current_top_of_stack,
4249     (unsigned long)task_stack_page(next_p) +
4250     THREAD_SIZE);
4251     diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
4252     index 302e7b2572d1..c75466232016 100644
4253     --- a/arch/x86/kernel/process_64.c
4254     +++ b/arch/x86/kernel/process_64.c
4255     @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
4256     unsigned int fsindex, gsindex;
4257     unsigned int ds, cs, es;
4258    
4259     - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
4260     - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
4261     - regs->sp, regs->flags);
4262     + show_iret_regs(regs);
4263     +
4264     if (regs->orig_ax != -1)
4265     pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
4266     else
4267     @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
4268     printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
4269     regs->r13, regs->r14, regs->r15);
4270    
4271     + if (!all)
4272     + return;
4273     +
4274     asm("movl %%ds,%0" : "=r" (ds));
4275     asm("movl %%cs,%0" : "=r" (cs));
4276     asm("movl %%es,%0" : "=r" (es));
4277     @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
4278     rdmsrl(MSR_GS_BASE, gs);
4279     rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
4280    
4281     - if (!all)
4282     - return;
4283     -
4284     cr0 = read_cr0();
4285     cr2 = read_cr2();
4286     cr3 = __read_cr3();
4287     @@ -274,7 +273,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
4288     struct inactive_task_frame *frame;
4289     struct task_struct *me = current;
4290    
4291     - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
4292     childregs = task_pt_regs(p);
4293     fork_frame = container_of(childregs, struct fork_frame, regs);
4294     frame = &fork_frame->frame;
4295     @@ -401,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4296     struct fpu *prev_fpu = &prev->fpu;
4297     struct fpu *next_fpu = &next->fpu;
4298     int cpu = smp_processor_id();
4299     - struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
4300     + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
4301    
4302     WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
4303     this_cpu_read(irq_count) != -1);
4304     @@ -463,9 +461,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4305     * Switch the PDA and FPU contexts.
4306     */
4307     this_cpu_write(current_task, next_p);
4308     + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
4309    
4310     - /* Reload esp0 and ss1. This changes current_thread_info(). */
4311     - load_sp0(tss, next);
4312     + /* Reload sp0. */
4313     + update_sp0(next_p);
4314    
4315     /*
4316     * Now maybe reload the debug registers and handle I/O bitmaps
4317     diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
4318     index 5e0453f18a57..142126ab5aae 100644
4319     --- a/arch/x86/kernel/smpboot.c
4320     +++ b/arch/x86/kernel/smpboot.c
4321     @@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
4322     #ifdef CONFIG_X86_32
4323     /* Stack for startup_32 can be just as for start_secondary onwards */
4324     irq_ctx_init(cpu);
4325     - per_cpu(cpu_current_top_of_stack, cpu) =
4326     - (unsigned long)task_stack_page(idle) + THREAD_SIZE;
4327     + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
4328     #else
4329     initial_gs = per_cpu_offset(cpu);
4330     #endif
4331     diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
4332     index 5a6b8f809792..74136fd16f49 100644
4333     --- a/arch/x86/kernel/traps.c
4334     +++ b/arch/x86/kernel/traps.c
4335     @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
4336     * will catch asm bugs and any attempt to use ist_preempt_enable
4337     * from double_fault.
4338     */
4339     - BUG_ON((unsigned long)(current_top_of_stack() -
4340     - current_stack_pointer) >= THREAD_SIZE);
4341     + BUG_ON(!on_thread_stack());
4342    
4343     preempt_enable_no_resched();
4344     }
4345     @@ -349,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4346    
4347     /*
4348     * If IRET takes a non-IST fault on the espfix64 stack, then we
4349     - * end up promoting it to a doublefault. In that case, modify
4350     - * the stack to make it look like we just entered the #GP
4351     - * handler from user space, similar to bad_iret.
4352     + * end up promoting it to a doublefault. In that case, take
4353     + * advantage of the fact that we're not using the normal (TSS.sp0)
4354     + * stack right now. We can write a fake #GP(0) frame at TSS.sp0
4355     + * and then modify our own IRET frame so that, when we return,
4356     + * we land directly at the #GP(0) vector with the stack already
4357     + * set up according to its expectations.
4358     + *
4359     + * The net result is that our #GP handler will think that we
4360     + * entered from usermode with the bad user context.
4361     *
4362     * No need for ist_enter here because we don't use RCU.
4363     */
4364     @@ -359,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4365     regs->cs == __KERNEL_CS &&
4366     regs->ip == (unsigned long)native_irq_return_iret)
4367     {
4368     - struct pt_regs *normal_regs = task_pt_regs(current);
4369     + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
4370    
4371     - /* Fake a #GP(0) from userspace. */
4372     - memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
4373     - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
4374     + /*
4375     + * regs->sp points to the failing IRET frame on the
4376     + * ESPFIX64 stack. Copy it to the entry stack. This fills
4377     + * in gpregs->ss through gpregs->ip.
4378     + *
4379     + */
4380     + memmove(&gpregs->ip, (void *)regs->sp, 5*8);
4381     + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
4382     +
4383     + /*
4384     + * Adjust our frame so that we return straight to the #GP
4385     + * vector with the expected RSP value. This is safe because
4386     + * we won't enable interupts or schedule before we invoke
4387     + * general_protection, so nothing will clobber the stack
4388     + * frame we just set up.
4389     + */
4390     regs->ip = (unsigned long)general_protection;
4391     - regs->sp = (unsigned long)&normal_regs->orig_ax;
4392     + regs->sp = (unsigned long)&gpregs->orig_ax;
4393    
4394     return;
4395     }
4396     @@ -390,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4397     *
4398     * Processors update CR2 whenever a page fault is detected. If a
4399     * second page fault occurs while an earlier page fault is being
4400     - * deliv- ered, the faulting linear address of the second fault will
4401     + * delivered, the faulting linear address of the second fault will
4402     * overwrite the contents of CR2 (replacing the previous
4403     * address). These updates to CR2 occur even if the page fault
4404     * results in a double fault or occurs during the delivery of a
4405     @@ -601,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3);
4406    
4407     #ifdef CONFIG_X86_64
4408     /*
4409     - * Help handler running on IST stack to switch off the IST stack if the
4410     - * interrupted code was in user mode. The actual stack switch is done in
4411     - * entry_64.S
4412     + * Help handler running on a per-cpu (IST or entry trampoline) stack
4413     + * to switch to the normal thread stack if the interrupted code was in
4414     + * user mode. The actual stack switch is done in entry_64.S
4415     */
4416     asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
4417     {
4418     - struct pt_regs *regs = task_pt_regs(current);
4419     - *regs = *eregs;
4420     + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
4421     + if (regs != eregs)
4422     + *regs = *eregs;
4423     return regs;
4424     }
4425     NOKPROBE_SYMBOL(sync_regs);
4426     @@ -624,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
4427     /*
4428     * This is called from entry_64.S early in handling a fault
4429     * caused by a bad iret to user mode. To handle the fault
4430     - * correctly, we want move our stack frame to task_pt_regs
4431     - * and we want to pretend that the exception came from the
4432     - * iret target.
4433     + * correctly, we want to move our stack frame to where it would
4434     + * be had we entered directly on the entry stack (rather than
4435     + * just below the IRET frame) and we want to pretend that the
4436     + * exception came from the IRET target.
4437     */
4438     struct bad_iret_stack *new_stack =
4439     - container_of(task_pt_regs(current),
4440     - struct bad_iret_stack, regs);
4441     + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
4442    
4443     /* Copy the IRET target to the new stack. */
4444     memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
4445     @@ -795,14 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
4446     debug_stack_usage_dec();
4447    
4448     exit:
4449     -#if defined(CONFIG_X86_32)
4450     - /*
4451     - * This is the most likely code path that involves non-trivial use
4452     - * of the SYSENTER stack. Check that we haven't overrun it.
4453     - */
4454     - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
4455     - "Overran or corrupted SYSENTER stack\n");
4456     -#endif
4457     ist_exit(regs);
4458     }
4459     NOKPROBE_SYMBOL(do_debug);
4460     @@ -929,6 +940,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
4461    
4462     void __init trap_init(void)
4463     {
4464     + /* Init cpu_entry_area before IST entries are set up */
4465     + setup_cpu_entry_areas();
4466     +
4467     idt_setup_traps();
4468    
4469     /*
4470     diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
4471     index a3f973b2c97a..be86a865087a 100644
4472     --- a/arch/x86/kernel/unwind_orc.c
4473     +++ b/arch/x86/kernel/unwind_orc.c
4474     @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
4475     return NULL;
4476     }
4477    
4478     -static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
4479     +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
4480     size_t len)
4481     {
4482     struct stack_info *info = &state->stack_info;
4483     + void *addr = (void *)_addr;
4484    
4485     - /*
4486     - * If the address isn't on the current stack, switch to the next one.
4487     - *
4488     - * We may have to traverse multiple stacks to deal with the possibility
4489     - * that info->next_sp could point to an empty stack and the address
4490     - * could be on a subsequent stack.
4491     - */
4492     - while (!on_stack(info, (void *)addr, len))
4493     - if (get_stack_info(info->next_sp, state->task, info,
4494     - &state->stack_mask))
4495     - return false;
4496     + if (!on_stack(info, addr, len) &&
4497     + (get_stack_info(addr, state->task, info, &state->stack_mask)))
4498     + return false;
4499    
4500     return true;
4501     }
4502     @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
4503     return true;
4504     }
4505    
4506     -#define REGS_SIZE (sizeof(struct pt_regs))
4507     -#define SP_OFFSET (offsetof(struct pt_regs, sp))
4508     -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
4509     -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
4510     -
4511     static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
4512     - unsigned long *ip, unsigned long *sp, bool full)
4513     + unsigned long *ip, unsigned long *sp)
4514     {
4515     - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
4516     - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
4517     - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
4518     -
4519     - if (IS_ENABLED(CONFIG_X86_64)) {
4520     - if (!stack_access_ok(state, addr, regs_size))
4521     - return false;
4522     + struct pt_regs *regs = (struct pt_regs *)addr;
4523    
4524     - *ip = regs->ip;
4525     - *sp = regs->sp;
4526     + /* x86-32 support will be more complicated due to the &regs->sp hack */
4527     + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
4528    
4529     - return true;
4530     - }
4531     -
4532     - if (!stack_access_ok(state, addr, sp_offset))
4533     + if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
4534     return false;
4535    
4536     *ip = regs->ip;
4537     + *sp = regs->sp;
4538     + return true;
4539     +}
4540    
4541     - if (user_mode(regs)) {
4542     - if (!stack_access_ok(state, addr + sp_offset,
4543     - REGS_SIZE - SP_OFFSET))
4544     - return false;
4545     +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
4546     + unsigned long *ip, unsigned long *sp)
4547     +{
4548     + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
4549    
4550     - *sp = regs->sp;
4551     - } else
4552     - *sp = (unsigned long)&regs->sp;
4553     + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
4554     + return false;
4555    
4556     + *ip = regs->ip;
4557     + *sp = regs->sp;
4558     return true;
4559     }
4560    
4561     @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
4562     unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
4563     enum stack_type prev_type = state->stack_info.type;
4564     struct orc_entry *orc;
4565     - struct pt_regs *ptregs;
4566     bool indirect = false;
4567    
4568     if (unwind_done(state))
4569     @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
4570     break;
4571    
4572     case ORC_TYPE_REGS:
4573     - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
4574     + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
4575     orc_warn("can't dereference registers at %p for ip %pB\n",
4576     (void *)sp, (void *)orig_ip);
4577     goto done;
4578     @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
4579     break;
4580    
4581     case ORC_TYPE_REGS_IRET:
4582     - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
4583     + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
4584     orc_warn("can't dereference iret registers at %p for ip %pB\n",
4585     (void *)sp, (void *)orig_ip);
4586     goto done;
4587     }
4588    
4589     - ptregs = container_of((void *)sp, struct pt_regs, ip);
4590     - if ((unsigned long)ptregs >= prev_sp &&
4591     - on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
4592     - state->regs = ptregs;
4593     - state->full_regs = false;
4594     - } else
4595     - state->regs = NULL;
4596     -
4597     + state->regs = (void *)sp - IRET_FRAME_OFFSET;
4598     + state->full_regs = false;
4599     state->signal = true;
4600     break;
4601    
4602     @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
4603     }
4604    
4605     if (get_stack_info((unsigned long *)state->sp, state->task,
4606     - &state->stack_info, &state->stack_mask))
4607     - return;
4608     + &state->stack_info, &state->stack_mask)) {
4609     + /*
4610     + * We weren't on a valid stack. It's possible that
4611     + * we overflowed a valid stack into a guard page.
4612     + * See if the next page up is valid so that we can
4613     + * generate some kind of backtrace if this happens.
4614     + */
4615     + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
4616     + if (get_stack_info(next_page, state->task, &state->stack_info,
4617     + &state->stack_mask))
4618     + return;
4619     + }
4620    
4621     /*
4622     * The caller can provide the address of the first frame directly
4623     diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
4624     index 014ea59aa153..3d3c2f71f617 100644
4625     --- a/arch/x86/kernel/verify_cpu.S
4626     +++ b/arch/x86/kernel/verify_cpu.S
4627     @@ -33,7 +33,7 @@
4628     #include <asm/cpufeatures.h>
4629     #include <asm/msr-index.h>
4630    
4631     -verify_cpu:
4632     +ENTRY(verify_cpu)
4633     pushf # Save caller passed flags
4634     push $0 # Kill any dangerous flags
4635     popf
4636     @@ -139,3 +139,4 @@ verify_cpu:
4637     popf # Restore caller passed flags
4638     xorl %eax, %eax
4639     ret
4640     +ENDPROC(verify_cpu)
4641     diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
4642     index 68244742ecb0..5edb27f1a2c4 100644
4643     --- a/arch/x86/kernel/vm86_32.c
4644     +++ b/arch/x86/kernel/vm86_32.c
4645     @@ -55,6 +55,7 @@
4646     #include <asm/irq.h>
4647     #include <asm/traps.h>
4648     #include <asm/vm86.h>
4649     +#include <asm/switch_to.h>
4650    
4651     /*
4652     * Known problems:
4653     @@ -94,7 +95,6 @@
4654    
4655     void save_v86_state(struct kernel_vm86_regs *regs, int retval)
4656     {
4657     - struct tss_struct *tss;
4658     struct task_struct *tsk = current;
4659     struct vm86plus_struct __user *user;
4660     struct vm86 *vm86 = current->thread.vm86;
4661     @@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
4662     do_exit(SIGSEGV);
4663     }
4664    
4665     - tss = &per_cpu(cpu_tss, get_cpu());
4666     + preempt_disable();
4667     tsk->thread.sp0 = vm86->saved_sp0;
4668     tsk->thread.sysenter_cs = __KERNEL_CS;
4669     - load_sp0(tss, &tsk->thread);
4670     + update_sp0(tsk);
4671     + refresh_sysenter_cs(&tsk->thread);
4672     vm86->saved_sp0 = 0;
4673     - put_cpu();
4674     + preempt_enable();
4675    
4676     memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
4677    
4678     @@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
4679    
4680     static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
4681     {
4682     - struct tss_struct *tss;
4683     struct task_struct *tsk = current;
4684     struct vm86 *vm86 = tsk->thread.vm86;
4685     struct kernel_vm86_regs vm86regs;
4686     @@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
4687     vm86->saved_sp0 = tsk->thread.sp0;
4688     lazy_save_gs(vm86->regs32.gs);
4689    
4690     - tss = &per_cpu(cpu_tss, get_cpu());
4691     /* make room for real-mode segments */
4692     + preempt_disable();
4693     tsk->thread.sp0 += 16;
4694    
4695     - if (static_cpu_has(X86_FEATURE_SEP))
4696     + if (static_cpu_has(X86_FEATURE_SEP)) {
4697     tsk->thread.sysenter_cs = 0;
4698     + refresh_sysenter_cs(&tsk->thread);
4699     + }
4700    
4701     - load_sp0(tss, &tsk->thread);
4702     - put_cpu();
4703     + update_sp0(tsk);
4704     + preempt_enable();
4705    
4706     if (vm86->flags & VM86_SCREEN_BITMAP)
4707     mark_screen_rdonly(tsk->mm);
4708     diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
4709     index a4009fb9be87..d2a8b5a24a44 100644
4710     --- a/arch/x86/kernel/vmlinux.lds.S
4711     +++ b/arch/x86/kernel/vmlinux.lds.S
4712     @@ -107,6 +107,15 @@ SECTIONS
4713     SOFTIRQENTRY_TEXT
4714     *(.fixup)
4715     *(.gnu.warning)
4716     +
4717     +#ifdef CONFIG_X86_64
4718     + . = ALIGN(PAGE_SIZE);
4719     + _entry_trampoline = .;
4720     + *(.entry_trampoline)
4721     + . = ALIGN(PAGE_SIZE);
4722     + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
4723     +#endif
4724     +
4725     /* End of text section */
4726     _etext = .;
4727     } :text = 0x9090
4728     diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
4729     index a088b2c47f73..5b2d10c1973a 100644
4730     --- a/arch/x86/kernel/x86_init.c
4731     +++ b/arch/x86/kernel/x86_init.c
4732     @@ -28,6 +28,8 @@ void x86_init_noop(void) { }
4733     void __init x86_init_uint_noop(unsigned int unused) { }
4734     int __init iommu_init_noop(void) { return 0; }
4735     void iommu_shutdown_noop(void) { }
4736     +bool __init bool_x86_init_noop(void) { return false; }
4737     +void x86_op_int_noop(int cpu) { }
4738    
4739     /*
4740     * The platform setup functions are preset with the default functions
4741     @@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = {
4742     .init_irq = x86_default_pci_init_irq,
4743     .fixup_irqs = x86_default_pci_fixup_irqs,
4744     },
4745     +
4746     + .hyper = {
4747     + .init_platform = x86_init_noop,
4748     + .x2apic_available = bool_x86_init_noop,
4749     + .init_mem_mapping = x86_init_noop,
4750     + },
4751     };
4752    
4753     struct x86_cpuinit_ops x86_cpuinit = {
4754     @@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
4755     .get_nmi_reason = default_get_nmi_reason,
4756     .save_sched_clock_state = tsc_save_sched_clock_state,
4757     .restore_sched_clock_state = tsc_restore_sched_clock_state,
4758     + .hyper.pin_vcpu = x86_op_int_noop,
4759     };
4760    
4761     EXPORT_SYMBOL_GPL(x86_platform);
4762     diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
4763     index 7a69cf053711..13ebeedcec07 100644
4764     --- a/arch/x86/kvm/mmu.c
4765     +++ b/arch/x86/kvm/mmu.c
4766     @@ -5476,13 +5476,13 @@ int kvm_mmu_module_init(void)
4767    
4768     pte_list_desc_cache = kmem_cache_create("pte_list_desc",
4769     sizeof(struct pte_list_desc),
4770     - 0, 0, NULL);
4771     + 0, SLAB_ACCOUNT, NULL);
4772     if (!pte_list_desc_cache)
4773     goto nomem;
4774    
4775     mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
4776     sizeof(struct kvm_mmu_page),
4777     - 0, 0, NULL);
4778     + 0, SLAB_ACCOUNT, NULL);
4779     if (!mmu_page_header_cache)
4780     goto nomem;
4781    
4782     diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
4783     index bc5921c1e2f2..47d9432756f3 100644
4784     --- a/arch/x86/kvm/vmx.c
4785     +++ b/arch/x86/kvm/vmx.c
4786     @@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4787     * processors. See 22.2.4.
4788     */
4789     vmcs_writel(HOST_TR_BASE,
4790     - (unsigned long)this_cpu_ptr(&cpu_tss));
4791     + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
4792     vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
4793    
4794     /*
4795     diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
4796     index 553f8fd23cc4..4846eff7e4c8 100644
4797     --- a/arch/x86/lib/delay.c
4798     +++ b/arch/x86/lib/delay.c
4799     @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
4800     delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
4801    
4802     /*
4803     - * Use cpu_tss as a cacheline-aligned, seldomly
4804     + * Use cpu_tss_rw as a cacheline-aligned, seldomly
4805     * accessed per-cpu variable as the monitor target.
4806     */
4807     - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
4808     + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
4809    
4810     /*
4811     * AMD, like Intel, supports the EAX hint and EAX=0xf
4812     diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
4813     index b0ff378650a9..3109ba6c6ede 100644
4814     --- a/arch/x86/mm/fault.c
4815     +++ b/arch/x86/mm/fault.c
4816     @@ -29,26 +29,6 @@
4817     #define CREATE_TRACE_POINTS
4818     #include <asm/trace/exceptions.h>
4819    
4820     -/*
4821     - * Page fault error code bits:
4822     - *
4823     - * bit 0 == 0: no page found 1: protection fault
4824     - * bit 1 == 0: read access 1: write access
4825     - * bit 2 == 0: kernel-mode access 1: user-mode access
4826     - * bit 3 == 1: use of reserved bit detected
4827     - * bit 4 == 1: fault was an instruction fetch
4828     - * bit 5 == 1: protection keys block access
4829     - */
4830     -enum x86_pf_error_code {
4831     -
4832     - PF_PROT = 1 << 0,
4833     - PF_WRITE = 1 << 1,
4834     - PF_USER = 1 << 2,
4835     - PF_RSVD = 1 << 3,
4836     - PF_INSTR = 1 << 4,
4837     - PF_PK = 1 << 5,
4838     -};
4839     -
4840     /*
4841     * Returns 0 if mmiotrace is disabled, or if the fault is not
4842     * handled by mmiotrace:
4843     @@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
4844     * If it was a exec (instruction fetch) fault on NX page, then
4845     * do not ignore the fault:
4846     */
4847     - if (error_code & PF_INSTR)
4848     + if (error_code & X86_PF_INSTR)
4849     return 0;
4850    
4851     instr = (void *)convert_ip_to_linear(current, regs);
4852     @@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
4853     * siginfo so userspace can discover which protection key was set
4854     * on the PTE.
4855     *
4856     - * If we get here, we know that the hardware signaled a PF_PK
4857     + * If we get here, we know that the hardware signaled a X86_PF_PK
4858     * fault and that there was a VMA once we got in the fault
4859     * handler. It does *not* guarantee that the VMA we find here
4860     * was the one that we faulted on.
4861     @@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
4862     /*
4863     * force_sig_info_fault() is called from a number of
4864     * contexts, some of which have a VMA and some of which
4865     - * do not. The PF_PK handing happens after we have a
4866     + * do not. The X86_PF_PK handing happens after we have a
4867     * valid VMA, so we should never reach this without a
4868     * valid VMA.
4869     */
4870     @@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
4871     if (!oops_may_print())
4872     return;
4873    
4874     - if (error_code & PF_INSTR) {
4875     + if (error_code & X86_PF_INSTR) {
4876     unsigned int level;
4877     pgd_t *pgd;
4878     pte_t *pte;
4879     @@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
4880     */
4881     if (current->thread.sig_on_uaccess_err && signal) {
4882     tsk->thread.trap_nr = X86_TRAP_PF;
4883     - tsk->thread.error_code = error_code | PF_USER;
4884     + tsk->thread.error_code = error_code | X86_PF_USER;
4885     tsk->thread.cr2 = address;
4886    
4887     /* XXX: hwpoison faults will set the wrong code. */
4888     @@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4889     struct task_struct *tsk = current;
4890    
4891     /* User mode accesses just cause a SIGSEGV */
4892     - if (error_code & PF_USER) {
4893     + if (error_code & X86_PF_USER) {
4894     /*
4895     * It's possible to have interrupts off here:
4896     */
4897     @@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4898     * Instruction fetch faults in the vsyscall page might need
4899     * emulation.
4900     */
4901     - if (unlikely((error_code & PF_INSTR) &&
4902     + if (unlikely((error_code & X86_PF_INSTR) &&
4903     ((address & ~0xfff) == VSYSCALL_ADDR))) {
4904     if (emulate_vsyscall(regs, address))
4905     return;
4906     @@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4907     * are always protection faults.
4908     */
4909     if (address >= TASK_SIZE_MAX)
4910     - error_code |= PF_PROT;
4911     + error_code |= X86_PF_PROT;
4912    
4913     if (likely(show_unhandled_signals))
4914     show_signal_msg(regs, error_code, address, tsk);
4915     @@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
4916    
4917     if (!boot_cpu_has(X86_FEATURE_OSPKE))
4918     return false;
4919     - if (error_code & PF_PK)
4920     + if (error_code & X86_PF_PK)
4921     return true;
4922     /* this checks permission keys on the VMA: */
4923     - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
4924     - (error_code & PF_INSTR), foreign))
4925     + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
4926     + (error_code & X86_PF_INSTR), foreign))
4927     return true;
4928     return false;
4929     }
4930     @@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
4931     int code = BUS_ADRERR;
4932    
4933     /* Kernel mode? Handle exceptions or die: */
4934     - if (!(error_code & PF_USER)) {
4935     + if (!(error_code & X86_PF_USER)) {
4936     no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
4937     return;
4938     }
4939     @@ -1053,14 +1033,14 @@ static noinline void
4940     mm_fault_error(struct pt_regs *regs, unsigned long error_code,
4941     unsigned long address, u32 *pkey, unsigned int fault)
4942     {
4943     - if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
4944     + if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
4945     no_context(regs, error_code, address, 0, 0);
4946     return;
4947     }
4948    
4949     if (fault & VM_FAULT_OOM) {
4950     /* Kernel mode? Handle exceptions or die: */
4951     - if (!(error_code & PF_USER)) {
4952     + if (!(error_code & X86_PF_USER)) {
4953     no_context(regs, error_code, address,
4954     SIGSEGV, SEGV_MAPERR);
4955     return;
4956     @@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
4957    
4958     static int spurious_fault_check(unsigned long error_code, pte_t *pte)
4959     {
4960     - if ((error_code & PF_WRITE) && !pte_write(*pte))
4961     + if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
4962     return 0;
4963    
4964     - if ((error_code & PF_INSTR) && !pte_exec(*pte))
4965     + if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
4966     return 0;
4967     /*
4968     * Note: We do not do lazy flushing on protection key
4969     - * changes, so no spurious fault will ever set PF_PK.
4970     + * changes, so no spurious fault will ever set X86_PF_PK.
4971     */
4972     - if ((error_code & PF_PK))
4973     + if ((error_code & X86_PF_PK))
4974     return 1;
4975    
4976     return 1;
4977     @@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
4978     * change, so user accesses are not expected to cause spurious
4979     * faults.
4980     */
4981     - if (error_code != (PF_WRITE | PF_PROT)
4982     - && error_code != (PF_INSTR | PF_PROT))
4983     + if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
4984     + error_code != (X86_PF_INSTR | X86_PF_PROT))
4985     return 0;
4986    
4987     pgd = init_mm.pgd + pgd_index(address);
4988     @@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
4989     * always an unconditional error and can never result in
4990     * a follow-up action to resolve the fault, like a COW.
4991     */
4992     - if (error_code & PF_PK)
4993     + if (error_code & X86_PF_PK)
4994     return 1;
4995    
4996     /*
4997     * Make sure to check the VMA so that we do not perform
4998     - * faults just to hit a PF_PK as soon as we fill in a
4999     + * faults just to hit a X86_PF_PK as soon as we fill in a
5000     * page.
5001     */
5002     - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
5003     - (error_code & PF_INSTR), foreign))
5004     + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
5005     + (error_code & X86_PF_INSTR), foreign))
5006     return 1;
5007    
5008     - if (error_code & PF_WRITE) {
5009     + if (error_code & X86_PF_WRITE) {
5010     /* write, present and write, not present: */
5011     if (unlikely(!(vma->vm_flags & VM_WRITE)))
5012     return 1;
5013     @@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
5014     }
5015    
5016     /* read, present: */
5017     - if (unlikely(error_code & PF_PROT))
5018     + if (unlikely(error_code & X86_PF_PROT))
5019     return 1;
5020    
5021     /* read, not present: */
5022     @@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
5023     if (!static_cpu_has(X86_FEATURE_SMAP))
5024     return false;
5025    
5026     - if (error_code & PF_USER)
5027     + if (error_code & X86_PF_USER)
5028     return false;
5029    
5030     if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
5031     @@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5032     * protection error (error_code & 9) == 0.
5033     */
5034     if (unlikely(fault_in_kernel_space(address))) {
5035     - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
5036     + if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
5037     if (vmalloc_fault(address) >= 0)
5038     return;
5039    
5040     @@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5041     if (unlikely(kprobes_fault(regs)))
5042     return;
5043    
5044     - if (unlikely(error_code & PF_RSVD))
5045     + if (unlikely(error_code & X86_PF_RSVD))
5046     pgtable_bad(regs, error_code, address);
5047    
5048     if (unlikely(smap_violation(error_code, regs))) {
5049     @@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5050     */
5051     if (user_mode(regs)) {
5052     local_irq_enable();
5053     - error_code |= PF_USER;
5054     + error_code |= X86_PF_USER;
5055     flags |= FAULT_FLAG_USER;
5056     } else {
5057     if (regs->flags & X86_EFLAGS_IF)
5058     @@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5059    
5060     perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
5061    
5062     - if (error_code & PF_WRITE)
5063     + if (error_code & X86_PF_WRITE)
5064     flags |= FAULT_FLAG_WRITE;
5065     - if (error_code & PF_INSTR)
5066     + if (error_code & X86_PF_INSTR)
5067     flags |= FAULT_FLAG_INSTRUCTION;
5068    
5069     /*
5070     @@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5071     * space check, thus avoiding the deadlock:
5072     */
5073     if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
5074     - if ((error_code & PF_USER) == 0 &&
5075     + if (!(error_code & X86_PF_USER) &&
5076     !search_exception_tables(regs->ip)) {
5077     bad_area_nosemaphore(regs, error_code, address, NULL);
5078     return;
5079     @@ -1409,7 +1389,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5080     bad_area(regs, error_code, address);
5081     return;
5082     }
5083     - if (error_code & PF_USER) {
5084     + if (error_code & X86_PF_USER) {
5085     /*
5086     * Accessing the stack below %sp is always a bug.
5087     * The large cushion allows instructions like enter
5088     diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
5089     index af5c1ed21d43..a22c2b95e513 100644
5090     --- a/arch/x86/mm/init.c
5091     +++ b/arch/x86/mm/init.c
5092     @@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
5093     load_cr3(swapper_pg_dir);
5094     __flush_tlb_all();
5095    
5096     - hypervisor_init_mem_mapping();
5097     + x86_init.hyper.init_mem_mapping();
5098    
5099     early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
5100     }
5101     diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
5102     index 048fbe8fc274..adcea90a2046 100644
5103     --- a/arch/x86/mm/init_64.c
5104     +++ b/arch/x86/mm/init_64.c
5105     @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
5106    
5107     #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
5108     void register_page_bootmem_memmap(unsigned long section_nr,
5109     - struct page *start_page, unsigned long size)
5110     + struct page *start_page, unsigned long nr_pages)
5111     {
5112     unsigned long addr = (unsigned long)start_page;
5113     - unsigned long end = (unsigned long)(start_page + size);
5114     + unsigned long end = (unsigned long)(start_page + nr_pages);
5115     unsigned long next;
5116     pgd_t *pgd;
5117     p4d_t *p4d;
5118     pud_t *pud;
5119     pmd_t *pmd;
5120     - unsigned int nr_pages;
5121     + unsigned int nr_pmd_pages;
5122     struct page *page;
5123    
5124     for (; addr < end; addr = next) {
5125     @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
5126     if (pmd_none(*pmd))
5127     continue;
5128    
5129     - nr_pages = 1 << (get_order(PMD_SIZE));
5130     + nr_pmd_pages = 1 << get_order(PMD_SIZE);
5131     page = pmd_page(*pmd);
5132     - while (nr_pages--)
5133     + while (nr_pmd_pages--)
5134     get_page_bootmem(section_nr, page++,
5135     SECTION_INFO);
5136     }
5137     diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
5138     index 8f5be3eb40dd..9ec70d780f1f 100644
5139     --- a/arch/x86/mm/kasan_init_64.c
5140     +++ b/arch/x86/mm/kasan_init_64.c
5141     @@ -4,19 +4,150 @@
5142     #include <linux/bootmem.h>
5143     #include <linux/kasan.h>
5144     #include <linux/kdebug.h>
5145     +#include <linux/memblock.h>
5146     #include <linux/mm.h>
5147     #include <linux/sched.h>
5148     #include <linux/sched/task.h>
5149     #include <linux/vmalloc.h>
5150    
5151     #include <asm/e820/types.h>
5152     +#include <asm/pgalloc.h>
5153     #include <asm/tlbflush.h>
5154     #include <asm/sections.h>
5155     #include <asm/pgtable.h>
5156    
5157     extern struct range pfn_mapped[E820_MAX_ENTRIES];
5158    
5159     -static int __init map_range(struct range *range)
5160     +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
5161     +
5162     +static __init void *early_alloc(size_t size, int nid)
5163     +{
5164     + return memblock_virt_alloc_try_nid_nopanic(size, size,
5165     + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
5166     +}
5167     +
5168     +static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
5169     + unsigned long end, int nid)
5170     +{
5171     + pte_t *pte;
5172     +
5173     + if (pmd_none(*pmd)) {
5174     + void *p;
5175     +
5176     + if (boot_cpu_has(X86_FEATURE_PSE) &&
5177     + ((end - addr) == PMD_SIZE) &&
5178     + IS_ALIGNED(addr, PMD_SIZE)) {
5179     + p = early_alloc(PMD_SIZE, nid);
5180     + if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
5181     + return;
5182     + else if (p)
5183     + memblock_free(__pa(p), PMD_SIZE);
5184     + }
5185     +
5186     + p = early_alloc(PAGE_SIZE, nid);
5187     + pmd_populate_kernel(&init_mm, pmd, p);
5188     + }
5189     +
5190     + pte = pte_offset_kernel(pmd, addr);
5191     + do {
5192     + pte_t entry;
5193     + void *p;
5194     +
5195     + if (!pte_none(*pte))
5196     + continue;
5197     +
5198     + p = early_alloc(PAGE_SIZE, nid);
5199     + entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
5200     + set_pte_at(&init_mm, addr, pte, entry);
5201     + } while (pte++, addr += PAGE_SIZE, addr != end);
5202     +}
5203     +
5204     +static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
5205     + unsigned long end, int nid)
5206     +{
5207     + pmd_t *pmd;
5208     + unsigned long next;
5209     +
5210     + if (pud_none(*pud)) {
5211     + void *p;
5212     +
5213     + if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
5214     + ((end - addr) == PUD_SIZE) &&
5215     + IS_ALIGNED(addr, PUD_SIZE)) {
5216     + p = early_alloc(PUD_SIZE, nid);
5217     + if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
5218     + return;
5219     + else if (p)
5220     + memblock_free(__pa(p), PUD_SIZE);
5221     + }
5222     +
5223     + p = early_alloc(PAGE_SIZE, nid);
5224     + pud_populate(&init_mm, pud, p);
5225     + }
5226     +
5227     + pmd = pmd_offset(pud, addr);
5228     + do {
5229     + next = pmd_addr_end(addr, end);
5230     + if (!pmd_large(*pmd))
5231     + kasan_populate_pmd(pmd, addr, next, nid);
5232     + } while (pmd++, addr = next, addr != end);
5233     +}
5234     +
5235     +static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
5236     + unsigned long end, int nid)
5237     +{
5238     + pud_t *pud;
5239     + unsigned long next;
5240     +
5241     + if (p4d_none(*p4d)) {
5242     + void *p = early_alloc(PAGE_SIZE, nid);
5243     +
5244     + p4d_populate(&init_mm, p4d, p);
5245     + }
5246     +
5247     + pud = pud_offset(p4d, addr);
5248     + do {
5249     + next = pud_addr_end(addr, end);
5250     + if (!pud_large(*pud))
5251     + kasan_populate_pud(pud, addr, next, nid);
5252     + } while (pud++, addr = next, addr != end);
5253     +}
5254     +
5255     +static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
5256     + unsigned long end, int nid)
5257     +{
5258     + void *p;
5259     + p4d_t *p4d;
5260     + unsigned long next;
5261     +
5262     + if (pgd_none(*pgd)) {
5263     + p = early_alloc(PAGE_SIZE, nid);
5264     + pgd_populate(&init_mm, pgd, p);
5265     + }
5266     +
5267     + p4d = p4d_offset(pgd, addr);
5268     + do {
5269     + next = p4d_addr_end(addr, end);
5270     + kasan_populate_p4d(p4d, addr, next, nid);
5271     + } while (p4d++, addr = next, addr != end);
5272     +}
5273     +
5274     +static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
5275     + int nid)
5276     +{
5277     + pgd_t *pgd;
5278     + unsigned long next;
5279     +
5280     + addr = addr & PAGE_MASK;
5281     + end = round_up(end, PAGE_SIZE);
5282     + pgd = pgd_offset_k(addr);
5283     + do {
5284     + next = pgd_addr_end(addr, end);
5285     + kasan_populate_pgd(pgd, addr, next, nid);
5286     + } while (pgd++, addr = next, addr != end);
5287     +}
5288     +
5289     +static void __init map_range(struct range *range)
5290     {
5291     unsigned long start;
5292     unsigned long end;
5293     @@ -24,15 +155,17 @@ static int __init map_range(struct range *range)
5294     start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
5295     end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
5296    
5297     - return vmemmap_populate(start, end, NUMA_NO_NODE);
5298     + kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
5299     }
5300    
5301     static void __init clear_pgds(unsigned long start,
5302     unsigned long end)
5303     {
5304     pgd_t *pgd;
5305     + /* See comment in kasan_init() */
5306     + unsigned long pgd_end = end & PGDIR_MASK;
5307    
5308     - for (; start < end; start += PGDIR_SIZE) {
5309     + for (; start < pgd_end; start += PGDIR_SIZE) {
5310     pgd = pgd_offset_k(start);
5311     /*
5312     * With folded p4d, pgd_clear() is nop, use p4d_clear()
5313     @@ -43,29 +176,61 @@ static void __init clear_pgds(unsigned long start,
5314     else
5315     pgd_clear(pgd);
5316     }
5317     +
5318     + pgd = pgd_offset_k(start);
5319     + for (; start < end; start += P4D_SIZE)
5320     + p4d_clear(p4d_offset(pgd, start));
5321     +}
5322     +
5323     +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
5324     +{
5325     + unsigned long p4d;
5326     +
5327     + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
5328     + return (p4d_t *)pgd;
5329     +
5330     + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
5331     + p4d += __START_KERNEL_map - phys_base;
5332     + return (p4d_t *)p4d + p4d_index(addr);
5333     +}
5334     +
5335     +static void __init kasan_early_p4d_populate(pgd_t *pgd,
5336     + unsigned long addr,
5337     + unsigned long end)
5338     +{
5339     + pgd_t pgd_entry;
5340     + p4d_t *p4d, p4d_entry;
5341     + unsigned long next;
5342     +
5343     + if (pgd_none(*pgd)) {
5344     + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
5345     + set_pgd(pgd, pgd_entry);
5346     + }
5347     +
5348     + p4d = early_p4d_offset(pgd, addr);
5349     + do {
5350     + next = p4d_addr_end(addr, end);
5351     +
5352     + if (!p4d_none(*p4d))
5353     + continue;
5354     +
5355     + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
5356     + set_p4d(p4d, p4d_entry);
5357     + } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
5358     }
5359    
5360     static void __init kasan_map_early_shadow(pgd_t *pgd)
5361     {
5362     - int i;
5363     - unsigned long start = KASAN_SHADOW_START;
5364     + /* See comment in kasan_init() */
5365     + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
5366     unsigned long end = KASAN_SHADOW_END;
5367     + unsigned long next;
5368    
5369     - for (i = pgd_index(start); start < end; i++) {
5370     - switch (CONFIG_PGTABLE_LEVELS) {
5371     - case 4:
5372     - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
5373     - _KERNPG_TABLE);
5374     - break;
5375     - case 5:
5376     - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
5377     - _KERNPG_TABLE);
5378     - break;
5379     - default:
5380     - BUILD_BUG();
5381     - }
5382     - start += PGDIR_SIZE;
5383     - }
5384     + pgd += pgd_index(addr);
5385     + do {
5386     + next = pgd_addr_end(addr, end);
5387     + kasan_early_p4d_populate(pgd, addr, next);
5388     + } while (pgd++, addr = next, addr != end);
5389     }
5390    
5391     #ifdef CONFIG_KASAN_INLINE
5392     @@ -102,7 +267,7 @@ void __init kasan_early_init(void)
5393     for (i = 0; i < PTRS_PER_PUD; i++)
5394     kasan_zero_pud[i] = __pud(pud_val);
5395    
5396     - for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
5397     + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
5398     kasan_zero_p4d[i] = __p4d(p4d_val);
5399    
5400     kasan_map_early_shadow(early_top_pgt);
5401     @@ -112,37 +277,76 @@ void __init kasan_early_init(void)
5402     void __init kasan_init(void)
5403     {
5404     int i;
5405     + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
5406    
5407     #ifdef CONFIG_KASAN_INLINE
5408     register_die_notifier(&kasan_die_notifier);
5409     #endif
5410    
5411     memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
5412     +
5413     + /*
5414     + * We use the same shadow offset for 4- and 5-level paging to
5415     + * facilitate boot-time switching between paging modes.
5416     + * As result in 5-level paging mode KASAN_SHADOW_START and
5417     + * KASAN_SHADOW_END are not aligned to PGD boundary.
5418     + *
5419     + * KASAN_SHADOW_START doesn't share PGD with anything else.
5420     + * We claim whole PGD entry to make things easier.
5421     + *
5422     + * KASAN_SHADOW_END lands in the last PGD entry and it collides with
5423     + * bunch of things like kernel code, modules, EFI mapping, etc.
5424     + * We need to take extra steps to not overwrite them.
5425     + */
5426     + if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
5427     + void *ptr;
5428     +
5429     + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
5430     + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
5431     + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
5432     + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
5433     + }
5434     +
5435     load_cr3(early_top_pgt);
5436     __flush_tlb_all();
5437    
5438     - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
5439     + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
5440    
5441     - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
5442     + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
5443     kasan_mem_to_shadow((void *)PAGE_OFFSET));
5444    
5445     for (i = 0; i < E820_MAX_ENTRIES; i++) {
5446     if (pfn_mapped[i].end == 0)
5447     break;
5448    
5449     - if (map_range(&pfn_mapped[i]))
5450     - panic("kasan: unable to allocate shadow!");
5451     + map_range(&pfn_mapped[i]);
5452     }
5453     +
5454     kasan_populate_zero_shadow(
5455     kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
5456     kasan_mem_to_shadow((void *)__START_KERNEL_map));
5457    
5458     - vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
5459     - (unsigned long)kasan_mem_to_shadow(_end),
5460     - NUMA_NO_NODE);
5461     + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
5462     + (unsigned long)kasan_mem_to_shadow(_end),
5463     + early_pfn_to_nid(__pa(_stext)));
5464     +
5465     + shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
5466     + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
5467     + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
5468     + PAGE_SIZE);
5469     +
5470     + shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
5471     + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
5472     + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
5473     + PAGE_SIZE);
5474    
5475     kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
5476     - (void *)KASAN_SHADOW_END);
5477     + shadow_cpu_entry_begin);
5478     +
5479     + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
5480     + (unsigned long)shadow_cpu_entry_end, 0);
5481     +
5482     + kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
5483    
5484     load_cr3(init_top_pgt);
5485     __flush_tlb_all();
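
The new kasan_populate_shadow() path above descends pgd -> p4d -> pud -> pmd -> pte with the usual "next = *_addr_end(addr, end)" idiom and installs a 1G/2M block mapping only when a sub-range is block-sized and aligned, falling back to 4K pages otherwise. A rough stand-alone sketch of that range-splitting idiom for just the 2M level; sizes are the usual x86-64 values, but the helper and the address range are invented for illustration and are not kernel APIs:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE  0x1000ULL
#define PMD_SIZE   0x200000ULL        /* 2 MiB */

/* next level boundary above addr, clamped to end (same shape as pmd_addr_end) */
static uint64_t addr_end(uint64_t addr, uint64_t end, uint64_t size)
{
	uint64_t next = (addr + size) & ~(size - 1);
	return next < end ? next : end;
}

int main(void)
{
	uint64_t addr = 0xffff880000001000ULL;   /* arbitrary example range */
	uint64_t end  = 0xffff880000602000ULL;
	uint64_t next;

	for (; addr < end; addr = next) {
		next = addr_end(addr, end, PMD_SIZE);
		if ((next - addr) == PMD_SIZE && !(addr & (PMD_SIZE - 1)))
			printf("map one 2M block at %#llx\n",
			       (unsigned long long)addr);
		else
			printf("map %llu 4K pages at %#llx\n",
			       (unsigned long long)((next - addr) / PAGE_SIZE),
			       (unsigned long long)addr);
	}
	return 0;
}
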
5486     diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
5487     index 84fcfde53f8f..04d5157fe7f8 100644
5488     --- a/arch/x86/power/cpu.c
5489     +++ b/arch/x86/power/cpu.c
5490     @@ -160,17 +160,19 @@ static void do_fpu_end(void)
5491     static void fix_processor_context(void)
5492     {
5493     int cpu = smp_processor_id();
5494     - struct tss_struct *t = &per_cpu(cpu_tss, cpu);
5495     #ifdef CONFIG_X86_64
5496     struct desc_struct *desc = get_cpu_gdt_rw(cpu);
5497     tss_desc tss;
5498     #endif
5499     - set_tss_desc(cpu, t); /*
5500     - * This just modifies memory; should not be
5501     - * necessary. But... This is necessary, because
5502     - * 386 hardware has concept of busy TSS or some
5503     - * similar stupidity.
5504     - */
5505     +
5506     + /*
5507     + * We need to reload TR, which requires that we change the
5508     + * GDT entry to indicate "available" first.
5509     + *
5510     + * XXX: This could probably all be replaced by a call to
5511     + * force_reload_TR().
5512     + */
5513     + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
5514    
5515     #ifdef CONFIG_X86_64
5516     memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
5517     diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
5518     index de503c225ae1..754d5391d9fa 100644
5519     --- a/arch/x86/xen/enlighten_hvm.c
5520     +++ b/arch/x86/xen/enlighten_hvm.c
5521     @@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void)
5522     return xen_cpuid_base();
5523     }
5524    
5525     -const struct hypervisor_x86 x86_hyper_xen_hvm = {
5526     +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
5527     .name = "Xen HVM",
5528     .detect = xen_platform_hvm,
5529     - .init_platform = xen_hvm_guest_init,
5530     - .pin_vcpu = xen_pin_vcpu,
5531     - .x2apic_available = xen_x2apic_para_available,
5532     - .init_mem_mapping = xen_hvm_init_mem_mapping,
5533     + .type = X86_HYPER_XEN_HVM,
5534     + .init.init_platform = xen_hvm_guest_init,
5535     + .init.x2apic_available = xen_x2apic_para_available,
5536     + .init.init_mem_mapping = xen_hvm_init_mem_mapping,
5537     + .runtime.pin_vcpu = xen_pin_vcpu,
5538     };
5539     -EXPORT_SYMBOL(x86_hyper_xen_hvm);
5540     diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
5541     index d4396e27b1fb..ae3a071e1d0f 100644
5542     --- a/arch/x86/xen/enlighten_pv.c
5543     +++ b/arch/x86/xen/enlighten_pv.c
5544     @@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
5545     #ifdef CONFIG_X86_MCE
5546     { machine_check, xen_machine_check, true },
5547     #endif
5548     - { nmi, xen_nmi, true },
5549     + { nmi, xen_xennmi, true },
5550     { overflow, xen_overflow, false },
5551     #ifdef CONFIG_IA32_EMULATION
5552     { entry_INT80_compat, xen_entry_INT80_compat, false },
5553     @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
5554     }
5555     }
5556    
5557     -static void xen_load_sp0(struct tss_struct *tss,
5558     - struct thread_struct *thread)
5559     +static void xen_load_sp0(unsigned long sp0)
5560     {
5561     struct multicall_space mcs;
5562    
5563     mcs = xen_mc_entry(0);
5564     - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
5565     + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
5566     xen_mc_issue(PARAVIRT_LAZY_CPU);
5567     - tss->x86_tss.sp0 = thread->sp0;
5568     + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
5569     }
5570    
5571     void xen_set_iopl_mask(unsigned mask)
5572     @@ -1460,9 +1459,9 @@ static uint32_t __init xen_platform_pv(void)
5573     return 0;
5574     }
5575    
5576     -const struct hypervisor_x86 x86_hyper_xen_pv = {
5577     +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
5578     .name = "Xen PV",
5579     .detect = xen_platform_pv,
5580     - .pin_vcpu = xen_pin_vcpu,
5581     + .type = X86_HYPER_XEN_PV,
5582     + .runtime.pin_vcpu = xen_pin_vcpu,
5583     };
5584     -EXPORT_SYMBOL(x86_hyper_xen_pv);
5585     diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
5586     index 71495f1a86d7..c2454237fa67 100644
5587     --- a/arch/x86/xen/mmu_pv.c
5588     +++ b/arch/x86/xen/mmu_pv.c
5589     @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
5590     }
5591     PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
5592    
5593     -#if CONFIG_PGTABLE_LEVELS == 4
5594     +#ifdef CONFIG_X86_64
5595     __visible pudval_t xen_pud_val(pud_t pud)
5596     {
5597     return pte_mfn_to_pfn(pud.pud);
5598     @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
5599    
5600     xen_mc_issue(PARAVIRT_LAZY_MMU);
5601     }
5602     -#endif /* CONFIG_PGTABLE_LEVELS == 4 */
5603     +#endif /* CONFIG_X86_64 */
5604    
5605     static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
5606     int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
5607     @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
5608     int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
5609     bool last, unsigned long limit)
5610     {
5611     - int i, nr, flush = 0;
5612     + int flush = 0;
5613     + pud_t *pud;
5614    
5615     - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
5616     - for (i = 0; i < nr; i++) {
5617     - pud_t *pud;
5618    
5619     - if (p4d_none(p4d[i]))
5620     - continue;
5621     + if (p4d_none(*p4d))
5622     + return flush;
5623    
5624     - pud = pud_offset(&p4d[i], 0);
5625     - if (PTRS_PER_PUD > 1)
5626     - flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
5627     - flush |= xen_pud_walk(mm, pud, func,
5628     - last && i == nr - 1, limit);
5629     - }
5630     + pud = pud_offset(p4d, 0);
5631     + if (PTRS_PER_PUD > 1)
5632     + flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
5633     + flush |= xen_pud_walk(mm, pud, func, last, limit);
5634     return flush;
5635     }
5636    
5637     @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
5638     continue;
5639    
5640     p4d = p4d_offset(&pgd[i], 0);
5641     - if (PTRS_PER_P4D > 1)
5642     - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
5643     flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
5644     }
5645    
5646     @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
5647     {
5648     pgd_t *pgd;
5649     p4d_t *p4d;
5650     - unsigned int i;
5651     bool unpin;
5652    
5653     unpin = (vaddr == 2 * PGDIR_SIZE);
5654     vaddr &= PMD_MASK;
5655     pgd = pgd_offset_k(vaddr);
5656     p4d = p4d_offset(pgd, 0);
5657     - for (i = 0; i < PTRS_PER_P4D; i++) {
5658     - if (p4d_none(p4d[i]))
5659     - continue;
5660     - xen_cleanmfnmap_p4d(p4d + i, unpin);
5661     - }
5662     - if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
5663     - set_pgd(pgd, __pgd(0));
5664     - xen_cleanmfnmap_free_pgtbl(p4d, unpin);
5665     - }
5666     + if (!p4d_none(*p4d))
5667     + xen_cleanmfnmap_p4d(p4d, unpin);
5668     }
5669    
5670     static void __init xen_pagetable_p2m_free(void)
5671     @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
5672     xen_release_ptpage(pfn, PT_PMD);
5673     }
5674    
5675     -#if CONFIG_PGTABLE_LEVELS >= 4
5676     +#ifdef CONFIG_X86_64
5677     static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
5678     {
5679     xen_alloc_ptpage(mm, pfn, PT_PUD);
5680     @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
5681     */
5682     void __init xen_relocate_p2m(void)
5683     {
5684     - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
5685     + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
5686     unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
5687     - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
5688     + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
5689     pte_t *pt;
5690     pmd_t *pmd;
5691     pud_t *pud;
5692     - p4d_t *p4d = NULL;
5693     pgd_t *pgd;
5694     unsigned long *new_p2m;
5695     int save_pud;
5696     @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
5697     n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
5698     n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
5699     n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
5700     - if (PTRS_PER_P4D > 1)
5701     - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
5702     - else
5703     - n_p4d = 0;
5704     - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
5705     + n_frames = n_pte + n_pt + n_pmd + n_pud;
5706    
5707     new_area = xen_find_free_area(PFN_PHYS(n_frames));
5708     if (!new_area) {
5709     @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
5710     * To avoid any possible virtual address collision, just use
5711     * 2 * PUD_SIZE for the new area.
5712     */
5713     - p4d_phys = new_area;
5714     - pud_phys = p4d_phys + PFN_PHYS(n_p4d);
5715     + pud_phys = new_area;
5716     pmd_phys = pud_phys + PFN_PHYS(n_pud);
5717     pt_phys = pmd_phys + PFN_PHYS(n_pmd);
5718     p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
5719    
5720     pgd = __va(read_cr3_pa());
5721     new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
5722     - idx_p4d = 0;
5723     save_pud = n_pud;
5724     - do {
5725     - if (n_p4d > 0) {
5726     - p4d = early_memremap(p4d_phys, PAGE_SIZE);
5727     - clear_page(p4d);
5728     - n_pud = min(save_pud, PTRS_PER_P4D);
5729     - }
5730     - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
5731     - pud = early_memremap(pud_phys, PAGE_SIZE);
5732     - clear_page(pud);
5733     - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
5734     - idx_pmd++) {
5735     - pmd = early_memremap(pmd_phys, PAGE_SIZE);
5736     - clear_page(pmd);
5737     - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
5738     - idx_pt++) {
5739     - pt = early_memremap(pt_phys, PAGE_SIZE);
5740     - clear_page(pt);
5741     - for (idx_pte = 0;
5742     - idx_pte < min(n_pte, PTRS_PER_PTE);
5743     - idx_pte++) {
5744     - set_pte(pt + idx_pte,
5745     - pfn_pte(p2m_pfn, PAGE_KERNEL));
5746     - p2m_pfn++;
5747     - }
5748     - n_pte -= PTRS_PER_PTE;
5749     - early_memunmap(pt, PAGE_SIZE);
5750     - make_lowmem_page_readonly(__va(pt_phys));
5751     - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
5752     - PFN_DOWN(pt_phys));
5753     - set_pmd(pmd + idx_pt,
5754     - __pmd(_PAGE_TABLE | pt_phys));
5755     - pt_phys += PAGE_SIZE;
5756     + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
5757     + pud = early_memremap(pud_phys, PAGE_SIZE);
5758     + clear_page(pud);
5759     + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
5760     + idx_pmd++) {
5761     + pmd = early_memremap(pmd_phys, PAGE_SIZE);
5762     + clear_page(pmd);
5763     + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
5764     + idx_pt++) {
5765     + pt = early_memremap(pt_phys, PAGE_SIZE);
5766     + clear_page(pt);
5767     + for (idx_pte = 0;
5768     + idx_pte < min(n_pte, PTRS_PER_PTE);
5769     + idx_pte++) {
5770     + set_pte(pt + idx_pte,
5771     + pfn_pte(p2m_pfn, PAGE_KERNEL));
5772     + p2m_pfn++;
5773     }
5774     - n_pt -= PTRS_PER_PMD;
5775     - early_memunmap(pmd, PAGE_SIZE);
5776     - make_lowmem_page_readonly(__va(pmd_phys));
5777     - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
5778     - PFN_DOWN(pmd_phys));
5779     - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
5780     - pmd_phys += PAGE_SIZE;
5781     + n_pte -= PTRS_PER_PTE;
5782     + early_memunmap(pt, PAGE_SIZE);
5783     + make_lowmem_page_readonly(__va(pt_phys));
5784     + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
5785     + PFN_DOWN(pt_phys));
5786     + set_pmd(pmd + idx_pt,
5787     + __pmd(_PAGE_TABLE | pt_phys));
5788     + pt_phys += PAGE_SIZE;
5789     }
5790     - n_pmd -= PTRS_PER_PUD;
5791     - early_memunmap(pud, PAGE_SIZE);
5792     - make_lowmem_page_readonly(__va(pud_phys));
5793     - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
5794     - if (n_p4d > 0)
5795     - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
5796     - else
5797     - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
5798     - pud_phys += PAGE_SIZE;
5799     - }
5800     - if (n_p4d > 0) {
5801     - save_pud -= PTRS_PER_P4D;
5802     - early_memunmap(p4d, PAGE_SIZE);
5803     - make_lowmem_page_readonly(__va(p4d_phys));
5804     - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
5805     - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
5806     - p4d_phys += PAGE_SIZE;
5807     + n_pt -= PTRS_PER_PMD;
5808     + early_memunmap(pmd, PAGE_SIZE);
5809     + make_lowmem_page_readonly(__va(pmd_phys));
5810     + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
5811     + PFN_DOWN(pmd_phys));
5812     + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
5813     + pmd_phys += PAGE_SIZE;
5814     }
5815     - } while (++idx_p4d < n_p4d);
5816     + n_pmd -= PTRS_PER_PUD;
5817     + early_memunmap(pud, PAGE_SIZE);
5818     + make_lowmem_page_readonly(__va(pud_phys));
5819     + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
5820     + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
5821     + pud_phys += PAGE_SIZE;
5822     + }
5823    
5824     /* Now copy the old p2m info to the new area. */
5825     memcpy(new_p2m, xen_p2m_addr, size);
5826     @@ -2311,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
5827     #endif
5828     case FIX_TEXT_POKE0:
5829     case FIX_TEXT_POKE1:
5830     - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
5831     + case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
5832     /* All local page mappings */
5833     pte = pfn_pte(phys, prot);
5834     break;
5835     @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
5836     pv_mmu_ops.set_pte = xen_set_pte;
5837     pv_mmu_ops.set_pmd = xen_set_pmd;
5838     pv_mmu_ops.set_pud = xen_set_pud;
5839     -#if CONFIG_PGTABLE_LEVELS >= 4
5840     +#ifdef CONFIG_X86_64
5841     pv_mmu_ops.set_p4d = xen_set_p4d;
5842     #endif
5843    
5844     @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
5845     pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
5846     pv_mmu_ops.release_pte = xen_release_pte;
5847     pv_mmu_ops.release_pmd = xen_release_pmd;
5848     -#if CONFIG_PGTABLE_LEVELS >= 4
5849     +#ifdef CONFIG_X86_64
5850     pv_mmu_ops.alloc_pud = xen_alloc_pud;
5851     pv_mmu_ops.release_pud = xen_release_pud;
5852     #endif
5853     @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
5854     .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
5855     .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
5856    
5857     -#if CONFIG_PGTABLE_LEVELS >= 4
5858     +#ifdef CONFIG_X86_64
5859     .pud_val = PV_CALLEE_SAVE(xen_pud_val),
5860     .make_pud = PV_CALLEE_SAVE(xen_make_pud),
5861     .set_p4d = xen_set_p4d_hyper,
5862    
5863     .alloc_pud = xen_alloc_pmd_init,
5864     .release_pud = xen_release_pmd_init,
5865     -#endif /* CONFIG_PGTABLE_LEVELS == 4 */
5866     +#endif /* CONFIG_X86_64 */
5867    
5868     .activate_mm = xen_activate_mm,
5869     .dup_mmap = xen_dup_mmap,
5870     diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
5871     index 05f91ce9b55e..c0c756c76afe 100644
5872     --- a/arch/x86/xen/smp_pv.c
5873     +++ b/arch/x86/xen/smp_pv.c
5874     @@ -14,6 +14,7 @@
5875     * single-threaded.
5876     */
5877     #include <linux/sched.h>
5878     +#include <linux/sched/task_stack.h>
5879     #include <linux/err.h>
5880     #include <linux/slab.h>
5881     #include <linux/smp.h>
5882     @@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5883     #endif
5884     memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
5885    
5886     + /*
5887     + * Bring up the CPU in cpu_bringup_and_idle() with the stack
5888     + * pointing just below where pt_regs would be if it were a normal
5889     + * kernel entry.
5890     + */
5891     ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
5892     ctxt->flags = VGCF_IN_KERNEL;
5893     ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
5894     ctxt->user_regs.ds = __USER_DS;
5895     ctxt->user_regs.es = __USER_DS;
5896     ctxt->user_regs.ss = __KERNEL_DS;
5897     + ctxt->user_regs.cs = __KERNEL_CS;
5898     + ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
5899    
5900     xen_copy_trap_info(ctxt->trap_ctxt);
5901    
5902     @@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5903     ctxt->gdt_frames[0] = gdt_mfn;
5904     ctxt->gdt_ents = GDT_ENTRIES;
5905    
5906     + /*
5907     + * Set SS:SP that Xen will use when entering guest kernel mode
5908     + * from guest user mode. Subsequent calls to load_sp0() can
5909     + * change this value.
5910     + */
5911     ctxt->kernel_ss = __KERNEL_DS;
5912     - ctxt->kernel_sp = idle->thread.sp0;
5913     + ctxt->kernel_sp = task_top_of_stack(idle);
5914    
5915     #ifdef CONFIG_X86_32
5916     ctxt->event_callback_cs = __KERNEL_CS;
5917     @@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5918     (unsigned long)xen_hypervisor_callback;
5919     ctxt->failsafe_callback_eip =
5920     (unsigned long)xen_failsafe_callback;
5921     - ctxt->user_regs.cs = __KERNEL_CS;
5922     per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
5923    
5924     - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
5925     ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
5926     if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
5927     BUG();
5928     diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
5929     index c98a48c861fd..8a10c9a9e2b5 100644
5930     --- a/arch/x86/xen/xen-asm_64.S
5931     +++ b/arch/x86/xen/xen-asm_64.S
5932     @@ -30,7 +30,7 @@ xen_pv_trap debug
5933     xen_pv_trap xendebug
5934     xen_pv_trap int3
5935     xen_pv_trap xenint3
5936     -xen_pv_trap nmi
5937     +xen_pv_trap xennmi
5938     xen_pv_trap overflow
5939     xen_pv_trap bounds
5940     xen_pv_trap invalid_op
5941     diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
5942     index b5b8d7f43557..497cc55a0c16 100644
5943     --- a/arch/x86/xen/xen-head.S
5944     +++ b/arch/x86/xen/xen-head.S
5945     @@ -10,6 +10,7 @@
5946     #include <asm/boot.h>
5947     #include <asm/asm.h>
5948     #include <asm/page_types.h>
5949     +#include <asm/unwind_hints.h>
5950    
5951     #include <xen/interface/elfnote.h>
5952     #include <xen/interface/features.h>
5953     @@ -20,6 +21,7 @@
5954     #ifdef CONFIG_XEN_PV
5955     __INIT
5956     ENTRY(startup_xen)
5957     + UNWIND_HINT_EMPTY
5958     cld
5959    
5960     /* Clear .bss */
5961     @@ -34,21 +36,24 @@ ENTRY(startup_xen)
5962     mov $init_thread_union+THREAD_SIZE, %_ASM_SP
5963    
5964     jmp xen_start_kernel
5965     -
5966     +END(startup_xen)
5967     __FINIT
5968     #endif
5969    
5970     .pushsection .text
5971     .balign PAGE_SIZE
5972     ENTRY(hypercall_page)
5973     - .skip PAGE_SIZE
5974     + .rept (PAGE_SIZE / 32)
5975     + UNWIND_HINT_EMPTY
5976     + .skip 32
5977     + .endr
5978    
5979     #define HYPERCALL(n) \
5980     .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
5981     .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
5982     #include <asm/xen-hypercalls.h>
5983     #undef HYPERCALL
5984     -
5985     +END(hypercall_page)
5986     .popsection
5987    
5988     ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
5989     diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
5990     index a4783da90ba8..0f860cf0d56d 100644
5991     --- a/block/bfq-iosched.c
5992     +++ b/block/bfq-iosched.c
5993     @@ -108,6 +108,7 @@
5994     #include "blk-mq-tag.h"
5995     #include "blk-mq-sched.h"
5996     #include "bfq-iosched.h"
5997     +#include "blk-wbt.h"
5998    
5999     #define BFQ_BFQQ_FNS(name) \
6000     void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
6001     @@ -4775,7 +4776,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
6002     bfq_init_root_group(bfqd->root_group, bfqd);
6003     bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
6004    
6005     -
6006     + wbt_disable_default(q);
6007     return 0;
6008    
6009     out_free:
6010     diff --git a/block/blk-wbt.c b/block/blk-wbt.c
6011     index 6a9a0f03a67b..e59d59c11ebb 100644
6012     --- a/block/blk-wbt.c
6013     +++ b/block/blk-wbt.c
6014     @@ -654,7 +654,7 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
6015     }
6016    
6017     /*
6018     - * Disable wbt, if enabled by default. Only called from CFQ.
6019     + * Disable wbt, if enabled by default.
6020     */
6021     void wbt_disable_default(struct request_queue *q)
6022     {
6023     diff --git a/crypto/lrw.c b/crypto/lrw.c
6024     index a8bfae4451bf..eb681e9fe574 100644
6025     --- a/crypto/lrw.c
6026     +++ b/crypto/lrw.c
6027     @@ -610,8 +610,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
6028     ecb_name[len - 1] = 0;
6029    
6030     if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
6031     - "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME)
6032     - return -ENAMETOOLONG;
6033     + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) {
6034     + err = -ENAMETOOLONG;
6035     + goto err_drop_spawn;
6036     + }
6037     }
6038    
6039     inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
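The lrw change above routes the over-long-name failure through the existing err_drop_spawn label so the spawn grabbed earlier in create() is released. A minimal stand-alone sketch of that goto-unwind shape; the malloc stands in for the grabbed resource and nothing below is crypto API:

#include <errno.h>
#include <stdlib.h>
#include <string.h>

static int create_instance(const char *name)
{
	int err;
	char *spawn = malloc(32);	/* stands in for the grabbed spawn */

	if (!spawn)
		return -ENOMEM;

	if (strlen(name) >= 32) {
		err = -ENAMETOOLONG;	/* was: return -ENAMETOOLONG (leaked spawn) */
		goto err_drop_spawn;
	}

	/* ... use spawn ... */
	free(spawn);
	return 0;

err_drop_spawn:
	free(spawn);
	return err;
}

int main(void)
{
	return create_instance("lrw(ecb(aes))") ? 1 : 0;
}
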
6040     diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
6041     index 3c3a37b8503b..572b6c7303ed 100644
6042     --- a/drivers/acpi/apei/ghes.c
6043     +++ b/drivers/acpi/apei/ghes.c
6044     @@ -51,6 +51,7 @@
6045     #include <acpi/actbl1.h>
6046     #include <acpi/ghes.h>
6047     #include <acpi/apei.h>
6048     +#include <asm/fixmap.h>
6049     #include <asm/tlbflush.h>
6050     #include <ras/ras_event.h>
6051    
6052     @@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
6053     * Because the memory area used to transfer hardware error information
6054     * from BIOS to Linux can be determined only in NMI, IRQ or timer
6055     * handler, but general ioremap can not be used in atomic context, so
6056     - * a special version of atomic ioremap is implemented for that.
6057     + * the fixmap is used instead.
6058     */
6059    
6060     /*
6061     @@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
6062     /* virtual memory area for atomic ioremap */
6063     static struct vm_struct *ghes_ioremap_area;
6064     /*
6065     - * These 2 spinlock is used to prevent atomic ioremap virtual memory
6066     - * area from being mapped simultaneously.
6067     + * These 2 spinlocks are used to prevent the fixmap entries from being used
6068     + * simultaneously.
6069     */
6070     static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
6071     static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
6072     @@ -159,52 +160,36 @@ static void ghes_ioremap_exit(void)
6073    
6074     static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
6075     {
6076     - unsigned long vaddr;
6077     phys_addr_t paddr;
6078     pgprot_t prot;
6079    
6080     - vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
6081     -
6082     paddr = pfn << PAGE_SHIFT;
6083     prot = arch_apei_get_mem_attribute(paddr);
6084     - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
6085     + __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
6086    
6087     - return (void __iomem *)vaddr;
6088     + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
6089     }
6090    
6091     static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
6092     {
6093     - unsigned long vaddr, paddr;
6094     + phys_addr_t paddr;
6095     pgprot_t prot;
6096    
6097     - vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
6098     -
6099     paddr = pfn << PAGE_SHIFT;
6100     prot = arch_apei_get_mem_attribute(paddr);
6101     + __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
6102    
6103     - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
6104     -
6105     - return (void __iomem *)vaddr;
6106     + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
6107     }
6108    
6109     -static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
6110     +static void ghes_iounmap_nmi(void)
6111     {
6112     - unsigned long vaddr = (unsigned long __force)vaddr_ptr;
6113     - void *base = ghes_ioremap_area->addr;
6114     -
6115     - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
6116     - unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
6117     - arch_apei_flush_tlb_one(vaddr);
6118     + clear_fixmap(FIX_APEI_GHES_NMI);
6119     }
6120    
6121     -static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
6122     +static void ghes_iounmap_irq(void)
6123     {
6124     - unsigned long vaddr = (unsigned long __force)vaddr_ptr;
6125     - void *base = ghes_ioremap_area->addr;
6126     -
6127     - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
6128     - unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
6129     - arch_apei_flush_tlb_one(vaddr);
6130     + clear_fixmap(FIX_APEI_GHES_IRQ);
6131     }
6132    
6133     static int ghes_estatus_pool_init(void)
6134     @@ -360,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
6135     paddr += trunk;
6136     buffer += trunk;
6137     if (in_nmi) {
6138     - ghes_iounmap_nmi(vaddr);
6139     + ghes_iounmap_nmi();
6140     raw_spin_unlock(&ghes_ioremap_lock_nmi);
6141     } else {
6142     - ghes_iounmap_irq(vaddr);
6143     + ghes_iounmap_irq();
6144     spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
6145     }
6146     }
6147     @@ -851,17 +836,8 @@ static void ghes_sea_remove(struct ghes *ghes)
6148     synchronize_rcu();
6149     }
6150     #else /* CONFIG_ACPI_APEI_SEA */
6151     -static inline void ghes_sea_add(struct ghes *ghes)
6152     -{
6153     - pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
6154     - ghes->generic->header.source_id);
6155     -}
6156     -
6157     -static inline void ghes_sea_remove(struct ghes *ghes)
6158     -{
6159     - pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
6160     - ghes->generic->header.source_id);
6161     -}
6162     +static inline void ghes_sea_add(struct ghes *ghes) { }
6163     +static inline void ghes_sea_remove(struct ghes *ghes) { }
6164     #endif /* CONFIG_ACPI_APEI_SEA */
6165    
6166     #ifdef CONFIG_HAVE_ACPI_APEI_NMI
6167     @@ -1063,23 +1039,9 @@ static void ghes_nmi_init_cxt(void)
6168     init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
6169     }
6170     #else /* CONFIG_HAVE_ACPI_APEI_NMI */
6171     -static inline void ghes_nmi_add(struct ghes *ghes)
6172     -{
6173     - pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
6174     - ghes->generic->header.source_id);
6175     - BUG();
6176     -}
6177     -
6178     -static inline void ghes_nmi_remove(struct ghes *ghes)
6179     -{
6180     - pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
6181     - ghes->generic->header.source_id);
6182     - BUG();
6183     -}
6184     -
6185     -static inline void ghes_nmi_init_cxt(void)
6186     -{
6187     -}
6188     +static inline void ghes_nmi_add(struct ghes *ghes) { }
6189     +static inline void ghes_nmi_remove(struct ghes *ghes) { }
6190     +static inline void ghes_nmi_init_cxt(void) { }
6191     #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
6192    
6193     static int ghes_probe(struct platform_device *ghes_dev)
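
The ghes.c hunks above drop the private atomic-ioremap area in favour of two dedicated fixmap slots, one per context (NMI and IRQ), each still guarded by its existing lock. A condensed sketch of the per-access pattern for the IRQ path, using only calls visible in the patch; it is not a drop-in function and the actual error-block copy is elided:

static void ghes_access_phys_irq_sketch(u64 pfn)
{
	unsigned long flags;
	void __iomem *vaddr;

	spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);

	/* map the page into the reserved fixmap slot */
	__set_fixmap(FIX_APEI_GHES_IRQ, pfn << PAGE_SHIFT,
		     arch_apei_get_mem_attribute(pfn << PAGE_SHIFT));
	vaddr = (void __iomem *)fix_to_virt(FIX_APEI_GHES_IRQ);

	/* ... read or write the error status block through vaddr ... */

	clear_fixmap(FIX_APEI_GHES_IRQ);
	spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
}
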
6194     diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
6195     index a6de32530693..0459b1204694 100644
6196     --- a/drivers/base/power/opp/core.c
6197     +++ b/drivers/base/power/opp/core.c
6198     @@ -296,7 +296,7 @@ int dev_pm_opp_get_opp_count(struct device *dev)
6199     opp_table = _find_opp_table(dev);
6200     if (IS_ERR(opp_table)) {
6201     count = PTR_ERR(opp_table);
6202     - dev_err(dev, "%s: OPP table not found (%d)\n",
6203     + dev_dbg(dev, "%s: OPP table not found (%d)\n",
6204     __func__, count);
6205     return count;
6206     }
6207     diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
6208     index e2540113d0da..73d2d88ddc03 100644
6209     --- a/drivers/bluetooth/hci_bcm.c
6210     +++ b/drivers/bluetooth/hci_bcm.c
6211     @@ -68,7 +68,7 @@ struct bcm_device {
6212     u32 init_speed;
6213     u32 oper_speed;
6214     int irq;
6215     - u8 irq_polarity;
6216     + bool irq_active_low;
6217    
6218     #ifdef CONFIG_PM
6219     struct hci_uart *hu;
6220     @@ -213,7 +213,9 @@ static int bcm_request_irq(struct bcm_data *bcm)
6221     }
6222    
6223     err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake,
6224     - IRQF_TRIGGER_RISING, "host_wake", bdev);
6225     + bdev->irq_active_low ? IRQF_TRIGGER_FALLING :
6226     + IRQF_TRIGGER_RISING,
6227     + "host_wake", bdev);
6228     if (err)
6229     goto unlock;
6230    
6231     @@ -253,7 +255,7 @@ static int bcm_setup_sleep(struct hci_uart *hu)
6232     struct sk_buff *skb;
6233     struct bcm_set_sleep_mode sleep_params = default_sleep_params;
6234    
6235     - sleep_params.host_wake_active = !bcm->dev->irq_polarity;
6236     + sleep_params.host_wake_active = !bcm->dev->irq_active_low;
6237    
6238     skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params),
6239     &sleep_params, HCI_INIT_TIMEOUT);
6240     @@ -690,10 +692,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
6241     };
6242    
6243     #ifdef CONFIG_ACPI
6244     -static u8 acpi_active_low = ACPI_ACTIVE_LOW;
6245     -
6246     /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */
6247     -static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6248     +static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
6249     {
6250     .ident = "Asus T100TA",
6251     .matches = {
6252     @@ -701,7 +701,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6253     "ASUSTeK COMPUTER INC."),
6254     DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
6255     },
6256     - .driver_data = &acpi_active_low,
6257     },
6258     {
6259     .ident = "Asus T100CHI",
6260     @@ -710,7 +709,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6261     "ASUSTeK COMPUTER INC."),
6262     DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
6263     },
6264     - .driver_data = &acpi_active_low,
6265     },
6266     { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
6267     .ident = "Lenovo ThinkPad 8",
6268     @@ -718,7 +716,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6269     DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
6270     DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
6271     },
6272     - .driver_data = &acpi_active_low,
6273     },
6274     { }
6275     };
6276     @@ -733,13 +730,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
6277     switch (ares->type) {
6278     case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
6279     irq = &ares->data.extended_irq;
6280     - dev->irq_polarity = irq->polarity;
6281     + dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW;
6282     break;
6283    
6284     case ACPI_RESOURCE_TYPE_GPIO:
6285     gpio = &ares->data.gpio;
6286     if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
6287     - dev->irq_polarity = gpio->polarity;
6288     + dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW;
6289     break;
6290    
6291     case ACPI_RESOURCE_TYPE_SERIAL_BUS:
6292     @@ -834,11 +831,11 @@ static int bcm_acpi_probe(struct bcm_device *dev)
6293     return ret;
6294     acpi_dev_free_resource_list(&resources);
6295    
6296     - dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table);
6297     + dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
6298     if (dmi_id) {
6299     bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low",
6300     dmi_id->ident);
6301     - dev->irq_polarity = *(u8 *)dmi_id->driver_data;
6302     + dev->irq_active_low = true;
6303     }
6304    
6305     return 0;
6306     diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
6307     index 6e2403805784..6aef3bde10d7 100644
6308     --- a/drivers/bluetooth/hci_ldisc.c
6309     +++ b/drivers/bluetooth/hci_ldisc.c
6310     @@ -41,6 +41,7 @@
6311     #include <linux/ioctl.h>
6312     #include <linux/skbuff.h>
6313     #include <linux/firmware.h>
6314     +#include <linux/serdev.h>
6315    
6316     #include <net/bluetooth/bluetooth.h>
6317     #include <net/bluetooth/hci_core.h>
6318     @@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
6319     unsigned int set = 0;
6320     unsigned int clear = 0;
6321    
6322     + if (hu->serdev) {
6323     + serdev_device_set_flow_control(hu->serdev, !enable);
6324     + serdev_device_set_rts(hu->serdev, !enable);
6325     + return;
6326     + }
6327     +
6328     if (enable) {
6329     /* Disable hardware flow control */
6330     ktermios = tty->termios;
6331     diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
6332     index ab9e850b3707..2f385a57cd91 100644
6333     --- a/drivers/clk/sunxi-ng/ccu-sun5i.c
6334     +++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
6335     @@ -982,8 +982,8 @@ static void __init sun5i_ccu_init(struct device_node *node,
6336    
6337     /* Force the PLL-Audio-1x divider to 4 */
6338     val = readl(reg + SUN5I_PLL_AUDIO_REG);
6339     - val &= ~GENMASK(19, 16);
6340     - writel(val | (3 << 16), reg + SUN5I_PLL_AUDIO_REG);
6341     + val &= ~GENMASK(29, 26);
6342     + writel(val | (3 << 26), reg + SUN5I_PLL_AUDIO_REG);
6343    
6344     /*
6345     * Use the peripheral PLL as the AHB parent, instead of CPU /
6346     diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6347     index 8af434815fba..241fb13f1c06 100644
6348     --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6349     +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6350     @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents,
6351     0x150, 0, 4, 24, 2, BIT(31),
6352     CLK_SET_RATE_PARENT);
6353    
6354     -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
6355     +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0);
6356    
6357     static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
6358    
6359     diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
6360     index a32158e8f2e3..84a5e7f17f6f 100644
6361     --- a/drivers/clk/sunxi-ng/ccu_nm.c
6362     +++ b/drivers/clk/sunxi-ng/ccu_nm.c
6363     @@ -99,6 +99,9 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
6364     struct ccu_nm *nm = hw_to_ccu_nm(hw);
6365     struct _ccu_nm _nm;
6366    
6367     + if (ccu_frac_helper_has_rate(&nm->common, &nm->frac, rate))
6368     + return rate;
6369     +
6370     _nm.min_n = nm->n.min ?: 1;
6371     _nm.max_n = nm->n.max ?: 1 << nm->n.width;
6372     _nm.min_m = 1;
6373     diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
6374     index 484cc8909d5c..ed4df58a855e 100644
6375     --- a/drivers/cpuidle/cpuidle.c
6376     +++ b/drivers/cpuidle/cpuidle.c
6377     @@ -208,6 +208,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
6378     return -EBUSY;
6379     }
6380     target_state = &drv->states[index];
6381     + broadcast = false;
6382     }
6383    
6384     /* Take note of the planned idle state. */
6385     diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
6386     index ecfdcfe3698d..4f41d6da5acc 100644
6387     --- a/drivers/crypto/amcc/crypto4xx_core.h
6388     +++ b/drivers/crypto/amcc/crypto4xx_core.h
6389     @@ -34,12 +34,12 @@
6390     #define PPC405EX_CE_RESET 0x00000008
6391    
6392     #define CRYPTO4XX_CRYPTO_PRIORITY 300
6393     -#define PPC4XX_LAST_PD 63
6394     -#define PPC4XX_NUM_PD 64
6395     -#define PPC4XX_LAST_GD 1023
6396     +#define PPC4XX_NUM_PD 256
6397     +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1)
6398     #define PPC4XX_NUM_GD 1024
6399     -#define PPC4XX_LAST_SD 63
6400     -#define PPC4XX_NUM_SD 64
6401     +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1)
6402     +#define PPC4XX_NUM_SD 256
6403     +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1)
6404     #define PPC4XX_SD_BUFFER_SIZE 2048
6405    
6406     #define PD_ENTRY_INUSE 1
6407     diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6408     index 0ef9011a1856..02a50929af67 100644
6409     --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6410     +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6411     @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter,
6412     {
6413     u8 data;
6414     int ret = 0;
6415     + int retry;
6416    
6417     if (!mode) {
6418     DRM_ERROR("NULL input\n");
6419     @@ -417,10 +418,19 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter,
6420     }
6421    
6422     /* Read Status: i2c over aux */
6423     - ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_LSPCON_CURRENT_MODE,
6424     - &data, sizeof(data));
6425     + for (retry = 0; retry < 6; retry++) {
6426     + if (retry)
6427     + usleep_range(500, 1000);
6428     +
6429     + ret = drm_dp_dual_mode_read(adapter,
6430     + DP_DUAL_MODE_LSPCON_CURRENT_MODE,
6431     + &data, sizeof(data));
6432     + if (!ret)
6433     + break;
6434     + }
6435     +
6436     if (ret < 0) {
6437     - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n");
6438     + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n");
6439     return -EFAULT;
6440     }
6441    
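The LSPCON change above wraps the mode read in a bounded retry loop with a short sleep before every attempt after the first, and only then falls through to the existing error path. The shape of that loop as a stand-alone toy; do_read() is a stand-in, not a DRM call:

#include <stdio.h>
#include <unistd.h>

static int do_read(int attempt)
{
	return attempt < 2 ? -1 : 0;	/* pretend the first two tries fail */
}

int main(void)
{
	int ret = -1;
	int retry;

	for (retry = 0; retry < 6; retry++) {
		if (retry)
			usleep(1000);	/* kernel uses usleep_range(500, 1000) */
		ret = do_read(retry);
		if (!ret)
			break;
	}
	if (ret < 0) {
		fprintf(stderr, "read failed after %d tries\n", retry);
		return 1;
	}
	printf("read succeeded on try %d\n", retry + 1);
	return 0;
}
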
6442     diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
6443     index d1e0dc908048..04796d7d0fdb 100644
6444     --- a/drivers/gpu/drm/vc4/vc4_dsi.c
6445     +++ b/drivers/gpu/drm/vc4/vc4_dsi.c
6446     @@ -866,7 +866,8 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
6447     adjusted_mode->clock = pixel_clock_hz / 1000 + 1;
6448    
6449     /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
6450     - adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal);
6451     + adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
6452     + mode->clock;
6453     adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal;
6454     adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal;
6455    
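The vc4_dsi htotal fix above scales htotal by the ratio of the adjusted pixel clock to the original one, so the refresh rate (clock / (htotal * vtotal)) is preserved. With invented numbers: if mode->clock is 30000 kHz with htotal = 800 and vtotal = 500, vrefresh is 30,000,000 / (800 * 500) = 75 Hz; if the DSI divider forces adjusted_mode->clock up to 33000 kHz, the new htotal becomes 33000 * 800 / 30000 = 880, and 33,000,000 / (880 * 500) is still 75 Hz.
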
6456     diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
6457     index 937801ac2fe0..2cd134dd94d2 100644
6458     --- a/drivers/hv/vmbus_drv.c
6459     +++ b/drivers/hv/vmbus_drv.c
6460     @@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void)
6461     {
6462     int ret, t;
6463    
6464     - if (x86_hyper != &x86_hyper_ms_hyperv)
6465     + if (x86_hyper_type != X86_HYPER_MS_HYPERV)
6466     return -ENODEV;
6467    
6468     init_completion(&probe_event);
6469     diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
6470     index 752856b3a849..379de1829cdb 100644
6471     --- a/drivers/iio/accel/st_accel_core.c
6472     +++ b/drivers/iio/accel/st_accel_core.c
6473     @@ -164,7 +164,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6474     .mask_int2 = 0x00,
6475     .addr_ihl = 0x25,
6476     .mask_ihl = 0x02,
6477     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6478     + .stat_drdy = {
6479     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6480     + .mask = 0x07,
6481     + },
6482     },
6483     .sim = {
6484     .addr = 0x23,
6485     @@ -236,7 +239,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6486     .mask_ihl = 0x80,
6487     .addr_od = 0x22,
6488     .mask_od = 0x40,
6489     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6490     + .stat_drdy = {
6491     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6492     + .mask = 0x07,
6493     + },
6494     },
6495     .sim = {
6496     .addr = 0x23,
6497     @@ -318,7 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6498     .mask_int2 = 0x00,
6499     .addr_ihl = 0x23,
6500     .mask_ihl = 0x40,
6501     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6502     + .stat_drdy = {
6503     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6504     + .mask = 0x07,
6505     + },
6506     .ig1 = {
6507     .en_addr = 0x23,
6508     .en_mask = 0x08,
6509     @@ -389,7 +398,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6510     .drdy_irq = {
6511     .addr = 0x21,
6512     .mask_int1 = 0x04,
6513     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6514     + .stat_drdy = {
6515     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6516     + .mask = 0x07,
6517     + },
6518     },
6519     .sim = {
6520     .addr = 0x21,
6521     @@ -451,7 +463,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6522     .mask_ihl = 0x80,
6523     .addr_od = 0x22,
6524     .mask_od = 0x40,
6525     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6526     + .stat_drdy = {
6527     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6528     + .mask = 0x07,
6529     + },
6530     },
6531     .sim = {
6532     .addr = 0x21,
6533     @@ -569,7 +584,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6534     .drdy_irq = {
6535     .addr = 0x21,
6536     .mask_int1 = 0x04,
6537     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6538     + .stat_drdy = {
6539     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6540     + .mask = 0x07,
6541     + },
6542     },
6543     .sim = {
6544     .addr = 0x21,
6545     @@ -640,7 +658,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6546     .mask_int2 = 0x00,
6547     .addr_ihl = 0x25,
6548     .mask_ihl = 0x02,
6549     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6550     + .stat_drdy = {
6551     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6552     + .mask = 0x07,
6553     + },
6554     },
6555     .sim = {
6556     .addr = 0x23,
6557     diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
6558     index 02e833b14db0..34115f05d5c4 100644
6559     --- a/drivers/iio/common/st_sensors/st_sensors_core.c
6560     +++ b/drivers/iio/common/st_sensors/st_sensors_core.c
6561     @@ -470,7 +470,7 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
6562     * different one. Take into account irq status register
6563     * to understand if irq trigger can be properly supported
6564     */
6565     - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6566     + if (sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6567     sdata->hw_irq_trigger = enable;
6568     return 0;
6569     }
6570     diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
6571     index fa73e6795359..fdcc5a891958 100644
6572     --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
6573     +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
6574     @@ -31,7 +31,7 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
6575     int ret;
6576    
6577     /* How would I know if I can't check it? */
6578     - if (!sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6579     + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6580     return -EINVAL;
6581    
6582     /* No scan mask, no interrupt */
6583     @@ -39,23 +39,15 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
6584     return 0;
6585    
6586     ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
6587     - sdata->sensor_settings->drdy_irq.addr_stat_drdy,
6588     + sdata->sensor_settings->drdy_irq.stat_drdy.addr,
6589     &status);
6590     if (ret < 0) {
6591     dev_err(sdata->dev,
6592     "error checking samples available\n");
6593     return ret;
6594     }
6595     - /*
6596     - * the lower bits of .active_scan_mask[0] is directly mapped
6597     - * to the channels on the sensor: either bit 0 for
6598     - * one-dimensional sensors, or e.g. x,y,z for accelerometers,
6599     - * gyroscopes or magnetometers. No sensor use more than 3
6600     - * channels, so cut the other status bits here.
6601     - */
6602     - status &= 0x07;
6603    
6604     - if (status & (u8)indio_dev->active_scan_mask[0])
6605     + if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask)
6606     return 1;
6607    
6608     return 0;
6609     @@ -212,7 +204,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
6610     * it was "our" interrupt.
6611     */
6612     if (sdata->int_pin_open_drain &&
6613     - sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6614     + sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6615     irq_trig |= IRQF_SHARED;
6616    
6617     err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
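
The stat_drdy conversion above replaces the hard-coded "status &= 0x07" in st_sensors_new_samples_available() with a per-sensor mask stored next to the status register address, so pressure sensors can use 0x03 while 3-axis parts keep 0x07. A tiny stand-alone illustration; the register address and status values below are invented for the example:

#include <stdio.h>
#include <stdint.h>

struct stat_drdy { uint8_t addr; uint8_t mask; };

/* the IRQ is "ours" only if bits this sensor can set are raised */
static int new_samples_available(uint8_t status, const struct stat_drdy *sd)
{
	return (status & sd->mask) ? 1 : 0;
}

int main(void)
{
	struct stat_drdy accel = { .addr = 0x27, .mask = 0x07 }; /* x/y/z bits */
	struct stat_drdy press = { .addr = 0x27, .mask = 0x03 }; /* p/t bits */

	printf("%d\n", new_samples_available(0x04, &accel)); /* z ready -> 1 */
	printf("%d\n", new_samples_available(0x04, &press)); /* outside mask -> 0 */
	return 0;
}
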
6618     diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
6619     index e366422e8512..2536a8400c98 100644
6620     --- a/drivers/iio/gyro/st_gyro_core.c
6621     +++ b/drivers/iio/gyro/st_gyro_core.c
6622     @@ -118,7 +118,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6623     * drain settings, but only for INT1 and not
6624     * for the DRDY line on INT2.
6625     */
6626     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6627     + .stat_drdy = {
6628     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6629     + .mask = 0x07,
6630     + },
6631     },
6632     .multi_read_bit = true,
6633     .bootime = 2,
6634     @@ -188,7 +191,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6635     * drain settings, but only for INT1 and not
6636     * for the DRDY line on INT2.
6637     */
6638     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6639     + .stat_drdy = {
6640     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6641     + .mask = 0x07,
6642     + },
6643     },
6644     .multi_read_bit = true,
6645     .bootime = 2,
6646     @@ -253,7 +259,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6647     * drain settings, but only for INT1 and not
6648     * for the DRDY line on INT2.
6649     */
6650     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6651     + .stat_drdy = {
6652     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6653     + .mask = 0x07,
6654     + },
6655     },
6656     .multi_read_bit = true,
6657     .bootime = 2,
6658     diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
6659     index 08aafba4481c..19031a7bce23 100644
6660     --- a/drivers/iio/magnetometer/st_magn_core.c
6661     +++ b/drivers/iio/magnetometer/st_magn_core.c
6662     @@ -317,7 +317,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
6663     },
6664     .drdy_irq = {
6665     /* drdy line is routed drdy pin */
6666     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6667     + .stat_drdy = {
6668     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6669     + .mask = 0x07,
6670     + },
6671     },
6672     .multi_read_bit = true,
6673     .bootime = 2,
6674     @@ -361,7 +364,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
6675     .drdy_irq = {
6676     .addr = 0x62,
6677     .mask_int1 = 0x01,
6678     - .addr_stat_drdy = 0x67,
6679     + .stat_drdy = {
6680     + .addr = 0x67,
6681     + .mask = 0x07,
6682     + },
6683     },
6684     .multi_read_bit = false,
6685     .bootime = 2,
6686     diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
6687     index 34611a8ea2ce..ea075fcd5a6f 100644
6688     --- a/drivers/iio/pressure/st_pressure_core.c
6689     +++ b/drivers/iio/pressure/st_pressure_core.c
6690     @@ -287,7 +287,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6691     .mask_ihl = 0x80,
6692     .addr_od = 0x22,
6693     .mask_od = 0x40,
6694     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6695     + .stat_drdy = {
6696     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6697     + .mask = 0x03,
6698     + },
6699     },
6700     .multi_read_bit = true,
6701     .bootime = 2,
6702     @@ -395,7 +398,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6703     .mask_ihl = 0x80,
6704     .addr_od = 0x22,
6705     .mask_od = 0x40,
6706     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6707     + .stat_drdy = {
6708     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6709     + .mask = 0x03,
6710     + },
6711     },
6712     .multi_read_bit = true,
6713     .bootime = 2,
6714     @@ -454,7 +460,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6715     .mask_ihl = 0x80,
6716     .addr_od = 0x12,
6717     .mask_od = 0x40,
6718     - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6719     + .stat_drdy = {
6720     + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6721     + .mask = 0x03,
6722     + },
6723     },
6724     .multi_read_bit = false,
6725     .bootime = 2,
6726     diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6727     index 747efd1ae5a6..8208c30f03c5 100644
6728     --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6729     +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6730     @@ -1001,6 +1001,11 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
6731     }
6732     }
6733    
6734     + if (!ne) {
6735     + dev_err(dev, "Reserved loop qp is absent!\n");
6736     + goto free_work;
6737     + }
6738     +
6739     do {
6740     ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
6741     if (ret < 0) {
6742     diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
6743     index c1b5f38f31a5..3b4916680018 100644
6744     --- a/drivers/infiniband/sw/rxe/rxe_pool.c
6745     +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
6746     @@ -404,6 +404,8 @@ void *rxe_alloc(struct rxe_pool *pool)
6747     elem = kmem_cache_zalloc(pool_cache(pool),
6748     (pool->flags & RXE_POOL_ATOMIC) ?
6749     GFP_ATOMIC : GFP_KERNEL);
6750     + if (!elem)
6751     + return NULL;
6752    
6753     elem->pool = pool;
6754     kref_init(&elem->ref_cnt);
6755     diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6756     index afa938bd26d6..a72278e9cd27 100644
6757     --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6758     +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6759     @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
6760     rcu_assign_pointer(adapter->mactbl, NULL);
6761     synchronize_rcu();
6762     opa_vnic_free_mac_tbl(mactbl);
6763     + adapter->info.vport.mac_tbl_digest = 0;
6764     mutex_unlock(&adapter->mactbl_lock);
6765     }
6766    
6767     diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6768     index c2733964379c..9655cc3aa3a0 100644
6769     --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6770     +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6771     @@ -348,7 +348,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
6772     void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6773     struct opa_veswport_iface_macs *macs)
6774     {
6775     - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
6776     + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0;
6777     struct netdev_hw_addr *ha;
6778    
6779     start_idx = be16_to_cpu(macs->start_idx);
6780     @@ -359,8 +359,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6781    
6782     /* Do not include EM specified MAC address */
6783     if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
6784     - ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
6785     + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) {
6786     + em_macs++;
6787     continue;
6788     + }
6789    
6790     if (start_idx > idx++)
6791     continue;
6792     @@ -383,7 +385,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6793     }
6794    
6795     tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
6796     - netdev_uc_count(adapter->netdev);
6797     + netdev_uc_count(adapter->netdev) - em_macs;
6798     macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
6799     macs->num_macs_in_msg = cpu_to_be16(count);
6800     macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
6801     diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
6802     index 0f586780ceb4..1ae5c1ef3f5b 100644
6803     --- a/drivers/input/mouse/vmmouse.c
6804     +++ b/drivers/input/mouse/vmmouse.c
6805     @@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse)
6806     /*
6807     * Array of supported hypervisors.
6808     */
6809     -static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = {
6810     - &x86_hyper_vmware,
6811     -#ifdef CONFIG_KVM_GUEST
6812     - &x86_hyper_kvm,
6813     -#endif
6814     +static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = {
6815     + X86_HYPER_VMWARE,
6816     + X86_HYPER_KVM,
6817     };
6818    
6819     /**
6820     @@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void)
6821     int i;
6822    
6823     for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++)
6824     - if (vmmouse_supported_hypervisors[i] == x86_hyper)
6825     + if (vmmouse_supported_hypervisors[i] == x86_hyper_type)
6826     return true;
6827    
6828     return false;
6829     diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
6830     index 905729191d3e..78183f90820e 100644
6831     --- a/drivers/leds/leds-pca955x.c
6832     +++ b/drivers/leds/leds-pca955x.c
6833     @@ -61,6 +61,10 @@
6834     #define PCA955X_LS_BLINK0 0x2 /* Blink at PWM0 rate */
6835     #define PCA955X_LS_BLINK1 0x3 /* Blink at PWM1 rate */
6836    
6837     +#define PCA955X_GPIO_INPUT LED_OFF
6838     +#define PCA955X_GPIO_HIGH LED_OFF
6839     +#define PCA955X_GPIO_LOW LED_FULL
6840     +
6841     enum pca955x_type {
6842     pca9550,
6843     pca9551,
6844     @@ -329,9 +333,9 @@ static int pca955x_set_value(struct gpio_chip *gc, unsigned int offset,
6845     struct pca955x_led *led = &pca955x->leds[offset];
6846    
6847     if (val)
6848     - return pca955x_led_set(&led->led_cdev, LED_FULL);
6849     - else
6850     - return pca955x_led_set(&led->led_cdev, LED_OFF);
6851     + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_HIGH);
6852     +
6853     + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_LOW);
6854     }
6855    
6856     static void pca955x_gpio_set_value(struct gpio_chip *gc, unsigned int offset,
6857     @@ -355,8 +359,11 @@ static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset)
6858     static int pca955x_gpio_direction_input(struct gpio_chip *gc,
6859     unsigned int offset)
6860     {
6861     - /* To use as input ensure pin is not driven */
6862     - return pca955x_set_value(gc, offset, 0);
6863     + struct pca955x *pca955x = gpiochip_get_data(gc);
6864     + struct pca955x_led *led = &pca955x->leds[offset];
6865     +
6866     + /* To use as input ensure pin is not driven. */
6867     + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_INPUT);
6868     }
6869    
6870     static int pca955x_gpio_direction_output(struct gpio_chip *gc,
6871     diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
6872     index 35e82b14ded7..ddf0a4341ae2 100644
6873     --- a/drivers/md/dm-mpath.c
6874     +++ b/drivers/md/dm-mpath.c
6875     @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m,
6876    
6877     pgpath = path_to_pgpath(path);
6878    
6879     - if (unlikely(lockless_dereference(m->current_pg) != pg)) {
6880     + if (unlikely(READ_ONCE(m->current_pg) != pg)) {
6881     /* Only update current_pgpath if pg changed */
6882     spin_lock_irqsave(&m->lock, flags);
6883     m->current_pgpath = pgpath;
6884     @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
6885     }
6886    
6887     /* Were we instructed to switch PG? */
6888     - if (lockless_dereference(m->next_pg)) {
6889     + if (READ_ONCE(m->next_pg)) {
6890     spin_lock_irqsave(&m->lock, flags);
6891     pg = m->next_pg;
6892     if (!pg) {
6893     @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
6894    
6895     /* Don't change PG until it has no remaining paths */
6896     check_current_pg:
6897     - pg = lockless_dereference(m->current_pg);
6898     + pg = READ_ONCE(m->current_pg);
6899     if (pg) {
6900     pgpath = choose_path_in_pg(m, pg, nr_bytes);
6901     if (!IS_ERR_OR_NULL(pgpath))
6902     @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
6903     struct request *clone;
6904    
6905     /* Do we need to select a new pgpath? */
6906     - pgpath = lockless_dereference(m->current_pgpath);
6907     + pgpath = READ_ONCE(m->current_pgpath);
6908     if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
6909     pgpath = choose_pgpath(m, nr_bytes);
6910    
6911     @@ -533,7 +533,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
6912     bool queue_io;
6913    
6914     /* Do we need to select a new pgpath? */
6915     - pgpath = lockless_dereference(m->current_pgpath);
6916     + pgpath = READ_ONCE(m->current_pgpath);
6917     queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
6918     if (!pgpath || !queue_io)
6919     pgpath = choose_pgpath(m, nr_bytes);
6920     @@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
6921     struct pgpath *current_pgpath;
6922     int r;
6923    
6924     - current_pgpath = lockless_dereference(m->current_pgpath);
6925     + current_pgpath = READ_ONCE(m->current_pgpath);
6926     if (!current_pgpath)
6927     current_pgpath = choose_pgpath(m, 0);
6928    
6929     @@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
6930     }
6931    
6932     if (r == -ENOTCONN) {
6933     - if (!lockless_dereference(m->current_pg)) {
6934     + if (!READ_ONCE(m->current_pg)) {
6935     /* Path status changed, redo selection */
6936     (void) choose_pgpath(m, 0);
6937     }
6938     @@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_target *ti)
6939     return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
6940    
6941     /* Guess which priority_group will be used at next mapping time */
6942     - pg = lockless_dereference(m->current_pg);
6943     - next_pg = lockless_dereference(m->next_pg);
6944     - if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
6945     + pg = READ_ONCE(m->current_pg);
6946     + next_pg = READ_ONCE(m->next_pg);
6947     + if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
6948     pg = next_pg;
6949    
6950     if (!pg) {
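
Aside on the dm-mpath hunks above: this is a mechanical substitution. lockless_dereference() was READ_ONCE() plus smp_read_barrier_depends(), and since READ_ONCE() now issues that barrier itself (see the include/linux/compiler.h hunk later in this patch), the call sites can use READ_ONCE() directly. A rough userspace model of the publish/read pattern, using C11 atomics purely as a stand-in for the kernel primitives:

#include <stdatomic.h>
#include <stdio.h>

struct priority_group { int index; };

/* published pointer, read locklessly on the I/O path */
static _Atomic(struct priority_group *) current_pg;

static void publish_pg(struct priority_group *pg)
{
        /* writer side: make the pg contents visible before the pointer */
        atomic_store_explicit(&current_pg, pg, memory_order_release);
}

static struct priority_group *read_pg(void)
{
        /*
         * reader side: one marked load, the moral equivalent of
         * READ_ONCE(m->current_pg); consume ordering (treated as acquire
         * by current compilers) covers the address dependency.
         */
        return atomic_load_explicit(&current_pg, memory_order_consume);
}

int main(void)
{
        struct priority_group pg = { .index = 1 };

        publish_pg(&pg);
        struct priority_group *snap = read_pg();
        if (snap)
                printf("using pg %d\n", snap->index);
        return 0;
}
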
6951     diff --git a/drivers/md/md.c b/drivers/md/md.c
6952     index 98ea86309ceb..6bf093cef958 100644
6953     --- a/drivers/md/md.c
6954     +++ b/drivers/md/md.c
6955     @@ -7468,8 +7468,8 @@ void md_wakeup_thread(struct md_thread *thread)
6956     {
6957     if (thread) {
6958     pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6959     - if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
6960     - wake_up(&thread->wqueue);
6961     + set_bit(THREAD_WAKEUP, &thread->flags);
6962     + wake_up(&thread->wqueue);
6963     }
6964     }
6965     EXPORT_SYMBOL(md_wakeup_thread);
6966     diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
6967     index eda38cbe8530..41f2a9f6851d 100644
6968     --- a/drivers/misc/pti.c
6969     +++ b/drivers/misc/pti.c
6970     @@ -32,7 +32,7 @@
6971     #include <linux/pci.h>
6972     #include <linux/mutex.h>
6973     #include <linux/miscdevice.h>
6974     -#include <linux/pti.h>
6975     +#include <linux/intel-pti.h>
6976     #include <linux/slab.h>
6977     #include <linux/uaccess.h>
6978    
6979     diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
6980     index 1e688bfec567..9047c0a529b2 100644
6981     --- a/drivers/misc/vmw_balloon.c
6982     +++ b/drivers/misc/vmw_balloon.c
6983     @@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void)
6984     * Check if we are running on VMware's hypervisor and bail out
6985     * if we are not.
6986     */
6987     - if (x86_hyper != &x86_hyper_vmware)
6988     + if (x86_hyper_type != X86_HYPER_VMWARE)
6989     return -ENODEV;
6990    
6991     for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
6992     diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
6993     index c66abd476023..3b0db01ead1f 100644
6994     --- a/drivers/net/ethernet/ibm/ibmvnic.c
6995     +++ b/drivers/net/ethernet/ibm/ibmvnic.c
6996     @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev)
6997     }
6998    
6999     rc = __ibmvnic_open(netdev);
7000     + netif_carrier_on(netdev);
7001     mutex_unlock(&adapter->reset_lock);
7002    
7003     return rc;
7004     @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
7005     if (rc)
7006     goto ibmvnic_init_fail;
7007    
7008     + netif_carrier_off(netdev);
7009     rc = register_netdev(netdev);
7010     if (rc) {
7011     dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
7012     diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
7013     index 689c413b7782..d2f9a2dd76a2 100644
7014     --- a/drivers/net/ethernet/intel/fm10k/fm10k.h
7015     +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
7016     @@ -526,8 +526,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
7017     int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
7018     int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
7019     int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
7020     -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
7021     - int unused);
7022     +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7023     + int __always_unused min_rate, int max_rate);
7024     int fm10k_ndo_get_vf_config(struct net_device *netdev,
7025     int vf_idx, struct ifla_vf_info *ivi);
7026    
7027     diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7028     index 5f4dac0d36ef..e72fd52bacfe 100644
7029     --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7030     +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7031     @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
7032     struct fm10k_mbx_info *mbx = &vf_info->mbx;
7033     u16 glort = vf_info->glort;
7034    
7035     + /* process the SM mailbox first to drain outgoing messages */
7036     + hw->mbx.ops.process(hw, &hw->mbx);
7037     +
7038     /* verify port mapping is valid, if not reset port */
7039     if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
7040     hw->iov.ops.reset_lport(hw, vf_info);
7041     @@ -482,7 +485,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
7042     }
7043    
7044     int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7045     - int __always_unused unused, int rate)
7046     + int __always_unused min_rate, int max_rate)
7047     {
7048     struct fm10k_intfc *interface = netdev_priv(netdev);
7049     struct fm10k_iov_data *iov_data = interface->iov_data;
7050     @@ -493,14 +496,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7051     return -EINVAL;
7052    
7053     /* rate limit cannot be less than 10Mbs or greater than link speed */
7054     - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
7055     + if (max_rate &&
7056     + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
7057     return -EINVAL;
7058    
7059     /* store values */
7060     - iov_data->vf_info[vf_idx].rate = rate;
7061     + iov_data->vf_info[vf_idx].rate = max_rate;
7062    
7063     /* update hardware configuration */
7064     - hw->iov.ops.configure_tc(hw, vf_idx, rate);
7065     + hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
7066    
7067     return 0;
7068     }
7069     diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
7070     index ea20aacd5e1d..b2cde9b16d82 100644
7071     --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
7072     +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
7073     @@ -2874,14 +2874,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
7074     static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
7075     {
7076     struct i40e_vsi *vsi = ring->vsi;
7077     + int cpu;
7078    
7079     if (!ring->q_vector || !ring->netdev)
7080     return;
7081    
7082     if ((vsi->tc_config.numtc <= 1) &&
7083     !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
7084     - netif_set_xps_queue(ring->netdev,
7085     - get_cpu_mask(ring->q_vector->v_idx),
7086     + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
7087     + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
7088     ring->queue_index);
7089     }
7090    
7091     @@ -3471,6 +3472,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
7092     int tx_int_idx = 0;
7093     int vector, err;
7094     int irq_num;
7095     + int cpu;
7096    
7097     for (vector = 0; vector < q_vectors; vector++) {
7098     struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
7099     @@ -3506,10 +3508,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
7100     q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
7101     q_vector->affinity_notify.release = i40e_irq_affinity_release;
7102     irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
7103     - /* get_cpu_mask returns a static constant mask with
7104     - * a permanent lifetime so it's ok to use here.
7105     + /* Spread affinity hints out across online CPUs.
7106     + *
7107     + * get_cpu_mask returns a static constant mask with
7108     + * a permanent lifetime so it's ok to pass to
7109     + * irq_set_affinity_hint without making a copy.
7110     */
7111     - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
7112     + cpu = cpumask_local_spread(q_vector->v_idx, -1);
7113     + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
7114     }
7115    
7116     vsi->irqs_ready = true;
7117     diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7118     index 4d1e670f490e..e368b0237a1b 100644
7119     --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7120     +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7121     @@ -1008,8 +1008,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
7122     set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
7123     clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
7124     /* Do not notify the client during VF init */
7125     - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
7126     - &vf->vf_states))
7127     + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
7128     + &vf->vf_states))
7129     i40e_notify_client_of_vf_reset(pf, abs_vf_id);
7130     vf->num_vlan = 0;
7131     }
7132     @@ -2779,6 +2779,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
7133     struct i40e_mac_filter *f;
7134     struct i40e_vf *vf;
7135     int ret = 0;
7136     + struct hlist_node *h;
7137     int bkt;
7138    
7139     /* validate the request */
7140     @@ -2817,7 +2818,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
7141     /* Delete all the filters for this VSI - we're going to kill it
7142     * anyway.
7143     */
7144     - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
7145     + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
7146     __i40e_del_filter(vsi, f);
7147    
7148     spin_unlock_bh(&vsi->mac_filter_hash_lock);
7149     diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7150     index 1825d956bb00..1ccad6f30ebf 100644
7151     --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7152     +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7153     @@ -546,6 +546,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
7154     unsigned int vector, q_vectors;
7155     unsigned int rx_int_idx = 0, tx_int_idx = 0;
7156     int irq_num, err;
7157     + int cpu;
7158    
7159     i40evf_irq_disable(adapter);
7160     /* Decrement for Other and TCP Timer vectors */
7161     @@ -584,10 +585,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
7162     q_vector->affinity_notify.release =
7163     i40evf_irq_affinity_release;
7164     irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
7165     - /* get_cpu_mask returns a static constant mask with
7166     - * a permanent lifetime so it's ok to use here.
7167     + /* Spread the IRQ affinity hints across online CPUs. Note that
7168     + * get_cpu_mask returns a mask with a permanent lifetime so
7169     + * it's safe to use as a hint for irq_set_affinity_hint.
7170     */
7171     - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
7172     + cpu = cpumask_local_spread(q_vector->v_idx, -1);
7173     + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
7174     }
7175    
7176     return 0;
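
Aside on the i40e/i40evf affinity hunks above: instead of hinting queue vector N at CPU N (which may be offline or clustered on one node), the drivers now ask cpumask_local_spread(v_idx, -1) for the v_idx-th online CPU; with a real node id it prefers CPUs on that NUMA node, and with -1 it simply walks the online CPUs and wraps around. A toy model of the wrapping behaviour, assuming a flat list of online CPU ids and ignoring the NUMA preference:

#include <stdio.h>

/* pick the i-th online CPU, wrapping around when i exceeds the count */
static int local_spread(int i, const int *online, int n_online)
{
        return online[i % n_online];
}

int main(void)
{
        int online[] = { 0, 1, 2, 3 };          /* illustrative online CPU ids */
        int n = sizeof(online) / sizeof(online[0]);

        for (int v_idx = 0; v_idx < 6; v_idx++)
                printf("vector %d -> cpu %d\n", v_idx,
                       local_spread(v_idx, online, n));
        return 0;
}
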
7177     diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
7178     index b0031c5ff767..667dbc7d4a4e 100644
7179     --- a/drivers/net/ethernet/intel/igb/igb_main.c
7180     +++ b/drivers/net/ethernet/intel/igb/igb_main.c
7181     @@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
7182     /* Setup and initialize a copy of the hw vlan table array */
7183     adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
7184     GFP_ATOMIC);
7185     + if (!adapter->shadow_vfta)
7186     + return -ENOMEM;
7187    
7188     /* This call may decrease the number of queues */
7189     if (igb_init_interrupt_scheme(adapter, true)) {
7190     diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7191     index 6e6ab6f6875e..64429a14c630 100644
7192     --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7193     +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7194     @@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
7195     fw_cmd.ver_build = build;
7196     fw_cmd.ver_sub = sub;
7197     fw_cmd.hdr.checksum = 0;
7198     - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
7199     - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
7200     fw_cmd.pad = 0;
7201     fw_cmd.pad2 = 0;
7202     + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
7203     + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
7204    
7205     for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
7206     ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
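
Aside on the ixgbe_common.c hunk above: the driver-version command is checksummed, so the pad fields have to be zeroed before the checksum is taken, otherwise the checksum covers whatever stale bytes the struct happened to contain and no longer matches the command that is actually sent. A small standalone sketch with a stand-in byte-sum checksum and a simplified field layout (names and values here are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fw_cmd {
        uint8_t checksum;
        uint8_t ver_maj;
        uint8_t ver_min;
        uint8_t pad;            /* must be zero when the checksum is taken */
        uint8_t pad2;
};

/* two's-complement byte sum: a valid command sums to zero overall */
static uint8_t csum(const void *p, size_t len)
{
        const uint8_t *b = p;
        uint8_t sum = 0;

        while (len--)
                sum += *b++;
        return (uint8_t)-sum;
}

int main(void)
{
        struct fw_cmd cmd;

        memset(&cmd, 0xa5, sizeof(cmd));         /* stale bytes, like old stack data */
        cmd.checksum = 0;
        cmd.ver_maj = 4;
        cmd.ver_min = 14;

        uint8_t early = csum(&cmd, sizeof(cmd)); /* old order: pads still stale */

        cmd.pad = 0;                             /* new order: zero the pads first ... */
        cmd.pad2 = 0;
        cmd.checksum = csum(&cmd, sizeof(cmd));  /* ... then compute the checksum */

        printf("early 0x%02x vs final 0x%02x, command verifies: %s\n",
               early, cmd.checksum,
               csum(&cmd, sizeof(cmd)) == 0 ? "yes" : "no");
        return 0;
}
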
7207     diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7208     index 19fbb2f28ea4..8a85217845ae 100644
7209     --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7210     +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7211     @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
7212     /* convert offset from words to bytes */
7213     buffer.address = cpu_to_be32((offset + current_word) * 2);
7214     buffer.length = cpu_to_be16(words_to_read * 2);
7215     + buffer.pad2 = 0;
7216     + buffer.pad3 = 0;
7217    
7218     status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
7219     IXGBE_HI_COMMAND_TIMEOUT);
7220     diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
7221     index c1e52b9dc58d..5f93e6add563 100644
7222     --- a/drivers/net/phy/at803x.c
7223     +++ b/drivers/net/phy/at803x.c
7224     @@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev,
7225     mac = (const u8 *) ndev->dev_addr;
7226    
7227     if (!is_valid_ether_addr(mac))
7228     - return -EFAULT;
7229     + return -EINVAL;
7230    
7231     for (i = 0; i < 3; i++) {
7232     phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
7233     diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
7234     index ac41c8be9200..0fd8e164339c 100644
7235     --- a/drivers/pci/iov.c
7236     +++ b/drivers/pci/iov.c
7237     @@ -162,7 +162,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
7238    
7239     pci_device_add(virtfn, virtfn->bus);
7240    
7241     - pci_bus_add_device(virtfn);
7242     sprintf(buf, "virtfn%u", id);
7243     rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
7244     if (rc)
7245     @@ -173,6 +172,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
7246    
7247     kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
7248    
7249     + pci_bus_add_device(virtfn);
7250     +
7251     return 0;
7252    
7253     failed2:
7254     diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
7255     index 6078dfc11b11..74f1c57ab93b 100644
7256     --- a/drivers/pci/pci.c
7257     +++ b/drivers/pci/pci.c
7258     @@ -4356,6 +4356,10 @@ static bool pci_bus_resetable(struct pci_bus *bus)
7259     {
7260     struct pci_dev *dev;
7261    
7262     +
7263     + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
7264     + return false;
7265     +
7266     list_for_each_entry(dev, &bus->devices, bus_list) {
7267     if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
7268     (dev->subordinate && !pci_bus_resetable(dev->subordinate)))
7269     diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
7270     index 890efcc574cb..744805232155 100644
7271     --- a/drivers/pci/pcie/aer/aerdrv_core.c
7272     +++ b/drivers/pci/pcie/aer/aerdrv_core.c
7273     @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
7274     * If the error is reported by an end point, we think this
7275     * error is related to the upstream link of the end point.
7276     */
7277     - pci_walk_bus(dev->bus, cb, &result_data);
7278     + if (state == pci_channel_io_normal)
7279     + /*
7280     + * the error is non fatal so the bus is ok, just invoke
7281     + * the callback for the function that logged the error.
7282     + */
7283     + cb(dev, &result_data);
7284     + else
7285     + pci_walk_bus(dev->bus, cb, &result_data);
7286     }
7287    
7288     return result_data.result;
7289     diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
7290     index f3796164329e..d4aeac3477f5 100644
7291     --- a/drivers/platform/x86/asus-wireless.c
7292     +++ b/drivers/platform/x86/asus-wireless.c
7293     @@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
7294     return;
7295     }
7296     input_report_key(data->idev, KEY_RFKILL, 1);
7297     + input_sync(data->idev);
7298     input_report_key(data->idev, KEY_RFKILL, 0);
7299     input_sync(data->idev);
7300     }
7301     diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
7302     index 8cec9a02c0b8..9eb32ead63db 100644
7303     --- a/drivers/rtc/interface.c
7304     +++ b/drivers/rtc/interface.c
7305     @@ -779,7 +779,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
7306     }
7307    
7308     timerqueue_add(&rtc->timerqueue, &timer->node);
7309     - if (!next) {
7310     + if (!next || ktime_before(timer->node.expires, next->expires)) {
7311     struct rtc_wkalrm alarm;
7312     int err;
7313     alarm.time = rtc_ktime_to_tm(timer->node.expires);
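
Aside on the drivers/rtc/interface.c hunk above: previously the hardware alarm was only programmed when the timer queue had been empty, so a timer inserted ahead of the current head never updated the alarm and could fire late. The new condition also reprograms when the freshly queued timer expires before the existing head. A toy model of that decision, with plain integers standing in for ktime_t:

#include <stdbool.h>
#include <stdio.h>

/* reprogram the alarm if the queue was empty or the new timer is now earliest */
static bool need_reprogram(bool queue_was_empty, long new_exp, long head_exp)
{
        return queue_was_empty || new_exp < head_exp;
}

int main(void)
{
        printf("%d\n", need_reprogram(true, 100, 0));    /* empty queue       -> 1 */
        printf("%d\n", need_reprogram(false, 50, 100));  /* earlier than head -> 1 */
        printf("%d\n", need_reprogram(false, 200, 100)); /* later than head   -> 0 */
        return 0;
}
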
7314     diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
7315     index e1687e19c59f..a30f24cb6c83 100644
7316     --- a/drivers/rtc/rtc-pl031.c
7317     +++ b/drivers/rtc/rtc-pl031.c
7318     @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev)
7319    
7320     dev_pm_clear_wake_irq(&adev->dev);
7321     device_init_wakeup(&adev->dev, false);
7322     - free_irq(adev->irq[0], ldata);
7323     + if (adev->irq[0])
7324     + free_irq(adev->irq[0], ldata);
7325     rtc_device_unregister(ldata->rtc);
7326     iounmap(ldata->base);
7327     kfree(ldata);
7328     @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
7329     goto out_no_rtc;
7330     }
7331    
7332     - if (request_irq(adev->irq[0], pl031_interrupt,
7333     - vendor->irqflags, "rtc-pl031", ldata)) {
7334     - ret = -EIO;
7335     - goto out_no_irq;
7336     + if (adev->irq[0]) {
7337     + ret = request_irq(adev->irq[0], pl031_interrupt,
7338     + vendor->irqflags, "rtc-pl031", ldata);
7339     + if (ret)
7340     + goto out_no_irq;
7341     + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
7342     }
7343     - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
7344     return 0;
7345    
7346     out_no_irq:
7347     diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7348     index 1d02cf9fe06c..30d5f0ef29bb 100644
7349     --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7350     +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7351     @@ -1575,6 +1575,7 @@ static void release_offload_resources(struct cxgbi_sock *csk)
7352     csk, csk->state, csk->flags, csk->tid);
7353    
7354     cxgbi_sock_free_cpl_skbs(csk);
7355     + cxgbi_sock_purge_write_queue(csk);
7356     if (csk->wr_cred != csk->wr_max_cred) {
7357     cxgbi_sock_purge_wr_queue(csk);
7358     cxgbi_sock_reset_wr_list(csk);
7359     diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
7360     index 499df9d17339..d9a03beb76a4 100644
7361     --- a/drivers/scsi/lpfc/lpfc_hbadisc.c
7362     +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
7363     @@ -4983,7 +4983,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
7364     lpfc_cancel_retry_delay_tmo(vport, ndlp);
7365     if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
7366     !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
7367     - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
7368     + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
7369     + phba->sli_rev != LPFC_SLI_REV4) {
7370     /* For this case we need to cleanup the default rpi
7371     * allocated by the firmware.
7372     */
7373     diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
7374     index 1db0a38683f4..2b145966c73f 100644
7375     --- a/drivers/scsi/lpfc/lpfc_hw4.h
7376     +++ b/drivers/scsi/lpfc/lpfc_hw4.h
7377     @@ -3636,7 +3636,7 @@ struct lpfc_mbx_get_port_name {
7378     #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4
7379     #define MB_CQE_STATUS_DMA_FAILED 0x5
7380    
7381     -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8
7382     +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1
7383     struct lpfc_mbx_wr_object {
7384     struct mbox_header header;
7385     union {
7386     diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
7387     index 3c5b054a56ac..7ac1a067d780 100644
7388     --- a/drivers/scsi/lpfc/lpfc_nvmet.c
7389     +++ b/drivers/scsi/lpfc/lpfc_nvmet.c
7390     @@ -1464,6 +1464,7 @@ static struct lpfc_nvmet_ctxbuf *
7391     lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
7392     struct lpfc_nvmet_ctx_info *current_infop)
7393     {
7394     +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
7395     struct lpfc_nvmet_ctxbuf *ctx_buf = NULL;
7396     struct lpfc_nvmet_ctx_info *get_infop;
7397     int i;
7398     @@ -1511,6 +1512,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
7399     get_infop = get_infop->nvmet_ctx_next_cpu;
7400     }
7401    
7402     +#endif
7403     /* Nothing found, all contexts for the MRQ are in-flight */
7404     return NULL;
7405     }
7406     diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7407     index 22998cbd538f..33ff691878e2 100644
7408     --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7409     +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7410     @@ -4804,6 +4804,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
7411     } else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
7412     scmd->result = DID_RESET << 16;
7413     break;
7414     + } else if ((scmd->device->channel == RAID_CHANNEL) &&
7415     + (scsi_state == (MPI2_SCSI_STATE_TERMINATED |
7416     + MPI2_SCSI_STATE_NO_SCSI_STATUS))) {
7417     + scmd->result = DID_RESET << 16;
7418     + break;
7419     }
7420     scmd->result = DID_SOFT_ERROR << 16;
7421     break;
7422     diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c
7423     index 3f4148c92308..0f538b8c3a07 100644
7424     --- a/drivers/staging/greybus/light.c
7425     +++ b/drivers/staging/greybus/light.c
7426     @@ -925,6 +925,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel)
7427     return;
7428    
7429     led_classdev_unregister(cdev);
7430     + kfree(cdev->name);
7431     + cdev->name = NULL;
7432     channel->led = NULL;
7433     }
7434    
7435     diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
7436     index 7952357df9c8..edb6e4e9ef3a 100644
7437     --- a/drivers/tee/optee/core.c
7438     +++ b/drivers/tee/optee/core.c
7439     @@ -590,7 +590,6 @@ static int __init optee_driver_init(void)
7440     return -ENODEV;
7441    
7442     np = of_find_matching_node(fw_np, optee_match);
7443     - of_node_put(fw_np);
7444     if (!np)
7445     return -ENODEV;
7446    
7447     diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
7448     index bd3572c41585..6d8906d65476 100644
7449     --- a/drivers/thermal/hisi_thermal.c
7450     +++ b/drivers/thermal/hisi_thermal.c
7451     @@ -35,8 +35,9 @@
7452     #define TEMP0_RST_MSK (0x1C)
7453     #define TEMP0_VALUE (0x28)
7454    
7455     -#define HISI_TEMP_BASE (-60)
7456     +#define HISI_TEMP_BASE (-60000)
7457     #define HISI_TEMP_RESET (100000)
7458     +#define HISI_TEMP_STEP (784)
7459    
7460     #define HISI_MAX_SENSORS 4
7461    
7462     @@ -61,19 +62,38 @@ struct hisi_thermal_data {
7463     void __iomem *regs;
7464     };
7465    
7466     -/* in millicelsius */
7467     -static inline int _step_to_temp(int step)
7468     +/*
7469     + * The temperature computation on the tsensor is as follow:
7470     + * Unit: millidegree Celsius
7471     + * Step: 255/200 (0.7843)
7472     + * Temperature base: -60°C
7473     + *
7474     + * The register is programmed in temperature steps, every step is 784
7475     + * millidegree and begins at -60 000 m°C
7476     + *
7477     + * The temperature from the steps:
7478     + *
7479     + * Temp = TempBase + (steps x 784)
7480     + *
7481     + * and the steps from the temperature:
7482     + *
7483     + * steps = (Temp - TempBase) / 784
7484     + *
7485     + */
7486     +static inline int hisi_thermal_step_to_temp(int step)
7487     {
7488     - /*
7489     - * Every step equals (1 * 200) / 255 celsius, and finally
7490     - * need convert to millicelsius.
7491     - */
7492     - return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255));
7493     + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP);
7494     +}
7495     +
7496     +static inline long hisi_thermal_temp_to_step(long temp)
7497     +{
7498     + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP;
7499     }
7500    
7501     -static inline long _temp_to_step(long temp)
7502     +static inline long hisi_thermal_round_temp(int temp)
7503     {
7504     - return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000;
7505     + return hisi_thermal_step_to_temp(
7506     + hisi_thermal_temp_to_step(temp));
7507     }
7508    
7509     static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
7510     @@ -99,7 +119,7 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
7511     usleep_range(3000, 5000);
7512    
7513     val = readl(data->regs + TEMP0_VALUE);
7514     - val = _step_to_temp(val);
7515     + val = hisi_thermal_step_to_temp(val);
7516    
7517     mutex_unlock(&data->thermal_lock);
7518    
7519     @@ -126,10 +146,11 @@ static void hisi_thermal_enable_bind_irq_sensor
7520     writel((sensor->id << 12), data->regs + TEMP0_CFG);
7521    
7522     /* enable for interrupt */
7523     - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,
7524     + writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,
7525     data->regs + TEMP0_TH);
7526    
7527     - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH);
7528     + writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET),
7529     + data->regs + TEMP0_RST_TH);
7530    
7531     /* enable module */
7532     writel(0x1, data->regs + TEMP0_RST_MSK);
7533     @@ -230,7 +251,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
7534     sensor = &data->sensors[data->irq_bind_sensor];
7535    
7536     dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",
7537     - sensor->thres_temp / 1000);
7538     + sensor->thres_temp);
7539     mutex_unlock(&data->thermal_lock);
7540    
7541     for (i = 0; i < HISI_MAX_SENSORS; i++) {
7542     @@ -269,7 +290,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,
7543    
7544     for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) {
7545     if (trip[i].type == THERMAL_TRIP_PASSIVE) {
7546     - sensor->thres_temp = trip[i].temperature;
7547     + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature);
7548     break;
7549     }
7550     }
7551     @@ -317,15 +338,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7552     if (data->irq < 0)
7553     return data->irq;
7554    
7555     - ret = devm_request_threaded_irq(&pdev->dev, data->irq,
7556     - hisi_thermal_alarm_irq,
7557     - hisi_thermal_alarm_irq_thread,
7558     - 0, "hisi_thermal", data);
7559     - if (ret < 0) {
7560     - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
7561     - return ret;
7562     - }
7563     -
7564     platform_set_drvdata(pdev, data);
7565    
7566     data->clk = devm_clk_get(&pdev->dev, "thermal_clk");
7567     @@ -345,8 +357,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7568     }
7569    
7570     hisi_thermal_enable_bind_irq_sensor(data);
7571     - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
7572     - &data->irq_enabled);
7573     + data->irq_enabled = true;
7574    
7575     for (i = 0; i < HISI_MAX_SENSORS; ++i) {
7576     ret = hisi_thermal_register_sensor(pdev, data,
7577     @@ -358,6 +369,17 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7578     hisi_thermal_toggle_sensor(&data->sensors[i], true);
7579     }
7580    
7581     + ret = devm_request_threaded_irq(&pdev->dev, data->irq,
7582     + hisi_thermal_alarm_irq,
7583     + hisi_thermal_alarm_irq_thread,
7584     + 0, "hisi_thermal", data);
7585     + if (ret < 0) {
7586     + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
7587     + return ret;
7588     + }
7589     +
7590     + enable_irq(data->irq);
7591     +
7592     return 0;
7593     }
7594    
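
Aside on the hisi_thermal hunks above: the conversion now works entirely in millidegrees with a 784 m°C step starting at -60000 m°C, instead of mixing degree-based constants with a final *1000. A standalone copy of the two helpers with a worked example (same constants as the driver, plain ints instead of the kernel types):

#include <stdio.h>

#define HISI_TEMP_BASE  (-60000)        /* millidegree Celsius at step 0 */
#define HISI_TEMP_STEP  (784)           /* roughly 200/255 of a degree per step, in mC */

static int step_to_temp(int step) { return HISI_TEMP_BASE + step * HISI_TEMP_STEP; }
static int temp_to_step(int temp) { return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; }

int main(void)
{
        /* raw register value 200 -> -60000 + 200 * 784 = 96800 mC */
        printf("step 200 -> %d mC\n", step_to_temp(200));

        /* a 65 C trip point quantizes down to the nearest representable step,
         * which is what hisi_thermal_round_temp() does for the threshold */
        int step = temp_to_step(65000);         /* 125000 / 784 = 159 */
        printf("65000 mC -> step %d -> %d mC\n", step, step_to_temp(step));
        return 0;
}
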
7595     diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
7596     index 5628fe114347..91335e6de88a 100644
7597     --- a/drivers/vfio/pci/vfio_pci_config.c
7598     +++ b/drivers/vfio/pci/vfio_pci_config.c
7599     @@ -849,11 +849,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
7600    
7601     /*
7602     * Allow writes to device control fields, except devctl_phantom,
7603     - * which could confuse IOMMU, and the ARI bit in devctl2, which
7604     + * which could confuse IOMMU, MPS, which can break communication
7605     + * with other physical devices, and the ARI bit in devctl2, which
7606     * is set at probe time. FLR gets virtualized via our writefn.
7607     */
7608     p_setw(perm, PCI_EXP_DEVCTL,
7609     - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
7610     + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
7611     + ~PCI_EXP_DEVCTL_PHANTOM);
7612     p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
7613     return 0;
7614     }
7615     diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
7616     index 9bd17682655a..1c2289ddd555 100644
7617     --- a/drivers/video/backlight/pwm_bl.c
7618     +++ b/drivers/video/backlight/pwm_bl.c
7619     @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb)
7620     static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness)
7621     {
7622     unsigned int lth = pb->lth_brightness;
7623     - int duty_cycle;
7624     + u64 duty_cycle;
7625    
7626     if (pb->levels)
7627     duty_cycle = pb->levels[brightness];
7628     else
7629     duty_cycle = brightness;
7630    
7631     - return (duty_cycle * (pb->period - lth) / pb->scale) + lth;
7632     + duty_cycle *= pb->period - lth;
7633     + do_div(duty_cycle, pb->scale);
7634     +
7635     + return duty_cycle + lth;
7636     }
7637    
7638     static int pwm_backlight_update_status(struct backlight_device *bl)
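
Aside on the pwm_bl hunk above: the product of the brightness level and the PWM period in nanoseconds can exceed 32 bits (for example a 16-bit level table with a millisecond period), so the old int arithmetic could wrap before the division by scale; doing the multiply in a u64 and dividing with do_div() keeps the intermediate exact. A userspace sketch showing the difference, with illustrative numbers:

#include <inttypes.h>
#include <stdio.h>

/* 64-bit version of compute_duty_cycle(): widen, multiply, then divide by scale */
static uint64_t duty_64(uint32_t level, uint32_t period, uint32_t lth, uint32_t scale)
{
        uint64_t duty = level;

        duty *= period - lth;   /* cannot overflow in 64 bits for these ranges */
        duty /= scale;          /* the kernel uses do_div(duty, scale) here */
        return duty + lth;
}

int main(void)
{
        uint32_t level = 65535, period = 1000000, lth = 0, scale = 65535;

        /* the 32-bit product 65535 * 1000000 wraps modulo 2^32 */
        uint32_t wrapped = level * (period - lth) / scale;

        printf("32-bit: %" PRIu32 " ns, 64-bit: %" PRIu64 " ns\n",
               wrapped, duty_64(level, period, lth, scale));
        return 0;
}
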
7639     diff --git a/fs/dcache.c b/fs/dcache.c
7640     index f90141387f01..34c852af215c 100644
7641     --- a/fs/dcache.c
7642     +++ b/fs/dcache.c
7643     @@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
7644     {
7645     /*
7646     * Be careful about RCU walk racing with rename:
7647     - * use 'lockless_dereference' to fetch the name pointer.
7648     + * use 'READ_ONCE' to fetch the name pointer.
7649     *
7650     * NOTE! Even if a rename will mean that the length
7651     * was not loaded atomically, we don't care. The
7652     @@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
7653     * early because the data cannot match (there can
7654     * be no NUL in the ct/tcount data)
7655     */
7656     - const unsigned char *cs = lockless_dereference(dentry->d_name.name);
7657     + const unsigned char *cs = READ_ONCE(dentry->d_name.name);
7658    
7659     return dentry_string_cmp(cs, ct, tcount);
7660     }
7661     diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
7662     index 25d9b5adcd42..36b49bd09264 100644
7663     --- a/fs/overlayfs/ovl_entry.h
7664     +++ b/fs/overlayfs/ovl_entry.h
7665     @@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
7666    
7667     static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
7668     {
7669     - return lockless_dereference(oi->__upperdentry);
7670     + return READ_ONCE(oi->__upperdentry);
7671     }
7672     diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
7673     index b2c7f33e08fc..d94a51dc4e32 100644
7674     --- a/fs/overlayfs/readdir.c
7675     +++ b/fs/overlayfs/readdir.c
7676     @@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
7677     if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
7678     struct inode *inode = file_inode(file);
7679    
7680     - realfile = lockless_dereference(od->upperfile);
7681     + realfile = READ_ONCE(od->upperfile);
7682     if (!realfile) {
7683     struct path upperpath;
7684    
7685     diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
7686     index e549bff87c5b..353f52fdc35e 100644
7687     --- a/include/asm-generic/vmlinux.lds.h
7688     +++ b/include/asm-generic/vmlinux.lds.h
7689     @@ -688,7 +688,7 @@
7690     #define BUG_TABLE
7691     #endif
7692    
7693     -#ifdef CONFIG_ORC_UNWINDER
7694     +#ifdef CONFIG_UNWINDER_ORC
7695     #define ORC_UNWIND_TABLE \
7696     . = ALIGN(4); \
7697     .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
7698     diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
7699     index b8d200f60a40..73bec75b74c8 100644
7700     --- a/include/linux/bpf_verifier.h
7701     +++ b/include/linux/bpf_verifier.h
7702     @@ -15,11 +15,11 @@
7703     * In practice this is far bigger than any realistic pointer offset; this limit
7704     * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
7705     */
7706     -#define BPF_MAX_VAR_OFF (1ULL << 31)
7707     +#define BPF_MAX_VAR_OFF (1 << 29)
7708     /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
7709     * that converting umax_value to int cannot overflow.
7710     */
7711     -#define BPF_MAX_VAR_SIZ INT_MAX
7712     +#define BPF_MAX_VAR_SIZ (1 << 29)
7713    
7714     /* Liveness marks, used for registers and spilled-regs (in stack slots).
7715     * Read marks propagate upwards until they find a write mark; they record that
7716     @@ -110,7 +110,7 @@ struct bpf_insn_aux_data {
7717     struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
7718     };
7719     int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
7720     - int converted_op_size; /* the valid value width after perceived conversion */
7721     + bool seen; /* this insn was processed by the verifier */
7722     };
7723    
7724     #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
7725     diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
7726     index 780b1242bf24..3b609edffa8f 100644
7727     --- a/include/linux/compiler-clang.h
7728     +++ b/include/linux/compiler-clang.h
7729     @@ -1,5 +1,5 @@
7730     /* SPDX-License-Identifier: GPL-2.0 */
7731     -#ifndef __LINUX_COMPILER_H
7732     +#ifndef __LINUX_COMPILER_TYPES_H
7733     #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
7734     #endif
7735    
7736     diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
7737     index bb78e5bdff26..2272ded07496 100644
7738     --- a/include/linux/compiler-gcc.h
7739     +++ b/include/linux/compiler-gcc.h
7740     @@ -1,5 +1,5 @@
7741     /* SPDX-License-Identifier: GPL-2.0 */
7742     -#ifndef __LINUX_COMPILER_H
7743     +#ifndef __LINUX_COMPILER_TYPES_H
7744     #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
7745     #endif
7746    
7747     diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
7748     index 523d1b74550f..bfa08160db3a 100644
7749     --- a/include/linux/compiler-intel.h
7750     +++ b/include/linux/compiler-intel.h
7751     @@ -1,5 +1,5 @@
7752     /* SPDX-License-Identifier: GPL-2.0 */
7753     -#ifndef __LINUX_COMPILER_H
7754     +#ifndef __LINUX_COMPILER_TYPES_H
7755     #error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead."
7756     #endif
7757    
7758     diff --git a/include/linux/compiler.h b/include/linux/compiler.h
7759     index 202710420d6d..fab5dc250c61 100644
7760     --- a/include/linux/compiler.h
7761     +++ b/include/linux/compiler.h
7762     @@ -2,111 +2,12 @@
7763     #ifndef __LINUX_COMPILER_H
7764     #define __LINUX_COMPILER_H
7765    
7766     -#ifndef __ASSEMBLY__
7767     +#include <linux/compiler_types.h>
7768    
7769     -#ifdef __CHECKER__
7770     -# define __user __attribute__((noderef, address_space(1)))
7771     -# define __kernel __attribute__((address_space(0)))
7772     -# define __safe __attribute__((safe))
7773     -# define __force __attribute__((force))
7774     -# define __nocast __attribute__((nocast))
7775     -# define __iomem __attribute__((noderef, address_space(2)))
7776     -# define __must_hold(x) __attribute__((context(x,1,1)))
7777     -# define __acquires(x) __attribute__((context(x,0,1)))
7778     -# define __releases(x) __attribute__((context(x,1,0)))
7779     -# define __acquire(x) __context__(x,1)
7780     -# define __release(x) __context__(x,-1)
7781     -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
7782     -# define __percpu __attribute__((noderef, address_space(3)))
7783     -# define __rcu __attribute__((noderef, address_space(4)))
7784     -# define __private __attribute__((noderef))
7785     -extern void __chk_user_ptr(const volatile void __user *);
7786     -extern void __chk_io_ptr(const volatile void __iomem *);
7787     -# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
7788     -#else /* __CHECKER__ */
7789     -# ifdef STRUCTLEAK_PLUGIN
7790     -# define __user __attribute__((user))
7791     -# else
7792     -# define __user
7793     -# endif
7794     -# define __kernel
7795     -# define __safe
7796     -# define __force
7797     -# define __nocast
7798     -# define __iomem
7799     -# define __chk_user_ptr(x) (void)0
7800     -# define __chk_io_ptr(x) (void)0
7801     -# define __builtin_warning(x, y...) (1)
7802     -# define __must_hold(x)
7803     -# define __acquires(x)
7804     -# define __releases(x)
7805     -# define __acquire(x) (void)0
7806     -# define __release(x) (void)0
7807     -# define __cond_lock(x,c) (c)
7808     -# define __percpu
7809     -# define __rcu
7810     -# define __private
7811     -# define ACCESS_PRIVATE(p, member) ((p)->member)
7812     -#endif /* __CHECKER__ */
7813     -
7814     -/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
7815     -#define ___PASTE(a,b) a##b
7816     -#define __PASTE(a,b) ___PASTE(a,b)
7817     +#ifndef __ASSEMBLY__
7818    
7819     #ifdef __KERNEL__
7820    
7821     -#ifdef __GNUC__
7822     -#include <linux/compiler-gcc.h>
7823     -#endif
7824     -
7825     -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
7826     -#define notrace __attribute__((hotpatch(0,0)))
7827     -#else
7828     -#define notrace __attribute__((no_instrument_function))
7829     -#endif
7830     -
7831     -/* Intel compiler defines __GNUC__. So we will overwrite implementations
7832     - * coming from above header files here
7833     - */
7834     -#ifdef __INTEL_COMPILER
7835     -# include <linux/compiler-intel.h>
7836     -#endif
7837     -
7838     -/* Clang compiler defines __GNUC__. So we will overwrite implementations
7839     - * coming from above header files here
7840     - */
7841     -#ifdef __clang__
7842     -#include <linux/compiler-clang.h>
7843     -#endif
7844     -
7845     -/*
7846     - * Generic compiler-dependent macros required for kernel
7847     - * build go below this comment. Actual compiler/compiler version
7848     - * specific implementations come from the above header files
7849     - */
7850     -
7851     -struct ftrace_branch_data {
7852     - const char *func;
7853     - const char *file;
7854     - unsigned line;
7855     - union {
7856     - struct {
7857     - unsigned long correct;
7858     - unsigned long incorrect;
7859     - };
7860     - struct {
7861     - unsigned long miss;
7862     - unsigned long hit;
7863     - };
7864     - unsigned long miss_hit[2];
7865     - };
7866     -};
7867     -
7868     -struct ftrace_likely_data {
7869     - struct ftrace_branch_data data;
7870     - unsigned long constant;
7871     -};
7872     -
7873     /*
7874     * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
7875     * to disable branch tracing on a per file basis.
7876     @@ -333,6 +234,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7877     * with an explicit memory barrier or atomic instruction that provides the
7878     * required ordering.
7879     */
7880     +#include <asm/barrier.h>
7881    
7882     #define __READ_ONCE(x, check) \
7883     ({ \
7884     @@ -341,6 +243,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7885     __read_once_size(&(x), __u.__c, sizeof(x)); \
7886     else \
7887     __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \
7888     + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
7889     __u.__val; \
7890     })
7891     #define READ_ONCE(x) __READ_ONCE(x, 1)
7892     @@ -363,167 +266,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7893    
7894     #endif /* __ASSEMBLY__ */
7895    
7896     -#ifdef __KERNEL__
7897     -/*
7898     - * Allow us to mark functions as 'deprecated' and have gcc emit a nice
7899     - * warning for each use, in hopes of speeding the functions removal.
7900     - * Usage is:
7901     - * int __deprecated foo(void)
7902     - */
7903     -#ifndef __deprecated
7904     -# define __deprecated /* unimplemented */
7905     -#endif
7906     -
7907     -#ifdef MODULE
7908     -#define __deprecated_for_modules __deprecated
7909     -#else
7910     -#define __deprecated_for_modules
7911     -#endif
7912     -
7913     -#ifndef __must_check
7914     -#define __must_check
7915     -#endif
7916     -
7917     -#ifndef CONFIG_ENABLE_MUST_CHECK
7918     -#undef __must_check
7919     -#define __must_check
7920     -#endif
7921     -#ifndef CONFIG_ENABLE_WARN_DEPRECATED
7922     -#undef __deprecated
7923     -#undef __deprecated_for_modules
7924     -#define __deprecated
7925     -#define __deprecated_for_modules
7926     -#endif
7927     -
7928     -#ifndef __malloc
7929     -#define __malloc
7930     -#endif
7931     -
7932     -/*
7933     - * Allow us to avoid 'defined but not used' warnings on functions and data,
7934     - * as well as force them to be emitted to the assembly file.
7935     - *
7936     - * As of gcc 3.4, static functions that are not marked with attribute((used))
7937     - * may be elided from the assembly file. As of gcc 3.4, static data not so
7938     - * marked will not be elided, but this may change in a future gcc version.
7939     - *
7940     - * NOTE: Because distributions shipped with a backported unit-at-a-time
7941     - * compiler in gcc 3.3, we must define __used to be __attribute__((used))
7942     - * for gcc >=3.3 instead of 3.4.
7943     - *
7944     - * In prior versions of gcc, such functions and data would be emitted, but
7945     - * would be warned about except with attribute((unused)).
7946     - *
7947     - * Mark functions that are referenced only in inline assembly as __used so
7948     - * the code is emitted even though it appears to be unreferenced.
7949     - */
7950     -#ifndef __used
7951     -# define __used /* unimplemented */
7952     -#endif
7953     -
7954     -#ifndef __maybe_unused
7955     -# define __maybe_unused /* unimplemented */
7956     -#endif
7957     -
7958     -#ifndef __always_unused
7959     -# define __always_unused /* unimplemented */
7960     -#endif
7961     -
7962     -#ifndef noinline
7963     -#define noinline
7964     -#endif
7965     -
7966     -/*
7967     - * Rather then using noinline to prevent stack consumption, use
7968     - * noinline_for_stack instead. For documentation reasons.
7969     - */
7970     -#define noinline_for_stack noinline
7971     -
7972     -#ifndef __always_inline
7973     -#define __always_inline inline
7974     -#endif
7975     -
7976     -#endif /* __KERNEL__ */
7977     -
7978     -/*
7979     - * From the GCC manual:
7980     - *
7981     - * Many functions do not examine any values except their arguments,
7982     - * and have no effects except the return value. Basically this is
7983     - * just slightly more strict class than the `pure' attribute above,
7984     - * since function is not allowed to read global memory.
7985     - *
7986     - * Note that a function that has pointer arguments and examines the
7987     - * data pointed to must _not_ be declared `const'. Likewise, a
7988     - * function that calls a non-`const' function usually must not be
7989     - * `const'. It does not make sense for a `const' function to return
7990     - * `void'.
7991     - */
7992     -#ifndef __attribute_const__
7993     -# define __attribute_const__ /* unimplemented */
7994     -#endif
7995     -
7996     -#ifndef __designated_init
7997     -# define __designated_init
7998     -#endif
7999     -
8000     -#ifndef __latent_entropy
8001     -# define __latent_entropy
8002     -#endif
8003     -
8004     -#ifndef __randomize_layout
8005     -# define __randomize_layout __designated_init
8006     -#endif
8007     -
8008     -#ifndef __no_randomize_layout
8009     -# define __no_randomize_layout
8010     -#endif
8011     -
8012     -#ifndef randomized_struct_fields_start
8013     -# define randomized_struct_fields_start
8014     -# define randomized_struct_fields_end
8015     -#endif
8016     -
8017     -/*
8018     - * Tell gcc if a function is cold. The compiler will assume any path
8019     - * directly leading to the call is unlikely.
8020     - */
8021     -
8022     -#ifndef __cold
8023     -#define __cold
8024     -#endif
8025     -
8026     -/* Simple shorthand for a section definition */
8027     -#ifndef __section
8028     -# define __section(S) __attribute__ ((__section__(#S)))
8029     -#endif
8030     -
8031     -#ifndef __visible
8032     -#define __visible
8033     -#endif
8034     -
8035     -#ifndef __nostackprotector
8036     -# define __nostackprotector
8037     -#endif
8038     -
8039     -/*
8040     - * Assume alignment of return value.
8041     - */
8042     -#ifndef __assume_aligned
8043     -#define __assume_aligned(a, ...)
8044     -#endif
8045     -
8046     -
8047     -/* Are two types/vars the same type (ignoring qualifiers)? */
8048     -#ifndef __same_type
8049     -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
8050     -#endif
8051     -
8052     -/* Is this type a native word size -- useful for atomic operations */
8053     -#ifndef __native_word
8054     -# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
8055     -#endif
8056     -
8057     /* Compile time object size, -1 for unknown */
8058     #ifndef __compiletime_object_size
8059     # define __compiletime_object_size(obj) -1
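The compiler.h hunk above folds an smp_read_barrier_depends() into __READ_ONCE(), which is what lets the later hunks in this patch replace lockless_dereference() with plain READ_ONCE(). A minimal sketch of the publish/consume pattern that relies on this ordering, assuming the usual kernel barrier primitives (the node type and function names are illustrative, not from the patch):

struct example_node {
	int payload;
};

static struct example_node *example_head;	/* published by the producer */

static void example_publish(struct example_node *n)
{
	n->payload = 42;
	smp_store_release(&example_head, n);	/* pairs with the dependent read below */
}

static int example_consume(void)
{
	struct example_node *n = READ_ONCE(example_head);	/* was lockless_dereference() */

	if (!n)
		return -1;
	return n->payload;	/* dependency-ordered after the READ_ONCE(), even on Alpha */
}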
8060     diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
8061     new file mode 100644
8062     index 000000000000..6b79a9bba9a7
8063     --- /dev/null
8064     +++ b/include/linux/compiler_types.h
8065     @@ -0,0 +1,274 @@
8066     +#ifndef __LINUX_COMPILER_TYPES_H
8067     +#define __LINUX_COMPILER_TYPES_H
8068     +
8069     +#ifndef __ASSEMBLY__
8070     +
8071     +#ifdef __CHECKER__
8072     +# define __user __attribute__((noderef, address_space(1)))
8073     +# define __kernel __attribute__((address_space(0)))
8074     +# define __safe __attribute__((safe))
8075     +# define __force __attribute__((force))
8076     +# define __nocast __attribute__((nocast))
8077     +# define __iomem __attribute__((noderef, address_space(2)))
8078     +# define __must_hold(x) __attribute__((context(x,1,1)))
8079     +# define __acquires(x) __attribute__((context(x,0,1)))
8080     +# define __releases(x) __attribute__((context(x,1,0)))
8081     +# define __acquire(x) __context__(x,1)
8082     +# define __release(x) __context__(x,-1)
8083     +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
8084     +# define __percpu __attribute__((noderef, address_space(3)))
8085     +# define __rcu __attribute__((noderef, address_space(4)))
8086     +# define __private __attribute__((noderef))
8087     +extern void __chk_user_ptr(const volatile void __user *);
8088     +extern void __chk_io_ptr(const volatile void __iomem *);
8089     +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
8090     +#else /* __CHECKER__ */
8091     +# ifdef STRUCTLEAK_PLUGIN
8092     +# define __user __attribute__((user))
8093     +# else
8094     +# define __user
8095     +# endif
8096     +# define __kernel
8097     +# define __safe
8098     +# define __force
8099     +# define __nocast
8100     +# define __iomem
8101     +# define __chk_user_ptr(x) (void)0
8102     +# define __chk_io_ptr(x) (void)0
8103     +# define __builtin_warning(x, y...) (1)
8104     +# define __must_hold(x)
8105     +# define __acquires(x)
8106     +# define __releases(x)
8107     +# define __acquire(x) (void)0
8108     +# define __release(x) (void)0
8109     +# define __cond_lock(x,c) (c)
8110     +# define __percpu
8111     +# define __rcu
8112     +# define __private
8113     +# define ACCESS_PRIVATE(p, member) ((p)->member)
8114     +#endif /* __CHECKER__ */
8115     +
8116     +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
8117     +#define ___PASTE(a,b) a##b
8118     +#define __PASTE(a,b) ___PASTE(a,b)
8119     +
8120     +#ifdef __KERNEL__
8121     +
8122     +#ifdef __GNUC__
8123     +#include <linux/compiler-gcc.h>
8124     +#endif
8125     +
8126     +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
8127     +#define notrace __attribute__((hotpatch(0,0)))
8128     +#else
8129     +#define notrace __attribute__((no_instrument_function))
8130     +#endif
8131     +
8132     +/* Intel compiler defines __GNUC__. So we will overwrite implementations
8133     + * coming from above header files here
8134     + */
8135     +#ifdef __INTEL_COMPILER
8136     +# include <linux/compiler-intel.h>
8137     +#endif
8138     +
8139     +/* Clang compiler defines __GNUC__. So we will overwrite implementations
8140     + * coming from above header files here
8141     + */
8142     +#ifdef __clang__
8143     +#include <linux/compiler-clang.h>
8144     +#endif
8145     +
8146     +/*
8147     + * Generic compiler-dependent macros required for kernel
8148     + * build go below this comment. Actual compiler/compiler version
8149     + * specific implementations come from the above header files
8150     + */
8151     +
8152     +struct ftrace_branch_data {
8153     + const char *func;
8154     + const char *file;
8155     + unsigned line;
8156     + union {
8157     + struct {
8158     + unsigned long correct;
8159     + unsigned long incorrect;
8160     + };
8161     + struct {
8162     + unsigned long miss;
8163     + unsigned long hit;
8164     + };
8165     + unsigned long miss_hit[2];
8166     + };
8167     +};
8168     +
8169     +struct ftrace_likely_data {
8170     + struct ftrace_branch_data data;
8171     + unsigned long constant;
8172     +};
8173     +
8174     +#endif /* __KERNEL__ */
8175     +
8176     +#endif /* __ASSEMBLY__ */
8177     +
8178     +#ifdef __KERNEL__
8179     +/*
8180     + * Allow us to mark functions as 'deprecated' and have gcc emit a nice
8181     + * warning for each use, in hopes of speeding the functions removal.
8182     + * Usage is:
8183     + * int __deprecated foo(void)
8184     + */
8185     +#ifndef __deprecated
8186     +# define __deprecated /* unimplemented */
8187     +#endif
8188     +
8189     +#ifdef MODULE
8190     +#define __deprecated_for_modules __deprecated
8191     +#else
8192     +#define __deprecated_for_modules
8193     +#endif
8194     +
8195     +#ifndef __must_check
8196     +#define __must_check
8197     +#endif
8198     +
8199     +#ifndef CONFIG_ENABLE_MUST_CHECK
8200     +#undef __must_check
8201     +#define __must_check
8202     +#endif
8203     +#ifndef CONFIG_ENABLE_WARN_DEPRECATED
8204     +#undef __deprecated
8205     +#undef __deprecated_for_modules
8206     +#define __deprecated
8207     +#define __deprecated_for_modules
8208     +#endif
8209     +
8210     +#ifndef __malloc
8211     +#define __malloc
8212     +#endif
8213     +
8214     +/*
8215     + * Allow us to avoid 'defined but not used' warnings on functions and data,
8216     + * as well as force them to be emitted to the assembly file.
8217     + *
8218     + * As of gcc 3.4, static functions that are not marked with attribute((used))
8219     + * may be elided from the assembly file. As of gcc 3.4, static data not so
8220     + * marked will not be elided, but this may change in a future gcc version.
8221     + *
8222     + * NOTE: Because distributions shipped with a backported unit-at-a-time
8223     + * compiler in gcc 3.3, we must define __used to be __attribute__((used))
8224     + * for gcc >=3.3 instead of 3.4.
8225     + *
8226     + * In prior versions of gcc, such functions and data would be emitted, but
8227     + * would be warned about except with attribute((unused)).
8228     + *
8229     + * Mark functions that are referenced only in inline assembly as __used so
8230     + * the code is emitted even though it appears to be unreferenced.
8231     + */
8232     +#ifndef __used
8233     +# define __used /* unimplemented */
8234     +#endif
8235     +
8236     +#ifndef __maybe_unused
8237     +# define __maybe_unused /* unimplemented */
8238     +#endif
8239     +
8240     +#ifndef __always_unused
8241     +# define __always_unused /* unimplemented */
8242     +#endif
8243     +
8244     +#ifndef noinline
8245     +#define noinline
8246     +#endif
8247     +
8248     +/*
8249     + * Rather then using noinline to prevent stack consumption, use
8250     + * noinline_for_stack instead. For documentation reasons.
8251     + */
8252     +#define noinline_for_stack noinline
8253     +
8254     +#ifndef __always_inline
8255     +#define __always_inline inline
8256     +#endif
8257     +
8258     +#endif /* __KERNEL__ */
8259     +
8260     +/*
8261     + * From the GCC manual:
8262     + *
8263     + * Many functions do not examine any values except their arguments,
8264     + * and have no effects except the return value. Basically this is
8265     + * just slightly more strict class than the `pure' attribute above,
8266     + * since function is not allowed to read global memory.
8267     + *
8268     + * Note that a function that has pointer arguments and examines the
8269     + * data pointed to must _not_ be declared `const'. Likewise, a
8270     + * function that calls a non-`const' function usually must not be
8271     + * `const'. It does not make sense for a `const' function to return
8272     + * `void'.
8273     + */
8274     +#ifndef __attribute_const__
8275     +# define __attribute_const__ /* unimplemented */
8276     +#endif
8277     +
8278     +#ifndef __designated_init
8279     +# define __designated_init
8280     +#endif
8281     +
8282     +#ifndef __latent_entropy
8283     +# define __latent_entropy
8284     +#endif
8285     +
8286     +#ifndef __randomize_layout
8287     +# define __randomize_layout __designated_init
8288     +#endif
8289     +
8290     +#ifndef __no_randomize_layout
8291     +# define __no_randomize_layout
8292     +#endif
8293     +
8294     +#ifndef randomized_struct_fields_start
8295     +# define randomized_struct_fields_start
8296     +# define randomized_struct_fields_end
8297     +#endif
8298     +
8299     +/*
8300     + * Tell gcc if a function is cold. The compiler will assume any path
8301     + * directly leading to the call is unlikely.
8302     + */
8303     +
8304     +#ifndef __cold
8305     +#define __cold
8306     +#endif
8307     +
8308     +/* Simple shorthand for a section definition */
8309     +#ifndef __section
8310     +# define __section(S) __attribute__ ((__section__(#S)))
8311     +#endif
8312     +
8313     +#ifndef __visible
8314     +#define __visible
8315     +#endif
8316     +
8317     +#ifndef __nostackprotector
8318     +# define __nostackprotector
8319     +#endif
8320     +
8321     +/*
8322     + * Assume alignment of return value.
8323     + */
8324     +#ifndef __assume_aligned
8325     +#define __assume_aligned(a, ...)
8326     +#endif
8327     +
8328     +
8329     +/* Are two types/vars the same type (ignoring qualifiers)? */
8330     +#ifndef __same_type
8331     +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
8332     +#endif
8333     +
8334     +/* Is this type a native word size -- useful for atomic operations */
8335     +#ifndef __native_word
8336     +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
8337     +#endif
8338     +
8339     +#endif /* __LINUX_COMPILER_TYPES_H */
8340     diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
8341     index b4054fd5b6f6..b19563f9a8eb 100644
8342     --- a/include/linux/hypervisor.h
8343     +++ b/include/linux/hypervisor.h
8344     @@ -7,8 +7,12 @@
8345     * Juergen Gross <jgross@suse.com>
8346     */
8347    
8348     -#ifdef CONFIG_HYPERVISOR_GUEST
8349     -#include <asm/hypervisor.h>
8350     +#ifdef CONFIG_X86
8351     +#include <asm/x86_init.h>
8352     +static inline void hypervisor_pin_vcpu(int cpu)
8353     +{
8354     + x86_platform.hyper.pin_vcpu(cpu);
8355     +}
8356     #else
8357     static inline void hypervisor_pin_vcpu(int cpu)
8358     {
8359     diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
8360     index 7b0fa8b5c120..ce0ef1c0a30a 100644
8361     --- a/include/linux/iio/common/st_sensors.h
8362     +++ b/include/linux/iio/common/st_sensors.h
8363     @@ -139,7 +139,7 @@ struct st_sensor_das {
8364     * @mask_ihl: mask to enable/disable active low on the INT lines.
8365     * @addr_od: address to enable/disable Open Drain on the INT lines.
8366     * @mask_od: mask to enable/disable Open Drain on the INT lines.
8367     - * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt
8368     + * struct stat_drdy - status register of DRDY (data ready) interrupt.
8369     * struct ig1 - represents the Interrupt Generator 1 of sensors.
8370     * @en_addr: address of the enable ig1 register.
8371     * @en_mask: mask to write the on/off value for enable.
8372     @@ -152,7 +152,10 @@ struct st_sensor_data_ready_irq {
8373     u8 mask_ihl;
8374     u8 addr_od;
8375     u8 mask_od;
8376     - u8 addr_stat_drdy;
8377     + struct {
8378     + u8 addr;
8379     + u8 mask;
8380     + } stat_drdy;
8381     struct {
8382     u8 en_addr;
8383     u8 en_mask;
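The addr_stat_drdy field becomes an address/mask pair so a driver can test only its own bits in a data-ready status register that may be shared with other interrupt sources. A hedged sketch of how a consumer of the new stat_drdy fields might look, using the read_byte() transfer hook this kernel's st_sensors core provides (the helper name is illustrative):

static int example_sensor_drdy(struct st_sensor_data *sdata)
{
	u8 status;
	int err;

	err = sdata->tf->read_byte(&sdata->tb, sdata->dev,
				   sdata->sensor_settings->drdy_irq.stat_drdy.addr,
				   &status);
	if (err < 0)
		return err;

	/* Only the bits covered by stat_drdy.mask belong to this sensor. */
	return !!(status & sdata->sensor_settings->drdy_irq.stat_drdy.mask);
}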
8384     diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h
8385     new file mode 100644
8386     index 000000000000..2710d72de3c9
8387     --- /dev/null
8388     +++ b/include/linux/intel-pti.h
8389     @@ -0,0 +1,43 @@
8390     +/*
8391     + * Copyright (C) Intel 2011
8392     + *
8393     + * This program is free software; you can redistribute it and/or modify
8394     + * it under the terms of the GNU General Public License version 2 as
8395     + * published by the Free Software Foundation.
8396     + *
8397     + * This program is distributed in the hope that it will be useful,
8398     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8399     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8400     + * GNU General Public License for more details.
8401     + *
8402     + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8403     + *
8404     + * The PTI (Parallel Trace Interface) driver directs trace data routed from
8405     + * various parts in the system out through the Intel Penwell PTI port and
8406     + * out of the mobile device for analysis with a debugging tool
8407     + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
8408     + * compact JTAG, standard.
8409     + *
8410     + * This header file will allow other parts of the OS to use the
8411     + * interface to write out it's contents for debugging a mobile system.
8412     + */
8413     +
8414     +#ifndef LINUX_INTEL_PTI_H_
8415     +#define LINUX_INTEL_PTI_H_
8416     +
8417     +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
8418     +#define PTI_LASTDWORD_DTS 0x30
8419     +
8420     +/* basic structure used as a write address to the PTI HW */
8421     +struct pti_masterchannel {
8422     + u8 master;
8423     + u8 channel;
8424     +};
8425     +
8426     +/* the following functions are defined in misc/pti.c */
8427     +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
8428     +struct pti_masterchannel *pti_request_masterchannel(u8 type,
8429     + const char *thread_name);
8430     +void pti_release_masterchannel(struct pti_masterchannel *mc);
8431     +
8432     +#endif /* LINUX_INTEL_PTI_H_ */
8433     diff --git a/include/linux/linkage.h b/include/linux/linkage.h
8434     index 2e6f90bd52aa..f68db9e450eb 100644
8435     --- a/include/linux/linkage.h
8436     +++ b/include/linux/linkage.h
8437     @@ -2,7 +2,7 @@
8438     #ifndef _LINUX_LINKAGE_H
8439     #define _LINUX_LINKAGE_H
8440    
8441     -#include <linux/compiler.h>
8442     +#include <linux/compiler_types.h>
8443     #include <linux/stringify.h>
8444     #include <linux/export.h>
8445     #include <asm/linkage.h>
8446     diff --git a/include/linux/mm.h b/include/linux/mm.h
8447     index db647d428100..f50deada0f5c 100644
8448     --- a/include/linux/mm.h
8449     +++ b/include/linux/mm.h
8450     @@ -2510,7 +2510,7 @@ void vmemmap_populate_print_last(void);
8451     void vmemmap_free(unsigned long start, unsigned long end);
8452     #endif
8453     void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
8454     - unsigned long size);
8455     + unsigned long nr_pages);
8456    
8457     enum mf_flags {
8458     MF_COUNT_INCREASED = 1 << 0,
8459     diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
8460     index 18b06983131a..f0938257ee6d 100644
8461     --- a/include/linux/mmzone.h
8462     +++ b/include/linux/mmzone.h
8463     @@ -1152,13 +1152,17 @@ struct mem_section {
8464     #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
8465    
8466     #ifdef CONFIG_SPARSEMEM_EXTREME
8467     -extern struct mem_section *mem_section[NR_SECTION_ROOTS];
8468     +extern struct mem_section **mem_section;
8469     #else
8470     extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
8471     #endif
8472    
8473     static inline struct mem_section *__nr_to_section(unsigned long nr)
8474     {
8475     +#ifdef CONFIG_SPARSEMEM_EXTREME
8476     + if (!mem_section)
8477     + return NULL;
8478     +#endif
8479     if (!mem_section[SECTION_NR_TO_ROOT(nr)])
8480     return NULL;
8481     return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
8482     diff --git a/include/linux/pti.h b/include/linux/pti.h
8483     deleted file mode 100644
8484     index b3ea01a3197e..000000000000
8485     --- a/include/linux/pti.h
8486     +++ /dev/null
8487     @@ -1,43 +0,0 @@
8488     -/*
8489     - * Copyright (C) Intel 2011
8490     - *
8491     - * This program is free software; you can redistribute it and/or modify
8492     - * it under the terms of the GNU General Public License version 2 as
8493     - * published by the Free Software Foundation.
8494     - *
8495     - * This program is distributed in the hope that it will be useful,
8496     - * but WITHOUT ANY WARRANTY; without even the implied warranty of
8497     - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8498     - * GNU General Public License for more details.
8499     - *
8500     - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8501     - *
8502     - * The PTI (Parallel Trace Interface) driver directs trace data routed from
8503     - * various parts in the system out through the Intel Penwell PTI port and
8504     - * out of the mobile device for analysis with a debugging tool
8505     - * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
8506     - * compact JTAG, standard.
8507     - *
8508     - * This header file will allow other parts of the OS to use the
8509     - * interface to write out it's contents for debugging a mobile system.
8510     - */
8511     -
8512     -#ifndef PTI_H_
8513     -#define PTI_H_
8514     -
8515     -/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
8516     -#define PTI_LASTDWORD_DTS 0x30
8517     -
8518     -/* basic structure used as a write address to the PTI HW */
8519     -struct pti_masterchannel {
8520     - u8 master;
8521     - u8 channel;
8522     -};
8523     -
8524     -/* the following functions are defined in misc/pti.c */
8525     -void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
8526     -struct pti_masterchannel *pti_request_masterchannel(u8 type,
8527     - const char *thread_name);
8528     -void pti_release_masterchannel(struct pti_masterchannel *mc);
8529     -
8530     -#endif /*PTI_H_*/
8531     diff --git a/include/linux/rculist.h b/include/linux/rculist.h
8532     index c2cdd45a880a..127f534fec94 100644
8533     --- a/include/linux/rculist.h
8534     +++ b/include/linux/rculist.h
8535     @@ -275,7 +275,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
8536     * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
8537     */
8538     #define list_entry_rcu(ptr, type, member) \
8539     - container_of(lockless_dereference(ptr), type, member)
8540     + container_of(READ_ONCE(ptr), type, member)
8541    
8542     /*
8543     * Where are list_empty_rcu() and list_first_entry_rcu()?
8544     @@ -368,7 +368,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
8545     * example is when items are added to the list, but never deleted.
8546     */
8547     #define list_entry_lockless(ptr, type, member) \
8548     - container_of((typeof(ptr))lockless_dereference(ptr), type, member)
8549     + container_of((typeof(ptr))READ_ONCE(ptr), type, member)
8550    
8551     /**
8552     * list_for_each_entry_lockless - iterate over rcu list of given type
8553     diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
8554     index 1a9f70d44af9..a6ddc42f87a5 100644
8555     --- a/include/linux/rcupdate.h
8556     +++ b/include/linux/rcupdate.h
8557     @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { }
8558     #define __rcu_dereference_check(p, c, space) \
8559     ({ \
8560     /* Dependency order vs. p above. */ \
8561     - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \
8562     + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
8563     RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
8564     rcu_dereference_sparse(p, space); \
8565     ((typeof(*p) __force __kernel *)(________p1)); \
8566     @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { }
8567     #define rcu_dereference_raw(p) \
8568     ({ \
8569     /* Dependency order vs. p above. */ \
8570     - typeof(p) ________p1 = lockless_dereference(p); \
8571     + typeof(p) ________p1 = READ_ONCE(p); \
8572     ((typeof(*p) __force __kernel *)(________p1)); \
8573     })
8574    
8575     diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
8576     index f65b92e0e1f9..ee8220f8dcf5 100644
8577     --- a/include/uapi/linux/stddef.h
8578     +++ b/include/uapi/linux/stddef.h
8579     @@ -1,5 +1,5 @@
8580     /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
8581     -#include <linux/compiler.h>
8582     +#include <linux/compiler_types.h>
8583    
8584     #ifndef __always_inline
8585     #define __always_inline inline
8586     diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
8587     index c48ca2a34b5e..c5ff809e86d0 100644
8588     --- a/kernel/bpf/verifier.c
8589     +++ b/kernel/bpf/verifier.c
8590     @@ -1061,6 +1061,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
8591     break;
8592     case PTR_TO_STACK:
8593     pointer_desc = "stack ";
8594     + /* The stack spill tracking logic in check_stack_write()
8595     + * and check_stack_read() relies on stack accesses being
8596     + * aligned.
8597     + */
8598     + strict = true;
8599     break;
8600     default:
8601     break;
8602     @@ -1068,6 +1073,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
8603     return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
8604     }
8605    
8606     +/* truncate register to smaller size (in bytes)
8607     + * must be called with size < BPF_REG_SIZE
8608     + */
8609     +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
8610     +{
8611     + u64 mask;
8612     +
8613     + /* clear high bits in bit representation */
8614     + reg->var_off = tnum_cast(reg->var_off, size);
8615     +
8616     + /* fix arithmetic bounds */
8617     + mask = ((u64)1 << (size * 8)) - 1;
8618     + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
8619     + reg->umin_value &= mask;
8620     + reg->umax_value &= mask;
8621     + } else {
8622     + reg->umin_value = 0;
8623     + reg->umax_value = mask;
8624     + }
8625     + reg->smin_value = reg->umin_value;
8626     + reg->smax_value = reg->umax_value;
8627     +}
8628     +
8629     /* check whether memory at (regno + off) is accessible for t = (read | write)
8630     * if t==write, value_regno is a register which value is stored into memory
8631     * if t==read, value_regno is a register which will receive the value from memory
8632     @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
8633     if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
8634     state->regs[value_regno].type == SCALAR_VALUE) {
8635     /* b/h/w load zero-extends, mark upper bits as known 0 */
8636     - state->regs[value_regno].var_off = tnum_cast(
8637     - state->regs[value_regno].var_off, size);
8638     - __update_reg_bounds(&state->regs[value_regno]);
8639     + coerce_reg_to_size(&state->regs[value_regno], size);
8640     }
8641     return err;
8642     }
8643     @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
8644     tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
8645     verbose("invalid variable stack read R%d var_off=%s\n",
8646     regno, tn_buf);
8647     + return -EACCES;
8648     }
8649     off = regs[regno].off + regs[regno].var_off.value;
8650     if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
8651     @@ -1742,14 +1769,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
8652     return 0;
8653     }
8654    
8655     -static void coerce_reg_to_32(struct bpf_reg_state *reg)
8656     -{
8657     - /* clear high 32 bits */
8658     - reg->var_off = tnum_cast(reg->var_off, 4);
8659     - /* Update bounds */
8660     - __update_reg_bounds(reg);
8661     -}
8662     -
8663     static bool signed_add_overflows(s64 a, s64 b)
8664     {
8665     /* Do the add in u64, where overflow is well-defined */
8666     @@ -1770,6 +1789,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
8667     return res > a;
8668     }
8669    
8670     +static bool check_reg_sane_offset(struct bpf_verifier_env *env,
8671     + const struct bpf_reg_state *reg,
8672     + enum bpf_reg_type type)
8673     +{
8674     + bool known = tnum_is_const(reg->var_off);
8675     + s64 val = reg->var_off.value;
8676     + s64 smin = reg->smin_value;
8677     +
8678     + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
8679     + verbose("math between %s pointer and %lld is not allowed\n",
8680     + reg_type_str[type], val);
8681     + return false;
8682     + }
8683     +
8684     + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
8685     + verbose("%s pointer offset %d is not allowed\n",
8686     + reg_type_str[type], reg->off);
8687     + return false;
8688     + }
8689     +
8690     + if (smin == S64_MIN) {
8691     + verbose("math between %s pointer and register with unbounded min value is not allowed\n",
8692     + reg_type_str[type]);
8693     + return false;
8694     + }
8695     +
8696     + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
8697     + verbose("value %lld makes %s pointer be out of bounds\n",
8698     + smin, reg_type_str[type]);
8699     + return false;
8700     + }
8701     +
8702     + return true;
8703     +}
8704     +
8705     /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
8706     * Caller should also handle BPF_MOV case separately.
8707     * If we return -EACCES, caller may want to try again treating pointer as a
8708     @@ -1835,6 +1889,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
8709     dst_reg->type = ptr_reg->type;
8710     dst_reg->id = ptr_reg->id;
8711    
8712     + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
8713     + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
8714     + return -EINVAL;
8715     +
8716     switch (opcode) {
8717     case BPF_ADD:
8718     /* We can take a fixed offset as long as it doesn't overflow
8719     @@ -1965,12 +2023,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
8720     return -EACCES;
8721     }
8722    
8723     + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
8724     + return -EINVAL;
8725     +
8726     __update_reg_bounds(dst_reg);
8727     __reg_deduce_bounds(dst_reg);
8728     __reg_bound_offset(dst_reg);
8729     return 0;
8730     }
8731    
8732     +/* WARNING: This function does calculations on 64-bit values, but the actual
8733     + * execution may occur on 32-bit values. Therefore, things like bitshifts
8734     + * need extra checks in the 32-bit case.
8735     + */
8736     static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8737     struct bpf_insn *insn,
8738     struct bpf_reg_state *dst_reg,
8739     @@ -1981,12 +2046,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8740     bool src_known, dst_known;
8741     s64 smin_val, smax_val;
8742     u64 umin_val, umax_val;
8743     + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
8744    
8745     - if (BPF_CLASS(insn->code) != BPF_ALU64) {
8746     - /* 32-bit ALU ops are (32,32)->64 */
8747     - coerce_reg_to_32(dst_reg);
8748     - coerce_reg_to_32(&src_reg);
8749     - }
8750     smin_val = src_reg.smin_value;
8751     smax_val = src_reg.smax_value;
8752     umin_val = src_reg.umin_value;
8753     @@ -1994,6 +2055,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8754     src_known = tnum_is_const(src_reg.var_off);
8755     dst_known = tnum_is_const(dst_reg->var_off);
8756    
8757     + if (!src_known &&
8758     + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8759     + __mark_reg_unknown(dst_reg);
8760     + return 0;
8761     + }
8762     +
8763     switch (opcode) {
8764     case BPF_ADD:
8765     if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
8766     @@ -2122,9 +2189,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8767     __update_reg_bounds(dst_reg);
8768     break;
8769     case BPF_LSH:
8770     - if (umax_val > 63) {
8771     - /* Shifts greater than 63 are undefined. This includes
8772     - * shifts by a negative number.
8773     + if (umax_val >= insn_bitness) {
8774     + /* Shifts greater than 31 or 63 are undefined.
8775     + * This includes shifts by a negative number.
8776     */
8777     mark_reg_unknown(regs, insn->dst_reg);
8778     break;
8779     @@ -2150,27 +2217,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8780     __update_reg_bounds(dst_reg);
8781     break;
8782     case BPF_RSH:
8783     - if (umax_val > 63) {
8784     - /* Shifts greater than 63 are undefined. This includes
8785     - * shifts by a negative number.
8786     + if (umax_val >= insn_bitness) {
8787     + /* Shifts greater than 31 or 63 are undefined.
8788     + * This includes shifts by a negative number.
8789     */
8790     mark_reg_unknown(regs, insn->dst_reg);
8791     break;
8792     }
8793     - /* BPF_RSH is an unsigned shift, so make the appropriate casts */
8794     - if (dst_reg->smin_value < 0) {
8795     - if (umin_val) {
8796     - /* Sign bit will be cleared */
8797     - dst_reg->smin_value = 0;
8798     - } else {
8799     - /* Lost sign bit information */
8800     - dst_reg->smin_value = S64_MIN;
8801     - dst_reg->smax_value = S64_MAX;
8802     - }
8803     - } else {
8804     - dst_reg->smin_value =
8805     - (u64)(dst_reg->smin_value) >> umax_val;
8806     - }
8807     + /* BPF_RSH is an unsigned shift. If the value in dst_reg might
8808     + * be negative, then either:
8809     + * 1) src_reg might be zero, so the sign bit of the result is
8810     + * unknown, so we lose our signed bounds
8811     + * 2) it's known negative, thus the unsigned bounds capture the
8812     + * signed bounds
8813     + * 3) the signed bounds cross zero, so they tell us nothing
8814     + * about the result
8815     + * If the value in dst_reg is known nonnegative, then again the
8816     + * unsigned bounts capture the signed bounds.
8817     + * Thus, in all cases it suffices to blow away our signed bounds
8818     + * and rely on inferring new ones from the unsigned bounds and
8819     + * var_off of the result.
8820     + */
8821     + dst_reg->smin_value = S64_MIN;
8822     + dst_reg->smax_value = S64_MAX;
8823     if (src_known)
8824     dst_reg->var_off = tnum_rshift(dst_reg->var_off,
8825     umin_val);
8826     @@ -2186,6 +2255,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8827     break;
8828     }
8829    
8830     + if (BPF_CLASS(insn->code) != BPF_ALU64) {
8831     + /* 32-bit ALU ops are (32,32)->32 */
8832     + coerce_reg_to_size(dst_reg, 4);
8833     + coerce_reg_to_size(&src_reg, 4);
8834     + }
8835     +
8836     __reg_deduce_bounds(dst_reg);
8837     __reg_bound_offset(dst_reg);
8838     return 0;
8839     @@ -2362,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8840     return -EACCES;
8841     }
8842     mark_reg_unknown(regs, insn->dst_reg);
8843     - /* high 32 bits are known zero. */
8844     - regs[insn->dst_reg].var_off = tnum_cast(
8845     - regs[insn->dst_reg].var_off, 4);
8846     - __update_reg_bounds(&regs[insn->dst_reg]);
8847     + coerce_reg_to_size(&regs[insn->dst_reg], 4);
8848     }
8849     } else {
8850     /* case: R = imm
8851     * remember the value we stored into this reg
8852     */
8853     regs[insn->dst_reg].type = SCALAR_VALUE;
8854     - __mark_reg_known(regs + insn->dst_reg, insn->imm);
8855     + if (BPF_CLASS(insn->code) == BPF_ALU64) {
8856     + __mark_reg_known(regs + insn->dst_reg,
8857     + insn->imm);
8858     + } else {
8859     + __mark_reg_known(regs + insn->dst_reg,
8860     + (u32)insn->imm);
8861     + }
8862     }
8863    
8864     } else if (opcode > BPF_END) {
8865     @@ -3307,15 +3385,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8866     return range_within(rold, rcur) &&
8867     tnum_in(rold->var_off, rcur->var_off);
8868     } else {
8869     - /* if we knew anything about the old value, we're not
8870     - * equal, because we can't know anything about the
8871     - * scalar value of the pointer in the new value.
8872     + /* We're trying to use a pointer in place of a scalar.
8873     + * Even if the scalar was unbounded, this could lead to
8874     + * pointer leaks because scalars are allowed to leak
8875     + * while pointers are not. We could make this safe in
8876     + * special cases if root is calling us, but it's
8877     + * probably not worth the hassle.
8878     */
8879     - return rold->umin_value == 0 &&
8880     - rold->umax_value == U64_MAX &&
8881     - rold->smin_value == S64_MIN &&
8882     - rold->smax_value == S64_MAX &&
8883     - tnum_is_unknown(rold->var_off);
8884     + return false;
8885     }
8886     case PTR_TO_MAP_VALUE:
8887     /* If the new min/max/var_off satisfy the old ones and
8888     @@ -3665,6 +3742,7 @@ static int do_check(struct bpf_verifier_env *env)
8889     if (err)
8890     return err;
8891    
8892     + env->insn_aux_data[insn_idx].seen = true;
8893     if (class == BPF_ALU || class == BPF_ALU64) {
8894     err = check_alu_op(env, insn);
8895     if (err)
8896     @@ -3855,6 +3933,7 @@ static int do_check(struct bpf_verifier_env *env)
8897     return err;
8898    
8899     insn_idx++;
8900     + env->insn_aux_data[insn_idx].seen = true;
8901     } else {
8902     verbose("invalid BPF_LD mode\n");
8903     return -EINVAL;
8904     @@ -4035,6 +4114,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
8905     u32 off, u32 cnt)
8906     {
8907     struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
8908     + int i;
8909    
8910     if (cnt == 1)
8911     return 0;
8912     @@ -4044,6 +4124,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
8913     memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
8914     memcpy(new_data + off + cnt - 1, old_data + off,
8915     sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
8916     + for (i = off; i < off + cnt - 1; i++)
8917     + new_data[i].seen = true;
8918     env->insn_aux_data = new_data;
8919     vfree(old_data);
8920     return 0;
8921     @@ -4062,6 +4144,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
8922     return new_prog;
8923     }
8924    
8925     +/* The verifier does more data flow analysis than llvm and will not explore
8926     + * branches that are dead at run time. Malicious programs can have dead code
8927     + * too. Therefore replace all dead at-run-time code with nops.
8928     + */
8929     +static void sanitize_dead_code(struct bpf_verifier_env *env)
8930     +{
8931     + struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8932     + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
8933     + struct bpf_insn *insn = env->prog->insnsi;
8934     + const int insn_cnt = env->prog->len;
8935     + int i;
8936     +
8937     + for (i = 0; i < insn_cnt; i++) {
8938     + if (aux_data[i].seen)
8939     + continue;
8940     + memcpy(insn + i, &nop, sizeof(nop));
8941     + }
8942     +}
8943     +
8944     /* convert load instructions that access fields of 'struct __sk_buff'
8945     * into sequence of instructions that access fields of 'struct sk_buff'
8946     */
8947     @@ -4378,6 +4479,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
8948     while (pop_stack(env, NULL) >= 0);
8949     free_states(env);
8950    
8951     + if (ret == 0)
8952     + sanitize_dead_code(env);
8953     +
8954     if (ret == 0)
8955     /* program is valid, convert *(u32*)(ctx + off) accesses */
8956     ret = convert_ctx_accesses(env);
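The verifier changes above replace coerce_reg_to_32() with coerce_reg_to_size(), which truncates both the tnum and the numeric bounds of a register after a narrow load or a 32-bit ALU operation. The bounds half of that logic, restated as a small self-contained C check (illustrative only, not the kernel code path):

#include <stdint.h>
#include <stdio.h>

/* If the bits above the truncation mask agree for umin and umax, masking
 * keeps a tight range; otherwise the truncated value may wrap, so the range
 * collapses to [0, mask].  size is in bytes and must be < 8 here.
 */
static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
	uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

	if ((*umin & ~mask) == (*umax & ~mask)) {
		*umin &= mask;
		*umax &= mask;
	} else {
		*umin = 0;
		*umax = mask;
	}
}

int main(void)
{
	uint64_t lo = 0x100000010ULL, hi = 0x1000000f0ULL;	/* same upper bits */

	coerce_bounds(&lo, &hi, 4);
	printf("0x%llx..0x%llx\n", (unsigned long long)lo,
	       (unsigned long long)hi);				/* prints 0x10..0xf0 */
	return 0;
}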
8957     diff --git a/kernel/events/core.c b/kernel/events/core.c
8958     index 4f1d4bfc607a..24ebad5567b4 100644
8959     --- a/kernel/events/core.c
8960     +++ b/kernel/events/core.c
8961     @@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struct perf_event *event)
8962     * indeed free this event, otherwise we need to serialize on
8963     * owner->perf_event_mutex.
8964     */
8965     - owner = lockless_dereference(event->owner);
8966     + owner = READ_ONCE(event->owner);
8967     if (owner) {
8968     /*
8969     * Since delayed_put_task_struct() also drops the last
8970     @@ -4330,7 +4330,7 @@ int perf_event_release_kernel(struct perf_event *event)
8971     * Cannot change, child events are not migrated, see the
8972     * comment with perf_event_ctx_lock_nested().
8973     */
8974     - ctx = lockless_dereference(child->ctx);
8975     + ctx = READ_ONCE(child->ctx);
8976     /*
8977     * Since child_mutex nests inside ctx::mutex, we must jump
8978     * through hoops. We start by grabbing a reference on the ctx.
8979     diff --git a/kernel/seccomp.c b/kernel/seccomp.c
8980     index 418a1c045933..5f0dfb2abb8d 100644
8981     --- a/kernel/seccomp.c
8982     +++ b/kernel/seccomp.c
8983     @@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
8984     u32 ret = SECCOMP_RET_ALLOW;
8985     /* Make sure cross-thread synced filter points somewhere sane. */
8986     struct seccomp_filter *f =
8987     - lockless_dereference(current->seccomp.filter);
8988     + READ_ONCE(current->seccomp.filter);
8989    
8990     /* Ensure unexpected behavior doesn't result in failing open. */
8991     if (unlikely(WARN_ON(f == NULL)))
8992     diff --git a/kernel/task_work.c b/kernel/task_work.c
8993     index 5718b3ea202a..0fef395662a6 100644
8994     --- a/kernel/task_work.c
8995     +++ b/kernel/task_work.c
8996     @@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
8997     * we raced with task_work_run(), *pprev == NULL/exited.
8998     */
8999     raw_spin_lock_irqsave(&task->pi_lock, flags);
9000     - while ((work = lockless_dereference(*pprev))) {
9001     + while ((work = READ_ONCE(*pprev))) {
9002     if (work->func != func)
9003     pprev = &work->next;
9004     else if (cmpxchg(pprev, work, work->next) == work)
9005     diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
9006     index dc498b605d5d..6350f64d5aa4 100644
9007     --- a/kernel/trace/bpf_trace.c
9008     +++ b/kernel/trace/bpf_trace.c
9009     @@ -293,14 +293,13 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
9010     .arg2_type = ARG_ANYTHING,
9011     };
9012    
9013     -static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
9014     +static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
9015    
9016     static __always_inline u64
9017     __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9018     - u64 flags, struct perf_raw_record *raw)
9019     + u64 flags, struct perf_sample_data *sd)
9020     {
9021     struct bpf_array *array = container_of(map, struct bpf_array, map);
9022     - struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
9023     unsigned int cpu = smp_processor_id();
9024     u64 index = flags & BPF_F_INDEX_MASK;
9025     struct bpf_event_entry *ee;
9026     @@ -323,8 +322,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9027     if (unlikely(event->oncpu != cpu))
9028     return -EOPNOTSUPP;
9029    
9030     - perf_sample_data_init(sd, 0, 0);
9031     - sd->raw = raw;
9032     perf_event_output(event, sd, regs);
9033     return 0;
9034     }
9035     @@ -332,6 +329,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9036     BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
9037     u64, flags, void *, data, u64, size)
9038     {
9039     + struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
9040     struct perf_raw_record raw = {
9041     .frag = {
9042     .size = size,
9043     @@ -342,7 +340,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
9044     if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
9045     return -EINVAL;
9046    
9047     - return __bpf_perf_event_output(regs, map, flags, &raw);
9048     + perf_sample_data_init(sd, 0, 0);
9049     + sd->raw = &raw;
9050     +
9051     + return __bpf_perf_event_output(regs, map, flags, sd);
9052     }
9053    
9054     static const struct bpf_func_proto bpf_perf_event_output_proto = {
9055     @@ -357,10 +358,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
9056     };
9057    
9058     static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
9059     +static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
9060    
9061     u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
9062     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
9063     {
9064     + struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
9065     struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
9066     struct perf_raw_frag frag = {
9067     .copy = ctx_copy,
9068     @@ -378,8 +381,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
9069     };
9070    
9071     perf_fetch_caller_regs(regs);
9072     + perf_sample_data_init(sd, 0, 0);
9073     + sd->raw = &raw;
9074    
9075     - return __bpf_perf_event_output(regs, map, flags, &raw);
9076     + return __bpf_perf_event_output(regs, map, flags, sd);
9077     }
9078    
9079     BPF_CALL_0(bpf_get_current_task)
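The bpf_trace.c hunks give the two output paths separate per-CPU perf_sample_data buffers (bpf_trace_sd and bpf_misc_sd) and move perf_sample_data_init() into the callers, so one path can no longer clobber the other's sd->raw on the same CPU. A hedged sketch of the resulting caller-side pattern (the example_* names are illustrative):

static DEFINE_PER_CPU(struct perf_sample_data, example_sd);

static u64 example_event_output(struct pt_regs *regs, struct bpf_map *map,
				u64 flags, struct perf_raw_record *raw)
{
	struct perf_sample_data *sd = this_cpu_ptr(&example_sd);

	perf_sample_data_init(sd, 0, 0);	/* each caller owns and initialises its buffer */
	sd->raw = raw;

	return __bpf_perf_event_output(regs, map, flags, sd);
}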
9080     diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
9081     index 1c21d0e2a145..7eb975a2d0e1 100644
9082     --- a/kernel/trace/trace_events_hist.c
9083     +++ b/kernel/trace/trace_events_hist.c
9084     @@ -450,7 +450,7 @@ static int create_val_field(struct hist_trigger_data *hist_data,
9085     }
9086    
9087     field = trace_find_event_field(file->event_call, field_name);
9088     - if (!field) {
9089     + if (!field || !field->size) {
9090     ret = -EINVAL;
9091     goto out;
9092     }
9093     @@ -548,7 +548,7 @@ static int create_key_field(struct hist_trigger_data *hist_data,
9094     }
9095    
9096     field = trace_find_event_field(file->event_call, field_name);
9097     - if (!field) {
9098     + if (!field || !field->size) {
9099     ret = -EINVAL;
9100     goto out;
9101     }
9102     diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
9103     index dfdad67d8f6c..ff21b4dbb392 100644
9104     --- a/lib/Kconfig.debug
9105     +++ b/lib/Kconfig.debug
9106     @@ -376,7 +376,7 @@ config STACK_VALIDATION
9107     that runtime stack traces are more reliable.
9108    
9109     This is also a prerequisite for generation of ORC unwind data, which
9110     - is needed for CONFIG_ORC_UNWINDER.
9111     + is needed for CONFIG_UNWINDER_ORC.
9112    
9113     For more information, see
9114     tools/objtool/Documentation/stack-validation.txt.
9115     diff --git a/mm/slab.h b/mm/slab.h
9116     index 028cdc7df67e..86d7c7d860f9 100644
9117     --- a/mm/slab.h
9118     +++ b/mm/slab.h
9119     @@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx)
9120     * memcg_caches issues a write barrier to match this (see
9121     * memcg_create_kmem_cache()).
9122     */
9123     - cachep = lockless_dereference(arr->entries[idx]);
9124     + cachep = READ_ONCE(arr->entries[idx]);
9125     rcu_read_unlock();
9126    
9127     return cachep;
9128     diff --git a/mm/sparse.c b/mm/sparse.c
9129     index 4900707ae146..60805abf98af 100644
9130     --- a/mm/sparse.c
9131     +++ b/mm/sparse.c
9132     @@ -23,8 +23,7 @@
9133     * 1) mem_section - memory sections, mem_map's for valid memory
9134     */
9135     #ifdef CONFIG_SPARSEMEM_EXTREME
9136     -struct mem_section *mem_section[NR_SECTION_ROOTS]
9137     - ____cacheline_internodealigned_in_smp;
9138     +struct mem_section **mem_section;
9139     #else
9140     struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
9141     ____cacheline_internodealigned_in_smp;
9142     @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
9143     int __section_nr(struct mem_section* ms)
9144     {
9145     unsigned long root_nr;
9146     - struct mem_section* root;
9147     + struct mem_section *root = NULL;
9148    
9149     for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
9150     root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
9151     @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
9152     break;
9153     }
9154    
9155     - VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
9156     + VM_BUG_ON(!root);
9157    
9158     return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
9159     }
9160     @@ -208,6 +207,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
9161     {
9162     unsigned long pfn;
9163    
9164     +#ifdef CONFIG_SPARSEMEM_EXTREME
9165     + if (unlikely(!mem_section)) {
9166     + unsigned long size, align;
9167     +
9168     + size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
9169     + align = 1 << (INTERNODE_CACHE_SHIFT);
9170     + mem_section = memblock_virt_alloc(size, align);
9171     + }
9172     +#endif
9173     +
9174     start &= PAGE_SECTION_MASK;
9175     mminit_validate_memmodel_limits(&start, &end);
9176     for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
9177     @@ -330,11 +339,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
9178     static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
9179     {
9180     unsigned long usemap_snr, pgdat_snr;
9181     - static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
9182     - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
9183     + static unsigned long old_usemap_snr;
9184     + static unsigned long old_pgdat_snr;
9185     struct pglist_data *pgdat = NODE_DATA(nid);
9186     int usemap_nid;
9187    
9188     + /* First call */
9189     + if (!old_usemap_snr) {
9190     + old_usemap_snr = NR_MEM_SECTIONS;
9191     + old_pgdat_snr = NR_MEM_SECTIONS;
9192     + }
9193     +
9194     usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
9195     pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
9196     if (usemap_snr == pgdat_snr)
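With CONFIG_SPARSEMEM_EXTREME, mem_section becomes a pointer that is only populated on the first memory_present() call, which is why __nr_to_section() in the mmzone.h hunk now has to tolerate a NULL root table. The allocate-on-first-use pattern, restated as a hedged, self-contained C sketch (sizes and names are illustrative):

#include <stdlib.h>
#include <stdio.h>

struct section { int present; };

static struct section **roots;			/* was a static array before the change */

static struct section *lookup(unsigned long nr)
{
	if (!roots)				/* early call, table not set up yet */
		return NULL;
	return roots[nr / 8] ? &roots[nr / 8][nr % 8] : NULL;
}

static void mark_present(unsigned long nr)
{
	if (!roots)				/* allocate the root table lazily */
		roots = calloc(64, sizeof(*roots));
	if (!roots[nr / 8])
		roots[nr / 8] = calloc(8, sizeof(struct section));
	roots[nr / 8][nr % 8].present = 1;
}

int main(void)
{
	printf("%p\n", (void *)lookup(3));	/* NULL: nothing allocated yet */
	mark_present(3);
	printf("%d\n", lookup(3)->present);	/* 1 */
	return 0;
}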
9197     diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
9198     index 467e44d7587d..045331204097 100644
9199     --- a/net/ipv4/ip_gre.c
9200     +++ b/net/ipv4/ip_gre.c
9201     @@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
9202     if (gre_handle_offloads(skb, false))
9203     goto err_free_rt;
9204    
9205     - if (skb->len > dev->mtu) {
9206     - pskb_trim(skb, dev->mtu);
9207     + if (skb->len > dev->mtu + dev->hard_header_len) {
9208     + pskb_trim(skb, dev->mtu + dev->hard_header_len);
9209     truncate = true;
9210     }
9211    
9212     @@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
9213     if (skb_cow_head(skb, dev->needed_headroom))
9214     goto free_skb;
9215    
9216     - if (skb->len - dev->hard_header_len > dev->mtu) {
9217     - pskb_trim(skb, dev->mtu);
9218     + if (skb->len > dev->mtu + dev->hard_header_len) {
9219     + pskb_trim(skb, dev->mtu + dev->hard_header_len);
9220     truncate = true;
9221     }
9222    
9223     diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
9224     index 218cfcc77650..ee113ff15fd0 100644
9225     --- a/net/ipv4/tcp_vegas.c
9226     +++ b/net/ipv4/tcp_vegas.c
9227     @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
9228    
9229     static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
9230     {
9231     - return min(tp->snd_ssthresh, tp->snd_cwnd-1);
9232     + return min(tp->snd_ssthresh, tp->snd_cwnd);
9233     }
9234    
9235     static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
9236     diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
9237     index 8a1c846d3df9..2ec39404c449 100644
9238     --- a/net/ipv6/addrconf.c
9239     +++ b/net/ipv6/addrconf.c
9240     @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
9241     .disable_policy = 0,
9242     };
9243    
9244     -/* Check if a valid qdisc is available */
9245     -static inline bool addrconf_qdisc_ok(const struct net_device *dev)
9246     +/* Check if link is ready: is it up and is a valid qdisc available */
9247     +static inline bool addrconf_link_ready(const struct net_device *dev)
9248     {
9249     - return !qdisc_tx_is_noop(dev);
9250     + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
9251     }
9252    
9253     static void addrconf_del_rs_timer(struct inet6_dev *idev)
9254     @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
9255    
9256     ndev->token = in6addr_any;
9257    
9258     - if (netif_running(dev) && addrconf_qdisc_ok(dev))
9259     + if (netif_running(dev) && addrconf_link_ready(dev))
9260     ndev->if_flags |= IF_READY;
9261    
9262     ipv6_mc_init_dev(ndev);
9263     @@ -3404,7 +3404,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
9264     /* restore routes for permanent addresses */
9265     addrconf_permanent_addr(dev);
9266    
9267     - if (!addrconf_qdisc_ok(dev)) {
9268     + if (!addrconf_link_ready(dev)) {
9269     /* device is not ready yet. */
9270     pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
9271     dev->name);
9272     @@ -3419,7 +3419,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
9273     run_pending = 1;
9274     }
9275     } else if (event == NETDEV_CHANGE) {
9276     - if (!addrconf_qdisc_ok(dev)) {
9277     + if (!addrconf_link_ready(dev)) {
9278     /* device is still not ready. */
9279     break;
9280     }
9281     diff --git a/net/ipv6/route.c b/net/ipv6/route.c
9282     index 598efa8cfe25..76b47682f77f 100644
9283     --- a/net/ipv6/route.c
9284     +++ b/net/ipv6/route.c
9285     @@ -1055,7 +1055,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
9286    
9287     static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
9288     {
9289     - struct fib6_table *table = rt->rt6i_table;
9290     struct rt6_info *pcpu_rt, *prev, **p;
9291    
9292     pcpu_rt = ip6_rt_pcpu_alloc(rt);
9293     @@ -1066,28 +1065,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
9294     return net->ipv6.ip6_null_entry;
9295     }
9296    
9297     - read_lock_bh(&table->tb6_lock);
9298     - if (rt->rt6i_pcpu) {
9299     - p = this_cpu_ptr(rt->rt6i_pcpu);
9300     - prev = cmpxchg(p, NULL, pcpu_rt);
9301     - if (prev) {
9302     - /* If someone did it before us, return prev instead */
9303     - dst_release_immediate(&pcpu_rt->dst);
9304     - pcpu_rt = prev;
9305     - }
9306     - } else {
9307     - /* rt has been removed from the fib6 tree
9308     - * before we have a chance to acquire the read_lock.
9309     - * In this case, don't brother to create a pcpu rt
9310     - * since rt is going away anyway. The next
9311     - * dst_check() will trigger a re-lookup.
9312     - */
9313     + dst_hold(&pcpu_rt->dst);
9314     + p = this_cpu_ptr(rt->rt6i_pcpu);
9315     + prev = cmpxchg(p, NULL, pcpu_rt);
9316     + if (prev) {
9317     + /* If someone did it before us, return prev instead */
9318     + /* release refcnt taken by ip6_rt_pcpu_alloc() */
9319     dst_release_immediate(&pcpu_rt->dst);
9320     - pcpu_rt = rt;
9321     + /* release refcnt taken by above dst_hold() */
9322     + dst_release_immediate(&pcpu_rt->dst);
9323     + dst_hold(&prev->dst);
9324     + pcpu_rt = prev;
9325     }
9326     - dst_hold(&pcpu_rt->dst);
9327     +
9328     rt6_dst_from_metrics_check(pcpu_rt);
9329     - read_unlock_bh(&table->tb6_lock);
9330     return pcpu_rt;
9331     }
9332    
9333     @@ -1177,19 +1168,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
9334     if (pcpu_rt) {
9335     read_unlock_bh(&table->tb6_lock);
9336     } else {
9337     - /* We have to do the read_unlock first
9338     - * because rt6_make_pcpu_route() may trigger
9339     - * ip6_dst_gc() which will take the write_lock.
9340     - */
9341     - dst_hold(&rt->dst);
9342     - read_unlock_bh(&table->tb6_lock);
9343     - pcpu_rt = rt6_make_pcpu_route(rt);
9344     - dst_release(&rt->dst);
9345     + /* atomic_inc_not_zero() is needed when using rcu */
9346     + if (atomic_inc_not_zero(&rt->rt6i_ref)) {
9347     + /* We have to do the read_unlock first
9348     + * because rt6_make_pcpu_route() may trigger
9349     + * ip6_dst_gc() which will take the write_lock.
9350     + *
9351     + * No dst_hold() on rt is needed because grabbing
9352     + * rt->rt6i_ref makes sure rt can't be released.
9353     + */
9354     + read_unlock_bh(&table->tb6_lock);
9355     + pcpu_rt = rt6_make_pcpu_route(rt);
9356     + rt6_release(rt);
9357     + } else {
9358     + /* rt is already removed from tree */
9359     + read_unlock_bh(&table->tb6_lock);
9360     + pcpu_rt = net->ipv6.ip6_null_entry;
9361     + dst_hold(&pcpu_rt->dst);
9362     + }
9363     }
9364    
9365     trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
9366     return pcpu_rt;
9367     -
9368     }
9369     }
9370     EXPORT_SYMBOL_GPL(ip6_pol_route);
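The rt6_make_pcpu_route() rewrite drops the tb6_lock round-trip and instead relies on rt6i_ref plus a bare cmpxchg() to install the per-CPU route, releasing the candidate when another CPU installed one first. The install-or-reuse-previous pattern in isolation, as a small C11 sketch (illustrative only, with no RCU or refcounting):

#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>

struct entry { int v; };

static _Atomic(struct entry *) slot;

static struct entry *get_or_install(int v)
{
	struct entry *cand = malloc(sizeof(*cand));
	struct entry *expected = NULL;

	cand->v = v;
	if (atomic_compare_exchange_strong(&slot, &expected, cand))
		return cand;		/* we published our candidate */

	free(cand);			/* lost the race: reuse the winner */
	return expected;
}

int main(void)
{
	printf("%d\n", get_or_install(1)->v);
	printf("%d\n", get_or_install(2)->v);	/* still 1: previous entry reused */
	return 0;
}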
9371     diff --git a/net/sctp/stream.c b/net/sctp/stream.c
9372     index fa8371ff05c4..724adf2786a2 100644
9373     --- a/net/sctp/stream.c
9374     +++ b/net/sctp/stream.c
9375     @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
9376     {
9377     int i;
9378    
9379     + gfp |= __GFP_NOWARN;
9380     +
9381     /* Initial stream->out size may be very big, so free it and alloc
9382     - * a new one with new outcnt to save memory.
9383     + * a new one with new outcnt to save memory if needed.
9384     */
9385     + if (outcnt == stream->outcnt)
9386     + goto in;
9387     +
9388     kfree(stream->out);
9389    
9390     stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
9391     @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
9392     for (i = 0; i < stream->outcnt; i++)
9393     stream->out[i].state = SCTP_STREAM_OPEN;
9394    
9395     +in:
9396     if (!incnt)
9397     return 0;
9398    
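Editor's note: the stream.c change above stops freeing and reallocating stream->out when the requested outcnt is unchanged, and adds __GFP_NOWARN to the potentially large allocations. The userspace sketch below shows only the "skip the realloc when the count is the same" shortcut, with invented names and plain calloc() standing in for the kernel allocators.

/* Minimal sketch of the resize shortcut; not the SCTP code itself. */
#include <stdio.h>
#include <stdlib.h>

struct stream { int *out; size_t outcnt; };

static int stream_resize_out(struct stream *s, size_t outcnt)
{
	int *n;

	if (outcnt == s->outcnt)	/* nothing to do, keep the old array */
		return 0;

	n = calloc(outcnt, sizeof(*n));
	if (!n)
		return -1;

	free(s->out);
	s->out = n;
	s->outcnt = outcnt;
	return 0;
}

int main(void)
{
	struct stream s = { 0 };

	stream_resize_out(&s, 16);
	stream_resize_out(&s, 16);	/* second call is a no-op */
	printf("outcnt = %zu\n", s.outcnt);
	free(s.out);
	return 0;
}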
9399     diff --git a/scripts/Makefile.build b/scripts/Makefile.build
9400     index bb831d49bcfd..e63af4e19382 100644
9401     --- a/scripts/Makefile.build
9402     +++ b/scripts/Makefile.build
9403     @@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
9404    
9405     __objtool_obj := $(objtree)/tools/objtool/objtool
9406    
9407     -objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
9408     +objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
9409    
9410     ifndef CONFIG_FRAME_POINTER
9411     objtool_args += --no-fp
9412     diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh
9413     index 4d1ea96e8794..a18bca720995 100755
9414     --- a/scripts/headers_install.sh
9415     +++ b/scripts/headers_install.sh
9416     @@ -34,7 +34,7 @@ do
9417     sed -r \
9418     -e 's/([ \t(])(__user|__force|__iomem)[ \t]/\1/g' \
9419     -e 's/__attribute_const__([ \t]|$)/\1/g' \
9420     - -e 's@^#include <linux/compiler.h>@@' \
9421     + -e 's@^#include <linux/compiler(|_types).h>@@' \
9422     -e 's/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g' \
9423     -e 's/(^|[ \t(])(inline|asm|volatile)([ \t(]|$)/\1__\2__\3/g' \
9424     -e 's@#(ifndef|define|endif[ \t]*/[*])[ \t]*_UAPI@#\1 @' \
9425     diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
9426     index 549c269acc7d..18933bf6473f 100644
9427     --- a/sound/soc/codecs/msm8916-wcd-analog.c
9428     +++ b/sound/soc/codecs/msm8916-wcd-analog.c
9429     @@ -104,7 +104,7 @@
9430     #define CDC_A_MICB_1_VAL (0xf141)
9431     #define MICB_MIN_VAL 1600
9432     #define MICB_STEP_SIZE 50
9433     -#define MICB_VOLTAGE_REGVAL(v) ((v - MICB_MIN_VAL)/MICB_STEP_SIZE)
9434     +#define MICB_VOLTAGE_REGVAL(v) (((v - MICB_MIN_VAL)/MICB_STEP_SIZE) << 3)
9435     #define MICB_1_VAL_MICB_OUT_VAL_MASK GENMASK(7, 3)
9436     #define MICB_1_VAL_MICB_OUT_VAL_V2P70V ((0x16) << 3)
9437     #define MICB_1_VAL_MICB_OUT_VAL_V1P80V ((0x4) << 3)
9438     @@ -349,8 +349,9 @@ static void pm8916_wcd_analog_micbias_enable(struct snd_soc_codec *codec)
9439     | MICB_1_CTL_EXT_PRECHARG_EN_ENABLE);
9440    
9441     if (wcd->micbias_mv) {
9442     - snd_soc_write(codec, CDC_A_MICB_1_VAL,
9443     - MICB_VOLTAGE_REGVAL(wcd->micbias_mv));
9444     + snd_soc_update_bits(codec, CDC_A_MICB_1_VAL,
9445     + MICB_1_VAL_MICB_OUT_VAL_MASK,
9446     + MICB_VOLTAGE_REGVAL(wcd->micbias_mv));
9447     /*
9448     * Special headset needs MICBIAS as 2.7V so wait for
9449     * 50 msec for the MICBIAS to reach 2.7 volts.
9450     @@ -1241,6 +1242,8 @@ static const struct of_device_id pm8916_wcd_analog_spmi_match_table[] = {
9451     { }
9452     };
9453    
9454     +MODULE_DEVICE_TABLE(of, pm8916_wcd_analog_spmi_match_table);
9455     +
9456     static struct platform_driver pm8916_wcd_analog_spmi_driver = {
9457     .driver = {
9458     .name = "qcom,pm8916-wcd-spmi-codec",
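Editor's note: the first msm8916-wcd-analog hunk widens MICB_VOLTAGE_REGVAL() so the computed step lands in the MICB_1_VAL output-voltage field (bits 7:3), and the second switches to snd_soc_update_bits() so only that field is modified. The small check below reuses the constants visible in the hunk to confirm the arithmetic; the program is illustrative only.

/* Quick arithmetic check of the corrected macro above. */
#include <stdio.h>

#define MICB_MIN_VAL		1600
#define MICB_STEP_SIZE		50
#define MICB_VOLTAGE_REGVAL(v)	(((v - MICB_MIN_VAL) / MICB_STEP_SIZE) << 3)

int main(void)
{
	/* 2700 mV -> step 22 (0x16) -> 0x16 << 3 == 0xb0, matching
	 * MICB_1_VAL_MICB_OUT_VAL_V2P70V ((0x16) << 3) in the driver.
	 */
	printf("regval(2700 mV) = 0x%x\n", MICB_VOLTAGE_REGVAL(2700));
	/* 1800 mV -> step 4 -> 0x4 << 3 == 0x20, matching V1P80V. */
	printf("regval(1800 mV) = 0x%x\n", MICB_VOLTAGE_REGVAL(1800));
	return 0;
}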
9459     diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c
9460     index 23b0f0f6ec9c..2fc8a6372206 100644
9461     --- a/sound/soc/img/img-parallel-out.c
9462     +++ b/sound/soc/img/img-parallel-out.c
9463     @@ -164,9 +164,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
9464     return -EINVAL;
9465     }
9466    
9467     + pm_runtime_get_sync(prl->dev);
9468     reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL);
9469     reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set;
9470     img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL);
9471     + pm_runtime_put(prl->dev);
9472    
9473     return 0;
9474     }
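Editor's note: the img-parallel-out hunk brackets the EDGE-bit read-modify-write with pm_runtime_get_sync()/pm_runtime_put() so the block is guaranteed to be powered while its control register is touched. Below is a rough userspace analogy of that bracket; the refcount and register are invented for the example and do not model the real runtime-PM API.

/* Userspace analogy of "hold a power reference around register access". */
#include <stdio.h>

static int power_refcnt;
static unsigned int fake_reg;

static void power_get(void) { if (!power_refcnt++) puts("power on"); }
static void power_put(void) { if (!--power_refcnt) puts("power off"); }

static void set_edge_bits(unsigned int mask, unsigned int value)
{
	power_get();			/* device must be up before touching regs */
	fake_reg = (fake_reg & ~mask) | value;
	power_put();			/* drop our reference when done */
}

int main(void)
{
	set_edge_bits(0x1, 0x1);
	printf("reg = %#x\n", fake_reg);
	return 0;
}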
9475     diff --git a/tools/objtool/check.c b/tools/objtool/check.c
9476     index c0e26ad1fa7e..9b341584eb1b 100644
9477     --- a/tools/objtool/check.c
9478     +++ b/tools/objtool/check.c
9479     @@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
9480     if (insn->dead_end)
9481     return 0;
9482    
9483     - insn = next_insn;
9484     - if (!insn) {
9485     + if (!next_insn) {
9486     + if (state.cfa.base == CFI_UNDEFINED)
9487     + return 0;
9488     WARN("%s: unexpected end of section", sec->name);
9489     return 1;
9490     }
9491     +
9492     + insn = next_insn;
9493     }
9494    
9495     return 0;
9496     diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
9497     index 31e0f9143840..07f329919828 100644
9498     --- a/tools/objtool/objtool.c
9499     +++ b/tools/objtool/objtool.c
9500     @@ -70,7 +70,7 @@ static void cmd_usage(void)
9501    
9502     printf("\n");
9503    
9504     - exit(1);
9505     + exit(129);
9506     }
9507    
9508     static void handle_options(int *argc, const char ***argv)
9509     @@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
9510     break;
9511     } else {
9512     fprintf(stderr, "Unknown option: %s\n", cmd);
9513     - fprintf(stderr, "\n Usage: %s\n",
9514     - objtool_usage_string);
9515     - exit(1);
9516     + cmd_usage();
9517     }
9518    
9519     (*argv)++;
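Editor's note: the objtool changes route unknown options through cmd_usage() and make the usage path exit with 129 instead of 1, so a bad invocation can be told apart from a genuine check failure. A toy sketch of that convention follows; the usage string and commands are invented for the example and are not objtool's.

/* Sketch of the "usage errors exit with 129" convention. */
#include <stdio.h>
#include <stdlib.h>

static void cmd_usage(void)
{
	fprintf(stderr, "usage: tool <check|orc> file.o\n");
	exit(129);		/* distinct from exit(1) used for real failures */
}

int main(int argc, char **argv)
{
	if (argc < 3)
		cmd_usage();
	printf("would run '%s' on %s\n", argv[1], argv[2]);
	return 0;
}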
9520     diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
9521     index 64ae21f64489..7a2d221c4702 100644
9522     --- a/tools/testing/selftests/bpf/test_verifier.c
9523     +++ b/tools/testing/selftests/bpf/test_verifier.c
9524     @@ -606,7 +606,6 @@ static struct bpf_test tests[] = {
9525     },
9526     .errstr = "misaligned stack access",
9527     .result = REJECT,
9528     - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9529     },
9530     {
9531     "invalid map_fd for function call",
9532     @@ -1797,7 +1796,6 @@ static struct bpf_test tests[] = {
9533     },
9534     .result = REJECT,
9535     .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
9536     - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9537     },
9538     {
9539     "PTR_TO_STACK store/load - bad alignment on reg",
9540     @@ -1810,7 +1808,6 @@ static struct bpf_test tests[] = {
9541     },
9542     .result = REJECT,
9543     .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
9544     - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9545     },
9546     {
9547     "PTR_TO_STACK store/load - out of bounds low",
9548     @@ -6115,7 +6112,7 @@ static struct bpf_test tests[] = {
9549     BPF_EXIT_INSN(),
9550     },
9551     .fixup_map1 = { 3 },
9552     - .errstr = "R0 min value is negative",
9553     + .errstr = "unbounded min value",
9554     .result = REJECT,
9555     },
9556     {
9557     @@ -6139,7 +6136,7 @@ static struct bpf_test tests[] = {
9558     BPF_EXIT_INSN(),
9559     },
9560     .fixup_map1 = { 3 },
9561     - .errstr = "R0 min value is negative",
9562     + .errstr = "unbounded min value",
9563     .result = REJECT,
9564     },
9565     {
9566     @@ -6165,7 +6162,7 @@ static struct bpf_test tests[] = {
9567     BPF_EXIT_INSN(),
9568     },
9569     .fixup_map1 = { 3 },
9570     - .errstr = "R8 invalid mem access 'inv'",
9571     + .errstr = "unbounded min value",
9572     .result = REJECT,
9573     },
9574     {
9575     @@ -6190,7 +6187,7 @@ static struct bpf_test tests[] = {
9576     BPF_EXIT_INSN(),
9577     },
9578     .fixup_map1 = { 3 },
9579     - .errstr = "R8 invalid mem access 'inv'",
9580     + .errstr = "unbounded min value",
9581     .result = REJECT,
9582     },
9583     {
9584     @@ -6238,7 +6235,7 @@ static struct bpf_test tests[] = {
9585     BPF_EXIT_INSN(),
9586     },
9587     .fixup_map1 = { 3 },
9588     - .errstr = "R0 min value is negative",
9589     + .errstr = "unbounded min value",
9590     .result = REJECT,
9591     },
9592     {
9593     @@ -6309,7 +6306,7 @@ static struct bpf_test tests[] = {
9594     BPF_EXIT_INSN(),
9595     },
9596     .fixup_map1 = { 3 },
9597     - .errstr = "R0 min value is negative",
9598     + .errstr = "unbounded min value",
9599     .result = REJECT,
9600     },
9601     {
9602     @@ -6360,7 +6357,7 @@ static struct bpf_test tests[] = {
9603     BPF_EXIT_INSN(),
9604     },
9605     .fixup_map1 = { 3 },
9606     - .errstr = "R0 min value is negative",
9607     + .errstr = "unbounded min value",
9608     .result = REJECT,
9609     },
9610     {
9611     @@ -6387,7 +6384,7 @@ static struct bpf_test tests[] = {
9612     BPF_EXIT_INSN(),
9613     },
9614     .fixup_map1 = { 3 },
9615     - .errstr = "R0 min value is negative",
9616     + .errstr = "unbounded min value",
9617     .result = REJECT,
9618     },
9619     {
9620     @@ -6413,7 +6410,7 @@ static struct bpf_test tests[] = {
9621     BPF_EXIT_INSN(),
9622     },
9623     .fixup_map1 = { 3 },
9624     - .errstr = "R0 min value is negative",
9625     + .errstr = "unbounded min value",
9626     .result = REJECT,
9627     },
9628     {
9629     @@ -6442,7 +6439,7 @@ static struct bpf_test tests[] = {
9630     BPF_EXIT_INSN(),
9631     },
9632     .fixup_map1 = { 3 },
9633     - .errstr = "R0 min value is negative",
9634     + .errstr = "unbounded min value",
9635     .result = REJECT,
9636     },
9637     {
9638     @@ -6472,7 +6469,7 @@ static struct bpf_test tests[] = {
9639     BPF_JMP_IMM(BPF_JA, 0, 0, -7),
9640     },
9641     .fixup_map1 = { 4 },
9642     - .errstr = "R0 min value is negative",
9643     + .errstr = "unbounded min value",
9644     .result = REJECT,
9645     },
9646     {
9647     @@ -6500,8 +6497,7 @@ static struct bpf_test tests[] = {
9648     BPF_EXIT_INSN(),
9649     },
9650     .fixup_map1 = { 3 },
9651     - .errstr_unpriv = "R0 pointer comparison prohibited",
9652     - .errstr = "R0 min value is negative",
9653     + .errstr = "unbounded min value",
9654     .result = REJECT,
9655     .result_unpriv = REJECT,
9656     },
9657     @@ -6556,6 +6552,462 @@ static struct bpf_test tests[] = {
9658     .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
9659     .result = REJECT,
9660     },
9661     + {
9662     + "bounds check based on zero-extended MOV",
9663     + .insns = {
9664     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9665     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9666     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9667     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9668     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9669     + BPF_FUNC_map_lookup_elem),
9670     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9671     + /* r2 = 0x0000'0000'ffff'ffff */
9672     + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
9673     + /* r2 = 0 */
9674     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
9675     + /* no-op */
9676     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9677     + /* access at offset 0 */
9678     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9679     + /* exit */
9680     + BPF_MOV64_IMM(BPF_REG_0, 0),
9681     + BPF_EXIT_INSN(),
9682     + },
9683     + .fixup_map1 = { 3 },
9684     + .result = ACCEPT
9685     + },
9686     + {
9687     + "bounds check based on sign-extended MOV. test1",
9688     + .insns = {
9689     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9690     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9691     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9692     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9693     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9694     + BPF_FUNC_map_lookup_elem),
9695     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9696     + /* r2 = 0xffff'ffff'ffff'ffff */
9697     + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
9698     + /* r2 = 0xffff'ffff */
9699     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
9700     + /* r0 = <oob pointer> */
9701     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9702     + /* access to OOB pointer */
9703     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9704     + /* exit */
9705     + BPF_MOV64_IMM(BPF_REG_0, 0),
9706     + BPF_EXIT_INSN(),
9707     + },
9708     + .fixup_map1 = { 3 },
9709     + .errstr = "map_value pointer and 4294967295",
9710     + .result = REJECT
9711     + },
9712     + {
9713     + "bounds check based on sign-extended MOV. test2",
9714     + .insns = {
9715     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9716     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9717     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9718     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9719     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9720     + BPF_FUNC_map_lookup_elem),
9721     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9722     + /* r2 = 0xffff'ffff'ffff'ffff */
9723     + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
9724     + /* r2 = 0xfff'ffff */
9725     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
9726     + /* r0 = <oob pointer> */
9727     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9728     + /* access to OOB pointer */
9729     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9730     + /* exit */
9731     + BPF_MOV64_IMM(BPF_REG_0, 0),
9732     + BPF_EXIT_INSN(),
9733     + },
9734     + .fixup_map1 = { 3 },
9735     + .errstr = "R0 min value is outside of the array range",
9736     + .result = REJECT
9737     + },
9738     + {
9739     + "bounds check based on reg_off + var_off + insn_off. test1",
9740     + .insns = {
9741     + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
9742     + offsetof(struct __sk_buff, mark)),
9743     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9744     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9745     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9746     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9747     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9748     + BPF_FUNC_map_lookup_elem),
9749     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9750     + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
9751     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
9752     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
9753     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
9754     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
9755     + BPF_MOV64_IMM(BPF_REG_0, 0),
9756     + BPF_EXIT_INSN(),
9757     + },
9758     + .fixup_map1 = { 4 },
9759     + .errstr = "value_size=8 off=1073741825",
9760     + .result = REJECT,
9761     + .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9762     + },
9763     + {
9764     + "bounds check based on reg_off + var_off + insn_off. test2",
9765     + .insns = {
9766     + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
9767     + offsetof(struct __sk_buff, mark)),
9768     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9769     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9770     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9771     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9772     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9773     + BPF_FUNC_map_lookup_elem),
9774     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9775     + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
9776     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
9777     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
9778     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
9779     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
9780     + BPF_MOV64_IMM(BPF_REG_0, 0),
9781     + BPF_EXIT_INSN(),
9782     + },
9783     + .fixup_map1 = { 4 },
9784     + .errstr = "value 1073741823",
9785     + .result = REJECT,
9786     + .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9787     + },
9788     + {
9789     + "bounds check after truncation of non-boundary-crossing range",
9790     + .insns = {
9791     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9792     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9793     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9794     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9795     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9796     + BPF_FUNC_map_lookup_elem),
9797     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9798     + /* r1 = [0x00, 0xff] */
9799     + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9800     + BPF_MOV64_IMM(BPF_REG_2, 1),
9801     + /* r2 = 0x10'0000'0000 */
9802     + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
9803     + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
9804     + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
9805     + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
9806     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
9807     + /* r1 = [0x00, 0xff] */
9808     + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
9809     + /* r1 = 0 */
9810     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9811     + /* no-op */
9812     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9813     + /* access at offset 0 */
9814     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9815     + /* exit */
9816     + BPF_MOV64_IMM(BPF_REG_0, 0),
9817     + BPF_EXIT_INSN(),
9818     + },
9819     + .fixup_map1 = { 3 },
9820     + .result = ACCEPT
9821     + },
9822     + {
9823     + "bounds check after truncation of boundary-crossing range (1)",
9824     + .insns = {
9825     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9826     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9827     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9828     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9829     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9830     + BPF_FUNC_map_lookup_elem),
9831     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9832     + /* r1 = [0x00, 0xff] */
9833     + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9834     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9835     + /* r1 = [0xffff'ff80, 0x1'0000'007f] */
9836     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9837     + /* r1 = [0xffff'ff80, 0xffff'ffff] or
9838     + * [0x0000'0000, 0x0000'007f]
9839     + */
9840     + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
9841     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9842     + /* r1 = [0x00, 0xff] or
9843     + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
9844     + */
9845     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9846     + /* r1 = 0 or
9847     + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
9848     + */
9849     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9850     + /* no-op or OOB pointer computation */
9851     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9852     + /* potentially OOB access */
9853     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9854     + /* exit */
9855     + BPF_MOV64_IMM(BPF_REG_0, 0),
9856     + BPF_EXIT_INSN(),
9857     + },
9858     + .fixup_map1 = { 3 },
9859     + /* not actually fully unbounded, but the bound is very high */
9860     + .errstr = "R0 unbounded memory access",
9861     + .result = REJECT
9862     + },
9863     + {
9864     + "bounds check after truncation of boundary-crossing range (2)",
9865     + .insns = {
9866     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9867     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9868     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9869     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9870     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9871     + BPF_FUNC_map_lookup_elem),
9872     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9873     + /* r1 = [0x00, 0xff] */
9874     + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9875     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9876     + /* r1 = [0xffff'ff80, 0x1'0000'007f] */
9877     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9878     + /* r1 = [0xffff'ff80, 0xffff'ffff] or
9879     + * [0x0000'0000, 0x0000'007f]
9880     + * difference to previous test: truncation via MOV32
9881     + * instead of ALU32.
9882     + */
9883     + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
9884     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9885     + /* r1 = [0x00, 0xff] or
9886     + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
9887     + */
9888     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9889     + /* r1 = 0 or
9890     + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
9891     + */
9892     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9893     + /* no-op or OOB pointer computation */
9894     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9895     + /* potentially OOB access */
9896     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9897     + /* exit */
9898     + BPF_MOV64_IMM(BPF_REG_0, 0),
9899     + BPF_EXIT_INSN(),
9900     + },
9901     + .fixup_map1 = { 3 },
9902     + /* not actually fully unbounded, but the bound is very high */
9903     + .errstr = "R0 unbounded memory access",
9904     + .result = REJECT
9905     + },
9906     + {
9907     + "bounds check after wrapping 32-bit addition",
9908     + .insns = {
9909     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9910     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9911     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9912     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9913     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9914     + BPF_FUNC_map_lookup_elem),
9915     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
9916     + /* r1 = 0x7fff'ffff */
9917     + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
9918     + /* r1 = 0xffff'fffe */
9919     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
9920     + /* r1 = 0 */
9921     + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
9922     + /* no-op */
9923     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9924     + /* access at offset 0 */
9925     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9926     + /* exit */
9927     + BPF_MOV64_IMM(BPF_REG_0, 0),
9928     + BPF_EXIT_INSN(),
9929     + },
9930     + .fixup_map1 = { 3 },
9931     + .result = ACCEPT
9932     + },
9933     + {
9934     + "bounds check after shift with oversized count operand",
9935     + .insns = {
9936     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9937     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9938     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9939     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9940     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9941     + BPF_FUNC_map_lookup_elem),
9942     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
9943     + BPF_MOV64_IMM(BPF_REG_2, 32),
9944     + BPF_MOV64_IMM(BPF_REG_1, 1),
9945     + /* r1 = (u32)1 << (u32)32 = ? */
9946     + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
9947     + /* r1 = [0x0000, 0xffff] */
9948     + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
9949     + /* computes unknown pointer, potentially OOB */
9950     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9951     + /* potentially OOB access */
9952     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9953     + /* exit */
9954     + BPF_MOV64_IMM(BPF_REG_0, 0),
9955     + BPF_EXIT_INSN(),
9956     + },
9957     + .fixup_map1 = { 3 },
9958     + .errstr = "R0 max value is outside of the array range",
9959     + .result = REJECT
9960     + },
9961     + {
9962     + "bounds check after right shift of maybe-negative number",
9963     + .insns = {
9964     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9965     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9966     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9967     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9968     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9969     + BPF_FUNC_map_lookup_elem),
9970     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
9971     + /* r1 = [0x00, 0xff] */
9972     + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9973     + /* r1 = [-0x01, 0xfe] */
9974     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
9975     + /* r1 = 0 or 0xff'ffff'ffff'ffff */
9976     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9977     + /* r1 = 0 or 0xffff'ffff'ffff */
9978     + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9979     + /* computes unknown pointer, potentially OOB */
9980     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9981     + /* potentially OOB access */
9982     + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9983     + /* exit */
9984     + BPF_MOV64_IMM(BPF_REG_0, 0),
9985     + BPF_EXIT_INSN(),
9986     + },
9987     + .fixup_map1 = { 3 },
9988     + .errstr = "R0 unbounded memory access",
9989     + .result = REJECT
9990     + },
9991     + {
9992     + "bounds check map access with off+size signed 32bit overflow. test1",
9993     + .insns = {
9994     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9995     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9996     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9997     + BPF_LD_MAP_FD(BPF_REG_1, 0),
9998     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9999     + BPF_FUNC_map_lookup_elem),
10000     + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10001     + BPF_EXIT_INSN(),
10002     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
10003     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10004     + BPF_JMP_A(0),
10005     + BPF_EXIT_INSN(),
10006     + },
10007     + .fixup_map1 = { 3 },
10008     + .errstr = "map_value pointer and 2147483646",
10009     + .result = REJECT
10010     + },
10011     + {
10012     + "bounds check map access with off+size signed 32bit overflow. test2",
10013     + .insns = {
10014     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10015     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10016     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10017     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10018     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10019     + BPF_FUNC_map_lookup_elem),
10020     + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10021     + BPF_EXIT_INSN(),
10022     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10023     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10024     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10025     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10026     + BPF_JMP_A(0),
10027     + BPF_EXIT_INSN(),
10028     + },
10029     + .fixup_map1 = { 3 },
10030     + .errstr = "pointer offset 1073741822",
10031     + .result = REJECT
10032     + },
10033     + {
10034     + "bounds check map access with off+size signed 32bit overflow. test3",
10035     + .insns = {
10036     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10037     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10038     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10039     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10040     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10041     + BPF_FUNC_map_lookup_elem),
10042     + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10043     + BPF_EXIT_INSN(),
10044     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
10045     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
10046     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
10047     + BPF_JMP_A(0),
10048     + BPF_EXIT_INSN(),
10049     + },
10050     + .fixup_map1 = { 3 },
10051     + .errstr = "pointer offset -1073741822",
10052     + .result = REJECT
10053     + },
10054     + {
10055     + "bounds check map access with off+size signed 32bit overflow. test4",
10056     + .insns = {
10057     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10058     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10059     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10060     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10061     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10062     + BPF_FUNC_map_lookup_elem),
10063     + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10064     + BPF_EXIT_INSN(),
10065     + BPF_MOV64_IMM(BPF_REG_1, 1000000),
10066     + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
10067     + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
10068     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
10069     + BPF_JMP_A(0),
10070     + BPF_EXIT_INSN(),
10071     + },
10072     + .fixup_map1 = { 3 },
10073     + .errstr = "map_value pointer and 1000000000000",
10074     + .result = REJECT
10075     + },
10076     + {
10077     + "pointer/scalar confusion in state equality check (way 1)",
10078     + .insns = {
10079     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10080     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10081     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10082     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10083     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10084     + BPF_FUNC_map_lookup_elem),
10085     + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
10086     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10087     + BPF_JMP_A(1),
10088     + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
10089     + BPF_JMP_A(0),
10090     + BPF_EXIT_INSN(),
10091     + },
10092     + .fixup_map1 = { 3 },
10093     + .result = ACCEPT,
10094     + .result_unpriv = REJECT,
10095     + .errstr_unpriv = "R0 leaks addr as return value"
10096     + },
10097     + {
10098     + "pointer/scalar confusion in state equality check (way 2)",
10099     + .insns = {
10100     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10101     + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10102     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10103     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10104     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10105     + BPF_FUNC_map_lookup_elem),
10106     + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
10107     + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
10108     + BPF_JMP_A(1),
10109     + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10110     + BPF_EXIT_INSN(),
10111     + },
10112     + .fixup_map1 = { 3 },
10113     + .result = ACCEPT,
10114     + .result_unpriv = REJECT,
10115     + .errstr_unpriv = "R0 leaks addr as return value"
10116     + },
10117     {
10118     "variable-offset ctx access",
10119     .insns = {
10120     @@ -6597,6 +7049,71 @@ static struct bpf_test tests[] = {
10121     .result = REJECT,
10122     .prog_type = BPF_PROG_TYPE_LWT_IN,
10123     },
10124     + {
10125     + "indirect variable-offset stack access",
10126     + .insns = {
10127     + /* Fill the top 8 bytes of the stack */
10128     + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10129     + /* Get an unknown value */
10130     + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
10131     + /* Make it small and 4-byte aligned */
10132     + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
10133     + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
10134     + /* add it to fp. We now have either fp-4 or fp-8, but
10135     + * we don't know which
10136     + */
10137     + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
10138     + /* dereference it indirectly */
10139     + BPF_LD_MAP_FD(BPF_REG_1, 0),
10140     + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10141     + BPF_FUNC_map_lookup_elem),
10142     + BPF_MOV64_IMM(BPF_REG_0, 0),
10143     + BPF_EXIT_INSN(),
10144     + },
10145     + .fixup_map1 = { 5 },
10146     + .errstr = "variable stack read R2",
10147     + .result = REJECT,
10148     + .prog_type = BPF_PROG_TYPE_LWT_IN,
10149     + },
10150     + {
10151     + "direct stack access with 32-bit wraparound. test1",
10152     + .insns = {
10153     + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10154     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
10155     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
10156     + BPF_MOV32_IMM(BPF_REG_0, 0),
10157     + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10158     + BPF_EXIT_INSN()
10159     + },
10160     + .errstr = "fp pointer and 2147483647",
10161     + .result = REJECT
10162     + },
10163     + {
10164     + "direct stack access with 32-bit wraparound. test2",
10165     + .insns = {
10166     + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10167     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
10168     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
10169     + BPF_MOV32_IMM(BPF_REG_0, 0),
10170     + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10171     + BPF_EXIT_INSN()
10172     + },
10173     + .errstr = "fp pointer and 1073741823",
10174     + .result = REJECT
10175     + },
10176     + {
10177     + "direct stack access with 32-bit wraparound. test3",
10178     + .insns = {
10179     + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10180     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
10181     + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
10182     + BPF_MOV32_IMM(BPF_REG_0, 0),
10183     + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10184     + BPF_EXIT_INSN()
10185     + },
10186     + .errstr = "fp pointer offset 1073741822",
10187     + .result = REJECT
10188     + },
10189     {
10190     "liveness pruning and write screening",
10191     .insns = {
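Editor's note: the new test_verifier.c cases above probe how the verifier tracks bounds across zero-extending and sign-extending moves, truncation, wrapping 32-bit arithmetic and oversized shift counts. The small program below shows only the core zero- vs sign-extension difference that the first two added tests rely on, using ordinary C integer conversions rather than BPF; the sign-extension line assumes the usual two's-complement behaviour of the compiler.

/* Zero- vs sign-extension of 0xffffffff, then a right shift by 32. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t zext = (uint32_t)0xffffffffu;			/* 0x00000000ffffffff */
	uint64_t sext = (uint64_t)(int64_t)(int32_t)0xffffffffu;	/* 0xffffffffffffffff */

	/* Zero-extended value shifted right by 32 is 0: a safe offset. */
	printf("zero-extended >> 32 = %llu\n", (unsigned long long)(zext >> 32));
	/* Sign-extended value shifted right by 32 is 0xffffffff: far out of bounds. */
	printf("sign-extended >> 32 = %#llx\n", (unsigned long long)(sext >> 32));
	return 0;
}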
10192     diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
10193     index 2afc41a3730f..66e5ce5b91f0 100644
10194     --- a/tools/testing/selftests/x86/ldt_gdt.c
10195     +++ b/tools/testing/selftests/x86/ldt_gdt.c
10196     @@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt,
10197     }
10198     }
10199    
10200     -static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
10201     - bool oldmode)
10202     +static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
10203     + bool oldmode, bool ldt)
10204     {
10205     - int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
10206     - desc, sizeof(*desc));
10207     - if (ret < -1)
10208     - errno = -ret;
10209     + struct user_desc desc = *d;
10210     + int ret;
10211     +
10212     + if (!ldt) {
10213     +#ifndef __i386__
10214     + /* No point testing set_thread_area in a 64-bit build */
10215     + return false;
10216     +#endif
10217     + if (!gdt_entry_num)
10218     + return false;
10219     + desc.entry_number = gdt_entry_num;
10220     +
10221     + ret = syscall(SYS_set_thread_area, &desc);
10222     + } else {
10223     + ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
10224     + &desc, sizeof(desc));
10225     +
10226     + if (ret < -1)
10227     + errno = -ret;
10228     +
10229     + if (ret != 0 && errno == ENOSYS) {
10230     + printf("[OK]\tmodify_ldt returned -ENOSYS\n");
10231     + return false;
10232     + }
10233     + }
10234     +
10235     if (ret == 0) {
10236     - uint32_t limit = desc->limit;
10237     - if (desc->limit_in_pages)
10238     + uint32_t limit = desc.limit;
10239     + if (desc.limit_in_pages)
10240     limit = (limit << 12) + 4095;
10241     - check_valid_segment(desc->entry_number, 1, ar, limit, true);
10242     + check_valid_segment(desc.entry_number, ldt, ar, limit, true);
10243     return true;
10244     - } else if (errno == ENOSYS) {
10245     - printf("[OK]\tmodify_ldt returned -ENOSYS\n");
10246     - return false;
10247     } else {
10248     - if (desc->seg_32bit) {
10249     - printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
10250     + if (desc.seg_32bit) {
10251     + printf("[FAIL]\tUnexpected %s failure %d\n",
10252     + ldt ? "modify_ldt" : "set_thread_area",
10253     errno);
10254     nerrs++;
10255     return false;
10256     } else {
10257     - printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
10258     + printf("[OK]\t%s rejected 16 bit segment\n",
10259     + ldt ? "modify_ldt" : "set_thread_area");
10260     return false;
10261     }
10262     }
10263     @@ -168,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
10264    
10265     static bool install_valid(const struct user_desc *desc, uint32_t ar)
10266     {
10267     - return install_valid_mode(desc, ar, false);
10268     + bool ret = install_valid_mode(desc, ar, false, true);
10269     +
10270     + if (desc->contents <= 1 && desc->seg_32bit &&
10271     + !desc->seg_not_present) {
10272     + /* Should work in the GDT, too. */
10273     + install_valid_mode(desc, ar, false, false);
10274     + }
10275     +
10276     + return ret;
10277     }
10278    
10279     static void install_invalid(const struct user_desc *desc, bool oldmode)
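Editor's note: the ldt_gdt.c change teaches install_valid() to also push suitable descriptors through set_thread_area() (the GDT path), but only for descriptors a GDT TLS entry can represent: contents <= 1, 32-bit, and present. The standalone program below restates that condition; the struct is a trimmed, illustrative stand-in for struct user_desc and performs no syscalls.

/* Restatement of the "should we also try the GDT?" condition from above. */
#include <stdio.h>

struct fake_desc {
	unsigned int contents;
	unsigned int seg_32bit;
	unsigned int seg_not_present;
};

static int also_try_gdt(const struct fake_desc *d)
{
	return d->contents <= 1 && d->seg_32bit && !d->seg_not_present;
}

int main(void)
{
	struct fake_desc data32 = { .contents = 0, .seg_32bit = 1, .seg_not_present = 0 };
	struct fake_desc code16 = { .contents = 2, .seg_32bit = 0, .seg_not_present = 0 };

	printf("32-bit data segment -> GDT too? %d\n", also_try_gdt(&data32));
	printf("16-bit code segment -> GDT too? %d\n", also_try_gdt(&code16));
	return 0;
}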
10280     diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
10281     index 484e8820c382..2447d7c017e7 100644
10282     --- a/virt/kvm/kvm_main.c
10283     +++ b/virt/kvm/kvm_main.c
10284     @@ -4018,7 +4018,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
10285     if (!vcpu_align)
10286     vcpu_align = __alignof__(struct kvm_vcpu);
10287     kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
10288     - 0, NULL);
10289     + SLAB_ACCOUNT, NULL);
10290     if (!kvm_vcpu_cache) {
10291     r = -ENOMEM;
10292     goto out_free_3;