Magellan Linux

Contents of /trunk/kernel-alx/patches-4.14/0108-4.14.9-all-fixes.patch



Revision 3238
Fri Nov 9 12:14:58 2018 UTC by niro
File size: 356090 bytes
-added up to patches-4.14.79
1 diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
2 index af0c9a4c65a6..cd4b29be29af 100644
3 --- a/Documentation/x86/orc-unwinder.txt
4 +++ b/Documentation/x86/orc-unwinder.txt
5 @@ -4,7 +4,7 @@ ORC unwinder
6 Overview
7 --------
8
9 -The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
10 +The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
11 similar in concept to a DWARF unwinder. The difference is that the
12 format of the ORC data is much simpler than DWARF, which in turn allows
13 the ORC unwinder to be much simpler and faster.
14 diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
15 index b0798e281aa6..3448e675b462 100644
16 --- a/Documentation/x86/x86_64/mm.txt
17 +++ b/Documentation/x86/x86_64/mm.txt
18 @@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
19 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
20 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
21 ... unused hole ...
22 -ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
23 +ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
24 ... unused hole ...
25 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
26 ... unused hole ...
27 diff --git a/Makefile b/Makefile
28 index 97b5ae76ac8c..ed2132c6d286 100644
29 --- a/Makefile
30 +++ b/Makefile
31 @@ -1,7 +1,7 @@
32 # SPDX-License-Identifier: GPL-2.0
33 VERSION = 4
34 PATCHLEVEL = 14
35 -SUBLEVEL = 8
36 +SUBLEVEL = 9
37 EXTRAVERSION =
38 NAME = Petit Gorille
39
40 @@ -935,8 +935,8 @@ ifdef CONFIG_STACK_VALIDATION
41 ifeq ($(has_libelf),1)
42 objtool_target := tools/objtool FORCE
43 else
44 - ifdef CONFIG_ORC_UNWINDER
45 - $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
46 + ifdef CONFIG_UNWINDER_ORC
47 + $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
48 else
49 $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
50 endif
51 diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
52 index 8c2a2619971b..f1d7834990ec 100644
53 --- a/arch/arm/configs/exynos_defconfig
54 +++ b/arch/arm/configs/exynos_defconfig
55 @@ -244,7 +244,7 @@ CONFIG_USB_STORAGE_ONETOUCH=m
56 CONFIG_USB_STORAGE_KARMA=m
57 CONFIG_USB_STORAGE_CYPRESS_ATACB=m
58 CONFIG_USB_STORAGE_ENE_UB6250=m
59 -CONFIG_USB_UAS=m
60 +CONFIG_USB_UAS=y
61 CONFIG_USB_DWC3=y
62 CONFIG_USB_DWC2=y
63 CONFIG_USB_HSIC_USB3503=y
64 diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
65 index e9c9a117bd25..c7cdbb43ae7c 100644
66 --- a/arch/arm/include/asm/ptrace.h
67 +++ b/arch/arm/include/asm/ptrace.h
68 @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs);
69 /*
70 * kprobe-based event tracer support
71 */
72 -#include <linux/stddef.h>
73 -#include <linux/types.h>
74 +#include <linux/compiler.h>
75 #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0))
76
77 extern int regs_query_register_offset(const char *name);
78 diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
79 index caf86be815ba..4052ec39e8db 100644
80 --- a/arch/arm64/include/asm/fixmap.h
81 +++ b/arch/arm64/include/asm/fixmap.h
82 @@ -51,6 +51,13 @@ enum fixed_addresses {
83
84 FIX_EARLYCON_MEM_BASE,
85 FIX_TEXT_POKE0,
86 +
87 +#ifdef CONFIG_ACPI_APEI_GHES
88 + /* Used for GHES mapping from assorted contexts */
89 + FIX_APEI_GHES_IRQ,
90 + FIX_APEI_GHES_NMI,
91 +#endif /* CONFIG_ACPI_APEI_GHES */
92 +
93 __end_of_permanent_fixed_addresses,
94
95 /*
96 diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
97 index 57190f384f63..ce848ff84edd 100644
98 --- a/arch/powerpc/kernel/watchdog.c
99 +++ b/arch/powerpc/kernel/watchdog.c
100 @@ -276,9 +276,12 @@ void arch_touch_nmi_watchdog(void)
101 {
102 unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
103 int cpu = smp_processor_id();
104 + u64 tb = get_tb();
105
106 - if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
107 - watchdog_timer_interrupt(cpu);
108 + if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
109 + per_cpu(wd_timer_tb, cpu) = tb;
110 + wd_smp_clear_cpu_pending(cpu, tb);
111 + }
112 }
113 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
114
115 diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
116 index a66e64b0b251..5d115bd32539 100644
117 --- a/arch/powerpc/net/bpf_jit_comp64.c
118 +++ b/arch/powerpc/net/bpf_jit_comp64.c
119 @@ -762,7 +762,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
120 func = (u8 *) __bpf_call_base + imm;
121
122 /* Save skb pointer if we need to re-cache skb data */
123 - if (bpf_helper_changes_pkt_data(func))
124 + if ((ctx->seen & SEEN_SKB) &&
125 + bpf_helper_changes_pkt_data(func))
126 PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
127
128 bpf_jit_emit_func_call(image, ctx, (u64)func);
129 @@ -771,7 +772,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
130 PPC_MR(b2p[BPF_REG_0], 3);
131
132 /* refresh skb cache */
133 - if (bpf_helper_changes_pkt_data(func)) {
134 + if ((ctx->seen & SEEN_SKB) &&
135 + bpf_helper_changes_pkt_data(func)) {
136 /* reload skb pointer to r3 */
137 PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
138 bpf_jit_emit_skb_loads(image, ctx);
139 diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
140 index c008083fbc4f..2c8b325591cc 100644
141 --- a/arch/powerpc/xmon/xmon.c
142 +++ b/arch/powerpc/xmon/xmon.c
143 @@ -530,14 +530,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
144
145 waiting:
146 secondary = 1;
147 + spin_begin();
148 while (secondary && !xmon_gate) {
149 if (in_xmon == 0) {
150 - if (fromipi)
151 + if (fromipi) {
152 + spin_end();
153 goto leave;
154 + }
155 secondary = test_and_set_bit(0, &in_xmon);
156 }
157 - barrier();
158 + spin_cpu_relax();
159 + touch_nmi_watchdog();
160 }
161 + spin_end();
162
163 if (!secondary && !xmon_gate) {
164 /* we are the first cpu to come in */
165 @@ -568,21 +573,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
166 mb();
167 xmon_gate = 1;
168 barrier();
169 + touch_nmi_watchdog();
170 }
171
172 cmdloop:
173 while (in_xmon) {
174 if (secondary) {
175 + spin_begin();
176 if (cpu == xmon_owner) {
177 if (!test_and_set_bit(0, &xmon_taken)) {
178 secondary = 0;
179 + spin_end();
180 continue;
181 }
182 /* missed it */
183 while (cpu == xmon_owner)
184 - barrier();
185 + spin_cpu_relax();
186 }
187 - barrier();
188 + spin_cpu_relax();
189 + touch_nmi_watchdog();
190 } else {
191 cmd = cmds(regs);
192 if (cmd != 0) {
193 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
194 index b15cd2f0320f..33e2785f6842 100644
195 --- a/arch/s390/net/bpf_jit_comp.c
196 +++ b/arch/s390/net/bpf_jit_comp.c
197 @@ -55,8 +55,7 @@ struct bpf_jit {
198 #define SEEN_LITERAL 8 /* code uses literals */
199 #define SEEN_FUNC 16 /* calls C functions */
200 #define SEEN_TAIL_CALL 32 /* code uses tail calls */
201 -#define SEEN_SKB_CHANGE 64 /* code changes skb data */
202 -#define SEEN_REG_AX 128 /* code uses constant blinding */
203 +#define SEEN_REG_AX 64 /* code uses constant blinding */
204 #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
205
206 /*
207 @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
208 EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
209 REG_15, 152);
210 }
211 - if (jit->seen & SEEN_SKB)
212 + if (jit->seen & SEEN_SKB) {
213 emit_load_skb_data_hlen(jit);
214 - if (jit->seen & SEEN_SKB_CHANGE)
215 /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
216 EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
217 STK_OFF_SKBP);
218 + }
219 }
220
221 /*
222 @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
223 EMIT2(0x0d00, REG_14, REG_W1);
224 /* lgr %b0,%r2: load return value into %b0 */
225 EMIT4(0xb9040000, BPF_REG_0, REG_2);
226 - if (bpf_helper_changes_pkt_data((void *)func)) {
227 - jit->seen |= SEEN_SKB_CHANGE;
228 + if ((jit->seen & SEEN_SKB) &&
229 + bpf_helper_changes_pkt_data((void *)func)) {
230 /* lg %b1,ST_OFF_SKBP(%r15) */
231 EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
232 REG_15, STK_OFF_SKBP);
233 diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
234 index 6a339a78f4f4..71dd82b43cc5 100644
235 --- a/arch/sparc/include/asm/ptrace.h
236 +++ b/arch/sparc/include/asm/ptrace.h
237 @@ -7,6 +7,7 @@
238 #if defined(__sparc__) && defined(__arch64__)
239 #ifndef __ASSEMBLY__
240
241 +#include <linux/compiler.h>
242 #include <linux/threads.h>
243 #include <asm/switch_to.h>
244
245 diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
246 index 5765e7e711f7..ff5f9cb3039a 100644
247 --- a/arch/sparc/net/bpf_jit_comp_64.c
248 +++ b/arch/sparc/net/bpf_jit_comp_64.c
249 @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
250 u8 *func = ((u8 *)__bpf_call_base) + imm;
251
252 ctx->saw_call = true;
253 + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
254 + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
255
256 emit_call((u32 *)func, ctx);
257 emit_nop(ctx);
258
259 emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
260
261 - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
262 - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
263 + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
264 + load_skb_regs(ctx, L7);
265 break;
266 }
267
268 diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
269 index 50a32c33d729..73c57f614c9e 100644
270 --- a/arch/um/include/asm/Kbuild
271 +++ b/arch/um/include/asm/Kbuild
272 @@ -1,4 +1,5 @@
273 generic-y += barrier.h
274 +generic-y += bpf_perf_event.h
275 generic-y += bug.h
276 generic-y += clkdev.h
277 generic-y += current.h
278 diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h
279 index 390572daa40d..b3f5865a92c9 100644
280 --- a/arch/um/include/shared/init.h
281 +++ b/arch/um/include/shared/init.h
282 @@ -41,7 +41,7 @@
283 typedef int (*initcall_t)(void);
284 typedef void (*exitcall_t)(void);
285
286 -#include <linux/compiler.h>
287 +#include <linux/compiler_types.h>
288
289 /* These are for everybody (although not all archs will actually
290 discard it in modules) */
291 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
292 index 9bceea6a5852..48646160eb83 100644
293 --- a/arch/x86/Kconfig
294 +++ b/arch/x86/Kconfig
295 @@ -108,7 +108,7 @@ config X86
296 select HAVE_ARCH_AUDITSYSCALL
297 select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
298 select HAVE_ARCH_JUMP_LABEL
299 - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
300 + select HAVE_ARCH_KASAN if X86_64
301 select HAVE_ARCH_KGDB
302 select HAVE_ARCH_KMEMCHECK
303 select HAVE_ARCH_MMAP_RND_BITS if MMU
304 @@ -171,7 +171,7 @@ config X86
305 select HAVE_PERF_USER_STACK_DUMP
306 select HAVE_RCU_TABLE_FREE
307 select HAVE_REGS_AND_STACK_ACCESS_API
308 - select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
309 + select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
310 select HAVE_STACK_VALIDATION if X86_64
311 select HAVE_SYSCALL_TRACEPOINTS
312 select HAVE_UNSTABLE_SCHED_CLOCK
313 @@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
314 config KASAN_SHADOW_OFFSET
315 hex
316 depends on KASAN
317 - default 0xdff8000000000000 if X86_5LEVEL
318 default 0xdffffc0000000000
319
320 config HAVE_INTEL_TXT
321 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
322 index 90b123056f4b..6293a8768a91 100644
323 --- a/arch/x86/Kconfig.debug
324 +++ b/arch/x86/Kconfig.debug
325 @@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
326
327 choice
328 prompt "Choose kernel unwinder"
329 - default FRAME_POINTER_UNWINDER
330 + default UNWINDER_ORC if X86_64
331 + default UNWINDER_FRAME_POINTER if X86_32
332 ---help---
333 This determines which method will be used for unwinding kernel stack
334 traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
335 livepatch, lockdep, and more.
336
337 -config FRAME_POINTER_UNWINDER
338 - bool "Frame pointer unwinder"
339 - select FRAME_POINTER
340 - ---help---
341 - This option enables the frame pointer unwinder for unwinding kernel
342 - stack traces.
343 -
344 - The unwinder itself is fast and it uses less RAM than the ORC
345 - unwinder, but the kernel text size will grow by ~3% and the kernel's
346 - overall performance will degrade by roughly 5-10%.
347 -
348 - This option is recommended if you want to use the livepatch
349 - consistency model, as this is currently the only way to get a
350 - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
351 -
352 -config ORC_UNWINDER
353 +config UNWINDER_ORC
354 bool "ORC unwinder"
355 depends on X86_64
356 select STACK_VALIDATION
357 @@ -396,7 +382,22 @@ config ORC_UNWINDER
358 Enabling this option will increase the kernel's runtime memory usage
359 by roughly 2-4MB, depending on your kernel config.
360
361 -config GUESS_UNWINDER
362 +config UNWINDER_FRAME_POINTER
363 + bool "Frame pointer unwinder"
364 + select FRAME_POINTER
365 + ---help---
366 + This option enables the frame pointer unwinder for unwinding kernel
367 + stack traces.
368 +
369 + The unwinder itself is fast and it uses less RAM than the ORC
370 + unwinder, but the kernel text size will grow by ~3% and the kernel's
371 + overall performance will degrade by roughly 5-10%.
372 +
373 + This option is recommended if you want to use the livepatch
374 + consistency model, as this is currently the only way to get a
375 + reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
376 +
377 +config UNWINDER_GUESS
378 bool "Guess unwinder"
379 depends on EXPERT
380 ---help---
381 @@ -411,7 +412,7 @@ config GUESS_UNWINDER
382 endchoice
383
384 config FRAME_POINTER
385 - depends on !ORC_UNWINDER && !GUESS_UNWINDER
386 + depends on !UNWINDER_ORC && !UNWINDER_GUESS
387 bool
388
389 endmenu
390 diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
391 index 550cd5012b73..66c9e2aab16c 100644
392 --- a/arch/x86/configs/tiny.config
393 +++ b/arch/x86/configs/tiny.config
394 @@ -1,5 +1,5 @@
395 CONFIG_NOHIGHMEM=y
396 # CONFIG_HIGHMEM4G is not set
397 # CONFIG_HIGHMEM64G is not set
398 -CONFIG_GUESS_UNWINDER=y
399 -# CONFIG_FRAME_POINTER_UNWINDER is not set
400 +CONFIG_UNWINDER_GUESS=y
401 +# CONFIG_UNWINDER_FRAME_POINTER is not set
402 diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
403 index 4a4b16e56d35..e32fc1f274d8 100644
404 --- a/arch/x86/configs/x86_64_defconfig
405 +++ b/arch/x86/configs/x86_64_defconfig
406 @@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
407 # CONFIG_DEBUG_RODATA_TEST is not set
408 CONFIG_DEBUG_BOOT_PARAMS=y
409 CONFIG_OPTIMIZE_INLINING=y
410 +CONFIG_UNWINDER_ORC=y
411 CONFIG_SECURITY=y
412 CONFIG_SECURITY_NETWORK=y
413 CONFIG_SECURITY_SELINUX=y
414 diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
415 index 6e160031cfea..3fd8bc560fae 100644
416 --- a/arch/x86/entry/calling.h
417 +++ b/arch/x86/entry/calling.h
418 @@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
419 UNWIND_HINT_REGS offset=\offset
420 .endm
421
422 - .macro RESTORE_EXTRA_REGS offset=0
423 - movq 0*8+\offset(%rsp), %r15
424 - movq 1*8+\offset(%rsp), %r14
425 - movq 2*8+\offset(%rsp), %r13
426 - movq 3*8+\offset(%rsp), %r12
427 - movq 4*8+\offset(%rsp), %rbp
428 - movq 5*8+\offset(%rsp), %rbx
429 - UNWIND_HINT_REGS offset=\offset extra=0
430 - .endm
431 -
432 - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
433 - .if \rstor_r11
434 - movq 6*8(%rsp), %r11
435 - .endif
436 - .if \rstor_r8910
437 - movq 7*8(%rsp), %r10
438 - movq 8*8(%rsp), %r9
439 - movq 9*8(%rsp), %r8
440 - .endif
441 - .if \rstor_rax
442 - movq 10*8(%rsp), %rax
443 - .endif
444 - .if \rstor_rcx
445 - movq 11*8(%rsp), %rcx
446 - .endif
447 - .if \rstor_rdx
448 - movq 12*8(%rsp), %rdx
449 - .endif
450 - movq 13*8(%rsp), %rsi
451 - movq 14*8(%rsp), %rdi
452 - UNWIND_HINT_IRET_REGS offset=16*8
453 - .endm
454 - .macro RESTORE_C_REGS
455 - RESTORE_C_REGS_HELPER 1,1,1,1,1
456 - .endm
457 - .macro RESTORE_C_REGS_EXCEPT_RAX
458 - RESTORE_C_REGS_HELPER 0,1,1,1,1
459 - .endm
460 - .macro RESTORE_C_REGS_EXCEPT_RCX
461 - RESTORE_C_REGS_HELPER 1,0,1,1,1
462 - .endm
463 - .macro RESTORE_C_REGS_EXCEPT_R11
464 - RESTORE_C_REGS_HELPER 1,1,0,1,1
465 - .endm
466 - .macro RESTORE_C_REGS_EXCEPT_RCX_R11
467 - RESTORE_C_REGS_HELPER 1,0,0,1,1
468 - .endm
469 -
470 - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
471 - subq $-(15*8+\addskip), %rsp
472 + .macro POP_EXTRA_REGS
473 + popq %r15
474 + popq %r14
475 + popq %r13
476 + popq %r12
477 + popq %rbp
478 + popq %rbx
479 + .endm
480 +
481 + .macro POP_C_REGS
482 + popq %r11
483 + popq %r10
484 + popq %r9
485 + popq %r8
486 + popq %rax
487 + popq %rcx
488 + popq %rdx
489 + popq %rsi
490 + popq %rdi
491 .endm
492
493 .macro icebp
494 diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
495 index 4838037f97f6..bd8b57a5c874 100644
496 --- a/arch/x86/entry/entry_32.S
497 +++ b/arch/x86/entry/entry_32.S
498 @@ -941,7 +941,8 @@ ENTRY(debug)
499 movl %esp, %eax # pt_regs pointer
500
501 /* Are we currently on the SYSENTER stack? */
502 - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
503 + movl PER_CPU_VAR(cpu_entry_area), %ecx
504 + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
505 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
506 cmpl $SIZEOF_SYSENTER_stack, %ecx
507 jb .Ldebug_from_sysenter_stack
508 @@ -984,7 +985,8 @@ ENTRY(nmi)
509 movl %esp, %eax # pt_regs pointer
510
511 /* Are we currently on the SYSENTER stack? */
512 - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
513 + movl PER_CPU_VAR(cpu_entry_area), %ecx
514 + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
515 subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
516 cmpl $SIZEOF_SYSENTER_stack, %ecx
517 jb .Lnmi_from_sysenter_stack
518 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
519 index 2e956afe272c..6abe3fcaece9 100644
520 --- a/arch/x86/entry/entry_64.S
521 +++ b/arch/x86/entry/entry_64.S
522 @@ -136,6 +136,64 @@ END(native_usergs_sysret64)
523 * with them due to bugs in both AMD and Intel CPUs.
524 */
525
526 + .pushsection .entry_trampoline, "ax"
527 +
528 +/*
529 + * The code in here gets remapped into cpu_entry_area's trampoline. This means
530 + * that the assembler and linker have the wrong idea as to where this code
531 + * lives (and, in fact, it's mapped more than once, so it's not even at a
532 + * fixed address). So we can't reference any symbols outside the entry
533 + * trampoline and expect it to work.
534 + *
535 + * Instead, we carefully abuse %rip-relative addressing.
536 + * _entry_trampoline(%rip) refers to the start of the remapped) entry
537 + * trampoline. We can thus find cpu_entry_area with this macro:
538 + */
539 +
540 +#define CPU_ENTRY_AREA \
541 + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
542 +
543 +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
544 +#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
545 + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
546 +
547 +ENTRY(entry_SYSCALL_64_trampoline)
548 + UNWIND_HINT_EMPTY
549 + swapgs
550 +
551 + /* Stash the user RSP. */
552 + movq %rsp, RSP_SCRATCH
553 +
554 + /* Load the top of the task stack into RSP */
555 + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
556 +
557 + /* Start building the simulated IRET frame. */
558 + pushq $__USER_DS /* pt_regs->ss */
559 + pushq RSP_SCRATCH /* pt_regs->sp */
560 + pushq %r11 /* pt_regs->flags */
561 + pushq $__USER_CS /* pt_regs->cs */
562 + pushq %rcx /* pt_regs->ip */
563 +
564 + /*
565 + * x86 lacks a near absolute jump, and we can't jump to the real
566 + * entry text with a relative jump. We could push the target
567 + * address and then use retq, but this destroys the pipeline on
568 + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
569 + * spill RDI and restore it in a second-stage trampoline.
570 + */
571 + pushq %rdi
572 + movq $entry_SYSCALL_64_stage2, %rdi
573 + jmp *%rdi
574 +END(entry_SYSCALL_64_trampoline)
575 +
576 + .popsection
577 +
578 +ENTRY(entry_SYSCALL_64_stage2)
579 + UNWIND_HINT_EMPTY
580 + popq %rdi
581 + jmp entry_SYSCALL_64_after_hwframe
582 +END(entry_SYSCALL_64_stage2)
583 +
584 ENTRY(entry_SYSCALL_64)
585 UNWIND_HINT_EMPTY
586 /*
587 @@ -221,10 +279,9 @@ entry_SYSCALL_64_fastpath:
588 TRACE_IRQS_ON /* user mode is traced as IRQs on */
589 movq RIP(%rsp), %rcx
590 movq EFLAGS(%rsp), %r11
591 - RESTORE_C_REGS_EXCEPT_RCX_R11
592 - movq RSP(%rsp), %rsp
593 + addq $6*8, %rsp /* skip extra regs -- they were preserved */
594 UNWIND_HINT_EMPTY
595 - USERGS_SYSRET64
596 + jmp .Lpop_c_regs_except_rcx_r11_and_sysret
597
598 1:
599 /*
600 @@ -246,17 +303,18 @@ entry_SYSCALL64_slow_path:
601 call do_syscall_64 /* returns with IRQs disabled */
602
603 return_from_SYSCALL_64:
604 - RESTORE_EXTRA_REGS
605 TRACE_IRQS_IRETQ /* we're about to change IF */
606
607 /*
608 * Try to use SYSRET instead of IRET if we're returning to
609 - * a completely clean 64-bit userspace context.
610 + * a completely clean 64-bit userspace context. If we're not,
611 + * go to the slow exit path.
612 */
613 movq RCX(%rsp), %rcx
614 movq RIP(%rsp), %r11
615 - cmpq %rcx, %r11 /* RCX == RIP */
616 - jne opportunistic_sysret_failed
617 +
618 + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
619 + jne swapgs_restore_regs_and_return_to_usermode
620
621 /*
622 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
623 @@ -274,14 +332,14 @@ return_from_SYSCALL_64:
624
625 /* If this changed %rcx, it was not canonical */
626 cmpq %rcx, %r11
627 - jne opportunistic_sysret_failed
628 + jne swapgs_restore_regs_and_return_to_usermode
629
630 cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
631 - jne opportunistic_sysret_failed
632 + jne swapgs_restore_regs_and_return_to_usermode
633
634 movq R11(%rsp), %r11
635 cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
636 - jne opportunistic_sysret_failed
637 + jne swapgs_restore_regs_and_return_to_usermode
638
639 /*
640 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
641 @@ -302,12 +360,12 @@ return_from_SYSCALL_64:
642 * would never get past 'stuck_here'.
643 */
644 testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
645 - jnz opportunistic_sysret_failed
646 + jnz swapgs_restore_regs_and_return_to_usermode
647
648 /* nothing to check for RSP */
649
650 cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
651 - jne opportunistic_sysret_failed
652 + jne swapgs_restore_regs_and_return_to_usermode
653
654 /*
655 * We win! This label is here just for ease of understanding
656 @@ -315,14 +373,36 @@ return_from_SYSCALL_64:
657 */
658 syscall_return_via_sysret:
659 /* rcx and r11 are already restored (see code above) */
660 - RESTORE_C_REGS_EXCEPT_RCX_R11
661 - movq RSP(%rsp), %rsp
662 UNWIND_HINT_EMPTY
663 - USERGS_SYSRET64
664 + POP_EXTRA_REGS
665 +.Lpop_c_regs_except_rcx_r11_and_sysret:
666 + popq %rsi /* skip r11 */
667 + popq %r10
668 + popq %r9
669 + popq %r8
670 + popq %rax
671 + popq %rsi /* skip rcx */
672 + popq %rdx
673 + popq %rsi
674
675 -opportunistic_sysret_failed:
676 - SWAPGS
677 - jmp restore_c_regs_and_iret
678 + /*
679 + * Now all regs are restored except RSP and RDI.
680 + * Save old stack pointer and switch to trampoline stack.
681 + */
682 + movq %rsp, %rdi
683 + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
684 +
685 + pushq RSP-RDI(%rdi) /* RSP */
686 + pushq (%rdi) /* RDI */
687 +
688 + /*
689 + * We are on the trampoline stack. All regs except RDI are live.
690 + * We can do future final exit work right here.
691 + */
692 +
693 + popq %rdi
694 + popq %rsp
695 + USERGS_SYSRET64
696 END(entry_SYSCALL_64)
697
698 ENTRY(stub_ptregs_64)
699 @@ -423,8 +503,7 @@ ENTRY(ret_from_fork)
700 movq %rsp, %rdi
701 call syscall_return_slowpath /* returns with IRQs disabled */
702 TRACE_IRQS_ON /* user mode is traced as IRQS on */
703 - SWAPGS
704 - jmp restore_regs_and_iret
705 + jmp swapgs_restore_regs_and_return_to_usermode
706
707 1:
708 /* kernel thread */
709 @@ -457,12 +536,13 @@ END(irq_entries_start)
710
711 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
712 #ifdef CONFIG_DEBUG_ENTRY
713 - pushfq
714 - testl $X86_EFLAGS_IF, (%rsp)
715 + pushq %rax
716 + SAVE_FLAGS(CLBR_RAX)
717 + testl $X86_EFLAGS_IF, %eax
718 jz .Lokay_\@
719 ud2
720 .Lokay_\@:
721 - addq $8, %rsp
722 + popq %rax
723 #endif
724 .endm
725
726 @@ -554,6 +634,13 @@ END(irq_entries_start)
727 /* 0(%rsp): ~(interrupt number) */
728 .macro interrupt func
729 cld
730 +
731 + testb $3, CS-ORIG_RAX(%rsp)
732 + jz 1f
733 + SWAPGS
734 + call switch_to_thread_stack
735 +1:
736 +
737 ALLOC_PT_GPREGS_ON_STACK
738 SAVE_C_REGS
739 SAVE_EXTRA_REGS
740 @@ -563,12 +650,8 @@ END(irq_entries_start)
741 jz 1f
742
743 /*
744 - * IRQ from user mode. Switch to kernel gsbase and inform context
745 - * tracking that we're in kernel mode.
746 - */
747 - SWAPGS
748 -
749 - /*
750 + * IRQ from user mode.
751 + *
752 * We need to tell lockdep that IRQs are off. We can't do this until
753 * we fix gsbase, and we should do it before enter_from_user_mode
754 * (which can take locks). Since TRACE_IRQS_OFF idempotent,
755 @@ -612,8 +695,52 @@ GLOBAL(retint_user)
756 mov %rsp,%rdi
757 call prepare_exit_to_usermode
758 TRACE_IRQS_IRETQ
759 +
760 +GLOBAL(swapgs_restore_regs_and_return_to_usermode)
761 +#ifdef CONFIG_DEBUG_ENTRY
762 + /* Assert that pt_regs indicates user mode. */
763 + testb $3, CS(%rsp)
764 + jnz 1f
765 + ud2
766 +1:
767 +#endif
768 + POP_EXTRA_REGS
769 + popq %r11
770 + popq %r10
771 + popq %r9
772 + popq %r8
773 + popq %rax
774 + popq %rcx
775 + popq %rdx
776 + popq %rsi
777 +
778 + /*
779 + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
780 + * Save old stack pointer and switch to trampoline stack.
781 + */
782 + movq %rsp, %rdi
783 + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
784 +
785 + /* Copy the IRET frame to the trampoline stack. */
786 + pushq 6*8(%rdi) /* SS */
787 + pushq 5*8(%rdi) /* RSP */
788 + pushq 4*8(%rdi) /* EFLAGS */
789 + pushq 3*8(%rdi) /* CS */
790 + pushq 2*8(%rdi) /* RIP */
791 +
792 + /* Push user RDI on the trampoline stack. */
793 + pushq (%rdi)
794 +
795 + /*
796 + * We are on the trampoline stack. All regs except RDI are live.
797 + * We can do future final exit work right here.
798 + */
799 +
800 + /* Restore RDI. */
801 + popq %rdi
802 SWAPGS
803 - jmp restore_regs_and_iret
804 + INTERRUPT_RETURN
805 +
806
807 /* Returning to kernel space */
808 retint_kernel:
809 @@ -633,15 +760,17 @@ retint_kernel:
810 */
811 TRACE_IRQS_IRETQ
812
813 -/*
814 - * At this label, code paths which return to kernel and to user,
815 - * which come from interrupts/exception and from syscalls, merge.
816 - */
817 -GLOBAL(restore_regs_and_iret)
818 - RESTORE_EXTRA_REGS
819 -restore_c_regs_and_iret:
820 - RESTORE_C_REGS
821 - REMOVE_PT_GPREGS_FROM_STACK 8
822 +GLOBAL(restore_regs_and_return_to_kernel)
823 +#ifdef CONFIG_DEBUG_ENTRY
824 + /* Assert that pt_regs indicates kernel mode. */
825 + testb $3, CS(%rsp)
826 + jz 1f
827 + ud2
828 +1:
829 +#endif
830 + POP_EXTRA_REGS
831 + POP_C_REGS
832 + addq $8, %rsp /* skip regs->orig_ax */
833 INTERRUPT_RETURN
834
835 ENTRY(native_iret)
836 @@ -805,7 +934,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
837 /*
838 * Exception entry points.
839 */
840 -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
841 +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
842 +
843 +/*
844 + * Switch to the thread stack. This is called with the IRET frame and
845 + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
846 + * space has not been allocated for them.)
847 + */
848 +ENTRY(switch_to_thread_stack)
849 + UNWIND_HINT_FUNC
850 +
851 + pushq %rdi
852 + movq %rsp, %rdi
853 + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
854 + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
855 +
856 + pushq 7*8(%rdi) /* regs->ss */
857 + pushq 6*8(%rdi) /* regs->rsp */
858 + pushq 5*8(%rdi) /* regs->eflags */
859 + pushq 4*8(%rdi) /* regs->cs */
860 + pushq 3*8(%rdi) /* regs->ip */
861 + pushq 2*8(%rdi) /* regs->orig_ax */
862 + pushq 8(%rdi) /* return address */
863 + UNWIND_HINT_FUNC
864 +
865 + movq (%rdi), %rdi
866 + ret
867 +END(switch_to_thread_stack)
868
869 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
870 ENTRY(\sym)
871 @@ -818,17 +973,18 @@ ENTRY(\sym)
872
873 ASM_CLAC
874
875 - .ifeq \has_error_code
876 + .if \has_error_code == 0
877 pushq $-1 /* ORIG_RAX: no syscall to restart */
878 .endif
879
880 ALLOC_PT_GPREGS_ON_STACK
881
882 - .if \paranoid
883 - .if \paranoid == 1
884 + .if \paranoid < 2
885 testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
886 - jnz 1f
887 + jnz .Lfrom_usermode_switch_stack_\@
888 .endif
889 +
890 + .if \paranoid
891 call paranoid_entry
892 .else
893 call error_entry
894 @@ -870,20 +1026,15 @@ ENTRY(\sym)
895 jmp error_exit
896 .endif
897
898 - .if \paranoid == 1
899 + .if \paranoid < 2
900 /*
901 - * Paranoid entry from userspace. Switch stacks and treat it
902 + * Entry from userspace. Switch stacks and treat it
903 * as a normal entry. This means that paranoid handlers
904 * run in real process context if user_mode(regs).
905 */
906 -1:
907 +.Lfrom_usermode_switch_stack_\@:
908 call error_entry
909
910 -
911 - movq %rsp, %rdi /* pt_regs pointer */
912 - call sync_regs
913 - movq %rax, %rsp /* switch stack */
914 -
915 movq %rsp, %rdi /* pt_regs pointer */
916
917 .if \has_error_code
918 @@ -1059,6 +1210,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
919 idtentry stack_segment do_stack_segment has_error_code=1
920
921 #ifdef CONFIG_XEN
922 +idtentry xennmi do_nmi has_error_code=0
923 idtentry xendebug do_debug has_error_code=0
924 idtentry xenint3 do_int3 has_error_code=0
925 #endif
926 @@ -1112,17 +1264,14 @@ ENTRY(paranoid_exit)
927 DISABLE_INTERRUPTS(CLBR_ANY)
928 TRACE_IRQS_OFF_DEBUG
929 testl %ebx, %ebx /* swapgs needed? */
930 - jnz paranoid_exit_no_swapgs
931 + jnz .Lparanoid_exit_no_swapgs
932 TRACE_IRQS_IRETQ
933 SWAPGS_UNSAFE_STACK
934 - jmp paranoid_exit_restore
935 -paranoid_exit_no_swapgs:
936 + jmp .Lparanoid_exit_restore
937 +.Lparanoid_exit_no_swapgs:
938 TRACE_IRQS_IRETQ_DEBUG
939 -paranoid_exit_restore:
940 - RESTORE_EXTRA_REGS
941 - RESTORE_C_REGS
942 - REMOVE_PT_GPREGS_FROM_STACK 8
943 - INTERRUPT_RETURN
944 +.Lparanoid_exit_restore:
945 + jmp restore_regs_and_return_to_kernel
946 END(paranoid_exit)
947
948 /*
949 @@ -1146,6 +1295,14 @@ ENTRY(error_entry)
950 SWAPGS
951
952 .Lerror_entry_from_usermode_after_swapgs:
953 + /* Put us onto the real thread stack. */
954 + popq %r12 /* save return addr in %12 */
955 + movq %rsp, %rdi /* arg0 = pt_regs pointer */
956 + call sync_regs
957 + movq %rax, %rsp /* switch stack */
958 + ENCODE_FRAME_POINTER
959 + pushq %r12
960 +
961 /*
962 * We need to tell lockdep that IRQs are off. We can't do this until
963 * we fix gsbase, and we should do it before enter_from_user_mode
964 @@ -1223,10 +1380,13 @@ ENTRY(error_exit)
965 jmp retint_user
966 END(error_exit)
967
968 -/* Runs on exception stack */
969 -/* XXX: broken on Xen PV */
970 +/*
971 + * Runs on exception stack. Xen PV does not go through this path at all,
972 + * so we can use real assembly here.
973 + */
974 ENTRY(nmi)
975 UNWIND_HINT_IRET_REGS
976 +
977 /*
978 * We allow breakpoints in NMIs. If a breakpoint occurs, then
979 * the iretq it performs will take us out of NMI context.
980 @@ -1284,7 +1444,7 @@ ENTRY(nmi)
981 * stacks lest we corrupt the "NMI executing" variable.
982 */
983
984 - SWAPGS_UNSAFE_STACK
985 + swapgs
986 cld
987 movq %rsp, %rdx
988 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
989 @@ -1328,8 +1488,7 @@ ENTRY(nmi)
990 * Return back to user mode. We must *not* do the normal exit
991 * work, because we don't want to enable interrupts.
992 */
993 - SWAPGS
994 - jmp restore_regs_and_iret
995 + jmp swapgs_restore_regs_and_return_to_usermode
996
997 .Lnmi_from_kernel:
998 /*
999 @@ -1450,7 +1609,7 @@ nested_nmi_out:
1000 popq %rdx
1001
1002 /* We are returning to kernel mode, so this cannot result in a fault. */
1003 - INTERRUPT_RETURN
1004 + iretq
1005
1006 first_nmi:
1007 /* Restore rdx. */
1008 @@ -1481,7 +1640,7 @@ first_nmi:
1009 pushfq /* RFLAGS */
1010 pushq $__KERNEL_CS /* CS */
1011 pushq $1f /* RIP */
1012 - INTERRUPT_RETURN /* continues at repeat_nmi below */
1013 + iretq /* continues at repeat_nmi below */
1014 UNWIND_HINT_IRET_REGS
1015 1:
1016 #endif
1017 @@ -1544,29 +1703,34 @@ end_repeat_nmi:
1018 nmi_swapgs:
1019 SWAPGS_UNSAFE_STACK
1020 nmi_restore:
1021 - RESTORE_EXTRA_REGS
1022 - RESTORE_C_REGS
1023 + POP_EXTRA_REGS
1024 + POP_C_REGS
1025
1026 - /* Point RSP at the "iret" frame. */
1027 - REMOVE_PT_GPREGS_FROM_STACK 6*8
1028 + /*
1029 + * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
1030 + * at the "iret" frame.
1031 + */
1032 + addq $6*8, %rsp
1033
1034 /*
1035 * Clear "NMI executing". Set DF first so that we can easily
1036 * distinguish the remaining code between here and IRET from
1037 - * the SYSCALL entry and exit paths. On a native kernel, we
1038 - * could just inspect RIP, but, on paravirt kernels,
1039 - * INTERRUPT_RETURN can translate into a jump into a
1040 - * hypercall page.
1041 + * the SYSCALL entry and exit paths.
1042 + *
1043 + * We arguably should just inspect RIP instead, but I (Andy) wrote
1044 + * this code when I had the misapprehension that Xen PV supported
1045 + * NMIs, and Xen PV would break that approach.
1046 */
1047 std
1048 movq $0, 5*8(%rsp) /* clear "NMI executing" */
1049
1050 /*
1051 - * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
1052 - * stack in a single instruction. We are returning to kernel
1053 - * mode, so this cannot result in a fault.
1054 + * iretq reads the "iret" frame and exits the NMI stack in a
1055 + * single instruction. We are returning to kernel mode, so this
1056 + * cannot result in a fault. Similarly, we don't need to worry
1057 + * about espfix64 on the way back to kernel mode.
1058 */
1059 - INTERRUPT_RETURN
1060 + iretq
1061 END(nmi)
1062
1063 ENTRY(ignore_sysret)
1064 diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
1065 index b5c7a56ed256..95ad40eb7eff 100644
1066 --- a/arch/x86/entry/entry_64_compat.S
1067 +++ b/arch/x86/entry/entry_64_compat.S
1068 @@ -48,7 +48,7 @@
1069 */
1070 ENTRY(entry_SYSENTER_compat)
1071 /* Interrupts are off on entry. */
1072 - SWAPGS_UNSAFE_STACK
1073 + SWAPGS
1074 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
1075
1076 /*
1077 @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
1078 */
1079 movl %eax, %eax
1080
1081 - /* Construct struct pt_regs on stack (iret frame is already on stack) */
1082 pushq %rax /* pt_regs->orig_ax */
1083 +
1084 + /* switch to thread stack expects orig_ax to be pushed */
1085 + call switch_to_thread_stack
1086 +
1087 pushq %rdi /* pt_regs->di */
1088 pushq %rsi /* pt_regs->si */
1089 pushq %rdx /* pt_regs->dx */
1090 @@ -337,8 +340,7 @@ ENTRY(entry_INT80_compat)
1091
1092 /* Go back to user mode. */
1093 TRACE_IRQS_ON
1094 - SWAPGS
1095 - jmp restore_regs_and_iret
1096 + jmp swapgs_restore_regs_and_return_to_usermode
1097 END(entry_INT80_compat)
1098
1099 ENTRY(stub32_clone)
1100 diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
1101 index 331f1dca5085..6fb9b57ed5ba 100644
1102 --- a/arch/x86/entry/syscalls/Makefile
1103 +++ b/arch/x86/entry/syscalls/Makefile
1104 @@ -1,6 +1,6 @@
1105 # SPDX-License-Identifier: GPL-2.0
1106 -out := $(obj)/../../include/generated/asm
1107 -uapi := $(obj)/../../include/generated/uapi/asm
1108 +out := arch/$(SRCARCH)/include/generated/asm
1109 +uapi := arch/$(SRCARCH)/include/generated/uapi/asm
1110
1111 # Create output directory if not already present
1112 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
1113 diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
1114 index 80534d3c2480..589af1eec7c1 100644
1115 --- a/arch/x86/events/core.c
1116 +++ b/arch/x86/events/core.c
1117 @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment)
1118 struct ldt_struct *ldt;
1119
1120 /* IRQs are off, so this synchronizes with smp_store_release */
1121 - ldt = lockless_dereference(current->active_mm->context.ldt);
1122 + ldt = READ_ONCE(current->active_mm->context.ldt);
1123 if (!ldt || idx >= ldt->nr_entries)
1124 return 0;
1125
1126 diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
1127 index f94855000d4e..09c26a4f139c 100644
1128 --- a/arch/x86/events/intel/core.c
1129 +++ b/arch/x86/events/intel/core.c
1130 @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
1131
1132 if (event->attr.use_clockid)
1133 flags &= ~PERF_SAMPLE_TIME;
1134 + if (!event->attr.exclude_kernel)
1135 + flags &= ~PERF_SAMPLE_REGS_USER;
1136 + if (event->attr.sample_regs_user & ~PEBS_REGS)
1137 + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
1138 return flags;
1139 }
1140
1141 diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
1142 index 4196f81ec0e1..f7aaadf9331f 100644
1143 --- a/arch/x86/events/perf_event.h
1144 +++ b/arch/x86/events/perf_event.h
1145 @@ -85,13 +85,15 @@ struct amd_nb {
1146 * Flags PEBS can handle without an PMI.
1147 *
1148 * TID can only be handled by flushing at context switch.
1149 + * REGS_USER can be handled for events limited to ring 3.
1150 *
1151 */
1152 #define PEBS_FREERUNNING_FLAGS \
1153 (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
1154 PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
1155 PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
1156 - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
1157 + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
1158 + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
1159
1160 /*
1161 * A debug store configuration.
1162 @@ -110,6 +112,26 @@ struct debug_store {
1163 u64 pebs_event_reset[MAX_PEBS_EVENTS];
1164 };
1165
1166 +#define PEBS_REGS \
1167 + (PERF_REG_X86_AX | \
1168 + PERF_REG_X86_BX | \
1169 + PERF_REG_X86_CX | \
1170 + PERF_REG_X86_DX | \
1171 + PERF_REG_X86_DI | \
1172 + PERF_REG_X86_SI | \
1173 + PERF_REG_X86_SP | \
1174 + PERF_REG_X86_BP | \
1175 + PERF_REG_X86_IP | \
1176 + PERF_REG_X86_FLAGS | \
1177 + PERF_REG_X86_R8 | \
1178 + PERF_REG_X86_R9 | \
1179 + PERF_REG_X86_R10 | \
1180 + PERF_REG_X86_R11 | \
1181 + PERF_REG_X86_R12 | \
1182 + PERF_REG_X86_R13 | \
1183 + PERF_REG_X86_R14 | \
1184 + PERF_REG_X86_R15)
1185 +
1186 /*
1187 * Per register state.
1188 */
1189 diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
1190 index a5db63f728a2..a0b86cf486e0 100644
1191 --- a/arch/x86/hyperv/hv_init.c
1192 +++ b/arch/x86/hyperv/hv_init.c
1193 @@ -113,7 +113,7 @@ void hyperv_init(void)
1194 u64 guest_id;
1195 union hv_x64_msr_hypercall_contents hypercall_msr;
1196
1197 - if (x86_hyper != &x86_hyper_ms_hyperv)
1198 + if (x86_hyper_type != X86_HYPER_MS_HYPERV)
1199 return;
1200
1201 /* Allocate percpu VP index */
1202 diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
1203 index 5b0579abb398..3ac991d81e74 100644
1204 --- a/arch/x86/include/asm/archrandom.h
1205 +++ b/arch/x86/include/asm/archrandom.h
1206 @@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
1207 bool ok;
1208 unsigned int retry = RDRAND_RETRY_LOOPS;
1209 do {
1210 - asm volatile(RDRAND_LONG "\n\t"
1211 + asm volatile(RDRAND_LONG
1212 CC_SET(c)
1213 : CC_OUT(c) (ok), "=a" (*v));
1214 if (ok)
1215 @@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
1216 bool ok;
1217 unsigned int retry = RDRAND_RETRY_LOOPS;
1218 do {
1219 - asm volatile(RDRAND_INT "\n\t"
1220 + asm volatile(RDRAND_INT
1221 CC_SET(c)
1222 : CC_OUT(c) (ok), "=a" (*v));
1223 if (ok)
1224 @@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
1225 static inline bool rdseed_long(unsigned long *v)
1226 {
1227 bool ok;
1228 - asm volatile(RDSEED_LONG "\n\t"
1229 + asm volatile(RDSEED_LONG
1230 CC_SET(c)
1231 : CC_OUT(c) (ok), "=a" (*v));
1232 return ok;
1233 @@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
1234 static inline bool rdseed_int(unsigned int *v)
1235 {
1236 bool ok;
1237 - asm volatile(RDSEED_INT "\n\t"
1238 + asm volatile(RDSEED_INT
1239 CC_SET(c)
1240 : CC_OUT(c) (ok), "=a" (*v));
1241 return ok;
1242 diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
1243 index 2bcf47314959..3fa039855b8f 100644
1244 --- a/arch/x86/include/asm/bitops.h
1245 +++ b/arch/x86/include/asm/bitops.h
1246 @@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
1247 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
1248 {
1249 bool negative;
1250 - asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
1251 + asm volatile(LOCK_PREFIX "andb %2,%1"
1252 CC_SET(s)
1253 : CC_OUT(s) (negative), ADDR
1254 : "ir" ((char) ~(1 << nr)) : "memory");
1255 @@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
1256 {
1257 bool oldbit;
1258
1259 - asm("bts %2,%1\n\t"
1260 + asm("bts %2,%1"
1261 CC_SET(c)
1262 : CC_OUT(c) (oldbit), ADDR
1263 : "Ir" (nr));
1264 @@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
1265 {
1266 bool oldbit;
1267
1268 - asm volatile("btr %2,%1\n\t"
1269 + asm volatile("btr %2,%1"
1270 CC_SET(c)
1271 : CC_OUT(c) (oldbit), ADDR
1272 : "Ir" (nr));
1273 @@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
1274 {
1275 bool oldbit;
1276
1277 - asm volatile("btc %2,%1\n\t"
1278 + asm volatile("btc %2,%1"
1279 CC_SET(c)
1280 : CC_OUT(c) (oldbit), ADDR
1281 : "Ir" (nr) : "memory");
1282 @@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
1283 {
1284 bool oldbit;
1285
1286 - asm volatile("bt %2,%1\n\t"
1287 + asm volatile("bt %2,%1"
1288 CC_SET(c)
1289 : CC_OUT(c) (oldbit)
1290 : "m" (*(unsigned long *)addr), "Ir" (nr));
1291 diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
1292 index 70bc1df580b2..2cbd75dd2fd3 100644
1293 --- a/arch/x86/include/asm/compat.h
1294 +++ b/arch/x86/include/asm/compat.h
1295 @@ -7,6 +7,7 @@
1296 */
1297 #include <linux/types.h>
1298 #include <linux/sched.h>
1299 +#include <linux/sched/task_stack.h>
1300 #include <asm/processor.h>
1301 #include <asm/user32.h>
1302 #include <asm/unistd.h>
1303 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
1304 index 0dfa68438e80..ea9a7dde62e5 100644
1305 --- a/arch/x86/include/asm/cpufeature.h
1306 +++ b/arch/x86/include/asm/cpufeature.h
1307 @@ -126,16 +126,17 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
1308 #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
1309
1310 #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
1311 -#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
1312 -#define setup_clear_cpu_cap(bit) do { \
1313 - clear_cpu_cap(&boot_cpu_data, bit); \
1314 - set_bit(bit, (unsigned long *)cpu_caps_cleared); \
1315 -} while (0)
1316 +
1317 +extern void setup_clear_cpu_cap(unsigned int bit);
1318 +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
1319 +
1320 #define setup_force_cpu_cap(bit) do { \
1321 set_cpu_cap(&boot_cpu_data, bit); \
1322 set_bit(bit, (unsigned long *)cpu_caps_set); \
1323 } while (0)
1324
1325 +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
1326 +
1327 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
1328 /*
1329 * Static testing of CPU features. Used the same as boot_cpu_has().
1330 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
1331 index 793690fbda36..800104c8a3ed 100644
1332 --- a/arch/x86/include/asm/cpufeatures.h
1333 +++ b/arch/x86/include/asm/cpufeatures.h
1334 @@ -13,173 +13,176 @@
1335 /*
1336 * Defines x86 CPU feature bits
1337 */
1338 -#define NCAPINTS 18 /* N 32-bit words worth of info */
1339 -#define NBUGINTS 1 /* N 32-bit bug flags */
1340 +#define NCAPINTS 18 /* N 32-bit words worth of info */
1341 +#define NBUGINTS 1 /* N 32-bit bug flags */
1342
1343 /*
1344 * Note: If the comment begins with a quoted string, that string is used
1345 * in /proc/cpuinfo instead of the macro name. If the string is "",
1346 * this feature bit is not displayed in /proc/cpuinfo at all.
1347 + *
1348 + * When adding new features here that depend on other features,
1349 + * please update the table in kernel/cpu/cpuid-deps.c as well.
1350 */
1351
1352 -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
1353 -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
1354 -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
1355 -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
1356 -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
1357 -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
1358 -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
1359 -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
1360 -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
1361 -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
1362 -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
1363 -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
1364 -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
1365 -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
1366 -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
1367 -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
1368 - /* (plus FCMOVcc, FCOMI with FPU) */
1369 -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
1370 -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
1371 -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
1372 -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
1373 -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
1374 -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
1375 -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
1376 -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
1377 -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
1378 -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
1379 -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
1380 -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
1381 -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
1382 -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
1383 -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
1384 +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
1385 +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
1386 +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
1387 +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
1388 +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
1389 +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
1390 +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
1391 +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
1392 +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
1393 +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
1394 +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
1395 +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
1396 +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
1397 +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
1398 +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
1399 +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
1400 +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
1401 +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
1402 +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
1403 +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
1404 +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
1405 +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
1406 +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
1407 +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
1408 +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
1409 +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
1410 +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
1411 +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
1412 +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
1413 +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
1414 +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
1415
1416 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
1417 /* Don't duplicate feature flags which are redundant with Intel! */
1418 -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
1419 -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
1420 -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
1421 -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
1422 -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
1423 -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
1424 -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
1425 -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
1426 -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
1427 -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
1428 +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
1429 +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
1430 +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
1431 +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
1432 +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
1433 +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
1434 +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
1435 +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
1436 +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
1437 +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
1438
1439 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
1440 -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
1441 -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
1442 -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
1443 +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
1444 +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
1445 +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
1446
1447 /* Other features, Linux-defined mapping, word 3 */
1448 /* This range is used for feature bits which conflict or are synthesized */
1449 -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
1450 -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
1451 -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
1452 -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
1453 -/* cpu types for specific tunings: */
1454 -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
1455 -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
1456 -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
1457 -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
1458 -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
1459 -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
1460 -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
1461 -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
1462 -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
1463 -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
1464 -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
1465 -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
1466 -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
1467 -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
1468 -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
1469 -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
1470 -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
1471 -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
1472 -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
1473 -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
1474 -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
1475 -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
1476 -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
1477 -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
1478 -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
1479 -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
1480 -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
1481 +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
1482 +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
1483 +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
1484 +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
1485 +
1486 +/* CPU types for specific tunings: */
1487 +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
1488 +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
1489 +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
1490 +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
1491 +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
1492 +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
1493 +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
1494 +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
1495 +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
1496 +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
1497 +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
1498 +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
1499 +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
1500 +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
1501 +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
1502 +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
1503 +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
1504 +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
1505 +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
1506 +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
1507 +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
1508 +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
1509 +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
1510 +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
1511 +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
1512 +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
1513 +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
1514
1515 -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
1516 -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
1517 -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
1518 -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
1519 -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
1520 -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
1521 -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
1522 -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
1523 -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
1524 -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
1525 -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
1526 -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
1527 -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
1528 -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
1529 -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
1530 -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
1531 -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
1532 -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
1533 -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
1534 -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
1535 -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
1536 -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
1537 -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
1538 -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
1539 -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
1540 -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
1541 -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
1542 -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
1543 -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
1544 -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
1545 -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
1546 -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
1547 +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
1548 +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
1549 +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
1550 +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
1551 +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
1552 +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
1553 +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
1554 +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
1555 +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
1556 +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
1557 +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
1558 +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
1559 +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
1560 +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
1561 +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
1562 +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
1563 +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
1564 +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
1565 +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
1566 +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
1567 +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
1568 +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
1569 +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
1570 +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
1571 +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
1572 +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
1573 +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
1574 +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
1575 +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
1576 +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
1577 +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
1578 +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
1579
1580 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
1581 -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
1582 -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
1583 -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
1584 -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
1585 -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
1586 -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
1587 -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
1588 -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
1589 -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
1590 -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
1591 +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
1592 +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
1593 +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
1594 +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
1595 +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
1596 +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
1597 +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
1598 +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
1599 +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
1600 +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
1601
1602 -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
1603 -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
1604 -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
1605 -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
1606 -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
1607 -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
1608 -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
1609 -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
1610 -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
1611 -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
1612 -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
1613 -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
1614 -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
1615 -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
1616 -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
1617 -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
1618 -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
1619 -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
1620 -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
1621 -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
1622 -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
1623 -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
1624 -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
1625 -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
1626 -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
1627 -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
1628 -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
1629 +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
1630 +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
1631 +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
1632 +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
1633 +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
1634 +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
1635 +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
1636 +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
1637 +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
1638 +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
1639 +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
1640 +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
1641 +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
1642 +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
1643 +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
1644 +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
1645 +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
1646 +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
1647 +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
1648 +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
1649 +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
1650 +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
1651 +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
1652 +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
1653 +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
1654 +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
1655 +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
1656
1657 /*
1658 * Auxiliary flags: Linux defined - For features scattered in various
1659 @@ -187,146 +190,155 @@
1660 *
1661 * Reuse free bits when adding new feature flags!
1662 */
1663 -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
1664 -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
1665 -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
1666 -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
1667 -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
1668 -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
1669 -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
1670 +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
1671 +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
1672 +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
1673 +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
1674 +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
1675 +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
1676 +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
1677
1678 -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
1679 -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1680 -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1681 +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
1682 +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1683 +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1684
1685 -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1686 -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1687 -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
1688 -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
1689 +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1690 +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1691 +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
1692 +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
1693
1694 -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
1695 +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
1696
1697 /* Virtualization flags: Linux defined, word 8 */
1698 -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
1699 -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
1700 -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
1701 -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
1702 -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
1703 +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
1704 +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
1705 +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
1706 +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
1707 +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
1708
1709 -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
1710 -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
1711 +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
1712 +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
1713
1714
1715 -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
1716 -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
1717 -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
1718 -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
1719 -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
1720 -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
1721 -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
1722 -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
1723 -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
1724 -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
1725 -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
1726 -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
1727 -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
1728 -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
1729 -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
1730 -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
1731 -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
1732 -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
1733 -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
1734 -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
1735 -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
1736 -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
1737 -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
1738 -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
1739 -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
1740 -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
1741 -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
1742 -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
1743 +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
1744 +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */
1745 +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
1746 +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
1747 +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
1748 +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
1749 +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
1750 +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
1751 +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
1752 +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
1753 +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
1754 +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
1755 +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
1756 +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
1757 +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
1758 +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
1759 +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
1760 +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
1761 +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
1762 +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
1763 +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
1764 +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
1765 +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
1766 +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
1767 +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
1768 +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
1769 +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
1770 +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
1771
1772 -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
1773 -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
1774 -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
1775 -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
1776 -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
1777 +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
1778 +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
1779 +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
1780 +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
1781 +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
1782
1783 -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
1784 -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
1785 +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
1786 +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
1787
1788 -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
1789 -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
1790 -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
1791 -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
1792 +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
1793 +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
1794 +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
1795 +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
1796
1797 -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
1798 -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
1799 -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
1800 +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
1801 +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
1802 +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
1803 +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
1804
1805 -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
1806 -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
1807 -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
1808 -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
1809 -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
1810 -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
1811 -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
1812 -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
1813 -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
1814 -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
1815 -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
1816 +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
1817 +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
1818 +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
1819 +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
1820 +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
1821 +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
1822 +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
1823 +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
1824 +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
1825 +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
1826 +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
1827
1828 -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
1829 -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
1830 -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
1831 -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
1832 -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
1833 -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
1834 -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
1835 -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
1836 -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
1837 -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
1838 -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
1839 -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
1840 -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
1841 -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
1842 +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
1843 +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
1844 +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
1845 +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
1846 +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
1847 +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
1848 +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
1849 +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
1850 +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
1851 +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
1852 +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
1853 +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
1854 +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
1855 +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
1856
1857 -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
1858 -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
1859 -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
1860 -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
1861 -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
1862 -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
1863 -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
1864 +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
1865 +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */
1866 +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
1867 +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
1868 +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
1869 +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
1870 +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
1871 +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
1872 +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
1873 +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
1874 +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
1875 +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
1876 +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
1877 +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
1878
1879 -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
1880 -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
1881 -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
1882 -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
1883 +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
1884 +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
1885 +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
1886 +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
1887
1888 /*
1889 * BUG word(s)
1890 */
1891 -#define X86_BUG(x) (NCAPINTS*32 + (x))
1892 +#define X86_BUG(x) (NCAPINTS*32 + (x))
1893
1894 -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
1895 -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
1896 -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
1897 -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
1898 -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
1899 -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
1900 -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
1901 -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
1902 -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
1903 +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
1904 +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
1905 +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
1906 +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
1907 +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
1908 +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
1909 +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
1910 +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
1911 +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
1912 #ifdef CONFIG_X86_32
1913 /*
1914 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
1915 * to avoid confusion.
1916 */
1917 -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
1918 +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
1919 #endif
1920 -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
1921 -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
1922 -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1923 -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1924 +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
1925 +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
1926 +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1927 +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1928 +
1929 #endif /* _ASM_X86_CPUFEATURES_H */
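
Each X86_FEATURE_* value above packs a 32-bit word index and a bit position into one number ("word*32 + bit"), which is how the kernel indexes its per-CPU capability bitmap. A minimal standalone sketch of that encoding (the two feature values are copied from the hunk; the decode helper is illustrative, not kernel code):

#include <stdio.h>

#define X86_FEATURE_LA57       (16*32 + 16)  /* 5-level page tables, word 16 */
#define X86_FEATURE_XSAVEERPTR (13*32 +  2)  /* new AMD bit added by this patch */

static void decode(const char *name, unsigned int feature)
{
    unsigned int word = feature / 32;  /* index into the capability word array */
    unsigned int bit  = feature % 32;  /* bit within that 32-bit word */
    printf("%-24s word %2u, bit %2u\n", name, word, bit);
}

int main(void)
{
    decode("X86_FEATURE_LA57", X86_FEATURE_LA57);
    decode("X86_FEATURE_XSAVEERPTR", X86_FEATURE_XSAVEERPTR);
    return 0;
}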
1930 diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
1931 index 0a3e808b9123..2ace1f90d138 100644
1932 --- a/arch/x86/include/asm/desc.h
1933 +++ b/arch/x86/include/asm/desc.h
1934 @@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
1935 return this_cpu_ptr(&gdt_page)->gdt;
1936 }
1937
1938 -/* Get the fixmap index for a specific processor */
1939 -static inline unsigned int get_cpu_gdt_ro_index(int cpu)
1940 -{
1941 - return FIX_GDT_REMAP_BEGIN + cpu;
1942 -}
1943 -
1944 /* Provide the fixmap address of the remapped GDT */
1945 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
1946 {
1947 - unsigned int idx = get_cpu_gdt_ro_index(cpu);
1948 - return (struct desc_struct *)__fix_to_virt(idx);
1949 + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
1950 }
1951
1952 /* Provide the current read-only GDT */
1953 @@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
1954 #endif
1955 }
1956
1957 -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
1958 +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
1959 {
1960 struct desc_struct *d = get_cpu_gdt_rw(cpu);
1961 tss_desc tss;
1962 diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
1963 index dcd9fb55e679..94fc4fa14127 100644
1964 --- a/arch/x86/include/asm/fixmap.h
1965 +++ b/arch/x86/include/asm/fixmap.h
1966 @@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
1967 PAGE_SIZE)
1968 #endif
1969
1970 +/*
1971 + * cpu_entry_area is a percpu region in the fixmap that contains things
1972 + * needed by the CPU and early entry/exit code. Real types aren't used
1973 + * for all fields here to avoid circular header dependencies.
1974 + *
1975 + * Every field is a virtual alias of some other allocated backing store.
1976 + * There is no direct allocation of a struct cpu_entry_area.
1977 + */
1978 +struct cpu_entry_area {
1979 + char gdt[PAGE_SIZE];
1980 +
1981 + /*
1982 + * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
1983 + * a a read-only guard page.
1984 + */
1985 + struct SYSENTER_stack_page SYSENTER_stack_page;
1986 +
1987 + /*
1988 + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
1989 + * we need task switches to work, and task switches write to the TSS.
1990 + */
1991 + struct tss_struct tss;
1992 +
1993 + char entry_trampoline[PAGE_SIZE];
1994 +
1995 +#ifdef CONFIG_X86_64
1996 + /*
1997 + * Exception stacks used for IST entries.
1998 + *
1999 + * In the future, this should have a separate slot for each stack
2000 + * with guard pages between them.
2001 + */
2002 + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
2003 +#endif
2004 +};
2005 +
2006 +#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
2007 +
2008 +extern void setup_cpu_entry_areas(void);
2009
2010 /*
2011 * Here we define all the compile-time 'special' virtual
2012 @@ -101,8 +140,14 @@ enum fixed_addresses {
2013 FIX_LNW_VRTC,
2014 #endif
2015 /* Fixmap entries to remap the GDTs, one per processor. */
2016 - FIX_GDT_REMAP_BEGIN,
2017 - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
2018 + FIX_CPU_ENTRY_AREA_TOP,
2019 + FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
2020 +
2021 +#ifdef CONFIG_ACPI_APEI_GHES
2022 + /* Used for GHES mapping from assorted contexts */
2023 + FIX_APEI_GHES_IRQ,
2024 + FIX_APEI_GHES_NMI,
2025 +#endif
2026
2027 __end_of_permanent_fixed_addresses,
2028
2029 @@ -185,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
2030 void __early_set_fixmap(enum fixed_addresses idx,
2031 phys_addr_t phys, pgprot_t flags);
2032
2033 +static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
2034 +{
2035 + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
2036 +
2037 + return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
2038 +}
2039 +
2040 +#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
2041 + BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
2042 + __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
2043 + })
2044 +
2045 +#define get_cpu_entry_area_index(cpu, field) \
2046 + __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
2047 +
2048 +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
2049 +{
2050 + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
2051 +}
2052 +
2053 +static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
2054 +{
2055 + return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
2056 +}
2057 +
2058 #endif /* !__ASSEMBLY__ */
2059 #endif /* _ASM_X86_FIXMAP_H */
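
get_cpu_entry_area() above maps a CPU number to CPU_ENTRY_AREA_PAGES consecutive fixmap slots counted down from FIX_CPU_ENTRY_AREA_BOTTOM. A userspace sketch of the index arithmetic; every constant here is a made-up illustrative value, and fix_to_virt() mirrors the usual "FIXADDR_TOP minus index times page size" fixmap mapping, which is an assumption of this sketch rather than text from the patch:

#include <stdio.h>

#define PAGE_SHIFT                12
#define FIXADDR_TOP               0xffffffff5f000000UL  /* made-up value for the demo */
#define CPU_ENTRY_AREA_PAGES      8    /* pretend sizeof(struct cpu_entry_area) / PAGE_SIZE */
#define FIX_CPU_ENTRY_AREA_BOTTOM 100  /* made-up fixmap slot number */

/* Mirrors __fix_to_virt(): a higher fixmap index maps to a lower address. */
static unsigned long fix_to_virt(unsigned int idx)
{
    return FIXADDR_TOP - ((unsigned long)idx << PAGE_SHIFT);
}

/* Mirrors __get_cpu_entry_area_page_index() from the hunk above. */
static unsigned int cpu_entry_area_page_index(int cpu, int page)
{
    return FIX_CPU_ENTRY_AREA_BOTTOM - cpu * CPU_ENTRY_AREA_PAGES - page;
}

int main(void)
{
    for (int cpu = 0; cpu < 2; cpu++)
        printf("cpu %d: entry area base %#lx (fixmap index %u)\n",
               cpu, fix_to_virt(cpu_entry_area_page_index(cpu, 0)),
               cpu_entry_area_page_index(cpu, 0));
    return 0;
}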
2060 diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
2061 index 0ead9dbb9130..96aa6b9884dc 100644
2062 --- a/arch/x86/include/asm/hypervisor.h
2063 +++ b/arch/x86/include/asm/hypervisor.h
2064 @@ -20,14 +20,22 @@
2065 #ifndef _ASM_X86_HYPERVISOR_H
2066 #define _ASM_X86_HYPERVISOR_H
2067
2068 +/* x86 hypervisor types */
2069 +enum x86_hypervisor_type {
2070 + X86_HYPER_NATIVE = 0,
2071 + X86_HYPER_VMWARE,
2072 + X86_HYPER_MS_HYPERV,
2073 + X86_HYPER_XEN_PV,
2074 + X86_HYPER_XEN_HVM,
2075 + X86_HYPER_KVM,
2076 +};
2077 +
2078 #ifdef CONFIG_HYPERVISOR_GUEST
2079
2080 #include <asm/kvm_para.h>
2081 +#include <asm/x86_init.h>
2082 #include <asm/xen/hypervisor.h>
2083
2084 -/*
2085 - * x86 hypervisor information
2086 - */
2087 struct hypervisor_x86 {
2088 /* Hypervisor name */
2089 const char *name;
2090 @@ -35,40 +43,27 @@ struct hypervisor_x86 {
2091 /* Detection routine */
2092 uint32_t (*detect)(void);
2093
2094 - /* Platform setup (run once per boot) */
2095 - void (*init_platform)(void);
2096 -
2097 - /* X2APIC detection (run once per boot) */
2098 - bool (*x2apic_available)(void);
2099 + /* Hypervisor type */
2100 + enum x86_hypervisor_type type;
2101
2102 - /* pin current vcpu to specified physical cpu (run rarely) */
2103 - void (*pin_vcpu)(int);
2104 + /* init time callbacks */
2105 + struct x86_hyper_init init;
2106
2107 - /* called during init_mem_mapping() to setup early mappings. */
2108 - void (*init_mem_mapping)(void);
2109 + /* runtime callbacks */
2110 + struct x86_hyper_runtime runtime;
2111 };
2112
2113 -extern const struct hypervisor_x86 *x86_hyper;
2114 -
2115 -/* Recognized hypervisors */
2116 -extern const struct hypervisor_x86 x86_hyper_vmware;
2117 -extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
2118 -extern const struct hypervisor_x86 x86_hyper_xen_pv;
2119 -extern const struct hypervisor_x86 x86_hyper_xen_hvm;
2120 -extern const struct hypervisor_x86 x86_hyper_kvm;
2121 -
2122 +extern enum x86_hypervisor_type x86_hyper_type;
2123 extern void init_hypervisor_platform(void);
2124 -extern bool hypervisor_x2apic_available(void);
2125 -extern void hypervisor_pin_vcpu(int cpu);
2126 -
2127 -static inline void hypervisor_init_mem_mapping(void)
2128 +static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
2129 {
2130 - if (x86_hyper && x86_hyper->init_mem_mapping)
2131 - x86_hyper->init_mem_mapping();
2132 + return x86_hyper_type == type;
2133 }
2134 #else
2135 static inline void init_hypervisor_platform(void) { }
2136 -static inline bool hypervisor_x2apic_available(void) { return false; }
2137 -static inline void hypervisor_init_mem_mapping(void) { }
2138 +static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
2139 +{
2140 + return type == X86_HYPER_NATIVE;
2141 +}
2142 #endif /* CONFIG_HYPERVISOR_GUEST */
2143 #endif /* _ASM_X86_HYPERVISOR_H */
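
The new hypervisor_is_type() helper replaces comparisons against the old per-hypervisor descriptors with a check of the detected type. A self-contained mock of how a caller uses it; the enum and helper are copied from the hunk, while the pretend detection result is an assumption for the example:

#include <stdbool.h>
#include <stdio.h>

enum x86_hypervisor_type {
    X86_HYPER_NATIVE = 0,
    X86_HYPER_VMWARE,
    X86_HYPER_MS_HYPERV,
    X86_HYPER_XEN_PV,
    X86_HYPER_XEN_HVM,
    X86_HYPER_KVM,
};

/* Pretend result of platform detection, just for the demo. */
static enum x86_hypervisor_type x86_hyper_type = X86_HYPER_KVM;

static bool hypervisor_is_type(enum x86_hypervisor_type type)
{
    return x86_hyper_type == type;
}

int main(void)
{
    printf("running as a KVM guest: %s\n",
           hypervisor_is_type(X86_HYPER_KVM) ? "yes" : "no");
    return 0;
}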
2144 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
2145 index c8ef23f2c28f..89f08955fff7 100644
2146 --- a/arch/x86/include/asm/irqflags.h
2147 +++ b/arch/x86/include/asm/irqflags.h
2148 @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
2149 swapgs; \
2150 sysretl
2151
2152 +#ifdef CONFIG_DEBUG_ENTRY
2153 +#define SAVE_FLAGS(x) pushfq; popq %rax
2154 +#endif
2155 #else
2156 #define INTERRUPT_RETURN iret
2157 #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
2158 diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
2159 index f86a8caa561e..395c9631e000 100644
2160 --- a/arch/x86/include/asm/kdebug.h
2161 +++ b/arch/x86/include/asm/kdebug.h
2162 @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
2163 extern int __must_check __die(const char *, struct pt_regs *, long);
2164 extern void show_stack_regs(struct pt_regs *regs);
2165 extern void __show_regs(struct pt_regs *regs, int all);
2166 +extern void show_iret_regs(struct pt_regs *regs);
2167 extern unsigned long oops_begin(void);
2168 extern void oops_end(unsigned long, struct pt_regs *, int signr);
2169
2170 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
2171 index 6699fc441644..6d16d15d09a0 100644
2172 --- a/arch/x86/include/asm/mmu_context.h
2173 +++ b/arch/x86/include/asm/mmu_context.h
2174 @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
2175 #ifdef CONFIG_MODIFY_LDT_SYSCALL
2176 struct ldt_struct *ldt;
2177
2178 - /* lockless_dereference synchronizes with smp_store_release */
2179 - ldt = lockless_dereference(mm->context.ldt);
2180 + /* READ_ONCE synchronizes with smp_store_release */
2181 + ldt = READ_ONCE(mm->context.ldt);
2182
2183 /*
2184 * Any change to mm->context.ldt is followed by an IPI to all
2185 diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
2186 index 8546fafa21a9..7948a17febb4 100644
2187 --- a/arch/x86/include/asm/module.h
2188 +++ b/arch/x86/include/asm/module.h
2189 @@ -6,7 +6,7 @@
2190 #include <asm/orc_types.h>
2191
2192 struct mod_arch_specific {
2193 -#ifdef CONFIG_ORC_UNWINDER
2194 +#ifdef CONFIG_UNWINDER_ORC
2195 unsigned int num_orcs;
2196 int *orc_unwind_ip;
2197 struct orc_entry *orc_unwind;
2198 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
2199 index fd81228e8037..892df375b615 100644
2200 --- a/arch/x86/include/asm/paravirt.h
2201 +++ b/arch/x86/include/asm/paravirt.h
2202 @@ -16,10 +16,9 @@
2203 #include <linux/cpumask.h>
2204 #include <asm/frame.h>
2205
2206 -static inline void load_sp0(struct tss_struct *tss,
2207 - struct thread_struct *thread)
2208 +static inline void load_sp0(unsigned long sp0)
2209 {
2210 - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
2211 + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
2212 }
2213
2214 /* The paravirtualized CPUID instruction. */
2215 @@ -928,6 +927,15 @@ extern void default_banner(void);
2216 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
2217 CLBR_NONE, \
2218 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
2219 +
2220 +#ifdef CONFIG_DEBUG_ENTRY
2221 +#define SAVE_FLAGS(clobbers) \
2222 + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
2223 + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
2224 + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
2225 + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
2226 +#endif
2227 +
2228 #endif /* CONFIG_X86_32 */
2229
2230 #endif /* __ASSEMBLY__ */
2231 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
2232 index 10cc3b9709fe..6ec54d01972d 100644
2233 --- a/arch/x86/include/asm/paravirt_types.h
2234 +++ b/arch/x86/include/asm/paravirt_types.h
2235 @@ -134,7 +134,7 @@ struct pv_cpu_ops {
2236 void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
2237 void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
2238
2239 - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
2240 + void (*load_sp0)(unsigned long sp0);
2241
2242 void (*set_iopl_mask)(unsigned mask);
2243
2244 diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
2245 index 377f1ffd18be..ba3c523aaf16 100644
2246 --- a/arch/x86/include/asm/percpu.h
2247 +++ b/arch/x86/include/asm/percpu.h
2248 @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
2249 {
2250 bool oldbit;
2251
2252 - asm volatile("bt "__percpu_arg(2)",%1\n\t"
2253 + asm volatile("bt "__percpu_arg(2)",%1"
2254 CC_SET(c)
2255 : CC_OUT(c) (oldbit)
2256 : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
2257 diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
2258 index 59df7b47a434..9e9b05fc4860 100644
2259 --- a/arch/x86/include/asm/pgtable_types.h
2260 +++ b/arch/x86/include/asm/pgtable_types.h
2261 @@ -200,10 +200,9 @@ enum page_cache_mode {
2262
2263 #define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
2264
2265 -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
2266 - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
2267 #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
2268 _PAGE_DIRTY | _PAGE_ENC)
2269 +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
2270
2271 #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
2272 #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
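
The _PAGE_TABLE change above is a pure refactor: defining it as _KERNPG_TABLE | _PAGE_USER yields the same mask as the old open-coded list. A quick standalone check, using assumed (standard x86) PTE bit positions and treating _PAGE_ENC as 0, as it is when SME is inactive:

#include <assert.h>
#include <stdio.h>

#define _PAGE_PRESENT  (1UL << 0)
#define _PAGE_RW       (1UL << 1)
#define _PAGE_USER     (1UL << 2)
#define _PAGE_ACCESSED (1UL << 5)
#define _PAGE_DIRTY    (1UL << 6)
#define _PAGE_ENC      0UL  /* sme_me_mask; 0 when SME is off */

#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
#define OLD_PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
#define NEW_PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)

int main(void)
{
    assert(OLD_PAGE_TABLE == NEW_PAGE_TABLE);
    printf("_PAGE_TABLE unchanged: %#lx\n", (unsigned long)NEW_PAGE_TABLE);
    return 0;
}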
2273 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
2274 index bdac19ab2488..da943411d3d8 100644
2275 --- a/arch/x86/include/asm/processor.h
2276 +++ b/arch/x86/include/asm/processor.h
2277 @@ -162,9 +162,9 @@ enum cpuid_regs_idx {
2278 extern struct cpuinfo_x86 boot_cpu_data;
2279 extern struct cpuinfo_x86 new_cpu_data;
2280
2281 -extern struct tss_struct doublefault_tss;
2282 -extern __u32 cpu_caps_cleared[NCAPINTS];
2283 -extern __u32 cpu_caps_set[NCAPINTS];
2284 +extern struct x86_hw_tss doublefault_tss;
2285 +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
2286 +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
2287
2288 #ifdef CONFIG_SMP
2289 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
2290 @@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir)
2291 write_cr3(__sme_pa(pgdir));
2292 }
2293
2294 +/*
2295 + * Note that while the legacy 'TSS' name comes from 'Task State Segment',
2296 + * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
2297 + * unrelated to the task-switch mechanism:
2298 + */
2299 #ifdef CONFIG_X86_32
2300 /* This is the TSS defined by the hardware. */
2301 struct x86_hw_tss {
2302 @@ -304,7 +309,13 @@ struct x86_hw_tss {
2303 struct x86_hw_tss {
2304 u32 reserved1;
2305 u64 sp0;
2306 +
2307 + /*
2308 + * We store cpu_current_top_of_stack in sp1 so it's always accessible.
2309 + * Linux does not use ring 1, so sp1 is not otherwise needed.
2310 + */
2311 u64 sp1;
2312 +
2313 u64 sp2;
2314 u64 reserved2;
2315 u64 ist[7];
2316 @@ -322,12 +333,22 @@ struct x86_hw_tss {
2317 #define IO_BITMAP_BITS 65536
2318 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
2319 #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
2320 -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
2321 +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
2322 #define INVALID_IO_BITMAP_OFFSET 0x8000
2323
2324 +struct SYSENTER_stack {
2325 + unsigned long words[64];
2326 +};
2327 +
2328 +struct SYSENTER_stack_page {
2329 + struct SYSENTER_stack stack;
2330 +} __aligned(PAGE_SIZE);
2331 +
2332 struct tss_struct {
2333 /*
2334 - * The hardware state:
2335 + * The fixed hardware portion. This must not cross a page boundary
2336 + * at risk of violating the SDM's advice and potentially triggering
2337 + * errata.
2338 */
2339 struct x86_hw_tss x86_tss;
2340
2341 @@ -338,18 +359,9 @@ struct tss_struct {
2342 * be within the limit.
2343 */
2344 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
2345 +} __aligned(PAGE_SIZE);
2346
2347 -#ifdef CONFIG_X86_32
2348 - /*
2349 - * Space for the temporary SYSENTER stack.
2350 - */
2351 - unsigned long SYSENTER_stack_canary;
2352 - unsigned long SYSENTER_stack[64];
2353 -#endif
2354 -
2355 -} ____cacheline_aligned;
2356 -
2357 -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
2358 +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
2359
2360 /*
2361 * sizeof(unsigned long) coming from an extra "long" at the end
2362 @@ -363,6 +375,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
2363
2364 #ifdef CONFIG_X86_32
2365 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
2366 +#else
2367 +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
2368 +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
2369 #endif
2370
2371 /*
2372 @@ -431,7 +446,9 @@ typedef struct {
2373 struct thread_struct {
2374 /* Cached TLS descriptors: */
2375 struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
2376 +#ifdef CONFIG_X86_32
2377 unsigned long sp0;
2378 +#endif
2379 unsigned long sp;
2380 #ifdef CONFIG_X86_32
2381 unsigned long sysenter_cs;
2382 @@ -518,16 +535,9 @@ static inline void native_set_iopl_mask(unsigned mask)
2383 }
2384
2385 static inline void
2386 -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
2387 +native_load_sp0(unsigned long sp0)
2388 {
2389 - tss->x86_tss.sp0 = thread->sp0;
2390 -#ifdef CONFIG_X86_32
2391 - /* Only happens when SEP is enabled, no need to test "SEP"arately: */
2392 - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
2393 - tss->x86_tss.ss1 = thread->sysenter_cs;
2394 - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
2395 - }
2396 -#endif
2397 + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
2398 }
2399
2400 static inline void native_swapgs(void)
2401 @@ -539,12 +549,18 @@ static inline void native_swapgs(void)
2402
2403 static inline unsigned long current_top_of_stack(void)
2404 {
2405 -#ifdef CONFIG_X86_64
2406 - return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
2407 -#else
2408 - /* sp0 on x86_32 is special in and around vm86 mode. */
2409 + /*
2410 + * We can't read directly from tss.sp0: sp0 on x86_32 is special in
2411 + * and around vm86 mode and sp0 on x86_64 is special because of the
2412 + * entry trampoline.
2413 + */
2414 return this_cpu_read_stable(cpu_current_top_of_stack);
2415 -#endif
2416 +}
2417 +
2418 +static inline bool on_thread_stack(void)
2419 +{
2420 + return (unsigned long)(current_top_of_stack() -
2421 + current_stack_pointer) < THREAD_SIZE;
2422 }
2423
2424 #ifdef CONFIG_PARAVIRT
2425 @@ -552,10 +568,9 @@ static inline unsigned long current_top_of_stack(void)
2426 #else
2427 #define __cpuid native_cpuid
2428
2429 -static inline void load_sp0(struct tss_struct *tss,
2430 - struct thread_struct *thread)
2431 +static inline void load_sp0(unsigned long sp0)
2432 {
2433 - native_load_sp0(tss, thread);
2434 + native_load_sp0(sp0);
2435 }
2436
2437 #define set_iopl_mask native_set_iopl_mask
2438 @@ -804,6 +819,15 @@ static inline void spin_lock_prefetch(const void *x)
2439 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
2440 TOP_OF_KERNEL_STACK_PADDING)
2441
2442 +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
2443 +
2444 +#define task_pt_regs(task) \
2445 +({ \
2446 + unsigned long __ptr = (unsigned long)task_stack_page(task); \
2447 + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
2448 + ((struct pt_regs *)__ptr) - 1; \
2449 +})
2450 +
2451 #ifdef CONFIG_X86_32
2452 /*
2453 * User space process size: 3GB (default).
2454 @@ -823,23 +847,6 @@ static inline void spin_lock_prefetch(const void *x)
2455 .addr_limit = KERNEL_DS, \
2456 }
2457
2458 -/*
2459 - * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
2460 - * This is necessary to guarantee that the entire "struct pt_regs"
2461 - * is accessible even if the CPU haven't stored the SS/ESP registers
2462 - * on the stack (interrupt gate does not save these registers
2463 - * when switching to the same priv ring).
2464 - * Therefore beware: accessing the ss/esp fields of the
2465 - * "struct pt_regs" is possible, but they may contain the
2466 - * completely wrong values.
2467 - */
2468 -#define task_pt_regs(task) \
2469 -({ \
2470 - unsigned long __ptr = (unsigned long)task_stack_page(task); \
2471 - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
2472 - ((struct pt_regs *)__ptr) - 1; \
2473 -})
2474 -
2475 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
2476
2477 #else
2478 @@ -873,11 +880,9 @@ static inline void spin_lock_prefetch(const void *x)
2479 #define STACK_TOP_MAX TASK_SIZE_MAX
2480
2481 #define INIT_THREAD { \
2482 - .sp0 = TOP_OF_INIT_STACK, \
2483 .addr_limit = KERNEL_DS, \
2484 }
2485
2486 -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
2487 extern unsigned long KSTK_ESP(struct task_struct *task);
2488
2489 #endif /* CONFIG_X86_64 */
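
With this hunk, task_pt_regs() is defined the same way for 32-bit and 64-bit: pt_regs sits immediately below the top of the task's stack page (minus the small padding on x86_32). A userspace sketch of the pointer arithmetic, with an assumed THREAD_SIZE and a stand-in pt_regs sized like the real one:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define THREAD_SIZE                 (4 * 4096)  /* assumed; the real value is config specific */
#define TOP_OF_KERNEL_STACK_PADDING 0           /* 0 on x86_64; x86_32 reserves a few bytes */

struct pt_regs { unsigned long regs[21]; };     /* stand-in for the real register layout */

int main(void)
{
    void *stack_page = aligned_alloc(4096, THREAD_SIZE);  /* stands in for task_stack_page() */
    if (!stack_page)
        return 1;

    uintptr_t top = (uintptr_t)stack_page + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
    struct pt_regs *regs = (struct pt_regs *)top - 1;      /* the same "- 1" as task_pt_regs() */

    printf("stack page:     %p\n", stack_page);
    printf("top of stack:   %#lx\n", (unsigned long)top);
    printf("pt_regs begins: %p (%zu bytes below the top)\n",
           (void *)regs, (size_t)(top - (uintptr_t)regs));
    free(stack_page);
    return 0;
}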
2490 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
2491 index c0e3c45cf6ab..14131dd06b29 100644
2492 --- a/arch/x86/include/asm/ptrace.h
2493 +++ b/arch/x86/include/asm/ptrace.h
2494 @@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
2495 #endif
2496 }
2497
2498 -#ifdef CONFIG_X86_64
2499 static inline bool user_64bit_mode(struct pt_regs *regs)
2500 {
2501 +#ifdef CONFIG_X86_64
2502 #ifndef CONFIG_PARAVIRT
2503 /*
2504 * On non-paravirt systems, this is the only long mode CPL 3
2505 @@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
2506 /* Headers are too twisted for this to go in paravirt.h. */
2507 return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
2508 #endif
2509 +#else /* !CONFIG_X86_64 */
2510 + return false;
2511 +#endif
2512 }
2513
2514 +#ifdef CONFIG_X86_64
2515 #define current_user_stack_pointer() current_pt_regs()->sp
2516 #define compat_user_stack_pointer() current_pt_regs()->sp
2517 #endif
2518 diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
2519 index d8f3a6ae9f6c..f91c365e57c3 100644
2520 --- a/arch/x86/include/asm/rmwcc.h
2521 +++ b/arch/x86/include/asm/rmwcc.h
2522 @@ -29,7 +29,7 @@ cc_label: \
2523 #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
2524 do { \
2525 bool c; \
2526 - asm volatile (fullop ";" CC_SET(cc) \
2527 + asm volatile (fullop CC_SET(cc) \
2528 : [counter] "+m" (var), CC_OUT(cc) (c) \
2529 : __VA_ARGS__ : clobbers); \
2530 return c; \
2531 diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
2532 index 8da111b3c342..f8062bfd43a0 100644
2533 --- a/arch/x86/include/asm/stacktrace.h
2534 +++ b/arch/x86/include/asm/stacktrace.h
2535 @@ -16,6 +16,7 @@ enum stack_type {
2536 STACK_TYPE_TASK,
2537 STACK_TYPE_IRQ,
2538 STACK_TYPE_SOFTIRQ,
2539 + STACK_TYPE_SYSENTER,
2540 STACK_TYPE_EXCEPTION,
2541 STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
2542 };
2543 @@ -28,6 +29,8 @@ struct stack_info {
2544 bool in_task_stack(unsigned long *stack, struct task_struct *task,
2545 struct stack_info *info);
2546
2547 +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
2548 +
2549 int get_stack_info(unsigned long *stack, struct task_struct *task,
2550 struct stack_info *info, unsigned long *visit_mask);
2551
2552 diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
2553 index 899084b70412..9b6df68d8fd1 100644
2554 --- a/arch/x86/include/asm/switch_to.h
2555 +++ b/arch/x86/include/asm/switch_to.h
2556 @@ -2,6 +2,8 @@
2557 #ifndef _ASM_X86_SWITCH_TO_H
2558 #define _ASM_X86_SWITCH_TO_H
2559
2560 +#include <linux/sched/task_stack.h>
2561 +
2562 struct task_struct; /* one of the stranger aspects of C forward declarations */
2563
2564 struct task_struct *__switch_to_asm(struct task_struct *prev,
2565 @@ -73,4 +75,28 @@ do { \
2566 ((last) = __switch_to_asm((prev), (next))); \
2567 } while (0)
2568
2569 +#ifdef CONFIG_X86_32
2570 +static inline void refresh_sysenter_cs(struct thread_struct *thread)
2571 +{
2572 + /* Only happens when SEP is enabled, no need to test "SEP"arately: */
2573 + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
2574 + return;
2575 +
2576 + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
2577 + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
2578 +}
2579 +#endif
2580 +
2581 +/* This is used when switching tasks or entering/exiting vm86 mode. */
2582 +static inline void update_sp0(struct task_struct *task)
2583 +{
2584 + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
2585 +#ifdef CONFIG_X86_32
2586 + load_sp0(task->thread.sp0);
2587 +#else
2588 + if (static_cpu_has(X86_FEATURE_XENPV))
2589 + load_sp0(task_top_of_stack(task));
2590 +#endif
2591 +}
2592 +
2593 #endif /* _ASM_X86_SWITCH_TO_H */
2594 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
2595 index 70f425947dc5..00223333821a 100644
2596 --- a/arch/x86/include/asm/thread_info.h
2597 +++ b/arch/x86/include/asm/thread_info.h
2598 @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
2599 #else /* !__ASSEMBLY__ */
2600
2601 #ifdef CONFIG_X86_64
2602 -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
2603 +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
2604 #endif
2605
2606 #endif
2607 diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
2608 index fa60398bbc3a..069c04be1507 100644
2609 --- a/arch/x86/include/asm/trace/fpu.h
2610 +++ b/arch/x86/include/asm/trace/fpu.h
2611 @@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
2612 )
2613 );
2614
2615 -DEFINE_EVENT(x86_fpu, x86_fpu_state,
2616 - TP_PROTO(struct fpu *fpu),
2617 - TP_ARGS(fpu)
2618 -);
2619 -
2620 DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
2621 TP_PROTO(struct fpu *fpu),
2622 TP_ARGS(fpu)
2623 @@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
2624 TP_ARGS(fpu)
2625 );
2626
2627 -DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
2628 - TP_PROTO(struct fpu *fpu),
2629 - TP_ARGS(fpu)
2630 -);
2631 -
2632 DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
2633 TP_PROTO(struct fpu *fpu),
2634 TP_ARGS(fpu)
2635 diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
2636 index b0cced97a6ce..31051f35cbb7 100644
2637 --- a/arch/x86/include/asm/traps.h
2638 +++ b/arch/x86/include/asm/traps.h
2639 @@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
2640
2641 #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
2642 asmlinkage void xen_divide_error(void);
2643 +asmlinkage void xen_xennmi(void);
2644 asmlinkage void xen_xendebug(void);
2645 asmlinkage void xen_xenint3(void);
2646 -asmlinkage void xen_nmi(void);
2647 asmlinkage void xen_overflow(void);
2648 asmlinkage void xen_bounds(void);
2649 asmlinkage void xen_invalid_op(void);
2650 @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
2651 dotraplinkage void do_stack_segment(struct pt_regs *, long);
2652 #ifdef CONFIG_X86_64
2653 dotraplinkage void do_double_fault(struct pt_regs *, long);
2654 -asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
2655 #endif
2656 dotraplinkage void do_general_protection(struct pt_regs *, long);
2657 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
2658 @@ -145,4 +144,22 @@ enum {
2659 X86_TRAP_IRET = 32, /* 32, IRET Exception */
2660 };
2661
2662 +/*
2663 + * Page fault error code bits:
2664 + *
2665 + * bit 0 == 0: no page found 1: protection fault
2666 + * bit 1 == 0: read access 1: write access
2667 + * bit 2 == 0: kernel-mode access 1: user-mode access
2668 + * bit 3 == 1: use of reserved bit detected
2669 + * bit 4 == 1: fault was an instruction fetch
2670 + * bit 5 == 1: protection keys block access
2671 + */
2672 +enum x86_pf_error_code {
2673 + X86_PF_PROT = 1 << 0,
2674 + X86_PF_WRITE = 1 << 1,
2675 + X86_PF_USER = 1 << 2,
2676 + X86_PF_RSVD = 1 << 3,
2677 + X86_PF_INSTR = 1 << 4,
2678 + X86_PF_PK = 1 << 5,
2679 +};
2680 #endif /* _ASM_X86_TRAPS_H */
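[Editor's aside, not part of the patch] The X86_PF_* constants added above simply give names to the page-fault error-code bits described in the comment. As a reading aid, here is a minimal, hypothetical user-space sketch of decoding such an error code; the describe_page_fault() helper and the printf output format are illustrative only.

	#include <stdio.h>

	enum x86_pf_error_code {
		X86_PF_PROT  = 1 << 0,	/* 0: page not present, 1: protection fault */
		X86_PF_WRITE = 1 << 1,	/* 0: read access, 1: write access */
		X86_PF_USER  = 1 << 2,	/* 0: kernel mode, 1: user mode */
		X86_PF_RSVD  = 1 << 3,	/* reserved bit set in a paging entry */
		X86_PF_INSTR = 1 << 4,	/* fault was an instruction fetch */
		X86_PF_PK    = 1 << 5,	/* access blocked by a protection key */
	};

	static void describe_page_fault(unsigned long error_code)
	{
		printf("%s %s access: %s%s%s%s\n",
		       (error_code & X86_PF_USER)  ? "user"  : "kernel",
		       (error_code & X86_PF_WRITE) ? "write" : "read",
		       (error_code & X86_PF_PROT)  ? "protection fault" : "page not present",
		       (error_code & X86_PF_RSVD)  ? ", reserved bit"      : "",
		       (error_code & X86_PF_INSTR) ? ", instruction fetch" : "",
		       (error_code & X86_PF_PK)    ? ", protection key"    : "");
	}

	int main(void)
	{
		describe_page_fault(X86_PF_USER | X86_PF_WRITE | X86_PF_PROT);
		return 0;
	}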
2681 diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
2682 index 87adc0d38c4a..c1688c2d0a12 100644
2683 --- a/arch/x86/include/asm/unwind.h
2684 +++ b/arch/x86/include/asm/unwind.h
2685 @@ -7,17 +7,20 @@
2686 #include <asm/ptrace.h>
2687 #include <asm/stacktrace.h>
2688
2689 +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
2690 +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
2691 +
2692 struct unwind_state {
2693 struct stack_info stack_info;
2694 unsigned long stack_mask;
2695 struct task_struct *task;
2696 int graph_idx;
2697 bool error;
2698 -#if defined(CONFIG_ORC_UNWINDER)
2699 +#if defined(CONFIG_UNWINDER_ORC)
2700 bool signal, full_regs;
2701 unsigned long sp, bp, ip;
2702 struct pt_regs *regs;
2703 -#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
2704 +#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
2705 bool got_irq;
2706 unsigned long *bp, *orig_sp, ip;
2707 struct pt_regs *regs;
2708 @@ -51,7 +54,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
2709 __unwind_start(state, task, regs, first_frame);
2710 }
2711
2712 -#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
2713 +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
2714 +/*
2715 + * WARNING: The entire pt_regs may not be safe to dereference. In some cases,
2716 + * only the iret frame registers are accessible. Use with caution!
2717 + */
2718 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
2719 {
2720 if (unwind_done(state))
2721 @@ -66,7 +73,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
2722 }
2723 #endif
2724
2725 -#ifdef CONFIG_ORC_UNWINDER
2726 +#ifdef CONFIG_UNWINDER_ORC
2727 void unwind_init(void);
2728 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
2729 void *orc, size_t orc_size);
2730 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
2731 index 8a1ebf9540dd..ad15a0fda917 100644
2732 --- a/arch/x86/include/asm/x86_init.h
2733 +++ b/arch/x86/include/asm/x86_init.h
2734 @@ -114,6 +114,18 @@ struct x86_init_pci {
2735 void (*fixup_irqs)(void);
2736 };
2737
2738 +/**
2739 + * struct x86_hyper_init - x86 hypervisor init functions
2740 + * @init_platform: platform setup
2741 + * @x2apic_available: X2APIC detection
2742 + * @init_mem_mapping: setup early mappings during init_mem_mapping()
2743 + */
2744 +struct x86_hyper_init {
2745 + void (*init_platform)(void);
2746 + bool (*x2apic_available)(void);
2747 + void (*init_mem_mapping)(void);
2748 +};
2749 +
2750 /**
2751 * struct x86_init_ops - functions for platform specific setup
2752 *
2753 @@ -127,6 +139,7 @@ struct x86_init_ops {
2754 struct x86_init_timers timers;
2755 struct x86_init_iommu iommu;
2756 struct x86_init_pci pci;
2757 + struct x86_hyper_init hyper;
2758 };
2759
2760 /**
2761 @@ -199,6 +212,15 @@ struct x86_legacy_features {
2762 struct x86_legacy_devices devices;
2763 };
2764
2765 +/**
2766 + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
2767 + *
2768 + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
2769 + */
2770 +struct x86_hyper_runtime {
2771 + void (*pin_vcpu)(int cpu);
2772 +};
2773 +
2774 /**
2775 * struct x86_platform_ops - platform specific runtime functions
2776 * @calibrate_cpu: calibrate CPU
2777 @@ -218,6 +240,7 @@ struct x86_legacy_features {
2778 * possible in x86_early_init_platform_quirks() by
2779 * only using the current x86_hardware_subarch
2780 * semantics.
2781 + * @hyper: x86 hypervisor specific runtime callbacks
2782 */
2783 struct x86_platform_ops {
2784 unsigned long (*calibrate_cpu)(void);
2785 @@ -233,6 +256,7 @@ struct x86_platform_ops {
2786 void (*apic_post_init)(void);
2787 struct x86_legacy_features legacy;
2788 void (*set_legacy_features)(void);
2789 + struct x86_hyper_runtime hyper;
2790 };
2791
2792 struct pci_dev;
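[Editor's aside, not part of the patch] The new x86_hyper_init and x86_hyper_runtime members hang hypervisor callbacks off x86_init and x86_platform as plain function-pointer structs, so callers can invoke them unconditionally and a detected guest type only overrides the hooks it implements. A hedged, stand-alone sketch of that default-plus-override pattern follows; all names here (hyper_init_ops, kvm_like_init_platform, etc.) are invented for the example.

	#include <stdio.h>
	#include <stdbool.h>

	/* Hypothetical miniature of the callback-struct pattern used above. */
	struct hyper_init_ops {
		void (*init_platform)(void);
		bool (*x2apic_available)(void);
	};

	/* Defaults that are safe to call unconditionally. */
	static void default_init_platform(void) { }
	static bool default_x2apic_available(void) { return true; }

	static struct hyper_init_ops hyper_ops = {
		.init_platform	  = default_init_platform,
		.x2apic_available = default_x2apic_available,
	};

	/* A detected guest type overrides only the hooks it actually provides. */
	static void kvm_like_init_platform(void) { printf("guest platform setup\n"); }

	int main(void)
	{
		hyper_ops.init_platform = kvm_like_init_platform;

		hyper_ops.init_platform();	/* no NULL check needed at the call site */
		printf("x2apic available: %d\n", hyper_ops.x2apic_available());
		return 0;
	}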
2793 diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
2794 index 6f3355399665..53b4ca55ebb6 100644
2795 --- a/arch/x86/include/uapi/asm/processor-flags.h
2796 +++ b/arch/x86/include/uapi/asm/processor-flags.h
2797 @@ -152,5 +152,8 @@
2798 #define CX86_ARR_BASE 0xc4
2799 #define CX86_RCR_BASE 0xdc
2800
2801 +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
2802 + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
2803 + X86_CR0_PG)
2804
2805 #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
2806 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
2807 index 5f70044340ff..295abaa58add 100644
2808 --- a/arch/x86/kernel/Makefile
2809 +++ b/arch/x86/kernel/Makefile
2810 @@ -25,9 +25,9 @@ endif
2811 KASAN_SANITIZE_head$(BITS).o := n
2812 KASAN_SANITIZE_dumpstack.o := n
2813 KASAN_SANITIZE_dumpstack_$(BITS).o := n
2814 -KASAN_SANITIZE_stacktrace.o := n
2815 +KASAN_SANITIZE_stacktrace.o := n
2816 +KASAN_SANITIZE_paravirt.o := n
2817
2818 -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
2819 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
2820 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
2821 OBJECT_FILES_NON_STANDARD_test_nx.o := y
2822 @@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
2823 obj-$(CONFIG_TRACING) += tracepoint.o
2824 obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
2825
2826 -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
2827 -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
2828 -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
2829 +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
2830 +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
2831 +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
2832
2833 ###
2834 # 64 bit specific files
2835 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
2836 index ff891772c9f8..89c7c8569e5e 100644
2837 --- a/arch/x86/kernel/apic/apic.c
2838 +++ b/arch/x86/kernel/apic/apic.c
2839 @@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
2840 * under KVM
2841 */
2842 if (max_physical_apicid > 255 ||
2843 - !hypervisor_x2apic_available()) {
2844 + !x86_init.hyper.x2apic_available()) {
2845 pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
2846 x2apic_disable();
2847 return;
2848 diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
2849 index 0d57bb9079c9..c0b694810ff4 100644
2850 --- a/arch/x86/kernel/apic/x2apic_uv_x.c
2851 +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
2852 @@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
2853 /*
2854 * percpu heartbeat timer
2855 */
2856 -static void uv_heartbeat(unsigned long ignored)
2857 +static void uv_heartbeat(struct timer_list *timer)
2858 {
2859 - struct timer_list *timer = &uv_scir_info->timer;
2860 unsigned char bits = uv_scir_info->state;
2861
2862 /* Flip heartbeat bit: */
2863 @@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
2864 struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
2865
2866 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
2867 - setup_pinned_timer(timer, uv_heartbeat, cpu);
2868 + timer_setup(timer, uv_heartbeat, TIMER_PINNED);
2869 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
2870 add_timer_on(timer, cpu);
2871 uv_cpu_scir_info(cpu)->enabled = 1;
2872 diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
2873 index 8ea78275480d..cd360a5e0dca 100644
2874 --- a/arch/x86/kernel/asm-offsets.c
2875 +++ b/arch/x86/kernel/asm-offsets.c
2876 @@ -93,4 +93,10 @@ void common(void) {
2877
2878 BLANK();
2879 DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
2880 +
2881 + /* Layout info for cpu_entry_area */
2882 + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
2883 + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
2884 + OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
2885 + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
2886 }
2887 diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
2888 index dedf428b20b6..7d20d9c0b3d6 100644
2889 --- a/arch/x86/kernel/asm-offsets_32.c
2890 +++ b/arch/x86/kernel/asm-offsets_32.c
2891 @@ -47,13 +47,8 @@ void foo(void)
2892 BLANK();
2893
2894 /* Offset from the sysenter stack to tss.sp0 */
2895 - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
2896 - offsetofend(struct tss_struct, SYSENTER_stack));
2897 -
2898 - /* Offset from cpu_tss to SYSENTER_stack */
2899 - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
2900 - /* Size of SYSENTER_stack */
2901 - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
2902 + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
2903 + offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
2904
2905 #ifdef CONFIG_CC_STACKPROTECTOR
2906 BLANK();
2907 diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
2908 index 630212fa9b9d..bf51e51d808d 100644
2909 --- a/arch/x86/kernel/asm-offsets_64.c
2910 +++ b/arch/x86/kernel/asm-offsets_64.c
2911 @@ -23,6 +23,9 @@ int main(void)
2912 #ifdef CONFIG_PARAVIRT
2913 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
2914 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
2915 +#ifdef CONFIG_DEBUG_ENTRY
2916 + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
2917 +#endif
2918 BLANK();
2919 #endif
2920
2921 @@ -63,6 +66,7 @@ int main(void)
2922
2923 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
2924 OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
2925 + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
2926 BLANK();
2927
2928 #ifdef CONFIG_CC_STACKPROTECTOR
2929 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
2930 index c60922a66385..90cb82dbba57 100644
2931 --- a/arch/x86/kernel/cpu/Makefile
2932 +++ b/arch/x86/kernel/cpu/Makefile
2933 @@ -23,6 +23,7 @@ obj-y += rdrand.o
2934 obj-y += match.o
2935 obj-y += bugs.o
2936 obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
2937 +obj-y += cpuid-deps.o
2938
2939 obj-$(CONFIG_PROC_FS) += proc.o
2940 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
2941 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
2942 index d58184b7cd44..bcb75dc97d44 100644
2943 --- a/arch/x86/kernel/cpu/amd.c
2944 +++ b/arch/x86/kernel/cpu/amd.c
2945 @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
2946 case 0x17: init_amd_zn(c); break;
2947 }
2948
2949 - /* Enable workaround for FXSAVE leak */
2950 - if (c->x86 >= 6)
2951 + /*
2952 + * Enable workaround for FXSAVE leak on CPUs
2953 + * without a XSaveErPtr feature
2954 + */
2955 + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
2956 set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
2957
2958 cpu_detect_cache_sizes(c);
2959 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
2960 index c9176bae7fd8..034900623adf 100644
2961 --- a/arch/x86/kernel/cpu/common.c
2962 +++ b/arch/x86/kernel/cpu/common.c
2963 @@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
2964 return NULL; /* Not found */
2965 }
2966
2967 -__u32 cpu_caps_cleared[NCAPINTS];
2968 -__u32 cpu_caps_set[NCAPINTS];
2969 +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
2970 +__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
2971
2972 void load_percpu_segment(int cpu)
2973 {
2974 @@ -466,27 +466,116 @@ void load_percpu_segment(int cpu)
2975 load_stack_canary_segment();
2976 }
2977
2978 -/* Setup the fixmap mapping only once per-processor */
2979 -static inline void setup_fixmap_gdt(int cpu)
2980 +#ifdef CONFIG_X86_32
2981 +/* The 32-bit entry code needs to find cpu_entry_area. */
2982 +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
2983 +#endif
2984 +
2985 +#ifdef CONFIG_X86_64
2986 +/*
2987 + * Special IST stacks which the CPU switches to when it calls
2988 + * an IST-marked descriptor entry. Up to 7 stacks (hardware
2989 + * limit), all of them are 4K, except the debug stack which
2990 + * is 8K.
2991 + */
2992 +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
2993 + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
2994 + [DEBUG_STACK - 1] = DEBUG_STKSZ
2995 +};
2996 +
2997 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
2998 + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
2999 +#endif
3000 +
3001 +static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
3002 + SYSENTER_stack_storage);
3003 +
3004 +static void __init
3005 +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
3006 +{
3007 + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
3008 + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
3009 +}
3010 +
3011 +/* Setup the fixmap mappings only once per-processor */
3012 +static void __init setup_cpu_entry_area(int cpu)
3013 {
3014 #ifdef CONFIG_X86_64
3015 - /* On 64-bit systems, we use a read-only fixmap GDT. */
3016 - pgprot_t prot = PAGE_KERNEL_RO;
3017 + extern char _entry_trampoline[];
3018 +
3019 + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
3020 + pgprot_t gdt_prot = PAGE_KERNEL_RO;
3021 + pgprot_t tss_prot = PAGE_KERNEL_RO;
3022 #else
3023 /*
3024 * On native 32-bit systems, the GDT cannot be read-only because
3025 * our double fault handler uses a task gate, and entering through
3026 - * a task gate needs to change an available TSS to busy. If the GDT
3027 - * is read-only, that will triple fault.
3028 + * a task gate needs to change an available TSS to busy. If the
3029 + * GDT is read-only, that will triple fault. The TSS cannot be
3030 + * read-only because the CPU writes to it on task switches.
3031 *
3032 - * On Xen PV, the GDT must be read-only because the hypervisor requires
3033 - * it.
3034 + * On Xen PV, the GDT must be read-only because the hypervisor
3035 + * requires it.
3036 */
3037 - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
3038 + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
3039 PAGE_KERNEL_RO : PAGE_KERNEL;
3040 + pgprot_t tss_prot = PAGE_KERNEL;
3041 +#endif
3042 +
3043 + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
3044 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
3045 + per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
3046 + PAGE_KERNEL);
3047 +
3048 + /*
3049 + * The Intel SDM says (Volume 3, 7.2.1):
3050 + *
3051 + * Avoid placing a page boundary in the part of the TSS that the
3052 + * processor reads during a task switch (the first 104 bytes). The
3053 + * processor may not correctly perform address translations if a
3054 + * boundary occurs in this area. During a task switch, the processor
3055 + * reads and writes into the first 104 bytes of each TSS (using
3056 + * contiguous physical addresses beginning with the physical address
3057 + * of the first byte of the TSS). So, after TSS access begins, if
3058 + * part of the 104 bytes is not physically contiguous, the processor
3059 + * will access incorrect information without generating a page-fault
3060 + * exception.
3061 + *
3062 + * There are also a lot of errata involving the TSS spanning a page
3063 + * boundary. Assert that we're not doing that.
3064 + */
3065 + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
3066 + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
3067 + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
3068 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
3069 + &per_cpu(cpu_tss_rw, cpu),
3070 + sizeof(struct tss_struct) / PAGE_SIZE,
3071 + tss_prot);
3072 +
3073 +#ifdef CONFIG_X86_32
3074 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
3075 #endif
3076
3077 - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
3078 +#ifdef CONFIG_X86_64
3079 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
3080 + BUILD_BUG_ON(sizeof(exception_stacks) !=
3081 + sizeof(((struct cpu_entry_area *)0)->exception_stacks));
3082 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
3083 + &per_cpu(exception_stacks, cpu),
3084 + sizeof(exception_stacks) / PAGE_SIZE,
3085 + PAGE_KERNEL);
3086 +
3087 + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
3088 + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
3089 +#endif
3090 +}
3091 +
3092 +void __init setup_cpu_entry_areas(void)
3093 +{
3094 + unsigned int cpu;
3095 +
3096 + for_each_possible_cpu(cpu)
3097 + setup_cpu_entry_area(cpu);
3098 }
3099
3100 /* Load the original GDT from the per-cpu structure */
3101 @@ -723,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
3102 {
3103 int i;
3104
3105 - for (i = 0; i < NCAPINTS; i++) {
3106 + for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
3107 c->x86_capability[i] &= ~cpu_caps_cleared[i];
3108 c->x86_capability[i] |= cpu_caps_set[i];
3109 }
3110 @@ -1225,7 +1314,7 @@ void enable_sep_cpu(void)
3111 return;
3112
3113 cpu = get_cpu();
3114 - tss = &per_cpu(cpu_tss, cpu);
3115 + tss = &per_cpu(cpu_tss_rw, cpu);
3116
3117 /*
3118 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
3119 @@ -1234,11 +1323,7 @@ void enable_sep_cpu(void)
3120
3121 tss->x86_tss.ss1 = __KERNEL_CS;
3122 wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
3123 -
3124 - wrmsr(MSR_IA32_SYSENTER_ESP,
3125 - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
3126 - 0);
3127 -
3128 + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
3129 wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
3130
3131 put_cpu();
3132 @@ -1301,18 +1386,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
3133 pr_cont(")\n");
3134 }
3135
3136 -static __init int setup_disablecpuid(char *arg)
3137 +/*
3138 + * clearcpuid= was already parsed in fpu__init_parse_early_param.
3139 + * But we need to keep a dummy __setup around otherwise it would
3140 + * show up as an environment variable for init.
3141 + */
3142 +static __init int setup_clearcpuid(char *arg)
3143 {
3144 - int bit;
3145 -
3146 - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
3147 - setup_clear_cpu_cap(bit);
3148 - else
3149 - return 0;
3150 -
3151 return 1;
3152 }
3153 -__setup("clearcpuid=", setup_disablecpuid);
3154 +__setup("clearcpuid=", setup_clearcpuid);
3155
3156 #ifdef CONFIG_X86_64
3157 DEFINE_PER_CPU_FIRST(union irq_stack_union,
3158 @@ -1334,25 +1417,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
3159 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
3160 EXPORT_PER_CPU_SYMBOL(__preempt_count);
3161
3162 -/*
3163 - * Special IST stacks which the CPU switches to when it calls
3164 - * an IST-marked descriptor entry. Up to 7 stacks (hardware
3165 - * limit), all of them are 4K, except the debug stack which
3166 - * is 8K.
3167 - */
3168 -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
3169 - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
3170 - [DEBUG_STACK - 1] = DEBUG_STKSZ
3171 -};
3172 -
3173 -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
3174 - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
3175 -
3176 /* May not be marked __init: used by software suspend */
3177 void syscall_init(void)
3178 {
3179 + extern char _entry_trampoline[];
3180 + extern char entry_SYSCALL_64_trampoline[];
3181 +
3182 + int cpu = smp_processor_id();
3183 + unsigned long SYSCALL64_entry_trampoline =
3184 + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
3185 + (entry_SYSCALL_64_trampoline - _entry_trampoline);
3186 +
3187 wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
3188 - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
3189 + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
3190
3191 #ifdef CONFIG_IA32_EMULATION
3192 wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
3193 @@ -1363,7 +1440,7 @@ void syscall_init(void)
3194 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
3195 */
3196 wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
3197 - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
3198 + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
3199 wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
3200 #else
3201 wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
3202 @@ -1507,7 +1584,7 @@ void cpu_init(void)
3203 if (cpu)
3204 load_ucode_ap();
3205
3206 - t = &per_cpu(cpu_tss, cpu);
3207 + t = &per_cpu(cpu_tss_rw, cpu);
3208 oist = &per_cpu(orig_ist, cpu);
3209
3210 #ifdef CONFIG_NUMA
3211 @@ -1546,7 +1623,7 @@ void cpu_init(void)
3212 * set up and load the per-CPU TSS
3213 */
3214 if (!oist->ist[0]) {
3215 - char *estacks = per_cpu(exception_stacks, cpu);
3216 + char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
3217
3218 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
3219 estacks += exception_stack_sizes[v];
3220 @@ -1557,7 +1634,7 @@ void cpu_init(void)
3221 }
3222 }
3223
3224 - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
3225 + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
3226
3227 /*
3228 * <= is required because the CPU will access up to
3229 @@ -1572,9 +1649,14 @@ void cpu_init(void)
3230 initialize_tlbstate_and_flush();
3231 enter_lazy_tlb(&init_mm, me);
3232
3233 - load_sp0(t, &current->thread);
3234 - set_tss_desc(cpu, t);
3235 + /*
3236 + * Initialize the TSS. sp0 points to the entry trampoline stack
3237 + * regardless of what task is running.
3238 + */
3239 + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
3240 load_TR_desc();
3241 + load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
3242 +
3243 load_mm_ldt(&init_mm);
3244
3245 clear_all_debug_regs();
3246 @@ -1585,7 +1667,6 @@ void cpu_init(void)
3247 if (is_uv_system())
3248 uv_cpu_init();
3249
3250 - setup_fixmap_gdt(cpu);
3251 load_fixmap_gdt(cpu);
3252 }
3253
3254 @@ -1595,8 +1676,7 @@ void cpu_init(void)
3255 {
3256 int cpu = smp_processor_id();
3257 struct task_struct *curr = current;
3258 - struct tss_struct *t = &per_cpu(cpu_tss, cpu);
3259 - struct thread_struct *thread = &curr->thread;
3260 + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
3261
3262 wait_for_master_cpu(cpu);
3263
3264 @@ -1627,12 +1707,16 @@ void cpu_init(void)
3265 initialize_tlbstate_and_flush();
3266 enter_lazy_tlb(&init_mm, curr);
3267
3268 - load_sp0(t, thread);
3269 - set_tss_desc(cpu, t);
3270 + /*
3271 + * Initialize the TSS. Don't bother initializing sp0, as the initial
3272 + * task never enters user mode.
3273 + */
3274 + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
3275 load_TR_desc();
3276 +
3277 load_mm_ldt(&init_mm);
3278
3279 - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
3280 + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
3281
3282 #ifdef CONFIG_DOUBLEFAULT
3283 /* Set up doublefault TSS pointer in the GDT */
3284 @@ -1644,7 +1728,6 @@ void cpu_init(void)
3285
3286 fpu__init_cpu();
3287
3288 - setup_fixmap_gdt(cpu);
3289 load_fixmap_gdt(cpu);
3290 }
3291 #endif
3292 diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
3293 new file mode 100644
3294 index 000000000000..904b0a3c4e53
3295 --- /dev/null
3296 +++ b/arch/x86/kernel/cpu/cpuid-deps.c
3297 @@ -0,0 +1,121 @@
3298 +/* Declare dependencies between CPUIDs */
3299 +#include <linux/kernel.h>
3300 +#include <linux/init.h>
3301 +#include <linux/module.h>
3302 +#include <asm/cpufeature.h>
3303 +
3304 +struct cpuid_dep {
3305 + unsigned int feature;
3306 + unsigned int depends;
3307 +};
3308 +
3309 +/*
3310 + * Table of CPUID features that depend on others.
3311 + *
3312 + * This only includes dependencies that can be usefully disabled, not
3313 + * features part of the base set (like FPU).
3314 + *
3315 + * Note this all is not __init / __initdata because it can be
3316 + * called from cpu hotplug. It shouldn't do anything in this case,
3317 + * but it's difficult to tell that to the init reference checker.
3318 + */
3319 +const static struct cpuid_dep cpuid_deps[] = {
3320 + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
3321 + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
3322 + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
3323 + { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
3324 + { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
3325 + { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
3326 + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
3327 + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
3328 + { X86_FEATURE_XMM, X86_FEATURE_FXSR },
3329 + { X86_FEATURE_XMM2, X86_FEATURE_XMM },
3330 + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
3331 + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
3332 + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
3333 + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
3334 + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
3335 + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
3336 + { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
3337 + { X86_FEATURE_AES, X86_FEATURE_XMM2 },
3338 + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
3339 + { X86_FEATURE_FMA, X86_FEATURE_AVX },
3340 + { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
3341 + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
3342 + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
3343 + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
3344 + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
3345 + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
3346 + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
3347 + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
3348 + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
3349 + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
3350 + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
3351 + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
3352 + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
3353 + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
3354 + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
3355 + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
3356 + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
3357 + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
3358 + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
3359 + {}
3360 +};
3361 +
3362 +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
3363 +{
3364 + /*
3365 + * Note: This could use the non atomic __*_bit() variants, but the
3366 + * rest of the cpufeature code uses atomics as well, so keep it for
3367 + * consistency. Cleanup all of it separately.
3368 + */
3369 + if (!c) {
3370 + clear_cpu_cap(&boot_cpu_data, feature);
3371 + set_bit(feature, (unsigned long *)cpu_caps_cleared);
3372 + } else {
3373 + clear_bit(feature, (unsigned long *)c->x86_capability);
3374 + }
3375 +}
3376 +
3377 +/* Take the capabilities and the BUG bits into account */
3378 +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
3379 +
3380 +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
3381 +{
3382 + DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
3383 + const struct cpuid_dep *d;
3384 + bool changed;
3385 +
3386 + if (WARN_ON(feature >= MAX_FEATURE_BITS))
3387 + return;
3388 +
3389 + clear_feature(c, feature);
3390 +
3391 + /* Collect all features to disable, handling dependencies */
3392 + memset(disable, 0, sizeof(disable));
3393 + __set_bit(feature, disable);
3394 +
3395 + /* Loop until we get a stable state. */
3396 + do {
3397 + changed = false;
3398 + for (d = cpuid_deps; d->feature; d++) {
3399 + if (!test_bit(d->depends, disable))
3400 + continue;
3401 + if (__test_and_set_bit(d->feature, disable))
3402 + continue;
3403 +
3404 + changed = true;
3405 + clear_feature(c, d->feature);
3406 + }
3407 + } while (changed);
3408 +}
3409 +
3410 +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
3411 +{
3412 + do_clear_cpu_cap(c, feature);
3413 +}
3414 +
3415 +void setup_clear_cpu_cap(unsigned int feature)
3416 +{
3417 + do_clear_cpu_cap(NULL, feature);
3418 +}
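[Editor's aside, not part of the patch] do_clear_cpu_cap() above computes the transitive closure of the dependency table: it keeps rescanning cpuid_deps[] until a full pass disables nothing new, so clearing one feature also drags in everything that depends on it, directly or indirectly. Below is a hedged, stand-alone sketch of that fixed-point loop using a plain bitmask and made-up feature numbers instead of the kernel's bitmap helpers.

	#include <stdio.h>

	struct dep { unsigned int feature, depends; };

	/* "feature" cannot work if "depends" is disabled (example table only). */
	static const struct dep deps[] = {
		{ 2 /* AVX     */, 1 /* XSAVE */ },
		{ 3 /* AVX2    */, 2 /* AVX   */ },
		{ 4 /* AVX512F */, 2 /* AVX   */ },
		{ 0, 0 }
	};

	static unsigned long clear_with_deps(unsigned int feature)
	{
		unsigned long disable = 1UL << feature;
		const struct dep *d;
		int changed;

		/* Loop until a full sweep over the table adds nothing new. */
		do {
			changed = 0;
			for (d = deps; d->feature; d++) {
				if (!(disable & (1UL << d->depends)))
					continue;
				if (disable & (1UL << d->feature))
					continue;
				disable |= 1UL << d->feature;
				changed = 1;
			}
		} while (changed);

		return disable;
	}

	int main(void)
	{
		/* Disabling XSAVE (bit 1) also drags in AVX, AVX2 and AVX512F. */
		printf("disabled mask: %#lx\n", clear_with_deps(1));
		return 0;
	}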
3419 diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
3420 index 4fa90006ac68..bea8d3e24f50 100644
3421 --- a/arch/x86/kernel/cpu/hypervisor.c
3422 +++ b/arch/x86/kernel/cpu/hypervisor.c
3423 @@ -26,6 +26,12 @@
3424 #include <asm/processor.h>
3425 #include <asm/hypervisor.h>
3426
3427 +extern const struct hypervisor_x86 x86_hyper_vmware;
3428 +extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
3429 +extern const struct hypervisor_x86 x86_hyper_xen_pv;
3430 +extern const struct hypervisor_x86 x86_hyper_xen_hvm;
3431 +extern const struct hypervisor_x86 x86_hyper_kvm;
3432 +
3433 static const __initconst struct hypervisor_x86 * const hypervisors[] =
3434 {
3435 #ifdef CONFIG_XEN_PV
3436 @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
3437 #endif
3438 };
3439
3440 -const struct hypervisor_x86 *x86_hyper;
3441 -EXPORT_SYMBOL(x86_hyper);
3442 +enum x86_hypervisor_type x86_hyper_type;
3443 +EXPORT_SYMBOL(x86_hyper_type);
3444
3445 -static inline void __init
3446 +static inline const struct hypervisor_x86 * __init
3447 detect_hypervisor_vendor(void)
3448 {
3449 - const struct hypervisor_x86 *h, * const *p;
3450 + const struct hypervisor_x86 *h = NULL, * const *p;
3451 uint32_t pri, max_pri = 0;
3452
3453 for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
3454 - h = *p;
3455 - pri = h->detect();
3456 - if (pri != 0 && pri > max_pri) {
3457 + pri = (*p)->detect();
3458 + if (pri > max_pri) {
3459 max_pri = pri;
3460 - x86_hyper = h;
3461 + h = *p;
3462 }
3463 }
3464
3465 - if (max_pri)
3466 - pr_info("Hypervisor detected: %s\n", x86_hyper->name);
3467 + if (h)
3468 + pr_info("Hypervisor detected: %s\n", h->name);
3469 +
3470 + return h;
3471 }
3472
3473 -void __init init_hypervisor_platform(void)
3474 +static void __init copy_array(const void *src, void *target, unsigned int size)
3475 {
3476 + unsigned int i, n = size / sizeof(void *);
3477 + const void * const *from = (const void * const *)src;
3478 + const void **to = (const void **)target;
3479
3480 - detect_hypervisor_vendor();
3481 -
3482 - if (!x86_hyper)
3483 - return;
3484 -
3485 - if (x86_hyper->init_platform)
3486 - x86_hyper->init_platform();
3487 + for (i = 0; i < n; i++)
3488 + if (from[i])
3489 + to[i] = from[i];
3490 }
3491
3492 -bool __init hypervisor_x2apic_available(void)
3493 +void __init init_hypervisor_platform(void)
3494 {
3495 - return x86_hyper &&
3496 - x86_hyper->x2apic_available &&
3497 - x86_hyper->x2apic_available();
3498 -}
3499 + const struct hypervisor_x86 *h;
3500
3501 -void hypervisor_pin_vcpu(int cpu)
3502 -{
3503 - if (!x86_hyper)
3504 + h = detect_hypervisor_vendor();
3505 +
3506 + if (!h)
3507 return;
3508
3509 - if (x86_hyper->pin_vcpu)
3510 - x86_hyper->pin_vcpu(cpu);
3511 - else
3512 - WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
3513 + copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
3514 + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
3515 +
3516 + x86_hyper_type = h->type;
3517 + x86_init.hyper.init_platform();
3518 }
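[Editor's aside, not part of the patch] detect_hypervisor_vendor() above keeps the candidate whose detect() callback reports the highest non-zero priority, and copy_array() then overlays only the callbacks that candidate actually fills in. Here is a hedged, stand-alone sketch of the highest-priority selection; the vendor names and priority values are invented for the example.

	#include <stdio.h>
	#include <stdint.h>
	#include <stddef.h>

	struct guest_vendor {
		const char *name;
		uint32_t (*detect)(void);	/* 0 means "not running on this hypervisor" */
	};

	static uint32_t detect_none(void) { return 0; }
	static uint32_t detect_foo(void)  { return 1; }
	static uint32_t detect_bar(void)  { return 9; }	/* pretend CPUID said so */

	static const struct guest_vendor vendors[] = {
		{ "none-detected", detect_none },
		{ "foo-visor",	   detect_foo  },
		{ "bar-visor",	   detect_bar  },
	};

	static const struct guest_vendor *detect_vendor(void)
	{
		const struct guest_vendor *best = NULL;
		uint32_t pri, max_pri = 0;
		size_t i;

		for (i = 0; i < sizeof(vendors) / sizeof(vendors[0]); i++) {
			pri = vendors[i].detect();
			if (pri > max_pri) {	/* keep the highest non-zero priority */
				max_pri = pri;
				best = &vendors[i];
			}
		}
		return best;
	}

	int main(void)
	{
		const struct guest_vendor *h = detect_vendor();

		printf("detected: %s\n", h ? h->name : "bare metal");
		return 0;
	}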
3519 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
3520 index 236324e83a3a..85eb5fc180c8 100644
3521 --- a/arch/x86/kernel/cpu/mshyperv.c
3522 +++ b/arch/x86/kernel/cpu/mshyperv.c
3523 @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
3524 #endif
3525 }
3526
3527 -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
3528 +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
3529 .name = "Microsoft Hyper-V",
3530 .detect = ms_hyperv_platform,
3531 - .init_platform = ms_hyperv_init_platform,
3532 + .type = X86_HYPER_MS_HYPERV,
3533 + .init.init_platform = ms_hyperv_init_platform,
3534 };
3535 -EXPORT_SYMBOL(x86_hyper_ms_hyperv);
3536 diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
3537 index 40ed26852ebd..8e005329648b 100644
3538 --- a/arch/x86/kernel/cpu/vmware.c
3539 +++ b/arch/x86/kernel/cpu/vmware.c
3540 @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
3541 (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
3542 }
3543
3544 -const __refconst struct hypervisor_x86 x86_hyper_vmware = {
3545 +const __initconst struct hypervisor_x86 x86_hyper_vmware = {
3546 .name = "VMware",
3547 .detect = vmware_platform,
3548 - .init_platform = vmware_platform_setup,
3549 - .x2apic_available = vmware_legacy_x2apic_available,
3550 + .type = X86_HYPER_VMWARE,
3551 + .init.init_platform = vmware_platform_setup,
3552 + .init.x2apic_available = vmware_legacy_x2apic_available,
3553 };
3554 -EXPORT_SYMBOL(x86_hyper_vmware);
3555 diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
3556 index 0e662c55ae90..0b8cedb20d6d 100644
3557 --- a/arch/x86/kernel/doublefault.c
3558 +++ b/arch/x86/kernel/doublefault.c
3559 @@ -50,25 +50,23 @@ static void doublefault_fn(void)
3560 cpu_relax();
3561 }
3562
3563 -struct tss_struct doublefault_tss __cacheline_aligned = {
3564 - .x86_tss = {
3565 - .sp0 = STACK_START,
3566 - .ss0 = __KERNEL_DS,
3567 - .ldt = 0,
3568 - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
3569 -
3570 - .ip = (unsigned long) doublefault_fn,
3571 - /* 0x2 bit is always set */
3572 - .flags = X86_EFLAGS_SF | 0x2,
3573 - .sp = STACK_START,
3574 - .es = __USER_DS,
3575 - .cs = __KERNEL_CS,
3576 - .ss = __KERNEL_DS,
3577 - .ds = __USER_DS,
3578 - .fs = __KERNEL_PERCPU,
3579 -
3580 - .__cr3 = __pa_nodebug(swapper_pg_dir),
3581 - }
3582 +struct x86_hw_tss doublefault_tss __cacheline_aligned = {
3583 + .sp0 = STACK_START,
3584 + .ss0 = __KERNEL_DS,
3585 + .ldt = 0,
3586 + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
3587 +
3588 + .ip = (unsigned long) doublefault_fn,
3589 + /* 0x2 bit is always set */
3590 + .flags = X86_EFLAGS_SF | 0x2,
3591 + .sp = STACK_START,
3592 + .es = __USER_DS,
3593 + .cs = __KERNEL_CS,
3594 + .ss = __KERNEL_DS,
3595 + .ds = __USER_DS,
3596 + .fs = __KERNEL_PERCPU,
3597 +
3598 + .__cr3 = __pa_nodebug(swapper_pg_dir),
3599 };
3600
3601 /* dummy for do_double_fault() call */
3602 diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
3603 index f13b4c00a5de..bbd6d986e2d0 100644
3604 --- a/arch/x86/kernel/dumpstack.c
3605 +++ b/arch/x86/kernel/dumpstack.c
3606 @@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
3607 return true;
3608 }
3609
3610 +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
3611 +{
3612 + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
3613 +
3614 + void *begin = ss;
3615 + void *end = ss + 1;
3616 +
3617 + if ((void *)stack < begin || (void *)stack >= end)
3618 + return false;
3619 +
3620 + info->type = STACK_TYPE_SYSENTER;
3621 + info->begin = begin;
3622 + info->end = end;
3623 + info->next_sp = NULL;
3624 +
3625 + return true;
3626 +}
3627 +
3628 static void printk_stack_address(unsigned long address, int reliable,
3629 char *log_lvl)
3630 {
3631 @@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
3632 printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
3633 }
3634
3635 +void show_iret_regs(struct pt_regs *regs)
3636 +{
3637 + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
3638 + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
3639 + regs->sp, regs->flags);
3640 +}
3641 +
3642 +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
3643 +{
3644 + if (on_stack(info, regs, sizeof(*regs)))
3645 + __show_regs(regs, 0);
3646 + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
3647 + IRET_FRAME_SIZE)) {
3648 + /*
3649 + * When an interrupt or exception occurs in entry code, the
3650 + * full pt_regs might not have been saved yet. In that case
3651 + * just print the iret frame.
3652 + */
3653 + show_iret_regs(regs);
3654 + }
3655 +}
3656 +
3657 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3658 unsigned long *stack, char *log_lvl)
3659 {
3660 @@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3661 * - task stack
3662 * - interrupt stack
3663 * - HW exception stacks (double fault, nmi, debug, mce)
3664 + * - SYSENTER stack
3665 *
3666 - * x86-32 can have up to three stacks:
3667 + * x86-32 can have up to four stacks:
3668 * - task stack
3669 * - softirq stack
3670 * - hardirq stack
3671 + * - SYSENTER stack
3672 */
3673 for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
3674 const char *stack_name;
3675
3676 - /*
3677 - * If we overflowed the task stack into a guard page, jump back
3678 - * to the bottom of the usable stack.
3679 - */
3680 - if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
3681 - stack = task_stack_page(task);
3682 -
3683 - if (get_stack_info(stack, task, &stack_info, &visit_mask))
3684 - break;
3685 + if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
3686 + /*
3687 + * We weren't on a valid stack. It's possible that
3688 + * we overflowed a valid stack into a guard page.
3689 + * See if the next page up is valid so that we can
3690 + * generate some kind of backtrace if this happens.
3691 + */
3692 + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
3693 + if (get_stack_info(stack, task, &stack_info, &visit_mask))
3694 + break;
3695 + }
3696
3697 stack_name = stack_type_name(stack_info.type);
3698 if (stack_name)
3699 printk("%s <%s>\n", log_lvl, stack_name);
3700
3701 - if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
3702 - __show_regs(regs, 0);
3703 + if (regs)
3704 + show_regs_safe(&stack_info, regs);
3705
3706 /*
3707 * Scan the stack, printing any text addresses we find. At the
3708 @@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3709
3710 /*
3711 * Don't print regs->ip again if it was already printed
3712 - * by __show_regs() below.
3713 + * by show_regs_safe() below.
3714 */
3715 if (regs && stack == &regs->ip)
3716 goto next;
3717 @@ -155,8 +199,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
3718
3719 /* if the frame has entry regs, print them */
3720 regs = unwind_get_entry_regs(&state);
3721 - if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
3722 - __show_regs(regs, 0);
3723 + if (regs)
3724 + show_regs_safe(&stack_info, regs);
3725 }
3726
3727 if (stack_name)
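[Editor's aside, not part of the patch] in_sysenter_stack() added above is just a half-open range check: the candidate stack pointer must fall inside [begin, end) of the per-CPU SYSENTER stack before the stack_info fields are filled in. A minimal, hypothetical sketch of that bounds test, with an ordinary static array standing in for the per-CPU stack:

	#include <stdio.h>
	#include <stdbool.h>

	/* Hypothetical stand-in for a fixed-size, per-CPU stack area. */
	static unsigned long sysenter_stack[64];

	static bool in_sysenter_range(const unsigned long *ptr)
	{
		const unsigned long *begin = sysenter_stack;
		const unsigned long *end   = sysenter_stack + 64;	/* one past the last slot */

		return ptr >= begin && ptr < end;	/* half-open interval [begin, end) */
	}

	int main(void)
	{
		printf("%d\n", in_sysenter_range(&sysenter_stack[10]));	/* 1: inside the stack */
		printf("%d\n", in_sysenter_range(sysenter_stack + 64));	/* 0: one past the end */
		return 0;
	}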
3728 diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
3729 index daefae83a3aa..5ff13a6b3680 100644
3730 --- a/arch/x86/kernel/dumpstack_32.c
3731 +++ b/arch/x86/kernel/dumpstack_32.c
3732 @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
3733 if (type == STACK_TYPE_SOFTIRQ)
3734 return "SOFTIRQ";
3735
3736 + if (type == STACK_TYPE_SYSENTER)
3737 + return "SYSENTER";
3738 +
3739 return NULL;
3740 }
3741
3742 @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
3743 if (task != current)
3744 goto unknown;
3745
3746 + if (in_sysenter_stack(stack, info))
3747 + goto recursion_check;
3748 +
3749 if (in_hardirq_stack(stack, info))
3750 goto recursion_check;
3751
3752 diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
3753 index 88ce2ffdb110..abc828f8c297 100644
3754 --- a/arch/x86/kernel/dumpstack_64.c
3755 +++ b/arch/x86/kernel/dumpstack_64.c
3756 @@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
3757 if (type == STACK_TYPE_IRQ)
3758 return "IRQ";
3759
3760 + if (type == STACK_TYPE_SYSENTER)
3761 + return "SYSENTER";
3762 +
3763 if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
3764 return exception_stack_names[type - STACK_TYPE_EXCEPTION];
3765
3766 @@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
3767 if (in_irq_stack(stack, info))
3768 goto recursion_check;
3769
3770 + if (in_sysenter_stack(stack, info))
3771 + goto recursion_check;
3772 +
3773 goto unknown;
3774
3775 recursion_check:
3776 diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
3777 index 7affb7e3d9a5..6abd83572b01 100644
3778 --- a/arch/x86/kernel/fpu/init.c
3779 +++ b/arch/x86/kernel/fpu/init.c
3780 @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
3781 */
3782 static void __init fpu__init_parse_early_param(void)
3783 {
3784 + char arg[32];
3785 + char *argptr = arg;
3786 + int bit;
3787 +
3788 if (cmdline_find_option_bool(boot_command_line, "no387"))
3789 setup_clear_cpu_cap(X86_FEATURE_FPU);
3790
3791 @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
3792
3793 if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
3794 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
3795 +
3796 + if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
3797 + sizeof(arg)) &&
3798 + get_option(&argptr, &bit) &&
3799 + bit >= 0 &&
3800 + bit < NCAPINTS * 32)
3801 + setup_clear_cpu_cap(bit);
3802 }
3803
3804 /*
3805 diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
3806 index f1d5476c9022..87a57b7642d3 100644
3807 --- a/arch/x86/kernel/fpu/xstate.c
3808 +++ b/arch/x86/kernel/fpu/xstate.c
3809 @@ -15,6 +15,7 @@
3810 #include <asm/fpu/xstate.h>
3811
3812 #include <asm/tlbflush.h>
3813 +#include <asm/cpufeature.h>
3814
3815 /*
3816 * Although we spell it out in here, the Processor Trace
3817 @@ -36,6 +37,19 @@ static const char *xfeature_names[] =
3818 "unknown xstate feature" ,
3819 };
3820
3821 +static short xsave_cpuid_features[] __initdata = {
3822 + X86_FEATURE_FPU,
3823 + X86_FEATURE_XMM,
3824 + X86_FEATURE_AVX,
3825 + X86_FEATURE_MPX,
3826 + X86_FEATURE_MPX,
3827 + X86_FEATURE_AVX512F,
3828 + X86_FEATURE_AVX512F,
3829 + X86_FEATURE_AVX512F,
3830 + X86_FEATURE_INTEL_PT,
3831 + X86_FEATURE_PKU,
3832 +};
3833 +
3834 /*
3835 * Mask of xstate features supported by the CPU and the kernel:
3836 */
3837 @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
3838 void fpu__xstate_clear_all_cpu_caps(void)
3839 {
3840 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
3841 - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
3842 - setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
3843 - setup_clear_cpu_cap(X86_FEATURE_XSAVES);
3844 - setup_clear_cpu_cap(X86_FEATURE_AVX);
3845 - setup_clear_cpu_cap(X86_FEATURE_AVX2);
3846 - setup_clear_cpu_cap(X86_FEATURE_AVX512F);
3847 - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
3848 - setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
3849 - setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
3850 - setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
3851 - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
3852 - setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
3853 - setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
3854 - setup_clear_cpu_cap(X86_FEATURE_MPX);
3855 - setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
3856 - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
3857 - setup_clear_cpu_cap(X86_FEATURE_PKU);
3858 - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
3859 - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
3860 - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
3861 }
3862
3863 /*
3864 @@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
3865 unsigned int eax, ebx, ecx, edx;
3866 static int on_boot_cpu __initdata = 1;
3867 int err;
3868 + int i;
3869
3870 WARN_ON_FPU(!on_boot_cpu);
3871 on_boot_cpu = 0;
3872 @@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
3873 goto out_disable;
3874 }
3875
3876 + /*
3877 + * Clear XSAVE features that are disabled in the normal CPUID.
3878 + */
3879 + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
3880 + if (!boot_cpu_has(xsave_cpuid_features[i]))
3881 + xfeatures_mask &= ~BIT(i);
3882 + }
3883 +
3884 xfeatures_mask &= fpu__get_supported_xfeatures_mask();
3885
3886 /* Enable xstate instructions to be able to continue with initialization: */
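[Editor's aside, not part of the patch] The loop added above keeps an xfeature bit only if the CPUID feature it maps to via xsave_cpuid_features[] is actually present, so clearing, say, AVX also drops its XSAVE state component. A hedged, stand-alone sketch of filtering a feature bitmap through such a table follows; the bit positions and the simplified boot_cpu_has() here are made up for the example.

	#include <stdio.h>
	#include <stdbool.h>

	#define BIT(n) (1UL << (n))

	/* Hypothetical map: xfeature bit i is usable only if CPUID bit xsave_cpuid_features[i] is set. */
	static const int xsave_cpuid_features[] = { 10, 11, 12, 13 };	/* made-up CPUID bit numbers */

	static bool boot_cpu_has(int feature, unsigned long cpuid_bits)
	{
		return cpuid_bits & BIT(feature);
	}

	static unsigned long filter_xfeatures(unsigned long xfeatures_mask,
					      unsigned long cpuid_bits)
	{
		unsigned long i;

		for (i = 0; i < sizeof(xsave_cpuid_features) / sizeof(xsave_cpuid_features[0]); i++) {
			if (!boot_cpu_has(xsave_cpuid_features[i], cpuid_bits))
				xfeatures_mask &= ~BIT(i);	/* drop the matching state component */
		}
		return xfeatures_mask;
	}

	int main(void)
	{
		/* CPUID reports bits 10 and 12 only, so xfeature bits 1 and 3 are cleared. */
		printf("%#lx\n", filter_xfeatures(0xfUL, BIT(10) | BIT(12)));	/* prints 0x5 */
		return 0;
	}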
3887 diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
3888 index f1d528bb66a6..c29020907886 100644
3889 --- a/arch/x86/kernel/head_32.S
3890 +++ b/arch/x86/kernel/head_32.S
3891 @@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
3892 #endif
3893
3894 .Ldefault_entry:
3895 -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
3896 - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
3897 - X86_CR0_PG)
3898 movl $(CR0_STATE & ~X86_CR0_PG),%eax
3899 movl %eax,%cr0
3900
3901 @@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
3902 # 24(%rsp) error code
3903 i = 0
3904 .rept NUM_EXCEPTION_VECTORS
3905 - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
3906 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
3907 pushl $0 # Dummy error code, to make stack frame uniform
3908 .endif
3909 pushl $i # 20(%esp) Vector number
3910 diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
3911 index 6dde3f3fc1f8..7dca675fe78d 100644
3912 --- a/arch/x86/kernel/head_64.S
3913 +++ b/arch/x86/kernel/head_64.S
3914 @@ -38,11 +38,12 @@
3915 *
3916 */
3917
3918 -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
3919 #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
3920
3921 +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
3922 PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
3923 PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
3924 +#endif
3925 L3_START_KERNEL = pud_index(__START_KERNEL_map)
3926
3927 .text
3928 @@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
3929 .code64
3930 .globl startup_64
3931 startup_64:
3932 + UNWIND_HINT_EMPTY
3933 /*
3934 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
3935 * and someone has loaded an identity mapped page table
3936 @@ -89,6 +91,7 @@ startup_64:
3937 addq $(early_top_pgt - __START_KERNEL_map), %rax
3938 jmp 1f
3939 ENTRY(secondary_startup_64)
3940 + UNWIND_HINT_EMPTY
3941 /*
3942 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
3943 * and someone has loaded a mapped page table.
3944 @@ -133,6 +136,7 @@ ENTRY(secondary_startup_64)
3945 movq $1f, %rax
3946 jmp *%rax
3947 1:
3948 + UNWIND_HINT_EMPTY
3949
3950 /* Check if nx is implemented */
3951 movl $0x80000001, %eax
3952 @@ -150,9 +154,6 @@ ENTRY(secondary_startup_64)
3953 1: wrmsr /* Make changes effective */
3954
3955 /* Setup cr0 */
3956 -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
3957 - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
3958 - X86_CR0_PG)
3959 movl $CR0_STATE, %eax
3960 /* Make changes effective */
3961 movq %rax, %cr0
3962 @@ -235,7 +236,7 @@ ENTRY(secondary_startup_64)
3963 pushq %rax # target address in negative space
3964 lretq
3965 .Lafter_lret:
3966 -ENDPROC(secondary_startup_64)
3967 +END(secondary_startup_64)
3968
3969 #include "verify_cpu.S"
3970
3971 @@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64)
3972 */
3973 ENTRY(start_cpu0)
3974 movq initial_stack(%rip), %rsp
3975 + UNWIND_HINT_EMPTY
3976 jmp .Ljump_to_C_code
3977 ENDPROC(start_cpu0)
3978 #endif
3979 @@ -266,26 +268,24 @@ ENDPROC(start_cpu0)
3980 .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
3981 __FINITDATA
3982
3983 -bad_address:
3984 - jmp bad_address
3985 -
3986 __INIT
3987 ENTRY(early_idt_handler_array)
3988 - # 104(%rsp) %rflags
3989 - # 96(%rsp) %cs
3990 - # 88(%rsp) %rip
3991 - # 80(%rsp) error code
3992 i = 0
3993 .rept NUM_EXCEPTION_VECTORS
3994 - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
3995 - pushq $0 # Dummy error code, to make stack frame uniform
3996 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
3997 + UNWIND_HINT_IRET_REGS
3998 + pushq $0 # Dummy error code, to make stack frame uniform
3999 + .else
4000 + UNWIND_HINT_IRET_REGS offset=8
4001 .endif
4002 pushq $i # 72(%rsp) Vector number
4003 jmp early_idt_handler_common
4004 + UNWIND_HINT_IRET_REGS
4005 i = i + 1
4006 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
4007 .endr
4008 -ENDPROC(early_idt_handler_array)
4009 + UNWIND_HINT_IRET_REGS offset=16
4010 +END(early_idt_handler_array)
4011
4012 early_idt_handler_common:
4013 /*
4014 @@ -313,6 +313,7 @@ early_idt_handler_common:
4015 pushq %r13 /* pt_regs->r13 */
4016 pushq %r14 /* pt_regs->r14 */
4017 pushq %r15 /* pt_regs->r15 */
4018 + UNWIND_HINT_REGS
4019
4020 cmpq $14,%rsi /* Page fault? */
4021 jnz 10f
4022 @@ -327,8 +328,8 @@ early_idt_handler_common:
4023
4024 20:
4025 decl early_recursion_flag(%rip)
4026 - jmp restore_regs_and_iret
4027 -ENDPROC(early_idt_handler_common)
4028 + jmp restore_regs_and_return_to_kernel
4029 +END(early_idt_handler_common)
4030
4031 __INITDATA
4032
4033 @@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
4034
4035 .data
4036
4037 -#ifndef CONFIG_XEN
4038 -NEXT_PAGE(init_top_pgt)
4039 - .fill 512,8,0
4040 -#else
4041 +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
4042 NEXT_PAGE(init_top_pgt)
4043 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
4044 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
4045 @@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
4046 * Don't set NX because code runs from these pages.
4047 */
4048 PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
4049 +#else
4050 +NEXT_PAGE(init_top_pgt)
4051 + .fill 512,8,0
4052 #endif
4053
4054 #ifdef CONFIG_X86_5LEVEL
4055 @@ -435,7 +436,7 @@ ENTRY(phys_base)
4056 EXPORT_SYMBOL(phys_base)
4057
4058 #include "../../x86/xen/xen-head.S"
4059 -
4060 +
4061 __PAGE_ALIGNED_BSS
4062 NEXT_PAGE(empty_zero_page)
4063 .skip PAGE_SIZE
4064 diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
4065 index 3feb648781c4..2f723301eb58 100644
4066 --- a/arch/x86/kernel/ioport.c
4067 +++ b/arch/x86/kernel/ioport.c
4068 @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
4069 * because the ->io_bitmap_max value must match the bitmap
4070 * contents:
4071 */
4072 - tss = &per_cpu(cpu_tss, get_cpu());
4073 + tss = &per_cpu(cpu_tss_rw, get_cpu());
4074
4075 if (turn_on)
4076 bitmap_clear(t->io_bitmap_ptr, from, num);
4077 diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
4078 index 52089c043160..aa9d51eea9d0 100644
4079 --- a/arch/x86/kernel/irq.c
4080 +++ b/arch/x86/kernel/irq.c
4081 @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
4082 /* high bit used in ret_from_ code */
4083 unsigned vector = ~regs->orig_ax;
4084
4085 - /*
4086 - * NB: Unlike exception entries, IRQ entries do not reliably
4087 - * handle context tracking in the low-level entry code. This is
4088 - * because syscall entries execute briefly with IRQs on before
4089 - * updating context tracking state, so we can take an IRQ from
4090 - * kernel mode with CONTEXT_USER. The low-level entry code only
4091 - * updates the context if we came from user mode, so we won't
4092 - * switch to CONTEXT_KERNEL. We'll fix that once the syscall
4093 - * code is cleaned up enough that we can cleanly defer enabling
4094 - * IRQs.
4095 - */
4096 -
4097 entering_irq();
4098
4099 /* entering_irq() tells RCU that we're not quiescent. Check it. */
4100 diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
4101 index 020efbf5786b..d86e344f5b3d 100644
4102 --- a/arch/x86/kernel/irq_64.c
4103 +++ b/arch/x86/kernel/irq_64.c
4104 @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
4105 if (regs->sp >= estack_top && regs->sp <= estack_bottom)
4106 return;
4107
4108 - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
4109 + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
4110 current->comm, curbase, regs->sp,
4111 irq_stack_top, irq_stack_bottom,
4112 - estack_top, estack_bottom);
4113 + estack_top, estack_bottom, (void *)regs->ip);
4114
4115 if (sysctl_panic_on_stackoverflow)
4116 panic("low stack detected by irq handler - check messages\n");
4117 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
4118 index 8bb9594d0761..a94de09edbed 100644
4119 --- a/arch/x86/kernel/kvm.c
4120 +++ b/arch/x86/kernel/kvm.c
4121 @@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
4122 return kvm_cpuid_base();
4123 }
4124
4125 -const struct hypervisor_x86 x86_hyper_kvm __refconst = {
4126 +const __initconst struct hypervisor_x86 x86_hyper_kvm = {
4127 .name = "KVM",
4128 .detect = kvm_detect,
4129 - .x2apic_available = kvm_para_available,
4130 + .type = X86_HYPER_KVM,
4131 + .init.x2apic_available = kvm_para_available,
4132 };
4133 -EXPORT_SYMBOL_GPL(x86_hyper_kvm);
4134
4135 static __init int activate_jump_labels(void)
4136 {
4137 diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
4138 index ae5615b03def..1c1eae961340 100644
4139 --- a/arch/x86/kernel/ldt.c
4140 +++ b/arch/x86/kernel/ldt.c
4141 @@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
4142 static void install_ldt(struct mm_struct *current_mm,
4143 struct ldt_struct *ldt)
4144 {
4145 - /* Synchronizes with lockless_dereference in load_mm_ldt. */
4146 + /* Synchronizes with READ_ONCE in load_mm_ldt. */
4147 smp_store_release(&current_mm->context.ldt, ldt);
4148
4149 /* Activate the LDT for all CPUs using current_mm. */
4150 diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
4151 index ac0be8283325..9edadabf04f6 100644
4152 --- a/arch/x86/kernel/paravirt_patch_64.c
4153 +++ b/arch/x86/kernel/paravirt_patch_64.c
4154 @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
4155 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
4156 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
4157 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
4158 -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
4159 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
4160
4161 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
4162 @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
4163 PATCH_SITE(pv_mmu_ops, read_cr2);
4164 PATCH_SITE(pv_mmu_ops, read_cr3);
4165 PATCH_SITE(pv_mmu_ops, write_cr3);
4166 - PATCH_SITE(pv_mmu_ops, flush_tlb_single);
4167 PATCH_SITE(pv_cpu_ops, wbinvd);
4168 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
4169 case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
4170 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
4171 index c67685337c5a..517415978409 100644
4172 --- a/arch/x86/kernel/process.c
4173 +++ b/arch/x86/kernel/process.c
4174 @@ -47,9 +47,25 @@
4175 * section. Since TSS's are completely CPU-local, we want them
4176 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
4177 */
4178 -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
4179 +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
4180 .x86_tss = {
4181 - .sp0 = TOP_OF_INIT_STACK,
4182 + /*
4183 + * .sp0 is only used when entering ring 0 from a lower
4184 + * privilege level. Since the init task never runs anything
4185 + * but ring 0 code, there is no need for a valid value here.
4186 + * Poison it.
4187 + */
4188 + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
4189 +
4190 +#ifdef CONFIG_X86_64
4191 + /*
4192 + * .sp1 is cpu_current_top_of_stack. The init task never
4193 + * runs user code, but cpu_current_top_of_stack should still
4194 + * be well defined before the first context switch.
4195 + */
4196 + .sp1 = TOP_OF_INIT_STACK,
4197 +#endif
4198 +
4199 #ifdef CONFIG_X86_32
4200 .ss0 = __KERNEL_DS,
4201 .ss1 = __KERNEL_CS,
4202 @@ -65,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
4203 */
4204 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
4205 #endif
4206 -#ifdef CONFIG_X86_32
4207 - .SYSENTER_stack_canary = STACK_END_MAGIC,
4208 -#endif
4209 };
4210 -EXPORT_PER_CPU_SYMBOL(cpu_tss);
4211 +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
4212
4213 DEFINE_PER_CPU(bool, __tss_limit_invalid);
4214 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
4215 @@ -98,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
4216 struct fpu *fpu = &t->fpu;
4217
4218 if (bp) {
4219 - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
4220 + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
4221
4222 t->io_bitmap_ptr = NULL;
4223 clear_thread_flag(TIF_IO_BITMAP);
4224 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
4225 index 11966251cd42..5224c6099184 100644
4226 --- a/arch/x86/kernel/process_32.c
4227 +++ b/arch/x86/kernel/process_32.c
4228 @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4229 struct fpu *prev_fpu = &prev->fpu;
4230 struct fpu *next_fpu = &next->fpu;
4231 int cpu = smp_processor_id();
4232 - struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
4233 + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
4234
4235 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
4236
4237 @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4238
4239 /*
4240 * Reload esp0 and cpu_current_top_of_stack. This changes
4241 - * current_thread_info().
4242 + * current_thread_info(). Refresh the SYSENTER configuration in
4243 + * case prev or next is vm86.
4244 */
4245 - load_sp0(tss, next);
4246 + update_sp0(next_p);
4247 + refresh_sysenter_cs(next);
4248 this_cpu_write(cpu_current_top_of_stack,
4249 (unsigned long)task_stack_page(next_p) +
4250 THREAD_SIZE);
4251 diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
4252 index 302e7b2572d1..c75466232016 100644
4253 --- a/arch/x86/kernel/process_64.c
4254 +++ b/arch/x86/kernel/process_64.c
4255 @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
4256 unsigned int fsindex, gsindex;
4257 unsigned int ds, cs, es;
4258
4259 - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
4260 - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
4261 - regs->sp, regs->flags);
4262 + show_iret_regs(regs);
4263 +
4264 if (regs->orig_ax != -1)
4265 pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
4266 else
4267 @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
4268 printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
4269 regs->r13, regs->r14, regs->r15);
4270
4271 + if (!all)
4272 + return;
4273 +
4274 asm("movl %%ds,%0" : "=r" (ds));
4275 asm("movl %%cs,%0" : "=r" (cs));
4276 asm("movl %%es,%0" : "=r" (es));
4277 @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
4278 rdmsrl(MSR_GS_BASE, gs);
4279 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
4280
4281 - if (!all)
4282 - return;
4283 -
4284 cr0 = read_cr0();
4285 cr2 = read_cr2();
4286 cr3 = __read_cr3();
4287 @@ -274,7 +273,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
4288 struct inactive_task_frame *frame;
4289 struct task_struct *me = current;
4290
4291 - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
4292 childregs = task_pt_regs(p);
4293 fork_frame = container_of(childregs, struct fork_frame, regs);
4294 frame = &fork_frame->frame;
4295 @@ -401,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4296 struct fpu *prev_fpu = &prev->fpu;
4297 struct fpu *next_fpu = &next->fpu;
4298 int cpu = smp_processor_id();
4299 - struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
4300 + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
4301
4302 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
4303 this_cpu_read(irq_count) != -1);
4304 @@ -463,9 +461,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
4305 * Switch the PDA and FPU contexts.
4306 */
4307 this_cpu_write(current_task, next_p);
4308 + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
4309
4310 - /* Reload esp0 and ss1. This changes current_thread_info(). */
4311 - load_sp0(tss, next);
4312 + /* Reload sp0. */
4313 + update_sp0(next_p);
4314
4315 /*
4316 * Now maybe reload the debug registers and handle I/O bitmaps
4317 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
4318 index 5e0453f18a57..142126ab5aae 100644
4319 --- a/arch/x86/kernel/smpboot.c
4320 +++ b/arch/x86/kernel/smpboot.c
4321 @@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
4322 #ifdef CONFIG_X86_32
4323 /* Stack for startup_32 can be just as for start_secondary onwards */
4324 irq_ctx_init(cpu);
4325 - per_cpu(cpu_current_top_of_stack, cpu) =
4326 - (unsigned long)task_stack_page(idle) + THREAD_SIZE;
4327 + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
4328 #else
4329 initial_gs = per_cpu_offset(cpu);
4330 #endif
4331 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
4332 index 5a6b8f809792..74136fd16f49 100644
4333 --- a/arch/x86/kernel/traps.c
4334 +++ b/arch/x86/kernel/traps.c
4335 @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
4336 * will catch asm bugs and any attempt to use ist_preempt_enable
4337 * from double_fault.
4338 */
4339 - BUG_ON((unsigned long)(current_top_of_stack() -
4340 - current_stack_pointer) >= THREAD_SIZE);
4341 + BUG_ON(!on_thread_stack());
4342
4343 preempt_enable_no_resched();
4344 }
4345 @@ -349,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4346
4347 /*
4348 * If IRET takes a non-IST fault on the espfix64 stack, then we
4349 - * end up promoting it to a doublefault. In that case, modify
4350 - * the stack to make it look like we just entered the #GP
4351 - * handler from user space, similar to bad_iret.
4352 + * end up promoting it to a doublefault. In that case, take
4353 + * advantage of the fact that we're not using the normal (TSS.sp0)
4354 + * stack right now. We can write a fake #GP(0) frame at TSS.sp0
4355 + * and then modify our own IRET frame so that, when we return,
4356 + * we land directly at the #GP(0) vector with the stack already
4357 + * set up according to its expectations.
4358 + *
4359 + * The net result is that our #GP handler will think that we
4360 + * entered from usermode with the bad user context.
4361 *
4362 * No need for ist_enter here because we don't use RCU.
4363 */
4364 @@ -359,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4365 regs->cs == __KERNEL_CS &&
4366 regs->ip == (unsigned long)native_irq_return_iret)
4367 {
4368 - struct pt_regs *normal_regs = task_pt_regs(current);
4369 + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
4370
4371 - /* Fake a #GP(0) from userspace. */
4372 - memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
4373 - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
4374 + /*
4375 + * regs->sp points to the failing IRET frame on the
4376 + * ESPFIX64 stack. Copy it to the entry stack. This fills
4377 + * in gpregs->ss through gpregs->ip.
4378 + *
4379 + */
4380 + memmove(&gpregs->ip, (void *)regs->sp, 5*8);
4381 + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
4382 +
4383 + /*
4384 + * Adjust our frame so that we return straight to the #GP
4385 + * vector with the expected RSP value. This is safe because
4386 + * we won't enable interrupts or schedule before we invoke
4387 + * general_protection, so nothing will clobber the stack
4388 + * frame we just set up.
4389 + */
4390 regs->ip = (unsigned long)general_protection;
4391 - regs->sp = (unsigned long)&normal_regs->orig_ax;
4392 + regs->sp = (unsigned long)&gpregs->orig_ax;
4393
4394 return;
4395 }
4396 @@ -390,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
4397 *
4398 * Processors update CR2 whenever a page fault is detected. If a
4399 * second page fault occurs while an earlier page fault is being
4400 - * deliv- ered, the faulting linear address of the second fault will
4401 + * delivered, the faulting linear address of the second fault will
4402 * overwrite the contents of CR2 (replacing the previous
4403 * address). These updates to CR2 occur even if the page fault
4404 * results in a double fault or occurs during the delivery of a
4405 @@ -601,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3);
4406
4407 #ifdef CONFIG_X86_64
4408 /*
4409 - * Help handler running on IST stack to switch off the IST stack if the
4410 - * interrupted code was in user mode. The actual stack switch is done in
4411 - * entry_64.S
4412 + * Help handler running on a per-cpu (IST or entry trampoline) stack
4413 + * to switch to the normal thread stack if the interrupted code was in
4414 + * user mode. The actual stack switch is done in entry_64.S
4415 */
4416 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
4417 {
4418 - struct pt_regs *regs = task_pt_regs(current);
4419 - *regs = *eregs;
4420 + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
4421 + if (regs != eregs)
4422 + *regs = *eregs;
4423 return regs;
4424 }
4425 NOKPROBE_SYMBOL(sync_regs);
4426 @@ -624,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
4427 /*
4428 * This is called from entry_64.S early in handling a fault
4429 * caused by a bad iret to user mode. To handle the fault
4430 - * correctly, we want move our stack frame to task_pt_regs
4431 - * and we want to pretend that the exception came from the
4432 - * iret target.
4433 + * correctly, we want to move our stack frame to where it would
4434 + * be had we entered directly on the entry stack (rather than
4435 + * just below the IRET frame) and we want to pretend that the
4436 + * exception came from the IRET target.
4437 */
4438 struct bad_iret_stack *new_stack =
4439 - container_of(task_pt_regs(current),
4440 - struct bad_iret_stack, regs);
4441 + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
4442
4443 /* Copy the IRET target to the new stack. */
4444 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
4445 @@ -795,14 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
4446 debug_stack_usage_dec();
4447
4448 exit:
4449 -#if defined(CONFIG_X86_32)
4450 - /*
4451 - * This is the most likely code path that involves non-trivial use
4452 - * of the SYSENTER stack. Check that we haven't overrun it.
4453 - */
4454 - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
4455 - "Overran or corrupted SYSENTER stack\n");
4456 -#endif
4457 ist_exit(regs);
4458 }
4459 NOKPROBE_SYMBOL(do_debug);
4460 @@ -929,6 +940,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
4461
4462 void __init trap_init(void)
4463 {
4464 + /* Init cpu_entry_area before IST entries are set up */
4465 + setup_cpu_entry_areas();
4466 +
4467 idt_setup_traps();
4468
4469 /*
4470 diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
4471 index a3f973b2c97a..be86a865087a 100644
4472 --- a/arch/x86/kernel/unwind_orc.c
4473 +++ b/arch/x86/kernel/unwind_orc.c
4474 @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
4475 return NULL;
4476 }
4477
4478 -static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
4479 +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
4480 size_t len)
4481 {
4482 struct stack_info *info = &state->stack_info;
4483 + void *addr = (void *)_addr;
4484
4485 - /*
4486 - * If the address isn't on the current stack, switch to the next one.
4487 - *
4488 - * We may have to traverse multiple stacks to deal with the possibility
4489 - * that info->next_sp could point to an empty stack and the address
4490 - * could be on a subsequent stack.
4491 - */
4492 - while (!on_stack(info, (void *)addr, len))
4493 - if (get_stack_info(info->next_sp, state->task, info,
4494 - &state->stack_mask))
4495 - return false;
4496 + if (!on_stack(info, addr, len) &&
4497 + (get_stack_info(addr, state->task, info, &state->stack_mask)))
4498 + return false;
4499
4500 return true;
4501 }
4502 @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
4503 return true;
4504 }
4505
4506 -#define REGS_SIZE (sizeof(struct pt_regs))
4507 -#define SP_OFFSET (offsetof(struct pt_regs, sp))
4508 -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
4509 -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
4510 -
4511 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
4512 - unsigned long *ip, unsigned long *sp, bool full)
4513 + unsigned long *ip, unsigned long *sp)
4514 {
4515 - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
4516 - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
4517 - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
4518 -
4519 - if (IS_ENABLED(CONFIG_X86_64)) {
4520 - if (!stack_access_ok(state, addr, regs_size))
4521 - return false;
4522 + struct pt_regs *regs = (struct pt_regs *)addr;
4523
4524 - *ip = regs->ip;
4525 - *sp = regs->sp;
4526 + /* x86-32 support will be more complicated due to the &regs->sp hack */
4527 + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
4528
4529 - return true;
4530 - }
4531 -
4532 - if (!stack_access_ok(state, addr, sp_offset))
4533 + if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
4534 return false;
4535
4536 *ip = regs->ip;
4537 + *sp = regs->sp;
4538 + return true;
4539 +}
4540
4541 - if (user_mode(regs)) {
4542 - if (!stack_access_ok(state, addr + sp_offset,
4543 - REGS_SIZE - SP_OFFSET))
4544 - return false;
4545 +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
4546 + unsigned long *ip, unsigned long *sp)
4547 +{
4548 + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
4549
4550 - *sp = regs->sp;
4551 - } else
4552 - *sp = (unsigned long)&regs->sp;
4553 + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
4554 + return false;
4555
4556 + *ip = regs->ip;
4557 + *sp = regs->sp;
4558 return true;
4559 }
4560
4561 @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
4562 unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
4563 enum stack_type prev_type = state->stack_info.type;
4564 struct orc_entry *orc;
4565 - struct pt_regs *ptregs;
4566 bool indirect = false;
4567
4568 if (unwind_done(state))
4569 @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
4570 break;
4571
4572 case ORC_TYPE_REGS:
4573 - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
4574 + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
4575 orc_warn("can't dereference registers at %p for ip %pB\n",
4576 (void *)sp, (void *)orig_ip);
4577 goto done;
4578 @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
4579 break;
4580
4581 case ORC_TYPE_REGS_IRET:
4582 - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
4583 + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
4584 orc_warn("can't dereference iret registers at %p for ip %pB\n",
4585 (void *)sp, (void *)orig_ip);
4586 goto done;
4587 }
4588
4589 - ptregs = container_of((void *)sp, struct pt_regs, ip);
4590 - if ((unsigned long)ptregs >= prev_sp &&
4591 - on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
4592 - state->regs = ptregs;
4593 - state->full_regs = false;
4594 - } else
4595 - state->regs = NULL;
4596 -
4597 + state->regs = (void *)sp - IRET_FRAME_OFFSET;
4598 + state->full_regs = false;
4599 state->signal = true;
4600 break;
4601
4602 @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
4603 }
4604
4605 if (get_stack_info((unsigned long *)state->sp, state->task,
4606 - &state->stack_info, &state->stack_mask))
4607 - return;
4608 + &state->stack_info, &state->stack_mask)) {
4609 + /*
4610 + * We weren't on a valid stack. It's possible that
4611 + * we overflowed a valid stack into a guard page.
4612 + * See if the next page up is valid so that we can
4613 + * generate some kind of backtrace if this happens.
4614 + */
4615 + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
4616 + if (get_stack_info(next_page, state->task, &state->stack_info,
4617 + &state->stack_mask))
4618 + return;
4619 + }
4620
4621 /*
4622 * The caller can provide the address of the first frame directly
4623 diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
4624 index 014ea59aa153..3d3c2f71f617 100644
4625 --- a/arch/x86/kernel/verify_cpu.S
4626 +++ b/arch/x86/kernel/verify_cpu.S
4627 @@ -33,7 +33,7 @@
4628 #include <asm/cpufeatures.h>
4629 #include <asm/msr-index.h>
4630
4631 -verify_cpu:
4632 +ENTRY(verify_cpu)
4633 pushf # Save caller passed flags
4634 push $0 # Kill any dangerous flags
4635 popf
4636 @@ -139,3 +139,4 @@ verify_cpu:
4637 popf # Restore caller passed flags
4638 xorl %eax, %eax
4639 ret
4640 +ENDPROC(verify_cpu)
4641 diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
4642 index 68244742ecb0..5edb27f1a2c4 100644
4643 --- a/arch/x86/kernel/vm86_32.c
4644 +++ b/arch/x86/kernel/vm86_32.c
4645 @@ -55,6 +55,7 @@
4646 #include <asm/irq.h>
4647 #include <asm/traps.h>
4648 #include <asm/vm86.h>
4649 +#include <asm/switch_to.h>
4650
4651 /*
4652 * Known problems:
4653 @@ -94,7 +95,6 @@
4654
4655 void save_v86_state(struct kernel_vm86_regs *regs, int retval)
4656 {
4657 - struct tss_struct *tss;
4658 struct task_struct *tsk = current;
4659 struct vm86plus_struct __user *user;
4660 struct vm86 *vm86 = current->thread.vm86;
4661 @@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
4662 do_exit(SIGSEGV);
4663 }
4664
4665 - tss = &per_cpu(cpu_tss, get_cpu());
4666 + preempt_disable();
4667 tsk->thread.sp0 = vm86->saved_sp0;
4668 tsk->thread.sysenter_cs = __KERNEL_CS;
4669 - load_sp0(tss, &tsk->thread);
4670 + update_sp0(tsk);
4671 + refresh_sysenter_cs(&tsk->thread);
4672 vm86->saved_sp0 = 0;
4673 - put_cpu();
4674 + preempt_enable();
4675
4676 memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
4677
4678 @@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
4679
4680 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
4681 {
4682 - struct tss_struct *tss;
4683 struct task_struct *tsk = current;
4684 struct vm86 *vm86 = tsk->thread.vm86;
4685 struct kernel_vm86_regs vm86regs;
4686 @@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
4687 vm86->saved_sp0 = tsk->thread.sp0;
4688 lazy_save_gs(vm86->regs32.gs);
4689
4690 - tss = &per_cpu(cpu_tss, get_cpu());
4691 /* make room for real-mode segments */
4692 + preempt_disable();
4693 tsk->thread.sp0 += 16;
4694
4695 - if (static_cpu_has(X86_FEATURE_SEP))
4696 + if (static_cpu_has(X86_FEATURE_SEP)) {
4697 tsk->thread.sysenter_cs = 0;
4698 + refresh_sysenter_cs(&tsk->thread);
4699 + }
4700
4701 - load_sp0(tss, &tsk->thread);
4702 - put_cpu();
4703 + update_sp0(tsk);
4704 + preempt_enable();
4705
4706 if (vm86->flags & VM86_SCREEN_BITMAP)
4707 mark_screen_rdonly(tsk->mm);
4708 diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
4709 index a4009fb9be87..d2a8b5a24a44 100644
4710 --- a/arch/x86/kernel/vmlinux.lds.S
4711 +++ b/arch/x86/kernel/vmlinux.lds.S
4712 @@ -107,6 +107,15 @@ SECTIONS
4713 SOFTIRQENTRY_TEXT
4714 *(.fixup)
4715 *(.gnu.warning)
4716 +
4717 +#ifdef CONFIG_X86_64
4718 + . = ALIGN(PAGE_SIZE);
4719 + _entry_trampoline = .;
4720 + *(.entry_trampoline)
4721 + . = ALIGN(PAGE_SIZE);
4722 + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
4723 +#endif
4724 +
4725 /* End of text section */
4726 _etext = .;
4727 } :text = 0x9090
4728 diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
4729 index a088b2c47f73..5b2d10c1973a 100644
4730 --- a/arch/x86/kernel/x86_init.c
4731 +++ b/arch/x86/kernel/x86_init.c
4732 @@ -28,6 +28,8 @@ void x86_init_noop(void) { }
4733 void __init x86_init_uint_noop(unsigned int unused) { }
4734 int __init iommu_init_noop(void) { return 0; }
4735 void iommu_shutdown_noop(void) { }
4736 +bool __init bool_x86_init_noop(void) { return false; }
4737 +void x86_op_int_noop(int cpu) { }
4738
4739 /*
4740 * The platform setup functions are preset with the default functions
4741 @@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = {
4742 .init_irq = x86_default_pci_init_irq,
4743 .fixup_irqs = x86_default_pci_fixup_irqs,
4744 },
4745 +
4746 + .hyper = {
4747 + .init_platform = x86_init_noop,
4748 + .x2apic_available = bool_x86_init_noop,
4749 + .init_mem_mapping = x86_init_noop,
4750 + },
4751 };
4752
4753 struct x86_cpuinit_ops x86_cpuinit = {
4754 @@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
4755 .get_nmi_reason = default_get_nmi_reason,
4756 .save_sched_clock_state = tsc_save_sched_clock_state,
4757 .restore_sched_clock_state = tsc_restore_sched_clock_state,
4758 + .hyper.pin_vcpu = x86_op_int_noop,
4759 };
4760
4761 EXPORT_SYMBOL_GPL(x86_platform);
4762 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
4763 index 7a69cf053711..13ebeedcec07 100644
4764 --- a/arch/x86/kvm/mmu.c
4765 +++ b/arch/x86/kvm/mmu.c
4766 @@ -5476,13 +5476,13 @@ int kvm_mmu_module_init(void)
4767
4768 pte_list_desc_cache = kmem_cache_create("pte_list_desc",
4769 sizeof(struct pte_list_desc),
4770 - 0, 0, NULL);
4771 + 0, SLAB_ACCOUNT, NULL);
4772 if (!pte_list_desc_cache)
4773 goto nomem;
4774
4775 mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
4776 sizeof(struct kvm_mmu_page),
4777 - 0, 0, NULL);
4778 + 0, SLAB_ACCOUNT, NULL);
4779 if (!mmu_page_header_cache)
4780 goto nomem;
4781
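
The SLAB_ACCOUNT flag added to the two caches above makes their allocations chargeable to the allocating task's memory cgroup. A minimal sketch of the same pattern, with a made-up object type and cache name purely for illustration:

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_obj {			/* hypothetical object type */
	u64 key;
	u64 payload;
};

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
	/* SLAB_ACCOUNT: objects allocated from this cache are charged
	 * to the memory cgroup of the allocating task. */
	demo_cache = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
				       0, SLAB_ACCOUNT, NULL);
	return demo_cache ? 0 : -ENOMEM;
}
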
4782 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
4783 index bc5921c1e2f2..47d9432756f3 100644
4784 --- a/arch/x86/kvm/vmx.c
4785 +++ b/arch/x86/kvm/vmx.c
4786 @@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
4787 * processors. See 22.2.4.
4788 */
4789 vmcs_writel(HOST_TR_BASE,
4790 - (unsigned long)this_cpu_ptr(&cpu_tss));
4791 + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
4792 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
4793
4794 /*
4795 diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
4796 index 553f8fd23cc4..4846eff7e4c8 100644
4797 --- a/arch/x86/lib/delay.c
4798 +++ b/arch/x86/lib/delay.c
4799 @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
4800 delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
4801
4802 /*
4803 - * Use cpu_tss as a cacheline-aligned, seldomly
4804 + * Use cpu_tss_rw as a cacheline-aligned, seldomly
4805 * accessed per-cpu variable as the monitor target.
4806 */
4807 - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
4808 + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
4809
4810 /*
4811 * AMD, like Intel, supports the EAX hint and EAX=0xf
4812 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
4813 index b0ff378650a9..3109ba6c6ede 100644
4814 --- a/arch/x86/mm/fault.c
4815 +++ b/arch/x86/mm/fault.c
4816 @@ -29,26 +29,6 @@
4817 #define CREATE_TRACE_POINTS
4818 #include <asm/trace/exceptions.h>
4819
4820 -/*
4821 - * Page fault error code bits:
4822 - *
4823 - * bit 0 == 0: no page found 1: protection fault
4824 - * bit 1 == 0: read access 1: write access
4825 - * bit 2 == 0: kernel-mode access 1: user-mode access
4826 - * bit 3 == 1: use of reserved bit detected
4827 - * bit 4 == 1: fault was an instruction fetch
4828 - * bit 5 == 1: protection keys block access
4829 - */
4830 -enum x86_pf_error_code {
4831 -
4832 - PF_PROT = 1 << 0,
4833 - PF_WRITE = 1 << 1,
4834 - PF_USER = 1 << 2,
4835 - PF_RSVD = 1 << 3,
4836 - PF_INSTR = 1 << 4,
4837 - PF_PK = 1 << 5,
4838 -};
4839 -
4840 /*
4841 * Returns 0 if mmiotrace is disabled, or if the fault is not
4842 * handled by mmiotrace:
4843 @@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
4844 * If it was a exec (instruction fetch) fault on NX page, then
4845 * do not ignore the fault:
4846 */
4847 - if (error_code & PF_INSTR)
4848 + if (error_code & X86_PF_INSTR)
4849 return 0;
4850
4851 instr = (void *)convert_ip_to_linear(current, regs);
4852 @@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
4853 * siginfo so userspace can discover which protection key was set
4854 * on the PTE.
4855 *
4856 - * If we get here, we know that the hardware signaled a PF_PK
4857 + * If we get here, we know that the hardware signaled a X86_PF_PK
4858 * fault and that there was a VMA once we got in the fault
4859 * handler. It does *not* guarantee that the VMA we find here
4860 * was the one that we faulted on.
4861 @@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
4862 /*
4863 * force_sig_info_fault() is called from a number of
4864 * contexts, some of which have a VMA and some of which
4865 - * do not. The PF_PK handing happens after we have a
4866 + * do not. The X86_PF_PK handing happens after we have a
4867 * valid VMA, so we should never reach this without a
4868 * valid VMA.
4869 */
4870 @@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
4871 if (!oops_may_print())
4872 return;
4873
4874 - if (error_code & PF_INSTR) {
4875 + if (error_code & X86_PF_INSTR) {
4876 unsigned int level;
4877 pgd_t *pgd;
4878 pte_t *pte;
4879 @@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
4880 */
4881 if (current->thread.sig_on_uaccess_err && signal) {
4882 tsk->thread.trap_nr = X86_TRAP_PF;
4883 - tsk->thread.error_code = error_code | PF_USER;
4884 + tsk->thread.error_code = error_code | X86_PF_USER;
4885 tsk->thread.cr2 = address;
4886
4887 /* XXX: hwpoison faults will set the wrong code. */
4888 @@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4889 struct task_struct *tsk = current;
4890
4891 /* User mode accesses just cause a SIGSEGV */
4892 - if (error_code & PF_USER) {
4893 + if (error_code & X86_PF_USER) {
4894 /*
4895 * It's possible to have interrupts off here:
4896 */
4897 @@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4898 * Instruction fetch faults in the vsyscall page might need
4899 * emulation.
4900 */
4901 - if (unlikely((error_code & PF_INSTR) &&
4902 + if (unlikely((error_code & X86_PF_INSTR) &&
4903 ((address & ~0xfff) == VSYSCALL_ADDR))) {
4904 if (emulate_vsyscall(regs, address))
4905 return;
4906 @@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
4907 * are always protection faults.
4908 */
4909 if (address >= TASK_SIZE_MAX)
4910 - error_code |= PF_PROT;
4911 + error_code |= X86_PF_PROT;
4912
4913 if (likely(show_unhandled_signals))
4914 show_signal_msg(regs, error_code, address, tsk);
4915 @@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
4916
4917 if (!boot_cpu_has(X86_FEATURE_OSPKE))
4918 return false;
4919 - if (error_code & PF_PK)
4920 + if (error_code & X86_PF_PK)
4921 return true;
4922 /* this checks permission keys on the VMA: */
4923 - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
4924 - (error_code & PF_INSTR), foreign))
4925 + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
4926 + (error_code & X86_PF_INSTR), foreign))
4927 return true;
4928 return false;
4929 }
4930 @@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
4931 int code = BUS_ADRERR;
4932
4933 /* Kernel mode? Handle exceptions or die: */
4934 - if (!(error_code & PF_USER)) {
4935 + if (!(error_code & X86_PF_USER)) {
4936 no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
4937 return;
4938 }
4939 @@ -1053,14 +1033,14 @@ static noinline void
4940 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
4941 unsigned long address, u32 *pkey, unsigned int fault)
4942 {
4943 - if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
4944 + if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
4945 no_context(regs, error_code, address, 0, 0);
4946 return;
4947 }
4948
4949 if (fault & VM_FAULT_OOM) {
4950 /* Kernel mode? Handle exceptions or die: */
4951 - if (!(error_code & PF_USER)) {
4952 + if (!(error_code & X86_PF_USER)) {
4953 no_context(regs, error_code, address,
4954 SIGSEGV, SEGV_MAPERR);
4955 return;
4956 @@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
4957
4958 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
4959 {
4960 - if ((error_code & PF_WRITE) && !pte_write(*pte))
4961 + if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
4962 return 0;
4963
4964 - if ((error_code & PF_INSTR) && !pte_exec(*pte))
4965 + if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
4966 return 0;
4967 /*
4968 * Note: We do not do lazy flushing on protection key
4969 - * changes, so no spurious fault will ever set PF_PK.
4970 + * changes, so no spurious fault will ever set X86_PF_PK.
4971 */
4972 - if ((error_code & PF_PK))
4973 + if ((error_code & X86_PF_PK))
4974 return 1;
4975
4976 return 1;
4977 @@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
4978 * change, so user accesses are not expected to cause spurious
4979 * faults.
4980 */
4981 - if (error_code != (PF_WRITE | PF_PROT)
4982 - && error_code != (PF_INSTR | PF_PROT))
4983 + if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
4984 + error_code != (X86_PF_INSTR | X86_PF_PROT))
4985 return 0;
4986
4987 pgd = init_mm.pgd + pgd_index(address);
4988 @@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
4989 * always an unconditional error and can never result in
4990 * a follow-up action to resolve the fault, like a COW.
4991 */
4992 - if (error_code & PF_PK)
4993 + if (error_code & X86_PF_PK)
4994 return 1;
4995
4996 /*
4997 * Make sure to check the VMA so that we do not perform
4998 - * faults just to hit a PF_PK as soon as we fill in a
4999 + * faults just to hit a X86_PF_PK as soon as we fill in a
5000 * page.
5001 */
5002 - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
5003 - (error_code & PF_INSTR), foreign))
5004 + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
5005 + (error_code & X86_PF_INSTR), foreign))
5006 return 1;
5007
5008 - if (error_code & PF_WRITE) {
5009 + if (error_code & X86_PF_WRITE) {
5010 /* write, present and write, not present: */
5011 if (unlikely(!(vma->vm_flags & VM_WRITE)))
5012 return 1;
5013 @@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
5014 }
5015
5016 /* read, present: */
5017 - if (unlikely(error_code & PF_PROT))
5018 + if (unlikely(error_code & X86_PF_PROT))
5019 return 1;
5020
5021 /* read, not present: */
5022 @@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
5023 if (!static_cpu_has(X86_FEATURE_SMAP))
5024 return false;
5025
5026 - if (error_code & PF_USER)
5027 + if (error_code & X86_PF_USER)
5028 return false;
5029
5030 if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
5031 @@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5032 * protection error (error_code & 9) == 0.
5033 */
5034 if (unlikely(fault_in_kernel_space(address))) {
5035 - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
5036 + if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
5037 if (vmalloc_fault(address) >= 0)
5038 return;
5039
5040 @@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5041 if (unlikely(kprobes_fault(regs)))
5042 return;
5043
5044 - if (unlikely(error_code & PF_RSVD))
5045 + if (unlikely(error_code & X86_PF_RSVD))
5046 pgtable_bad(regs, error_code, address);
5047
5048 if (unlikely(smap_violation(error_code, regs))) {
5049 @@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5050 */
5051 if (user_mode(regs)) {
5052 local_irq_enable();
5053 - error_code |= PF_USER;
5054 + error_code |= X86_PF_USER;
5055 flags |= FAULT_FLAG_USER;
5056 } else {
5057 if (regs->flags & X86_EFLAGS_IF)
5058 @@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5059
5060 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
5061
5062 - if (error_code & PF_WRITE)
5063 + if (error_code & X86_PF_WRITE)
5064 flags |= FAULT_FLAG_WRITE;
5065 - if (error_code & PF_INSTR)
5066 + if (error_code & X86_PF_INSTR)
5067 flags |= FAULT_FLAG_INSTRUCTION;
5068
5069 /*
5070 @@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5071 * space check, thus avoiding the deadlock:
5072 */
5073 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
5074 - if ((error_code & PF_USER) == 0 &&
5075 + if (!(error_code & X86_PF_USER) &&
5076 !search_exception_tables(regs->ip)) {
5077 bad_area_nosemaphore(regs, error_code, address, NULL);
5078 return;
5079 @@ -1409,7 +1389,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
5080 bad_area(regs, error_code, address);
5081 return;
5082 }
5083 - if (error_code & PF_USER) {
5084 + if (error_code & X86_PF_USER) {
5085 /*
5086 * Accessing the stack below %sp is always a bug.
5087 * The large cushion allows instructions like enter
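
The fault.c changes above are a mechanical rename from the file-local PF_* enum to the shared X86_PF_* constants with the same bit layout documented in the removed comment block (bit 0 protection, bit 1 write, bit 2 user, bit 3 reserved bit, bit 4 instruction fetch, bit 5 protection key). As an illustration only, a debugging helper (the name is invented here) that decodes an error code with the new names could look like:

#include <linux/printk.h>
#include <asm/traps.h>		/* X86_PF_* bit definitions */

static void describe_pf_error_code(unsigned long error_code)
{
	/* Print one line summarizing the hardware-provided #PF bits. */
	pr_info("#PF: %s %s access in %s mode%s%s%s\n",
		(error_code & X86_PF_PROT)  ? "protection" : "not-present",
		(error_code & X86_PF_WRITE) ? "write" : "read",
		(error_code & X86_PF_USER)  ? "user" : "kernel",
		(error_code & X86_PF_RSVD)  ? ", reserved bit set" : "",
		(error_code & X86_PF_INSTR) ? ", instruction fetch" : "",
		(error_code & X86_PF_PK)    ? ", protection key" : "");
}
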
5088 diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
5089 index af5c1ed21d43..a22c2b95e513 100644
5090 --- a/arch/x86/mm/init.c
5091 +++ b/arch/x86/mm/init.c
5092 @@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
5093 load_cr3(swapper_pg_dir);
5094 __flush_tlb_all();
5095
5096 - hypervisor_init_mem_mapping();
5097 + x86_init.hyper.init_mem_mapping();
5098
5099 early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
5100 }
5101 diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
5102 index 048fbe8fc274..adcea90a2046 100644
5103 --- a/arch/x86/mm/init_64.c
5104 +++ b/arch/x86/mm/init_64.c
5105 @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
5106
5107 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
5108 void register_page_bootmem_memmap(unsigned long section_nr,
5109 - struct page *start_page, unsigned long size)
5110 + struct page *start_page, unsigned long nr_pages)
5111 {
5112 unsigned long addr = (unsigned long)start_page;
5113 - unsigned long end = (unsigned long)(start_page + size);
5114 + unsigned long end = (unsigned long)(start_page + nr_pages);
5115 unsigned long next;
5116 pgd_t *pgd;
5117 p4d_t *p4d;
5118 pud_t *pud;
5119 pmd_t *pmd;
5120 - unsigned int nr_pages;
5121 + unsigned int nr_pmd_pages;
5122 struct page *page;
5123
5124 for (; addr < end; addr = next) {
5125 @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
5126 if (pmd_none(*pmd))
5127 continue;
5128
5129 - nr_pages = 1 << (get_order(PMD_SIZE));
5130 + nr_pmd_pages = 1 << get_order(PMD_SIZE);
5131 page = pmd_page(*pmd);
5132 - while (nr_pages--)
5133 + while (nr_pmd_pages--)
5134 get_page_bootmem(section_nr, page++,
5135 SECTION_INFO);
5136 }
5137 diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
5138 index 8f5be3eb40dd..9ec70d780f1f 100644
5139 --- a/arch/x86/mm/kasan_init_64.c
5140 +++ b/arch/x86/mm/kasan_init_64.c
5141 @@ -4,19 +4,150 @@
5142 #include <linux/bootmem.h>
5143 #include <linux/kasan.h>
5144 #include <linux/kdebug.h>
5145 +#include <linux/memblock.h>
5146 #include <linux/mm.h>
5147 #include <linux/sched.h>
5148 #include <linux/sched/task.h>
5149 #include <linux/vmalloc.h>
5150
5151 #include <asm/e820/types.h>
5152 +#include <asm/pgalloc.h>
5153 #include <asm/tlbflush.h>
5154 #include <asm/sections.h>
5155 #include <asm/pgtable.h>
5156
5157 extern struct range pfn_mapped[E820_MAX_ENTRIES];
5158
5159 -static int __init map_range(struct range *range)
5160 +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
5161 +
5162 +static __init void *early_alloc(size_t size, int nid)
5163 +{
5164 + return memblock_virt_alloc_try_nid_nopanic(size, size,
5165 + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
5166 +}
5167 +
5168 +static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
5169 + unsigned long end, int nid)
5170 +{
5171 + pte_t *pte;
5172 +
5173 + if (pmd_none(*pmd)) {
5174 + void *p;
5175 +
5176 + if (boot_cpu_has(X86_FEATURE_PSE) &&
5177 + ((end - addr) == PMD_SIZE) &&
5178 + IS_ALIGNED(addr, PMD_SIZE)) {
5179 + p = early_alloc(PMD_SIZE, nid);
5180 + if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
5181 + return;
5182 + else if (p)
5183 + memblock_free(__pa(p), PMD_SIZE);
5184 + }
5185 +
5186 + p = early_alloc(PAGE_SIZE, nid);
5187 + pmd_populate_kernel(&init_mm, pmd, p);
5188 + }
5189 +
5190 + pte = pte_offset_kernel(pmd, addr);
5191 + do {
5192 + pte_t entry;
5193 + void *p;
5194 +
5195 + if (!pte_none(*pte))
5196 + continue;
5197 +
5198 + p = early_alloc(PAGE_SIZE, nid);
5199 + entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
5200 + set_pte_at(&init_mm, addr, pte, entry);
5201 + } while (pte++, addr += PAGE_SIZE, addr != end);
5202 +}
5203 +
5204 +static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
5205 + unsigned long end, int nid)
5206 +{
5207 + pmd_t *pmd;
5208 + unsigned long next;
5209 +
5210 + if (pud_none(*pud)) {
5211 + void *p;
5212 +
5213 + if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
5214 + ((end - addr) == PUD_SIZE) &&
5215 + IS_ALIGNED(addr, PUD_SIZE)) {
5216 + p = early_alloc(PUD_SIZE, nid);
5217 + if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
5218 + return;
5219 + else if (p)
5220 + memblock_free(__pa(p), PUD_SIZE);
5221 + }
5222 +
5223 + p = early_alloc(PAGE_SIZE, nid);
5224 + pud_populate(&init_mm, pud, p);
5225 + }
5226 +
5227 + pmd = pmd_offset(pud, addr);
5228 + do {
5229 + next = pmd_addr_end(addr, end);
5230 + if (!pmd_large(*pmd))
5231 + kasan_populate_pmd(pmd, addr, next, nid);
5232 + } while (pmd++, addr = next, addr != end);
5233 +}
5234 +
5235 +static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
5236 + unsigned long end, int nid)
5237 +{
5238 + pud_t *pud;
5239 + unsigned long next;
5240 +
5241 + if (p4d_none(*p4d)) {
5242 + void *p = early_alloc(PAGE_SIZE, nid);
5243 +
5244 + p4d_populate(&init_mm, p4d, p);
5245 + }
5246 +
5247 + pud = pud_offset(p4d, addr);
5248 + do {
5249 + next = pud_addr_end(addr, end);
5250 + if (!pud_large(*pud))
5251 + kasan_populate_pud(pud, addr, next, nid);
5252 + } while (pud++, addr = next, addr != end);
5253 +}
5254 +
5255 +static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
5256 + unsigned long end, int nid)
5257 +{
5258 + void *p;
5259 + p4d_t *p4d;
5260 + unsigned long next;
5261 +
5262 + if (pgd_none(*pgd)) {
5263 + p = early_alloc(PAGE_SIZE, nid);
5264 + pgd_populate(&init_mm, pgd, p);
5265 + }
5266 +
5267 + p4d = p4d_offset(pgd, addr);
5268 + do {
5269 + next = p4d_addr_end(addr, end);
5270 + kasan_populate_p4d(p4d, addr, next, nid);
5271 + } while (p4d++, addr = next, addr != end);
5272 +}
5273 +
5274 +static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
5275 + int nid)
5276 +{
5277 + pgd_t *pgd;
5278 + unsigned long next;
5279 +
5280 + addr = addr & PAGE_MASK;
5281 + end = round_up(end, PAGE_SIZE);
5282 + pgd = pgd_offset_k(addr);
5283 + do {
5284 + next = pgd_addr_end(addr, end);
5285 + kasan_populate_pgd(pgd, addr, next, nid);
5286 + } while (pgd++, addr = next, addr != end);
5287 +}
5288 +
5289 +static void __init map_range(struct range *range)
5290 {
5291 unsigned long start;
5292 unsigned long end;
5293 @@ -24,15 +155,17 @@ static int __init map_range(struct range *range)
5294 start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
5295 end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
5296
5297 - return vmemmap_populate(start, end, NUMA_NO_NODE);
5298 + kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
5299 }
5300
5301 static void __init clear_pgds(unsigned long start,
5302 unsigned long end)
5303 {
5304 pgd_t *pgd;
5305 + /* See comment in kasan_init() */
5306 + unsigned long pgd_end = end & PGDIR_MASK;
5307
5308 - for (; start < end; start += PGDIR_SIZE) {
5309 + for (; start < pgd_end; start += PGDIR_SIZE) {
5310 pgd = pgd_offset_k(start);
5311 /*
5312 * With folded p4d, pgd_clear() is nop, use p4d_clear()
5313 @@ -43,29 +176,61 @@ static void __init clear_pgds(unsigned long start,
5314 else
5315 pgd_clear(pgd);
5316 }
5317 +
5318 + pgd = pgd_offset_k(start);
5319 + for (; start < end; start += P4D_SIZE)
5320 + p4d_clear(p4d_offset(pgd, start));
5321 +}
5322 +
5323 +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
5324 +{
5325 + unsigned long p4d;
5326 +
5327 + if (!IS_ENABLED(CONFIG_X86_5LEVEL))
5328 + return (p4d_t *)pgd;
5329 +
5330 + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
5331 + p4d += __START_KERNEL_map - phys_base;
5332 + return (p4d_t *)p4d + p4d_index(addr);
5333 +}
5334 +
5335 +static void __init kasan_early_p4d_populate(pgd_t *pgd,
5336 + unsigned long addr,
5337 + unsigned long end)
5338 +{
5339 + pgd_t pgd_entry;
5340 + p4d_t *p4d, p4d_entry;
5341 + unsigned long next;
5342 +
5343 + if (pgd_none(*pgd)) {
5344 + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
5345 + set_pgd(pgd, pgd_entry);
5346 + }
5347 +
5348 + p4d = early_p4d_offset(pgd, addr);
5349 + do {
5350 + next = p4d_addr_end(addr, end);
5351 +
5352 + if (!p4d_none(*p4d))
5353 + continue;
5354 +
5355 + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
5356 + set_p4d(p4d, p4d_entry);
5357 + } while (p4d++, addr = next, addr != end && p4d_none(*p4d));
5358 }
5359
5360 static void __init kasan_map_early_shadow(pgd_t *pgd)
5361 {
5362 - int i;
5363 - unsigned long start = KASAN_SHADOW_START;
5364 + /* See comment in kasan_init() */
5365 + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
5366 unsigned long end = KASAN_SHADOW_END;
5367 + unsigned long next;
5368
5369 - for (i = pgd_index(start); start < end; i++) {
5370 - switch (CONFIG_PGTABLE_LEVELS) {
5371 - case 4:
5372 - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
5373 - _KERNPG_TABLE);
5374 - break;
5375 - case 5:
5376 - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
5377 - _KERNPG_TABLE);
5378 - break;
5379 - default:
5380 - BUILD_BUG();
5381 - }
5382 - start += PGDIR_SIZE;
5383 - }
5384 + pgd += pgd_index(addr);
5385 + do {
5386 + next = pgd_addr_end(addr, end);
5387 + kasan_early_p4d_populate(pgd, addr, next);
5388 + } while (pgd++, addr = next, addr != end);
5389 }
5390
5391 #ifdef CONFIG_KASAN_INLINE
5392 @@ -102,7 +267,7 @@ void __init kasan_early_init(void)
5393 for (i = 0; i < PTRS_PER_PUD; i++)
5394 kasan_zero_pud[i] = __pud(pud_val);
5395
5396 - for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
5397 + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
5398 kasan_zero_p4d[i] = __p4d(p4d_val);
5399
5400 kasan_map_early_shadow(early_top_pgt);
5401 @@ -112,37 +277,76 @@ void __init kasan_early_init(void)
5402 void __init kasan_init(void)
5403 {
5404 int i;
5405 + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
5406
5407 #ifdef CONFIG_KASAN_INLINE
5408 register_die_notifier(&kasan_die_notifier);
5409 #endif
5410
5411 memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
5412 +
5413 + /*
5414 + * We use the same shadow offset for 4- and 5-level paging to
5415 + * facilitate boot-time switching between paging modes.
5416 + * As a result, in 5-level paging mode KASAN_SHADOW_START and
5417 + * KASAN_SHADOW_END are not aligned to a PGD boundary.
5418 + *
5419 + * KASAN_SHADOW_START doesn't share PGD with anything else.
5420 + * We claim the whole PGD entry to make things easier.
5421 + *
5422 + * KASAN_SHADOW_END lands in the last PGD entry and it collides with a
5423 + * bunch of things like kernel code, modules, the EFI mapping, etc.
5424 + * We need to take extra steps to not overwrite them.
5425 + */
5426 + if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
5427 + void *ptr;
5428 +
5429 + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
5430 + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
5431 + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
5432 + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
5433 + }
5434 +
5435 load_cr3(early_top_pgt);
5436 __flush_tlb_all();
5437
5438 - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
5439 + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
5440
5441 - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
5442 + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
5443 kasan_mem_to_shadow((void *)PAGE_OFFSET));
5444
5445 for (i = 0; i < E820_MAX_ENTRIES; i++) {
5446 if (pfn_mapped[i].end == 0)
5447 break;
5448
5449 - if (map_range(&pfn_mapped[i]))
5450 - panic("kasan: unable to allocate shadow!");
5451 + map_range(&pfn_mapped[i]);
5452 }
5453 +
5454 kasan_populate_zero_shadow(
5455 kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
5456 kasan_mem_to_shadow((void *)__START_KERNEL_map));
5457
5458 - vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
5459 - (unsigned long)kasan_mem_to_shadow(_end),
5460 - NUMA_NO_NODE);
5461 + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
5462 + (unsigned long)kasan_mem_to_shadow(_end),
5463 + early_pfn_to_nid(__pa(_stext)));
5464 +
5465 + shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
5466 + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
5467 + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
5468 + PAGE_SIZE);
5469 +
5470 + shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
5471 + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
5472 + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
5473 + PAGE_SIZE);
5474
5475 kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
5476 - (void *)KASAN_SHADOW_END);
5477 + shadow_cpu_entry_begin);
5478 +
5479 + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
5480 + (unsigned long)shadow_cpu_entry_end, 0);
5481 +
5482 + kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
5483
5484 load_cr3(init_top_pgt);
5485 __flush_tlb_all();
5486 diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
5487 index 84fcfde53f8f..04d5157fe7f8 100644
5488 --- a/arch/x86/power/cpu.c
5489 +++ b/arch/x86/power/cpu.c
5490 @@ -160,17 +160,19 @@ static void do_fpu_end(void)
5491 static void fix_processor_context(void)
5492 {
5493 int cpu = smp_processor_id();
5494 - struct tss_struct *t = &per_cpu(cpu_tss, cpu);
5495 #ifdef CONFIG_X86_64
5496 struct desc_struct *desc = get_cpu_gdt_rw(cpu);
5497 tss_desc tss;
5498 #endif
5499 - set_tss_desc(cpu, t); /*
5500 - * This just modifies memory; should not be
5501 - * necessary. But... This is necessary, because
5502 - * 386 hardware has concept of busy TSS or some
5503 - * similar stupidity.
5504 - */
5505 +
5506 + /*
5507 + * We need to reload TR, which requires that we change the
5508 + * GDT entry to indicate "available" first.
5509 + *
5510 + * XXX: This could probably all be replaced by a call to
5511 + * force_reload_TR().
5512 + */
5513 + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
5514
5515 #ifdef CONFIG_X86_64
5516 memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
5517 diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
5518 index de503c225ae1..754d5391d9fa 100644
5519 --- a/arch/x86/xen/enlighten_hvm.c
5520 +++ b/arch/x86/xen/enlighten_hvm.c
5521 @@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void)
5522 return xen_cpuid_base();
5523 }
5524
5525 -const struct hypervisor_x86 x86_hyper_xen_hvm = {
5526 +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
5527 .name = "Xen HVM",
5528 .detect = xen_platform_hvm,
5529 - .init_platform = xen_hvm_guest_init,
5530 - .pin_vcpu = xen_pin_vcpu,
5531 - .x2apic_available = xen_x2apic_para_available,
5532 - .init_mem_mapping = xen_hvm_init_mem_mapping,
5533 + .type = X86_HYPER_XEN_HVM,
5534 + .init.init_platform = xen_hvm_guest_init,
5535 + .init.x2apic_available = xen_x2apic_para_available,
5536 + .init.init_mem_mapping = xen_hvm_init_mem_mapping,
5537 + .runtime.pin_vcpu = xen_pin_vcpu,
5538 };
5539 -EXPORT_SYMBOL(x86_hyper_xen_hvm);
5540 diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
5541 index d4396e27b1fb..ae3a071e1d0f 100644
5542 --- a/arch/x86/xen/enlighten_pv.c
5543 +++ b/arch/x86/xen/enlighten_pv.c
5544 @@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
5545 #ifdef CONFIG_X86_MCE
5546 { machine_check, xen_machine_check, true },
5547 #endif
5548 - { nmi, xen_nmi, true },
5549 + { nmi, xen_xennmi, true },
5550 { overflow, xen_overflow, false },
5551 #ifdef CONFIG_IA32_EMULATION
5552 { entry_INT80_compat, xen_entry_INT80_compat, false },
5553 @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
5554 }
5555 }
5556
5557 -static void xen_load_sp0(struct tss_struct *tss,
5558 - struct thread_struct *thread)
5559 +static void xen_load_sp0(unsigned long sp0)
5560 {
5561 struct multicall_space mcs;
5562
5563 mcs = xen_mc_entry(0);
5564 - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
5565 + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
5566 xen_mc_issue(PARAVIRT_LAZY_CPU);
5567 - tss->x86_tss.sp0 = thread->sp0;
5568 + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
5569 }
5570
5571 void xen_set_iopl_mask(unsigned mask)
5572 @@ -1460,9 +1459,9 @@ static uint32_t __init xen_platform_pv(void)
5573 return 0;
5574 }
5575
5576 -const struct hypervisor_x86 x86_hyper_xen_pv = {
5577 +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
5578 .name = "Xen PV",
5579 .detect = xen_platform_pv,
5580 - .pin_vcpu = xen_pin_vcpu,
5581 + .type = X86_HYPER_XEN_PV,
5582 + .runtime.pin_vcpu = xen_pin_vcpu,
5583 };
5584 -EXPORT_SYMBOL(x86_hyper_xen_pv);
5585 diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
5586 index 71495f1a86d7..c2454237fa67 100644
5587 --- a/arch/x86/xen/mmu_pv.c
5588 +++ b/arch/x86/xen/mmu_pv.c
5589 @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
5590 }
5591 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
5592
5593 -#if CONFIG_PGTABLE_LEVELS == 4
5594 +#ifdef CONFIG_X86_64
5595 __visible pudval_t xen_pud_val(pud_t pud)
5596 {
5597 return pte_mfn_to_pfn(pud.pud);
5598 @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
5599
5600 xen_mc_issue(PARAVIRT_LAZY_MMU);
5601 }
5602 -#endif /* CONFIG_PGTABLE_LEVELS == 4 */
5603 +#endif /* CONFIG_X86_64 */
5604
5605 static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
5606 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
5607 @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
5608 int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
5609 bool last, unsigned long limit)
5610 {
5611 - int i, nr, flush = 0;
5612 + int flush = 0;
5613 + pud_t *pud;
5614
5615 - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
5616 - for (i = 0; i < nr; i++) {
5617 - pud_t *pud;
5618
5619 - if (p4d_none(p4d[i]))
5620 - continue;
5621 + if (p4d_none(*p4d))
5622 + return flush;
5623
5624 - pud = pud_offset(&p4d[i], 0);
5625 - if (PTRS_PER_PUD > 1)
5626 - flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
5627 - flush |= xen_pud_walk(mm, pud, func,
5628 - last && i == nr - 1, limit);
5629 - }
5630 + pud = pud_offset(p4d, 0);
5631 + if (PTRS_PER_PUD > 1)
5632 + flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
5633 + flush |= xen_pud_walk(mm, pud, func, last, limit);
5634 return flush;
5635 }
5636
5637 @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
5638 continue;
5639
5640 p4d = p4d_offset(&pgd[i], 0);
5641 - if (PTRS_PER_P4D > 1)
5642 - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
5643 flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
5644 }
5645
5646 @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
5647 {
5648 pgd_t *pgd;
5649 p4d_t *p4d;
5650 - unsigned int i;
5651 bool unpin;
5652
5653 unpin = (vaddr == 2 * PGDIR_SIZE);
5654 vaddr &= PMD_MASK;
5655 pgd = pgd_offset_k(vaddr);
5656 p4d = p4d_offset(pgd, 0);
5657 - for (i = 0; i < PTRS_PER_P4D; i++) {
5658 - if (p4d_none(p4d[i]))
5659 - continue;
5660 - xen_cleanmfnmap_p4d(p4d + i, unpin);
5661 - }
5662 - if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
5663 - set_pgd(pgd, __pgd(0));
5664 - xen_cleanmfnmap_free_pgtbl(p4d, unpin);
5665 - }
5666 + if (!p4d_none(*p4d))
5667 + xen_cleanmfnmap_p4d(p4d, unpin);
5668 }
5669
5670 static void __init xen_pagetable_p2m_free(void)
5671 @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
5672 xen_release_ptpage(pfn, PT_PMD);
5673 }
5674
5675 -#if CONFIG_PGTABLE_LEVELS >= 4
5676 +#ifdef CONFIG_X86_64
5677 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
5678 {
5679 xen_alloc_ptpage(mm, pfn, PT_PUD);
5680 @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
5681 */
5682 void __init xen_relocate_p2m(void)
5683 {
5684 - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
5685 + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
5686 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
5687 - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
5688 + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
5689 pte_t *pt;
5690 pmd_t *pmd;
5691 pud_t *pud;
5692 - p4d_t *p4d = NULL;
5693 pgd_t *pgd;
5694 unsigned long *new_p2m;
5695 int save_pud;
5696 @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
5697 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
5698 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
5699 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
5700 - if (PTRS_PER_P4D > 1)
5701 - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
5702 - else
5703 - n_p4d = 0;
5704 - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
5705 + n_frames = n_pte + n_pt + n_pmd + n_pud;
5706
5707 new_area = xen_find_free_area(PFN_PHYS(n_frames));
5708 if (!new_area) {
5709 @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
5710 * To avoid any possible virtual address collision, just use
5711 * 2 * PUD_SIZE for the new area.
5712 */
5713 - p4d_phys = new_area;
5714 - pud_phys = p4d_phys + PFN_PHYS(n_p4d);
5715 + pud_phys = new_area;
5716 pmd_phys = pud_phys + PFN_PHYS(n_pud);
5717 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
5718 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
5719
5720 pgd = __va(read_cr3_pa());
5721 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
5722 - idx_p4d = 0;
5723 save_pud = n_pud;
5724 - do {
5725 - if (n_p4d > 0) {
5726 - p4d = early_memremap(p4d_phys, PAGE_SIZE);
5727 - clear_page(p4d);
5728 - n_pud = min(save_pud, PTRS_PER_P4D);
5729 - }
5730 - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
5731 - pud = early_memremap(pud_phys, PAGE_SIZE);
5732 - clear_page(pud);
5733 - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
5734 - idx_pmd++) {
5735 - pmd = early_memremap(pmd_phys, PAGE_SIZE);
5736 - clear_page(pmd);
5737 - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
5738 - idx_pt++) {
5739 - pt = early_memremap(pt_phys, PAGE_SIZE);
5740 - clear_page(pt);
5741 - for (idx_pte = 0;
5742 - idx_pte < min(n_pte, PTRS_PER_PTE);
5743 - idx_pte++) {
5744 - set_pte(pt + idx_pte,
5745 - pfn_pte(p2m_pfn, PAGE_KERNEL));
5746 - p2m_pfn++;
5747 - }
5748 - n_pte -= PTRS_PER_PTE;
5749 - early_memunmap(pt, PAGE_SIZE);
5750 - make_lowmem_page_readonly(__va(pt_phys));
5751 - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
5752 - PFN_DOWN(pt_phys));
5753 - set_pmd(pmd + idx_pt,
5754 - __pmd(_PAGE_TABLE | pt_phys));
5755 - pt_phys += PAGE_SIZE;
5756 + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
5757 + pud = early_memremap(pud_phys, PAGE_SIZE);
5758 + clear_page(pud);
5759 + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
5760 + idx_pmd++) {
5761 + pmd = early_memremap(pmd_phys, PAGE_SIZE);
5762 + clear_page(pmd);
5763 + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
5764 + idx_pt++) {
5765 + pt = early_memremap(pt_phys, PAGE_SIZE);
5766 + clear_page(pt);
5767 + for (idx_pte = 0;
5768 + idx_pte < min(n_pte, PTRS_PER_PTE);
5769 + idx_pte++) {
5770 + set_pte(pt + idx_pte,
5771 + pfn_pte(p2m_pfn, PAGE_KERNEL));
5772 + p2m_pfn++;
5773 }
5774 - n_pt -= PTRS_PER_PMD;
5775 - early_memunmap(pmd, PAGE_SIZE);
5776 - make_lowmem_page_readonly(__va(pmd_phys));
5777 - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
5778 - PFN_DOWN(pmd_phys));
5779 - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
5780 - pmd_phys += PAGE_SIZE;
5781 + n_pte -= PTRS_PER_PTE;
5782 + early_memunmap(pt, PAGE_SIZE);
5783 + make_lowmem_page_readonly(__va(pt_phys));
5784 + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
5785 + PFN_DOWN(pt_phys));
5786 + set_pmd(pmd + idx_pt,
5787 + __pmd(_PAGE_TABLE | pt_phys));
5788 + pt_phys += PAGE_SIZE;
5789 }
5790 - n_pmd -= PTRS_PER_PUD;
5791 - early_memunmap(pud, PAGE_SIZE);
5792 - make_lowmem_page_readonly(__va(pud_phys));
5793 - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
5794 - if (n_p4d > 0)
5795 - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
5796 - else
5797 - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
5798 - pud_phys += PAGE_SIZE;
5799 - }
5800 - if (n_p4d > 0) {
5801 - save_pud -= PTRS_PER_P4D;
5802 - early_memunmap(p4d, PAGE_SIZE);
5803 - make_lowmem_page_readonly(__va(p4d_phys));
5804 - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
5805 - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
5806 - p4d_phys += PAGE_SIZE;
5807 + n_pt -= PTRS_PER_PMD;
5808 + early_memunmap(pmd, PAGE_SIZE);
5809 + make_lowmem_page_readonly(__va(pmd_phys));
5810 + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
5811 + PFN_DOWN(pmd_phys));
5812 + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
5813 + pmd_phys += PAGE_SIZE;
5814 }
5815 - } while (++idx_p4d < n_p4d);
5816 + n_pmd -= PTRS_PER_PUD;
5817 + early_memunmap(pud, PAGE_SIZE);
5818 + make_lowmem_page_readonly(__va(pud_phys));
5819 + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
5820 + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
5821 + pud_phys += PAGE_SIZE;
5822 + }
5823
5824 /* Now copy the old p2m info to the new area. */
5825 memcpy(new_p2m, xen_p2m_addr, size);
5826 @@ -2311,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
5827 #endif
5828 case FIX_TEXT_POKE0:
5829 case FIX_TEXT_POKE1:
5830 - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
5831 + case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
5832 /* All local page mappings */
5833 pte = pfn_pte(phys, prot);
5834 break;
5835 @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
5836 pv_mmu_ops.set_pte = xen_set_pte;
5837 pv_mmu_ops.set_pmd = xen_set_pmd;
5838 pv_mmu_ops.set_pud = xen_set_pud;
5839 -#if CONFIG_PGTABLE_LEVELS >= 4
5840 +#ifdef CONFIG_X86_64
5841 pv_mmu_ops.set_p4d = xen_set_p4d;
5842 #endif
5843
5844 @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
5845 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
5846 pv_mmu_ops.release_pte = xen_release_pte;
5847 pv_mmu_ops.release_pmd = xen_release_pmd;
5848 -#if CONFIG_PGTABLE_LEVELS >= 4
5849 +#ifdef CONFIG_X86_64
5850 pv_mmu_ops.alloc_pud = xen_alloc_pud;
5851 pv_mmu_ops.release_pud = xen_release_pud;
5852 #endif
5853 @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
5854 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
5855 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
5856
5857 -#if CONFIG_PGTABLE_LEVELS >= 4
5858 +#ifdef CONFIG_X86_64
5859 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
5860 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
5861 .set_p4d = xen_set_p4d_hyper,
5862
5863 .alloc_pud = xen_alloc_pmd_init,
5864 .release_pud = xen_release_pmd_init,
5865 -#endif /* CONFIG_PGTABLE_LEVELS == 4 */
5866 +#endif /* CONFIG_X86_64 */
5867
5868 .activate_mm = xen_activate_mm,
5869 .dup_mmap = xen_dup_mmap,
5870 diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
5871 index 05f91ce9b55e..c0c756c76afe 100644
5872 --- a/arch/x86/xen/smp_pv.c
5873 +++ b/arch/x86/xen/smp_pv.c
5874 @@ -14,6 +14,7 @@
5875 * single-threaded.
5876 */
5877 #include <linux/sched.h>
5878 +#include <linux/sched/task_stack.h>
5879 #include <linux/err.h>
5880 #include <linux/slab.h>
5881 #include <linux/smp.h>
5882 @@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5883 #endif
5884 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
5885
5886 + /*
5887 + * Bring up the CPU in cpu_bringup_and_idle() with the stack
5888 + * pointing just below where pt_regs would be if it were a normal
5889 + * kernel entry.
5890 + */
5891 ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
5892 ctxt->flags = VGCF_IN_KERNEL;
5893 ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
5894 ctxt->user_regs.ds = __USER_DS;
5895 ctxt->user_regs.es = __USER_DS;
5896 ctxt->user_regs.ss = __KERNEL_DS;
5897 + ctxt->user_regs.cs = __KERNEL_CS;
5898 + ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
5899
5900 xen_copy_trap_info(ctxt->trap_ctxt);
5901
5902 @@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5903 ctxt->gdt_frames[0] = gdt_mfn;
5904 ctxt->gdt_ents = GDT_ENTRIES;
5905
5906 + /*
5907 + * Set SS:SP that Xen will use when entering guest kernel mode
5908 + * from guest user mode. Subsequent calls to load_sp0() can
5909 + * change this value.
5910 + */
5911 ctxt->kernel_ss = __KERNEL_DS;
5912 - ctxt->kernel_sp = idle->thread.sp0;
5913 + ctxt->kernel_sp = task_top_of_stack(idle);
5914
5915 #ifdef CONFIG_X86_32
5916 ctxt->event_callback_cs = __KERNEL_CS;
5917 @@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
5918 (unsigned long)xen_hypervisor_callback;
5919 ctxt->failsafe_callback_eip =
5920 (unsigned long)xen_failsafe_callback;
5921 - ctxt->user_regs.cs = __KERNEL_CS;
5922 per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
5923
5924 - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
5925 ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
5926 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
5927 BUG();
5928 diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
5929 index c98a48c861fd..8a10c9a9e2b5 100644
5930 --- a/arch/x86/xen/xen-asm_64.S
5931 +++ b/arch/x86/xen/xen-asm_64.S
5932 @@ -30,7 +30,7 @@ xen_pv_trap debug
5933 xen_pv_trap xendebug
5934 xen_pv_trap int3
5935 xen_pv_trap xenint3
5936 -xen_pv_trap nmi
5937 +xen_pv_trap xennmi
5938 xen_pv_trap overflow
5939 xen_pv_trap bounds
5940 xen_pv_trap invalid_op
5941 diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
5942 index b5b8d7f43557..497cc55a0c16 100644
5943 --- a/arch/x86/xen/xen-head.S
5944 +++ b/arch/x86/xen/xen-head.S
5945 @@ -10,6 +10,7 @@
5946 #include <asm/boot.h>
5947 #include <asm/asm.h>
5948 #include <asm/page_types.h>
5949 +#include <asm/unwind_hints.h>
5950
5951 #include <xen/interface/elfnote.h>
5952 #include <xen/interface/features.h>
5953 @@ -20,6 +21,7 @@
5954 #ifdef CONFIG_XEN_PV
5955 __INIT
5956 ENTRY(startup_xen)
5957 + UNWIND_HINT_EMPTY
5958 cld
5959
5960 /* Clear .bss */
5961 @@ -34,21 +36,24 @@ ENTRY(startup_xen)
5962 mov $init_thread_union+THREAD_SIZE, %_ASM_SP
5963
5964 jmp xen_start_kernel
5965 -
5966 +END(startup_xen)
5967 __FINIT
5968 #endif
5969
5970 .pushsection .text
5971 .balign PAGE_SIZE
5972 ENTRY(hypercall_page)
5973 - .skip PAGE_SIZE
5974 + .rept (PAGE_SIZE / 32)
5975 + UNWIND_HINT_EMPTY
5976 + .skip 32
5977 + .endr
5978
5979 #define HYPERCALL(n) \
5980 .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
5981 .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
5982 #include <asm/xen-hypercalls.h>
5983 #undef HYPERCALL
5984 -
5985 +END(hypercall_page)
5986 .popsection
5987
5988 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
5989 diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
5990 index a4783da90ba8..0f860cf0d56d 100644
5991 --- a/block/bfq-iosched.c
5992 +++ b/block/bfq-iosched.c
5993 @@ -108,6 +108,7 @@
5994 #include "blk-mq-tag.h"
5995 #include "blk-mq-sched.h"
5996 #include "bfq-iosched.h"
5997 +#include "blk-wbt.h"
5998
5999 #define BFQ_BFQQ_FNS(name) \
6000 void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
6001 @@ -4775,7 +4776,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
6002 bfq_init_root_group(bfqd->root_group, bfqd);
6003 bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
6004
6005 -
6006 + wbt_disable_default(q);
6007 return 0;
6008
6009 out_free:
6010 diff --git a/block/blk-wbt.c b/block/blk-wbt.c
6011 index 6a9a0f03a67b..e59d59c11ebb 100644
6012 --- a/block/blk-wbt.c
6013 +++ b/block/blk-wbt.c
6014 @@ -654,7 +654,7 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
6015 }
6016
6017 /*
6018 - * Disable wbt, if enabled by default. Only called from CFQ.
6019 + * Disable wbt, if enabled by default.
6020 */
6021 void wbt_disable_default(struct request_queue *q)
6022 {
6023 diff --git a/crypto/lrw.c b/crypto/lrw.c
6024 index a8bfae4451bf..eb681e9fe574 100644
6025 --- a/crypto/lrw.c
6026 +++ b/crypto/lrw.c
6027 @@ -610,8 +610,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
6028 ecb_name[len - 1] = 0;
6029
6030 if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
6031 - "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME)
6032 - return -ENAMETOOLONG;
6033 + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) {
6034 + err = -ENAMETOOLONG;
6035 + goto err_drop_spawn;
6036 + }
6037 }
6038
6039 inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
6040 diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
6041 index 3c3a37b8503b..572b6c7303ed 100644
6042 --- a/drivers/acpi/apei/ghes.c
6043 +++ b/drivers/acpi/apei/ghes.c
6044 @@ -51,6 +51,7 @@
6045 #include <acpi/actbl1.h>
6046 #include <acpi/ghes.h>
6047 #include <acpi/apei.h>
6048 +#include <asm/fixmap.h>
6049 #include <asm/tlbflush.h>
6050 #include <ras/ras_event.h>
6051
6052 @@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
6053 * Because the memory area used to transfer hardware error information
6054 * from BIOS to Linux can be determined only in NMI, IRQ or timer
6055 * handler, but general ioremap can not be used in atomic context, so
6056 - * a special version of atomic ioremap is implemented for that.
6057 + * the fixmap is used instead.
6058 */
6059
6060 /*
6061 @@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex);
6062 /* virtual memory area for atomic ioremap */
6063 static struct vm_struct *ghes_ioremap_area;
6064 /*
6065 - * These 2 spinlock is used to prevent atomic ioremap virtual memory
6066 - * area from being mapped simultaneously.
6067 + * These 2 spinlocks are used to prevent the fixmap entries from being used
6068 + * simultaneously.
6069 */
6070 static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
6071 static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
6072 @@ -159,52 +160,36 @@ static void ghes_ioremap_exit(void)
6073
6074 static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
6075 {
6076 - unsigned long vaddr;
6077 phys_addr_t paddr;
6078 pgprot_t prot;
6079
6080 - vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
6081 -
6082 paddr = pfn << PAGE_SHIFT;
6083 prot = arch_apei_get_mem_attribute(paddr);
6084 - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
6085 + __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot);
6086
6087 - return (void __iomem *)vaddr;
6088 + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI);
6089 }
6090
6091 static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
6092 {
6093 - unsigned long vaddr, paddr;
6094 + phys_addr_t paddr;
6095 pgprot_t prot;
6096
6097 - vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
6098 -
6099 paddr = pfn << PAGE_SHIFT;
6100 prot = arch_apei_get_mem_attribute(paddr);
6101 + __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot);
6102
6103 - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
6104 -
6105 - return (void __iomem *)vaddr;
6106 + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ);
6107 }
6108
6109 -static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
6110 +static void ghes_iounmap_nmi(void)
6111 {
6112 - unsigned long vaddr = (unsigned long __force)vaddr_ptr;
6113 - void *base = ghes_ioremap_area->addr;
6114 -
6115 - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
6116 - unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
6117 - arch_apei_flush_tlb_one(vaddr);
6118 + clear_fixmap(FIX_APEI_GHES_NMI);
6119 }
6120
6121 -static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
6122 +static void ghes_iounmap_irq(void)
6123 {
6124 - unsigned long vaddr = (unsigned long __force)vaddr_ptr;
6125 - void *base = ghes_ioremap_area->addr;
6126 -
6127 - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
6128 - unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
6129 - arch_apei_flush_tlb_one(vaddr);
6130 + clear_fixmap(FIX_APEI_GHES_IRQ);
6131 }
6132
6133 static int ghes_estatus_pool_init(void)
6134 @@ -360,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
6135 paddr += trunk;
6136 buffer += trunk;
6137 if (in_nmi) {
6138 - ghes_iounmap_nmi(vaddr);
6139 + ghes_iounmap_nmi();
6140 raw_spin_unlock(&ghes_ioremap_lock_nmi);
6141 } else {
6142 - ghes_iounmap_irq(vaddr);
6143 + ghes_iounmap_irq();
6144 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
6145 }
6146 }
6147 @@ -851,17 +836,8 @@ static void ghes_sea_remove(struct ghes *ghes)
6148 synchronize_rcu();
6149 }
6150 #else /* CONFIG_ACPI_APEI_SEA */
6151 -static inline void ghes_sea_add(struct ghes *ghes)
6152 -{
6153 - pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
6154 - ghes->generic->header.source_id);
6155 -}
6156 -
6157 -static inline void ghes_sea_remove(struct ghes *ghes)
6158 -{
6159 - pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
6160 - ghes->generic->header.source_id);
6161 -}
6162 +static inline void ghes_sea_add(struct ghes *ghes) { }
6163 +static inline void ghes_sea_remove(struct ghes *ghes) { }
6164 #endif /* CONFIG_ACPI_APEI_SEA */
6165
6166 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
6167 @@ -1063,23 +1039,9 @@ static void ghes_nmi_init_cxt(void)
6168 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
6169 }
6170 #else /* CONFIG_HAVE_ACPI_APEI_NMI */
6171 -static inline void ghes_nmi_add(struct ghes *ghes)
6172 -{
6173 - pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
6174 - ghes->generic->header.source_id);
6175 - BUG();
6176 -}
6177 -
6178 -static inline void ghes_nmi_remove(struct ghes *ghes)
6179 -{
6180 - pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
6181 - ghes->generic->header.source_id);
6182 - BUG();
6183 -}
6184 -
6185 -static inline void ghes_nmi_init_cxt(void)
6186 -{
6187 -}
6188 +static inline void ghes_nmi_add(struct ghes *ghes) { }
6189 +static inline void ghes_nmi_remove(struct ghes *ghes) { }
6190 +static inline void ghes_nmi_init_cxt(void) { }
6191 #endif /* CONFIG_HAVE_ACPI_APEI_NMI */
6192
6193 static int ghes_probe(struct platform_device *ghes_dev)
6194 diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
6195 index a6de32530693..0459b1204694 100644
6196 --- a/drivers/base/power/opp/core.c
6197 +++ b/drivers/base/power/opp/core.c
6198 @@ -296,7 +296,7 @@ int dev_pm_opp_get_opp_count(struct device *dev)
6199 opp_table = _find_opp_table(dev);
6200 if (IS_ERR(opp_table)) {
6201 count = PTR_ERR(opp_table);
6202 - dev_err(dev, "%s: OPP table not found (%d)\n",
6203 + dev_dbg(dev, "%s: OPP table not found (%d)\n",
6204 __func__, count);
6205 return count;
6206 }
6207 diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
6208 index e2540113d0da..73d2d88ddc03 100644
6209 --- a/drivers/bluetooth/hci_bcm.c
6210 +++ b/drivers/bluetooth/hci_bcm.c
6211 @@ -68,7 +68,7 @@ struct bcm_device {
6212 u32 init_speed;
6213 u32 oper_speed;
6214 int irq;
6215 - u8 irq_polarity;
6216 + bool irq_active_low;
6217
6218 #ifdef CONFIG_PM
6219 struct hci_uart *hu;
6220 @@ -213,7 +213,9 @@ static int bcm_request_irq(struct bcm_data *bcm)
6221 }
6222
6223 err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake,
6224 - IRQF_TRIGGER_RISING, "host_wake", bdev);
6225 + bdev->irq_active_low ? IRQF_TRIGGER_FALLING :
6226 + IRQF_TRIGGER_RISING,
6227 + "host_wake", bdev);
6228 if (err)
6229 goto unlock;
6230
6231 @@ -253,7 +255,7 @@ static int bcm_setup_sleep(struct hci_uart *hu)
6232 struct sk_buff *skb;
6233 struct bcm_set_sleep_mode sleep_params = default_sleep_params;
6234
6235 - sleep_params.host_wake_active = !bcm->dev->irq_polarity;
6236 + sleep_params.host_wake_active = !bcm->dev->irq_active_low;
6237
6238 skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params),
6239 &sleep_params, HCI_INIT_TIMEOUT);
6240 @@ -690,10 +692,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
6241 };
6242
6243 #ifdef CONFIG_ACPI
6244 -static u8 acpi_active_low = ACPI_ACTIVE_LOW;
6245 -
6246 /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */
6247 -static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6248 +static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
6249 {
6250 .ident = "Asus T100TA",
6251 .matches = {
6252 @@ -701,7 +701,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6253 "ASUSTeK COMPUTER INC."),
6254 DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
6255 },
6256 - .driver_data = &acpi_active_low,
6257 },
6258 {
6259 .ident = "Asus T100CHI",
6260 @@ -710,7 +709,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6261 "ASUSTeK COMPUTER INC."),
6262 DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
6263 },
6264 - .driver_data = &acpi_active_low,
6265 },
6266 { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
6267 .ident = "Lenovo ThinkPad 8",
6268 @@ -718,7 +716,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
6269 DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
6270 DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
6271 },
6272 - .driver_data = &acpi_active_low,
6273 },
6274 { }
6275 };
6276 @@ -733,13 +730,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
6277 switch (ares->type) {
6278 case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
6279 irq = &ares->data.extended_irq;
6280 - dev->irq_polarity = irq->polarity;
6281 + dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW;
6282 break;
6283
6284 case ACPI_RESOURCE_TYPE_GPIO:
6285 gpio = &ares->data.gpio;
6286 if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
6287 - dev->irq_polarity = gpio->polarity;
6288 + dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW;
6289 break;
6290
6291 case ACPI_RESOURCE_TYPE_SERIAL_BUS:
6292 @@ -834,11 +831,11 @@ static int bcm_acpi_probe(struct bcm_device *dev)
6293 return ret;
6294 acpi_dev_free_resource_list(&resources);
6295
6296 - dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table);
6297 + dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
6298 if (dmi_id) {
6299 bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low",
6300 dmi_id->ident);
6301 - dev->irq_polarity = *(u8 *)dmi_id->driver_data;
6302 + dev->irq_active_low = true;
6303 }
6304
6305 return 0;
6306 diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
6307 index 6e2403805784..6aef3bde10d7 100644
6308 --- a/drivers/bluetooth/hci_ldisc.c
6309 +++ b/drivers/bluetooth/hci_ldisc.c
6310 @@ -41,6 +41,7 @@
6311 #include <linux/ioctl.h>
6312 #include <linux/skbuff.h>
6313 #include <linux/firmware.h>
6314 +#include <linux/serdev.h>
6315
6316 #include <net/bluetooth/bluetooth.h>
6317 #include <net/bluetooth/hci_core.h>
6318 @@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
6319 unsigned int set = 0;
6320 unsigned int clear = 0;
6321
6322 + if (hu->serdev) {
6323 + serdev_device_set_flow_control(hu->serdev, !enable);
6324 + serdev_device_set_rts(hu->serdev, !enable);
6325 + return;
6326 + }
6327 +
6328 if (enable) {
6329 /* Disable hardware flow control */
6330 ktermios = tty->termios;
6331 diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c
6332 index ab9e850b3707..2f385a57cd91 100644
6333 --- a/drivers/clk/sunxi-ng/ccu-sun5i.c
6334 +++ b/drivers/clk/sunxi-ng/ccu-sun5i.c
6335 @@ -982,8 +982,8 @@ static void __init sun5i_ccu_init(struct device_node *node,
6336
6337 /* Force the PLL-Audio-1x divider to 4 */
6338 val = readl(reg + SUN5I_PLL_AUDIO_REG);
6339 - val &= ~GENMASK(19, 16);
6340 - writel(val | (3 << 16), reg + SUN5I_PLL_AUDIO_REG);
6341 + val &= ~GENMASK(29, 26);
6342 + writel(val | (3 << 26), reg + SUN5I_PLL_AUDIO_REG);
6343
6344 /*
6345 * Use the peripheral PLL as the AHB parent, instead of CPU /
6346 diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6347 index 8af434815fba..241fb13f1c06 100644
6348 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6349 +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
6350 @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents,
6351 0x150, 0, 4, 24, 2, BIT(31),
6352 CLK_SET_RATE_PARENT);
6353
6354 -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0);
6355 +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0);
6356
6357 static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
6358
6359 diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
6360 index a32158e8f2e3..84a5e7f17f6f 100644
6361 --- a/drivers/clk/sunxi-ng/ccu_nm.c
6362 +++ b/drivers/clk/sunxi-ng/ccu_nm.c
6363 @@ -99,6 +99,9 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
6364 struct ccu_nm *nm = hw_to_ccu_nm(hw);
6365 struct _ccu_nm _nm;
6366
6367 + if (ccu_frac_helper_has_rate(&nm->common, &nm->frac, rate))
6368 + return rate;
6369 +
6370 _nm.min_n = nm->n.min ?: 1;
6371 _nm.max_n = nm->n.max ?: 1 << nm->n.width;
6372 _nm.min_m = 1;
6373 diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
6374 index 484cc8909d5c..ed4df58a855e 100644
6375 --- a/drivers/cpuidle/cpuidle.c
6376 +++ b/drivers/cpuidle/cpuidle.c
6377 @@ -208,6 +208,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
6378 return -EBUSY;
6379 }
6380 target_state = &drv->states[index];
6381 + broadcast = false;
6382 }
6383
6384 /* Take note of the planned idle state. */
6385 diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
6386 index ecfdcfe3698d..4f41d6da5acc 100644
6387 --- a/drivers/crypto/amcc/crypto4xx_core.h
6388 +++ b/drivers/crypto/amcc/crypto4xx_core.h
6389 @@ -34,12 +34,12 @@
6390 #define PPC405EX_CE_RESET 0x00000008
6391
6392 #define CRYPTO4XX_CRYPTO_PRIORITY 300
6393 -#define PPC4XX_LAST_PD 63
6394 -#define PPC4XX_NUM_PD 64
6395 -#define PPC4XX_LAST_GD 1023
6396 +#define PPC4XX_NUM_PD 256
6397 +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1)
6398 #define PPC4XX_NUM_GD 1024
6399 -#define PPC4XX_LAST_SD 63
6400 -#define PPC4XX_NUM_SD 64
6401 +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1)
6402 +#define PPC4XX_NUM_SD 256
6403 +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1)
6404 #define PPC4XX_SD_BUFFER_SIZE 2048
6405
6406 #define PD_ENTRY_INUSE 1
6407 diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6408 index 0ef9011a1856..02a50929af67 100644
6409 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6410 +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
6411 @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter,
6412 {
6413 u8 data;
6414 int ret = 0;
6415 + int retry;
6416
6417 if (!mode) {
6418 DRM_ERROR("NULL input\n");
6419 @@ -417,10 +418,19 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter,
6420 }
6421
6422 /* Read Status: i2c over aux */
6423 - ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_LSPCON_CURRENT_MODE,
6424 - &data, sizeof(data));
6425 + for (retry = 0; retry < 6; retry++) {
6426 + if (retry)
6427 + usleep_range(500, 1000);
6428 +
6429 + ret = drm_dp_dual_mode_read(adapter,
6430 + DP_DUAL_MODE_LSPCON_CURRENT_MODE,
6431 + &data, sizeof(data));
6432 + if (!ret)
6433 + break;
6434 + }
6435 +
6436 if (ret < 0) {
6437 - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n");
6438 + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n");
6439 return -EFAULT;
6440 }
6441
6442 diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
6443 index d1e0dc908048..04796d7d0fdb 100644
6444 --- a/drivers/gpu/drm/vc4/vc4_dsi.c
6445 +++ b/drivers/gpu/drm/vc4/vc4_dsi.c
6446 @@ -866,7 +866,8 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
6447 adjusted_mode->clock = pixel_clock_hz / 1000 + 1;
6448
6449 /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */
6450 - adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal);
6451 + adjusted_mode->htotal = adjusted_mode->clock * mode->htotal /
6452 + mode->clock;
6453 adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal;
6454 adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal;
6455
6456 diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
6457 index 937801ac2fe0..2cd134dd94d2 100644
6458 --- a/drivers/hv/vmbus_drv.c
6459 +++ b/drivers/hv/vmbus_drv.c
6460 @@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void)
6461 {
6462 int ret, t;
6463
6464 - if (x86_hyper != &x86_hyper_ms_hyperv)
6465 + if (x86_hyper_type != X86_HYPER_MS_HYPERV)
6466 return -ENODEV;
6467
6468 init_completion(&probe_event);
6469 diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
6470 index 752856b3a849..379de1829cdb 100644
6471 --- a/drivers/iio/accel/st_accel_core.c
6472 +++ b/drivers/iio/accel/st_accel_core.c
6473 @@ -164,7 +164,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6474 .mask_int2 = 0x00,
6475 .addr_ihl = 0x25,
6476 .mask_ihl = 0x02,
6477 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6478 + .stat_drdy = {
6479 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6480 + .mask = 0x07,
6481 + },
6482 },
6483 .sim = {
6484 .addr = 0x23,
6485 @@ -236,7 +239,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6486 .mask_ihl = 0x80,
6487 .addr_od = 0x22,
6488 .mask_od = 0x40,
6489 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6490 + .stat_drdy = {
6491 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6492 + .mask = 0x07,
6493 + },
6494 },
6495 .sim = {
6496 .addr = 0x23,
6497 @@ -318,7 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6498 .mask_int2 = 0x00,
6499 .addr_ihl = 0x23,
6500 .mask_ihl = 0x40,
6501 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6502 + .stat_drdy = {
6503 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6504 + .mask = 0x07,
6505 + },
6506 .ig1 = {
6507 .en_addr = 0x23,
6508 .en_mask = 0x08,
6509 @@ -389,7 +398,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6510 .drdy_irq = {
6511 .addr = 0x21,
6512 .mask_int1 = 0x04,
6513 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6514 + .stat_drdy = {
6515 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6516 + .mask = 0x07,
6517 + },
6518 },
6519 .sim = {
6520 .addr = 0x21,
6521 @@ -451,7 +463,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6522 .mask_ihl = 0x80,
6523 .addr_od = 0x22,
6524 .mask_od = 0x40,
6525 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6526 + .stat_drdy = {
6527 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6528 + .mask = 0x07,
6529 + },
6530 },
6531 .sim = {
6532 .addr = 0x21,
6533 @@ -569,7 +584,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6534 .drdy_irq = {
6535 .addr = 0x21,
6536 .mask_int1 = 0x04,
6537 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6538 + .stat_drdy = {
6539 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6540 + .mask = 0x07,
6541 + },
6542 },
6543 .sim = {
6544 .addr = 0x21,
6545 @@ -640,7 +658,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = {
6546 .mask_int2 = 0x00,
6547 .addr_ihl = 0x25,
6548 .mask_ihl = 0x02,
6549 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6550 + .stat_drdy = {
6551 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6552 + .mask = 0x07,
6553 + },
6554 },
6555 .sim = {
6556 .addr = 0x23,
6557 diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
6558 index 02e833b14db0..34115f05d5c4 100644
6559 --- a/drivers/iio/common/st_sensors/st_sensors_core.c
6560 +++ b/drivers/iio/common/st_sensors/st_sensors_core.c
6561 @@ -470,7 +470,7 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
6562 * different one. Take into account irq status register
6563 * to understand if irq trigger can be properly supported
6564 */
6565 - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6566 + if (sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6567 sdata->hw_irq_trigger = enable;
6568 return 0;
6569 }
6570 diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
6571 index fa73e6795359..fdcc5a891958 100644
6572 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
6573 +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
6574 @@ -31,7 +31,7 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
6575 int ret;
6576
6577 /* How would I know if I can't check it? */
6578 - if (!sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6579 + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6580 return -EINVAL;
6581
6582 /* No scan mask, no interrupt */
6583 @@ -39,23 +39,15 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
6584 return 0;
6585
6586 ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
6587 - sdata->sensor_settings->drdy_irq.addr_stat_drdy,
6588 + sdata->sensor_settings->drdy_irq.stat_drdy.addr,
6589 &status);
6590 if (ret < 0) {
6591 dev_err(sdata->dev,
6592 "error checking samples available\n");
6593 return ret;
6594 }
6595 - /*
6596 - * the lower bits of .active_scan_mask[0] is directly mapped
6597 - * to the channels on the sensor: either bit 0 for
6598 - * one-dimensional sensors, or e.g. x,y,z for accelerometers,
6599 - * gyroscopes or magnetometers. No sensor use more than 3
6600 - * channels, so cut the other status bits here.
6601 - */
6602 - status &= 0x07;
6603
6604 - if (status & (u8)indio_dev->active_scan_mask[0])
6605 + if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask)
6606 return 1;
6607
6608 return 0;
6609 @@ -212,7 +204,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
6610 * it was "our" interrupt.
6611 */
6612 if (sdata->int_pin_open_drain &&
6613 - sdata->sensor_settings->drdy_irq.addr_stat_drdy)
6614 + sdata->sensor_settings->drdy_irq.stat_drdy.addr)
6615 irq_trig |= IRQF_SHARED;
6616
6617 err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
6618 diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
6619 index e366422e8512..2536a8400c98 100644
6620 --- a/drivers/iio/gyro/st_gyro_core.c
6621 +++ b/drivers/iio/gyro/st_gyro_core.c
6622 @@ -118,7 +118,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6623 * drain settings, but only for INT1 and not
6624 * for the DRDY line on INT2.
6625 */
6626 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6627 + .stat_drdy = {
6628 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6629 + .mask = 0x07,
6630 + },
6631 },
6632 .multi_read_bit = true,
6633 .bootime = 2,
6634 @@ -188,7 +191,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6635 * drain settings, but only for INT1 and not
6636 * for the DRDY line on INT2.
6637 */
6638 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6639 + .stat_drdy = {
6640 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6641 + .mask = 0x07,
6642 + },
6643 },
6644 .multi_read_bit = true,
6645 .bootime = 2,
6646 @@ -253,7 +259,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = {
6647 * drain settings, but only for INT1 and not
6648 * for the DRDY line on INT2.
6649 */
6650 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6651 + .stat_drdy = {
6652 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6653 + .mask = 0x07,
6654 + },
6655 },
6656 .multi_read_bit = true,
6657 .bootime = 2,
6658 diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
6659 index 08aafba4481c..19031a7bce23 100644
6660 --- a/drivers/iio/magnetometer/st_magn_core.c
6661 +++ b/drivers/iio/magnetometer/st_magn_core.c
6662 @@ -317,7 +317,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
6663 },
6664 .drdy_irq = {
6665 /* drdy line is routed drdy pin */
6666 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6667 + .stat_drdy = {
6668 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6669 + .mask = 0x07,
6670 + },
6671 },
6672 .multi_read_bit = true,
6673 .bootime = 2,
6674 @@ -361,7 +364,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
6675 .drdy_irq = {
6676 .addr = 0x62,
6677 .mask_int1 = 0x01,
6678 - .addr_stat_drdy = 0x67,
6679 + .stat_drdy = {
6680 + .addr = 0x67,
6681 + .mask = 0x07,
6682 + },
6683 },
6684 .multi_read_bit = false,
6685 .bootime = 2,
6686 diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
6687 index 34611a8ea2ce..ea075fcd5a6f 100644
6688 --- a/drivers/iio/pressure/st_pressure_core.c
6689 +++ b/drivers/iio/pressure/st_pressure_core.c
6690 @@ -287,7 +287,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6691 .mask_ihl = 0x80,
6692 .addr_od = 0x22,
6693 .mask_od = 0x40,
6694 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6695 + .stat_drdy = {
6696 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6697 + .mask = 0x03,
6698 + },
6699 },
6700 .multi_read_bit = true,
6701 .bootime = 2,
6702 @@ -395,7 +398,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6703 .mask_ihl = 0x80,
6704 .addr_od = 0x22,
6705 .mask_od = 0x40,
6706 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6707 + .stat_drdy = {
6708 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6709 + .mask = 0x03,
6710 + },
6711 },
6712 .multi_read_bit = true,
6713 .bootime = 2,
6714 @@ -454,7 +460,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
6715 .mask_ihl = 0x80,
6716 .addr_od = 0x12,
6717 .mask_od = 0x40,
6718 - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
6719 + .stat_drdy = {
6720 + .addr = ST_SENSORS_DEFAULT_STAT_ADDR,
6721 + .mask = 0x03,
6722 + },
6723 },
6724 .multi_read_bit = false,
6725 .bootime = 2,
6726 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6727 index 747efd1ae5a6..8208c30f03c5 100644
6728 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6729 +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
6730 @@ -1001,6 +1001,11 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
6731 }
6732 }
6733
6734 + if (!ne) {
6735 + dev_err(dev, "Reserved loop qp is absent!\n");
6736 + goto free_work;
6737 + }
6738 +
6739 do {
6740 ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
6741 if (ret < 0) {
6742 diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
6743 index c1b5f38f31a5..3b4916680018 100644
6744 --- a/drivers/infiniband/sw/rxe/rxe_pool.c
6745 +++ b/drivers/infiniband/sw/rxe/rxe_pool.c
6746 @@ -404,6 +404,8 @@ void *rxe_alloc(struct rxe_pool *pool)
6747 elem = kmem_cache_zalloc(pool_cache(pool),
6748 (pool->flags & RXE_POOL_ATOMIC) ?
6749 GFP_ATOMIC : GFP_KERNEL);
6750 + if (!elem)
6751 + return NULL;
6752
6753 elem->pool = pool;
6754 kref_init(&elem->ref_cnt);
6755 diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6756 index afa938bd26d6..a72278e9cd27 100644
6757 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6758 +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
6759 @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter)
6760 rcu_assign_pointer(adapter->mactbl, NULL);
6761 synchronize_rcu();
6762 opa_vnic_free_mac_tbl(mactbl);
6763 + adapter->info.vport.mac_tbl_digest = 0;
6764 mutex_unlock(&adapter->mactbl_lock);
6765 }
6766
6767 diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6768 index c2733964379c..9655cc3aa3a0 100644
6769 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6770 +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c
6771 @@ -348,7 +348,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter,
6772 void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6773 struct opa_veswport_iface_macs *macs)
6774 {
6775 - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0;
6776 + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0;
6777 struct netdev_hw_addr *ha;
6778
6779 start_idx = be16_to_cpu(macs->start_idx);
6780 @@ -359,8 +359,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6781
6782 /* Do not include EM specified MAC address */
6783 if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr,
6784 - ARRAY_SIZE(adapter->info.vport.base_mac_addr)))
6785 + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) {
6786 + em_macs++;
6787 continue;
6788 + }
6789
6790 if (start_idx > idx++)
6791 continue;
6792 @@ -383,7 +385,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter,
6793 }
6794
6795 tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) +
6796 - netdev_uc_count(adapter->netdev);
6797 + netdev_uc_count(adapter->netdev) - em_macs;
6798 macs->tot_macs_in_lst = cpu_to_be16(tot_macs);
6799 macs->num_macs_in_msg = cpu_to_be16(count);
6800 macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count);
6801 diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
6802 index 0f586780ceb4..1ae5c1ef3f5b 100644
6803 --- a/drivers/input/mouse/vmmouse.c
6804 +++ b/drivers/input/mouse/vmmouse.c
6805 @@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse)
6806 /*
6807 * Array of supported hypervisors.
6808 */
6809 -static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = {
6810 - &x86_hyper_vmware,
6811 -#ifdef CONFIG_KVM_GUEST
6812 - &x86_hyper_kvm,
6813 -#endif
6814 +static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = {
6815 + X86_HYPER_VMWARE,
6816 + X86_HYPER_KVM,
6817 };
6818
6819 /**
6820 @@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void)
6821 int i;
6822
6823 for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++)
6824 - if (vmmouse_supported_hypervisors[i] == x86_hyper)
6825 + if (vmmouse_supported_hypervisors[i] == x86_hyper_type)
6826 return true;
6827
6828 return false;
6829 diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
6830 index 905729191d3e..78183f90820e 100644
6831 --- a/drivers/leds/leds-pca955x.c
6832 +++ b/drivers/leds/leds-pca955x.c
6833 @@ -61,6 +61,10 @@
6834 #define PCA955X_LS_BLINK0 0x2 /* Blink at PWM0 rate */
6835 #define PCA955X_LS_BLINK1 0x3 /* Blink at PWM1 rate */
6836
6837 +#define PCA955X_GPIO_INPUT LED_OFF
6838 +#define PCA955X_GPIO_HIGH LED_OFF
6839 +#define PCA955X_GPIO_LOW LED_FULL
6840 +
6841 enum pca955x_type {
6842 pca9550,
6843 pca9551,
6844 @@ -329,9 +333,9 @@ static int pca955x_set_value(struct gpio_chip *gc, unsigned int offset,
6845 struct pca955x_led *led = &pca955x->leds[offset];
6846
6847 if (val)
6848 - return pca955x_led_set(&led->led_cdev, LED_FULL);
6849 - else
6850 - return pca955x_led_set(&led->led_cdev, LED_OFF);
6851 + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_HIGH);
6852 +
6853 + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_LOW);
6854 }
6855
6856 static void pca955x_gpio_set_value(struct gpio_chip *gc, unsigned int offset,
6857 @@ -355,8 +359,11 @@ static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset)
6858 static int pca955x_gpio_direction_input(struct gpio_chip *gc,
6859 unsigned int offset)
6860 {
6861 - /* To use as input ensure pin is not driven */
6862 - return pca955x_set_value(gc, offset, 0);
6863 + struct pca955x *pca955x = gpiochip_get_data(gc);
6864 + struct pca955x_led *led = &pca955x->leds[offset];
6865 +
6866 + /* To use as input ensure pin is not driven. */
6867 + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_INPUT);
6868 }
6869
6870 static int pca955x_gpio_direction_output(struct gpio_chip *gc,
6871 diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
6872 index 35e82b14ded7..ddf0a4341ae2 100644
6873 --- a/drivers/md/dm-mpath.c
6874 +++ b/drivers/md/dm-mpath.c
6875 @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m,
6876
6877 pgpath = path_to_pgpath(path);
6878
6879 - if (unlikely(lockless_dereference(m->current_pg) != pg)) {
6880 + if (unlikely(READ_ONCE(m->current_pg) != pg)) {
6881 /* Only update current_pgpath if pg changed */
6882 spin_lock_irqsave(&m->lock, flags);
6883 m->current_pgpath = pgpath;
6884 @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
6885 }
6886
6887 /* Were we instructed to switch PG? */
6888 - if (lockless_dereference(m->next_pg)) {
6889 + if (READ_ONCE(m->next_pg)) {
6890 spin_lock_irqsave(&m->lock, flags);
6891 pg = m->next_pg;
6892 if (!pg) {
6893 @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
6894
6895 /* Don't change PG until it has no remaining paths */
6896 check_current_pg:
6897 - pg = lockless_dereference(m->current_pg);
6898 + pg = READ_ONCE(m->current_pg);
6899 if (pg) {
6900 pgpath = choose_path_in_pg(m, pg, nr_bytes);
6901 if (!IS_ERR_OR_NULL(pgpath))
6902 @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
6903 struct request *clone;
6904
6905 /* Do we need to select a new pgpath? */
6906 - pgpath = lockless_dereference(m->current_pgpath);
6907 + pgpath = READ_ONCE(m->current_pgpath);
6908 if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
6909 pgpath = choose_pgpath(m, nr_bytes);
6910
6911 @@ -533,7 +533,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
6912 bool queue_io;
6913
6914 /* Do we need to select a new pgpath? */
6915 - pgpath = lockless_dereference(m->current_pgpath);
6916 + pgpath = READ_ONCE(m->current_pgpath);
6917 queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
6918 if (!pgpath || !queue_io)
6919 pgpath = choose_pgpath(m, nr_bytes);
6920 @@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
6921 struct pgpath *current_pgpath;
6922 int r;
6923
6924 - current_pgpath = lockless_dereference(m->current_pgpath);
6925 + current_pgpath = READ_ONCE(m->current_pgpath);
6926 if (!current_pgpath)
6927 current_pgpath = choose_pgpath(m, 0);
6928
6929 @@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
6930 }
6931
6932 if (r == -ENOTCONN) {
6933 - if (!lockless_dereference(m->current_pg)) {
6934 + if (!READ_ONCE(m->current_pg)) {
6935 /* Path status changed, redo selection */
6936 (void) choose_pgpath(m, 0);
6937 }
6938 @@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_target *ti)
6939 return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
6940
6941 /* Guess which priority_group will be used at next mapping time */
6942 - pg = lockless_dereference(m->current_pg);
6943 - next_pg = lockless_dereference(m->next_pg);
6944 - if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
6945 + pg = READ_ONCE(m->current_pg);
6946 + next_pg = READ_ONCE(m->next_pg);
6947 + if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
6948 pg = next_pg;
6949
6950 if (!pg) {
6951 diff --git a/drivers/md/md.c b/drivers/md/md.c
6952 index 98ea86309ceb..6bf093cef958 100644
6953 --- a/drivers/md/md.c
6954 +++ b/drivers/md/md.c
6955 @@ -7468,8 +7468,8 @@ void md_wakeup_thread(struct md_thread *thread)
6956 {
6957 if (thread) {
6958 pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
6959 - if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags))
6960 - wake_up(&thread->wqueue);
6961 + set_bit(THREAD_WAKEUP, &thread->flags);
6962 + wake_up(&thread->wqueue);
6963 }
6964 }
6965 EXPORT_SYMBOL(md_wakeup_thread);
6966 diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
6967 index eda38cbe8530..41f2a9f6851d 100644
6968 --- a/drivers/misc/pti.c
6969 +++ b/drivers/misc/pti.c
6970 @@ -32,7 +32,7 @@
6971 #include <linux/pci.h>
6972 #include <linux/mutex.h>
6973 #include <linux/miscdevice.h>
6974 -#include <linux/pti.h>
6975 +#include <linux/intel-pti.h>
6976 #include <linux/slab.h>
6977 #include <linux/uaccess.h>
6978
6979 diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
6980 index 1e688bfec567..9047c0a529b2 100644
6981 --- a/drivers/misc/vmw_balloon.c
6982 +++ b/drivers/misc/vmw_balloon.c
6983 @@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void)
6984 * Check if we are running on VMware's hypervisor and bail out
6985 * if we are not.
6986 */
6987 - if (x86_hyper != &x86_hyper_vmware)
6988 + if (x86_hyper_type != X86_HYPER_VMWARE)
6989 return -ENODEV;
6990
6991 for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
6992 diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
6993 index c66abd476023..3b0db01ead1f 100644
6994 --- a/drivers/net/ethernet/ibm/ibmvnic.c
6995 +++ b/drivers/net/ethernet/ibm/ibmvnic.c
6996 @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev)
6997 }
6998
6999 rc = __ibmvnic_open(netdev);
7000 + netif_carrier_on(netdev);
7001 mutex_unlock(&adapter->reset_lock);
7002
7003 return rc;
7004 @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
7005 if (rc)
7006 goto ibmvnic_init_fail;
7007
7008 + netif_carrier_off(netdev);
7009 rc = register_netdev(netdev);
7010 if (rc) {
7011 dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
7012 diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
7013 index 689c413b7782..d2f9a2dd76a2 100644
7014 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h
7015 +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
7016 @@ -526,8 +526,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
7017 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
7018 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
7019 int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
7020 -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
7021 - int unused);
7022 +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7023 + int __always_unused min_rate, int max_rate);
7024 int fm10k_ndo_get_vf_config(struct net_device *netdev,
7025 int vf_idx, struct ifla_vf_info *ivi);
7026
7027 diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7028 index 5f4dac0d36ef..e72fd52bacfe 100644
7029 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7030 +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
7031 @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
7032 struct fm10k_mbx_info *mbx = &vf_info->mbx;
7033 u16 glort = vf_info->glort;
7034
7035 + /* process the SM mailbox first to drain outgoing messages */
7036 + hw->mbx.ops.process(hw, &hw->mbx);
7037 +
7038 /* verify port mapping is valid, if not reset port */
7039 if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
7040 hw->iov.ops.reset_lport(hw, vf_info);
7041 @@ -482,7 +485,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
7042 }
7043
7044 int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7045 - int __always_unused unused, int rate)
7046 + int __always_unused min_rate, int max_rate)
7047 {
7048 struct fm10k_intfc *interface = netdev_priv(netdev);
7049 struct fm10k_iov_data *iov_data = interface->iov_data;
7050 @@ -493,14 +496,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
7051 return -EINVAL;
7052
7053 /* rate limit cannot be less than 10Mbs or greater than link speed */
7054 - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
7055 + if (max_rate &&
7056 + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
7057 return -EINVAL;
7058
7059 /* store values */
7060 - iov_data->vf_info[vf_idx].rate = rate;
7061 + iov_data->vf_info[vf_idx].rate = max_rate;
7062
7063 /* update hardware configuration */
7064 - hw->iov.ops.configure_tc(hw, vf_idx, rate);
7065 + hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
7066
7067 return 0;
7068 }
7069 diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
7070 index ea20aacd5e1d..b2cde9b16d82 100644
7071 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
7072 +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
7073 @@ -2874,14 +2874,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
7074 static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
7075 {
7076 struct i40e_vsi *vsi = ring->vsi;
7077 + int cpu;
7078
7079 if (!ring->q_vector || !ring->netdev)
7080 return;
7081
7082 if ((vsi->tc_config.numtc <= 1) &&
7083 !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
7084 - netif_set_xps_queue(ring->netdev,
7085 - get_cpu_mask(ring->q_vector->v_idx),
7086 + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
7087 + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
7088 ring->queue_index);
7089 }
7090
7091 @@ -3471,6 +3472,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
7092 int tx_int_idx = 0;
7093 int vector, err;
7094 int irq_num;
7095 + int cpu;
7096
7097 for (vector = 0; vector < q_vectors; vector++) {
7098 struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
7099 @@ -3506,10 +3508,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
7100 q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
7101 q_vector->affinity_notify.release = i40e_irq_affinity_release;
7102 irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
7103 - /* get_cpu_mask returns a static constant mask with
7104 - * a permanent lifetime so it's ok to use here.
7105 + /* Spread affinity hints out across online CPUs.
7106 + *
7107 + * get_cpu_mask returns a static constant mask with
7108 + * a permanent lifetime so it's ok to pass to
7109 + * irq_set_affinity_hint without making a copy.
7110 */
7111 - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
7112 + cpu = cpumask_local_spread(q_vector->v_idx, -1);
7113 + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
7114 }
7115
7116 vsi->irqs_ready = true;
7117 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7118 index 4d1e670f490e..e368b0237a1b 100644
7119 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7120 +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
7121 @@ -1008,8 +1008,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
7122 set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
7123 clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
7124 /* Do not notify the client during VF init */
7125 - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
7126 - &vf->vf_states))
7127 + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
7128 + &vf->vf_states))
7129 i40e_notify_client_of_vf_reset(pf, abs_vf_id);
7130 vf->num_vlan = 0;
7131 }
7132 @@ -2779,6 +2779,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
7133 struct i40e_mac_filter *f;
7134 struct i40e_vf *vf;
7135 int ret = 0;
7136 + struct hlist_node *h;
7137 int bkt;
7138
7139 /* validate the request */
7140 @@ -2817,7 +2818,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
7141 /* Delete all the filters for this VSI - we're going to kill it
7142 * anyway.
7143 */
7144 - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
7145 + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
7146 __i40e_del_filter(vsi, f);
7147
7148 spin_unlock_bh(&vsi->mac_filter_hash_lock);
7149 diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7150 index 1825d956bb00..1ccad6f30ebf 100644
7151 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7152 +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
7153 @@ -546,6 +546,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
7154 unsigned int vector, q_vectors;
7155 unsigned int rx_int_idx = 0, tx_int_idx = 0;
7156 int irq_num, err;
7157 + int cpu;
7158
7159 i40evf_irq_disable(adapter);
7160 /* Decrement for Other and TCP Timer vectors */
7161 @@ -584,10 +585,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
7162 q_vector->affinity_notify.release =
7163 i40evf_irq_affinity_release;
7164 irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
7165 - /* get_cpu_mask returns a static constant mask with
7166 - * a permanent lifetime so it's ok to use here.
7167 + /* Spread the IRQ affinity hints across online CPUs. Note that
7168 + * get_cpu_mask returns a mask with a permanent lifetime so
7169 + * it's safe to use as a hint for irq_set_affinity_hint.
7170 */
7171 - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
7172 + cpu = cpumask_local_spread(q_vector->v_idx, -1);
7173 + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
7174 }
7175
7176 return 0;
7177 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
7178 index b0031c5ff767..667dbc7d4a4e 100644
7179 --- a/drivers/net/ethernet/intel/igb/igb_main.c
7180 +++ b/drivers/net/ethernet/intel/igb/igb_main.c
7181 @@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
7182 /* Setup and initialize a copy of the hw vlan table array */
7183 adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
7184 GFP_ATOMIC);
7185 + if (!adapter->shadow_vfta)
7186 + return -ENOMEM;
7187
7188 /* This call may decrease the number of queues */
7189 if (igb_init_interrupt_scheme(adapter, true)) {
7190 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7191 index 6e6ab6f6875e..64429a14c630 100644
7192 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7193 +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
7194 @@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
7195 fw_cmd.ver_build = build;
7196 fw_cmd.ver_sub = sub;
7197 fw_cmd.hdr.checksum = 0;
7198 - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
7199 - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
7200 fw_cmd.pad = 0;
7201 fw_cmd.pad2 = 0;
7202 + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
7203 + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
7204
7205 for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
7206 ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
7207 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7208 index 19fbb2f28ea4..8a85217845ae 100644
7209 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7210 +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
7211 @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
7212 /* convert offset from words to bytes */
7213 buffer.address = cpu_to_be32((offset + current_word) * 2);
7214 buffer.length = cpu_to_be16(words_to_read * 2);
7215 + buffer.pad2 = 0;
7216 + buffer.pad3 = 0;
7217
7218 status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
7219 IXGBE_HI_COMMAND_TIMEOUT);
7220 diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
7221 index c1e52b9dc58d..5f93e6add563 100644
7222 --- a/drivers/net/phy/at803x.c
7223 +++ b/drivers/net/phy/at803x.c
7224 @@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev,
7225 mac = (const u8 *) ndev->dev_addr;
7226
7227 if (!is_valid_ether_addr(mac))
7228 - return -EFAULT;
7229 + return -EINVAL;
7230
7231 for (i = 0; i < 3; i++) {
7232 phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
7233 diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
7234 index ac41c8be9200..0fd8e164339c 100644
7235 --- a/drivers/pci/iov.c
7236 +++ b/drivers/pci/iov.c
7237 @@ -162,7 +162,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
7238
7239 pci_device_add(virtfn, virtfn->bus);
7240
7241 - pci_bus_add_device(virtfn);
7242 sprintf(buf, "virtfn%u", id);
7243 rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
7244 if (rc)
7245 @@ -173,6 +172,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
7246
7247 kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);
7248
7249 + pci_bus_add_device(virtfn);
7250 +
7251 return 0;
7252
7253 failed2:
7254 diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
7255 index 6078dfc11b11..74f1c57ab93b 100644
7256 --- a/drivers/pci/pci.c
7257 +++ b/drivers/pci/pci.c
7258 @@ -4356,6 +4356,10 @@ static bool pci_bus_resetable(struct pci_bus *bus)
7259 {
7260 struct pci_dev *dev;
7261
7262 +
7263 + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET))
7264 + return false;
7265 +
7266 list_for_each_entry(dev, &bus->devices, bus_list) {
7267 if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET ||
7268 (dev->subordinate && !pci_bus_resetable(dev->subordinate)))
7269 diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
7270 index 890efcc574cb..744805232155 100644
7271 --- a/drivers/pci/pcie/aer/aerdrv_core.c
7272 +++ b/drivers/pci/pcie/aer/aerdrv_core.c
7273 @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
7274 * If the error is reported by an end point, we think this
7275 * error is related to the upstream link of the end point.
7276 */
7277 - pci_walk_bus(dev->bus, cb, &result_data);
7278 + if (state == pci_channel_io_normal)
7279 + /*
7280 + * the error is non fatal so the bus is ok, just invoke
7281 + * the callback for the function that logged the error.
7282 + */
7283 + cb(dev, &result_data);
7284 + else
7285 + pci_walk_bus(dev->bus, cb, &result_data);
7286 }
7287
7288 return result_data.result;
7289 diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c
7290 index f3796164329e..d4aeac3477f5 100644
7291 --- a/drivers/platform/x86/asus-wireless.c
7292 +++ b/drivers/platform/x86/asus-wireless.c
7293 @@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event)
7294 return;
7295 }
7296 input_report_key(data->idev, KEY_RFKILL, 1);
7297 + input_sync(data->idev);
7298 input_report_key(data->idev, KEY_RFKILL, 0);
7299 input_sync(data->idev);
7300 }
7301 diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
7302 index 8cec9a02c0b8..9eb32ead63db 100644
7303 --- a/drivers/rtc/interface.c
7304 +++ b/drivers/rtc/interface.c
7305 @@ -779,7 +779,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
7306 }
7307
7308 timerqueue_add(&rtc->timerqueue, &timer->node);
7309 - if (!next) {
7310 + if (!next || ktime_before(timer->node.expires, next->expires)) {
7311 struct rtc_wkalrm alarm;
7312 int err;
7313 alarm.time = rtc_ktime_to_tm(timer->node.expires);
7314 diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
7315 index e1687e19c59f..a30f24cb6c83 100644
7316 --- a/drivers/rtc/rtc-pl031.c
7317 +++ b/drivers/rtc/rtc-pl031.c
7318 @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev)
7319
7320 dev_pm_clear_wake_irq(&adev->dev);
7321 device_init_wakeup(&adev->dev, false);
7322 - free_irq(adev->irq[0], ldata);
7323 + if (adev->irq[0])
7324 + free_irq(adev->irq[0], ldata);
7325 rtc_device_unregister(ldata->rtc);
7326 iounmap(ldata->base);
7327 kfree(ldata);
7328 @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id)
7329 goto out_no_rtc;
7330 }
7331
7332 - if (request_irq(adev->irq[0], pl031_interrupt,
7333 - vendor->irqflags, "rtc-pl031", ldata)) {
7334 - ret = -EIO;
7335 - goto out_no_irq;
7336 + if (adev->irq[0]) {
7337 + ret = request_irq(adev->irq[0], pl031_interrupt,
7338 + vendor->irqflags, "rtc-pl031", ldata);
7339 + if (ret)
7340 + goto out_no_irq;
7341 + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
7342 }
7343 - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]);
7344 return 0;
7345
7346 out_no_irq:
7347 diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7348 index 1d02cf9fe06c..30d5f0ef29bb 100644
7349 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7350 +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
7351 @@ -1575,6 +1575,7 @@ static void release_offload_resources(struct cxgbi_sock *csk)
7352 csk, csk->state, csk->flags, csk->tid);
7353
7354 cxgbi_sock_free_cpl_skbs(csk);
7355 + cxgbi_sock_purge_write_queue(csk);
7356 if (csk->wr_cred != csk->wr_max_cred) {
7357 cxgbi_sock_purge_wr_queue(csk);
7358 cxgbi_sock_reset_wr_list(csk);
7359 diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
7360 index 499df9d17339..d9a03beb76a4 100644
7361 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c
7362 +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
7363 @@ -4983,7 +4983,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
7364 lpfc_cancel_retry_delay_tmo(vport, ndlp);
7365 if ((ndlp->nlp_flag & NLP_DEFER_RM) &&
7366 !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) &&
7367 - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) {
7368 + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) &&
7369 + phba->sli_rev != LPFC_SLI_REV4) {
7370 /* For this case we need to cleanup the default rpi
7371 * allocated by the firmware.
7372 */
7373 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
7374 index 1db0a38683f4..2b145966c73f 100644
7375 --- a/drivers/scsi/lpfc/lpfc_hw4.h
7376 +++ b/drivers/scsi/lpfc/lpfc_hw4.h
7377 @@ -3636,7 +3636,7 @@ struct lpfc_mbx_get_port_name {
7378 #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4
7379 #define MB_CQE_STATUS_DMA_FAILED 0x5
7380
7381 -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8
7382 +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1
7383 struct lpfc_mbx_wr_object {
7384 struct mbox_header header;
7385 union {
7386 diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
7387 index 3c5b054a56ac..7ac1a067d780 100644
7388 --- a/drivers/scsi/lpfc/lpfc_nvmet.c
7389 +++ b/drivers/scsi/lpfc/lpfc_nvmet.c
7390 @@ -1464,6 +1464,7 @@ static struct lpfc_nvmet_ctxbuf *
7391 lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
7392 struct lpfc_nvmet_ctx_info *current_infop)
7393 {
7394 +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
7395 struct lpfc_nvmet_ctxbuf *ctx_buf = NULL;
7396 struct lpfc_nvmet_ctx_info *get_infop;
7397 int i;
7398 @@ -1511,6 +1512,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba,
7399 get_infop = get_infop->nvmet_ctx_next_cpu;
7400 }
7401
7402 +#endif
7403 /* Nothing found, all contexts for the MRQ are in-flight */
7404 return NULL;
7405 }
7406 diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7407 index 22998cbd538f..33ff691878e2 100644
7408 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7409 +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
7410 @@ -4804,6 +4804,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
7411 } else if (log_info == VIRTUAL_IO_FAILED_RETRY) {
7412 scmd->result = DID_RESET << 16;
7413 break;
7414 + } else if ((scmd->device->channel == RAID_CHANNEL) &&
7415 + (scsi_state == (MPI2_SCSI_STATE_TERMINATED |
7416 + MPI2_SCSI_STATE_NO_SCSI_STATUS))) {
7417 + scmd->result = DID_RESET << 16;
7418 + break;
7419 }
7420 scmd->result = DID_SOFT_ERROR << 16;
7421 break;
7422 diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c
7423 index 3f4148c92308..0f538b8c3a07 100644
7424 --- a/drivers/staging/greybus/light.c
7425 +++ b/drivers/staging/greybus/light.c
7426 @@ -925,6 +925,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel)
7427 return;
7428
7429 led_classdev_unregister(cdev);
7430 + kfree(cdev->name);
7431 + cdev->name = NULL;
7432 channel->led = NULL;
7433 }
7434
7435 diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c
7436 index 7952357df9c8..edb6e4e9ef3a 100644
7437 --- a/drivers/tee/optee/core.c
7438 +++ b/drivers/tee/optee/core.c
7439 @@ -590,7 +590,6 @@ static int __init optee_driver_init(void)
7440 return -ENODEV;
7441
7442 np = of_find_matching_node(fw_np, optee_match);
7443 - of_node_put(fw_np);
7444 if (!np)
7445 return -ENODEV;
7446
7447 diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c
7448 index bd3572c41585..6d8906d65476 100644
7449 --- a/drivers/thermal/hisi_thermal.c
7450 +++ b/drivers/thermal/hisi_thermal.c
7451 @@ -35,8 +35,9 @@
7452 #define TEMP0_RST_MSK (0x1C)
7453 #define TEMP0_VALUE (0x28)
7454
7455 -#define HISI_TEMP_BASE (-60)
7456 +#define HISI_TEMP_BASE (-60000)
7457 #define HISI_TEMP_RESET (100000)
7458 +#define HISI_TEMP_STEP (784)
7459
7460 #define HISI_MAX_SENSORS 4
7461
7462 @@ -61,19 +62,38 @@ struct hisi_thermal_data {
7463 void __iomem *regs;
7464 };
7465
7466 -/* in millicelsius */
7467 -static inline int _step_to_temp(int step)
7468 +/*
7469 + * The temperature computation on the tsensor is as follow:
7470 + * Unit: millidegree Celsius
7471 + * Step: 255/200 (0.7843)
7472 + * Temperature base: -60°C
7473 + *
7474 + * The register is programmed in temperature steps, every step is 784
7475 + * millidegree and begins at -60 000 m°C
7476 + *
7477 + * The temperature from the steps:
7478 + *
7479 + * Temp = TempBase + (steps x 784)
7480 + *
7481 + * and the steps from the temperature:
7482 + *
7483 + * steps = (Temp - TempBase) / 784
7484 + *
7485 + */
7486 +static inline int hisi_thermal_step_to_temp(int step)
7487 {
7488 - /*
7489 - * Every step equals (1 * 200) / 255 celsius, and finally
7490 - * need convert to millicelsius.
7491 - */
7492 - return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255));
7493 + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP);
7494 +}
7495 +
7496 +static inline long hisi_thermal_temp_to_step(long temp)
7497 +{
7498 + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP;
7499 }
7500
7501 -static inline long _temp_to_step(long temp)
7502 +static inline long hisi_thermal_round_temp(int temp)
7503 {
7504 - return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000;
7505 + return hisi_thermal_step_to_temp(
7506 + hisi_thermal_temp_to_step(temp));
7507 }
7508
7509 static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
7510 @@ -99,7 +119,7 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data,
7511 usleep_range(3000, 5000);
7512
7513 val = readl(data->regs + TEMP0_VALUE);
7514 - val = _step_to_temp(val);
7515 + val = hisi_thermal_step_to_temp(val);
7516
7517 mutex_unlock(&data->thermal_lock);
7518
7519 @@ -126,10 +146,11 @@ static void hisi_thermal_enable_bind_irq_sensor
7520 writel((sensor->id << 12), data->regs + TEMP0_CFG);
7521
7522 /* enable for interrupt */
7523 - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,
7524 + writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00,
7525 data->regs + TEMP0_TH);
7526
7527 - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH);
7528 + writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET),
7529 + data->regs + TEMP0_RST_TH);
7530
7531 /* enable module */
7532 writel(0x1, data->regs + TEMP0_RST_MSK);
7533 @@ -230,7 +251,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
7534 sensor = &data->sensors[data->irq_bind_sensor];
7535
7536 dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n",
7537 - sensor->thres_temp / 1000);
7538 + sensor->thres_temp);
7539 mutex_unlock(&data->thermal_lock);
7540
7541 for (i = 0; i < HISI_MAX_SENSORS; i++) {
7542 @@ -269,7 +290,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,
7543
7544 for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) {
7545 if (trip[i].type == THERMAL_TRIP_PASSIVE) {
7546 - sensor->thres_temp = trip[i].temperature;
7547 + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature);
7548 break;
7549 }
7550 }
7551 @@ -317,15 +338,6 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7552 if (data->irq < 0)
7553 return data->irq;
7554
7555 - ret = devm_request_threaded_irq(&pdev->dev, data->irq,
7556 - hisi_thermal_alarm_irq,
7557 - hisi_thermal_alarm_irq_thread,
7558 - 0, "hisi_thermal", data);
7559 - if (ret < 0) {
7560 - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
7561 - return ret;
7562 - }
7563 -
7564 platform_set_drvdata(pdev, data);
7565
7566 data->clk = devm_clk_get(&pdev->dev, "thermal_clk");
7567 @@ -345,8 +357,7 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7568 }
7569
7570 hisi_thermal_enable_bind_irq_sensor(data);
7571 - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED,
7572 - &data->irq_enabled);
7573 + data->irq_enabled = true;
7574
7575 for (i = 0; i < HISI_MAX_SENSORS; ++i) {
7576 ret = hisi_thermal_register_sensor(pdev, data,
7577 @@ -358,6 +369,17 @@ static int hisi_thermal_probe(struct platform_device *pdev)
7578 hisi_thermal_toggle_sensor(&data->sensors[i], true);
7579 }
7580
7581 + ret = devm_request_threaded_irq(&pdev->dev, data->irq,
7582 + hisi_thermal_alarm_irq,
7583 + hisi_thermal_alarm_irq_thread,
7584 + 0, "hisi_thermal", data);
7585 + if (ret < 0) {
7586 + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
7587 + return ret;
7588 + }
7589 +
7590 + enable_irq(data->irq);
7591 +
7592 return 0;
7593 }
7594
7595 diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
7596 index 5628fe114347..91335e6de88a 100644
7597 --- a/drivers/vfio/pci/vfio_pci_config.c
7598 +++ b/drivers/vfio/pci/vfio_pci_config.c
7599 @@ -849,11 +849,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
7600
7601 /*
7602 * Allow writes to device control fields, except devctl_phantom,
7603 - * which could confuse IOMMU, and the ARI bit in devctl2, which
7604 + * which could confuse IOMMU, MPS, which can break communication
7605 + * with other physical devices, and the ARI bit in devctl2, which
7606 * is set at probe time. FLR gets virtualized via our writefn.
7607 */
7608 p_setw(perm, PCI_EXP_DEVCTL,
7609 - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
7610 + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD,
7611 + ~PCI_EXP_DEVCTL_PHANTOM);
7612 p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
7613 return 0;
7614 }
7615 diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
7616 index 9bd17682655a..1c2289ddd555 100644
7617 --- a/drivers/video/backlight/pwm_bl.c
7618 +++ b/drivers/video/backlight/pwm_bl.c
7619 @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb)
7620 static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness)
7621 {
7622 unsigned int lth = pb->lth_brightness;
7623 - int duty_cycle;
7624 + u64 duty_cycle;
7625
7626 if (pb->levels)
7627 duty_cycle = pb->levels[brightness];
7628 else
7629 duty_cycle = brightness;
7630
7631 - return (duty_cycle * (pb->period - lth) / pb->scale) + lth;
7632 + duty_cycle *= pb->period - lth;
7633 + do_div(duty_cycle, pb->scale);
7634 +
7635 + return duty_cycle + lth;
7636 }
7637
7638 static int pwm_backlight_update_status(struct backlight_device *bl)
7639 diff --git a/fs/dcache.c b/fs/dcache.c
7640 index f90141387f01..34c852af215c 100644
7641 --- a/fs/dcache.c
7642 +++ b/fs/dcache.c
7643 @@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
7644 {
7645 /*
7646 * Be careful about RCU walk racing with rename:
7647 - * use 'lockless_dereference' to fetch the name pointer.
7648 + * use 'READ_ONCE' to fetch the name pointer.
7649 *
7650 * NOTE! Even if a rename will mean that the length
7651 * was not loaded atomically, we don't care. The
7652 @@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
7653 * early because the data cannot match (there can
7654 * be no NUL in the ct/tcount data)
7655 */
7656 - const unsigned char *cs = lockless_dereference(dentry->d_name.name);
7657 + const unsigned char *cs = READ_ONCE(dentry->d_name.name);
7658
7659 return dentry_string_cmp(cs, ct, tcount);
7660 }
7661 diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
7662 index 25d9b5adcd42..36b49bd09264 100644
7663 --- a/fs/overlayfs/ovl_entry.h
7664 +++ b/fs/overlayfs/ovl_entry.h
7665 @@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode)
7666
7667 static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi)
7668 {
7669 - return lockless_dereference(oi->__upperdentry);
7670 + return READ_ONCE(oi->__upperdentry);
7671 }
7672 diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
7673 index b2c7f33e08fc..d94a51dc4e32 100644
7674 --- a/fs/overlayfs/readdir.c
7675 +++ b/fs/overlayfs/readdir.c
7676 @@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
7677 if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
7678 struct inode *inode = file_inode(file);
7679
7680 - realfile = lockless_dereference(od->upperfile);
7681 + realfile = READ_ONCE(od->upperfile);
7682 if (!realfile) {
7683 struct path upperpath;
7684
7685 diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
7686 index e549bff87c5b..353f52fdc35e 100644
7687 --- a/include/asm-generic/vmlinux.lds.h
7688 +++ b/include/asm-generic/vmlinux.lds.h
7689 @@ -688,7 +688,7 @@
7690 #define BUG_TABLE
7691 #endif
7692
7693 -#ifdef CONFIG_ORC_UNWINDER
7694 +#ifdef CONFIG_UNWINDER_ORC
7695 #define ORC_UNWIND_TABLE \
7696 . = ALIGN(4); \
7697 .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
7698 diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
7699 index b8d200f60a40..73bec75b74c8 100644
7700 --- a/include/linux/bpf_verifier.h
7701 +++ b/include/linux/bpf_verifier.h
7702 @@ -15,11 +15,11 @@
7703 * In practice this is far bigger than any realistic pointer offset; this limit
7704 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
7705 */
7706 -#define BPF_MAX_VAR_OFF (1ULL << 31)
7707 +#define BPF_MAX_VAR_OFF (1 << 29)
7708 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
7709 * that converting umax_value to int cannot overflow.
7710 */
7711 -#define BPF_MAX_VAR_SIZ INT_MAX
7712 +#define BPF_MAX_VAR_SIZ (1 << 29)
7713
7714 /* Liveness marks, used for registers and spilled-regs (in stack slots).
7715 * Read marks propagate upwards until they find a write mark; they record that
7716 @@ -110,7 +110,7 @@ struct bpf_insn_aux_data {
7717 struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
7718 };
7719 int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
7720 - int converted_op_size; /* the valid value width after perceived conversion */
7721 + bool seen; /* this insn was processed by the verifier */
7722 };
7723
7724 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
7725 diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
7726 index 780b1242bf24..3b609edffa8f 100644
7727 --- a/include/linux/compiler-clang.h
7728 +++ b/include/linux/compiler-clang.h
7729 @@ -1,5 +1,5 @@
7730 /* SPDX-License-Identifier: GPL-2.0 */
7731 -#ifndef __LINUX_COMPILER_H
7732 +#ifndef __LINUX_COMPILER_TYPES_H
7733 #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
7734 #endif
7735
7736 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
7737 index bb78e5bdff26..2272ded07496 100644
7738 --- a/include/linux/compiler-gcc.h
7739 +++ b/include/linux/compiler-gcc.h
7740 @@ -1,5 +1,5 @@
7741 /* SPDX-License-Identifier: GPL-2.0 */
7742 -#ifndef __LINUX_COMPILER_H
7743 +#ifndef __LINUX_COMPILER_TYPES_H
7744 #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
7745 #endif
7746
7747 diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
7748 index 523d1b74550f..bfa08160db3a 100644
7749 --- a/include/linux/compiler-intel.h
7750 +++ b/include/linux/compiler-intel.h
7751 @@ -1,5 +1,5 @@
7752 /* SPDX-License-Identifier: GPL-2.0 */
7753 -#ifndef __LINUX_COMPILER_H
7754 +#ifndef __LINUX_COMPILER_TYPES_H
7755 #error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead."
7756 #endif
7757
7758 diff --git a/include/linux/compiler.h b/include/linux/compiler.h
7759 index 202710420d6d..fab5dc250c61 100644
7760 --- a/include/linux/compiler.h
7761 +++ b/include/linux/compiler.h
7762 @@ -2,111 +2,12 @@
7763 #ifndef __LINUX_COMPILER_H
7764 #define __LINUX_COMPILER_H
7765
7766 -#ifndef __ASSEMBLY__
7767 +#include <linux/compiler_types.h>
7768
7769 -#ifdef __CHECKER__
7770 -# define __user __attribute__((noderef, address_space(1)))
7771 -# define __kernel __attribute__((address_space(0)))
7772 -# define __safe __attribute__((safe))
7773 -# define __force __attribute__((force))
7774 -# define __nocast __attribute__((nocast))
7775 -# define __iomem __attribute__((noderef, address_space(2)))
7776 -# define __must_hold(x) __attribute__((context(x,1,1)))
7777 -# define __acquires(x) __attribute__((context(x,0,1)))
7778 -# define __releases(x) __attribute__((context(x,1,0)))
7779 -# define __acquire(x) __context__(x,1)
7780 -# define __release(x) __context__(x,-1)
7781 -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
7782 -# define __percpu __attribute__((noderef, address_space(3)))
7783 -# define __rcu __attribute__((noderef, address_space(4)))
7784 -# define __private __attribute__((noderef))
7785 -extern void __chk_user_ptr(const volatile void __user *);
7786 -extern void __chk_io_ptr(const volatile void __iomem *);
7787 -# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
7788 -#else /* __CHECKER__ */
7789 -# ifdef STRUCTLEAK_PLUGIN
7790 -# define __user __attribute__((user))
7791 -# else
7792 -# define __user
7793 -# endif
7794 -# define __kernel
7795 -# define __safe
7796 -# define __force
7797 -# define __nocast
7798 -# define __iomem
7799 -# define __chk_user_ptr(x) (void)0
7800 -# define __chk_io_ptr(x) (void)0
7801 -# define __builtin_warning(x, y...) (1)
7802 -# define __must_hold(x)
7803 -# define __acquires(x)
7804 -# define __releases(x)
7805 -# define __acquire(x) (void)0
7806 -# define __release(x) (void)0
7807 -# define __cond_lock(x,c) (c)
7808 -# define __percpu
7809 -# define __rcu
7810 -# define __private
7811 -# define ACCESS_PRIVATE(p, member) ((p)->member)
7812 -#endif /* __CHECKER__ */
7813 -
7814 -/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
7815 -#define ___PASTE(a,b) a##b
7816 -#define __PASTE(a,b) ___PASTE(a,b)
7817 +#ifndef __ASSEMBLY__
7818
7819 #ifdef __KERNEL__
7820
7821 -#ifdef __GNUC__
7822 -#include <linux/compiler-gcc.h>
7823 -#endif
7824 -
7825 -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
7826 -#define notrace __attribute__((hotpatch(0,0)))
7827 -#else
7828 -#define notrace __attribute__((no_instrument_function))
7829 -#endif
7830 -
7831 -/* Intel compiler defines __GNUC__. So we will overwrite implementations
7832 - * coming from above header files here
7833 - */
7834 -#ifdef __INTEL_COMPILER
7835 -# include <linux/compiler-intel.h>
7836 -#endif
7837 -
7838 -/* Clang compiler defines __GNUC__. So we will overwrite implementations
7839 - * coming from above header files here
7840 - */
7841 -#ifdef __clang__
7842 -#include <linux/compiler-clang.h>
7843 -#endif
7844 -
7845 -/*
7846 - * Generic compiler-dependent macros required for kernel
7847 - * build go below this comment. Actual compiler/compiler version
7848 - * specific implementations come from the above header files
7849 - */
7850 -
7851 -struct ftrace_branch_data {
7852 - const char *func;
7853 - const char *file;
7854 - unsigned line;
7855 - union {
7856 - struct {
7857 - unsigned long correct;
7858 - unsigned long incorrect;
7859 - };
7860 - struct {
7861 - unsigned long miss;
7862 - unsigned long hit;
7863 - };
7864 - unsigned long miss_hit[2];
7865 - };
7866 -};
7867 -
7868 -struct ftrace_likely_data {
7869 - struct ftrace_branch_data data;
7870 - unsigned long constant;
7871 -};
7872 -
7873 /*
7874 * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
7875 * to disable branch tracing on a per file basis.
7876 @@ -333,6 +234,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7877 * with an explicit memory barrier or atomic instruction that provides the
7878 * required ordering.
7879 */
7880 +#include <asm/barrier.h>
7881
7882 #define __READ_ONCE(x, check) \
7883 ({ \
7884 @@ -341,6 +243,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7885 __read_once_size(&(x), __u.__c, sizeof(x)); \
7886 else \
7887 __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \
7888 + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
7889 __u.__val; \
7890 })
7891 #define READ_ONCE(x) __READ_ONCE(x, 1)
7892 @@ -363,167 +266,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
7893
7894 #endif /* __ASSEMBLY__ */
7895
7896 -#ifdef __KERNEL__
7897 -/*
7898 - * Allow us to mark functions as 'deprecated' and have gcc emit a nice
7899 - * warning for each use, in hopes of speeding the functions removal.
7900 - * Usage is:
7901 - * int __deprecated foo(void)
7902 - */
7903 -#ifndef __deprecated
7904 -# define __deprecated /* unimplemented */
7905 -#endif
7906 -
7907 -#ifdef MODULE
7908 -#define __deprecated_for_modules __deprecated
7909 -#else
7910 -#define __deprecated_for_modules
7911 -#endif
7912 -
7913 -#ifndef __must_check
7914 -#define __must_check
7915 -#endif
7916 -
7917 -#ifndef CONFIG_ENABLE_MUST_CHECK
7918 -#undef __must_check
7919 -#define __must_check
7920 -#endif
7921 -#ifndef CONFIG_ENABLE_WARN_DEPRECATED
7922 -#undef __deprecated
7923 -#undef __deprecated_for_modules
7924 -#define __deprecated
7925 -#define __deprecated_for_modules
7926 -#endif
7927 -
7928 -#ifndef __malloc
7929 -#define __malloc
7930 -#endif
7931 -
7932 -/*
7933 - * Allow us to avoid 'defined but not used' warnings on functions and data,
7934 - * as well as force them to be emitted to the assembly file.
7935 - *
7936 - * As of gcc 3.4, static functions that are not marked with attribute((used))
7937 - * may be elided from the assembly file. As of gcc 3.4, static data not so
7938 - * marked will not be elided, but this may change in a future gcc version.
7939 - *
7940 - * NOTE: Because distributions shipped with a backported unit-at-a-time
7941 - * compiler in gcc 3.3, we must define __used to be __attribute__((used))
7942 - * for gcc >=3.3 instead of 3.4.
7943 - *
7944 - * In prior versions of gcc, such functions and data would be emitted, but
7945 - * would be warned about except with attribute((unused)).
7946 - *
7947 - * Mark functions that are referenced only in inline assembly as __used so
7948 - * the code is emitted even though it appears to be unreferenced.
7949 - */
7950 -#ifndef __used
7951 -# define __used /* unimplemented */
7952 -#endif
7953 -
7954 -#ifndef __maybe_unused
7955 -# define __maybe_unused /* unimplemented */
7956 -#endif
7957 -
7958 -#ifndef __always_unused
7959 -# define __always_unused /* unimplemented */
7960 -#endif
7961 -
7962 -#ifndef noinline
7963 -#define noinline
7964 -#endif
7965 -
7966 -/*
7967 - * Rather then using noinline to prevent stack consumption, use
7968 - * noinline_for_stack instead. For documentation reasons.
7969 - */
7970 -#define noinline_for_stack noinline
7971 -
7972 -#ifndef __always_inline
7973 -#define __always_inline inline
7974 -#endif
7975 -
7976 -#endif /* __KERNEL__ */
7977 -
7978 -/*
7979 - * From the GCC manual:
7980 - *
7981 - * Many functions do not examine any values except their arguments,
7982 - * and have no effects except the return value. Basically this is
7983 - * just slightly more strict class than the `pure' attribute above,
7984 - * since function is not allowed to read global memory.
7985 - *
7986 - * Note that a function that has pointer arguments and examines the
7987 - * data pointed to must _not_ be declared `const'. Likewise, a
7988 - * function that calls a non-`const' function usually must not be
7989 - * `const'. It does not make sense for a `const' function to return
7990 - * `void'.
7991 - */
7992 -#ifndef __attribute_const__
7993 -# define __attribute_const__ /* unimplemented */
7994 -#endif
7995 -
7996 -#ifndef __designated_init
7997 -# define __designated_init
7998 -#endif
7999 -
8000 -#ifndef __latent_entropy
8001 -# define __latent_entropy
8002 -#endif
8003 -
8004 -#ifndef __randomize_layout
8005 -# define __randomize_layout __designated_init
8006 -#endif
8007 -
8008 -#ifndef __no_randomize_layout
8009 -# define __no_randomize_layout
8010 -#endif
8011 -
8012 -#ifndef randomized_struct_fields_start
8013 -# define randomized_struct_fields_start
8014 -# define randomized_struct_fields_end
8015 -#endif
8016 -
8017 -/*
8018 - * Tell gcc if a function is cold. The compiler will assume any path
8019 - * directly leading to the call is unlikely.
8020 - */
8021 -
8022 -#ifndef __cold
8023 -#define __cold
8024 -#endif
8025 -
8026 -/* Simple shorthand for a section definition */
8027 -#ifndef __section
8028 -# define __section(S) __attribute__ ((__section__(#S)))
8029 -#endif
8030 -
8031 -#ifndef __visible
8032 -#define __visible
8033 -#endif
8034 -
8035 -#ifndef __nostackprotector
8036 -# define __nostackprotector
8037 -#endif
8038 -
8039 -/*
8040 - * Assume alignment of return value.
8041 - */
8042 -#ifndef __assume_aligned
8043 -#define __assume_aligned(a, ...)
8044 -#endif
8045 -
8046 -
8047 -/* Are two types/vars the same type (ignoring qualifiers)? */
8048 -#ifndef __same_type
8049 -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
8050 -#endif
8051 -
8052 -/* Is this type a native word size -- useful for atomic operations */
8053 -#ifndef __native_word
8054 -# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
8055 -#endif
8056 -
8057 /* Compile time object size, -1 for unknown */
8058 #ifndef __compiletime_object_size
8059 # define __compiletime_object_size(obj) -1
8060 diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
8061 new file mode 100644
8062 index 000000000000..6b79a9bba9a7
8063 --- /dev/null
8064 +++ b/include/linux/compiler_types.h
8065 @@ -0,0 +1,274 @@
8066 +#ifndef __LINUX_COMPILER_TYPES_H
8067 +#define __LINUX_COMPILER_TYPES_H
8068 +
8069 +#ifndef __ASSEMBLY__
8070 +
8071 +#ifdef __CHECKER__
8072 +# define __user __attribute__((noderef, address_space(1)))
8073 +# define __kernel __attribute__((address_space(0)))
8074 +# define __safe __attribute__((safe))
8075 +# define __force __attribute__((force))
8076 +# define __nocast __attribute__((nocast))
8077 +# define __iomem __attribute__((noderef, address_space(2)))
8078 +# define __must_hold(x) __attribute__((context(x,1,1)))
8079 +# define __acquires(x) __attribute__((context(x,0,1)))
8080 +# define __releases(x) __attribute__((context(x,1,0)))
8081 +# define __acquire(x) __context__(x,1)
8082 +# define __release(x) __context__(x,-1)
8083 +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
8084 +# define __percpu __attribute__((noderef, address_space(3)))
8085 +# define __rcu __attribute__((noderef, address_space(4)))
8086 +# define __private __attribute__((noderef))
8087 +extern void __chk_user_ptr(const volatile void __user *);
8088 +extern void __chk_io_ptr(const volatile void __iomem *);
8089 +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member))
8090 +#else /* __CHECKER__ */
8091 +# ifdef STRUCTLEAK_PLUGIN
8092 +# define __user __attribute__((user))
8093 +# else
8094 +# define __user
8095 +# endif
8096 +# define __kernel
8097 +# define __safe
8098 +# define __force
8099 +# define __nocast
8100 +# define __iomem
8101 +# define __chk_user_ptr(x) (void)0
8102 +# define __chk_io_ptr(x) (void)0
8103 +# define __builtin_warning(x, y...) (1)
8104 +# define __must_hold(x)
8105 +# define __acquires(x)
8106 +# define __releases(x)
8107 +# define __acquire(x) (void)0
8108 +# define __release(x) (void)0
8109 +# define __cond_lock(x,c) (c)
8110 +# define __percpu
8111 +# define __rcu
8112 +# define __private
8113 +# define ACCESS_PRIVATE(p, member) ((p)->member)
8114 +#endif /* __CHECKER__ */
8115 +
8116 +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */
8117 +#define ___PASTE(a,b) a##b
8118 +#define __PASTE(a,b) ___PASTE(a,b)
8119 +
8120 +#ifdef __KERNEL__
8121 +
8122 +#ifdef __GNUC__
8123 +#include <linux/compiler-gcc.h>
8124 +#endif
8125 +
8126 +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
8127 +#define notrace __attribute__((hotpatch(0,0)))
8128 +#else
8129 +#define notrace __attribute__((no_instrument_function))
8130 +#endif
8131 +
8132 +/* Intel compiler defines __GNUC__. So we will overwrite implementations
8133 + * coming from above header files here
8134 + */
8135 +#ifdef __INTEL_COMPILER
8136 +# include <linux/compiler-intel.h>
8137 +#endif
8138 +
8139 +/* Clang compiler defines __GNUC__. So we will overwrite implementations
8140 + * coming from above header files here
8141 + */
8142 +#ifdef __clang__
8143 +#include <linux/compiler-clang.h>
8144 +#endif
8145 +
8146 +/*
8147 + * Generic compiler-dependent macros required for kernel
8148 + * build go below this comment. Actual compiler/compiler version
8149 + * specific implementations come from the above header files
8150 + */
8151 +
8152 +struct ftrace_branch_data {
8153 + const char *func;
8154 + const char *file;
8155 + unsigned line;
8156 + union {
8157 + struct {
8158 + unsigned long correct;
8159 + unsigned long incorrect;
8160 + };
8161 + struct {
8162 + unsigned long miss;
8163 + unsigned long hit;
8164 + };
8165 + unsigned long miss_hit[2];
8166 + };
8167 +};
8168 +
8169 +struct ftrace_likely_data {
8170 + struct ftrace_branch_data data;
8171 + unsigned long constant;
8172 +};
8173 +
8174 +#endif /* __KERNEL__ */
8175 +
8176 +#endif /* __ASSEMBLY__ */
8177 +
8178 +#ifdef __KERNEL__
8179 +/*
8180 + * Allow us to mark functions as 'deprecated' and have gcc emit a nice
8181 + * warning for each use, in hopes of speeding the functions removal.
8182 + * Usage is:
8183 + * int __deprecated foo(void)
8184 + */
8185 +#ifndef __deprecated
8186 +# define __deprecated /* unimplemented */
8187 +#endif
8188 +
8189 +#ifdef MODULE
8190 +#define __deprecated_for_modules __deprecated
8191 +#else
8192 +#define __deprecated_for_modules
8193 +#endif
8194 +
8195 +#ifndef __must_check
8196 +#define __must_check
8197 +#endif
8198 +
8199 +#ifndef CONFIG_ENABLE_MUST_CHECK
8200 +#undef __must_check
8201 +#define __must_check
8202 +#endif
8203 +#ifndef CONFIG_ENABLE_WARN_DEPRECATED
8204 +#undef __deprecated
8205 +#undef __deprecated_for_modules
8206 +#define __deprecated
8207 +#define __deprecated_for_modules
8208 +#endif
8209 +
8210 +#ifndef __malloc
8211 +#define __malloc
8212 +#endif
8213 +
8214 +/*
8215 + * Allow us to avoid 'defined but not used' warnings on functions and data,
8216 + * as well as force them to be emitted to the assembly file.
8217 + *
8218 + * As of gcc 3.4, static functions that are not marked with attribute((used))
8219 + * may be elided from the assembly file. As of gcc 3.4, static data not so
8220 + * marked will not be elided, but this may change in a future gcc version.
8221 + *
8222 + * NOTE: Because distributions shipped with a backported unit-at-a-time
8223 + * compiler in gcc 3.3, we must define __used to be __attribute__((used))
8224 + * for gcc >=3.3 instead of 3.4.
8225 + *
8226 + * In prior versions of gcc, such functions and data would be emitted, but
8227 + * would be warned about except with attribute((unused)).
8228 + *
8229 + * Mark functions that are referenced only in inline assembly as __used so
8230 + * the code is emitted even though it appears to be unreferenced.
8231 + */
8232 +#ifndef __used
8233 +# define __used /* unimplemented */
8234 +#endif
8235 +
8236 +#ifndef __maybe_unused
8237 +# define __maybe_unused /* unimplemented */
8238 +#endif
8239 +
8240 +#ifndef __always_unused
8241 +# define __always_unused /* unimplemented */
8242 +#endif
8243 +
8244 +#ifndef noinline
8245 +#define noinline
8246 +#endif
8247 +
8248 +/*
8249 + * Rather then using noinline to prevent stack consumption, use
8250 + * noinline_for_stack instead. For documentation reasons.
8251 + */
8252 +#define noinline_for_stack noinline
8253 +
8254 +#ifndef __always_inline
8255 +#define __always_inline inline
8256 +#endif
8257 +
8258 +#endif /* __KERNEL__ */
8259 +
8260 +/*
8261 + * From the GCC manual:
8262 + *
8263 + * Many functions do not examine any values except their arguments,
8264 + * and have no effects except the return value. Basically this is
8265 + * just slightly more strict class than the `pure' attribute above,
8266 + * since function is not allowed to read global memory.
8267 + *
8268 + * Note that a function that has pointer arguments and examines the
8269 + * data pointed to must _not_ be declared `const'. Likewise, a
8270 + * function that calls a non-`const' function usually must not be
8271 + * `const'. It does not make sense for a `const' function to return
8272 + * `void'.
8273 + */
8274 +#ifndef __attribute_const__
8275 +# define __attribute_const__ /* unimplemented */
8276 +#endif
8277 +
8278 +#ifndef __designated_init
8279 +# define __designated_init
8280 +#endif
8281 +
8282 +#ifndef __latent_entropy
8283 +# define __latent_entropy
8284 +#endif
8285 +
8286 +#ifndef __randomize_layout
8287 +# define __randomize_layout __designated_init
8288 +#endif
8289 +
8290 +#ifndef __no_randomize_layout
8291 +# define __no_randomize_layout
8292 +#endif
8293 +
8294 +#ifndef randomized_struct_fields_start
8295 +# define randomized_struct_fields_start
8296 +# define randomized_struct_fields_end
8297 +#endif
8298 +
8299 +/*
8300 + * Tell gcc if a function is cold. The compiler will assume any path
8301 + * directly leading to the call is unlikely.
8302 + */
8303 +
8304 +#ifndef __cold
8305 +#define __cold
8306 +#endif
8307 +
8308 +/* Simple shorthand for a section definition */
8309 +#ifndef __section
8310 +# define __section(S) __attribute__ ((__section__(#S)))
8311 +#endif
8312 +
8313 +#ifndef __visible
8314 +#define __visible
8315 +#endif
8316 +
8317 +#ifndef __nostackprotector
8318 +# define __nostackprotector
8319 +#endif
8320 +
8321 +/*
8322 + * Assume alignment of return value.
8323 + */
8324 +#ifndef __assume_aligned
8325 +#define __assume_aligned(a, ...)
8326 +#endif
8327 +
8328 +
8329 +/* Are two types/vars the same type (ignoring qualifiers)? */
8330 +#ifndef __same_type
8331 +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
8332 +#endif
8333 +
8334 +/* Is this type a native word size -- useful for atomic operations */
8335 +#ifndef __native_word
8336 +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
8337 +#endif
8338 +
8339 +#endif /* __LINUX_COMPILER_TYPES_H */
8340 diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
8341 index b4054fd5b6f6..b19563f9a8eb 100644
8342 --- a/include/linux/hypervisor.h
8343 +++ b/include/linux/hypervisor.h
8344 @@ -7,8 +7,12 @@
8345 * Juergen Gross <jgross@suse.com>
8346 */
8347
8348 -#ifdef CONFIG_HYPERVISOR_GUEST
8349 -#include <asm/hypervisor.h>
8350 +#ifdef CONFIG_X86
8351 +#include <asm/x86_init.h>
8352 +static inline void hypervisor_pin_vcpu(int cpu)
8353 +{
8354 + x86_platform.hyper.pin_vcpu(cpu);
8355 +}
8356 #else
8357 static inline void hypervisor_pin_vcpu(int cpu)
8358 {
8359 diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
8360 index 7b0fa8b5c120..ce0ef1c0a30a 100644
8361 --- a/include/linux/iio/common/st_sensors.h
8362 +++ b/include/linux/iio/common/st_sensors.h
8363 @@ -139,7 +139,7 @@ struct st_sensor_das {
8364 * @mask_ihl: mask to enable/disable active low on the INT lines.
8365 * @addr_od: address to enable/disable Open Drain on the INT lines.
8366 * @mask_od: mask to enable/disable Open Drain on the INT lines.
8367 - * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt
8368 + * struct stat_drdy - status register of DRDY (data ready) interrupt.
8369 * struct ig1 - represents the Interrupt Generator 1 of sensors.
8370 * @en_addr: address of the enable ig1 register.
8371 * @en_mask: mask to write the on/off value for enable.
8372 @@ -152,7 +152,10 @@ struct st_sensor_data_ready_irq {
8373 u8 mask_ihl;
8374 u8 addr_od;
8375 u8 mask_od;
8376 - u8 addr_stat_drdy;
8377 + struct {
8378 + u8 addr;
8379 + u8 mask;
8380 + } stat_drdy;
8381 struct {
8382 u8 en_addr;
8383 u8 en_mask;
8384 diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h
8385 new file mode 100644
8386 index 000000000000..2710d72de3c9
8387 --- /dev/null
8388 +++ b/include/linux/intel-pti.h
8389 @@ -0,0 +1,43 @@
8390 +/*
8391 + * Copyright (C) Intel 2011
8392 + *
8393 + * This program is free software; you can redistribute it and/or modify
8394 + * it under the terms of the GNU General Public License version 2 as
8395 + * published by the Free Software Foundation.
8396 + *
8397 + * This program is distributed in the hope that it will be useful,
8398 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
8399 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8400 + * GNU General Public License for more details.
8401 + *
8402 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8403 + *
8404 + * The PTI (Parallel Trace Interface) driver directs trace data routed from
8405 + * various parts in the system out through the Intel Penwell PTI port and
8406 + * out of the mobile device for analysis with a debugging tool
8407 + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
8408 + * compact JTAG, standard.
8409 + *
8410 + * This header file will allow other parts of the OS to use the
8411 + * interface to write out it's contents for debugging a mobile system.
8412 + */
8413 +
8414 +#ifndef LINUX_INTEL_PTI_H_
8415 +#define LINUX_INTEL_PTI_H_
8416 +
8417 +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
8418 +#define PTI_LASTDWORD_DTS 0x30
8419 +
8420 +/* basic structure used as a write address to the PTI HW */
8421 +struct pti_masterchannel {
8422 + u8 master;
8423 + u8 channel;
8424 +};
8425 +
8426 +/* the following functions are defined in misc/pti.c */
8427 +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
8428 +struct pti_masterchannel *pti_request_masterchannel(u8 type,
8429 + const char *thread_name);
8430 +void pti_release_masterchannel(struct pti_masterchannel *mc);
8431 +
8432 +#endif /* LINUX_INTEL_PTI_H_ */
8433 diff --git a/include/linux/linkage.h b/include/linux/linkage.h
8434 index 2e6f90bd52aa..f68db9e450eb 100644
8435 --- a/include/linux/linkage.h
8436 +++ b/include/linux/linkage.h
8437 @@ -2,7 +2,7 @@
8438 #ifndef _LINUX_LINKAGE_H
8439 #define _LINUX_LINKAGE_H
8440
8441 -#include <linux/compiler.h>
8442 +#include <linux/compiler_types.h>
8443 #include <linux/stringify.h>
8444 #include <linux/export.h>
8445 #include <asm/linkage.h>
8446 diff --git a/include/linux/mm.h b/include/linux/mm.h
8447 index db647d428100..f50deada0f5c 100644
8448 --- a/include/linux/mm.h
8449 +++ b/include/linux/mm.h
8450 @@ -2510,7 +2510,7 @@ void vmemmap_populate_print_last(void);
8451 void vmemmap_free(unsigned long start, unsigned long end);
8452 #endif
8453 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
8454 - unsigned long size);
8455 + unsigned long nr_pages);
8456
8457 enum mf_flags {
8458 MF_COUNT_INCREASED = 1 << 0,
8459 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
8460 index 18b06983131a..f0938257ee6d 100644
8461 --- a/include/linux/mmzone.h
8462 +++ b/include/linux/mmzone.h
8463 @@ -1152,13 +1152,17 @@ struct mem_section {
8464 #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1)
8465
8466 #ifdef CONFIG_SPARSEMEM_EXTREME
8467 -extern struct mem_section *mem_section[NR_SECTION_ROOTS];
8468 +extern struct mem_section **mem_section;
8469 #else
8470 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
8471 #endif
8472
8473 static inline struct mem_section *__nr_to_section(unsigned long nr)
8474 {
8475 +#ifdef CONFIG_SPARSEMEM_EXTREME
8476 + if (!mem_section)
8477 + return NULL;
8478 +#endif
8479 if (!mem_section[SECTION_NR_TO_ROOT(nr)])
8480 return NULL;
8481 return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
8482 diff --git a/include/linux/pti.h b/include/linux/pti.h
8483 deleted file mode 100644
8484 index b3ea01a3197e..000000000000
8485 --- a/include/linux/pti.h
8486 +++ /dev/null
8487 @@ -1,43 +0,0 @@
8488 -/*
8489 - * Copyright (C) Intel 2011
8490 - *
8491 - * This program is free software; you can redistribute it and/or modify
8492 - * it under the terms of the GNU General Public License version 2 as
8493 - * published by the Free Software Foundation.
8494 - *
8495 - * This program is distributed in the hope that it will be useful,
8496 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
8497 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8498 - * GNU General Public License for more details.
8499 - *
8500 - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8501 - *
8502 - * The PTI (Parallel Trace Interface) driver directs trace data routed from
8503 - * various parts in the system out through the Intel Penwell PTI port and
8504 - * out of the mobile device for analysis with a debugging tool
8505 - * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
8506 - * compact JTAG, standard.
8507 - *
8508 - * This header file will allow other parts of the OS to use the
8509 - * interface to write out it's contents for debugging a mobile system.
8510 - */
8511 -
8512 -#ifndef PTI_H_
8513 -#define PTI_H_
8514 -
8515 -/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
8516 -#define PTI_LASTDWORD_DTS 0x30
8517 -
8518 -/* basic structure used as a write address to the PTI HW */
8519 -struct pti_masterchannel {
8520 - u8 master;
8521 - u8 channel;
8522 -};
8523 -
8524 -/* the following functions are defined in misc/pti.c */
8525 -void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
8526 -struct pti_masterchannel *pti_request_masterchannel(u8 type,
8527 - const char *thread_name);
8528 -void pti_release_masterchannel(struct pti_masterchannel *mc);
8529 -
8530 -#endif /*PTI_H_*/
8531 diff --git a/include/linux/rculist.h b/include/linux/rculist.h
8532 index c2cdd45a880a..127f534fec94 100644
8533 --- a/include/linux/rculist.h
8534 +++ b/include/linux/rculist.h
8535 @@ -275,7 +275,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
8536 * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
8537 */
8538 #define list_entry_rcu(ptr, type, member) \
8539 - container_of(lockless_dereference(ptr), type, member)
8540 + container_of(READ_ONCE(ptr), type, member)
8541
8542 /*
8543 * Where are list_empty_rcu() and list_first_entry_rcu()?
8544 @@ -368,7 +368,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
8545 * example is when items are added to the list, but never deleted.
8546 */
8547 #define list_entry_lockless(ptr, type, member) \
8548 - container_of((typeof(ptr))lockless_dereference(ptr), type, member)
8549 + container_of((typeof(ptr))READ_ONCE(ptr), type, member)
8550
8551 /**
8552 * list_for_each_entry_lockless - iterate over rcu list of given type
8553 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
8554 index 1a9f70d44af9..a6ddc42f87a5 100644
8555 --- a/include/linux/rcupdate.h
8556 +++ b/include/linux/rcupdate.h
8557 @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { }
8558 #define __rcu_dereference_check(p, c, space) \
8559 ({ \
8560 /* Dependency order vs. p above. */ \
8561 - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \
8562 + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
8563 RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
8564 rcu_dereference_sparse(p, space); \
8565 ((typeof(*p) __force __kernel *)(________p1)); \
8566 @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { }
8567 #define rcu_dereference_raw(p) \
8568 ({ \
8569 /* Dependency order vs. p above. */ \
8570 - typeof(p) ________p1 = lockless_dereference(p); \
8571 + typeof(p) ________p1 = READ_ONCE(p); \
8572 ((typeof(*p) __force __kernel *)(________p1)); \
8573 })
8574
8575 diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
8576 index f65b92e0e1f9..ee8220f8dcf5 100644
8577 --- a/include/uapi/linux/stddef.h
8578 +++ b/include/uapi/linux/stddef.h
8579 @@ -1,5 +1,5 @@
8580 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
8581 -#include <linux/compiler.h>
8582 +#include <linux/compiler_types.h>
8583
8584 #ifndef __always_inline
8585 #define __always_inline inline
8586 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
8587 index c48ca2a34b5e..c5ff809e86d0 100644
8588 --- a/kernel/bpf/verifier.c
8589 +++ b/kernel/bpf/verifier.c
8590 @@ -1061,6 +1061,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
8591 break;
8592 case PTR_TO_STACK:
8593 pointer_desc = "stack ";
8594 + /* The stack spill tracking logic in check_stack_write()
8595 + * and check_stack_read() relies on stack accesses being
8596 + * aligned.
8597 + */
8598 + strict = true;
8599 break;
8600 default:
8601 break;
8602 @@ -1068,6 +1073,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
8603 return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
8604 }
8605
8606 +/* truncate register to smaller size (in bytes)
8607 + * must be called with size < BPF_REG_SIZE
8608 + */
8609 +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
8610 +{
8611 + u64 mask;
8612 +
8613 + /* clear high bits in bit representation */
8614 + reg->var_off = tnum_cast(reg->var_off, size);
8615 +
8616 + /* fix arithmetic bounds */
8617 + mask = ((u64)1 << (size * 8)) - 1;
8618 + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
8619 + reg->umin_value &= mask;
8620 + reg->umax_value &= mask;
8621 + } else {
8622 + reg->umin_value = 0;
8623 + reg->umax_value = mask;
8624 + }
8625 + reg->smin_value = reg->umin_value;
8626 + reg->smax_value = reg->umax_value;
8627 +}
8628 +
8629 /* check whether memory at (regno + off) is accessible for t = (read | write)
8630 * if t==write, value_regno is a register which value is stored into memory
8631 * if t==read, value_regno is a register which will receive the value from memory
8632 @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
8633 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
8634 state->regs[value_regno].type == SCALAR_VALUE) {
8635 /* b/h/w load zero-extends, mark upper bits as known 0 */
8636 - state->regs[value_regno].var_off = tnum_cast(
8637 - state->regs[value_regno].var_off, size);
8638 - __update_reg_bounds(&state->regs[value_regno]);
8639 + coerce_reg_to_size(&state->regs[value_regno], size);
8640 }
8641 return err;
8642 }
8643 @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
8644 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
8645 verbose("invalid variable stack read R%d var_off=%s\n",
8646 regno, tn_buf);
8647 + return -EACCES;
8648 }
8649 off = regs[regno].off + regs[regno].var_off.value;
8650 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
8651 @@ -1742,14 +1769,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
8652 return 0;
8653 }
8654
8655 -static void coerce_reg_to_32(struct bpf_reg_state *reg)
8656 -{
8657 - /* clear high 32 bits */
8658 - reg->var_off = tnum_cast(reg->var_off, 4);
8659 - /* Update bounds */
8660 - __update_reg_bounds(reg);
8661 -}
8662 -
8663 static bool signed_add_overflows(s64 a, s64 b)
8664 {
8665 /* Do the add in u64, where overflow is well-defined */
8666 @@ -1770,6 +1789,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
8667 return res > a;
8668 }
8669
8670 +static bool check_reg_sane_offset(struct bpf_verifier_env *env,
8671 + const struct bpf_reg_state *reg,
8672 + enum bpf_reg_type type)
8673 +{
8674 + bool known = tnum_is_const(reg->var_off);
8675 + s64 val = reg->var_off.value;
8676 + s64 smin = reg->smin_value;
8677 +
8678 + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
8679 + verbose("math between %s pointer and %lld is not allowed\n",
8680 + reg_type_str[type], val);
8681 + return false;
8682 + }
8683 +
8684 + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
8685 + verbose("%s pointer offset %d is not allowed\n",
8686 + reg_type_str[type], reg->off);
8687 + return false;
8688 + }
8689 +
8690 + if (smin == S64_MIN) {
8691 + verbose("math between %s pointer and register with unbounded min value is not allowed\n",
8692 + reg_type_str[type]);
8693 + return false;
8694 + }
8695 +
8696 + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
8697 + verbose("value %lld makes %s pointer be out of bounds\n",
8698 + smin, reg_type_str[type]);
8699 + return false;
8700 + }
8701 +
8702 + return true;
8703 +}
8704 +
8705 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
8706 * Caller should also handle BPF_MOV case separately.
8707 * If we return -EACCES, caller may want to try again treating pointer as a
8708 @@ -1835,6 +1889,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
8709 dst_reg->type = ptr_reg->type;
8710 dst_reg->id = ptr_reg->id;
8711
8712 + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
8713 + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
8714 + return -EINVAL;
8715 +
8716 switch (opcode) {
8717 case BPF_ADD:
8718 /* We can take a fixed offset as long as it doesn't overflow
8719 @@ -1965,12 +2023,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
8720 return -EACCES;
8721 }
8722
8723 + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
8724 + return -EINVAL;
8725 +
8726 __update_reg_bounds(dst_reg);
8727 __reg_deduce_bounds(dst_reg);
8728 __reg_bound_offset(dst_reg);
8729 return 0;
8730 }
8731
8732 +/* WARNING: This function does calculations on 64-bit values, but the actual
8733 + * execution may occur on 32-bit values. Therefore, things like bitshifts
8734 + * need extra checks in the 32-bit case.
8735 + */
8736 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8737 struct bpf_insn *insn,
8738 struct bpf_reg_state *dst_reg,
8739 @@ -1981,12 +2046,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8740 bool src_known, dst_known;
8741 s64 smin_val, smax_val;
8742 u64 umin_val, umax_val;
8743 + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
8744
8745 - if (BPF_CLASS(insn->code) != BPF_ALU64) {
8746 - /* 32-bit ALU ops are (32,32)->64 */
8747 - coerce_reg_to_32(dst_reg);
8748 - coerce_reg_to_32(&src_reg);
8749 - }
8750 smin_val = src_reg.smin_value;
8751 smax_val = src_reg.smax_value;
8752 umin_val = src_reg.umin_value;
8753 @@ -1994,6 +2055,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8754 src_known = tnum_is_const(src_reg.var_off);
8755 dst_known = tnum_is_const(dst_reg->var_off);
8756
8757 + if (!src_known &&
8758 + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
8759 + __mark_reg_unknown(dst_reg);
8760 + return 0;
8761 + }
8762 +
8763 switch (opcode) {
8764 case BPF_ADD:
8765 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
8766 @@ -2122,9 +2189,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8767 __update_reg_bounds(dst_reg);
8768 break;
8769 case BPF_LSH:
8770 - if (umax_val > 63) {
8771 - /* Shifts greater than 63 are undefined. This includes
8772 - * shifts by a negative number.
8773 + if (umax_val >= insn_bitness) {
8774 + /* Shifts greater than 31 or 63 are undefined.
8775 + * This includes shifts by a negative number.
8776 */
8777 mark_reg_unknown(regs, insn->dst_reg);
8778 break;
8779 @@ -2150,27 +2217,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8780 __update_reg_bounds(dst_reg);
8781 break;
8782 case BPF_RSH:
8783 - if (umax_val > 63) {
8784 - /* Shifts greater than 63 are undefined. This includes
8785 - * shifts by a negative number.
8786 + if (umax_val >= insn_bitness) {
8787 + /* Shifts greater than 31 or 63 are undefined.
8788 + * This includes shifts by a negative number.
8789 */
8790 mark_reg_unknown(regs, insn->dst_reg);
8791 break;
8792 }
8793 - /* BPF_RSH is an unsigned shift, so make the appropriate casts */
8794 - if (dst_reg->smin_value < 0) {
8795 - if (umin_val) {
8796 - /* Sign bit will be cleared */
8797 - dst_reg->smin_value = 0;
8798 - } else {
8799 - /* Lost sign bit information */
8800 - dst_reg->smin_value = S64_MIN;
8801 - dst_reg->smax_value = S64_MAX;
8802 - }
8803 - } else {
8804 - dst_reg->smin_value =
8805 - (u64)(dst_reg->smin_value) >> umax_val;
8806 - }
8807 + /* BPF_RSH is an unsigned shift. If the value in dst_reg might
8808 + * be negative, then either:
8809 + * 1) src_reg might be zero, so the sign bit of the result is
8810 + * unknown, so we lose our signed bounds
8811 + * 2) it's known negative, thus the unsigned bounds capture the
8812 + * signed bounds
8813 + * 3) the signed bounds cross zero, so they tell us nothing
8814 + * about the result
8815 + * If the value in dst_reg is known nonnegative, then again the
8816 + * unsigned bounts capture the signed bounds.
8817 + * Thus, in all cases it suffices to blow away our signed bounds
8818 + * and rely on inferring new ones from the unsigned bounds and
8819 + * var_off of the result.
8820 + */
8821 + dst_reg->smin_value = S64_MIN;
8822 + dst_reg->smax_value = S64_MAX;
8823 if (src_known)
8824 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
8825 umin_val);
8826 @@ -2186,6 +2255,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
8827 break;
8828 }
8829
8830 + if (BPF_CLASS(insn->code) != BPF_ALU64) {
8831 + /* 32-bit ALU ops are (32,32)->32 */
8832 + coerce_reg_to_size(dst_reg, 4);
8833 + coerce_reg_to_size(&src_reg, 4);
8834 + }
8835 +
8836 __reg_deduce_bounds(dst_reg);
8837 __reg_bound_offset(dst_reg);
8838 return 0;
8839 @@ -2362,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
8840 return -EACCES;
8841 }
8842 mark_reg_unknown(regs, insn->dst_reg);
8843 - /* high 32 bits are known zero. */
8844 - regs[insn->dst_reg].var_off = tnum_cast(
8845 - regs[insn->dst_reg].var_off, 4);
8846 - __update_reg_bounds(&regs[insn->dst_reg]);
8847 + coerce_reg_to_size(&regs[insn->dst_reg], 4);
8848 }
8849 } else {
8850 /* case: R = imm
8851 * remember the value we stored into this reg
8852 */
8853 regs[insn->dst_reg].type = SCALAR_VALUE;
8854 - __mark_reg_known(regs + insn->dst_reg, insn->imm);
8855 + if (BPF_CLASS(insn->code) == BPF_ALU64) {
8856 + __mark_reg_known(regs + insn->dst_reg,
8857 + insn->imm);
8858 + } else {
8859 + __mark_reg_known(regs + insn->dst_reg,
8860 + (u32)insn->imm);
8861 + }
8862 }
8863
8864 } else if (opcode > BPF_END) {
8865 @@ -3307,15 +3385,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8866 return range_within(rold, rcur) &&
8867 tnum_in(rold->var_off, rcur->var_off);
8868 } else {
8869 - /* if we knew anything about the old value, we're not
8870 - * equal, because we can't know anything about the
8871 - * scalar value of the pointer in the new value.
8872 + /* We're trying to use a pointer in place of a scalar.
8873 + * Even if the scalar was unbounded, this could lead to
8874 + * pointer leaks because scalars are allowed to leak
8875 + * while pointers are not. We could make this safe in
8876 + * special cases if root is calling us, but it's
8877 + * probably not worth the hassle.
8878 */
8879 - return rold->umin_value == 0 &&
8880 - rold->umax_value == U64_MAX &&
8881 - rold->smin_value == S64_MIN &&
8882 - rold->smax_value == S64_MAX &&
8883 - tnum_is_unknown(rold->var_off);
8884 + return false;
8885 }
8886 case PTR_TO_MAP_VALUE:
8887 /* If the new min/max/var_off satisfy the old ones and
8888 @@ -3665,6 +3742,7 @@ static int do_check(struct bpf_verifier_env *env)
8889 if (err)
8890 return err;
8891
8892 + env->insn_aux_data[insn_idx].seen = true;
8893 if (class == BPF_ALU || class == BPF_ALU64) {
8894 err = check_alu_op(env, insn);
8895 if (err)
8896 @@ -3855,6 +3933,7 @@ static int do_check(struct bpf_verifier_env *env)
8897 return err;
8898
8899 insn_idx++;
8900 + env->insn_aux_data[insn_idx].seen = true;
8901 } else {
8902 verbose("invalid BPF_LD mode\n");
8903 return -EINVAL;
8904 @@ -4035,6 +4114,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
8905 u32 off, u32 cnt)
8906 {
8907 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
8908 + int i;
8909
8910 if (cnt == 1)
8911 return 0;
8912 @@ -4044,6 +4124,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
8913 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
8914 memcpy(new_data + off + cnt - 1, old_data + off,
8915 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
8916 + for (i = off; i < off + cnt - 1; i++)
8917 + new_data[i].seen = true;
8918 env->insn_aux_data = new_data;
8919 vfree(old_data);
8920 return 0;
8921 @@ -4062,6 +4144,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
8922 return new_prog;
8923 }
8924
8925 +/* The verifier does more data flow analysis than llvm and will not explore
8926 + * branches that are dead at run time. Malicious programs can have dead code
8927 + * too. Therefore replace all dead at-run-time code with nops.
8928 + */
8929 +static void sanitize_dead_code(struct bpf_verifier_env *env)
8930 +{
8931 + struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8932 + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
8933 + struct bpf_insn *insn = env->prog->insnsi;
8934 + const int insn_cnt = env->prog->len;
8935 + int i;
8936 +
8937 + for (i = 0; i < insn_cnt; i++) {
8938 + if (aux_data[i].seen)
8939 + continue;
8940 + memcpy(insn + i, &nop, sizeof(nop));
8941 + }
8942 +}
8943 +
8944 /* convert load instructions that access fields of 'struct __sk_buff'
8945 * into sequence of instructions that access fields of 'struct sk_buff'
8946 */
8947 @@ -4378,6 +4479,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
8948 while (pop_stack(env, NULL) >= 0);
8949 free_states(env);
8950
8951 + if (ret == 0)
8952 + sanitize_dead_code(env);
8953 +
8954 if (ret == 0)
8955 /* program is valid, convert *(u32*)(ctx + off) accesses */
8956 ret = convert_ctx_accesses(env);
8957 diff --git a/kernel/events/core.c b/kernel/events/core.c
8958 index 4f1d4bfc607a..24ebad5567b4 100644
8959 --- a/kernel/events/core.c
8960 +++ b/kernel/events/core.c
8961 @@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struct perf_event *event)
8962 * indeed free this event, otherwise we need to serialize on
8963 * owner->perf_event_mutex.
8964 */
8965 - owner = lockless_dereference(event->owner);
8966 + owner = READ_ONCE(event->owner);
8967 if (owner) {
8968 /*
8969 * Since delayed_put_task_struct() also drops the last
8970 @@ -4330,7 +4330,7 @@ int perf_event_release_kernel(struct perf_event *event)
8971 * Cannot change, child events are not migrated, see the
8972 * comment with perf_event_ctx_lock_nested().
8973 */
8974 - ctx = lockless_dereference(child->ctx);
8975 + ctx = READ_ONCE(child->ctx);
8976 /*
8977 * Since child_mutex nests inside ctx::mutex, we must jump
8978 * through hoops. We start by grabbing a reference on the ctx.
8979 diff --git a/kernel/seccomp.c b/kernel/seccomp.c
8980 index 418a1c045933..5f0dfb2abb8d 100644
8981 --- a/kernel/seccomp.c
8982 +++ b/kernel/seccomp.c
8983 @@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
8984 u32 ret = SECCOMP_RET_ALLOW;
8985 /* Make sure cross-thread synced filter points somewhere sane. */
8986 struct seccomp_filter *f =
8987 - lockless_dereference(current->seccomp.filter);
8988 + READ_ONCE(current->seccomp.filter);
8989
8990 /* Ensure unexpected behavior doesn't result in failing open. */
8991 if (unlikely(WARN_ON(f == NULL)))
8992 diff --git a/kernel/task_work.c b/kernel/task_work.c
8993 index 5718b3ea202a..0fef395662a6 100644
8994 --- a/kernel/task_work.c
8995 +++ b/kernel/task_work.c
8996 @@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
8997 * we raced with task_work_run(), *pprev == NULL/exited.
8998 */
8999 raw_spin_lock_irqsave(&task->pi_lock, flags);
9000 - while ((work = lockless_dereference(*pprev))) {
9001 + while ((work = READ_ONCE(*pprev))) {
9002 if (work->func != func)
9003 pprev = &work->next;
9004 else if (cmpxchg(pprev, work, work->next) == work)
9005 diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
9006 index dc498b605d5d..6350f64d5aa4 100644
9007 --- a/kernel/trace/bpf_trace.c
9008 +++ b/kernel/trace/bpf_trace.c
9009 @@ -293,14 +293,13 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
9010 .arg2_type = ARG_ANYTHING,
9011 };
9012
9013 -static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
9014 +static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
9015
9016 static __always_inline u64
9017 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9018 - u64 flags, struct perf_raw_record *raw)
9019 + u64 flags, struct perf_sample_data *sd)
9020 {
9021 struct bpf_array *array = container_of(map, struct bpf_array, map);
9022 - struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
9023 unsigned int cpu = smp_processor_id();
9024 u64 index = flags & BPF_F_INDEX_MASK;
9025 struct bpf_event_entry *ee;
9026 @@ -323,8 +322,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9027 if (unlikely(event->oncpu != cpu))
9028 return -EOPNOTSUPP;
9029
9030 - perf_sample_data_init(sd, 0, 0);
9031 - sd->raw = raw;
9032 perf_event_output(event, sd, regs);
9033 return 0;
9034 }
9035 @@ -332,6 +329,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
9036 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
9037 u64, flags, void *, data, u64, size)
9038 {
9039 + struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
9040 struct perf_raw_record raw = {
9041 .frag = {
9042 .size = size,
9043 @@ -342,7 +340,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
9044 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
9045 return -EINVAL;
9046
9047 - return __bpf_perf_event_output(regs, map, flags, &raw);
9048 + perf_sample_data_init(sd, 0, 0);
9049 + sd->raw = &raw;
9050 +
9051 + return __bpf_perf_event_output(regs, map, flags, sd);
9052 }
9053
9054 static const struct bpf_func_proto bpf_perf_event_output_proto = {
9055 @@ -357,10 +358,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
9056 };
9057
9058 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
9059 +static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
9060
9061 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
9062 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
9063 {
9064 + struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
9065 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
9066 struct perf_raw_frag frag = {
9067 .copy = ctx_copy,
9068 @@ -378,8 +381,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
9069 };
9070
9071 perf_fetch_caller_regs(regs);
9072 + perf_sample_data_init(sd, 0, 0);
9073 + sd->raw = &raw;
9074
9075 - return __bpf_perf_event_output(regs, map, flags, &raw);
9076 + return __bpf_perf_event_output(regs, map, flags, sd);
9077 }
9078
9079 BPF_CALL_0(bpf_get_current_task)
9080 diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
9081 index 1c21d0e2a145..7eb975a2d0e1 100644
9082 --- a/kernel/trace/trace_events_hist.c
9083 +++ b/kernel/trace/trace_events_hist.c
9084 @@ -450,7 +450,7 @@ static int create_val_field(struct hist_trigger_data *hist_data,
9085 }
9086
9087 field = trace_find_event_field(file->event_call, field_name);
9088 - if (!field) {
9089 + if (!field || !field->size) {
9090 ret = -EINVAL;
9091 goto out;
9092 }
9093 @@ -548,7 +548,7 @@ static int create_key_field(struct hist_trigger_data *hist_data,
9094 }
9095
9096 field = trace_find_event_field(file->event_call, field_name);
9097 - if (!field) {
9098 + if (!field || !field->size) {
9099 ret = -EINVAL;
9100 goto out;
9101 }
9102 diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
9103 index dfdad67d8f6c..ff21b4dbb392 100644
9104 --- a/lib/Kconfig.debug
9105 +++ b/lib/Kconfig.debug
9106 @@ -376,7 +376,7 @@ config STACK_VALIDATION
9107 that runtime stack traces are more reliable.
9108
9109 This is also a prerequisite for generation of ORC unwind data, which
9110 - is needed for CONFIG_ORC_UNWINDER.
9111 + is needed for CONFIG_UNWINDER_ORC.
9112
9113 For more information, see
9114 tools/objtool/Documentation/stack-validation.txt.
9115 diff --git a/mm/slab.h b/mm/slab.h
9116 index 028cdc7df67e..86d7c7d860f9 100644
9117 --- a/mm/slab.h
9118 +++ b/mm/slab.h
9119 @@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx)
9120 * memcg_caches issues a write barrier to match this (see
9121 * memcg_create_kmem_cache()).
9122 */
9123 - cachep = lockless_dereference(arr->entries[idx]);
9124 + cachep = READ_ONCE(arr->entries[idx]);
9125 rcu_read_unlock();
9126
9127 return cachep;
9128 diff --git a/mm/sparse.c b/mm/sparse.c
9129 index 4900707ae146..60805abf98af 100644
9130 --- a/mm/sparse.c
9131 +++ b/mm/sparse.c
9132 @@ -23,8 +23,7 @@
9133 * 1) mem_section - memory sections, mem_map's for valid memory
9134 */
9135 #ifdef CONFIG_SPARSEMEM_EXTREME
9136 -struct mem_section *mem_section[NR_SECTION_ROOTS]
9137 - ____cacheline_internodealigned_in_smp;
9138 +struct mem_section **mem_section;
9139 #else
9140 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
9141 ____cacheline_internodealigned_in_smp;
9142 @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
9143 int __section_nr(struct mem_section* ms)
9144 {
9145 unsigned long root_nr;
9146 - struct mem_section* root;
9147 + struct mem_section *root = NULL;
9148
9149 for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
9150 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
9151 @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
9152 break;
9153 }
9154
9155 - VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
9156 + VM_BUG_ON(!root);
9157
9158 return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
9159 }
9160 @@ -208,6 +207,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
9161 {
9162 unsigned long pfn;
9163
9164 +#ifdef CONFIG_SPARSEMEM_EXTREME
9165 + if (unlikely(!mem_section)) {
9166 + unsigned long size, align;
9167 +
9168 + size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
9169 + align = 1 << (INTERNODE_CACHE_SHIFT);
9170 + mem_section = memblock_virt_alloc(size, align);
9171 + }
9172 +#endif
9173 +
9174 start &= PAGE_SECTION_MASK;
9175 mminit_validate_memmodel_limits(&start, &end);
9176 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
9177 @@ -330,11 +339,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
9178 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
9179 {
9180 unsigned long usemap_snr, pgdat_snr;
9181 - static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
9182 - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
9183 + static unsigned long old_usemap_snr;
9184 + static unsigned long old_pgdat_snr;
9185 struct pglist_data *pgdat = NODE_DATA(nid);
9186 int usemap_nid;
9187
9188 + /* First call */
9189 + if (!old_usemap_snr) {
9190 + old_usemap_snr = NR_MEM_SECTIONS;
9191 + old_pgdat_snr = NR_MEM_SECTIONS;
9192 + }
9193 +
9194 usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
9195 pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
9196 if (usemap_snr == pgdat_snr)
9197 diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
9198 index 467e44d7587d..045331204097 100644
9199 --- a/net/ipv4/ip_gre.c
9200 +++ b/net/ipv4/ip_gre.c
9201 @@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
9202 if (gre_handle_offloads(skb, false))
9203 goto err_free_rt;
9204
9205 - if (skb->len > dev->mtu) {
9206 - pskb_trim(skb, dev->mtu);
9207 + if (skb->len > dev->mtu + dev->hard_header_len) {
9208 + pskb_trim(skb, dev->mtu + dev->hard_header_len);
9209 truncate = true;
9210 }
9211
9212 @@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
9213 if (skb_cow_head(skb, dev->needed_headroom))
9214 goto free_skb;
9215
9216 - if (skb->len - dev->hard_header_len > dev->mtu) {
9217 - pskb_trim(skb, dev->mtu);
9218 + if (skb->len > dev->mtu + dev->hard_header_len) {
9219 + pskb_trim(skb, dev->mtu + dev->hard_header_len);
9220 truncate = true;
9221 }
9222
9223 diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
9224 index 218cfcc77650..ee113ff15fd0 100644
9225 --- a/net/ipv4/tcp_vegas.c
9226 +++ b/net/ipv4/tcp_vegas.c
9227 @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
9228
9229 static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
9230 {
9231 - return min(tp->snd_ssthresh, tp->snd_cwnd-1);
9232 + return min(tp->snd_ssthresh, tp->snd_cwnd);
9233 }
9234
9235 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
9236 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
9237 index 8a1c846d3df9..2ec39404c449 100644
9238 --- a/net/ipv6/addrconf.c
9239 +++ b/net/ipv6/addrconf.c
9240 @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
9241 .disable_policy = 0,
9242 };
9243
9244 -/* Check if a valid qdisc is available */
9245 -static inline bool addrconf_qdisc_ok(const struct net_device *dev)
9246 +/* Check if link is ready: is it up and is a valid qdisc available */
9247 +static inline bool addrconf_link_ready(const struct net_device *dev)
9248 {
9249 - return !qdisc_tx_is_noop(dev);
9250 + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
9251 }
9252
9253 static void addrconf_del_rs_timer(struct inet6_dev *idev)
9254 @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
9255
9256 ndev->token = in6addr_any;
9257
9258 - if (netif_running(dev) && addrconf_qdisc_ok(dev))
9259 + if (netif_running(dev) && addrconf_link_ready(dev))
9260 ndev->if_flags |= IF_READY;
9261
9262 ipv6_mc_init_dev(ndev);
9263 @@ -3404,7 +3404,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
9264 /* restore routes for permanent addresses */
9265 addrconf_permanent_addr(dev);
9266
9267 - if (!addrconf_qdisc_ok(dev)) {
9268 + if (!addrconf_link_ready(dev)) {
9269 /* device is not ready yet. */
9270 pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
9271 dev->name);
9272 @@ -3419,7 +3419,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
9273 run_pending = 1;
9274 }
9275 } else if (event == NETDEV_CHANGE) {
9276 - if (!addrconf_qdisc_ok(dev)) {
9277 + if (!addrconf_link_ready(dev)) {
9278 /* device is still not ready. */
9279 break;
9280 }
9281 diff --git a/net/ipv6/route.c b/net/ipv6/route.c
9282 index 598efa8cfe25..76b47682f77f 100644
9283 --- a/net/ipv6/route.c
9284 +++ b/net/ipv6/route.c
9285 @@ -1055,7 +1055,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
9286
9287 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
9288 {
9289 - struct fib6_table *table = rt->rt6i_table;
9290 struct rt6_info *pcpu_rt, *prev, **p;
9291
9292 pcpu_rt = ip6_rt_pcpu_alloc(rt);
9293 @@ -1066,28 +1065,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
9294 return net->ipv6.ip6_null_entry;
9295 }
9296
9297 - read_lock_bh(&table->tb6_lock);
9298 - if (rt->rt6i_pcpu) {
9299 - p = this_cpu_ptr(rt->rt6i_pcpu);
9300 - prev = cmpxchg(p, NULL, pcpu_rt);
9301 - if (prev) {
9302 - /* If someone did it before us, return prev instead */
9303 - dst_release_immediate(&pcpu_rt->dst);
9304 - pcpu_rt = prev;
9305 - }
9306 - } else {
9307 - /* rt has been removed from the fib6 tree
9308 - * before we have a chance to acquire the read_lock.
9309 - * In this case, don't brother to create a pcpu rt
9310 - * since rt is going away anyway. The next
9311 - * dst_check() will trigger a re-lookup.
9312 - */
9313 + dst_hold(&pcpu_rt->dst);
9314 + p = this_cpu_ptr(rt->rt6i_pcpu);
9315 + prev = cmpxchg(p, NULL, pcpu_rt);
9316 + if (prev) {
9317 + /* If someone did it before us, return prev instead */
9318 + /* release refcnt taken by ip6_rt_pcpu_alloc() */
9319 dst_release_immediate(&pcpu_rt->dst);
9320 - pcpu_rt = rt;
9321 + /* release refcnt taken by above dst_hold() */
9322 + dst_release_immediate(&pcpu_rt->dst);
9323 + dst_hold(&prev->dst);
9324 + pcpu_rt = prev;
9325 }
9326 - dst_hold(&pcpu_rt->dst);
9327 +
9328 rt6_dst_from_metrics_check(pcpu_rt);
9329 - read_unlock_bh(&table->tb6_lock);
9330 return pcpu_rt;
9331 }
9332
9333 @@ -1177,19 +1168,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
9334 if (pcpu_rt) {
9335 read_unlock_bh(&table->tb6_lock);
9336 } else {
9337 - /* We have to do the read_unlock first
9338 - * because rt6_make_pcpu_route() may trigger
9339 - * ip6_dst_gc() which will take the write_lock.
9340 - */
9341 - dst_hold(&rt->dst);
9342 - read_unlock_bh(&table->tb6_lock);
9343 - pcpu_rt = rt6_make_pcpu_route(rt);
9344 - dst_release(&rt->dst);
9345 + /* atomic_inc_not_zero() is needed when using rcu */
9346 + if (atomic_inc_not_zero(&rt->rt6i_ref)) {
9347 + /* We have to do the read_unlock first
9348 + * because rt6_make_pcpu_route() may trigger
9349 + * ip6_dst_gc() which will take the write_lock.
9350 + *
9351 + * No dst_hold() on rt is needed because grabbing
9352 + * rt->rt6i_ref makes sure rt can't be released.
9353 + */
9354 + read_unlock_bh(&table->tb6_lock);
9355 + pcpu_rt = rt6_make_pcpu_route(rt);
9356 + rt6_release(rt);
9357 + } else {
9358 + /* rt is already removed from tree */
9359 + read_unlock_bh(&table->tb6_lock);
9360 + pcpu_rt = net->ipv6.ip6_null_entry;
9361 + dst_hold(&pcpu_rt->dst);
9362 + }
9363 }
9364
9365 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
9366 return pcpu_rt;
9367 -
9368 }
9369 }
9370 EXPORT_SYMBOL_GPL(ip6_pol_route);
9371 diff --git a/net/sctp/stream.c b/net/sctp/stream.c
9372 index fa8371ff05c4..724adf2786a2 100644
9373 --- a/net/sctp/stream.c
9374 +++ b/net/sctp/stream.c
9375 @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
9376 {
9377 int i;
9378
9379 + gfp |= __GFP_NOWARN;
9380 +
9381 /* Initial stream->out size may be very big, so free it and alloc
9382 - * a new one with new outcnt to save memory.
9383 + * a new one with new outcnt to save memory if needed.
9384 */
9385 + if (outcnt == stream->outcnt)
9386 + goto in;
9387 +
9388 kfree(stream->out);
9389
9390 stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
9391 @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
9392 for (i = 0; i < stream->outcnt; i++)
9393 stream->out[i].state = SCTP_STREAM_OPEN;
9394
9395 +in:
9396 if (!incnt)
9397 return 0;
9398
9399 diff --git a/scripts/Makefile.build b/scripts/Makefile.build
9400 index bb831d49bcfd..e63af4e19382 100644
9401 --- a/scripts/Makefile.build
9402 +++ b/scripts/Makefile.build
9403 @@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
9404
9405 __objtool_obj := $(objtree)/tools/objtool/objtool
9406
9407 -objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
9408 +objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
9409
9410 ifndef CONFIG_FRAME_POINTER
9411 objtool_args += --no-fp
9412 diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh
9413 index 4d1ea96e8794..a18bca720995 100755
9414 --- a/scripts/headers_install.sh
9415 +++ b/scripts/headers_install.sh
9416 @@ -34,7 +34,7 @@ do
9417 sed -r \
9418 -e 's/([ \t(])(__user|__force|__iomem)[ \t]/\1/g' \
9419 -e 's/__attribute_const__([ \t]|$)/\1/g' \
9420 - -e 's@^#include <linux/compiler.h>@@' \
9421 + -e 's@^#include <linux/compiler(|_types).h>@@' \
9422 -e 's/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g' \
9423 -e 's/(^|[ \t(])(inline|asm|volatile)([ \t(]|$)/\1__\2__\3/g' \
9424 -e 's@#(ifndef|define|endif[ \t]*/[*])[ \t]*_UAPI@#\1 @' \
9425 diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
9426 index 549c269acc7d..18933bf6473f 100644
9427 --- a/sound/soc/codecs/msm8916-wcd-analog.c
9428 +++ b/sound/soc/codecs/msm8916-wcd-analog.c
9429 @@ -104,7 +104,7 @@
9430 #define CDC_A_MICB_1_VAL (0xf141)
9431 #define MICB_MIN_VAL 1600
9432 #define MICB_STEP_SIZE 50
9433 -#define MICB_VOLTAGE_REGVAL(v) ((v - MICB_MIN_VAL)/MICB_STEP_SIZE)
9434 +#define MICB_VOLTAGE_REGVAL(v) (((v - MICB_MIN_VAL)/MICB_STEP_SIZE) << 3)
9435 #define MICB_1_VAL_MICB_OUT_VAL_MASK GENMASK(7, 3)
9436 #define MICB_1_VAL_MICB_OUT_VAL_V2P70V ((0x16) << 3)
9437 #define MICB_1_VAL_MICB_OUT_VAL_V1P80V ((0x4) << 3)
9438 @@ -349,8 +349,9 @@ static void pm8916_wcd_analog_micbias_enable(struct snd_soc_codec *codec)
9439 | MICB_1_CTL_EXT_PRECHARG_EN_ENABLE);
9440
9441 if (wcd->micbias_mv) {
9442 - snd_soc_write(codec, CDC_A_MICB_1_VAL,
9443 - MICB_VOLTAGE_REGVAL(wcd->micbias_mv));
9444 + snd_soc_update_bits(codec, CDC_A_MICB_1_VAL,
9445 + MICB_1_VAL_MICB_OUT_VAL_MASK,
9446 + MICB_VOLTAGE_REGVAL(wcd->micbias_mv));
9447 /*
9448 * Special headset needs MICBIAS as 2.7V so wait for
9449 * 50 msec for the MICBIAS to reach 2.7 volts.
9450 @@ -1241,6 +1242,8 @@ static const struct of_device_id pm8916_wcd_analog_spmi_match_table[] = {
9451 { }
9452 };
9453
9454 +MODULE_DEVICE_TABLE(of, pm8916_wcd_analog_spmi_match_table);
9455 +
9456 static struct platform_driver pm8916_wcd_analog_spmi_driver = {
9457 .driver = {
9458 .name = "qcom,pm8916-wcd-spmi-codec",
9459 diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c
9460 index 23b0f0f6ec9c..2fc8a6372206 100644
9461 --- a/sound/soc/img/img-parallel-out.c
9462 +++ b/sound/soc/img/img-parallel-out.c
9463 @@ -164,9 +164,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
9464 return -EINVAL;
9465 }
9466
9467 + pm_runtime_get_sync(prl->dev);
9468 reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL);
9469 reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set;
9470 img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL);
9471 + pm_runtime_put(prl->dev);
9472
9473 return 0;
9474 }
9475 diff --git a/tools/objtool/check.c b/tools/objtool/check.c
9476 index c0e26ad1fa7e..9b341584eb1b 100644
9477 --- a/tools/objtool/check.c
9478 +++ b/tools/objtool/check.c
9479 @@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
9480 if (insn->dead_end)
9481 return 0;
9482
9483 - insn = next_insn;
9484 - if (!insn) {
9485 + if (!next_insn) {
9486 + if (state.cfa.base == CFI_UNDEFINED)
9487 + return 0;
9488 WARN("%s: unexpected end of section", sec->name);
9489 return 1;
9490 }
9491 +
9492 + insn = next_insn;
9493 }
9494
9495 return 0;
9496 diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
9497 index 31e0f9143840..07f329919828 100644
9498 --- a/tools/objtool/objtool.c
9499 +++ b/tools/objtool/objtool.c
9500 @@ -70,7 +70,7 @@ static void cmd_usage(void)
9501
9502 printf("\n");
9503
9504 - exit(1);
9505 + exit(129);
9506 }
9507
9508 static void handle_options(int *argc, const char ***argv)
9509 @@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
9510 break;
9511 } else {
9512 fprintf(stderr, "Unknown option: %s\n", cmd);
9513 - fprintf(stderr, "\n Usage: %s\n",
9514 - objtool_usage_string);
9515 - exit(1);
9516 + cmd_usage();
9517 }
9518
9519 (*argv)++;
9520 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
9521 index 64ae21f64489..7a2d221c4702 100644
9522 --- a/tools/testing/selftests/bpf/test_verifier.c
9523 +++ b/tools/testing/selftests/bpf/test_verifier.c
9524 @@ -606,7 +606,6 @@ static struct bpf_test tests[] = {
9525 },
9526 .errstr = "misaligned stack access",
9527 .result = REJECT,
9528 - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9529 },
9530 {
9531 "invalid map_fd for function call",
9532 @@ -1797,7 +1796,6 @@ static struct bpf_test tests[] = {
9533 },
9534 .result = REJECT,
9535 .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
9536 - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9537 },
9538 {
9539 "PTR_TO_STACK store/load - bad alignment on reg",
9540 @@ -1810,7 +1808,6 @@ static struct bpf_test tests[] = {
9541 },
9542 .result = REJECT,
9543 .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
9544 - .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
9545 },
9546 {
9547 "PTR_TO_STACK store/load - out of bounds low",
9548 @@ -6115,7 +6112,7 @@ static struct bpf_test tests[] = {
9549 BPF_EXIT_INSN(),
9550 },
9551 .fixup_map1 = { 3 },
9552 - .errstr = "R0 min value is negative",
9553 + .errstr = "unbounded min value",
9554 .result = REJECT,
9555 },
9556 {
9557 @@ -6139,7 +6136,7 @@ static struct bpf_test tests[] = {
9558 BPF_EXIT_INSN(),
9559 },
9560 .fixup_map1 = { 3 },
9561 - .errstr = "R0 min value is negative",
9562 + .errstr = "unbounded min value",
9563 .result = REJECT,
9564 },
9565 {
9566 @@ -6165,7 +6162,7 @@ static struct bpf_test tests[] = {
9567 BPF_EXIT_INSN(),
9568 },
9569 .fixup_map1 = { 3 },
9570 - .errstr = "R8 invalid mem access 'inv'",
9571 + .errstr = "unbounded min value",
9572 .result = REJECT,
9573 },
9574 {
9575 @@ -6190,7 +6187,7 @@ static struct bpf_test tests[] = {
9576 BPF_EXIT_INSN(),
9577 },
9578 .fixup_map1 = { 3 },
9579 - .errstr = "R8 invalid mem access 'inv'",
9580 + .errstr = "unbounded min value",
9581 .result = REJECT,
9582 },
9583 {
9584 @@ -6238,7 +6235,7 @@ static struct bpf_test tests[] = {
9585 BPF_EXIT_INSN(),
9586 },
9587 .fixup_map1 = { 3 },
9588 - .errstr = "R0 min value is negative",
9589 + .errstr = "unbounded min value",
9590 .result = REJECT,
9591 },
9592 {
9593 @@ -6309,7 +6306,7 @@ static struct bpf_test tests[] = {
9594 BPF_EXIT_INSN(),
9595 },
9596 .fixup_map1 = { 3 },
9597 - .errstr = "R0 min value is negative",
9598 + .errstr = "unbounded min value",
9599 .result = REJECT,
9600 },
9601 {
9602 @@ -6360,7 +6357,7 @@ static struct bpf_test tests[] = {
9603 BPF_EXIT_INSN(),
9604 },
9605 .fixup_map1 = { 3 },
9606 - .errstr = "R0 min value is negative",
9607 + .errstr = "unbounded min value",
9608 .result = REJECT,
9609 },
9610 {
9611 @@ -6387,7 +6384,7 @@ static struct bpf_test tests[] = {
9612 BPF_EXIT_INSN(),
9613 },
9614 .fixup_map1 = { 3 },
9615 - .errstr = "R0 min value is negative",
9616 + .errstr = "unbounded min value",
9617 .result = REJECT,
9618 },
9619 {
9620 @@ -6413,7 +6410,7 @@ static struct bpf_test tests[] = {
9621 BPF_EXIT_INSN(),
9622 },
9623 .fixup_map1 = { 3 },
9624 - .errstr = "R0 min value is negative",
9625 + .errstr = "unbounded min value",
9626 .result = REJECT,
9627 },
9628 {
9629 @@ -6442,7 +6439,7 @@ static struct bpf_test tests[] = {
9630 BPF_EXIT_INSN(),
9631 },
9632 .fixup_map1 = { 3 },
9633 - .errstr = "R0 min value is negative",
9634 + .errstr = "unbounded min value",
9635 .result = REJECT,
9636 },
9637 {
9638 @@ -6472,7 +6469,7 @@ static struct bpf_test tests[] = {
9639 BPF_JMP_IMM(BPF_JA, 0, 0, -7),
9640 },
9641 .fixup_map1 = { 4 },
9642 - .errstr = "R0 min value is negative",
9643 + .errstr = "unbounded min value",
9644 .result = REJECT,
9645 },
9646 {
9647 @@ -6500,8 +6497,7 @@ static struct bpf_test tests[] = {
9648 BPF_EXIT_INSN(),
9649 },
9650 .fixup_map1 = { 3 },
9651 - .errstr_unpriv = "R0 pointer comparison prohibited",
9652 - .errstr = "R0 min value is negative",
9653 + .errstr = "unbounded min value",
9654 .result = REJECT,
9655 .result_unpriv = REJECT,
9656 },
9657 @@ -6556,6 +6552,462 @@ static struct bpf_test tests[] = {
9658 .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
9659 .result = REJECT,
9660 },
9661 + {
9662 + "bounds check based on zero-extended MOV",
9663 + .insns = {
9664 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9665 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9666 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9667 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9668 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9669 + BPF_FUNC_map_lookup_elem),
9670 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9671 + /* r2 = 0x0000'0000'ffff'ffff */
9672 + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
9673 + /* r2 = 0 */
9674 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
9675 + /* no-op */
9676 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9677 + /* access at offset 0 */
9678 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9679 + /* exit */
9680 + BPF_MOV64_IMM(BPF_REG_0, 0),
9681 + BPF_EXIT_INSN(),
9682 + },
9683 + .fixup_map1 = { 3 },
9684 + .result = ACCEPT
9685 + },
9686 + {
9687 + "bounds check based on sign-extended MOV. test1",
9688 + .insns = {
9689 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9690 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9691 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9692 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9693 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9694 + BPF_FUNC_map_lookup_elem),
9695 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9696 + /* r2 = 0xffff'ffff'ffff'ffff */
9697 + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
9698 + /* r2 = 0xffff'ffff */
9699 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
9700 + /* r0 = <oob pointer> */
9701 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9702 + /* access to OOB pointer */
9703 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9704 + /* exit */
9705 + BPF_MOV64_IMM(BPF_REG_0, 0),
9706 + BPF_EXIT_INSN(),
9707 + },
9708 + .fixup_map1 = { 3 },
9709 + .errstr = "map_value pointer and 4294967295",
9710 + .result = REJECT
9711 + },
9712 + {
9713 + "bounds check based on sign-extended MOV. test2",
9714 + .insns = {
9715 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9716 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9717 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9718 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9719 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9720 + BPF_FUNC_map_lookup_elem),
9721 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9722 + /* r2 = 0xffff'ffff'ffff'ffff */
9723 + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
9724 + /* r2 = 0xfff'ffff */
9725 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
9726 + /* r0 = <oob pointer> */
9727 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
9728 + /* access to OOB pointer */
9729 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9730 + /* exit */
9731 + BPF_MOV64_IMM(BPF_REG_0, 0),
9732 + BPF_EXIT_INSN(),
9733 + },
9734 + .fixup_map1 = { 3 },
9735 + .errstr = "R0 min value is outside of the array range",
9736 + .result = REJECT
9737 + },
9738 + {
9739 + "bounds check based on reg_off + var_off + insn_off. test1",
9740 + .insns = {
9741 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
9742 + offsetof(struct __sk_buff, mark)),
9743 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9744 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9745 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9746 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9747 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9748 + BPF_FUNC_map_lookup_elem),
9749 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9750 + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
9751 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
9752 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
9753 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
9754 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
9755 + BPF_MOV64_IMM(BPF_REG_0, 0),
9756 + BPF_EXIT_INSN(),
9757 + },
9758 + .fixup_map1 = { 4 },
9759 + .errstr = "value_size=8 off=1073741825",
9760 + .result = REJECT,
9761 + .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9762 + },
9763 + {
9764 + "bounds check based on reg_off + var_off + insn_off. test2",
9765 + .insns = {
9766 + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
9767 + offsetof(struct __sk_buff, mark)),
9768 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9769 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9770 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9771 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9772 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9773 + BPF_FUNC_map_lookup_elem),
9774 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
9775 + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
9776 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
9777 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
9778 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
9779 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
9780 + BPF_MOV64_IMM(BPF_REG_0, 0),
9781 + BPF_EXIT_INSN(),
9782 + },
9783 + .fixup_map1 = { 4 },
9784 + .errstr = "value 1073741823",
9785 + .result = REJECT,
9786 + .prog_type = BPF_PROG_TYPE_SCHED_CLS,
9787 + },
9788 + {
9789 + "bounds check after truncation of non-boundary-crossing range",
9790 + .insns = {
9791 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9792 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9793 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9794 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9795 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9796 + BPF_FUNC_map_lookup_elem),
9797 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9798 + /* r1 = [0x00, 0xff] */
9799 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9800 + BPF_MOV64_IMM(BPF_REG_2, 1),
9801 + /* r2 = 0x10'0000'0000 */
9802 + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
9803 + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
9804 + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
9805 + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
9806 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
9807 + /* r1 = [0x00, 0xff] */
9808 + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
9809 + /* r1 = 0 */
9810 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9811 + /* no-op */
9812 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9813 + /* access at offset 0 */
9814 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9815 + /* exit */
9816 + BPF_MOV64_IMM(BPF_REG_0, 0),
9817 + BPF_EXIT_INSN(),
9818 + },
9819 + .fixup_map1 = { 3 },
9820 + .result = ACCEPT
9821 + },
9822 + {
9823 + "bounds check after truncation of boundary-crossing range (1)",
9824 + .insns = {
9825 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9826 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9827 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9828 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9829 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9830 + BPF_FUNC_map_lookup_elem),
9831 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9832 + /* r1 = [0x00, 0xff] */
9833 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9834 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9835 + /* r1 = [0xffff'ff80, 0x1'0000'007f] */
9836 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9837 + /* r1 = [0xffff'ff80, 0xffff'ffff] or
9838 + * [0x0000'0000, 0x0000'007f]
9839 + */
9840 + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
9841 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9842 + /* r1 = [0x00, 0xff] or
9843 + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
9844 + */
9845 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9846 + /* r1 = 0 or
9847 + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
9848 + */
9849 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9850 + /* no-op or OOB pointer computation */
9851 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9852 + /* potentially OOB access */
9853 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9854 + /* exit */
9855 + BPF_MOV64_IMM(BPF_REG_0, 0),
9856 + BPF_EXIT_INSN(),
9857 + },
9858 + .fixup_map1 = { 3 },
9859 + /* not actually fully unbounded, but the bound is very high */
9860 + .errstr = "R0 unbounded memory access",
9861 + .result = REJECT
9862 + },
9863 + {
9864 + "bounds check after truncation of boundary-crossing range (2)",
9865 + .insns = {
9866 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9867 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9868 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9869 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9870 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9871 + BPF_FUNC_map_lookup_elem),
9872 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
9873 + /* r1 = [0x00, 0xff] */
9874 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9875 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9876 + /* r1 = [0xffff'ff80, 0x1'0000'007f] */
9877 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
9878 + /* r1 = [0xffff'ff80, 0xffff'ffff] or
9879 + * [0x0000'0000, 0x0000'007f]
9880 + * difference to previous test: truncation via MOV32
9881 + * instead of ALU32.
9882 + */
9883 + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
9884 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9885 + /* r1 = [0x00, 0xff] or
9886 + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
9887 + */
9888 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
9889 + /* r1 = 0 or
9890 + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
9891 + */
9892 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9893 + /* no-op or OOB pointer computation */
9894 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9895 + /* potentially OOB access */
9896 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9897 + /* exit */
9898 + BPF_MOV64_IMM(BPF_REG_0, 0),
9899 + BPF_EXIT_INSN(),
9900 + },
9901 + .fixup_map1 = { 3 },
9902 + /* not actually fully unbounded, but the bound is very high */
9903 + .errstr = "R0 unbounded memory access",
9904 + .result = REJECT
9905 + },
9906 + {
9907 + "bounds check after wrapping 32-bit addition",
9908 + .insns = {
9909 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9910 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9911 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9912 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9913 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9914 + BPF_FUNC_map_lookup_elem),
9915 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
9916 + /* r1 = 0x7fff'ffff */
9917 + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
9918 + /* r1 = 0xffff'fffe */
9919 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
9920 + /* r1 = 0 */
9921 + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
9922 + /* no-op */
9923 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9924 + /* access at offset 0 */
9925 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9926 + /* exit */
9927 + BPF_MOV64_IMM(BPF_REG_0, 0),
9928 + BPF_EXIT_INSN(),
9929 + },
9930 + .fixup_map1 = { 3 },
9931 + .result = ACCEPT
9932 + },
9933 + {
9934 + "bounds check after shift with oversized count operand",
9935 + .insns = {
9936 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9937 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9938 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9939 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9940 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9941 + BPF_FUNC_map_lookup_elem),
9942 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
9943 + BPF_MOV64_IMM(BPF_REG_2, 32),
9944 + BPF_MOV64_IMM(BPF_REG_1, 1),
9945 + /* r1 = (u32)1 << (u32)32 = ? */
9946 + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
9947 + /* r1 = [0x0000, 0xffff] */
9948 + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
9949 + /* computes unknown pointer, potentially OOB */
9950 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9951 + /* potentially OOB access */
9952 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9953 + /* exit */
9954 + BPF_MOV64_IMM(BPF_REG_0, 0),
9955 + BPF_EXIT_INSN(),
9956 + },
9957 + .fixup_map1 = { 3 },
9958 + .errstr = "R0 max value is outside of the array range",
9959 + .result = REJECT
9960 + },
9961 + {
9962 + "bounds check after right shift of maybe-negative number",
9963 + .insns = {
9964 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9965 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9966 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9967 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9968 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9969 + BPF_FUNC_map_lookup_elem),
9970 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
9971 + /* r1 = [0x00, 0xff] */
9972 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
9973 + /* r1 = [-0x01, 0xfe] */
9974 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
9975 + /* r1 = 0 or 0xff'ffff'ffff'ffff */
9976 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9977 + /* r1 = 0 or 0xffff'ffff'ffff */
9978 + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
9979 + /* computes unknown pointer, potentially OOB */
9980 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
9981 + /* potentially OOB access */
9982 + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
9983 + /* exit */
9984 + BPF_MOV64_IMM(BPF_REG_0, 0),
9985 + BPF_EXIT_INSN(),
9986 + },
9987 + .fixup_map1 = { 3 },
9988 + .errstr = "R0 unbounded memory access",
9989 + .result = REJECT
9990 + },
9991 + {
9992 + "bounds check map access with off+size signed 32bit overflow. test1",
9993 + .insns = {
9994 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
9995 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
9996 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
9997 + BPF_LD_MAP_FD(BPF_REG_1, 0),
9998 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
9999 + BPF_FUNC_map_lookup_elem),
10000 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10001 + BPF_EXIT_INSN(),
10002 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
10003 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10004 + BPF_JMP_A(0),
10005 + BPF_EXIT_INSN(),
10006 + },
10007 + .fixup_map1 = { 3 },
10008 + .errstr = "map_value pointer and 2147483646",
10009 + .result = REJECT
10010 + },
10011 + {
10012 + "bounds check map access with off+size signed 32bit overflow. test2",
10013 + .insns = {
10014 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10015 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10016 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10017 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10018 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10019 + BPF_FUNC_map_lookup_elem),
10020 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10021 + BPF_EXIT_INSN(),
10022 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10023 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10024 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
10025 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10026 + BPF_JMP_A(0),
10027 + BPF_EXIT_INSN(),
10028 + },
10029 + .fixup_map1 = { 3 },
10030 + .errstr = "pointer offset 1073741822",
10031 + .result = REJECT
10032 + },
10033 + {
10034 + "bounds check map access with off+size signed 32bit overflow. test3",
10035 + .insns = {
10036 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10037 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10038 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10039 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10040 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10041 + BPF_FUNC_map_lookup_elem),
10042 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10043 + BPF_EXIT_INSN(),
10044 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
10045 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
10046 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
10047 + BPF_JMP_A(0),
10048 + BPF_EXIT_INSN(),
10049 + },
10050 + .fixup_map1 = { 3 },
10051 + .errstr = "pointer offset -1073741822",
10052 + .result = REJECT
10053 + },
10054 + {
10055 + "bounds check map access with off+size signed 32bit overflow. test4",
10056 + .insns = {
10057 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10058 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10059 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10060 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10061 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10062 + BPF_FUNC_map_lookup_elem),
10063 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
10064 + BPF_EXIT_INSN(),
10065 + BPF_MOV64_IMM(BPF_REG_1, 1000000),
10066 + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
10067 + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
10068 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
10069 + BPF_JMP_A(0),
10070 + BPF_EXIT_INSN(),
10071 + },
10072 + .fixup_map1 = { 3 },
10073 + .errstr = "map_value pointer and 1000000000000",
10074 + .result = REJECT
10075 + },
10076 + {
10077 + "pointer/scalar confusion in state equality check (way 1)",
10078 + .insns = {
10079 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10080 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10081 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10082 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10083 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10084 + BPF_FUNC_map_lookup_elem),
10085 + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
10086 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10087 + BPF_JMP_A(1),
10088 + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
10089 + BPF_JMP_A(0),
10090 + BPF_EXIT_INSN(),
10091 + },
10092 + .fixup_map1 = { 3 },
10093 + .result = ACCEPT,
10094 + .result_unpriv = REJECT,
10095 + .errstr_unpriv = "R0 leaks addr as return value"
10096 + },
10097 + {
10098 + "pointer/scalar confusion in state equality check (way 2)",
10099 + .insns = {
10100 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10101 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
10102 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
10103 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10104 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10105 + BPF_FUNC_map_lookup_elem),
10106 + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
10107 + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
10108 + BPF_JMP_A(1),
10109 + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
10110 + BPF_EXIT_INSN(),
10111 + },
10112 + .fixup_map1 = { 3 },
10113 + .result = ACCEPT,
10114 + .result_unpriv = REJECT,
10115 + .errstr_unpriv = "R0 leaks addr as return value"
10116 + },
10117 {
10118 "variable-offset ctx access",
10119 .insns = {
10120 @@ -6597,6 +7049,71 @@ static struct bpf_test tests[] = {
10121 .result = REJECT,
10122 .prog_type = BPF_PROG_TYPE_LWT_IN,
10123 },
10124 + {
10125 + "indirect variable-offset stack access",
10126 + .insns = {
10127 + /* Fill the top 8 bytes of the stack */
10128 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
10129 + /* Get an unknown value */
10130 + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
10131 + /* Make it small and 4-byte aligned */
10132 + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
10133 + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
10134 + /* add it to fp. We now have either fp-4 or fp-8, but
10135 + * we don't know which
10136 + */
10137 + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
10138 + /* dereference it indirectly */
10139 + BPF_LD_MAP_FD(BPF_REG_1, 0),
10140 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
10141 + BPF_FUNC_map_lookup_elem),
10142 + BPF_MOV64_IMM(BPF_REG_0, 0),
10143 + BPF_EXIT_INSN(),
10144 + },
10145 + .fixup_map1 = { 5 },
10146 + .errstr = "variable stack read R2",
10147 + .result = REJECT,
10148 + .prog_type = BPF_PROG_TYPE_LWT_IN,
10149 + },
10150 + {
10151 + "direct stack access with 32-bit wraparound. test1",
10152 + .insns = {
10153 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10154 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
10155 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
10156 + BPF_MOV32_IMM(BPF_REG_0, 0),
10157 + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10158 + BPF_EXIT_INSN()
10159 + },
10160 + .errstr = "fp pointer and 2147483647",
10161 + .result = REJECT
10162 + },
10163 + {
10164 + "direct stack access with 32-bit wraparound. test2",
10165 + .insns = {
10166 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10167 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
10168 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
10169 + BPF_MOV32_IMM(BPF_REG_0, 0),
10170 + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10171 + BPF_EXIT_INSN()
10172 + },
10173 + .errstr = "fp pointer and 1073741823",
10174 + .result = REJECT
10175 + },
10176 + {
10177 + "direct stack access with 32-bit wraparound. test3",
10178 + .insns = {
10179 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
10180 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
10181 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
10182 + BPF_MOV32_IMM(BPF_REG_0, 0),
10183 + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
10184 + BPF_EXIT_INSN()
10185 + },
10186 + .errstr = "fp pointer offset 1073741822",
10187 + .result = REJECT
10188 + },
10189 {
10190 "liveness pruning and write screening",
10191 .insns = {
10192 diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
10193 index 2afc41a3730f..66e5ce5b91f0 100644
10194 --- a/tools/testing/selftests/x86/ldt_gdt.c
10195 +++ b/tools/testing/selftests/x86/ldt_gdt.c
10196 @@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt,
10197 }
10198 }
10199
10200 -static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
10201 - bool oldmode)
10202 +static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
10203 + bool oldmode, bool ldt)
10204 {
10205 - int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
10206 - desc, sizeof(*desc));
10207 - if (ret < -1)
10208 - errno = -ret;
10209 + struct user_desc desc = *d;
10210 + int ret;
10211 +
10212 + if (!ldt) {
10213 +#ifndef __i386__
10214 + /* No point testing set_thread_area in a 64-bit build */
10215 + return false;
10216 +#endif
10217 + if (!gdt_entry_num)
10218 + return false;
10219 + desc.entry_number = gdt_entry_num;
10220 +
10221 + ret = syscall(SYS_set_thread_area, &desc);
10222 + } else {
10223 + ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
10224 + &desc, sizeof(desc));
10225 +
10226 + if (ret < -1)
10227 + errno = -ret;
10228 +
10229 + if (ret != 0 && errno == ENOSYS) {
10230 + printf("[OK]\tmodify_ldt returned -ENOSYS\n");
10231 + return false;
10232 + }
10233 + }
10234 +
10235 if (ret == 0) {
10236 - uint32_t limit = desc->limit;
10237 - if (desc->limit_in_pages)
10238 + uint32_t limit = desc.limit;
10239 + if (desc.limit_in_pages)
10240 limit = (limit << 12) + 4095;
10241 - check_valid_segment(desc->entry_number, 1, ar, limit, true);
10242 + check_valid_segment(desc.entry_number, ldt, ar, limit, true);
10243 return true;
10244 - } else if (errno == ENOSYS) {
10245 - printf("[OK]\tmodify_ldt returned -ENOSYS\n");
10246 - return false;
10247 } else {
10248 - if (desc->seg_32bit) {
10249 - printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
10250 + if (desc.seg_32bit) {
10251 + printf("[FAIL]\tUnexpected %s failure %d\n",
10252 + ldt ? "modify_ldt" : "set_thread_area",
10253 errno);
10254 nerrs++;
10255 return false;
10256 } else {
10257 - printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
10258 + printf("[OK]\t%s rejected 16 bit segment\n",
10259 + ldt ? "modify_ldt" : "set_thread_area");
10260 return false;
10261 }
10262 }
10263 @@ -168,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
10264
10265 static bool install_valid(const struct user_desc *desc, uint32_t ar)
10266 {
10267 - return install_valid_mode(desc, ar, false);
10268 + bool ret = install_valid_mode(desc, ar, false, true);
10269 +
10270 + if (desc->contents <= 1 && desc->seg_32bit &&
10271 + !desc->seg_not_present) {
10272 + /* Should work in the GDT, too. */
10273 + install_valid_mode(desc, ar, false, false);
10274 + }
10275 +
10276 + return ret;
10277 }
10278
10279 static void install_invalid(const struct user_desc *desc, bool oldmode)
10280 diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
10281 index 484e8820c382..2447d7c017e7 100644
10282 --- a/virt/kvm/kvm_main.c
10283 +++ b/virt/kvm/kvm_main.c
10284 @@ -4018,7 +4018,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
10285 if (!vcpu_align)
10286 vcpu_align = __alignof__(struct kvm_vcpu);
10287 kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align,
10288 - 0, NULL);
10289 + SLAB_ACCOUNT, NULL);
10290 if (!kvm_vcpu_cache) {
10291 r = -ENOMEM;
10292 goto out_free_3;