Contents of /trunk/kernel-alx/patches-4.14/0108-4.14.9-all-fixes.patch
Parent Directory | Revision Log
Revision 3238 -
(show annotations)
(download)
Fri Nov 9 12:14:58 2018 UTC (5 years, 10 months ago) by niro
File size: 356090 byte(s)
-added up to patches-4.14.79
1 | diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt |
2 | index af0c9a4c65a6..cd4b29be29af 100644 |
3 | --- a/Documentation/x86/orc-unwinder.txt |
4 | +++ b/Documentation/x86/orc-unwinder.txt |
5 | @@ -4,7 +4,7 @@ ORC unwinder |
6 | Overview |
7 | -------- |
8 | |
9 | -The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is |
10 | +The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is |
11 | similar in concept to a DWARF unwinder. The difference is that the |
12 | format of the ORC data is much simpler than DWARF, which in turn allows |
13 | the ORC unwinder to be much simpler and faster. |
14 | diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt |
15 | index b0798e281aa6..3448e675b462 100644 |
16 | --- a/Documentation/x86/x86_64/mm.txt |
17 | +++ b/Documentation/x86/x86_64/mm.txt |
18 | @@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space |
19 | ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole |
20 | ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) |
21 | ... unused hole ... |
22 | -ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB) |
23 | +ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) |
24 | ... unused hole ... |
25 | ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks |
26 | ... unused hole ... |
27 | diff --git a/Makefile b/Makefile |
28 | index 97b5ae76ac8c..ed2132c6d286 100644 |
29 | --- a/Makefile |
30 | +++ b/Makefile |
31 | @@ -1,7 +1,7 @@ |
32 | # SPDX-License-Identifier: GPL-2.0 |
33 | VERSION = 4 |
34 | PATCHLEVEL = 14 |
35 | -SUBLEVEL = 8 |
36 | +SUBLEVEL = 9 |
37 | EXTRAVERSION = |
38 | NAME = Petit Gorille |
39 | |
40 | @@ -935,8 +935,8 @@ ifdef CONFIG_STACK_VALIDATION |
41 | ifeq ($(has_libelf),1) |
42 | objtool_target := tools/objtool FORCE |
43 | else |
44 | - ifdef CONFIG_ORC_UNWINDER |
45 | - $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") |
46 | + ifdef CONFIG_UNWINDER_ORC |
47 | + $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") |
48 | else |
49 | $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") |
50 | endif |
51 | diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig |
52 | index 8c2a2619971b..f1d7834990ec 100644 |
53 | --- a/arch/arm/configs/exynos_defconfig |
54 | +++ b/arch/arm/configs/exynos_defconfig |
55 | @@ -244,7 +244,7 @@ CONFIG_USB_STORAGE_ONETOUCH=m |
56 | CONFIG_USB_STORAGE_KARMA=m |
57 | CONFIG_USB_STORAGE_CYPRESS_ATACB=m |
58 | CONFIG_USB_STORAGE_ENE_UB6250=m |
59 | -CONFIG_USB_UAS=m |
60 | +CONFIG_USB_UAS=y |
61 | CONFIG_USB_DWC3=y |
62 | CONFIG_USB_DWC2=y |
63 | CONFIG_USB_HSIC_USB3503=y |
64 | diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h |
65 | index e9c9a117bd25..c7cdbb43ae7c 100644 |
66 | --- a/arch/arm/include/asm/ptrace.h |
67 | +++ b/arch/arm/include/asm/ptrace.h |
68 | @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); |
69 | /* |
70 | * kprobe-based event tracer support |
71 | */ |
72 | -#include <linux/stddef.h> |
73 | -#include <linux/types.h> |
74 | +#include <linux/compiler.h> |
75 | #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) |
76 | |
77 | extern int regs_query_register_offset(const char *name); |
78 | diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h |
79 | index caf86be815ba..4052ec39e8db 100644 |
80 | --- a/arch/arm64/include/asm/fixmap.h |
81 | +++ b/arch/arm64/include/asm/fixmap.h |
82 | @@ -51,6 +51,13 @@ enum fixed_addresses { |
83 | |
84 | FIX_EARLYCON_MEM_BASE, |
85 | FIX_TEXT_POKE0, |
86 | + |
87 | +#ifdef CONFIG_ACPI_APEI_GHES |
88 | + /* Used for GHES mapping from assorted contexts */ |
89 | + FIX_APEI_GHES_IRQ, |
90 | + FIX_APEI_GHES_NMI, |
91 | +#endif /* CONFIG_ACPI_APEI_GHES */ |
92 | + |
93 | __end_of_permanent_fixed_addresses, |
94 | |
95 | /* |
96 | diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c |
97 | index 57190f384f63..ce848ff84edd 100644 |
98 | --- a/arch/powerpc/kernel/watchdog.c |
99 | +++ b/arch/powerpc/kernel/watchdog.c |
100 | @@ -276,9 +276,12 @@ void arch_touch_nmi_watchdog(void) |
101 | { |
102 | unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; |
103 | int cpu = smp_processor_id(); |
104 | + u64 tb = get_tb(); |
105 | |
106 | - if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) |
107 | - watchdog_timer_interrupt(cpu); |
108 | + if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { |
109 | + per_cpu(wd_timer_tb, cpu) = tb; |
110 | + wd_smp_clear_cpu_pending(cpu, tb); |
111 | + } |
112 | } |
113 | EXPORT_SYMBOL(arch_touch_nmi_watchdog); |
114 | |
115 | diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c |
116 | index a66e64b0b251..5d115bd32539 100644 |
117 | --- a/arch/powerpc/net/bpf_jit_comp64.c |
118 | +++ b/arch/powerpc/net/bpf_jit_comp64.c |
119 | @@ -762,7 +762,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, |
120 | func = (u8 *) __bpf_call_base + imm; |
121 | |
122 | /* Save skb pointer if we need to re-cache skb data */ |
123 | - if (bpf_helper_changes_pkt_data(func)) |
124 | + if ((ctx->seen & SEEN_SKB) && |
125 | + bpf_helper_changes_pkt_data(func)) |
126 | PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); |
127 | |
128 | bpf_jit_emit_func_call(image, ctx, (u64)func); |
129 | @@ -771,7 +772,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, |
130 | PPC_MR(b2p[BPF_REG_0], 3); |
131 | |
132 | /* refresh skb cache */ |
133 | - if (bpf_helper_changes_pkt_data(func)) { |
134 | + if ((ctx->seen & SEEN_SKB) && |
135 | + bpf_helper_changes_pkt_data(func)) { |
136 | /* reload skb pointer to r3 */ |
137 | PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); |
138 | bpf_jit_emit_skb_loads(image, ctx); |
139 | diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c |
140 | index c008083fbc4f..2c8b325591cc 100644 |
141 | --- a/arch/powerpc/xmon/xmon.c |
142 | +++ b/arch/powerpc/xmon/xmon.c |
143 | @@ -530,14 +530,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi) |
144 | |
145 | waiting: |
146 | secondary = 1; |
147 | + spin_begin(); |
148 | while (secondary && !xmon_gate) { |
149 | if (in_xmon == 0) { |
150 | - if (fromipi) |
151 | + if (fromipi) { |
152 | + spin_end(); |
153 | goto leave; |
154 | + } |
155 | secondary = test_and_set_bit(0, &in_xmon); |
156 | } |
157 | - barrier(); |
158 | + spin_cpu_relax(); |
159 | + touch_nmi_watchdog(); |
160 | } |
161 | + spin_end(); |
162 | |
163 | if (!secondary && !xmon_gate) { |
164 | /* we are the first cpu to come in */ |
165 | @@ -568,21 +573,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi) |
166 | mb(); |
167 | xmon_gate = 1; |
168 | barrier(); |
169 | + touch_nmi_watchdog(); |
170 | } |
171 | |
172 | cmdloop: |
173 | while (in_xmon) { |
174 | if (secondary) { |
175 | + spin_begin(); |
176 | if (cpu == xmon_owner) { |
177 | if (!test_and_set_bit(0, &xmon_taken)) { |
178 | secondary = 0; |
179 | + spin_end(); |
180 | continue; |
181 | } |
182 | /* missed it */ |
183 | while (cpu == xmon_owner) |
184 | - barrier(); |
185 | + spin_cpu_relax(); |
186 | } |
187 | - barrier(); |
188 | + spin_cpu_relax(); |
189 | + touch_nmi_watchdog(); |
190 | } else { |
191 | cmd = cmds(regs); |
192 | if (cmd != 0) { |
193 | diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c |
194 | index b15cd2f0320f..33e2785f6842 100644 |
195 | --- a/arch/s390/net/bpf_jit_comp.c |
196 | +++ b/arch/s390/net/bpf_jit_comp.c |
197 | @@ -55,8 +55,7 @@ struct bpf_jit { |
198 | #define SEEN_LITERAL 8 /* code uses literals */ |
199 | #define SEEN_FUNC 16 /* calls C functions */ |
200 | #define SEEN_TAIL_CALL 32 /* code uses tail calls */ |
201 | -#define SEEN_SKB_CHANGE 64 /* code changes skb data */ |
202 | -#define SEEN_REG_AX 128 /* code uses constant blinding */ |
203 | +#define SEEN_REG_AX 64 /* code uses constant blinding */ |
204 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) |
205 | |
206 | /* |
207 | @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit) |
208 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, |
209 | REG_15, 152); |
210 | } |
211 | - if (jit->seen & SEEN_SKB) |
212 | + if (jit->seen & SEEN_SKB) { |
213 | emit_load_skb_data_hlen(jit); |
214 | - if (jit->seen & SEEN_SKB_CHANGE) |
215 | /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ |
216 | EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, |
217 | STK_OFF_SKBP); |
218 | + } |
219 | } |
220 | |
221 | /* |
222 | @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i |
223 | EMIT2(0x0d00, REG_14, REG_W1); |
224 | /* lgr %b0,%r2: load return value into %b0 */ |
225 | EMIT4(0xb9040000, BPF_REG_0, REG_2); |
226 | - if (bpf_helper_changes_pkt_data((void *)func)) { |
227 | - jit->seen |= SEEN_SKB_CHANGE; |
228 | + if ((jit->seen & SEEN_SKB) && |
229 | + bpf_helper_changes_pkt_data((void *)func)) { |
230 | /* lg %b1,ST_OFF_SKBP(%r15) */ |
231 | EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, |
232 | REG_15, STK_OFF_SKBP); |
233 | diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h |
234 | index 6a339a78f4f4..71dd82b43cc5 100644 |
235 | --- a/arch/sparc/include/asm/ptrace.h |
236 | +++ b/arch/sparc/include/asm/ptrace.h |
237 | @@ -7,6 +7,7 @@ |
238 | #if defined(__sparc__) && defined(__arch64__) |
239 | #ifndef __ASSEMBLY__ |
240 | |
241 | +#include <linux/compiler.h> |
242 | #include <linux/threads.h> |
243 | #include <asm/switch_to.h> |
244 | |
245 | diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c |
246 | index 5765e7e711f7..ff5f9cb3039a 100644 |
247 | --- a/arch/sparc/net/bpf_jit_comp_64.c |
248 | +++ b/arch/sparc/net/bpf_jit_comp_64.c |
249 | @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) |
250 | u8 *func = ((u8 *)__bpf_call_base) + imm; |
251 | |
252 | ctx->saw_call = true; |
253 | + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) |
254 | + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); |
255 | |
256 | emit_call((u32 *)func, ctx); |
257 | emit_nop(ctx); |
258 | |
259 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); |
260 | |
261 | - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) |
262 | - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); |
263 | + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) |
264 | + load_skb_regs(ctx, L7); |
265 | break; |
266 | } |
267 | |
268 | diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild |
269 | index 50a32c33d729..73c57f614c9e 100644 |
270 | --- a/arch/um/include/asm/Kbuild |
271 | +++ b/arch/um/include/asm/Kbuild |
272 | @@ -1,4 +1,5 @@ |
273 | generic-y += barrier.h |
274 | +generic-y += bpf_perf_event.h |
275 | generic-y += bug.h |
276 | generic-y += clkdev.h |
277 | generic-y += current.h |
278 | diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h |
279 | index 390572daa40d..b3f5865a92c9 100644 |
280 | --- a/arch/um/include/shared/init.h |
281 | +++ b/arch/um/include/shared/init.h |
282 | @@ -41,7 +41,7 @@ |
283 | typedef int (*initcall_t)(void); |
284 | typedef void (*exitcall_t)(void); |
285 | |
286 | -#include <linux/compiler.h> |
287 | +#include <linux/compiler_types.h> |
288 | |
289 | /* These are for everybody (although not all archs will actually |
290 | discard it in modules) */ |
291 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig |
292 | index 9bceea6a5852..48646160eb83 100644 |
293 | --- a/arch/x86/Kconfig |
294 | +++ b/arch/x86/Kconfig |
295 | @@ -108,7 +108,7 @@ config X86 |
296 | select HAVE_ARCH_AUDITSYSCALL |
297 | select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE |
298 | select HAVE_ARCH_JUMP_LABEL |
299 | - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP |
300 | + select HAVE_ARCH_KASAN if X86_64 |
301 | select HAVE_ARCH_KGDB |
302 | select HAVE_ARCH_KMEMCHECK |
303 | select HAVE_ARCH_MMAP_RND_BITS if MMU |
304 | @@ -171,7 +171,7 @@ config X86 |
305 | select HAVE_PERF_USER_STACK_DUMP |
306 | select HAVE_RCU_TABLE_FREE |
307 | select HAVE_REGS_AND_STACK_ACCESS_API |
308 | - select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION |
309 | + select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION |
310 | select HAVE_STACK_VALIDATION if X86_64 |
311 | select HAVE_SYSCALL_TRACEPOINTS |
312 | select HAVE_UNSTABLE_SCHED_CLOCK |
313 | @@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
314 | config KASAN_SHADOW_OFFSET |
315 | hex |
316 | depends on KASAN |
317 | - default 0xdff8000000000000 if X86_5LEVEL |
318 | default 0xdffffc0000000000 |
319 | |
320 | config HAVE_INTEL_TXT |
321 | diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug |
322 | index 90b123056f4b..6293a8768a91 100644 |
323 | --- a/arch/x86/Kconfig.debug |
324 | +++ b/arch/x86/Kconfig.debug |
325 | @@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG |
326 | |
327 | choice |
328 | prompt "Choose kernel unwinder" |
329 | - default FRAME_POINTER_UNWINDER |
330 | + default UNWINDER_ORC if X86_64 |
331 | + default UNWINDER_FRAME_POINTER if X86_32 |
332 | ---help--- |
333 | This determines which method will be used for unwinding kernel stack |
334 | traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, |
335 | livepatch, lockdep, and more. |
336 | |
337 | -config FRAME_POINTER_UNWINDER |
338 | - bool "Frame pointer unwinder" |
339 | - select FRAME_POINTER |
340 | - ---help--- |
341 | - This option enables the frame pointer unwinder for unwinding kernel |
342 | - stack traces. |
343 | - |
344 | - The unwinder itself is fast and it uses less RAM than the ORC |
345 | - unwinder, but the kernel text size will grow by ~3% and the kernel's |
346 | - overall performance will degrade by roughly 5-10%. |
347 | - |
348 | - This option is recommended if you want to use the livepatch |
349 | - consistency model, as this is currently the only way to get a |
350 | - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). |
351 | - |
352 | -config ORC_UNWINDER |
353 | +config UNWINDER_ORC |
354 | bool "ORC unwinder" |
355 | depends on X86_64 |
356 | select STACK_VALIDATION |
357 | @@ -396,7 +382,22 @@ config ORC_UNWINDER |
358 | Enabling this option will increase the kernel's runtime memory usage |
359 | by roughly 2-4MB, depending on your kernel config. |
360 | |
361 | -config GUESS_UNWINDER |
362 | +config UNWINDER_FRAME_POINTER |
363 | + bool "Frame pointer unwinder" |
364 | + select FRAME_POINTER |
365 | + ---help--- |
366 | + This option enables the frame pointer unwinder for unwinding kernel |
367 | + stack traces. |
368 | + |
369 | + The unwinder itself is fast and it uses less RAM than the ORC |
370 | + unwinder, but the kernel text size will grow by ~3% and the kernel's |
371 | + overall performance will degrade by roughly 5-10%. |
372 | + |
373 | + This option is recommended if you want to use the livepatch |
374 | + consistency model, as this is currently the only way to get a |
375 | + reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). |
376 | + |
377 | +config UNWINDER_GUESS |
378 | bool "Guess unwinder" |
379 | depends on EXPERT |
380 | ---help--- |
381 | @@ -411,7 +412,7 @@ config GUESS_UNWINDER |
382 | endchoice |
383 | |
384 | config FRAME_POINTER |
385 | - depends on !ORC_UNWINDER && !GUESS_UNWINDER |
386 | + depends on !UNWINDER_ORC && !UNWINDER_GUESS |
387 | bool |
388 | |
389 | endmenu |
390 | diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config |
391 | index 550cd5012b73..66c9e2aab16c 100644 |
392 | --- a/arch/x86/configs/tiny.config |
393 | +++ b/arch/x86/configs/tiny.config |
394 | @@ -1,5 +1,5 @@ |
395 | CONFIG_NOHIGHMEM=y |
396 | # CONFIG_HIGHMEM4G is not set |
397 | # CONFIG_HIGHMEM64G is not set |
398 | -CONFIG_GUESS_UNWINDER=y |
399 | -# CONFIG_FRAME_POINTER_UNWINDER is not set |
400 | +CONFIG_UNWINDER_GUESS=y |
401 | +# CONFIG_UNWINDER_FRAME_POINTER is not set |
402 | diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig |
403 | index 4a4b16e56d35..e32fc1f274d8 100644 |
404 | --- a/arch/x86/configs/x86_64_defconfig |
405 | +++ b/arch/x86/configs/x86_64_defconfig |
406 | @@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y |
407 | # CONFIG_DEBUG_RODATA_TEST is not set |
408 | CONFIG_DEBUG_BOOT_PARAMS=y |
409 | CONFIG_OPTIMIZE_INLINING=y |
410 | +CONFIG_UNWINDER_ORC=y |
411 | CONFIG_SECURITY=y |
412 | CONFIG_SECURITY_NETWORK=y |
413 | CONFIG_SECURITY_SELINUX=y |
414 | diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h |
415 | index 6e160031cfea..3fd8bc560fae 100644 |
416 | --- a/arch/x86/entry/calling.h |
417 | +++ b/arch/x86/entry/calling.h |
418 | @@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with |
419 | UNWIND_HINT_REGS offset=\offset |
420 | .endm |
421 | |
422 | - .macro RESTORE_EXTRA_REGS offset=0 |
423 | - movq 0*8+\offset(%rsp), %r15 |
424 | - movq 1*8+\offset(%rsp), %r14 |
425 | - movq 2*8+\offset(%rsp), %r13 |
426 | - movq 3*8+\offset(%rsp), %r12 |
427 | - movq 4*8+\offset(%rsp), %rbp |
428 | - movq 5*8+\offset(%rsp), %rbx |
429 | - UNWIND_HINT_REGS offset=\offset extra=0 |
430 | - .endm |
431 | - |
432 | - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 |
433 | - .if \rstor_r11 |
434 | - movq 6*8(%rsp), %r11 |
435 | - .endif |
436 | - .if \rstor_r8910 |
437 | - movq 7*8(%rsp), %r10 |
438 | - movq 8*8(%rsp), %r9 |
439 | - movq 9*8(%rsp), %r8 |
440 | - .endif |
441 | - .if \rstor_rax |
442 | - movq 10*8(%rsp), %rax |
443 | - .endif |
444 | - .if \rstor_rcx |
445 | - movq 11*8(%rsp), %rcx |
446 | - .endif |
447 | - .if \rstor_rdx |
448 | - movq 12*8(%rsp), %rdx |
449 | - .endif |
450 | - movq 13*8(%rsp), %rsi |
451 | - movq 14*8(%rsp), %rdi |
452 | - UNWIND_HINT_IRET_REGS offset=16*8 |
453 | - .endm |
454 | - .macro RESTORE_C_REGS |
455 | - RESTORE_C_REGS_HELPER 1,1,1,1,1 |
456 | - .endm |
457 | - .macro RESTORE_C_REGS_EXCEPT_RAX |
458 | - RESTORE_C_REGS_HELPER 0,1,1,1,1 |
459 | - .endm |
460 | - .macro RESTORE_C_REGS_EXCEPT_RCX |
461 | - RESTORE_C_REGS_HELPER 1,0,1,1,1 |
462 | - .endm |
463 | - .macro RESTORE_C_REGS_EXCEPT_R11 |
464 | - RESTORE_C_REGS_HELPER 1,1,0,1,1 |
465 | - .endm |
466 | - .macro RESTORE_C_REGS_EXCEPT_RCX_R11 |
467 | - RESTORE_C_REGS_HELPER 1,0,0,1,1 |
468 | - .endm |
469 | - |
470 | - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 |
471 | - subq $-(15*8+\addskip), %rsp |
472 | + .macro POP_EXTRA_REGS |
473 | + popq %r15 |
474 | + popq %r14 |
475 | + popq %r13 |
476 | + popq %r12 |
477 | + popq %rbp |
478 | + popq %rbx |
479 | + .endm |
480 | + |
481 | + .macro POP_C_REGS |
482 | + popq %r11 |
483 | + popq %r10 |
484 | + popq %r9 |
485 | + popq %r8 |
486 | + popq %rax |
487 | + popq %rcx |
488 | + popq %rdx |
489 | + popq %rsi |
490 | + popq %rdi |
491 | .endm |
492 | |
493 | .macro icebp |
494 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S |
495 | index 4838037f97f6..bd8b57a5c874 100644 |
496 | --- a/arch/x86/entry/entry_32.S |
497 | +++ b/arch/x86/entry/entry_32.S |
498 | @@ -941,7 +941,8 @@ ENTRY(debug) |
499 | movl %esp, %eax # pt_regs pointer |
500 | |
501 | /* Are we currently on the SYSENTER stack? */ |
502 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) |
503 | + movl PER_CPU_VAR(cpu_entry_area), %ecx |
504 | + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx |
505 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ |
506 | cmpl $SIZEOF_SYSENTER_stack, %ecx |
507 | jb .Ldebug_from_sysenter_stack |
508 | @@ -984,7 +985,8 @@ ENTRY(nmi) |
509 | movl %esp, %eax # pt_regs pointer |
510 | |
511 | /* Are we currently on the SYSENTER stack? */ |
512 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) |
513 | + movl PER_CPU_VAR(cpu_entry_area), %ecx |
514 | + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx |
515 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ |
516 | cmpl $SIZEOF_SYSENTER_stack, %ecx |
517 | jb .Lnmi_from_sysenter_stack |
518 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S |
519 | index 2e956afe272c..6abe3fcaece9 100644 |
520 | --- a/arch/x86/entry/entry_64.S |
521 | +++ b/arch/x86/entry/entry_64.S |
522 | @@ -136,6 +136,64 @@ END(native_usergs_sysret64) |
523 | * with them due to bugs in both AMD and Intel CPUs. |
524 | */ |
525 | |
526 | + .pushsection .entry_trampoline, "ax" |
527 | + |
528 | +/* |
529 | + * The code in here gets remapped into cpu_entry_area's trampoline. This means |
530 | + * that the assembler and linker have the wrong idea as to where this code |
531 | + * lives (and, in fact, it's mapped more than once, so it's not even at a |
532 | + * fixed address). So we can't reference any symbols outside the entry |
533 | + * trampoline and expect it to work. |
534 | + * |
535 | + * Instead, we carefully abuse %rip-relative addressing. |
536 | + * _entry_trampoline(%rip) refers to the start of the (remapped) entry |
537 | + * trampoline. We can thus find cpu_entry_area with this macro: |
538 | + */ |
539 | + |
540 | +#define CPU_ENTRY_AREA \ |
541 | + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) |
542 | + |
543 | +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ |
544 | +#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ |
545 | + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA |
546 | + |
547 | +ENTRY(entry_SYSCALL_64_trampoline) |
548 | + UNWIND_HINT_EMPTY |
549 | + swapgs |
550 | + |
551 | + /* Stash the user RSP. */ |
552 | + movq %rsp, RSP_SCRATCH |
553 | + |
554 | + /* Load the top of the task stack into RSP */ |
555 | + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp |
556 | + |
557 | + /* Start building the simulated IRET frame. */ |
558 | + pushq $__USER_DS /* pt_regs->ss */ |
559 | + pushq RSP_SCRATCH /* pt_regs->sp */ |
560 | + pushq %r11 /* pt_regs->flags */ |
561 | + pushq $__USER_CS /* pt_regs->cs */ |
562 | + pushq %rcx /* pt_regs->ip */ |
563 | + |
564 | + /* |
565 | + * x86 lacks a near absolute jump, and we can't jump to the real |
566 | + * entry text with a relative jump. We could push the target |
567 | + * address and then use retq, but this destroys the pipeline on |
568 | + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, |
569 | + * spill RDI and restore it in a second-stage trampoline. |
570 | + */ |
571 | + pushq %rdi |
572 | + movq $entry_SYSCALL_64_stage2, %rdi |
573 | + jmp *%rdi |
574 | +END(entry_SYSCALL_64_trampoline) |
575 | + |
576 | + .popsection |
577 | + |
578 | +ENTRY(entry_SYSCALL_64_stage2) |
579 | + UNWIND_HINT_EMPTY |
580 | + popq %rdi |
581 | + jmp entry_SYSCALL_64_after_hwframe |
582 | +END(entry_SYSCALL_64_stage2) |
583 | + |
584 | ENTRY(entry_SYSCALL_64) |
585 | UNWIND_HINT_EMPTY |
586 | /* |
587 | @@ -221,10 +279,9 @@ entry_SYSCALL_64_fastpath: |
588 | TRACE_IRQS_ON /* user mode is traced as IRQs on */ |
589 | movq RIP(%rsp), %rcx |
590 | movq EFLAGS(%rsp), %r11 |
591 | - RESTORE_C_REGS_EXCEPT_RCX_R11 |
592 | - movq RSP(%rsp), %rsp |
593 | + addq $6*8, %rsp /* skip extra regs -- they were preserved */ |
594 | UNWIND_HINT_EMPTY |
595 | - USERGS_SYSRET64 |
596 | + jmp .Lpop_c_regs_except_rcx_r11_and_sysret |
597 | |
598 | 1: |
599 | /* |
600 | @@ -246,17 +303,18 @@ entry_SYSCALL64_slow_path: |
601 | call do_syscall_64 /* returns with IRQs disabled */ |
602 | |
603 | return_from_SYSCALL_64: |
604 | - RESTORE_EXTRA_REGS |
605 | TRACE_IRQS_IRETQ /* we're about to change IF */ |
606 | |
607 | /* |
608 | * Try to use SYSRET instead of IRET if we're returning to |
609 | - * a completely clean 64-bit userspace context. |
610 | + * a completely clean 64-bit userspace context. If we're not, |
611 | + * go to the slow exit path. |
612 | */ |
613 | movq RCX(%rsp), %rcx |
614 | movq RIP(%rsp), %r11 |
615 | - cmpq %rcx, %r11 /* RCX == RIP */ |
616 | - jne opportunistic_sysret_failed |
617 | + |
618 | + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ |
619 | + jne swapgs_restore_regs_and_return_to_usermode |
620 | |
621 | /* |
622 | * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP |
623 | @@ -274,14 +332,14 @@ return_from_SYSCALL_64: |
624 | |
625 | /* If this changed %rcx, it was not canonical */ |
626 | cmpq %rcx, %r11 |
627 | - jne opportunistic_sysret_failed |
628 | + jne swapgs_restore_regs_and_return_to_usermode |
629 | |
630 | cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ |
631 | - jne opportunistic_sysret_failed |
632 | + jne swapgs_restore_regs_and_return_to_usermode |
633 | |
634 | movq R11(%rsp), %r11 |
635 | cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ |
636 | - jne opportunistic_sysret_failed |
637 | + jne swapgs_restore_regs_and_return_to_usermode |
638 | |
639 | /* |
640 | * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot |
641 | @@ -302,12 +360,12 @@ return_from_SYSCALL_64: |
642 | * would never get past 'stuck_here'. |
643 | */ |
644 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 |
645 | - jnz opportunistic_sysret_failed |
646 | + jnz swapgs_restore_regs_and_return_to_usermode |
647 | |
648 | /* nothing to check for RSP */ |
649 | |
650 | cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ |
651 | - jne opportunistic_sysret_failed |
652 | + jne swapgs_restore_regs_and_return_to_usermode |
653 | |
654 | /* |
655 | * We win! This label is here just for ease of understanding |
656 | @@ -315,14 +373,36 @@ return_from_SYSCALL_64: |
657 | */ |
658 | syscall_return_via_sysret: |
659 | /* rcx and r11 are already restored (see code above) */ |
660 | - RESTORE_C_REGS_EXCEPT_RCX_R11 |
661 | - movq RSP(%rsp), %rsp |
662 | UNWIND_HINT_EMPTY |
663 | - USERGS_SYSRET64 |
664 | + POP_EXTRA_REGS |
665 | +.Lpop_c_regs_except_rcx_r11_and_sysret: |
666 | + popq %rsi /* skip r11 */ |
667 | + popq %r10 |
668 | + popq %r9 |
669 | + popq %r8 |
670 | + popq %rax |
671 | + popq %rsi /* skip rcx */ |
672 | + popq %rdx |
673 | + popq %rsi |
674 | |
675 | -opportunistic_sysret_failed: |
676 | - SWAPGS |
677 | - jmp restore_c_regs_and_iret |
678 | + /* |
679 | + * Now all regs are restored except RSP and RDI. |
680 | + * Save old stack pointer and switch to trampoline stack. |
681 | + */ |
682 | + movq %rsp, %rdi |
683 | + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp |
684 | + |
685 | + pushq RSP-RDI(%rdi) /* RSP */ |
686 | + pushq (%rdi) /* RDI */ |
687 | + |
688 | + /* |
689 | + * We are on the trampoline stack. All regs except RDI are live. |
690 | + * We can do future final exit work right here. |
691 | + */ |
692 | + |
693 | + popq %rdi |
694 | + popq %rsp |
695 | + USERGS_SYSRET64 |
696 | END(entry_SYSCALL_64) |
697 | |
698 | ENTRY(stub_ptregs_64) |
699 | @@ -423,8 +503,7 @@ ENTRY(ret_from_fork) |
700 | movq %rsp, %rdi |
701 | call syscall_return_slowpath /* returns with IRQs disabled */ |
702 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ |
703 | - SWAPGS |
704 | - jmp restore_regs_and_iret |
705 | + jmp swapgs_restore_regs_and_return_to_usermode |
706 | |
707 | 1: |
708 | /* kernel thread */ |
709 | @@ -457,12 +536,13 @@ END(irq_entries_start) |
710 | |
711 | .macro DEBUG_ENTRY_ASSERT_IRQS_OFF |
712 | #ifdef CONFIG_DEBUG_ENTRY |
713 | - pushfq |
714 | - testl $X86_EFLAGS_IF, (%rsp) |
715 | + pushq %rax |
716 | + SAVE_FLAGS(CLBR_RAX) |
717 | + testl $X86_EFLAGS_IF, %eax |
718 | jz .Lokay_\@ |
719 | ud2 |
720 | .Lokay_\@: |
721 | - addq $8, %rsp |
722 | + popq %rax |
723 | #endif |
724 | .endm |
725 | |
726 | @@ -554,6 +634,13 @@ END(irq_entries_start) |
727 | /* 0(%rsp): ~(interrupt number) */ |
728 | .macro interrupt func |
729 | cld |
730 | + |
731 | + testb $3, CS-ORIG_RAX(%rsp) |
732 | + jz 1f |
733 | + SWAPGS |
734 | + call switch_to_thread_stack |
735 | +1: |
736 | + |
737 | ALLOC_PT_GPREGS_ON_STACK |
738 | SAVE_C_REGS |
739 | SAVE_EXTRA_REGS |
740 | @@ -563,12 +650,8 @@ END(irq_entries_start) |
741 | jz 1f |
742 | |
743 | /* |
744 | - * IRQ from user mode. Switch to kernel gsbase and inform context |
745 | - * tracking that we're in kernel mode. |
746 | - */ |
747 | - SWAPGS |
748 | - |
749 | - /* |
750 | + * IRQ from user mode. |
751 | + * |
752 | * We need to tell lockdep that IRQs are off. We can't do this until |
753 | * we fix gsbase, and we should do it before enter_from_user_mode |
754 | * (which can take locks). Since TRACE_IRQS_OFF idempotent, |
755 | @@ -612,8 +695,52 @@ GLOBAL(retint_user) |
756 | mov %rsp,%rdi |
757 | call prepare_exit_to_usermode |
758 | TRACE_IRQS_IRETQ |
759 | + |
760 | +GLOBAL(swapgs_restore_regs_and_return_to_usermode) |
761 | +#ifdef CONFIG_DEBUG_ENTRY |
762 | + /* Assert that pt_regs indicates user mode. */ |
763 | + testb $3, CS(%rsp) |
764 | + jnz 1f |
765 | + ud2 |
766 | +1: |
767 | +#endif |
768 | + POP_EXTRA_REGS |
769 | + popq %r11 |
770 | + popq %r10 |
771 | + popq %r9 |
772 | + popq %r8 |
773 | + popq %rax |
774 | + popq %rcx |
775 | + popq %rdx |
776 | + popq %rsi |
777 | + |
778 | + /* |
779 | + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. |
780 | + * Save old stack pointer and switch to trampoline stack. |
781 | + */ |
782 | + movq %rsp, %rdi |
783 | + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp |
784 | + |
785 | + /* Copy the IRET frame to the trampoline stack. */ |
786 | + pushq 6*8(%rdi) /* SS */ |
787 | + pushq 5*8(%rdi) /* RSP */ |
788 | + pushq 4*8(%rdi) /* EFLAGS */ |
789 | + pushq 3*8(%rdi) /* CS */ |
790 | + pushq 2*8(%rdi) /* RIP */ |
791 | + |
792 | + /* Push user RDI on the trampoline stack. */ |
793 | + pushq (%rdi) |
794 | + |
795 | + /* |
796 | + * We are on the trampoline stack. All regs except RDI are live. |
797 | + * We can do future final exit work right here. |
798 | + */ |
799 | + |
800 | + /* Restore RDI. */ |
801 | + popq %rdi |
802 | SWAPGS |
803 | - jmp restore_regs_and_iret |
804 | + INTERRUPT_RETURN |
805 | + |
806 | |
807 | /* Returning to kernel space */ |
808 | retint_kernel: |
809 | @@ -633,15 +760,17 @@ retint_kernel: |
810 | */ |
811 | TRACE_IRQS_IRETQ |
812 | |
813 | -/* |
814 | - * At this label, code paths which return to kernel and to user, |
815 | - * which come from interrupts/exception and from syscalls, merge. |
816 | - */ |
817 | -GLOBAL(restore_regs_and_iret) |
818 | - RESTORE_EXTRA_REGS |
819 | -restore_c_regs_and_iret: |
820 | - RESTORE_C_REGS |
821 | - REMOVE_PT_GPREGS_FROM_STACK 8 |
822 | +GLOBAL(restore_regs_and_return_to_kernel) |
823 | +#ifdef CONFIG_DEBUG_ENTRY |
824 | + /* Assert that pt_regs indicates kernel mode. */ |
825 | + testb $3, CS(%rsp) |
826 | + jz 1f |
827 | + ud2 |
828 | +1: |
829 | +#endif |
830 | + POP_EXTRA_REGS |
831 | + POP_C_REGS |
832 | + addq $8, %rsp /* skip regs->orig_ax */ |
833 | INTERRUPT_RETURN |
834 | |
835 | ENTRY(native_iret) |
836 | @@ -805,7 +934,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt |
837 | /* |
838 | * Exception entry points. |
839 | */ |
840 | -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) |
841 | +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) |
842 | + |
843 | +/* |
844 | + * Switch to the thread stack. This is called with the IRET frame and |
845 | + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and |
846 | + * space has not been allocated for them.) |
847 | + */ |
848 | +ENTRY(switch_to_thread_stack) |
849 | + UNWIND_HINT_FUNC |
850 | + |
851 | + pushq %rdi |
852 | + movq %rsp, %rdi |
853 | + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
854 | + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI |
855 | + |
856 | + pushq 7*8(%rdi) /* regs->ss */ |
857 | + pushq 6*8(%rdi) /* regs->rsp */ |
858 | + pushq 5*8(%rdi) /* regs->eflags */ |
859 | + pushq 4*8(%rdi) /* regs->cs */ |
860 | + pushq 3*8(%rdi) /* regs->ip */ |
861 | + pushq 2*8(%rdi) /* regs->orig_ax */ |
862 | + pushq 8(%rdi) /* return address */ |
863 | + UNWIND_HINT_FUNC |
864 | + |
865 | + movq (%rdi), %rdi |
866 | + ret |
867 | +END(switch_to_thread_stack) |
868 | |
869 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 |
870 | ENTRY(\sym) |
871 | @@ -818,17 +973,18 @@ ENTRY(\sym) |
872 | |
873 | ASM_CLAC |
874 | |
875 | - .ifeq \has_error_code |
876 | + .if \has_error_code == 0 |
877 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
878 | .endif |
879 | |
880 | ALLOC_PT_GPREGS_ON_STACK |
881 | |
882 | - .if \paranoid |
883 | - .if \paranoid == 1 |
884 | + .if \paranoid < 2 |
885 | testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ |
886 | - jnz 1f |
887 | + jnz .Lfrom_usermode_switch_stack_\@ |
888 | .endif |
889 | + |
890 | + .if \paranoid |
891 | call paranoid_entry |
892 | .else |
893 | call error_entry |
894 | @@ -870,20 +1026,15 @@ ENTRY(\sym) |
895 | jmp error_exit |
896 | .endif |
897 | |
898 | - .if \paranoid == 1 |
899 | + .if \paranoid < 2 |
900 | /* |
901 | - * Paranoid entry from userspace. Switch stacks and treat it |
902 | + * Entry from userspace. Switch stacks and treat it |
903 | * as a normal entry. This means that paranoid handlers |
904 | * run in real process context if user_mode(regs). |
905 | */ |
906 | -1: |
907 | +.Lfrom_usermode_switch_stack_\@: |
908 | call error_entry |
909 | |
910 | - |
911 | - movq %rsp, %rdi /* pt_regs pointer */ |
912 | - call sync_regs |
913 | - movq %rax, %rsp /* switch stack */ |
914 | - |
915 | movq %rsp, %rdi /* pt_regs pointer */ |
916 | |
917 | .if \has_error_code |
918 | @@ -1059,6 +1210,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK |
919 | idtentry stack_segment do_stack_segment has_error_code=1 |
920 | |
921 | #ifdef CONFIG_XEN |
922 | +idtentry xennmi do_nmi has_error_code=0 |
923 | idtentry xendebug do_debug has_error_code=0 |
924 | idtentry xenint3 do_int3 has_error_code=0 |
925 | #endif |
926 | @@ -1112,17 +1264,14 @@ ENTRY(paranoid_exit) |
927 | DISABLE_INTERRUPTS(CLBR_ANY) |
928 | TRACE_IRQS_OFF_DEBUG |
929 | testl %ebx, %ebx /* swapgs needed? */ |
930 | - jnz paranoid_exit_no_swapgs |
931 | + jnz .Lparanoid_exit_no_swapgs |
932 | TRACE_IRQS_IRETQ |
933 | SWAPGS_UNSAFE_STACK |
934 | - jmp paranoid_exit_restore |
935 | -paranoid_exit_no_swapgs: |
936 | + jmp .Lparanoid_exit_restore |
937 | +.Lparanoid_exit_no_swapgs: |
938 | TRACE_IRQS_IRETQ_DEBUG |
939 | -paranoid_exit_restore: |
940 | - RESTORE_EXTRA_REGS |
941 | - RESTORE_C_REGS |
942 | - REMOVE_PT_GPREGS_FROM_STACK 8 |
943 | - INTERRUPT_RETURN |
944 | +.Lparanoid_exit_restore: |
945 | + jmp restore_regs_and_return_to_kernel |
946 | END(paranoid_exit) |
947 | |
948 | /* |
949 | @@ -1146,6 +1295,14 @@ ENTRY(error_entry) |
950 | SWAPGS |
951 | |
952 | .Lerror_entry_from_usermode_after_swapgs: |
953 | + /* Put us onto the real thread stack. */ |
954 | + popq %r12 /* save return addr in %r12 */ |
955 | + movq %rsp, %rdi /* arg0 = pt_regs pointer */ |
956 | + call sync_regs |
957 | + movq %rax, %rsp /* switch stack */ |
958 | + ENCODE_FRAME_POINTER |
959 | + pushq %r12 |
960 | + |
961 | /* |
962 | * We need to tell lockdep that IRQs are off. We can't do this until |
963 | * we fix gsbase, and we should do it before enter_from_user_mode |
964 | @@ -1223,10 +1380,13 @@ ENTRY(error_exit) |
965 | jmp retint_user |
966 | END(error_exit) |
967 | |
968 | -/* Runs on exception stack */ |
969 | -/* XXX: broken on Xen PV */ |
970 | +/* |
971 | + * Runs on exception stack. Xen PV does not go through this path at all, |
972 | + * so we can use real assembly here. |
973 | + */ |
974 | ENTRY(nmi) |
975 | UNWIND_HINT_IRET_REGS |
976 | + |
977 | /* |
978 | * We allow breakpoints in NMIs. If a breakpoint occurs, then |
979 | * the iretq it performs will take us out of NMI context. |
980 | @@ -1284,7 +1444,7 @@ ENTRY(nmi) |
981 | * stacks lest we corrupt the "NMI executing" variable. |
982 | */ |
983 | |
984 | - SWAPGS_UNSAFE_STACK |
985 | + swapgs |
986 | cld |
987 | movq %rsp, %rdx |
988 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
989 | @@ -1328,8 +1488,7 @@ ENTRY(nmi) |
990 | * Return back to user mode. We must *not* do the normal exit |
991 | * work, because we don't want to enable interrupts. |
992 | */ |
993 | - SWAPGS |
994 | - jmp restore_regs_and_iret |
995 | + jmp swapgs_restore_regs_and_return_to_usermode |
996 | |
997 | .Lnmi_from_kernel: |
998 | /* |
999 | @@ -1450,7 +1609,7 @@ nested_nmi_out: |
1000 | popq %rdx |
1001 | |
1002 | /* We are returning to kernel mode, so this cannot result in a fault. */ |
1003 | - INTERRUPT_RETURN |
1004 | + iretq |
1005 | |
1006 | first_nmi: |
1007 | /* Restore rdx. */ |
1008 | @@ -1481,7 +1640,7 @@ first_nmi: |
1009 | pushfq /* RFLAGS */ |
1010 | pushq $__KERNEL_CS /* CS */ |
1011 | pushq $1f /* RIP */ |
1012 | - INTERRUPT_RETURN /* continues at repeat_nmi below */ |
1013 | + iretq /* continues at repeat_nmi below */ |
1014 | UNWIND_HINT_IRET_REGS |
1015 | 1: |
1016 | #endif |
1017 | @@ -1544,29 +1703,34 @@ end_repeat_nmi: |
1018 | nmi_swapgs: |
1019 | SWAPGS_UNSAFE_STACK |
1020 | nmi_restore: |
1021 | - RESTORE_EXTRA_REGS |
1022 | - RESTORE_C_REGS |
1023 | + POP_EXTRA_REGS |
1024 | + POP_C_REGS |
1025 | |
1026 | - /* Point RSP at the "iret" frame. */ |
1027 | - REMOVE_PT_GPREGS_FROM_STACK 6*8 |
1028 | + /* |
1029 | + * Skip orig_ax and the "outermost" frame to point RSP at the "iret" |
1030 | + * frame. |
1031 | + */ |
1032 | + addq $6*8, %rsp |
1033 | |
1034 | /* |
1035 | * Clear "NMI executing". Set DF first so that we can easily |
1036 | * distinguish the remaining code between here and IRET from |
1037 | - * the SYSCALL entry and exit paths. On a native kernel, we |
1038 | - * could just inspect RIP, but, on paravirt kernels, |
1039 | - * INTERRUPT_RETURN can translate into a jump into a |
1040 | - * hypercall page. |
1041 | + * the SYSCALL entry and exit paths. |
1042 | + * |
1043 | + * We arguably should just inspect RIP instead, but I (Andy) wrote |
1044 | + * this code when I had the misapprehension that Xen PV supported |
1045 | + * NMIs, and Xen PV would break that approach. |
1046 | */ |
1047 | std |
1048 | movq $0, 5*8(%rsp) /* clear "NMI executing" */ |
1049 | |
1050 | /* |
1051 | - * INTERRUPT_RETURN reads the "iret" frame and exits the NMI |
1052 | - * stack in a single instruction. We are returning to kernel |
1053 | - * mode, so this cannot result in a fault. |
1054 | + * iretq reads the "iret" frame and exits the NMI stack in a |
1055 | + * single instruction. We are returning to kernel mode, so this |
1056 | + * cannot result in a fault. Similarly, we don't need to worry |
1057 | + * about espfix64 on the way back to kernel mode. |
1058 | */ |
1059 | - INTERRUPT_RETURN |
1060 | + iretq |
1061 | END(nmi) |
1062 | |
1063 | ENTRY(ignore_sysret) |
1064 | diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S |
1065 | index b5c7a56ed256..95ad40eb7eff 100644 |
1066 | --- a/arch/x86/entry/entry_64_compat.S |
1067 | +++ b/arch/x86/entry/entry_64_compat.S |
1068 | @@ -48,7 +48,7 @@ |
1069 | */ |
1070 | ENTRY(entry_SYSENTER_compat) |
1071 | /* Interrupts are off on entry. */ |
1072 | - SWAPGS_UNSAFE_STACK |
1073 | + SWAPGS |
1074 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp |
1075 | |
1076 | /* |
1077 | @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat) |
1078 | */ |
1079 | movl %eax, %eax |
1080 | |
1081 | - /* Construct struct pt_regs on stack (iret frame is already on stack) */ |
1082 | pushq %rax /* pt_regs->orig_ax */ |
1083 | + |
1084 | + /* switch to thread stack expects orig_ax to be pushed */ |
1085 | + call switch_to_thread_stack |
1086 | + |
1087 | pushq %rdi /* pt_regs->di */ |
1088 | pushq %rsi /* pt_regs->si */ |
1089 | pushq %rdx /* pt_regs->dx */ |
1090 | @@ -337,8 +340,7 @@ ENTRY(entry_INT80_compat) |
1091 | |
1092 | /* Go back to user mode. */ |
1093 | TRACE_IRQS_ON |
1094 | - SWAPGS |
1095 | - jmp restore_regs_and_iret |
1096 | + jmp swapgs_restore_regs_and_return_to_usermode |
1097 | END(entry_INT80_compat) |
1098 | |
1099 | ENTRY(stub32_clone) |
1100 | diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile |
1101 | index 331f1dca5085..6fb9b57ed5ba 100644 |
1102 | --- a/arch/x86/entry/syscalls/Makefile |
1103 | +++ b/arch/x86/entry/syscalls/Makefile |
1104 | @@ -1,6 +1,6 @@ |
1105 | # SPDX-License-Identifier: GPL-2.0 |
1106 | -out := $(obj)/../../include/generated/asm |
1107 | -uapi := $(obj)/../../include/generated/uapi/asm |
1108 | +out := arch/$(SRCARCH)/include/generated/asm |
1109 | +uapi := arch/$(SRCARCH)/include/generated/uapi/asm |
1110 | |
1111 | # Create output directory if not already present |
1112 | _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ |
1113 | diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c |
1114 | index 80534d3c2480..589af1eec7c1 100644 |
1115 | --- a/arch/x86/events/core.c |
1116 | +++ b/arch/x86/events/core.c |
1117 | @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) |
1118 | struct ldt_struct *ldt; |
1119 | |
1120 | /* IRQs are off, so this synchronizes with smp_store_release */ |
1121 | - ldt = lockless_dereference(current->active_mm->context.ldt); |
1122 | + ldt = READ_ONCE(current->active_mm->context.ldt); |
1123 | if (!ldt || idx >= ldt->nr_entries) |
1124 | return 0; |
1125 | |
1126 | diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c |
1127 | index f94855000d4e..09c26a4f139c 100644 |
1128 | --- a/arch/x86/events/intel/core.c |
1129 | +++ b/arch/x86/events/intel/core.c |
1130 | @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) |
1131 | |
1132 | if (event->attr.use_clockid) |
1133 | flags &= ~PERF_SAMPLE_TIME; |
1134 | + if (!event->attr.exclude_kernel) |
1135 | + flags &= ~PERF_SAMPLE_REGS_USER; |
1136 | + if (event->attr.sample_regs_user & ~PEBS_REGS) |
1137 | + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); |
1138 | return flags; |
1139 | } |
1140 | |
1141 | diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h |
1142 | index 4196f81ec0e1..f7aaadf9331f 100644 |
1143 | --- a/arch/x86/events/perf_event.h |
1144 | +++ b/arch/x86/events/perf_event.h |
1145 | @@ -85,13 +85,15 @@ struct amd_nb { |
1146 | * Flags PEBS can handle without an PMI. |
1147 | * |
1148 | * TID can only be handled by flushing at context switch. |
1149 | + * REGS_USER can be handled for events limited to ring 3. |
1150 | * |
1151 | */ |
1152 | #define PEBS_FREERUNNING_FLAGS \ |
1153 | (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ |
1154 | PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ |
1155 | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ |
1156 | - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) |
1157 | + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ |
1158 | + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) |
1159 | |
1160 | /* |
1161 | * A debug store configuration. |
1162 | @@ -110,6 +112,26 @@ struct debug_store { |
1163 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; |
1164 | }; |
1165 | |
1166 | +#define PEBS_REGS \ |
1167 | + ((1ULL << PERF_REG_X86_AX) | \ |
1168 | + (1ULL << PERF_REG_X86_BX) | \ |
1169 | + (1ULL << PERF_REG_X86_CX) | \ |
1170 | + (1ULL << PERF_REG_X86_DX) | \ |
1171 | + (1ULL << PERF_REG_X86_DI) | \ |
1172 | + (1ULL << PERF_REG_X86_SI) | \ |
1173 | + (1ULL << PERF_REG_X86_SP) | \ |
1174 | + (1ULL << PERF_REG_X86_BP) | \ |
1175 | + (1ULL << PERF_REG_X86_IP) | \ |
1176 | + (1ULL << PERF_REG_X86_FLAGS) | \ |
1177 | + (1ULL << PERF_REG_X86_R8) | \ |
1178 | + (1ULL << PERF_REG_X86_R9) | \ |
1179 | + (1ULL << PERF_REG_X86_R10) | \ |
1180 | + (1ULL << PERF_REG_X86_R11) | \ |
1181 | + (1ULL << PERF_REG_X86_R12) | \ |
1182 | + (1ULL << PERF_REG_X86_R13) | \ |
1183 | + (1ULL << PERF_REG_X86_R14) | \ |
1184 | + (1ULL << PERF_REG_X86_R15)) |
1185 | + |
1186 | /* |
1187 | * Per register state. |
1188 | */ |
1189 | diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c |
1190 | index a5db63f728a2..a0b86cf486e0 100644 |
1191 | --- a/arch/x86/hyperv/hv_init.c |
1192 | +++ b/arch/x86/hyperv/hv_init.c |
1193 | @@ -113,7 +113,7 @@ void hyperv_init(void) |
1194 | u64 guest_id; |
1195 | union hv_x64_msr_hypercall_contents hypercall_msr; |
1196 | |
1197 | - if (x86_hyper != &x86_hyper_ms_hyperv) |
1198 | + if (x86_hyper_type != X86_HYPER_MS_HYPERV) |
1199 | return; |
1200 | |
1201 | /* Allocate percpu VP index */ |
1202 | diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h |
1203 | index 5b0579abb398..3ac991d81e74 100644 |
1204 | --- a/arch/x86/include/asm/archrandom.h |
1205 | +++ b/arch/x86/include/asm/archrandom.h |
1206 | @@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v) |
1207 | bool ok; |
1208 | unsigned int retry = RDRAND_RETRY_LOOPS; |
1209 | do { |
1210 | - asm volatile(RDRAND_LONG "\n\t" |
1211 | + asm volatile(RDRAND_LONG |
1212 | CC_SET(c) |
1213 | : CC_OUT(c) (ok), "=a" (*v)); |
1214 | if (ok) |
1215 | @@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v) |
1216 | bool ok; |
1217 | unsigned int retry = RDRAND_RETRY_LOOPS; |
1218 | do { |
1219 | - asm volatile(RDRAND_INT "\n\t" |
1220 | + asm volatile(RDRAND_INT |
1221 | CC_SET(c) |
1222 | : CC_OUT(c) (ok), "=a" (*v)); |
1223 | if (ok) |
1224 | @@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v) |
1225 | static inline bool rdseed_long(unsigned long *v) |
1226 | { |
1227 | bool ok; |
1228 | - asm volatile(RDSEED_LONG "\n\t" |
1229 | + asm volatile(RDSEED_LONG |
1230 | CC_SET(c) |
1231 | : CC_OUT(c) (ok), "=a" (*v)); |
1232 | return ok; |
1233 | @@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v) |
1234 | static inline bool rdseed_int(unsigned int *v) |
1235 | { |
1236 | bool ok; |
1237 | - asm volatile(RDSEED_INT "\n\t" |
1238 | + asm volatile(RDSEED_INT |
1239 | CC_SET(c) |
1240 | : CC_OUT(c) (ok), "=a" (*v)); |
1241 | return ok; |
1242 | diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h |
1243 | index 2bcf47314959..3fa039855b8f 100644 |
1244 | --- a/arch/x86/include/asm/bitops.h |
1245 | +++ b/arch/x86/include/asm/bitops.h |
1246 | @@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) |
1247 | static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) |
1248 | { |
1249 | bool negative; |
1250 | - asm volatile(LOCK_PREFIX "andb %2,%1\n\t" |
1251 | + asm volatile(LOCK_PREFIX "andb %2,%1" |
1252 | CC_SET(s) |
1253 | : CC_OUT(s) (negative), ADDR |
1254 | : "ir" ((char) ~(1 << nr)) : "memory"); |
1255 | @@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * |
1256 | { |
1257 | bool oldbit; |
1258 | |
1259 | - asm("bts %2,%1\n\t" |
1260 | + asm("bts %2,%1" |
1261 | CC_SET(c) |
1262 | : CC_OUT(c) (oldbit), ADDR |
1263 | : "Ir" (nr)); |
1264 | @@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long |
1265 | { |
1266 | bool oldbit; |
1267 | |
1268 | - asm volatile("btr %2,%1\n\t" |
1269 | + asm volatile("btr %2,%1" |
1270 | CC_SET(c) |
1271 | : CC_OUT(c) (oldbit), ADDR |
1272 | : "Ir" (nr)); |
1273 | @@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon |
1274 | { |
1275 | bool oldbit; |
1276 | |
1277 | - asm volatile("btc %2,%1\n\t" |
1278 | + asm volatile("btc %2,%1" |
1279 | CC_SET(c) |
1280 | : CC_OUT(c) (oldbit), ADDR |
1281 | : "Ir" (nr) : "memory"); |
1282 | @@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l |
1283 | { |
1284 | bool oldbit; |
1285 | |
1286 | - asm volatile("bt %2,%1\n\t" |
1287 | + asm volatile("bt %2,%1" |
1288 | CC_SET(c) |
1289 | : CC_OUT(c) (oldbit) |
1290 | : "m" (*(unsigned long *)addr), "Ir" (nr)); |
1291 | diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h |
1292 | index 70bc1df580b2..2cbd75dd2fd3 100644 |
1293 | --- a/arch/x86/include/asm/compat.h |
1294 | +++ b/arch/x86/include/asm/compat.h |
1295 | @@ -7,6 +7,7 @@ |
1296 | */ |
1297 | #include <linux/types.h> |
1298 | #include <linux/sched.h> |
1299 | +#include <linux/sched/task_stack.h> |
1300 | #include <asm/processor.h> |
1301 | #include <asm/user32.h> |
1302 | #include <asm/unistd.h> |
1303 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h |
1304 | index 0dfa68438e80..ea9a7dde62e5 100644 |
1305 | --- a/arch/x86/include/asm/cpufeature.h |
1306 | +++ b/arch/x86/include/asm/cpufeature.h |
1307 | @@ -126,16 +126,17 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; |
1308 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) |
1309 | |
1310 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) |
1311 | -#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) |
1312 | -#define setup_clear_cpu_cap(bit) do { \ |
1313 | - clear_cpu_cap(&boot_cpu_data, bit); \ |
1314 | - set_bit(bit, (unsigned long *)cpu_caps_cleared); \ |
1315 | -} while (0) |
1316 | + |
1317 | +extern void setup_clear_cpu_cap(unsigned int bit); |
1318 | +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); |
1319 | + |
1320 | #define setup_force_cpu_cap(bit) do { \ |
1321 | set_cpu_cap(&boot_cpu_data, bit); \ |
1322 | set_bit(bit, (unsigned long *)cpu_caps_set); \ |
1323 | } while (0) |
1324 | |
1325 | +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) |
1326 | + |
1327 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) |
1328 | /* |
1329 | * Static testing of CPU features. Used the same as boot_cpu_has(). |
1330 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
1331 | index 793690fbda36..800104c8a3ed 100644 |
1332 | --- a/arch/x86/include/asm/cpufeatures.h |
1333 | +++ b/arch/x86/include/asm/cpufeatures.h |
1334 | @@ -13,173 +13,176 @@ |
1335 | /* |
1336 | * Defines x86 CPU feature bits |
1337 | */ |
1338 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ |
1339 | -#define NBUGINTS 1 /* N 32-bit bug flags */ |
1340 | +#define NCAPINTS 18 /* N 32-bit words worth of info */ |
1341 | +#define NBUGINTS 1 /* N 32-bit bug flags */ |
1342 | |
1343 | /* |
1344 | * Note: If the comment begins with a quoted string, that string is used |
1345 | * in /proc/cpuinfo instead of the macro name. If the string is "", |
1346 | * this feature bit is not displayed in /proc/cpuinfo at all. |
1347 | + * |
1348 | + * When adding new features here that depend on other features, |
1349 | + * please update the table in kernel/cpu/cpuid-deps.c as well. |
1350 | */ |
1351 | |
1352 | -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ |
1353 | -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ |
1354 | -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ |
1355 | -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ |
1356 | -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ |
1357 | -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ |
1358 | -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ |
1359 | -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ |
1360 | -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ |
1361 | -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ |
1362 | -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ |
1363 | -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ |
1364 | -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ |
1365 | -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ |
1366 | -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ |
1367 | -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ |
1368 | - /* (plus FCMOVcc, FCOMI with FPU) */ |
1369 | -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ |
1370 | -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ |
1371 | -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ |
1372 | -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ |
1373 | -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ |
1374 | -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ |
1375 | -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ |
1376 | -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ |
1377 | -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ |
1378 | -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ |
1379 | -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ |
1380 | -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ |
1381 | -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ |
1382 | -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ |
1383 | -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ |
1384 | +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ |
1385 | +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ |
1386 | +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ |
1387 | +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ |
1388 | +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ |
1389 | +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ |
1390 | +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ |
1391 | +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ |
1392 | +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ |
1393 | +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ |
1394 | +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ |
1395 | +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ |
1396 | +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ |
1397 | +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ |
1398 | +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ |
1399 | +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ |
1400 | +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ |
1401 | +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ |
1402 | +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ |
1403 | +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ |
1404 | +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ |
1405 | +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ |
1406 | +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ |
1407 | +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ |
1408 | +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ |
1409 | +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ |
1410 | +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ |
1411 | +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ |
1412 | +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ |
1413 | +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ |
1414 | +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ |
1415 | |
1416 | /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ |
1417 | /* Don't duplicate feature flags which are redundant with Intel! */ |
1418 | -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ |
1419 | -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ |
1420 | -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ |
1421 | -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ |
1422 | -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ |
1423 | -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ |
1424 | -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ |
1425 | -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ |
1426 | -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ |
1427 | -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ |
1428 | +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ |
1429 | +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ |
1430 | +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ |
1431 | +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ |
1432 | +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ |
1433 | +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ |
1434 | +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ |
1435 | +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ |
1436 | +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ |
1437 | +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ |
1438 | |
1439 | /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ |
1440 | -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ |
1441 | -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ |
1442 | -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ |
1443 | +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ |
1444 | +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ |
1445 | +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ |
1446 | |
1447 | /* Other features, Linux-defined mapping, word 3 */ |
1448 | /* This range is used for feature bits which conflict or are synthesized */ |
1449 | -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ |
1450 | -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ |
1451 | -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ |
1452 | -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ |
1453 | -/* cpu types for specific tunings: */ |
1454 | -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ |
1455 | -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ |
1456 | -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ |
1457 | -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ |
1458 | -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ |
1459 | -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ |
1460 | -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ |
1461 | -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ |
1462 | -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ |
1463 | -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ |
1464 | -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ |
1465 | -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ |
1466 | -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ |
1467 | -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ |
1468 | -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ |
1469 | -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ |
1470 | -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ |
1471 | -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ |
1472 | -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ |
1473 | -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ |
1474 | -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ |
1475 | -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ |
1476 | -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ |
1477 | -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ |
1478 | -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ |
1479 | -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ |
1480 | -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ |
1481 | +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ |
1482 | +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ |
1483 | +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ |
1484 | +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ |
1485 | + |
1486 | +/* CPU types for specific tunings: */ |
1487 | +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ |
1488 | +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ |
1489 | +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ |
1490 | +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ |
1491 | +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ |
1492 | +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ |
1493 | +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ |
1494 | +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ |
1495 | +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ |
1496 | +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ |
1497 | +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ |
1498 | +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ |
1499 | +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ |
1500 | +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ |
1501 | +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ |
1502 | +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ |
1503 | +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ |
1504 | +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ |
1505 | +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ |
1506 | +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ |
1507 | +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ |
1508 | +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ |
1509 | +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ |
1510 | +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ |
1511 | +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ |
1512 | +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ |
1513 | +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ |
1514 | |
1515 | -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
1516 | -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ |
1517 | -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ |
1518 | -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ |
1519 | -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ |
1520 | -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ |
1521 | -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ |
1522 | -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ |
1523 | -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ |
1524 | -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ |
1525 | -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ |
1526 | -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ |
1527 | -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ |
1528 | -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ |
1529 | -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ |
1530 | -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ |
1531 | -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ |
1532 | -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ |
1533 | -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ |
1534 | -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ |
1535 | -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ |
1536 | -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ |
1537 | -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ |
1538 | -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ |
1539 | -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ |
1540 | -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ |
1541 | -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
1542 | -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ |
1543 | -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ |
1544 | -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ |
1545 | -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ |
1546 | -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ |
1547 | +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ |
1548 | +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ |
1549 | +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ |
1550 | +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ |
1551 | +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ |
1552 | +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ |
1553 | +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ |
1554 | +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ |
1555 | +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ |
1556 | +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ |
1557 | +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ |
1558 | +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ |
1559 | +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ |
1560 | +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ |
1561 | +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ |
1562 | +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ |
1563 | +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ |
1564 | +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ |
1565 | +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ |
1566 | +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ |
1567 | +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ |
1568 | +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ |
1569 | +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ |
1570 | +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ |
1571 | +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ |
1572 | +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ |
1573 | +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ |
1574 | +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ |
1575 | +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ |
1576 | +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ |
1577 | +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ |
1578 | +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ |
1579 | |
1580 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ |
1581 | -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ |
1582 | -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ |
1583 | -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ |
1584 | -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ |
1585 | -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ |
1586 | -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ |
1587 | -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ |
1588 | -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ |
1589 | -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ |
1590 | -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ |
1591 | +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ |
1592 | +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ |
1593 | +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ |
1594 | +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ |
1595 | +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ |
1596 | +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ |
1597 | +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ |
1598 | +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ |
1599 | +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ |
1600 | +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ |
1601 | |
1602 | -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ |
1603 | -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ |
1604 | -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ |
1605 | -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ |
1606 | -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ |
1607 | -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ |
1608 | -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ |
1609 | -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ |
1610 | -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ |
1611 | -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ |
1612 | -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ |
1613 | -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ |
1614 | -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ |
1615 | -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ |
1616 | -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ |
1617 | -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ |
1618 | -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ |
1619 | -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ |
1620 | -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ |
1621 | -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ |
1622 | -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ |
1623 | -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ |
1624 | -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ |
1625 | -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ |
1626 | -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ |
1627 | -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ |
1628 | -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ |
1629 | +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ |
1630 | +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ |
1631 | +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ |
1632 | +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ |
1633 | +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ |
1634 | +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ |
1635 | +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ |
1636 | +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ |
1637 | +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ |
1638 | +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ |
1639 | +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ |
1640 | +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ |
1641 | +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ |
1642 | +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ |
1643 | +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ |
1644 | +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ |
1645 | +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ |
1646 | +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ |
1647 | +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ |
1648 | +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ |
1649 | +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ |
1650 | +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ |
1651 | +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ |
1652 | +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ |
1653 | +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ |
1654 | +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ |
1655 | +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ |
1656 | |
1657 | /* |
1658 | * Auxiliary flags: Linux defined - For features scattered in various |
1659 | @@ -187,146 +190,155 @@ |
1660 | * |
1661 | * Reuse free bits when adding new feature flags! |
1662 | */ |
1663 | -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ |
1664 | -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ |
1665 | -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ |
1666 | -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ |
1667 | -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ |
1668 | -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ |
1669 | -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ |
1670 | +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ |
1671 | +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ |
1672 | +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ |
1673 | +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ |
1674 | +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ |
1675 | +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ |
1676 | +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ |
1677 | |
1678 | -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ |
1679 | -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
1680 | -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ |
1681 | +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ |
1682 | +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
1683 | +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ |
1684 | |
1685 | -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
1686 | -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ |
1687 | -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ |
1688 | -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
1689 | +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ |
1690 | +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ |
1691 | +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ |
1692 | +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
1693 | |
1694 | -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ |
1695 | +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ |
1696 | |
1697 | /* Virtualization flags: Linux defined, word 8 */ |
1698 | -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
1699 | -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ |
1700 | -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ |
1701 | -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ |
1702 | -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ |
1703 | +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
1704 | +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ |
1705 | +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ |
1706 | +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ |
1707 | +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ |
1708 | |
1709 | -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ |
1710 | -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ |
1711 | +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ |
1712 | +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ |
1713 | |
1714 | |
1715 | -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ |
1716 | -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ |
1717 | -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ |
1718 | -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ |
1719 | -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ |
1720 | -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ |
1721 | -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ |
1722 | -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ |
1723 | -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
1724 | -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ |
1725 | -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ |
1726 | -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ |
1727 | -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ |
1728 | -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ |
1729 | -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ |
1730 | -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ |
1731 | -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ |
1732 | -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ |
1733 | -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ |
1734 | -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ |
1735 | -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ |
1736 | -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ |
1737 | -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
1738 | -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
1739 | -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
1740 | -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ |
1741 | -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ |
1742 | -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ |
1743 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ |
1744 | +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */ |
1745 | +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ |
1746 | +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ |
1747 | +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ |
1748 | +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ |
1749 | +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ |
1750 | +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ |
1751 | +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ |
1752 | +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ |
1753 | +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ |
1754 | +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ |
1755 | +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ |
1756 | +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ |
1757 | +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ |
1758 | +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ |
1759 | +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ |
1760 | +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ |
1761 | +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ |
1762 | +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ |
1763 | +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ |
1764 | +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ |
1765 | +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ |
1766 | +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ |
1767 | +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ |
1768 | +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ |
1769 | +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ |
1770 | +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ |
1771 | |
1772 | -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ |
1773 | -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ |
1774 | -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ |
1775 | -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ |
1776 | -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ |
1777 | +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ |
1778 | +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ |
1779 | +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ |
1780 | +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ |
1781 | +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ |
1782 | |
1783 | -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ |
1784 | -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ |
1785 | +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ |
1786 | +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ |
1787 | |
1788 | -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ |
1789 | -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ |
1790 | -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ |
1791 | -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ |
1792 | +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ |
1793 | +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ |
1794 | +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ |
1795 | +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ |
1796 | |
1797 | -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ |
1798 | -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ |
1799 | -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ |
1800 | +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ |
1801 | +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ |
1802 | +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ |
1803 | +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ |
1804 | |
1805 | -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ |
1806 | -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
1807 | -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ |
1808 | -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ |
1809 | -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ |
1810 | -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ |
1811 | -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ |
1812 | -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ |
1813 | -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ |
1814 | -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ |
1815 | -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ |
1816 | +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ |
1817 | +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ |
1818 | +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ |
1819 | +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ |
1820 | +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ |
1821 | +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ |
1822 | +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ |
1823 | +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ |
1824 | +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ |
1825 | +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ |
1826 | +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ |
1827 | |
1828 | -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ |
1829 | -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ |
1830 | -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ |
1831 | -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ |
1832 | -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ |
1833 | -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ |
1834 | -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ |
1835 | -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ |
1836 | -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ |
1837 | -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ |
1838 | -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ |
1839 | -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ |
1840 | -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ |
1841 | -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ |
1842 | +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ |
1843 | +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ |
1844 | +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ |
1845 | +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ |
1846 | +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ |
1847 | +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ |
1848 | +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ |
1849 | +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ |
1850 | +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ |
1851 | +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ |
1852 | +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ |
1853 | +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ |
1854 | +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ |
1855 | +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ |
1856 | |
1857 | -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ |
1858 | -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ |
1859 | -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ |
1860 | -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ |
1861 | -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ |
1862 | -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ |
1863 | -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ |
1864 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ |
1865 | +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions */ |
1866 | +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ |
1867 | +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ |
1868 | +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ |
1869 | +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ |
1870 | +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ |
1871 | +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ |
1872 | +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ |
1873 | +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ |
1874 | +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ |
1875 | +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ |
1876 | +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ |
1877 | +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ |
1878 | |
1879 | -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ |
1880 | -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ |
1881 | -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ |
1882 | -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ |
1883 | +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ |
1884 | +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ |
1885 | +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ |
1886 | +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ |
1887 | |
1888 | /* |
1889 | * BUG word(s) |
1890 | */ |
1891 | -#define X86_BUG(x) (NCAPINTS*32 + (x)) |
1892 | +#define X86_BUG(x) (NCAPINTS*32 + (x)) |
1893 | |
1894 | -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ |
1895 | -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ |
1896 | -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ |
1897 | -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ |
1898 | -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ |
1899 | -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ |
1900 | -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ |
1901 | -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ |
1902 | -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ |
1903 | +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ |
1904 | +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ |
1905 | +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ |
1906 | +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ |
1907 | +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ |
1908 | +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ |
1909 | +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FDP */ |
1910 | +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ |
1911 | +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ |
1912 | #ifdef CONFIG_X86_32 |
1913 | /* |
1914 | * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional |
1915 | * to avoid confusion. |
1916 | */ |
1917 | -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ |
1918 | +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ |
1919 | #endif |
1920 | -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ |
1921 | -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ |
1922 | -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ |
1923 | -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ |
1924 | +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ |
1925 | +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ |
1926 | +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ |
1927 | +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ |
1928 | + |
1929 | #endif /* _ASM_X86_CPUFEATURES_H */ |
1930 | diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h |
1931 | index 0a3e808b9123..2ace1f90d138 100644 |
1932 | --- a/arch/x86/include/asm/desc.h |
1933 | +++ b/arch/x86/include/asm/desc.h |
1934 | @@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void) |
1935 | return this_cpu_ptr(&gdt_page)->gdt; |
1936 | } |
1937 | |
1938 | -/* Get the fixmap index for a specific processor */ |
1939 | -static inline unsigned int get_cpu_gdt_ro_index(int cpu) |
1940 | -{ |
1941 | - return FIX_GDT_REMAP_BEGIN + cpu; |
1942 | -} |
1943 | - |
1944 | /* Provide the fixmap address of the remapped GDT */ |
1945 | static inline struct desc_struct *get_cpu_gdt_ro(int cpu) |
1946 | { |
1947 | - unsigned int idx = get_cpu_gdt_ro_index(cpu); |
1948 | - return (struct desc_struct *)__fix_to_virt(idx); |
1949 | + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; |
1950 | } |
1951 | |
1952 | /* Provide the current read-only GDT */ |
1953 | @@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, |
1954 | #endif |
1955 | } |
1956 | |
1957 | -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) |
1958 | +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) |
1959 | { |
1960 | struct desc_struct *d = get_cpu_gdt_rw(cpu); |
1961 | tss_desc tss; |
1962 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h |
1963 | index dcd9fb55e679..94fc4fa14127 100644 |
1964 | --- a/arch/x86/include/asm/fixmap.h |
1965 | +++ b/arch/x86/include/asm/fixmap.h |
1966 | @@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP; |
1967 | PAGE_SIZE) |
1968 | #endif |
1969 | |
1970 | +/* |
1971 | + * cpu_entry_area is a percpu region in the fixmap that contains things |
1972 | + * needed by the CPU and early entry/exit code. Real types aren't used |
1973 | + * for all fields here to avoid circular header dependencies. |
1974 | + * |
1975 | + * Every field is a virtual alias of some other allocated backing store. |
1976 | + * There is no direct allocation of a struct cpu_entry_area. |
1977 | + */ |
1978 | +struct cpu_entry_area { |
1979 | + char gdt[PAGE_SIZE]; |
1980 | + |
1981 | + /* |
1982 | + * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as |
1983 | + * a read-only guard page. |
1984 | + */ |
1985 | + struct SYSENTER_stack_page SYSENTER_stack_page; |
1986 | + |
1987 | + /* |
1988 | + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because |
1989 | + * we need task switches to work, and task switches write to the TSS. |
1990 | + */ |
1991 | + struct tss_struct tss; |
1992 | + |
1993 | + char entry_trampoline[PAGE_SIZE]; |
1994 | + |
1995 | +#ifdef CONFIG_X86_64 |
1996 | + /* |
1997 | + * Exception stacks used for IST entries. |
1998 | + * |
1999 | + * In the future, this should have a separate slot for each stack |
2000 | + * with guard pages between them. |
2001 | + */ |
2002 | + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; |
2003 | +#endif |
2004 | +}; |
2005 | + |
2006 | +#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) |
2007 | + |
2008 | +extern void setup_cpu_entry_areas(void); |
2009 | |
2010 | /* |
2011 | * Here we define all the compile-time 'special' virtual |
2012 | @@ -101,8 +140,14 @@ enum fixed_addresses { |
2013 | FIX_LNW_VRTC, |
2014 | #endif |
2015 | /* Fixmap entries to remap the GDTs, one per processor. */ |
2016 | - FIX_GDT_REMAP_BEGIN, |
2017 | - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, |
2018 | + FIX_CPU_ENTRY_AREA_TOP, |
2019 | + FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, |
2020 | + |
2021 | +#ifdef CONFIG_ACPI_APEI_GHES |
2022 | + /* Used for GHES mapping from assorted contexts */ |
2023 | + FIX_APEI_GHES_IRQ, |
2024 | + FIX_APEI_GHES_NMI, |
2025 | +#endif |
2026 | |
2027 | __end_of_permanent_fixed_addresses, |
2028 | |
2029 | @@ -185,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, |
2030 | void __early_set_fixmap(enum fixed_addresses idx, |
2031 | phys_addr_t phys, pgprot_t flags); |
2032 | |
2033 | +static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) |
2034 | +{ |
2035 | + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); |
2036 | + |
2037 | + return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; |
2038 | +} |
2039 | + |
2040 | +#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ |
2041 | + BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ |
2042 | + __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ |
2043 | + }) |
2044 | + |
2045 | +#define get_cpu_entry_area_index(cpu, field) \ |
2046 | + __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) |
2047 | + |
2048 | +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) |
2049 | +{ |
2050 | + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); |
2051 | +} |
2052 | + |
2053 | +static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) |
2054 | +{ |
2055 | + return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; |
2056 | +} |
2057 | + |
2058 | #endif /* !__ASSEMBLY__ */ |
2059 | #endif /* _ASM_X86_FIXMAP_H */ |
2060 | diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h |
2061 | index 0ead9dbb9130..96aa6b9884dc 100644 |
2062 | --- a/arch/x86/include/asm/hypervisor.h |
2063 | +++ b/arch/x86/include/asm/hypervisor.h |
2064 | @@ -20,14 +20,22 @@ |
2065 | #ifndef _ASM_X86_HYPERVISOR_H |
2066 | #define _ASM_X86_HYPERVISOR_H |
2067 | |
2068 | +/* x86 hypervisor types */ |
2069 | +enum x86_hypervisor_type { |
2070 | + X86_HYPER_NATIVE = 0, |
2071 | + X86_HYPER_VMWARE, |
2072 | + X86_HYPER_MS_HYPERV, |
2073 | + X86_HYPER_XEN_PV, |
2074 | + X86_HYPER_XEN_HVM, |
2075 | + X86_HYPER_KVM, |
2076 | +}; |
2077 | + |
2078 | #ifdef CONFIG_HYPERVISOR_GUEST |
2079 | |
2080 | #include <asm/kvm_para.h> |
2081 | +#include <asm/x86_init.h> |
2082 | #include <asm/xen/hypervisor.h> |
2083 | |
2084 | -/* |
2085 | - * x86 hypervisor information |
2086 | - */ |
2087 | struct hypervisor_x86 { |
2088 | /* Hypervisor name */ |
2089 | const char *name; |
2090 | @@ -35,40 +43,27 @@ struct hypervisor_x86 { |
2091 | /* Detection routine */ |
2092 | uint32_t (*detect)(void); |
2093 | |
2094 | - /* Platform setup (run once per boot) */ |
2095 | - void (*init_platform)(void); |
2096 | - |
2097 | - /* X2APIC detection (run once per boot) */ |
2098 | - bool (*x2apic_available)(void); |
2099 | + /* Hypervisor type */ |
2100 | + enum x86_hypervisor_type type; |
2101 | |
2102 | - /* pin current vcpu to specified physical cpu (run rarely) */ |
2103 | - void (*pin_vcpu)(int); |
2104 | + /* init time callbacks */ |
2105 | + struct x86_hyper_init init; |
2106 | |
2107 | - /* called during init_mem_mapping() to setup early mappings. */ |
2108 | - void (*init_mem_mapping)(void); |
2109 | + /* runtime callbacks */ |
2110 | + struct x86_hyper_runtime runtime; |
2111 | }; |
2112 | |
2113 | -extern const struct hypervisor_x86 *x86_hyper; |
2114 | - |
2115 | -/* Recognized hypervisors */ |
2116 | -extern const struct hypervisor_x86 x86_hyper_vmware; |
2117 | -extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
2118 | -extern const struct hypervisor_x86 x86_hyper_xen_pv; |
2119 | -extern const struct hypervisor_x86 x86_hyper_xen_hvm; |
2120 | -extern const struct hypervisor_x86 x86_hyper_kvm; |
2121 | - |
2122 | +extern enum x86_hypervisor_type x86_hyper_type; |
2123 | extern void init_hypervisor_platform(void); |
2124 | -extern bool hypervisor_x2apic_available(void); |
2125 | -extern void hypervisor_pin_vcpu(int cpu); |
2126 | - |
2127 | -static inline void hypervisor_init_mem_mapping(void) |
2128 | +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) |
2129 | { |
2130 | - if (x86_hyper && x86_hyper->init_mem_mapping) |
2131 | - x86_hyper->init_mem_mapping(); |
2132 | + return x86_hyper_type == type; |
2133 | } |
2134 | #else |
2135 | static inline void init_hypervisor_platform(void) { } |
2136 | -static inline bool hypervisor_x2apic_available(void) { return false; } |
2137 | -static inline void hypervisor_init_mem_mapping(void) { } |
2138 | +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) |
2139 | +{ |
2140 | + return type == X86_HYPER_NATIVE; |
2141 | +} |
2142 | #endif /* CONFIG_HYPERVISOR_GUEST */ |
2143 | #endif /* _ASM_X86_HYPERVISOR_H */ |
2144 | diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h |
2145 | index c8ef23f2c28f..89f08955fff7 100644 |
2146 | --- a/arch/x86/include/asm/irqflags.h |
2147 | +++ b/arch/x86/include/asm/irqflags.h |
2148 | @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void) |
2149 | swapgs; \ |
2150 | sysretl |
2151 | |
2152 | +#ifdef CONFIG_DEBUG_ENTRY |
2153 | +#define SAVE_FLAGS(x) pushfq; popq %rax |
2154 | +#endif |
2155 | #else |
2156 | #define INTERRUPT_RETURN iret |
2157 | #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit |
2158 | diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h |
2159 | index f86a8caa561e..395c9631e000 100644 |
2160 | --- a/arch/x86/include/asm/kdebug.h |
2161 | +++ b/arch/x86/include/asm/kdebug.h |
2162 | @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); |
2163 | extern int __must_check __die(const char *, struct pt_regs *, long); |
2164 | extern void show_stack_regs(struct pt_regs *regs); |
2165 | extern void __show_regs(struct pt_regs *regs, int all); |
2166 | +extern void show_iret_regs(struct pt_regs *regs); |
2167 | extern unsigned long oops_begin(void); |
2168 | extern void oops_end(unsigned long, struct pt_regs *, int signr); |
2169 | |
2170 | diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h |
2171 | index 6699fc441644..6d16d15d09a0 100644 |
2172 | --- a/arch/x86/include/asm/mmu_context.h |
2173 | +++ b/arch/x86/include/asm/mmu_context.h |
2174 | @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) |
2175 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
2176 | struct ldt_struct *ldt; |
2177 | |
2178 | - /* lockless_dereference synchronizes with smp_store_release */ |
2179 | - ldt = lockless_dereference(mm->context.ldt); |
2180 | + /* READ_ONCE synchronizes with smp_store_release */ |
2181 | + ldt = READ_ONCE(mm->context.ldt); |
2182 | |
2183 | /* |
2184 | * Any change to mm->context.ldt is followed by an IPI to all |
2185 | diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h |
2186 | index 8546fafa21a9..7948a17febb4 100644 |
2187 | --- a/arch/x86/include/asm/module.h |
2188 | +++ b/arch/x86/include/asm/module.h |
2189 | @@ -6,7 +6,7 @@ |
2190 | #include <asm/orc_types.h> |
2191 | |
2192 | struct mod_arch_specific { |
2193 | -#ifdef CONFIG_ORC_UNWINDER |
2194 | +#ifdef CONFIG_UNWINDER_ORC |
2195 | unsigned int num_orcs; |
2196 | int *orc_unwind_ip; |
2197 | struct orc_entry *orc_unwind; |
2198 | diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h |
2199 | index fd81228e8037..892df375b615 100644 |
2200 | --- a/arch/x86/include/asm/paravirt.h |
2201 | +++ b/arch/x86/include/asm/paravirt.h |
2202 | @@ -16,10 +16,9 @@ |
2203 | #include <linux/cpumask.h> |
2204 | #include <asm/frame.h> |
2205 | |
2206 | -static inline void load_sp0(struct tss_struct *tss, |
2207 | - struct thread_struct *thread) |
2208 | +static inline void load_sp0(unsigned long sp0) |
2209 | { |
2210 | - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); |
2211 | + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); |
2212 | } |
2213 | |
2214 | /* The paravirtualized CPUID instruction. */ |
2215 | @@ -928,6 +927,15 @@ extern void default_banner(void); |
2216 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ |
2217 | CLBR_NONE, \ |
2218 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) |
2219 | + |
2220 | +#ifdef CONFIG_DEBUG_ENTRY |
2221 | +#define SAVE_FLAGS(clobbers) \ |
2222 | + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ |
2223 | + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ |
2224 | + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ |
2225 | + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) |
2226 | +#endif |
2227 | + |
2228 | #endif /* CONFIG_X86_32 */ |
2229 | |
2230 | #endif /* __ASSEMBLY__ */ |
2231 | diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h |
2232 | index 10cc3b9709fe..6ec54d01972d 100644 |
2233 | --- a/arch/x86/include/asm/paravirt_types.h |
2234 | +++ b/arch/x86/include/asm/paravirt_types.h |
2235 | @@ -134,7 +134,7 @@ struct pv_cpu_ops { |
2236 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); |
2237 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); |
2238 | |
2239 | - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); |
2240 | + void (*load_sp0)(unsigned long sp0); |
2241 | |
2242 | void (*set_iopl_mask)(unsigned mask); |
2243 | |
2244 | diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h |
2245 | index 377f1ffd18be..ba3c523aaf16 100644 |
2246 | --- a/arch/x86/include/asm/percpu.h |
2247 | +++ b/arch/x86/include/asm/percpu.h |
2248 | @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, |
2249 | { |
2250 | bool oldbit; |
2251 | |
2252 | - asm volatile("bt "__percpu_arg(2)",%1\n\t" |
2253 | + asm volatile("bt "__percpu_arg(2)",%1" |
2254 | CC_SET(c) |
2255 | : CC_OUT(c) (oldbit) |
2256 | : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); |
2257 | diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h |
2258 | index 59df7b47a434..9e9b05fc4860 100644 |
2259 | --- a/arch/x86/include/asm/pgtable_types.h |
2260 | +++ b/arch/x86/include/asm/pgtable_types.h |
2261 | @@ -200,10 +200,9 @@ enum page_cache_mode { |
2262 | |
2263 | #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) |
2264 | |
2265 | -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
2266 | - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) |
2267 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ |
2268 | _PAGE_DIRTY | _PAGE_ENC) |
2269 | +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) |
2270 | |
2271 | #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) |
2272 | #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) |
2273 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h |
2274 | index bdac19ab2488..da943411d3d8 100644 |
2275 | --- a/arch/x86/include/asm/processor.h |
2276 | +++ b/arch/x86/include/asm/processor.h |
2277 | @@ -162,9 +162,9 @@ enum cpuid_regs_idx { |
2278 | extern struct cpuinfo_x86 boot_cpu_data; |
2279 | extern struct cpuinfo_x86 new_cpu_data; |
2280 | |
2281 | -extern struct tss_struct doublefault_tss; |
2282 | -extern __u32 cpu_caps_cleared[NCAPINTS]; |
2283 | -extern __u32 cpu_caps_set[NCAPINTS]; |
2284 | +extern struct x86_hw_tss doublefault_tss; |
2285 | +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; |
2286 | +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; |
2287 | |
2288 | #ifdef CONFIG_SMP |
2289 | DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); |
2290 | @@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir) |
2291 | write_cr3(__sme_pa(pgdir)); |
2292 | } |
2293 | |
2294 | +/* |
2295 | + * Note that while the legacy 'TSS' name comes from 'Task State Segment', |
2296 | + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, |
2297 | + * unrelated to the task-switch mechanism: |
2298 | + */ |
2299 | #ifdef CONFIG_X86_32 |
2300 | /* This is the TSS defined by the hardware. */ |
2301 | struct x86_hw_tss { |
2302 | @@ -304,7 +309,13 @@ struct x86_hw_tss { |
2303 | struct x86_hw_tss { |
2304 | u32 reserved1; |
2305 | u64 sp0; |
2306 | + |
2307 | + /* |
2308 | + * We store cpu_current_top_of_stack in sp1 so it's always accessible. |
2309 | + * Linux does not use ring 1, so sp1 is not otherwise needed. |
2310 | + */ |
2311 | u64 sp1; |
2312 | + |
2313 | u64 sp2; |
2314 | u64 reserved2; |
2315 | u64 ist[7]; |
2316 | @@ -322,12 +333,22 @@ struct x86_hw_tss { |
2317 | #define IO_BITMAP_BITS 65536 |
2318 | #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) |
2319 | #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) |
2320 | -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) |
2321 | +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) |
2322 | #define INVALID_IO_BITMAP_OFFSET 0x8000 |
2323 | |
2324 | +struct SYSENTER_stack { |
2325 | + unsigned long words[64]; |
2326 | +}; |
2327 | + |
2328 | +struct SYSENTER_stack_page { |
2329 | + struct SYSENTER_stack stack; |
2330 | +} __aligned(PAGE_SIZE); |
2331 | + |
2332 | struct tss_struct { |
2333 | /* |
2334 | - * The hardware state: |
2335 | + * The fixed hardware portion. This must not cross a page boundary |
2336 | + * at risk of violating the SDM's advice and potentially triggering |
2337 | + * errata. |
2338 | */ |
2339 | struct x86_hw_tss x86_tss; |
2340 | |
2341 | @@ -338,18 +359,9 @@ struct tss_struct { |
2342 | * be within the limit. |
2343 | */ |
2344 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; |
2345 | +} __aligned(PAGE_SIZE); |
2346 | |
2347 | -#ifdef CONFIG_X86_32 |
2348 | - /* |
2349 | - * Space for the temporary SYSENTER stack. |
2350 | - */ |
2351 | - unsigned long SYSENTER_stack_canary; |
2352 | - unsigned long SYSENTER_stack[64]; |
2353 | -#endif |
2354 | - |
2355 | -} ____cacheline_aligned; |
2356 | - |
2357 | -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); |
2358 | +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); |
2359 | |
2360 | /* |
2361 | * sizeof(unsigned long) coming from an extra "long" at the end |
2362 | @@ -363,6 +375,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); |
2363 | |
2364 | #ifdef CONFIG_X86_32 |
2365 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); |
2366 | +#else |
2367 | +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ |
2368 | +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 |
2369 | #endif |
2370 | |
2371 | /* |
2372 | @@ -431,7 +446,9 @@ typedef struct { |
2373 | struct thread_struct { |
2374 | /* Cached TLS descriptors: */ |
2375 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; |
2376 | +#ifdef CONFIG_X86_32 |
2377 | unsigned long sp0; |
2378 | +#endif |
2379 | unsigned long sp; |
2380 | #ifdef CONFIG_X86_32 |
2381 | unsigned long sysenter_cs; |
2382 | @@ -518,16 +535,9 @@ static inline void native_set_iopl_mask(unsigned mask) |
2383 | } |
2384 | |
2385 | static inline void |
2386 | -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) |
2387 | +native_load_sp0(unsigned long sp0) |
2388 | { |
2389 | - tss->x86_tss.sp0 = thread->sp0; |
2390 | -#ifdef CONFIG_X86_32 |
2391 | - /* Only happens when SEP is enabled, no need to test "SEP"arately: */ |
2392 | - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
2393 | - tss->x86_tss.ss1 = thread->sysenter_cs; |
2394 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
2395 | - } |
2396 | -#endif |
2397 | + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); |
2398 | } |
2399 | |
2400 | static inline void native_swapgs(void) |
2401 | @@ -539,12 +549,18 @@ static inline void native_swapgs(void) |
2402 | |
2403 | static inline unsigned long current_top_of_stack(void) |
2404 | { |
2405 | -#ifdef CONFIG_X86_64 |
2406 | - return this_cpu_read_stable(cpu_tss.x86_tss.sp0); |
2407 | -#else |
2408 | - /* sp0 on x86_32 is special in and around vm86 mode. */ |
2409 | + /* |
2410 | + * We can't read directly from tss.sp0: sp0 on x86_32 is special in |
2411 | + * and around vm86 mode and sp0 on x86_64 is special because of the |
2412 | + * entry trampoline. |
2413 | + */ |
2414 | return this_cpu_read_stable(cpu_current_top_of_stack); |
2415 | -#endif |
2416 | +} |
2417 | + |
2418 | +static inline bool on_thread_stack(void) |
2419 | +{ |
2420 | + return (unsigned long)(current_top_of_stack() - |
2421 | + current_stack_pointer) < THREAD_SIZE; |
2422 | } |
2423 | |
2424 | #ifdef CONFIG_PARAVIRT |
2425 | @@ -552,10 +568,9 @@ static inline unsigned long current_top_of_stack(void) |
2426 | #else |
2427 | #define __cpuid native_cpuid |
2428 | |
2429 | -static inline void load_sp0(struct tss_struct *tss, |
2430 | - struct thread_struct *thread) |
2431 | +static inline void load_sp0(unsigned long sp0) |
2432 | { |
2433 | - native_load_sp0(tss, thread); |
2434 | + native_load_sp0(sp0); |
2435 | } |
2436 | |
2437 | #define set_iopl_mask native_set_iopl_mask |
2438 | @@ -804,6 +819,15 @@ static inline void spin_lock_prefetch(const void *x) |
2439 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ |
2440 | TOP_OF_KERNEL_STACK_PADDING) |
2441 | |
2442 | +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) |
2443 | + |
2444 | +#define task_pt_regs(task) \ |
2445 | +({ \ |
2446 | + unsigned long __ptr = (unsigned long)task_stack_page(task); \ |
2447 | + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ |
2448 | + ((struct pt_regs *)__ptr) - 1; \ |
2449 | +}) |
2450 | + |
2451 | #ifdef CONFIG_X86_32 |
2452 | /* |
2453 | * User space process size: 3GB (default). |
2454 | @@ -823,23 +847,6 @@ static inline void spin_lock_prefetch(const void *x) |
2455 | .addr_limit = KERNEL_DS, \ |
2456 | } |
2457 | |
2458 | -/* |
2459 | - * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. |
2460 | - * This is necessary to guarantee that the entire "struct pt_regs" |
2461 | - * is accessible even if the CPU haven't stored the SS/ESP registers |
2462 | - * on the stack (interrupt gate does not save these registers |
2463 | - * when switching to the same priv ring). |
2464 | - * Therefore beware: accessing the ss/esp fields of the |
2465 | - * "struct pt_regs" is possible, but they may contain the |
2466 | - * completely wrong values. |
2467 | - */ |
2468 | -#define task_pt_regs(task) \ |
2469 | -({ \ |
2470 | - unsigned long __ptr = (unsigned long)task_stack_page(task); \ |
2471 | - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ |
2472 | - ((struct pt_regs *)__ptr) - 1; \ |
2473 | -}) |
2474 | - |
2475 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) |
2476 | |
2477 | #else |
2478 | @@ -873,11 +880,9 @@ static inline void spin_lock_prefetch(const void *x) |
2479 | #define STACK_TOP_MAX TASK_SIZE_MAX |
2480 | |
2481 | #define INIT_THREAD { \ |
2482 | - .sp0 = TOP_OF_INIT_STACK, \ |
2483 | .addr_limit = KERNEL_DS, \ |
2484 | } |
2485 | |
2486 | -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) |
2487 | extern unsigned long KSTK_ESP(struct task_struct *task); |
2488 | |
2489 | #endif /* CONFIG_X86_64 */ |
2490 | diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h |
2491 | index c0e3c45cf6ab..14131dd06b29 100644 |
2492 | --- a/arch/x86/include/asm/ptrace.h |
2493 | +++ b/arch/x86/include/asm/ptrace.h |
2494 | @@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs) |
2495 | #endif |
2496 | } |
2497 | |
2498 | -#ifdef CONFIG_X86_64 |
2499 | static inline bool user_64bit_mode(struct pt_regs *regs) |
2500 | { |
2501 | +#ifdef CONFIG_X86_64 |
2502 | #ifndef CONFIG_PARAVIRT |
2503 | /* |
2504 | * On non-paravirt systems, this is the only long mode CPL 3 |
2505 | @@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs) |
2506 | /* Headers are too twisted for this to go in paravirt.h. */ |
2507 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; |
2508 | #endif |
2509 | +#else /* !CONFIG_X86_64 */ |
2510 | + return false; |
2511 | +#endif |
2512 | } |
2513 | |
2514 | +#ifdef CONFIG_X86_64 |
2515 | #define current_user_stack_pointer() current_pt_regs()->sp |
2516 | #define compat_user_stack_pointer() current_pt_regs()->sp |
2517 | #endif |
2518 | diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h |
2519 | index d8f3a6ae9f6c..f91c365e57c3 100644 |
2520 | --- a/arch/x86/include/asm/rmwcc.h |
2521 | +++ b/arch/x86/include/asm/rmwcc.h |
2522 | @@ -29,7 +29,7 @@ cc_label: \ |
2523 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ |
2524 | do { \ |
2525 | bool c; \ |
2526 | - asm volatile (fullop ";" CC_SET(cc) \ |
2527 | + asm volatile (fullop CC_SET(cc) \ |
2528 | : [counter] "+m" (var), CC_OUT(cc) (c) \ |
2529 | : __VA_ARGS__ : clobbers); \ |
2530 | return c; \ |
2531 | diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h |
2532 | index 8da111b3c342..f8062bfd43a0 100644 |
2533 | --- a/arch/x86/include/asm/stacktrace.h |
2534 | +++ b/arch/x86/include/asm/stacktrace.h |
2535 | @@ -16,6 +16,7 @@ enum stack_type { |
2536 | STACK_TYPE_TASK, |
2537 | STACK_TYPE_IRQ, |
2538 | STACK_TYPE_SOFTIRQ, |
2539 | + STACK_TYPE_SYSENTER, |
2540 | STACK_TYPE_EXCEPTION, |
2541 | STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, |
2542 | }; |
2543 | @@ -28,6 +29,8 @@ struct stack_info { |
2544 | bool in_task_stack(unsigned long *stack, struct task_struct *task, |
2545 | struct stack_info *info); |
2546 | |
2547 | +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); |
2548 | + |
2549 | int get_stack_info(unsigned long *stack, struct task_struct *task, |
2550 | struct stack_info *info, unsigned long *visit_mask); |
2551 | |
2552 | diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h |
2553 | index 899084b70412..9b6df68d8fd1 100644 |
2554 | --- a/arch/x86/include/asm/switch_to.h |
2555 | +++ b/arch/x86/include/asm/switch_to.h |
2556 | @@ -2,6 +2,8 @@ |
2557 | #ifndef _ASM_X86_SWITCH_TO_H |
2558 | #define _ASM_X86_SWITCH_TO_H |
2559 | |
2560 | +#include <linux/sched/task_stack.h> |
2561 | + |
2562 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
2563 | |
2564 | struct task_struct *__switch_to_asm(struct task_struct *prev, |
2565 | @@ -73,4 +75,28 @@ do { \ |
2566 | ((last) = __switch_to_asm((prev), (next))); \ |
2567 | } while (0) |
2568 | |
2569 | +#ifdef CONFIG_X86_32 |
2570 | +static inline void refresh_sysenter_cs(struct thread_struct *thread) |
2571 | +{ |
2572 | + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ |
2573 | + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) |
2574 | + return; |
2575 | + |
2576 | + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); |
2577 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
2578 | +} |
2579 | +#endif |
2580 | + |
2581 | +/* This is used when switching tasks or entering/exiting vm86 mode. */ |
2582 | +static inline void update_sp0(struct task_struct *task) |
2583 | +{ |
2584 | + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ |
2585 | +#ifdef CONFIG_X86_32 |
2586 | + load_sp0(task->thread.sp0); |
2587 | +#else |
2588 | + if (static_cpu_has(X86_FEATURE_XENPV)) |
2589 | + load_sp0(task_top_of_stack(task)); |
2590 | +#endif |
2591 | +} |
2592 | + |
2593 | #endif /* _ASM_X86_SWITCH_TO_H */ |
2594 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h |
2595 | index 70f425947dc5..00223333821a 100644 |
2596 | --- a/arch/x86/include/asm/thread_info.h |
2597 | +++ b/arch/x86/include/asm/thread_info.h |
2598 | @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack, |
2599 | #else /* !__ASSEMBLY__ */ |
2600 | |
2601 | #ifdef CONFIG_X86_64 |
2602 | -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) |
2603 | +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) |
2604 | #endif |
2605 | |
2606 | #endif |
2607 | diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h |
2608 | index fa60398bbc3a..069c04be1507 100644 |
2609 | --- a/arch/x86/include/asm/trace/fpu.h |
2610 | +++ b/arch/x86/include/asm/trace/fpu.h |
2611 | @@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu, |
2612 | ) |
2613 | ); |
2614 | |
2615 | -DEFINE_EVENT(x86_fpu, x86_fpu_state, |
2616 | - TP_PROTO(struct fpu *fpu), |
2617 | - TP_ARGS(fpu) |
2618 | -); |
2619 | - |
2620 | DEFINE_EVENT(x86_fpu, x86_fpu_before_save, |
2621 | TP_PROTO(struct fpu *fpu), |
2622 | TP_ARGS(fpu) |
2623 | @@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, |
2624 | TP_ARGS(fpu) |
2625 | ); |
2626 | |
2627 | -DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, |
2628 | - TP_PROTO(struct fpu *fpu), |
2629 | - TP_ARGS(fpu) |
2630 | -); |
2631 | - |
2632 | DEFINE_EVENT(x86_fpu, x86_fpu_init_state, |
2633 | TP_PROTO(struct fpu *fpu), |
2634 | TP_ARGS(fpu) |
2635 | diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h |
2636 | index b0cced97a6ce..31051f35cbb7 100644 |
2637 | --- a/arch/x86/include/asm/traps.h |
2638 | +++ b/arch/x86/include/asm/traps.h |
2639 | @@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void); |
2640 | |
2641 | #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) |
2642 | asmlinkage void xen_divide_error(void); |
2643 | +asmlinkage void xen_xennmi(void); |
2644 | asmlinkage void xen_xendebug(void); |
2645 | asmlinkage void xen_xenint3(void); |
2646 | -asmlinkage void xen_nmi(void); |
2647 | asmlinkage void xen_overflow(void); |
2648 | asmlinkage void xen_bounds(void); |
2649 | asmlinkage void xen_invalid_op(void); |
2650 | @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); |
2651 | dotraplinkage void do_stack_segment(struct pt_regs *, long); |
2652 | #ifdef CONFIG_X86_64 |
2653 | dotraplinkage void do_double_fault(struct pt_regs *, long); |
2654 | -asmlinkage struct pt_regs *sync_regs(struct pt_regs *); |
2655 | #endif |
2656 | dotraplinkage void do_general_protection(struct pt_regs *, long); |
2657 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); |
2658 | @@ -145,4 +144,22 @@ enum { |
2659 | X86_TRAP_IRET = 32, /* 32, IRET Exception */ |
2660 | }; |
2661 | |
2662 | +/* |
2663 | + * Page fault error code bits: |
2664 | + * |
2665 | + * bit 0 == 0: no page found 1: protection fault |
2666 | + * bit 1 == 0: read access 1: write access |
2667 | + * bit 2 == 0: kernel-mode access 1: user-mode access |
2668 | + * bit 3 == 1: use of reserved bit detected |
2669 | + * bit 4 == 1: fault was an instruction fetch |
2670 | + * bit 5 == 1: protection keys block access |
2671 | + */ |
2672 | +enum x86_pf_error_code { |
2673 | + X86_PF_PROT = 1 << 0, |
2674 | + X86_PF_WRITE = 1 << 1, |
2675 | + X86_PF_USER = 1 << 2, |
2676 | + X86_PF_RSVD = 1 << 3, |
2677 | + X86_PF_INSTR = 1 << 4, |
2678 | + X86_PF_PK = 1 << 5, |
2679 | +}; |
2680 | #endif /* _ASM_X86_TRAPS_H */ |
2681 | diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h |
2682 | index 87adc0d38c4a..c1688c2d0a12 100644 |
2683 | --- a/arch/x86/include/asm/unwind.h |
2684 | +++ b/arch/x86/include/asm/unwind.h |
2685 | @@ -7,17 +7,20 @@ |
2686 | #include <asm/ptrace.h> |
2687 | #include <asm/stacktrace.h> |
2688 | |
2689 | +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) |
2690 | +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) |
2691 | + |
2692 | struct unwind_state { |
2693 | struct stack_info stack_info; |
2694 | unsigned long stack_mask; |
2695 | struct task_struct *task; |
2696 | int graph_idx; |
2697 | bool error; |
2698 | -#if defined(CONFIG_ORC_UNWINDER) |
2699 | +#if defined(CONFIG_UNWINDER_ORC) |
2700 | bool signal, full_regs; |
2701 | unsigned long sp, bp, ip; |
2702 | struct pt_regs *regs; |
2703 | -#elif defined(CONFIG_FRAME_POINTER_UNWINDER) |
2704 | +#elif defined(CONFIG_UNWINDER_FRAME_POINTER) |
2705 | bool got_irq; |
2706 | unsigned long *bp, *orig_sp, ip; |
2707 | struct pt_regs *regs; |
2708 | @@ -51,7 +54,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, |
2709 | __unwind_start(state, task, regs, first_frame); |
2710 | } |
2711 | |
2712 | -#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) |
2713 | +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) |
2714 | +/* |
2715 | + * WARNING: The entire pt_regs may not be safe to dereference. In some cases, |
2716 | + * only the iret frame registers are accessible. Use with caution! |
2717 | + */ |
2718 | static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) |
2719 | { |
2720 | if (unwind_done(state)) |
2721 | @@ -66,7 +73,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) |
2722 | } |
2723 | #endif |
2724 | |
2725 | -#ifdef CONFIG_ORC_UNWINDER |
2726 | +#ifdef CONFIG_UNWINDER_ORC |
2727 | void unwind_init(void); |
2728 | void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, |
2729 | void *orc, size_t orc_size); |
2730 | diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h |
2731 | index 8a1ebf9540dd..ad15a0fda917 100644 |
2732 | --- a/arch/x86/include/asm/x86_init.h |
2733 | +++ b/arch/x86/include/asm/x86_init.h |
2734 | @@ -114,6 +114,18 @@ struct x86_init_pci { |
2735 | void (*fixup_irqs)(void); |
2736 | }; |
2737 | |
2738 | +/** |
2739 | + * struct x86_hyper_init - x86 hypervisor init functions |
2740 | + * @init_platform: platform setup |
2741 | + * @x2apic_available: X2APIC detection |
2742 | + * @init_mem_mapping: setup early mappings during init_mem_mapping() |
2743 | + */ |
2744 | +struct x86_hyper_init { |
2745 | + void (*init_platform)(void); |
2746 | + bool (*x2apic_available)(void); |
2747 | + void (*init_mem_mapping)(void); |
2748 | +}; |
2749 | + |
2750 | /** |
2751 | * struct x86_init_ops - functions for platform specific setup |
2752 | * |
2753 | @@ -127,6 +139,7 @@ struct x86_init_ops { |
2754 | struct x86_init_timers timers; |
2755 | struct x86_init_iommu iommu; |
2756 | struct x86_init_pci pci; |
2757 | + struct x86_hyper_init hyper; |
2758 | }; |
2759 | |
2760 | /** |
2761 | @@ -199,6 +212,15 @@ struct x86_legacy_features { |
2762 | struct x86_legacy_devices devices; |
2763 | }; |
2764 | |
2765 | +/** |
2766 | + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks |
2767 | + * |
2768 | + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) |
2769 | + */ |
2770 | +struct x86_hyper_runtime { |
2771 | + void (*pin_vcpu)(int cpu); |
2772 | +}; |
2773 | + |
2774 | /** |
2775 | * struct x86_platform_ops - platform specific runtime functions |
2776 | * @calibrate_cpu: calibrate CPU |
2777 | @@ -218,6 +240,7 @@ struct x86_legacy_features { |
2778 | * possible in x86_early_init_platform_quirks() by |
2779 | * only using the current x86_hardware_subarch |
2780 | * semantics. |
2781 | + * @hyper: x86 hypervisor specific runtime callbacks |
2782 | */ |
2783 | struct x86_platform_ops { |
2784 | unsigned long (*calibrate_cpu)(void); |
2785 | @@ -233,6 +256,7 @@ struct x86_platform_ops { |
2786 | void (*apic_post_init)(void); |
2787 | struct x86_legacy_features legacy; |
2788 | void (*set_legacy_features)(void); |
2789 | + struct x86_hyper_runtime hyper; |
2790 | }; |
2791 | |
2792 | struct pci_dev; |
2793 | diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h |
2794 | index 6f3355399665..53b4ca55ebb6 100644 |
2795 | --- a/arch/x86/include/uapi/asm/processor-flags.h |
2796 | +++ b/arch/x86/include/uapi/asm/processor-flags.h |
2797 | @@ -152,5 +152,8 @@ |
2798 | #define CX86_ARR_BASE 0xc4 |
2799 | #define CX86_RCR_BASE 0xdc |
2800 | |
2801 | +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ |
2802 | + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ |
2803 | + X86_CR0_PG) |
2804 | |
2805 | #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ |
2806 | diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile |
2807 | index 5f70044340ff..295abaa58add 100644 |
2808 | --- a/arch/x86/kernel/Makefile |
2809 | +++ b/arch/x86/kernel/Makefile |
2810 | @@ -25,9 +25,9 @@ endif |
2811 | KASAN_SANITIZE_head$(BITS).o := n |
2812 | KASAN_SANITIZE_dumpstack.o := n |
2813 | KASAN_SANITIZE_dumpstack_$(BITS).o := n |
2814 | -KASAN_SANITIZE_stacktrace.o := n |
2815 | +KASAN_SANITIZE_stacktrace.o := n |
2816 | +KASAN_SANITIZE_paravirt.o := n |
2817 | |
2818 | -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y |
2819 | OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y |
2820 | OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y |
2821 | OBJECT_FILES_NON_STANDARD_test_nx.o := y |
2822 | @@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o |
2823 | obj-$(CONFIG_TRACING) += tracepoint.o |
2824 | obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o |
2825 | |
2826 | -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o |
2827 | -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o |
2828 | -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o |
2829 | +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o |
2830 | +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o |
2831 | +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o |
2832 | |
2833 | ### |
2834 | # 64 bit specific files |
2835 | diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c |
2836 | index ff891772c9f8..89c7c8569e5e 100644 |
2837 | --- a/arch/x86/kernel/apic/apic.c |
2838 | +++ b/arch/x86/kernel/apic/apic.c |
2839 | @@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode) |
2840 | * under KVM |
2841 | */ |
2842 | if (max_physical_apicid > 255 || |
2843 | - !hypervisor_x2apic_available()) { |
2844 | + !x86_init.hyper.x2apic_available()) { |
2845 | pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); |
2846 | x2apic_disable(); |
2847 | return; |
2848 | diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c |
2849 | index 0d57bb9079c9..c0b694810ff4 100644 |
2850 | --- a/arch/x86/kernel/apic/x2apic_uv_x.c |
2851 | +++ b/arch/x86/kernel/apic/x2apic_uv_x.c |
2852 | @@ -920,9 +920,8 @@ static __init void uv_rtc_init(void) |
2853 | /* |
2854 | * percpu heartbeat timer |
2855 | */ |
2856 | -static void uv_heartbeat(unsigned long ignored) |
2857 | +static void uv_heartbeat(struct timer_list *timer) |
2858 | { |
2859 | - struct timer_list *timer = &uv_scir_info->timer; |
2860 | unsigned char bits = uv_scir_info->state; |
2861 | |
2862 | /* Flip heartbeat bit: */ |
2863 | @@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu) |
2864 | struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; |
2865 | |
2866 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); |
2867 | - setup_pinned_timer(timer, uv_heartbeat, cpu); |
2868 | + timer_setup(timer, uv_heartbeat, TIMER_PINNED); |
2869 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; |
2870 | add_timer_on(timer, cpu); |
2871 | uv_cpu_scir_info(cpu)->enabled = 1; |
2872 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c |
2873 | index 8ea78275480d..cd360a5e0dca 100644 |
2874 | --- a/arch/x86/kernel/asm-offsets.c |
2875 | +++ b/arch/x86/kernel/asm-offsets.c |
2876 | @@ -93,4 +93,10 @@ void common(void) { |
2877 | |
2878 | BLANK(); |
2879 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); |
2880 | + |
2881 | + /* Layout info for cpu_entry_area */ |
2882 | + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); |
2883 | + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); |
2884 | + OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); |
2885 | + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); |
2886 | } |
2887 | diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c |
2888 | index dedf428b20b6..7d20d9c0b3d6 100644 |
2889 | --- a/arch/x86/kernel/asm-offsets_32.c |
2890 | +++ b/arch/x86/kernel/asm-offsets_32.c |
2891 | @@ -47,13 +47,8 @@ void foo(void) |
2892 | BLANK(); |
2893 | |
2894 | /* Offset from the sysenter stack to tss.sp0 */ |
2895 | - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - |
2896 | - offsetofend(struct tss_struct, SYSENTER_stack)); |
2897 | - |
2898 | - /* Offset from cpu_tss to SYSENTER_stack */ |
2899 | - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); |
2900 | - /* Size of SYSENTER_stack */ |
2901 | - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); |
2902 | + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - |
2903 | + offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); |
2904 | |
2905 | #ifdef CONFIG_CC_STACKPROTECTOR |
2906 | BLANK(); |
2907 | diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c |
2908 | index 630212fa9b9d..bf51e51d808d 100644 |
2909 | --- a/arch/x86/kernel/asm-offsets_64.c |
2910 | +++ b/arch/x86/kernel/asm-offsets_64.c |
2911 | @@ -23,6 +23,9 @@ int main(void) |
2912 | #ifdef CONFIG_PARAVIRT |
2913 | OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); |
2914 | OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); |
2915 | +#ifdef CONFIG_DEBUG_ENTRY |
2916 | + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); |
2917 | +#endif |
2918 | BLANK(); |
2919 | #endif |
2920 | |
2921 | @@ -63,6 +66,7 @@ int main(void) |
2922 | |
2923 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); |
2924 | OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); |
2925 | + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); |
2926 | BLANK(); |
2927 | |
2928 | #ifdef CONFIG_CC_STACKPROTECTOR |
2929 | diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile |
2930 | index c60922a66385..90cb82dbba57 100644 |
2931 | --- a/arch/x86/kernel/cpu/Makefile |
2932 | +++ b/arch/x86/kernel/cpu/Makefile |
2933 | @@ -23,6 +23,7 @@ obj-y += rdrand.o |
2934 | obj-y += match.o |
2935 | obj-y += bugs.o |
2936 | obj-$(CONFIG_CPU_FREQ) += aperfmperf.o |
2937 | +obj-y += cpuid-deps.o |
2938 | |
2939 | obj-$(CONFIG_PROC_FS) += proc.o |
2940 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o |
2941 | diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c |
2942 | index d58184b7cd44..bcb75dc97d44 100644 |
2943 | --- a/arch/x86/kernel/cpu/amd.c |
2944 | +++ b/arch/x86/kernel/cpu/amd.c |
2945 | @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) |
2946 | case 0x17: init_amd_zn(c); break; |
2947 | } |
2948 | |
2949 | - /* Enable workaround for FXSAVE leak */ |
2950 | - if (c->x86 >= 6) |
2951 | + /* |
2952 | + * Enable workaround for FXSAVE leak on CPUs |
2953 | + * without a XSaveErPtr feature |
2954 | + */ |
2955 | + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) |
2956 | set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); |
2957 | |
2958 | cpu_detect_cache_sizes(c); |
2959 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c |
2960 | index c9176bae7fd8..034900623adf 100644 |
2961 | --- a/arch/x86/kernel/cpu/common.c |
2962 | +++ b/arch/x86/kernel/cpu/common.c |
2963 | @@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) |
2964 | return NULL; /* Not found */ |
2965 | } |
2966 | |
2967 | -__u32 cpu_caps_cleared[NCAPINTS]; |
2968 | -__u32 cpu_caps_set[NCAPINTS]; |
2969 | +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; |
2970 | +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; |
2971 | |
2972 | void load_percpu_segment(int cpu) |
2973 | { |
2974 | @@ -466,27 +466,116 @@ void load_percpu_segment(int cpu) |
2975 | load_stack_canary_segment(); |
2976 | } |
2977 | |
2978 | -/* Setup the fixmap mapping only once per-processor */ |
2979 | -static inline void setup_fixmap_gdt(int cpu) |
2980 | +#ifdef CONFIG_X86_32 |
2981 | +/* The 32-bit entry code needs to find cpu_entry_area. */ |
2982 | +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); |
2983 | +#endif |
2984 | + |
2985 | +#ifdef CONFIG_X86_64 |
2986 | +/* |
2987 | + * Special IST stacks which the CPU switches to when it calls |
2988 | + * an IST-marked descriptor entry. Up to 7 stacks (hardware |
2989 | + * limit), all of them are 4K, except the debug stack which |
2990 | + * is 8K. |
2991 | + */ |
2992 | +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { |
2993 | + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
2994 | + [DEBUG_STACK - 1] = DEBUG_STKSZ |
2995 | +}; |
2996 | + |
2997 | +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
2998 | + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
2999 | +#endif |
3000 | + |
3001 | +static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, |
3002 | + SYSENTER_stack_storage); |
3003 | + |
3004 | +static void __init |
3005 | +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) |
3006 | +{ |
3007 | + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) |
3008 | + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); |
3009 | +} |
3010 | + |
3011 | +/* Setup the fixmap mappings only once per-processor */ |
3012 | +static void __init setup_cpu_entry_area(int cpu) |
3013 | { |
3014 | #ifdef CONFIG_X86_64 |
3015 | - /* On 64-bit systems, we use a read-only fixmap GDT. */ |
3016 | - pgprot_t prot = PAGE_KERNEL_RO; |
3017 | + extern char _entry_trampoline[]; |
3018 | + |
3019 | + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ |
3020 | + pgprot_t gdt_prot = PAGE_KERNEL_RO; |
3021 | + pgprot_t tss_prot = PAGE_KERNEL_RO; |
3022 | #else |
3023 | /* |
3024 | * On native 32-bit systems, the GDT cannot be read-only because |
3025 | * our double fault handler uses a task gate, and entering through |
3026 | - * a task gate needs to change an available TSS to busy. If the GDT |
3027 | - * is read-only, that will triple fault. |
3028 | + * a task gate needs to change an available TSS to busy. If the |
3029 | + * GDT is read-only, that will triple fault. The TSS cannot be |
3030 | + * read-only because the CPU writes to it on task switches. |
3031 | * |
3032 | - * On Xen PV, the GDT must be read-only because the hypervisor requires |
3033 | - * it. |
3034 | + * On Xen PV, the GDT must be read-only because the hypervisor |
3035 | + * requires it. |
3036 | */ |
3037 | - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? |
3038 | + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? |
3039 | PAGE_KERNEL_RO : PAGE_KERNEL; |
3040 | + pgprot_t tss_prot = PAGE_KERNEL; |
3041 | +#endif |
3042 | + |
3043 | + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); |
3044 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), |
3045 | + per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, |
3046 | + PAGE_KERNEL); |
3047 | + |
3048 | + /* |
3049 | + * The Intel SDM says (Volume 3, 7.2.1): |
3050 | + * |
3051 | + * Avoid placing a page boundary in the part of the TSS that the |
3052 | + * processor reads during a task switch (the first 104 bytes). The |
3053 | + * processor may not correctly perform address translations if a |
3054 | + * boundary occurs in this area. During a task switch, the processor |
3055 | + * reads and writes into the first 104 bytes of each TSS (using |
3056 | + * contiguous physical addresses beginning with the physical address |
3057 | + * of the first byte of the TSS). So, after TSS access begins, if |
3058 | + * part of the 104 bytes is not physically contiguous, the processor |
3059 | + * will access incorrect information without generating a page-fault |
3060 | + * exception. |
3061 | + * |
3062 | + * There are also a lot of errata involving the TSS spanning a page |
3063 | + * boundary. Assert that we're not doing that. |
3064 | + */ |
3065 | + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ |
3066 | + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); |
3067 | + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); |
3068 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), |
3069 | + &per_cpu(cpu_tss_rw, cpu), |
3070 | + sizeof(struct tss_struct) / PAGE_SIZE, |
3071 | + tss_prot); |
3072 | + |
3073 | +#ifdef CONFIG_X86_32 |
3074 | + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); |
3075 | #endif |
3076 | |
3077 | - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); |
3078 | +#ifdef CONFIG_X86_64 |
3079 | + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); |
3080 | + BUILD_BUG_ON(sizeof(exception_stacks) != |
3081 | + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); |
3082 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), |
3083 | + &per_cpu(exception_stacks, cpu), |
3084 | + sizeof(exception_stacks) / PAGE_SIZE, |
3085 | + PAGE_KERNEL); |
3086 | + |
3087 | + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), |
3088 | + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); |
3089 | +#endif |
3090 | +} |
3091 | + |
3092 | +void __init setup_cpu_entry_areas(void) |
3093 | +{ |
3094 | + unsigned int cpu; |
3095 | + |
3096 | + for_each_possible_cpu(cpu) |
3097 | + setup_cpu_entry_area(cpu); |
3098 | } |
3099 | |
3100 | /* Load the original GDT from the per-cpu structure */ |
3101 | @@ -723,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) |
3102 | { |
3103 | int i; |
3104 | |
3105 | - for (i = 0; i < NCAPINTS; i++) { |
3106 | + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { |
3107 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; |
3108 | c->x86_capability[i] |= cpu_caps_set[i]; |
3109 | } |
3110 | @@ -1225,7 +1314,7 @@ void enable_sep_cpu(void) |
3111 | return; |
3112 | |
3113 | cpu = get_cpu(); |
3114 | - tss = &per_cpu(cpu_tss, cpu); |
3115 | + tss = &per_cpu(cpu_tss_rw, cpu); |
3116 | |
3117 | /* |
3118 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- |
3119 | @@ -1234,11 +1323,7 @@ void enable_sep_cpu(void) |
3120 | |
3121 | tss->x86_tss.ss1 = __KERNEL_CS; |
3122 | wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); |
3123 | - |
3124 | - wrmsr(MSR_IA32_SYSENTER_ESP, |
3125 | - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), |
3126 | - 0); |
3127 | - |
3128 | + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); |
3129 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); |
3130 | |
3131 | put_cpu(); |
3132 | @@ -1301,18 +1386,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) |
3133 | pr_cont(")\n"); |
3134 | } |
3135 | |
3136 | -static __init int setup_disablecpuid(char *arg) |
3137 | +/* |
3138 | + * clearcpuid= was already parsed in fpu__init_parse_early_param. |
3139 | + * But we need to keep a dummy __setup around otherwise it would |
3140 | + * show up as an environment variable for init. |
3141 | + */ |
3142 | +static __init int setup_clearcpuid(char *arg) |
3143 | { |
3144 | - int bit; |
3145 | - |
3146 | - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) |
3147 | - setup_clear_cpu_cap(bit); |
3148 | - else |
3149 | - return 0; |
3150 | - |
3151 | return 1; |
3152 | } |
3153 | -__setup("clearcpuid=", setup_disablecpuid); |
3154 | +__setup("clearcpuid=", setup_clearcpuid); |
3155 | |
3156 | #ifdef CONFIG_X86_64 |
3157 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
3158 | @@ -1334,25 +1417,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; |
3159 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; |
3160 | EXPORT_PER_CPU_SYMBOL(__preempt_count); |
3161 | |
3162 | -/* |
3163 | - * Special IST stacks which the CPU switches to when it calls |
3164 | - * an IST-marked descriptor entry. Up to 7 stacks (hardware |
3165 | - * limit), all of them are 4K, except the debug stack which |
3166 | - * is 8K. |
3167 | - */ |
3168 | -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { |
3169 | - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, |
3170 | - [DEBUG_STACK - 1] = DEBUG_STKSZ |
3171 | -}; |
3172 | - |
3173 | -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
3174 | - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
3175 | - |
3176 | /* May not be marked __init: used by software suspend */ |
3177 | void syscall_init(void) |
3178 | { |
3179 | + extern char _entry_trampoline[]; |
3180 | + extern char entry_SYSCALL_64_trampoline[]; |
3181 | + |
3182 | + int cpu = smp_processor_id(); |
3183 | + unsigned long SYSCALL64_entry_trampoline = |
3184 | + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + |
3185 | + (entry_SYSCALL_64_trampoline - _entry_trampoline); |
3186 | + |
3187 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); |
3188 | - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); |
3189 | + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); |
3190 | |
3191 | #ifdef CONFIG_IA32_EMULATION |
3192 | wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); |
3193 | @@ -1363,7 +1440,7 @@ void syscall_init(void) |
3194 | * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). |
3195 | */ |
3196 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); |
3197 | - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); |
3198 | + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); |
3199 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); |
3200 | #else |
3201 | wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); |
3202 | @@ -1507,7 +1584,7 @@ void cpu_init(void) |
3203 | if (cpu) |
3204 | load_ucode_ap(); |
3205 | |
3206 | - t = &per_cpu(cpu_tss, cpu); |
3207 | + t = &per_cpu(cpu_tss_rw, cpu); |
3208 | oist = &per_cpu(orig_ist, cpu); |
3209 | |
3210 | #ifdef CONFIG_NUMA |
3211 | @@ -1546,7 +1623,7 @@ void cpu_init(void) |
3212 | * set up and load the per-CPU TSS |
3213 | */ |
3214 | if (!oist->ist[0]) { |
3215 | - char *estacks = per_cpu(exception_stacks, cpu); |
3216 | + char *estacks = get_cpu_entry_area(cpu)->exception_stacks; |
3217 | |
3218 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
3219 | estacks += exception_stack_sizes[v]; |
3220 | @@ -1557,7 +1634,7 @@ void cpu_init(void) |
3221 | } |
3222 | } |
3223 | |
3224 | - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
3225 | + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
3226 | |
3227 | /* |
3228 | * <= is required because the CPU will access up to |
3229 | @@ -1572,9 +1649,14 @@ void cpu_init(void) |
3230 | initialize_tlbstate_and_flush(); |
3231 | enter_lazy_tlb(&init_mm, me); |
3232 | |
3233 | - load_sp0(t, &current->thread); |
3234 | - set_tss_desc(cpu, t); |
3235 | + /* |
3236 | + * Initialize the TSS. sp0 points to the entry trampoline stack |
3237 | + * regardless of what task is running. |
3238 | + */ |
3239 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); |
3240 | load_TR_desc(); |
3241 | + load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); |
3242 | + |
3243 | load_mm_ldt(&init_mm); |
3244 | |
3245 | clear_all_debug_regs(); |
3246 | @@ -1585,7 +1667,6 @@ void cpu_init(void) |
3247 | if (is_uv_system()) |
3248 | uv_cpu_init(); |
3249 | |
3250 | - setup_fixmap_gdt(cpu); |
3251 | load_fixmap_gdt(cpu); |
3252 | } |
3253 | |
3254 | @@ -1595,8 +1676,7 @@ void cpu_init(void) |
3255 | { |
3256 | int cpu = smp_processor_id(); |
3257 | struct task_struct *curr = current; |
3258 | - struct tss_struct *t = &per_cpu(cpu_tss, cpu); |
3259 | - struct thread_struct *thread = &curr->thread; |
3260 | + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); |
3261 | |
3262 | wait_for_master_cpu(cpu); |
3263 | |
3264 | @@ -1627,12 +1707,16 @@ void cpu_init(void) |
3265 | initialize_tlbstate_and_flush(); |
3266 | enter_lazy_tlb(&init_mm, curr); |
3267 | |
3268 | - load_sp0(t, thread); |
3269 | - set_tss_desc(cpu, t); |
3270 | + /* |
3271 | + * Initialize the TSS. Don't bother initializing sp0, as the initial |
3272 | + * task never enters user mode. |
3273 | + */ |
3274 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); |
3275 | load_TR_desc(); |
3276 | + |
3277 | load_mm_ldt(&init_mm); |
3278 | |
3279 | - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
3280 | + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
3281 | |
3282 | #ifdef CONFIG_DOUBLEFAULT |
3283 | /* Set up doublefault TSS pointer in the GDT */ |
3284 | @@ -1644,7 +1728,6 @@ void cpu_init(void) |
3285 | |
3286 | fpu__init_cpu(); |
3287 | |
3288 | - setup_fixmap_gdt(cpu); |
3289 | load_fixmap_gdt(cpu); |
3290 | } |
3291 | #endif |
3292 | diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c |
3293 | new file mode 100644 |
3294 | index 000000000000..904b0a3c4e53 |
3295 | --- /dev/null |
3296 | +++ b/arch/x86/kernel/cpu/cpuid-deps.c |
3297 | @@ -0,0 +1,121 @@ |
3298 | +/* Declare dependencies between CPUIDs */ |
3299 | +#include <linux/kernel.h> |
3300 | +#include <linux/init.h> |
3301 | +#include <linux/module.h> |
3302 | +#include <asm/cpufeature.h> |
3303 | + |
3304 | +struct cpuid_dep { |
3305 | + unsigned int feature; |
3306 | + unsigned int depends; |
3307 | +}; |
3308 | + |
3309 | +/* |
3310 | + * Table of CPUID features that depend on others. |
3311 | + * |
3312 | + * This only includes dependencies that can be usefully disabled, not |
3313 | + * features part of the base set (like FPU). |
3314 | + * |
3315 | + * Note this all is not __init / __initdata because it can be |
3316 | + * called from cpu hotplug. It shouldn't do anything in this case, |
3317 | + * but it's difficult to tell that to the init reference checker. |
3318 | + */ |
3319 | +const static struct cpuid_dep cpuid_deps[] = { |
3320 | + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, |
3321 | + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, |
3322 | + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, |
3323 | + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, |
3324 | + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, |
3325 | + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, |
3326 | + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, |
3327 | + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, |
3328 | + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, |
3329 | + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, |
3330 | + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, |
3331 | + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, |
3332 | + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, |
3333 | + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, |
3334 | + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, |
3335 | + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, |
3336 | + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, |
3337 | + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, |
3338 | + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, |
3339 | + { X86_FEATURE_FMA, X86_FEATURE_AVX }, |
3340 | + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, |
3341 | + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, |
3342 | + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, |
3343 | + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, |
3344 | + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, |
3345 | + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, |
3346 | + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, |
3347 | + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, |
3348 | + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, |
3349 | + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, |
3350 | + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, |
3351 | + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, |
3352 | + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, |
3353 | + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, |
3354 | + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, |
3355 | + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, |
3356 | + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, |
3357 | + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, |
3358 | + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, |
3359 | + {} |
3360 | +}; |
3361 | + |
3362 | +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) |
3363 | +{ |
3364 | + /* |
3365 | + * Note: This could use the non atomic __*_bit() variants, but the |
3366 | + * rest of the cpufeature code uses atomics as well, so keep it for |
3367 | + * consistency. Cleanup all of it separately. |
3368 | + */ |
3369 | + if (!c) { |
3370 | + clear_cpu_cap(&boot_cpu_data, feature); |
3371 | + set_bit(feature, (unsigned long *)cpu_caps_cleared); |
3372 | + } else { |
3373 | + clear_bit(feature, (unsigned long *)c->x86_capability); |
3374 | + } |
3375 | +} |
3376 | + |
3377 | +/* Take the capabilities and the BUG bits into account */ |
3378 | +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) |
3379 | + |
3380 | +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) |
3381 | +{ |
3382 | + DECLARE_BITMAP(disable, MAX_FEATURE_BITS); |
3383 | + const struct cpuid_dep *d; |
3384 | + bool changed; |
3385 | + |
3386 | + if (WARN_ON(feature >= MAX_FEATURE_BITS)) |
3387 | + return; |
3388 | + |
3389 | + clear_feature(c, feature); |
3390 | + |
3391 | + /* Collect all features to disable, handling dependencies */ |
3392 | + memset(disable, 0, sizeof(disable)); |
3393 | + __set_bit(feature, disable); |
3394 | + |
3395 | + /* Loop until we get a stable state. */ |
3396 | + do { |
3397 | + changed = false; |
3398 | + for (d = cpuid_deps; d->feature; d++) { |
3399 | + if (!test_bit(d->depends, disable)) |
3400 | + continue; |
3401 | + if (__test_and_set_bit(d->feature, disable)) |
3402 | + continue; |
3403 | + |
3404 | + changed = true; |
3405 | + clear_feature(c, d->feature); |
3406 | + } |
3407 | + } while (changed); |
3408 | +} |
3409 | + |
3410 | +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) |
3411 | +{ |
3412 | + do_clear_cpu_cap(c, feature); |
3413 | +} |
3414 | + |
3415 | +void setup_clear_cpu_cap(unsigned int feature) |
3416 | +{ |
3417 | + do_clear_cpu_cap(NULL, feature); |
3418 | +} |
3419 | diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c |
3420 | index 4fa90006ac68..bea8d3e24f50 100644 |
3421 | --- a/arch/x86/kernel/cpu/hypervisor.c |
3422 | +++ b/arch/x86/kernel/cpu/hypervisor.c |
3423 | @@ -26,6 +26,12 @@ |
3424 | #include <asm/processor.h> |
3425 | #include <asm/hypervisor.h> |
3426 | |
3427 | +extern const struct hypervisor_x86 x86_hyper_vmware; |
3428 | +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; |
3429 | +extern const struct hypervisor_x86 x86_hyper_xen_pv; |
3430 | +extern const struct hypervisor_x86 x86_hyper_xen_hvm; |
3431 | +extern const struct hypervisor_x86 x86_hyper_kvm; |
3432 | + |
3433 | static const __initconst struct hypervisor_x86 * const hypervisors[] = |
3434 | { |
3435 | #ifdef CONFIG_XEN_PV |
3436 | @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = |
3437 | #endif |
3438 | }; |
3439 | |
3440 | -const struct hypervisor_x86 *x86_hyper; |
3441 | -EXPORT_SYMBOL(x86_hyper); |
3442 | +enum x86_hypervisor_type x86_hyper_type; |
3443 | +EXPORT_SYMBOL(x86_hyper_type); |
3444 | |
3445 | -static inline void __init |
3446 | +static inline const struct hypervisor_x86 * __init |
3447 | detect_hypervisor_vendor(void) |
3448 | { |
3449 | - const struct hypervisor_x86 *h, * const *p; |
3450 | + const struct hypervisor_x86 *h = NULL, * const *p; |
3451 | uint32_t pri, max_pri = 0; |
3452 | |
3453 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
3454 | - h = *p; |
3455 | - pri = h->detect(); |
3456 | - if (pri != 0 && pri > max_pri) { |
3457 | + pri = (*p)->detect(); |
3458 | + if (pri > max_pri) { |
3459 | max_pri = pri; |
3460 | - x86_hyper = h; |
3461 | + h = *p; |
3462 | } |
3463 | } |
3464 | |
3465 | - if (max_pri) |
3466 | - pr_info("Hypervisor detected: %s\n", x86_hyper->name); |
3467 | + if (h) |
3468 | + pr_info("Hypervisor detected: %s\n", h->name); |
3469 | + |
3470 | + return h; |
3471 | } |
3472 | |
3473 | -void __init init_hypervisor_platform(void) |
3474 | +static void __init copy_array(const void *src, void *target, unsigned int size) |
3475 | { |
3476 | + unsigned int i, n = size / sizeof(void *); |
3477 | + const void * const *from = (const void * const *)src; |
3478 | + const void **to = (const void **)target; |
3479 | |
3480 | - detect_hypervisor_vendor(); |
3481 | - |
3482 | - if (!x86_hyper) |
3483 | - return; |
3484 | - |
3485 | - if (x86_hyper->init_platform) |
3486 | - x86_hyper->init_platform(); |
3487 | + for (i = 0; i < n; i++) |
3488 | + if (from[i]) |
3489 | + to[i] = from[i]; |
3490 | } |
3491 | |
3492 | -bool __init hypervisor_x2apic_available(void) |
3493 | +void __init init_hypervisor_platform(void) |
3494 | { |
3495 | - return x86_hyper && |
3496 | - x86_hyper->x2apic_available && |
3497 | - x86_hyper->x2apic_available(); |
3498 | -} |
3499 | + const struct hypervisor_x86 *h; |
3500 | |
3501 | -void hypervisor_pin_vcpu(int cpu) |
3502 | -{ |
3503 | - if (!x86_hyper) |
3504 | + h = detect_hypervisor_vendor(); |
3505 | + |
3506 | + if (!h) |
3507 | return; |
3508 | |
3509 | - if (x86_hyper->pin_vcpu) |
3510 | - x86_hyper->pin_vcpu(cpu); |
3511 | - else |
3512 | - WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); |
3513 | + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); |
3514 | + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); |
3515 | + |
3516 | + x86_hyper_type = h->type; |
3517 | + x86_init.hyper.init_platform(); |
3518 | } |
3519 | diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c |
3520 | index 236324e83a3a..85eb5fc180c8 100644 |
3521 | --- a/arch/x86/kernel/cpu/mshyperv.c |
3522 | +++ b/arch/x86/kernel/cpu/mshyperv.c |
3523 | @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void) |
3524 | #endif |
3525 | } |
3526 | |
3527 | -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
3528 | +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
3529 | .name = "Microsoft Hyper-V", |
3530 | .detect = ms_hyperv_platform, |
3531 | - .init_platform = ms_hyperv_init_platform, |
3532 | + .type = X86_HYPER_MS_HYPERV, |
3533 | + .init.init_platform = ms_hyperv_init_platform, |
3534 | }; |
3535 | -EXPORT_SYMBOL(x86_hyper_ms_hyperv); |
3536 | diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c |
3537 | index 40ed26852ebd..8e005329648b 100644 |
3538 | --- a/arch/x86/kernel/cpu/vmware.c |
3539 | +++ b/arch/x86/kernel/cpu/vmware.c |
3540 | @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void) |
3541 | (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; |
3542 | } |
3543 | |
3544 | -const __refconst struct hypervisor_x86 x86_hyper_vmware = { |
3545 | +const __initconst struct hypervisor_x86 x86_hyper_vmware = { |
3546 | .name = "VMware", |
3547 | .detect = vmware_platform, |
3548 | - .init_platform = vmware_platform_setup, |
3549 | - .x2apic_available = vmware_legacy_x2apic_available, |
3550 | + .type = X86_HYPER_VMWARE, |
3551 | + .init.init_platform = vmware_platform_setup, |
3552 | + .init.x2apic_available = vmware_legacy_x2apic_available, |
3553 | }; |
3554 | -EXPORT_SYMBOL(x86_hyper_vmware); |
3555 | diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c |
3556 | index 0e662c55ae90..0b8cedb20d6d 100644 |
3557 | --- a/arch/x86/kernel/doublefault.c |
3558 | +++ b/arch/x86/kernel/doublefault.c |
3559 | @@ -50,25 +50,23 @@ static void doublefault_fn(void) |
3560 | cpu_relax(); |
3561 | } |
3562 | |
3563 | -struct tss_struct doublefault_tss __cacheline_aligned = { |
3564 | - .x86_tss = { |
3565 | - .sp0 = STACK_START, |
3566 | - .ss0 = __KERNEL_DS, |
3567 | - .ldt = 0, |
3568 | - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, |
3569 | - |
3570 | - .ip = (unsigned long) doublefault_fn, |
3571 | - /* 0x2 bit is always set */ |
3572 | - .flags = X86_EFLAGS_SF | 0x2, |
3573 | - .sp = STACK_START, |
3574 | - .es = __USER_DS, |
3575 | - .cs = __KERNEL_CS, |
3576 | - .ss = __KERNEL_DS, |
3577 | - .ds = __USER_DS, |
3578 | - .fs = __KERNEL_PERCPU, |
3579 | - |
3580 | - .__cr3 = __pa_nodebug(swapper_pg_dir), |
3581 | - } |
3582 | +struct x86_hw_tss doublefault_tss __cacheline_aligned = { |
3583 | + .sp0 = STACK_START, |
3584 | + .ss0 = __KERNEL_DS, |
3585 | + .ldt = 0, |
3586 | + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, |
3587 | + |
3588 | + .ip = (unsigned long) doublefault_fn, |
3589 | + /* 0x2 bit is always set */ |
3590 | + .flags = X86_EFLAGS_SF | 0x2, |
3591 | + .sp = STACK_START, |
3592 | + .es = __USER_DS, |
3593 | + .cs = __KERNEL_CS, |
3594 | + .ss = __KERNEL_DS, |
3595 | + .ds = __USER_DS, |
3596 | + .fs = __KERNEL_PERCPU, |
3597 | + |
3598 | + .__cr3 = __pa_nodebug(swapper_pg_dir), |
3599 | }; |
3600 | |
3601 | /* dummy for do_double_fault() call */ |
3602 | diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c |
3603 | index f13b4c00a5de..bbd6d986e2d0 100644 |
3604 | --- a/arch/x86/kernel/dumpstack.c |
3605 | +++ b/arch/x86/kernel/dumpstack.c |
3606 | @@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, |
3607 | return true; |
3608 | } |
3609 | |
3610 | +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) |
3611 | +{ |
3612 | + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); |
3613 | + |
3614 | + void *begin = ss; |
3615 | + void *end = ss + 1; |
3616 | + |
3617 | + if ((void *)stack < begin || (void *)stack >= end) |
3618 | + return false; |
3619 | + |
3620 | + info->type = STACK_TYPE_SYSENTER; |
3621 | + info->begin = begin; |
3622 | + info->end = end; |
3623 | + info->next_sp = NULL; |
3624 | + |
3625 | + return true; |
3626 | +} |
3627 | + |
3628 | static void printk_stack_address(unsigned long address, int reliable, |
3629 | char *log_lvl) |
3630 | { |
3631 | @@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable, |
3632 | printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); |
3633 | } |
3634 | |
3635 | +void show_iret_regs(struct pt_regs *regs) |
3636 | +{ |
3637 | + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); |
3638 | + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, |
3639 | + regs->sp, regs->flags); |
3640 | +} |
3641 | + |
3642 | +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) |
3643 | +{ |
3644 | + if (on_stack(info, regs, sizeof(*regs))) |
3645 | + __show_regs(regs, 0); |
3646 | + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, |
3647 | + IRET_FRAME_SIZE)) { |
3648 | + /* |
3649 | + * When an interrupt or exception occurs in entry code, the |
3650 | + * full pt_regs might not have been saved yet. In that case |
3651 | + * just print the iret frame. |
3652 | + */ |
3653 | + show_iret_regs(regs); |
3654 | + } |
3655 | +} |
3656 | + |
3657 | void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
3658 | unsigned long *stack, char *log_lvl) |
3659 | { |
3660 | @@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
3661 | * - task stack |
3662 | * - interrupt stack |
3663 | * - HW exception stacks (double fault, nmi, debug, mce) |
3664 | + * - SYSENTER stack |
3665 | * |
3666 | - * x86-32 can have up to three stacks: |
3667 | + * x86-32 can have up to four stacks: |
3668 | * - task stack |
3669 | * - softirq stack |
3670 | * - hardirq stack |
3671 | + * - SYSENTER stack |
3672 | */ |
3673 | for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
3674 | const char *stack_name; |
3675 | |
3676 | - /* |
3677 | - * If we overflowed the task stack into a guard page, jump back |
3678 | - * to the bottom of the usable stack. |
3679 | - */ |
3680 | - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) |
3681 | - stack = task_stack_page(task); |
3682 | - |
3683 | - if (get_stack_info(stack, task, &stack_info, &visit_mask)) |
3684 | - break; |
3685 | + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { |
3686 | + /* |
3687 | + * We weren't on a valid stack. It's possible that |
3688 | + * we overflowed a valid stack into a guard page. |
3689 | + * See if the next page up is valid so that we can |
3690 | + * generate some kind of backtrace if this happens. |
3691 | + */ |
3692 | + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); |
3693 | + if (get_stack_info(stack, task, &stack_info, &visit_mask)) |
3694 | + break; |
3695 | + } |
3696 | |
3697 | stack_name = stack_type_name(stack_info.type); |
3698 | if (stack_name) |
3699 | printk("%s <%s>\n", log_lvl, stack_name); |
3700 | |
3701 | - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) |
3702 | - __show_regs(regs, 0); |
3703 | + if (regs) |
3704 | + show_regs_safe(&stack_info, regs); |
3705 | |
3706 | /* |
3707 | * Scan the stack, printing any text addresses we find. At the |
3708 | @@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
3709 | |
3710 | /* |
3711 | * Don't print regs->ip again if it was already printed |
3712 | - * by __show_regs() below. |
3713 | + * by show_regs_safe() below. |
3714 | */ |
3715 | if (regs && stack == &regs->ip) |
3716 | goto next; |
3717 | @@ -155,8 +199,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
3718 | |
3719 | /* if the frame has entry regs, print them */ |
3720 | regs = unwind_get_entry_regs(&state); |
3721 | - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) |
3722 | - __show_regs(regs, 0); |
3723 | + if (regs) |
3724 | + show_regs_safe(&stack_info, regs); |
3725 | } |
3726 | |
3727 | if (stack_name) |
3728 | diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c |
3729 | index daefae83a3aa..5ff13a6b3680 100644 |
3730 | --- a/arch/x86/kernel/dumpstack_32.c |
3731 | +++ b/arch/x86/kernel/dumpstack_32.c |
3732 | @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) |
3733 | if (type == STACK_TYPE_SOFTIRQ) |
3734 | return "SOFTIRQ"; |
3735 | |
3736 | + if (type == STACK_TYPE_SYSENTER) |
3737 | + return "SYSENTER"; |
3738 | + |
3739 | return NULL; |
3740 | } |
3741 | |
3742 | @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, |
3743 | if (task != current) |
3744 | goto unknown; |
3745 | |
3746 | + if (in_sysenter_stack(stack, info)) |
3747 | + goto recursion_check; |
3748 | + |
3749 | if (in_hardirq_stack(stack, info)) |
3750 | goto recursion_check; |
3751 | |
3752 | diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c |
3753 | index 88ce2ffdb110..abc828f8c297 100644 |
3754 | --- a/arch/x86/kernel/dumpstack_64.c |
3755 | +++ b/arch/x86/kernel/dumpstack_64.c |
3756 | @@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type) |
3757 | if (type == STACK_TYPE_IRQ) |
3758 | return "IRQ"; |
3759 | |
3760 | + if (type == STACK_TYPE_SYSENTER) |
3761 | + return "SYSENTER"; |
3762 | + |
3763 | if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) |
3764 | return exception_stack_names[type - STACK_TYPE_EXCEPTION]; |
3765 | |
3766 | @@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, |
3767 | if (in_irq_stack(stack, info)) |
3768 | goto recursion_check; |
3769 | |
3770 | + if (in_sysenter_stack(stack, info)) |
3771 | + goto recursion_check; |
3772 | + |
3773 | goto unknown; |
3774 | |
3775 | recursion_check: |
3776 | diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c |
3777 | index 7affb7e3d9a5..6abd83572b01 100644 |
3778 | --- a/arch/x86/kernel/fpu/init.c |
3779 | +++ b/arch/x86/kernel/fpu/init.c |
3780 | @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) |
3781 | */ |
3782 | static void __init fpu__init_parse_early_param(void) |
3783 | { |
3784 | + char arg[32]; |
3785 | + char *argptr = arg; |
3786 | + int bit; |
3787 | + |
3788 | if (cmdline_find_option_bool(boot_command_line, "no387")) |
3789 | setup_clear_cpu_cap(X86_FEATURE_FPU); |
3790 | |
3791 | @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) |
3792 | |
3793 | if (cmdline_find_option_bool(boot_command_line, "noxsaves")) |
3794 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); |
3795 | + |
3796 | + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, |
3797 | + sizeof(arg)) && |
3798 | + get_option(&argptr, &bit) && |
3799 | + bit >= 0 && |
3800 | + bit < NCAPINTS * 32) |
3801 | + setup_clear_cpu_cap(bit); |
3802 | } |
3803 | |
3804 | /* |
3805 | diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c |
3806 | index f1d5476c9022..87a57b7642d3 100644 |
3807 | --- a/arch/x86/kernel/fpu/xstate.c |
3808 | +++ b/arch/x86/kernel/fpu/xstate.c |
3809 | @@ -15,6 +15,7 @@ |
3810 | #include <asm/fpu/xstate.h> |
3811 | |
3812 | #include <asm/tlbflush.h> |
3813 | +#include <asm/cpufeature.h> |
3814 | |
3815 | /* |
3816 | * Although we spell it out in here, the Processor Trace |
3817 | @@ -36,6 +37,19 @@ static const char *xfeature_names[] = |
3818 | "unknown xstate feature" , |
3819 | }; |
3820 | |
3821 | +static short xsave_cpuid_features[] __initdata = { |
3822 | + X86_FEATURE_FPU, |
3823 | + X86_FEATURE_XMM, |
3824 | + X86_FEATURE_AVX, |
3825 | + X86_FEATURE_MPX, |
3826 | + X86_FEATURE_MPX, |
3827 | + X86_FEATURE_AVX512F, |
3828 | + X86_FEATURE_AVX512F, |
3829 | + X86_FEATURE_AVX512F, |
3830 | + X86_FEATURE_INTEL_PT, |
3831 | + X86_FEATURE_PKU, |
3832 | +}; |
3833 | + |
3834 | /* |
3835 | * Mask of xstate features supported by the CPU and the kernel: |
3836 | */ |
3837 | @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size; |
3838 | void fpu__xstate_clear_all_cpu_caps(void) |
3839 | { |
3840 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
3841 | - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); |
3842 | - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); |
3843 | - setup_clear_cpu_cap(X86_FEATURE_XSAVES); |
3844 | - setup_clear_cpu_cap(X86_FEATURE_AVX); |
3845 | - setup_clear_cpu_cap(X86_FEATURE_AVX2); |
3846 | - setup_clear_cpu_cap(X86_FEATURE_AVX512F); |
3847 | - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); |
3848 | - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); |
3849 | - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); |
3850 | - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); |
3851 | - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); |
3852 | - setup_clear_cpu_cap(X86_FEATURE_AVX512BW); |
3853 | - setup_clear_cpu_cap(X86_FEATURE_AVX512VL); |
3854 | - setup_clear_cpu_cap(X86_FEATURE_MPX); |
3855 | - setup_clear_cpu_cap(X86_FEATURE_XGETBV1); |
3856 | - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); |
3857 | - setup_clear_cpu_cap(X86_FEATURE_PKU); |
3858 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); |
3859 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); |
3860 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); |
3861 | } |
3862 | |
3863 | /* |
3864 | @@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void) |
3865 | unsigned int eax, ebx, ecx, edx; |
3866 | static int on_boot_cpu __initdata = 1; |
3867 | int err; |
3868 | + int i; |
3869 | |
3870 | WARN_ON_FPU(!on_boot_cpu); |
3871 | on_boot_cpu = 0; |
3872 | @@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void) |
3873 | goto out_disable; |
3874 | } |
3875 | |
3876 | + /* |
3877 | + * Clear XSAVE features that are disabled in the normal CPUID. |
3878 | + */ |
3879 | + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { |
3880 | + if (!boot_cpu_has(xsave_cpuid_features[i])) |
3881 | + xfeatures_mask &= ~BIT(i); |
3882 | + } |
3883 | + |
3884 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); |
3885 | |
3886 | /* Enable xstate instructions to be able to continue with initialization: */ |
3887 | diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S |
3888 | index f1d528bb66a6..c29020907886 100644 |
3889 | --- a/arch/x86/kernel/head_32.S |
3890 | +++ b/arch/x86/kernel/head_32.S |
3891 | @@ -212,9 +212,6 @@ ENTRY(startup_32_smp) |
3892 | #endif |
3893 | |
3894 | .Ldefault_entry: |
3895 | -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ |
3896 | - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ |
3897 | - X86_CR0_PG) |
3898 | movl $(CR0_STATE & ~X86_CR0_PG),%eax |
3899 | movl %eax,%cr0 |
3900 | |
3901 | @@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array) |
3902 | # 24(%rsp) error code |
3903 | i = 0 |
3904 | .rept NUM_EXCEPTION_VECTORS |
3905 | - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 |
3906 | + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 |
3907 | pushl $0 # Dummy error code, to make stack frame uniform |
3908 | .endif |
3909 | pushl $i # 20(%esp) Vector number |
3910 | diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S |
3911 | index 6dde3f3fc1f8..7dca675fe78d 100644 |
3912 | --- a/arch/x86/kernel/head_64.S |
3913 | +++ b/arch/x86/kernel/head_64.S |
3914 | @@ -38,11 +38,12 @@ |
3915 | * |
3916 | */ |
3917 | |
3918 | -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) |
3919 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) |
3920 | |
3921 | +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) |
3922 | PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) |
3923 | PGD_START_KERNEL = pgd_index(__START_KERNEL_map) |
3924 | +#endif |
3925 | L3_START_KERNEL = pud_index(__START_KERNEL_map) |
3926 | |
3927 | .text |
3928 | @@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) |
3929 | .code64 |
3930 | .globl startup_64 |
3931 | startup_64: |
3932 | + UNWIND_HINT_EMPTY |
3933 | /* |
3934 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
3935 | * and someone has loaded an identity mapped page table |
3936 | @@ -89,6 +91,7 @@ startup_64: |
3937 | addq $(early_top_pgt - __START_KERNEL_map), %rax |
3938 | jmp 1f |
3939 | ENTRY(secondary_startup_64) |
3940 | + UNWIND_HINT_EMPTY |
3941 | /* |
3942 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
3943 | * and someone has loaded a mapped page table. |
3944 | @@ -133,6 +136,7 @@ ENTRY(secondary_startup_64) |
3945 | movq $1f, %rax |
3946 | jmp *%rax |
3947 | 1: |
3948 | + UNWIND_HINT_EMPTY |
3949 | |
3950 | /* Check if nx is implemented */ |
3951 | movl $0x80000001, %eax |
3952 | @@ -150,9 +154,6 @@ ENTRY(secondary_startup_64) |
3953 | 1: wrmsr /* Make changes effective */ |
3954 | |
3955 | /* Setup cr0 */ |
3956 | -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ |
3957 | - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ |
3958 | - X86_CR0_PG) |
3959 | movl $CR0_STATE, %eax |
3960 | /* Make changes effective */ |
3961 | movq %rax, %cr0 |
3962 | @@ -235,7 +236,7 @@ ENTRY(secondary_startup_64) |
3963 | pushq %rax # target address in negative space |
3964 | lretq |
3965 | .Lafter_lret: |
3966 | -ENDPROC(secondary_startup_64) |
3967 | +END(secondary_startup_64) |
3968 | |
3969 | #include "verify_cpu.S" |
3970 | |
3971 | @@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64) |
3972 | */ |
3973 | ENTRY(start_cpu0) |
3974 | movq initial_stack(%rip), %rsp |
3975 | + UNWIND_HINT_EMPTY |
3976 | jmp .Ljump_to_C_code |
3977 | ENDPROC(start_cpu0) |
3978 | #endif |
3979 | @@ -266,26 +268,24 @@ ENDPROC(start_cpu0) |
3980 | .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS |
3981 | __FINITDATA |
3982 | |
3983 | -bad_address: |
3984 | - jmp bad_address |
3985 | - |
3986 | __INIT |
3987 | ENTRY(early_idt_handler_array) |
3988 | - # 104(%rsp) %rflags |
3989 | - # 96(%rsp) %cs |
3990 | - # 88(%rsp) %rip |
3991 | - # 80(%rsp) error code |
3992 | i = 0 |
3993 | .rept NUM_EXCEPTION_VECTORS |
3994 | - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 |
3995 | - pushq $0 # Dummy error code, to make stack frame uniform |
3996 | + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 |
3997 | + UNWIND_HINT_IRET_REGS |
3998 | + pushq $0 # Dummy error code, to make stack frame uniform |
3999 | + .else |
4000 | + UNWIND_HINT_IRET_REGS offset=8 |
4001 | .endif |
4002 | pushq $i # 72(%rsp) Vector number |
4003 | jmp early_idt_handler_common |
4004 | + UNWIND_HINT_IRET_REGS |
4005 | i = i + 1 |
4006 | .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc |
4007 | .endr |
4008 | -ENDPROC(early_idt_handler_array) |
4009 | + UNWIND_HINT_IRET_REGS offset=16 |
4010 | +END(early_idt_handler_array) |
4011 | |
4012 | early_idt_handler_common: |
4013 | /* |
4014 | @@ -313,6 +313,7 @@ early_idt_handler_common: |
4015 | pushq %r13 /* pt_regs->r13 */ |
4016 | pushq %r14 /* pt_regs->r14 */ |
4017 | pushq %r15 /* pt_regs->r15 */ |
4018 | + UNWIND_HINT_REGS |
4019 | |
4020 | cmpq $14,%rsi /* Page fault? */ |
4021 | jnz 10f |
4022 | @@ -327,8 +328,8 @@ early_idt_handler_common: |
4023 | |
4024 | 20: |
4025 | decl early_recursion_flag(%rip) |
4026 | - jmp restore_regs_and_iret |
4027 | -ENDPROC(early_idt_handler_common) |
4028 | + jmp restore_regs_and_return_to_kernel |
4029 | +END(early_idt_handler_common) |
4030 | |
4031 | __INITDATA |
4032 | |
4033 | @@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts) |
4034 | |
4035 | .data |
4036 | |
4037 | -#ifndef CONFIG_XEN |
4038 | -NEXT_PAGE(init_top_pgt) |
4039 | - .fill 512,8,0 |
4040 | -#else |
4041 | +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) |
4042 | NEXT_PAGE(init_top_pgt) |
4043 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
4044 | .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 |
4045 | @@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt) |
4046 | * Don't set NX because code runs from these pages. |
4047 | */ |
4048 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) |
4049 | +#else |
4050 | +NEXT_PAGE(init_top_pgt) |
4051 | + .fill 512,8,0 |
4052 | #endif |
4053 | |
4054 | #ifdef CONFIG_X86_5LEVEL |
4055 | @@ -435,7 +436,7 @@ ENTRY(phys_base) |
4056 | EXPORT_SYMBOL(phys_base) |
4057 | |
4058 | #include "../../x86/xen/xen-head.S" |
4059 | - |
4060 | + |
4061 | __PAGE_ALIGNED_BSS |
4062 | NEXT_PAGE(empty_zero_page) |
4063 | .skip PAGE_SIZE |
4064 | diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c |
4065 | index 3feb648781c4..2f723301eb58 100644 |
4066 | --- a/arch/x86/kernel/ioport.c |
4067 | +++ b/arch/x86/kernel/ioport.c |
4068 | @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) |
4069 | * because the ->io_bitmap_max value must match the bitmap |
4070 | * contents: |
4071 | */ |
4072 | - tss = &per_cpu(cpu_tss, get_cpu()); |
4073 | + tss = &per_cpu(cpu_tss_rw, get_cpu()); |
4074 | |
4075 | if (turn_on) |
4076 | bitmap_clear(t->io_bitmap_ptr, from, num); |
4077 | diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c |
4078 | index 52089c043160..aa9d51eea9d0 100644 |
4079 | --- a/arch/x86/kernel/irq.c |
4080 | +++ b/arch/x86/kernel/irq.c |
4081 | @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) |
4082 | /* high bit used in ret_from_ code */ |
4083 | unsigned vector = ~regs->orig_ax; |
4084 | |
4085 | - /* |
4086 | - * NB: Unlike exception entries, IRQ entries do not reliably |
4087 | - * handle context tracking in the low-level entry code. This is |
4088 | - * because syscall entries execute briefly with IRQs on before |
4089 | - * updating context tracking state, so we can take an IRQ from |
4090 | - * kernel mode with CONTEXT_USER. The low-level entry code only |
4091 | - * updates the context if we came from user mode, so we won't |
4092 | - * switch to CONTEXT_KERNEL. We'll fix that once the syscall |
4093 | - * code is cleaned up enough that we can cleanly defer enabling |
4094 | - * IRQs. |
4095 | - */ |
4096 | - |
4097 | entering_irq(); |
4098 | |
4099 | /* entering_irq() tells RCU that we're not quiescent. Check it. */ |
4100 | diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c |
4101 | index 020efbf5786b..d86e344f5b3d 100644 |
4102 | --- a/arch/x86/kernel/irq_64.c |
4103 | +++ b/arch/x86/kernel/irq_64.c |
4104 | @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) |
4105 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) |
4106 | return; |
4107 | |
4108 | - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", |
4109 | + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", |
4110 | current->comm, curbase, regs->sp, |
4111 | irq_stack_top, irq_stack_bottom, |
4112 | - estack_top, estack_bottom); |
4113 | + estack_top, estack_bottom, (void *)regs->ip); |
4114 | |
4115 | if (sysctl_panic_on_stackoverflow) |
4116 | panic("low stack detected by irq handler - check messages\n"); |
4117 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c |
4118 | index 8bb9594d0761..a94de09edbed 100644 |
4119 | --- a/arch/x86/kernel/kvm.c |
4120 | +++ b/arch/x86/kernel/kvm.c |
4121 | @@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void) |
4122 | return kvm_cpuid_base(); |
4123 | } |
4124 | |
4125 | -const struct hypervisor_x86 x86_hyper_kvm __refconst = { |
4126 | +const __initconst struct hypervisor_x86 x86_hyper_kvm = { |
4127 | .name = "KVM", |
4128 | .detect = kvm_detect, |
4129 | - .x2apic_available = kvm_para_available, |
4130 | + .type = X86_HYPER_KVM, |
4131 | + .init.x2apic_available = kvm_para_available, |
4132 | }; |
4133 | -EXPORT_SYMBOL_GPL(x86_hyper_kvm); |
4134 | |
4135 | static __init int activate_jump_labels(void) |
4136 | { |
4137 | diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c |
4138 | index ae5615b03def..1c1eae961340 100644 |
4139 | --- a/arch/x86/kernel/ldt.c |
4140 | +++ b/arch/x86/kernel/ldt.c |
4141 | @@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) |
4142 | static void install_ldt(struct mm_struct *current_mm, |
4143 | struct ldt_struct *ldt) |
4144 | { |
4145 | - /* Synchronizes with lockless_dereference in load_mm_ldt. */ |
4146 | + /* Synchronizes with READ_ONCE in load_mm_ldt. */ |
4147 | smp_store_release(&current_mm->context.ldt, ldt); |
4148 | |
4149 | /* Activate the LDT for all CPUs using current_mm. */ |
4150 | diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c |
4151 | index ac0be8283325..9edadabf04f6 100644 |
4152 | --- a/arch/x86/kernel/paravirt_patch_64.c |
4153 | +++ b/arch/x86/kernel/paravirt_patch_64.c |
4154 | @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); |
4155 | DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); |
4156 | DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); |
4157 | DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); |
4158 | -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); |
4159 | DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); |
4160 | |
4161 | DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); |
4162 | @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, |
4163 | PATCH_SITE(pv_mmu_ops, read_cr2); |
4164 | PATCH_SITE(pv_mmu_ops, read_cr3); |
4165 | PATCH_SITE(pv_mmu_ops, write_cr3); |
4166 | - PATCH_SITE(pv_mmu_ops, flush_tlb_single); |
4167 | PATCH_SITE(pv_cpu_ops, wbinvd); |
4168 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) |
4169 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): |
4170 | diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c |
4171 | index c67685337c5a..517415978409 100644 |
4172 | --- a/arch/x86/kernel/process.c |
4173 | +++ b/arch/x86/kernel/process.c |
4174 | @@ -47,9 +47,25 @@ |
4175 | * section. Since TSS's are completely CPU-local, we want them |
4176 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. |
4177 | */ |
4178 | -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { |
4179 | +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { |
4180 | .x86_tss = { |
4181 | - .sp0 = TOP_OF_INIT_STACK, |
4182 | + /* |
4183 | + * .sp0 is only used when entering ring 0 from a lower |
4184 | + * privilege level. Since the init task never runs anything |
4185 | + * but ring 0 code, there is no need for a valid value here. |
4186 | + * Poison it. |
4187 | + */ |
4188 | + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, |
4189 | + |
4190 | +#ifdef CONFIG_X86_64 |
4191 | + /* |
4192 | + * .sp1 is cpu_current_top_of_stack. The init task never |
4193 | + * runs user code, but cpu_current_top_of_stack should still |
4194 | + * be well defined before the first context switch. |
4195 | + */ |
4196 | + .sp1 = TOP_OF_INIT_STACK, |
4197 | +#endif |
4198 | + |
4199 | #ifdef CONFIG_X86_32 |
4200 | .ss0 = __KERNEL_DS, |
4201 | .ss1 = __KERNEL_CS, |
4202 | @@ -65,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { |
4203 | */ |
4204 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, |
4205 | #endif |
4206 | -#ifdef CONFIG_X86_32 |
4207 | - .SYSENTER_stack_canary = STACK_END_MAGIC, |
4208 | -#endif |
4209 | }; |
4210 | -EXPORT_PER_CPU_SYMBOL(cpu_tss); |
4211 | +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); |
4212 | |
4213 | DEFINE_PER_CPU(bool, __tss_limit_invalid); |
4214 | EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); |
4215 | @@ -98,7 +111,7 @@ void exit_thread(struct task_struct *tsk) |
4216 | struct fpu *fpu = &t->fpu; |
4217 | |
4218 | if (bp) { |
4219 | - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); |
4220 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); |
4221 | |
4222 | t->io_bitmap_ptr = NULL; |
4223 | clear_thread_flag(TIF_IO_BITMAP); |
4224 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c |
4225 | index 11966251cd42..5224c6099184 100644 |
4226 | --- a/arch/x86/kernel/process_32.c |
4227 | +++ b/arch/x86/kernel/process_32.c |
4228 | @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
4229 | struct fpu *prev_fpu = &prev->fpu; |
4230 | struct fpu *next_fpu = &next->fpu; |
4231 | int cpu = smp_processor_id(); |
4232 | - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
4233 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); |
4234 | |
4235 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
4236 | |
4237 | @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
4238 | |
4239 | /* |
4240 | * Reload esp0 and cpu_current_top_of_stack. This changes |
4241 | - * current_thread_info(). |
4242 | + * current_thread_info(). Refresh the SYSENTER configuration in |
4243 | + * case prev or next is vm86. |
4244 | */ |
4245 | - load_sp0(tss, next); |
4246 | + update_sp0(next_p); |
4247 | + refresh_sysenter_cs(next); |
4248 | this_cpu_write(cpu_current_top_of_stack, |
4249 | (unsigned long)task_stack_page(next_p) + |
4250 | THREAD_SIZE); |
4251 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c |
4252 | index 302e7b2572d1..c75466232016 100644 |
4253 | --- a/arch/x86/kernel/process_64.c |
4254 | +++ b/arch/x86/kernel/process_64.c |
4255 | @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) |
4256 | unsigned int fsindex, gsindex; |
4257 | unsigned int ds, cs, es; |
4258 | |
4259 | - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); |
4260 | - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, |
4261 | - regs->sp, regs->flags); |
4262 | + show_iret_regs(regs); |
4263 | + |
4264 | if (regs->orig_ax != -1) |
4265 | pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); |
4266 | else |
4267 | @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) |
4268 | printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
4269 | regs->r13, regs->r14, regs->r15); |
4270 | |
4271 | + if (!all) |
4272 | + return; |
4273 | + |
4274 | asm("movl %%ds,%0" : "=r" (ds)); |
4275 | asm("movl %%cs,%0" : "=r" (cs)); |
4276 | asm("movl %%es,%0" : "=r" (es)); |
4277 | @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) |
4278 | rdmsrl(MSR_GS_BASE, gs); |
4279 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); |
4280 | |
4281 | - if (!all) |
4282 | - return; |
4283 | - |
4284 | cr0 = read_cr0(); |
4285 | cr2 = read_cr2(); |
4286 | cr3 = __read_cr3(); |
4287 | @@ -274,7 +273,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, |
4288 | struct inactive_task_frame *frame; |
4289 | struct task_struct *me = current; |
4290 | |
4291 | - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; |
4292 | childregs = task_pt_regs(p); |
4293 | fork_frame = container_of(childregs, struct fork_frame, regs); |
4294 | frame = &fork_frame->frame; |
4295 | @@ -401,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
4296 | struct fpu *prev_fpu = &prev->fpu; |
4297 | struct fpu *next_fpu = &next->fpu; |
4298 | int cpu = smp_processor_id(); |
4299 | - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
4300 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); |
4301 | |
4302 | WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && |
4303 | this_cpu_read(irq_count) != -1); |
4304 | @@ -463,9 +461,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
4305 | * Switch the PDA and FPU contexts. |
4306 | */ |
4307 | this_cpu_write(current_task, next_p); |
4308 | + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); |
4309 | |
4310 | - /* Reload esp0 and ss1. This changes current_thread_info(). */ |
4311 | - load_sp0(tss, next); |
4312 | + /* Reload sp0. */ |
4313 | + update_sp0(next_p); |
4314 | |
4315 | /* |
4316 | * Now maybe reload the debug registers and handle I/O bitmaps |
4317 | diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c |
4318 | index 5e0453f18a57..142126ab5aae 100644 |
4319 | --- a/arch/x86/kernel/smpboot.c |
4320 | +++ b/arch/x86/kernel/smpboot.c |
4321 | @@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) |
4322 | #ifdef CONFIG_X86_32 |
4323 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
4324 | irq_ctx_init(cpu); |
4325 | - per_cpu(cpu_current_top_of_stack, cpu) = |
4326 | - (unsigned long)task_stack_page(idle) + THREAD_SIZE; |
4327 | + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); |
4328 | #else |
4329 | initial_gs = per_cpu_offset(cpu); |
4330 | #endif |
4331 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c |
4332 | index 5a6b8f809792..74136fd16f49 100644 |
4333 | --- a/arch/x86/kernel/traps.c |
4334 | +++ b/arch/x86/kernel/traps.c |
4335 | @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) |
4336 | * will catch asm bugs and any attempt to use ist_preempt_enable |
4337 | * from double_fault. |
4338 | */ |
4339 | - BUG_ON((unsigned long)(current_top_of_stack() - |
4340 | - current_stack_pointer) >= THREAD_SIZE); |
4341 | + BUG_ON(!on_thread_stack()); |
4342 | |
4343 | preempt_enable_no_resched(); |
4344 | } |
4345 | @@ -349,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
4346 | |
4347 | /* |
4348 | * If IRET takes a non-IST fault on the espfix64 stack, then we |
4349 | - * end up promoting it to a doublefault. In that case, modify |
4350 | - * the stack to make it look like we just entered the #GP |
4351 | - * handler from user space, similar to bad_iret. |
4352 | + * end up promoting it to a doublefault. In that case, take |
4353 | + * advantage of the fact that we're not using the normal (TSS.sp0) |
4354 | + * stack right now. We can write a fake #GP(0) frame at TSS.sp0 |
4355 | + * and then modify our own IRET frame so that, when we return, |
4356 | + * we land directly at the #GP(0) vector with the stack already |
4357 | + * set up according to its expectations. |
4358 | + * |
4359 | + * The net result is that our #GP handler will think that we |
4360 | + * entered from usermode with the bad user context. |
4361 | * |
4362 | * No need for ist_enter here because we don't use RCU. |
4363 | */ |
4364 | @@ -359,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
4365 | regs->cs == __KERNEL_CS && |
4366 | regs->ip == (unsigned long)native_irq_return_iret) |
4367 | { |
4368 | - struct pt_regs *normal_regs = task_pt_regs(current); |
4369 | + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; |
4370 | |
4371 | - /* Fake a #GP(0) from userspace. */ |
4372 | - memmove(&normal_regs->ip, (void *)regs->sp, 5*8); |
4373 | - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ |
4374 | + /* |
4375 | + * regs->sp points to the failing IRET frame on the |
4376 | + * ESPFIX64 stack. Copy it to the entry stack. This fills |
4377 | + * in gpregs->ss through gpregs->ip. |
4378 | + * |
4379 | + */ |
4380 | + memmove(&gpregs->ip, (void *)regs->sp, 5*8); |
4381 | + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ |
4382 | + |
4383 | + /* |
4384 | + * Adjust our frame so that we return straight to the #GP |
4385 | + * vector with the expected RSP value. This is safe because |
4386 | + * we won't enable interrupts or schedule before we invoke |
4387 | + * general_protection, so nothing will clobber the stack |
4388 | + * frame we just set up. |
4389 | + */ |
4390 | regs->ip = (unsigned long)general_protection; |
4391 | - regs->sp = (unsigned long)&normal_regs->orig_ax; |
4392 | + regs->sp = (unsigned long)&gpregs->orig_ax; |
4393 | |
4394 | return; |
4395 | } |
4396 | @@ -390,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
4397 | * |
4398 | * Processors update CR2 whenever a page fault is detected. If a |
4399 | * second page fault occurs while an earlier page fault is being |
4400 | - * deliv- ered, the faulting linear address of the second fault will |
4401 | + * delivered, the faulting linear address of the second fault will |
4402 | * overwrite the contents of CR2 (replacing the previous |
4403 | * address). These updates to CR2 occur even if the page fault |
4404 | * results in a double fault or occurs during the delivery of a |
4405 | @@ -601,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3); |
4406 | |
4407 | #ifdef CONFIG_X86_64 |
4408 | /* |
4409 | - * Help handler running on IST stack to switch off the IST stack if the |
4410 | - * interrupted code was in user mode. The actual stack switch is done in |
4411 | - * entry_64.S |
4412 | + * Help handler running on a per-cpu (IST or entry trampoline) stack |
4413 | + * to switch to the normal thread stack if the interrupted code was in |
4414 | + * user mode. The actual stack switch is done in entry_64.S |
4415 | */ |
4416 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) |
4417 | { |
4418 | - struct pt_regs *regs = task_pt_regs(current); |
4419 | - *regs = *eregs; |
4420 | + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; |
4421 | + if (regs != eregs) |
4422 | + *regs = *eregs; |
4423 | return regs; |
4424 | } |
4425 | NOKPROBE_SYMBOL(sync_regs); |
4426 | @@ -624,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) |
4427 | /* |
4428 | * This is called from entry_64.S early in handling a fault |
4429 | * caused by a bad iret to user mode. To handle the fault |
4430 | - * correctly, we want move our stack frame to task_pt_regs |
4431 | - * and we want to pretend that the exception came from the |
4432 | - * iret target. |
4433 | + * correctly, we want to move our stack frame to where it would |
4434 | + * be had we entered directly on the entry stack (rather than |
4435 | + * just below the IRET frame) and we want to pretend that the |
4436 | + * exception came from the IRET target. |
4437 | */ |
4438 | struct bad_iret_stack *new_stack = |
4439 | - container_of(task_pt_regs(current), |
4440 | - struct bad_iret_stack, regs); |
4441 | + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; |
4442 | |
4443 | /* Copy the IRET target to the new stack. */ |
4444 | memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); |
4445 | @@ -795,14 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) |
4446 | debug_stack_usage_dec(); |
4447 | |
4448 | exit: |
4449 | -#if defined(CONFIG_X86_32) |
4450 | - /* |
4451 | - * This is the most likely code path that involves non-trivial use |
4452 | - * of the SYSENTER stack. Check that we haven't overrun it. |
4453 | - */ |
4454 | - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, |
4455 | - "Overran or corrupted SYSENTER stack\n"); |
4456 | -#endif |
4457 | ist_exit(regs); |
4458 | } |
4459 | NOKPROBE_SYMBOL(do_debug); |
4460 | @@ -929,6 +940,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) |
4461 | |
4462 | void __init trap_init(void) |
4463 | { |
4464 | + /* Init cpu_entry_area before IST entries are set up */ |
4465 | + setup_cpu_entry_areas(); |
4466 | + |
4467 | idt_setup_traps(); |
4468 | |
4469 | /* |
4470 | diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c |
4471 | index a3f973b2c97a..be86a865087a 100644 |
4472 | --- a/arch/x86/kernel/unwind_orc.c |
4473 | +++ b/arch/x86/kernel/unwind_orc.c |
4474 | @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) |
4475 | return NULL; |
4476 | } |
4477 | |
4478 | -static bool stack_access_ok(struct unwind_state *state, unsigned long addr, |
4479 | +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, |
4480 | size_t len) |
4481 | { |
4482 | struct stack_info *info = &state->stack_info; |
4483 | + void *addr = (void *)_addr; |
4484 | |
4485 | - /* |
4486 | - * If the address isn't on the current stack, switch to the next one. |
4487 | - * |
4488 | - * We may have to traverse multiple stacks to deal with the possibility |
4489 | - * that info->next_sp could point to an empty stack and the address |
4490 | - * could be on a subsequent stack. |
4491 | - */ |
4492 | - while (!on_stack(info, (void *)addr, len)) |
4493 | - if (get_stack_info(info->next_sp, state->task, info, |
4494 | - &state->stack_mask)) |
4495 | - return false; |
4496 | + if (!on_stack(info, addr, len) && |
4497 | + (get_stack_info(addr, state->task, info, &state->stack_mask))) |
4498 | + return false; |
4499 | |
4500 | return true; |
4501 | } |
4502 | @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, |
4503 | return true; |
4504 | } |
4505 | |
4506 | -#define REGS_SIZE (sizeof(struct pt_regs)) |
4507 | -#define SP_OFFSET (offsetof(struct pt_regs, sp)) |
4508 | -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) |
4509 | -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) |
4510 | - |
4511 | static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, |
4512 | - unsigned long *ip, unsigned long *sp, bool full) |
4513 | + unsigned long *ip, unsigned long *sp) |
4514 | { |
4515 | - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; |
4516 | - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; |
4517 | - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); |
4518 | - |
4519 | - if (IS_ENABLED(CONFIG_X86_64)) { |
4520 | - if (!stack_access_ok(state, addr, regs_size)) |
4521 | - return false; |
4522 | + struct pt_regs *regs = (struct pt_regs *)addr; |
4523 | |
4524 | - *ip = regs->ip; |
4525 | - *sp = regs->sp; |
4526 | + /* x86-32 support will be more complicated due to the &regs->sp hack */ |
4527 | + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); |
4528 | |
4529 | - return true; |
4530 | - } |
4531 | - |
4532 | - if (!stack_access_ok(state, addr, sp_offset)) |
4533 | + if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) |
4534 | return false; |
4535 | |
4536 | *ip = regs->ip; |
4537 | + *sp = regs->sp; |
4538 | + return true; |
4539 | +} |
4540 | |
4541 | - if (user_mode(regs)) { |
4542 | - if (!stack_access_ok(state, addr + sp_offset, |
4543 | - REGS_SIZE - SP_OFFSET)) |
4544 | - return false; |
4545 | +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, |
4546 | + unsigned long *ip, unsigned long *sp) |
4547 | +{ |
4548 | + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; |
4549 | |
4550 | - *sp = regs->sp; |
4551 | - } else |
4552 | - *sp = (unsigned long)&regs->sp; |
4553 | + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) |
4554 | + return false; |
4555 | |
4556 | + *ip = regs->ip; |
4557 | + *sp = regs->sp; |
4558 | return true; |
4559 | } |
4560 | |
4561 | @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) |
4562 | unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; |
4563 | enum stack_type prev_type = state->stack_info.type; |
4564 | struct orc_entry *orc; |
4565 | - struct pt_regs *ptregs; |
4566 | bool indirect = false; |
4567 | |
4568 | if (unwind_done(state)) |
4569 | @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) |
4570 | break; |
4571 | |
4572 | case ORC_TYPE_REGS: |
4573 | - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { |
4574 | + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { |
4575 | orc_warn("can't dereference registers at %p for ip %pB\n", |
4576 | (void *)sp, (void *)orig_ip); |
4577 | goto done; |
4578 | @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) |
4579 | break; |
4580 | |
4581 | case ORC_TYPE_REGS_IRET: |
4582 | - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { |
4583 | + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { |
4584 | orc_warn("can't dereference iret registers at %p for ip %pB\n", |
4585 | (void *)sp, (void *)orig_ip); |
4586 | goto done; |
4587 | } |
4588 | |
4589 | - ptregs = container_of((void *)sp, struct pt_regs, ip); |
4590 | - if ((unsigned long)ptregs >= prev_sp && |
4591 | - on_stack(&state->stack_info, ptregs, REGS_SIZE)) { |
4592 | - state->regs = ptregs; |
4593 | - state->full_regs = false; |
4594 | - } else |
4595 | - state->regs = NULL; |
4596 | - |
4597 | + state->regs = (void *)sp - IRET_FRAME_OFFSET; |
4598 | + state->full_regs = false; |
4599 | state->signal = true; |
4600 | break; |
4601 | |
4602 | @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, |
4603 | } |
4604 | |
4605 | if (get_stack_info((unsigned long *)state->sp, state->task, |
4606 | - &state->stack_info, &state->stack_mask)) |
4607 | - return; |
4608 | + &state->stack_info, &state->stack_mask)) { |
4609 | + /* |
4610 | + * We weren't on a valid stack. It's possible that |
4611 | + * we overflowed a valid stack into a guard page. |
4612 | + * See if the next page up is valid so that we can |
4613 | + * generate some kind of backtrace if this happens. |
4614 | + */ |
4615 | + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); |
4616 | + if (get_stack_info(next_page, state->task, &state->stack_info, |
4617 | + &state->stack_mask)) |
4618 | + return; |
4619 | + } |
4620 | |
4621 | /* |
4622 | * The caller can provide the address of the first frame directly |
4623 | diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S |
4624 | index 014ea59aa153..3d3c2f71f617 100644 |
4625 | --- a/arch/x86/kernel/verify_cpu.S |
4626 | +++ b/arch/x86/kernel/verify_cpu.S |
4627 | @@ -33,7 +33,7 @@ |
4628 | #include <asm/cpufeatures.h> |
4629 | #include <asm/msr-index.h> |
4630 | |
4631 | -verify_cpu: |
4632 | +ENTRY(verify_cpu) |
4633 | pushf # Save caller passed flags |
4634 | push $0 # Kill any dangerous flags |
4635 | popf |
4636 | @@ -139,3 +139,4 @@ verify_cpu: |
4637 | popf # Restore caller passed flags |
4638 | xorl %eax, %eax |
4639 | ret |
4640 | +ENDPROC(verify_cpu) |
4641 | diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c |
4642 | index 68244742ecb0..5edb27f1a2c4 100644 |
4643 | --- a/arch/x86/kernel/vm86_32.c |
4644 | +++ b/arch/x86/kernel/vm86_32.c |
4645 | @@ -55,6 +55,7 @@ |
4646 | #include <asm/irq.h> |
4647 | #include <asm/traps.h> |
4648 | #include <asm/vm86.h> |
4649 | +#include <asm/switch_to.h> |
4650 | |
4651 | /* |
4652 | * Known problems: |
4653 | @@ -94,7 +95,6 @@ |
4654 | |
4655 | void save_v86_state(struct kernel_vm86_regs *regs, int retval) |
4656 | { |
4657 | - struct tss_struct *tss; |
4658 | struct task_struct *tsk = current; |
4659 | struct vm86plus_struct __user *user; |
4660 | struct vm86 *vm86 = current->thread.vm86; |
4661 | @@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) |
4662 | do_exit(SIGSEGV); |
4663 | } |
4664 | |
4665 | - tss = &per_cpu(cpu_tss, get_cpu()); |
4666 | + preempt_disable(); |
4667 | tsk->thread.sp0 = vm86->saved_sp0; |
4668 | tsk->thread.sysenter_cs = __KERNEL_CS; |
4669 | - load_sp0(tss, &tsk->thread); |
4670 | + update_sp0(tsk); |
4671 | + refresh_sysenter_cs(&tsk->thread); |
4672 | vm86->saved_sp0 = 0; |
4673 | - put_cpu(); |
4674 | + preempt_enable(); |
4675 | |
4676 | memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); |
4677 | |
4678 | @@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) |
4679 | |
4680 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) |
4681 | { |
4682 | - struct tss_struct *tss; |
4683 | struct task_struct *tsk = current; |
4684 | struct vm86 *vm86 = tsk->thread.vm86; |
4685 | struct kernel_vm86_regs vm86regs; |
4686 | @@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) |
4687 | vm86->saved_sp0 = tsk->thread.sp0; |
4688 | lazy_save_gs(vm86->regs32.gs); |
4689 | |
4690 | - tss = &per_cpu(cpu_tss, get_cpu()); |
4691 | /* make room for real-mode segments */ |
4692 | + preempt_disable(); |
4693 | tsk->thread.sp0 += 16; |
4694 | |
4695 | - if (static_cpu_has(X86_FEATURE_SEP)) |
4696 | + if (static_cpu_has(X86_FEATURE_SEP)) { |
4697 | tsk->thread.sysenter_cs = 0; |
4698 | + refresh_sysenter_cs(&tsk->thread); |
4699 | + } |
4700 | |
4701 | - load_sp0(tss, &tsk->thread); |
4702 | - put_cpu(); |
4703 | + update_sp0(tsk); |
4704 | + preempt_enable(); |
4705 | |
4706 | if (vm86->flags & VM86_SCREEN_BITMAP) |
4707 | mark_screen_rdonly(tsk->mm); |
4708 | diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S |
4709 | index a4009fb9be87..d2a8b5a24a44 100644 |
4710 | --- a/arch/x86/kernel/vmlinux.lds.S |
4711 | +++ b/arch/x86/kernel/vmlinux.lds.S |
4712 | @@ -107,6 +107,15 @@ SECTIONS |
4713 | SOFTIRQENTRY_TEXT |
4714 | *(.fixup) |
4715 | *(.gnu.warning) |
4716 | + |
4717 | +#ifdef CONFIG_X86_64 |
4718 | + . = ALIGN(PAGE_SIZE); |
4719 | + _entry_trampoline = .; |
4720 | + *(.entry_trampoline) |
4721 | + . = ALIGN(PAGE_SIZE); |
4722 | + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); |
4723 | +#endif |
4724 | + |
4725 | /* End of text section */ |
4726 | _etext = .; |
4727 | } :text = 0x9090 |
4728 | diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c |
4729 | index a088b2c47f73..5b2d10c1973a 100644 |
4730 | --- a/arch/x86/kernel/x86_init.c |
4731 | +++ b/arch/x86/kernel/x86_init.c |
4732 | @@ -28,6 +28,8 @@ void x86_init_noop(void) { } |
4733 | void __init x86_init_uint_noop(unsigned int unused) { } |
4734 | int __init iommu_init_noop(void) { return 0; } |
4735 | void iommu_shutdown_noop(void) { } |
4736 | +bool __init bool_x86_init_noop(void) { return false; } |
4737 | +void x86_op_int_noop(int cpu) { } |
4738 | |
4739 | /* |
4740 | * The platform setup functions are preset with the default functions |
4741 | @@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = { |
4742 | .init_irq = x86_default_pci_init_irq, |
4743 | .fixup_irqs = x86_default_pci_fixup_irqs, |
4744 | }, |
4745 | + |
4746 | + .hyper = { |
4747 | + .init_platform = x86_init_noop, |
4748 | + .x2apic_available = bool_x86_init_noop, |
4749 | + .init_mem_mapping = x86_init_noop, |
4750 | + }, |
4751 | }; |
4752 | |
4753 | struct x86_cpuinit_ops x86_cpuinit = { |
4754 | @@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { |
4755 | .get_nmi_reason = default_get_nmi_reason, |
4756 | .save_sched_clock_state = tsc_save_sched_clock_state, |
4757 | .restore_sched_clock_state = tsc_restore_sched_clock_state, |
4758 | + .hyper.pin_vcpu = x86_op_int_noop, |
4759 | }; |
4760 | |
4761 | EXPORT_SYMBOL_GPL(x86_platform); |
4762 | diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c |
4763 | index 7a69cf053711..13ebeedcec07 100644 |
4764 | --- a/arch/x86/kvm/mmu.c |
4765 | +++ b/arch/x86/kvm/mmu.c |
4766 | @@ -5476,13 +5476,13 @@ int kvm_mmu_module_init(void) |
4767 | |
4768 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", |
4769 | sizeof(struct pte_list_desc), |
4770 | - 0, 0, NULL); |
4771 | + 0, SLAB_ACCOUNT, NULL); |
4772 | if (!pte_list_desc_cache) |
4773 | goto nomem; |
4774 | |
4775 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", |
4776 | sizeof(struct kvm_mmu_page), |
4777 | - 0, 0, NULL); |
4778 | + 0, SLAB_ACCOUNT, NULL); |
4779 | if (!mmu_page_header_cache) |
4780 | goto nomem; |
4781 | |
4782 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c |
4783 | index bc5921c1e2f2..47d9432756f3 100644 |
4784 | --- a/arch/x86/kvm/vmx.c |
4785 | +++ b/arch/x86/kvm/vmx.c |
4786 | @@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
4787 | * processors. See 22.2.4. |
4788 | */ |
4789 | vmcs_writel(HOST_TR_BASE, |
4790 | - (unsigned long)this_cpu_ptr(&cpu_tss)); |
4791 | + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); |
4792 | vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ |
4793 | |
4794 | /* |
4795 | diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c |
4796 | index 553f8fd23cc4..4846eff7e4c8 100644 |
4797 | --- a/arch/x86/lib/delay.c |
4798 | +++ b/arch/x86/lib/delay.c |
4799 | @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops) |
4800 | delay = min_t(u64, MWAITX_MAX_LOOPS, loops); |
4801 | |
4802 | /* |
4803 | - * Use cpu_tss as a cacheline-aligned, seldomly |
4804 | + * Use cpu_tss_rw as a cacheline-aligned, seldomly |
4805 | * accessed per-cpu variable as the monitor target. |
4806 | */ |
4807 | - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); |
4808 | + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); |
4809 | |
4810 | /* |
4811 | * AMD, like Intel, supports the EAX hint and EAX=0xf |
4812 | diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c |
4813 | index b0ff378650a9..3109ba6c6ede 100644 |
4814 | --- a/arch/x86/mm/fault.c |
4815 | +++ b/arch/x86/mm/fault.c |
4816 | @@ -29,26 +29,6 @@ |
4817 | #define CREATE_TRACE_POINTS |
4818 | #include <asm/trace/exceptions.h> |
4819 | |
4820 | -/* |
4821 | - * Page fault error code bits: |
4822 | - * |
4823 | - * bit 0 == 0: no page found 1: protection fault |
4824 | - * bit 1 == 0: read access 1: write access |
4825 | - * bit 2 == 0: kernel-mode access 1: user-mode access |
4826 | - * bit 3 == 1: use of reserved bit detected |
4827 | - * bit 4 == 1: fault was an instruction fetch |
4828 | - * bit 5 == 1: protection keys block access |
4829 | - */ |
4830 | -enum x86_pf_error_code { |
4831 | - |
4832 | - PF_PROT = 1 << 0, |
4833 | - PF_WRITE = 1 << 1, |
4834 | - PF_USER = 1 << 2, |
4835 | - PF_RSVD = 1 << 3, |
4836 | - PF_INSTR = 1 << 4, |
4837 | - PF_PK = 1 << 5, |
4838 | -}; |
4839 | - |
4840 | /* |
4841 | * Returns 0 if mmiotrace is disabled, or if the fault is not |
4842 | * handled by mmiotrace: |
4843 | @@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) |
4844 | * If it was a exec (instruction fetch) fault on NX page, then |
4845 | * do not ignore the fault: |
4846 | */ |
4847 | - if (error_code & PF_INSTR) |
4848 | + if (error_code & X86_PF_INSTR) |
4849 | return 0; |
4850 | |
4851 | instr = (void *)convert_ip_to_linear(current, regs); |
4852 | @@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) |
4853 | * siginfo so userspace can discover which protection key was set |
4854 | * on the PTE. |
4855 | * |
4856 | - * If we get here, we know that the hardware signaled a PF_PK |
4857 | + * If we get here, we know that the hardware signaled a X86_PF_PK |
4858 | * fault and that there was a VMA once we got in the fault |
4859 | * handler. It does *not* guarantee that the VMA we find here |
4860 | * was the one that we faulted on. |
4861 | @@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) |
4862 | /* |
4863 | * force_sig_info_fault() is called from a number of |
4864 | * contexts, some of which have a VMA and some of which |
4865 | - * do not. The PF_PK handing happens after we have a |
4866 | + * do not. The X86_PF_PK handing happens after we have a |
4867 | * valid VMA, so we should never reach this without a |
4868 | * valid VMA. |
4869 | */ |
4870 | @@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, |
4871 | if (!oops_may_print()) |
4872 | return; |
4873 | |
4874 | - if (error_code & PF_INSTR) { |
4875 | + if (error_code & X86_PF_INSTR) { |
4876 | unsigned int level; |
4877 | pgd_t *pgd; |
4878 | pte_t *pte; |
4879 | @@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, |
4880 | */ |
4881 | if (current->thread.sig_on_uaccess_err && signal) { |
4882 | tsk->thread.trap_nr = X86_TRAP_PF; |
4883 | - tsk->thread.error_code = error_code | PF_USER; |
4884 | + tsk->thread.error_code = error_code | X86_PF_USER; |
4885 | tsk->thread.cr2 = address; |
4886 | |
4887 | /* XXX: hwpoison faults will set the wrong code. */ |
4888 | @@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, |
4889 | struct task_struct *tsk = current; |
4890 | |
4891 | /* User mode accesses just cause a SIGSEGV */ |
4892 | - if (error_code & PF_USER) { |
4893 | + if (error_code & X86_PF_USER) { |
4894 | /* |
4895 | * It's possible to have interrupts off here: |
4896 | */ |
4897 | @@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, |
4898 | * Instruction fetch faults in the vsyscall page might need |
4899 | * emulation. |
4900 | */ |
4901 | - if (unlikely((error_code & PF_INSTR) && |
4902 | + if (unlikely((error_code & X86_PF_INSTR) && |
4903 | ((address & ~0xfff) == VSYSCALL_ADDR))) { |
4904 | if (emulate_vsyscall(regs, address)) |
4905 | return; |
4906 | @@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, |
4907 | * are always protection faults. |
4908 | */ |
4909 | if (address >= TASK_SIZE_MAX) |
4910 | - error_code |= PF_PROT; |
4911 | + error_code |= X86_PF_PROT; |
4912 | |
4913 | if (likely(show_unhandled_signals)) |
4914 | show_signal_msg(regs, error_code, address, tsk); |
4915 | @@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, |
4916 | |
4917 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
4918 | return false; |
4919 | - if (error_code & PF_PK) |
4920 | + if (error_code & X86_PF_PK) |
4921 | return true; |
4922 | /* this checks permission keys on the VMA: */ |
4923 | - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), |
4924 | - (error_code & PF_INSTR), foreign)) |
4925 | + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), |
4926 | + (error_code & X86_PF_INSTR), foreign)) |
4927 | return true; |
4928 | return false; |
4929 | } |
4930 | @@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, |
4931 | int code = BUS_ADRERR; |
4932 | |
4933 | /* Kernel mode? Handle exceptions or die: */ |
4934 | - if (!(error_code & PF_USER)) { |
4935 | + if (!(error_code & X86_PF_USER)) { |
4936 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); |
4937 | return; |
4938 | } |
4939 | @@ -1053,14 +1033,14 @@ static noinline void |
4940 | mm_fault_error(struct pt_regs *regs, unsigned long error_code, |
4941 | unsigned long address, u32 *pkey, unsigned int fault) |
4942 | { |
4943 | - if (fatal_signal_pending(current) && !(error_code & PF_USER)) { |
4944 | + if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { |
4945 | no_context(regs, error_code, address, 0, 0); |
4946 | return; |
4947 | } |
4948 | |
4949 | if (fault & VM_FAULT_OOM) { |
4950 | /* Kernel mode? Handle exceptions or die: */ |
4951 | - if (!(error_code & PF_USER)) { |
4952 | + if (!(error_code & X86_PF_USER)) { |
4953 | no_context(regs, error_code, address, |
4954 | SIGSEGV, SEGV_MAPERR); |
4955 | return; |
4956 | @@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, |
4957 | |
4958 | static int spurious_fault_check(unsigned long error_code, pte_t *pte) |
4959 | { |
4960 | - if ((error_code & PF_WRITE) && !pte_write(*pte)) |
4961 | + if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) |
4962 | return 0; |
4963 | |
4964 | - if ((error_code & PF_INSTR) && !pte_exec(*pte)) |
4965 | + if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) |
4966 | return 0; |
4967 | /* |
4968 | * Note: We do not do lazy flushing on protection key |
4969 | - * changes, so no spurious fault will ever set PF_PK. |
4970 | + * changes, so no spurious fault will ever set X86_PF_PK. |
4971 | */ |
4972 | - if ((error_code & PF_PK)) |
4973 | + if ((error_code & X86_PF_PK)) |
4974 | return 1; |
4975 | |
4976 | return 1; |
4977 | @@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address) |
4978 | * change, so user accesses are not expected to cause spurious |
4979 | * faults. |
4980 | */ |
4981 | - if (error_code != (PF_WRITE | PF_PROT) |
4982 | - && error_code != (PF_INSTR | PF_PROT)) |
4983 | + if (error_code != (X86_PF_WRITE | X86_PF_PROT) && |
4984 | + error_code != (X86_PF_INSTR | X86_PF_PROT)) |
4985 | return 0; |
4986 | |
4987 | pgd = init_mm.pgd + pgd_index(address); |
4988 | @@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) |
4989 | * always an unconditional error and can never result in |
4990 | * a follow-up action to resolve the fault, like a COW. |
4991 | */ |
4992 | - if (error_code & PF_PK) |
4993 | + if (error_code & X86_PF_PK) |
4994 | return 1; |
4995 | |
4996 | /* |
4997 | * Make sure to check the VMA so that we do not perform |
4998 | - * faults just to hit a PF_PK as soon as we fill in a |
4999 | + * faults just to hit a X86_PF_PK as soon as we fill in a |
5000 | * page. |
5001 | */ |
5002 | - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), |
5003 | - (error_code & PF_INSTR), foreign)) |
5004 | + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), |
5005 | + (error_code & X86_PF_INSTR), foreign)) |
5006 | return 1; |
5007 | |
5008 | - if (error_code & PF_WRITE) { |
5009 | + if (error_code & X86_PF_WRITE) { |
5010 | /* write, present and write, not present: */ |
5011 | if (unlikely(!(vma->vm_flags & VM_WRITE))) |
5012 | return 1; |
5013 | @@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) |
5014 | } |
5015 | |
5016 | /* read, present: */ |
5017 | - if (unlikely(error_code & PF_PROT)) |
5018 | + if (unlikely(error_code & X86_PF_PROT)) |
5019 | return 1; |
5020 | |
5021 | /* read, not present: */ |
5022 | @@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) |
5023 | if (!static_cpu_has(X86_FEATURE_SMAP)) |
5024 | return false; |
5025 | |
5026 | - if (error_code & PF_USER) |
5027 | + if (error_code & X86_PF_USER) |
5028 | return false; |
5029 | |
5030 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) |
5031 | @@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5032 | * protection error (error_code & 9) == 0. |
5033 | */ |
5034 | if (unlikely(fault_in_kernel_space(address))) { |
5035 | - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { |
5036 | + if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { |
5037 | if (vmalloc_fault(address) >= 0) |
5038 | return; |
5039 | |
5040 | @@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5041 | if (unlikely(kprobes_fault(regs))) |
5042 | return; |
5043 | |
5044 | - if (unlikely(error_code & PF_RSVD)) |
5045 | + if (unlikely(error_code & X86_PF_RSVD)) |
5046 | pgtable_bad(regs, error_code, address); |
5047 | |
5048 | if (unlikely(smap_violation(error_code, regs))) { |
5049 | @@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5050 | */ |
5051 | if (user_mode(regs)) { |
5052 | local_irq_enable(); |
5053 | - error_code |= PF_USER; |
5054 | + error_code |= X86_PF_USER; |
5055 | flags |= FAULT_FLAG_USER; |
5056 | } else { |
5057 | if (regs->flags & X86_EFLAGS_IF) |
5058 | @@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5059 | |
5060 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); |
5061 | |
5062 | - if (error_code & PF_WRITE) |
5063 | + if (error_code & X86_PF_WRITE) |
5064 | flags |= FAULT_FLAG_WRITE; |
5065 | - if (error_code & PF_INSTR) |
5066 | + if (error_code & X86_PF_INSTR) |
5067 | flags |= FAULT_FLAG_INSTRUCTION; |
5068 | |
5069 | /* |
5070 | @@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5071 | * space check, thus avoiding the deadlock: |
5072 | */ |
5073 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { |
5074 | - if ((error_code & PF_USER) == 0 && |
5075 | + if (!(error_code & X86_PF_USER) && |
5076 | !search_exception_tables(regs->ip)) { |
5077 | bad_area_nosemaphore(regs, error_code, address, NULL); |
5078 | return; |
5079 | @@ -1409,7 +1389,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
5080 | bad_area(regs, error_code, address); |
5081 | return; |
5082 | } |
5083 | - if (error_code & PF_USER) { |
5084 | + if (error_code & X86_PF_USER) { |
5085 | /* |
5086 | * Accessing the stack below %sp is always a bug. |
5087 | * The large cushion allows instructions like enter |
5088 | diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c |
5089 | index af5c1ed21d43..a22c2b95e513 100644 |
5090 | --- a/arch/x86/mm/init.c |
5091 | +++ b/arch/x86/mm/init.c |
5092 | @@ -671,7 +671,7 @@ void __init init_mem_mapping(void) |
5093 | load_cr3(swapper_pg_dir); |
5094 | __flush_tlb_all(); |
5095 | |
5096 | - hypervisor_init_mem_mapping(); |
5097 | + x86_init.hyper.init_mem_mapping(); |
5098 | |
5099 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); |
5100 | } |
5101 | diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c |
5102 | index 048fbe8fc274..adcea90a2046 100644 |
5103 | --- a/arch/x86/mm/init_64.c |
5104 | +++ b/arch/x86/mm/init_64.c |
5105 | @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) |
5106 | |
5107 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) |
5108 | void register_page_bootmem_memmap(unsigned long section_nr, |
5109 | - struct page *start_page, unsigned long size) |
5110 | + struct page *start_page, unsigned long nr_pages) |
5111 | { |
5112 | unsigned long addr = (unsigned long)start_page; |
5113 | - unsigned long end = (unsigned long)(start_page + size); |
5114 | + unsigned long end = (unsigned long)(start_page + nr_pages); |
5115 | unsigned long next; |
5116 | pgd_t *pgd; |
5117 | p4d_t *p4d; |
5118 | pud_t *pud; |
5119 | pmd_t *pmd; |
5120 | - unsigned int nr_pages; |
5121 | + unsigned int nr_pmd_pages; |
5122 | struct page *page; |
5123 | |
5124 | for (; addr < end; addr = next) { |
5125 | @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, |
5126 | if (pmd_none(*pmd)) |
5127 | continue; |
5128 | |
5129 | - nr_pages = 1 << (get_order(PMD_SIZE)); |
5130 | + nr_pmd_pages = 1 << get_order(PMD_SIZE); |
5131 | page = pmd_page(*pmd); |
5132 | - while (nr_pages--) |
5133 | + while (nr_pmd_pages--) |
5134 | get_page_bootmem(section_nr, page++, |
5135 | SECTION_INFO); |
5136 | } |
5137 | diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c |
5138 | index 8f5be3eb40dd..9ec70d780f1f 100644 |
5139 | --- a/arch/x86/mm/kasan_init_64.c |
5140 | +++ b/arch/x86/mm/kasan_init_64.c |
5141 | @@ -4,19 +4,150 @@ |
5142 | #include <linux/bootmem.h> |
5143 | #include <linux/kasan.h> |
5144 | #include <linux/kdebug.h> |
5145 | +#include <linux/memblock.h> |
5146 | #include <linux/mm.h> |
5147 | #include <linux/sched.h> |
5148 | #include <linux/sched/task.h> |
5149 | #include <linux/vmalloc.h> |
5150 | |
5151 | #include <asm/e820/types.h> |
5152 | +#include <asm/pgalloc.h> |
5153 | #include <asm/tlbflush.h> |
5154 | #include <asm/sections.h> |
5155 | #include <asm/pgtable.h> |
5156 | |
5157 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; |
5158 | |
5159 | -static int __init map_range(struct range *range) |
5160 | +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); |
5161 | + |
5162 | +static __init void *early_alloc(size_t size, int nid) |
5163 | +{ |
5164 | + return memblock_virt_alloc_try_nid_nopanic(size, size, |
5165 | + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); |
5166 | +} |
5167 | + |
5168 | +static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, |
5169 | + unsigned long end, int nid) |
5170 | +{ |
5171 | + pte_t *pte; |
5172 | + |
5173 | + if (pmd_none(*pmd)) { |
5174 | + void *p; |
5175 | + |
5176 | + if (boot_cpu_has(X86_FEATURE_PSE) && |
5177 | + ((end - addr) == PMD_SIZE) && |
5178 | + IS_ALIGNED(addr, PMD_SIZE)) { |
5179 | + p = early_alloc(PMD_SIZE, nid); |
5180 | + if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) |
5181 | + return; |
5182 | + else if (p) |
5183 | + memblock_free(__pa(p), PMD_SIZE); |
5184 | + } |
5185 | + |
5186 | + p = early_alloc(PAGE_SIZE, nid); |
5187 | + pmd_populate_kernel(&init_mm, pmd, p); |
5188 | + } |
5189 | + |
5190 | + pte = pte_offset_kernel(pmd, addr); |
5191 | + do { |
5192 | + pte_t entry; |
5193 | + void *p; |
5194 | + |
5195 | + if (!pte_none(*pte)) |
5196 | + continue; |
5197 | + |
5198 | + p = early_alloc(PAGE_SIZE, nid); |
5199 | + entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); |
5200 | + set_pte_at(&init_mm, addr, pte, entry); |
5201 | + } while (pte++, addr += PAGE_SIZE, addr != end); |
5202 | +} |
5203 | + |
5204 | +static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, |
5205 | + unsigned long end, int nid) |
5206 | +{ |
5207 | + pmd_t *pmd; |
5208 | + unsigned long next; |
5209 | + |
5210 | + if (pud_none(*pud)) { |
5211 | + void *p; |
5212 | + |
5213 | + if (boot_cpu_has(X86_FEATURE_GBPAGES) && |
5214 | + ((end - addr) == PUD_SIZE) && |
5215 | + IS_ALIGNED(addr, PUD_SIZE)) { |
5216 | + p = early_alloc(PUD_SIZE, nid); |
5217 | + if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) |
5218 | + return; |
5219 | + else if (p) |
5220 | + memblock_free(__pa(p), PUD_SIZE); |
5221 | + } |
5222 | + |
5223 | + p = early_alloc(PAGE_SIZE, nid); |
5224 | + pud_populate(&init_mm, pud, p); |
5225 | + } |
5226 | + |
5227 | + pmd = pmd_offset(pud, addr); |
5228 | + do { |
5229 | + next = pmd_addr_end(addr, end); |
5230 | + if (!pmd_large(*pmd)) |
5231 | + kasan_populate_pmd(pmd, addr, next, nid); |
5232 | + } while (pmd++, addr = next, addr != end); |
5233 | +} |
5234 | + |
5235 | +static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, |
5236 | + unsigned long end, int nid) |
5237 | +{ |
5238 | + pud_t *pud; |
5239 | + unsigned long next; |
5240 | + |
5241 | + if (p4d_none(*p4d)) { |
5242 | + void *p = early_alloc(PAGE_SIZE, nid); |
5243 | + |
5244 | + p4d_populate(&init_mm, p4d, p); |
5245 | + } |
5246 | + |
5247 | + pud = pud_offset(p4d, addr); |
5248 | + do { |
5249 | + next = pud_addr_end(addr, end); |
5250 | + if (!pud_large(*pud)) |
5251 | + kasan_populate_pud(pud, addr, next, nid); |
5252 | + } while (pud++, addr = next, addr != end); |
5253 | +} |
5254 | + |
5255 | +static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, |
5256 | + unsigned long end, int nid) |
5257 | +{ |
5258 | + void *p; |
5259 | + p4d_t *p4d; |
5260 | + unsigned long next; |
5261 | + |
5262 | + if (pgd_none(*pgd)) { |
5263 | + p = early_alloc(PAGE_SIZE, nid); |
5264 | + pgd_populate(&init_mm, pgd, p); |
5265 | + } |
5266 | + |
5267 | + p4d = p4d_offset(pgd, addr); |
5268 | + do { |
5269 | + next = p4d_addr_end(addr, end); |
5270 | + kasan_populate_p4d(p4d, addr, next, nid); |
5271 | + } while (p4d++, addr = next, addr != end); |
5272 | +} |
5273 | + |
5274 | +static void __init kasan_populate_shadow(unsigned long addr, unsigned long end, |
5275 | + int nid) |
5276 | +{ |
5277 | + pgd_t *pgd; |
5278 | + unsigned long next; |
5279 | + |
5280 | + addr = addr & PAGE_MASK; |
5281 | + end = round_up(end, PAGE_SIZE); |
5282 | + pgd = pgd_offset_k(addr); |
5283 | + do { |
5284 | + next = pgd_addr_end(addr, end); |
5285 | + kasan_populate_pgd(pgd, addr, next, nid); |
5286 | + } while (pgd++, addr = next, addr != end); |
5287 | +} |
5288 | + |
5289 | +static void __init map_range(struct range *range) |
5290 | { |
5291 | unsigned long start; |
5292 | unsigned long end; |
5293 | @@ -24,15 +155,17 @@ static int __init map_range(struct range *range) |
5294 | start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); |
5295 | end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); |
5296 | |
5297 | - return vmemmap_populate(start, end, NUMA_NO_NODE); |
5298 | + kasan_populate_shadow(start, end, early_pfn_to_nid(range->start)); |
5299 | } |
5300 | |
5301 | static void __init clear_pgds(unsigned long start, |
5302 | unsigned long end) |
5303 | { |
5304 | pgd_t *pgd; |
5305 | + /* See comment in kasan_init() */ |
5306 | + unsigned long pgd_end = end & PGDIR_MASK; |
5307 | |
5308 | - for (; start < end; start += PGDIR_SIZE) { |
5309 | + for (; start < pgd_end; start += PGDIR_SIZE) { |
5310 | pgd = pgd_offset_k(start); |
5311 | /* |
5312 | * With folded p4d, pgd_clear() is nop, use p4d_clear() |
5313 | @@ -43,29 +176,61 @@ static void __init clear_pgds(unsigned long start, |
5314 | else |
5315 | pgd_clear(pgd); |
5316 | } |
5317 | + |
5318 | + pgd = pgd_offset_k(start); |
5319 | + for (; start < end; start += P4D_SIZE) |
5320 | + p4d_clear(p4d_offset(pgd, start)); |
5321 | +} |
5322 | + |
5323 | +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) |
5324 | +{ |
5325 | + unsigned long p4d; |
5326 | + |
5327 | + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) |
5328 | + return (p4d_t *)pgd; |
5329 | + |
5330 | + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; |
5331 | + p4d += __START_KERNEL_map - phys_base; |
5332 | + return (p4d_t *)p4d + p4d_index(addr); |
5333 | +} |
5334 | + |
5335 | +static void __init kasan_early_p4d_populate(pgd_t *pgd, |
5336 | + unsigned long addr, |
5337 | + unsigned long end) |
5338 | +{ |
5339 | + pgd_t pgd_entry; |
5340 | + p4d_t *p4d, p4d_entry; |
5341 | + unsigned long next; |
5342 | + |
5343 | + if (pgd_none(*pgd)) { |
5344 | + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d)); |
5345 | + set_pgd(pgd, pgd_entry); |
5346 | + } |
5347 | + |
5348 | + p4d = early_p4d_offset(pgd, addr); |
5349 | + do { |
5350 | + next = p4d_addr_end(addr, end); |
5351 | + |
5352 | + if (!p4d_none(*p4d)) |
5353 | + continue; |
5354 | + |
5355 | + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud)); |
5356 | + set_p4d(p4d, p4d_entry); |
5357 | + } while (p4d++, addr = next, addr != end && p4d_none(*p4d)); |
5358 | } |
5359 | |
5360 | static void __init kasan_map_early_shadow(pgd_t *pgd) |
5361 | { |
5362 | - int i; |
5363 | - unsigned long start = KASAN_SHADOW_START; |
5364 | + /* See comment in kasan_init() */ |
5365 | + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK; |
5366 | unsigned long end = KASAN_SHADOW_END; |
5367 | + unsigned long next; |
5368 | |
5369 | - for (i = pgd_index(start); start < end; i++) { |
5370 | - switch (CONFIG_PGTABLE_LEVELS) { |
5371 | - case 4: |
5372 | - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | |
5373 | - _KERNPG_TABLE); |
5374 | - break; |
5375 | - case 5: |
5376 | - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) | |
5377 | - _KERNPG_TABLE); |
5378 | - break; |
5379 | - default: |
5380 | - BUILD_BUG(); |
5381 | - } |
5382 | - start += PGDIR_SIZE; |
5383 | - } |
5384 | + pgd += pgd_index(addr); |
5385 | + do { |
5386 | + next = pgd_addr_end(addr, end); |
5387 | + kasan_early_p4d_populate(pgd, addr, next); |
5388 | + } while (pgd++, addr = next, addr != end); |
5389 | } |
5390 | |
5391 | #ifdef CONFIG_KASAN_INLINE |
5392 | @@ -102,7 +267,7 @@ void __init kasan_early_init(void) |
5393 | for (i = 0; i < PTRS_PER_PUD; i++) |
5394 | kasan_zero_pud[i] = __pud(pud_val); |
5395 | |
5396 | - for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) |
5397 | + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) |
5398 | kasan_zero_p4d[i] = __p4d(p4d_val); |
5399 | |
5400 | kasan_map_early_shadow(early_top_pgt); |
5401 | @@ -112,37 +277,76 @@ void __init kasan_early_init(void) |
5402 | void __init kasan_init(void) |
5403 | { |
5404 | int i; |
5405 | + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; |
5406 | |
5407 | #ifdef CONFIG_KASAN_INLINE |
5408 | register_die_notifier(&kasan_die_notifier); |
5409 | #endif |
5410 | |
5411 | memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); |
5412 | + |
5413 | + /* |
5414 | + * We use the same shadow offset for 4- and 5-level paging to |
5415 | + * facilitate boot-time switching between paging modes. |
5416 | + * As result in 5-level paging mode KASAN_SHADOW_START and |
5417 | + * KASAN_SHADOW_END are not aligned to PGD boundary. |
5418 | + * |
5419 | + * KASAN_SHADOW_START doesn't share PGD with anything else. |
5420 | + * We claim whole PGD entry to make things easier. |
5421 | + * |
5422 | + * KASAN_SHADOW_END lands in the last PGD entry and it collides with |
5423 | + * bunch of things like kernel code, modules, EFI mapping, etc. |
5424 | + * We need to take extra steps to not overwrite them. |
5425 | + */ |
5426 | + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
5427 | + void *ptr; |
5428 | + |
5429 | + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); |
5430 | + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table)); |
5431 | + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)], |
5432 | + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); |
5433 | + } |
5434 | + |
5435 | load_cr3(early_top_pgt); |
5436 | __flush_tlb_all(); |
5437 | |
5438 | - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); |
5439 | + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END); |
5440 | |
5441 | - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, |
5442 | + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK), |
5443 | kasan_mem_to_shadow((void *)PAGE_OFFSET)); |
5444 | |
5445 | for (i = 0; i < E820_MAX_ENTRIES; i++) { |
5446 | if (pfn_mapped[i].end == 0) |
5447 | break; |
5448 | |
5449 | - if (map_range(&pfn_mapped[i])) |
5450 | - panic("kasan: unable to allocate shadow!"); |
5451 | + map_range(&pfn_mapped[i]); |
5452 | } |
5453 | + |
5454 | kasan_populate_zero_shadow( |
5455 | kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), |
5456 | kasan_mem_to_shadow((void *)__START_KERNEL_map)); |
5457 | |
5458 | - vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), |
5459 | - (unsigned long)kasan_mem_to_shadow(_end), |
5460 | - NUMA_NO_NODE); |
5461 | + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), |
5462 | + (unsigned long)kasan_mem_to_shadow(_end), |
5463 | + early_pfn_to_nid(__pa(_stext))); |
5464 | + |
5465 | + shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); |
5466 | + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); |
5467 | + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, |
5468 | + PAGE_SIZE); |
5469 | + |
5470 | + shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); |
5471 | + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); |
5472 | + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, |
5473 | + PAGE_SIZE); |
5474 | |
5475 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), |
5476 | - (void *)KASAN_SHADOW_END); |
5477 | + shadow_cpu_entry_begin); |
5478 | + |
5479 | + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, |
5480 | + (unsigned long)shadow_cpu_entry_end, 0); |
5481 | + |
5482 | + kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); |
5483 | |
5484 | load_cr3(init_top_pgt); |
5485 | __flush_tlb_all(); |
5486 | diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c |
5487 | index 84fcfde53f8f..04d5157fe7f8 100644 |
5488 | --- a/arch/x86/power/cpu.c |
5489 | +++ b/arch/x86/power/cpu.c |
5490 | @@ -160,17 +160,19 @@ static void do_fpu_end(void) |
5491 | static void fix_processor_context(void) |
5492 | { |
5493 | int cpu = smp_processor_id(); |
5494 | - struct tss_struct *t = &per_cpu(cpu_tss, cpu); |
5495 | #ifdef CONFIG_X86_64 |
5496 | struct desc_struct *desc = get_cpu_gdt_rw(cpu); |
5497 | tss_desc tss; |
5498 | #endif |
5499 | - set_tss_desc(cpu, t); /* |
5500 | - * This just modifies memory; should not be |
5501 | - * necessary. But... This is necessary, because |
5502 | - * 386 hardware has concept of busy TSS or some |
5503 | - * similar stupidity. |
5504 | - */ |
5505 | + |
5506 | + /* |
5507 | + * We need to reload TR, which requires that we change the |
5508 | + * GDT entry to indicate "available" first. |
5509 | + * |
5510 | + * XXX: This could probably all be replaced by a call to |
5511 | + * force_reload_TR(). |
5512 | + */ |
5513 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); |
5514 | |
5515 | #ifdef CONFIG_X86_64 |
5516 | memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); |
5517 | diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c |
5518 | index de503c225ae1..754d5391d9fa 100644 |
5519 | --- a/arch/x86/xen/enlighten_hvm.c |
5520 | +++ b/arch/x86/xen/enlighten_hvm.c |
5521 | @@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void) |
5522 | return xen_cpuid_base(); |
5523 | } |
5524 | |
5525 | -const struct hypervisor_x86 x86_hyper_xen_hvm = { |
5526 | +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { |
5527 | .name = "Xen HVM", |
5528 | .detect = xen_platform_hvm, |
5529 | - .init_platform = xen_hvm_guest_init, |
5530 | - .pin_vcpu = xen_pin_vcpu, |
5531 | - .x2apic_available = xen_x2apic_para_available, |
5532 | - .init_mem_mapping = xen_hvm_init_mem_mapping, |
5533 | + .type = X86_HYPER_XEN_HVM, |
5534 | + .init.init_platform = xen_hvm_guest_init, |
5535 | + .init.x2apic_available = xen_x2apic_para_available, |
5536 | + .init.init_mem_mapping = xen_hvm_init_mem_mapping, |
5537 | + .runtime.pin_vcpu = xen_pin_vcpu, |
5538 | }; |
5539 | -EXPORT_SYMBOL(x86_hyper_xen_hvm); |
5540 | diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c |
5541 | index d4396e27b1fb..ae3a071e1d0f 100644 |
5542 | --- a/arch/x86/xen/enlighten_pv.c |
5543 | +++ b/arch/x86/xen/enlighten_pv.c |
5544 | @@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = { |
5545 | #ifdef CONFIG_X86_MCE |
5546 | { machine_check, xen_machine_check, true }, |
5547 | #endif |
5548 | - { nmi, xen_nmi, true }, |
5549 | + { nmi, xen_xennmi, true }, |
5550 | { overflow, xen_overflow, false }, |
5551 | #ifdef CONFIG_IA32_EMULATION |
5552 | { entry_INT80_compat, xen_entry_INT80_compat, false }, |
5553 | @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, |
5554 | } |
5555 | } |
5556 | |
5557 | -static void xen_load_sp0(struct tss_struct *tss, |
5558 | - struct thread_struct *thread) |
5559 | +static void xen_load_sp0(unsigned long sp0) |
5560 | { |
5561 | struct multicall_space mcs; |
5562 | |
5563 | mcs = xen_mc_entry(0); |
5564 | - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); |
5565 | + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); |
5566 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
5567 | - tss->x86_tss.sp0 = thread->sp0; |
5568 | + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); |
5569 | } |
5570 | |
5571 | void xen_set_iopl_mask(unsigned mask) |
5572 | @@ -1460,9 +1459,9 @@ static uint32_t __init xen_platform_pv(void) |
5573 | return 0; |
5574 | } |
5575 | |
5576 | -const struct hypervisor_x86 x86_hyper_xen_pv = { |
5577 | +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { |
5578 | .name = "Xen PV", |
5579 | .detect = xen_platform_pv, |
5580 | - .pin_vcpu = xen_pin_vcpu, |
5581 | + .type = X86_HYPER_XEN_PV, |
5582 | + .runtime.pin_vcpu = xen_pin_vcpu, |
5583 | }; |
5584 | -EXPORT_SYMBOL(x86_hyper_xen_pv); |
5585 | diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c |
5586 | index 71495f1a86d7..c2454237fa67 100644 |
5587 | --- a/arch/x86/xen/mmu_pv.c |
5588 | +++ b/arch/x86/xen/mmu_pv.c |
5589 | @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) |
5590 | } |
5591 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
5592 | |
5593 | -#if CONFIG_PGTABLE_LEVELS == 4 |
5594 | +#ifdef CONFIG_X86_64 |
5595 | __visible pudval_t xen_pud_val(pud_t pud) |
5596 | { |
5597 | return pte_mfn_to_pfn(pud.pud); |
5598 | @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) |
5599 | |
5600 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
5601 | } |
5602 | -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
5603 | +#endif /* CONFIG_X86_64 */ |
5604 | |
5605 | static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, |
5606 | int (*func)(struct mm_struct *mm, struct page *, enum pt_level), |
5607 | @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, |
5608 | int (*func)(struct mm_struct *mm, struct page *, enum pt_level), |
5609 | bool last, unsigned long limit) |
5610 | { |
5611 | - int i, nr, flush = 0; |
5612 | + int flush = 0; |
5613 | + pud_t *pud; |
5614 | |
5615 | - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D; |
5616 | - for (i = 0; i < nr; i++) { |
5617 | - pud_t *pud; |
5618 | |
5619 | - if (p4d_none(p4d[i])) |
5620 | - continue; |
5621 | + if (p4d_none(*p4d)) |
5622 | + return flush; |
5623 | |
5624 | - pud = pud_offset(&p4d[i], 0); |
5625 | - if (PTRS_PER_PUD > 1) |
5626 | - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); |
5627 | - flush |= xen_pud_walk(mm, pud, func, |
5628 | - last && i == nr - 1, limit); |
5629 | - } |
5630 | + pud = pud_offset(p4d, 0); |
5631 | + if (PTRS_PER_PUD > 1) |
5632 | + flush |= (*func)(mm, virt_to_page(pud), PT_PUD); |
5633 | + flush |= xen_pud_walk(mm, pud, func, last, limit); |
5634 | return flush; |
5635 | } |
5636 | |
5637 | @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, |
5638 | continue; |
5639 | |
5640 | p4d = p4d_offset(&pgd[i], 0); |
5641 | - if (PTRS_PER_P4D > 1) |
5642 | - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D); |
5643 | flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); |
5644 | } |
5645 | |
5646 | @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr) |
5647 | { |
5648 | pgd_t *pgd; |
5649 | p4d_t *p4d; |
5650 | - unsigned int i; |
5651 | bool unpin; |
5652 | |
5653 | unpin = (vaddr == 2 * PGDIR_SIZE); |
5654 | vaddr &= PMD_MASK; |
5655 | pgd = pgd_offset_k(vaddr); |
5656 | p4d = p4d_offset(pgd, 0); |
5657 | - for (i = 0; i < PTRS_PER_P4D; i++) { |
5658 | - if (p4d_none(p4d[i])) |
5659 | - continue; |
5660 | - xen_cleanmfnmap_p4d(p4d + i, unpin); |
5661 | - } |
5662 | - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { |
5663 | - set_pgd(pgd, __pgd(0)); |
5664 | - xen_cleanmfnmap_free_pgtbl(p4d, unpin); |
5665 | - } |
5666 | + if (!p4d_none(*p4d)) |
5667 | + xen_cleanmfnmap_p4d(p4d, unpin); |
5668 | } |
5669 | |
5670 | static void __init xen_pagetable_p2m_free(void) |
5671 | @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn) |
5672 | xen_release_ptpage(pfn, PT_PMD); |
5673 | } |
5674 | |
5675 | -#if CONFIG_PGTABLE_LEVELS >= 4 |
5676 | +#ifdef CONFIG_X86_64 |
5677 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) |
5678 | { |
5679 | xen_alloc_ptpage(mm, pfn, PT_PUD); |
5680 | @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) |
5681 | */ |
5682 | void __init xen_relocate_p2m(void) |
5683 | { |
5684 | - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; |
5685 | + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys; |
5686 | unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; |
5687 | - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; |
5688 | + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud; |
5689 | pte_t *pt; |
5690 | pmd_t *pmd; |
5691 | pud_t *pud; |
5692 | - p4d_t *p4d = NULL; |
5693 | pgd_t *pgd; |
5694 | unsigned long *new_p2m; |
5695 | int save_pud; |
5696 | @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void) |
5697 | n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; |
5698 | n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; |
5699 | n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; |
5700 | - if (PTRS_PER_P4D > 1) |
5701 | - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT; |
5702 | - else |
5703 | - n_p4d = 0; |
5704 | - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d; |
5705 | + n_frames = n_pte + n_pt + n_pmd + n_pud; |
5706 | |
5707 | new_area = xen_find_free_area(PFN_PHYS(n_frames)); |
5708 | if (!new_area) { |
5709 | @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void) |
5710 | * To avoid any possible virtual address collision, just use |
5711 | * 2 * PUD_SIZE for the new area. |
5712 | */ |
5713 | - p4d_phys = new_area; |
5714 | - pud_phys = p4d_phys + PFN_PHYS(n_p4d); |
5715 | + pud_phys = new_area; |
5716 | pmd_phys = pud_phys + PFN_PHYS(n_pud); |
5717 | pt_phys = pmd_phys + PFN_PHYS(n_pmd); |
5718 | p2m_pfn = PFN_DOWN(pt_phys) + n_pt; |
5719 | |
5720 | pgd = __va(read_cr3_pa()); |
5721 | new_p2m = (unsigned long *)(2 * PGDIR_SIZE); |
5722 | - idx_p4d = 0; |
5723 | save_pud = n_pud; |
5724 | - do { |
5725 | - if (n_p4d > 0) { |
5726 | - p4d = early_memremap(p4d_phys, PAGE_SIZE); |
5727 | - clear_page(p4d); |
5728 | - n_pud = min(save_pud, PTRS_PER_P4D); |
5729 | - } |
5730 | - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { |
5731 | - pud = early_memremap(pud_phys, PAGE_SIZE); |
5732 | - clear_page(pud); |
5733 | - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); |
5734 | - idx_pmd++) { |
5735 | - pmd = early_memremap(pmd_phys, PAGE_SIZE); |
5736 | - clear_page(pmd); |
5737 | - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); |
5738 | - idx_pt++) { |
5739 | - pt = early_memremap(pt_phys, PAGE_SIZE); |
5740 | - clear_page(pt); |
5741 | - for (idx_pte = 0; |
5742 | - idx_pte < min(n_pte, PTRS_PER_PTE); |
5743 | - idx_pte++) { |
5744 | - set_pte(pt + idx_pte, |
5745 | - pfn_pte(p2m_pfn, PAGE_KERNEL)); |
5746 | - p2m_pfn++; |
5747 | - } |
5748 | - n_pte -= PTRS_PER_PTE; |
5749 | - early_memunmap(pt, PAGE_SIZE); |
5750 | - make_lowmem_page_readonly(__va(pt_phys)); |
5751 | - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, |
5752 | - PFN_DOWN(pt_phys)); |
5753 | - set_pmd(pmd + idx_pt, |
5754 | - __pmd(_PAGE_TABLE | pt_phys)); |
5755 | - pt_phys += PAGE_SIZE; |
5756 | + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { |
5757 | + pud = early_memremap(pud_phys, PAGE_SIZE); |
5758 | + clear_page(pud); |
5759 | + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); |
5760 | + idx_pmd++) { |
5761 | + pmd = early_memremap(pmd_phys, PAGE_SIZE); |
5762 | + clear_page(pmd); |
5763 | + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); |
5764 | + idx_pt++) { |
5765 | + pt = early_memremap(pt_phys, PAGE_SIZE); |
5766 | + clear_page(pt); |
5767 | + for (idx_pte = 0; |
5768 | + idx_pte < min(n_pte, PTRS_PER_PTE); |
5769 | + idx_pte++) { |
5770 | + set_pte(pt + idx_pte, |
5771 | + pfn_pte(p2m_pfn, PAGE_KERNEL)); |
5772 | + p2m_pfn++; |
5773 | } |
5774 | - n_pt -= PTRS_PER_PMD; |
5775 | - early_memunmap(pmd, PAGE_SIZE); |
5776 | - make_lowmem_page_readonly(__va(pmd_phys)); |
5777 | - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, |
5778 | - PFN_DOWN(pmd_phys)); |
5779 | - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); |
5780 | - pmd_phys += PAGE_SIZE; |
5781 | + n_pte -= PTRS_PER_PTE; |
5782 | + early_memunmap(pt, PAGE_SIZE); |
5783 | + make_lowmem_page_readonly(__va(pt_phys)); |
5784 | + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, |
5785 | + PFN_DOWN(pt_phys)); |
5786 | + set_pmd(pmd + idx_pt, |
5787 | + __pmd(_PAGE_TABLE | pt_phys)); |
5788 | + pt_phys += PAGE_SIZE; |
5789 | } |
5790 | - n_pmd -= PTRS_PER_PUD; |
5791 | - early_memunmap(pud, PAGE_SIZE); |
5792 | - make_lowmem_page_readonly(__va(pud_phys)); |
5793 | - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); |
5794 | - if (n_p4d > 0) |
5795 | - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); |
5796 | - else |
5797 | - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); |
5798 | - pud_phys += PAGE_SIZE; |
5799 | - } |
5800 | - if (n_p4d > 0) { |
5801 | - save_pud -= PTRS_PER_P4D; |
5802 | - early_memunmap(p4d, PAGE_SIZE); |
5803 | - make_lowmem_page_readonly(__va(p4d_phys)); |
5804 | - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys)); |
5805 | - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys)); |
5806 | - p4d_phys += PAGE_SIZE; |
5807 | + n_pt -= PTRS_PER_PMD; |
5808 | + early_memunmap(pmd, PAGE_SIZE); |
5809 | + make_lowmem_page_readonly(__va(pmd_phys)); |
5810 | + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, |
5811 | + PFN_DOWN(pmd_phys)); |
5812 | + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); |
5813 | + pmd_phys += PAGE_SIZE; |
5814 | } |
5815 | - } while (++idx_p4d < n_p4d); |
5816 | + n_pmd -= PTRS_PER_PUD; |
5817 | + early_memunmap(pud, PAGE_SIZE); |
5818 | + make_lowmem_page_readonly(__va(pud_phys)); |
5819 | + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); |
5820 | + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); |
5821 | + pud_phys += PAGE_SIZE; |
5822 | + } |
5823 | |
5824 | /* Now copy the old p2m info to the new area. */ |
5825 | memcpy(new_p2m, xen_p2m_addr, size); |
5826 | @@ -2311,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) |
5827 | #endif |
5828 | case FIX_TEXT_POKE0: |
5829 | case FIX_TEXT_POKE1: |
5830 | - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: |
5831 | + case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: |
5832 | /* All local page mappings */ |
5833 | pte = pfn_pte(phys, prot); |
5834 | break; |
5835 | @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void) |
5836 | pv_mmu_ops.set_pte = xen_set_pte; |
5837 | pv_mmu_ops.set_pmd = xen_set_pmd; |
5838 | pv_mmu_ops.set_pud = xen_set_pud; |
5839 | -#if CONFIG_PGTABLE_LEVELS >= 4 |
5840 | +#ifdef CONFIG_X86_64 |
5841 | pv_mmu_ops.set_p4d = xen_set_p4d; |
5842 | #endif |
5843 | |
5844 | @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void) |
5845 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; |
5846 | pv_mmu_ops.release_pte = xen_release_pte; |
5847 | pv_mmu_ops.release_pmd = xen_release_pmd; |
5848 | -#if CONFIG_PGTABLE_LEVELS >= 4 |
5849 | +#ifdef CONFIG_X86_64 |
5850 | pv_mmu_ops.alloc_pud = xen_alloc_pud; |
5851 | pv_mmu_ops.release_pud = xen_release_pud; |
5852 | #endif |
5853 | @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { |
5854 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), |
5855 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), |
5856 | |
5857 | -#if CONFIG_PGTABLE_LEVELS >= 4 |
5858 | +#ifdef CONFIG_X86_64 |
5859 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), |
5860 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), |
5861 | .set_p4d = xen_set_p4d_hyper, |
5862 | |
5863 | .alloc_pud = xen_alloc_pmd_init, |
5864 | .release_pud = xen_release_pmd_init, |
5865 | -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ |
5866 | +#endif /* CONFIG_X86_64 */ |
5867 | |
5868 | .activate_mm = xen_activate_mm, |
5869 | .dup_mmap = xen_dup_mmap, |
5870 | diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c |
5871 | index 05f91ce9b55e..c0c756c76afe 100644 |
5872 | --- a/arch/x86/xen/smp_pv.c |
5873 | +++ b/arch/x86/xen/smp_pv.c |
5874 | @@ -14,6 +14,7 @@ |
5875 | * single-threaded. |
5876 | */ |
5877 | #include <linux/sched.h> |
5878 | +#include <linux/sched/task_stack.h> |
5879 | #include <linux/err.h> |
5880 | #include <linux/slab.h> |
5881 | #include <linux/smp.h> |
5882 | @@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
5883 | #endif |
5884 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
5885 | |
5886 | + /* |
5887 | + * Bring up the CPU in cpu_bringup_and_idle() with the stack |
5888 | + * pointing just below where pt_regs would be if it were a normal |
5889 | + * kernel entry. |
5890 | + */ |
5891 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
5892 | ctxt->flags = VGCF_IN_KERNEL; |
5893 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
5894 | ctxt->user_regs.ds = __USER_DS; |
5895 | ctxt->user_regs.es = __USER_DS; |
5896 | ctxt->user_regs.ss = __KERNEL_DS; |
5897 | + ctxt->user_regs.cs = __KERNEL_CS; |
5898 | + ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle); |
5899 | |
5900 | xen_copy_trap_info(ctxt->trap_ctxt); |
5901 | |
5902 | @@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
5903 | ctxt->gdt_frames[0] = gdt_mfn; |
5904 | ctxt->gdt_ents = GDT_ENTRIES; |
5905 | |
5906 | + /* |
5907 | + * Set SS:SP that Xen will use when entering guest kernel mode |
5908 | + * from guest user mode. Subsequent calls to load_sp0() can |
5909 | + * change this value. |
5910 | + */ |
5911 | ctxt->kernel_ss = __KERNEL_DS; |
5912 | - ctxt->kernel_sp = idle->thread.sp0; |
5913 | + ctxt->kernel_sp = task_top_of_stack(idle); |
5914 | |
5915 | #ifdef CONFIG_X86_32 |
5916 | ctxt->event_callback_cs = __KERNEL_CS; |
5917 | @@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
5918 | (unsigned long)xen_hypervisor_callback; |
5919 | ctxt->failsafe_callback_eip = |
5920 | (unsigned long)xen_failsafe_callback; |
5921 | - ctxt->user_regs.cs = __KERNEL_CS; |
5922 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); |
5923 | |
5924 | - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
5925 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); |
5926 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) |
5927 | BUG(); |
5928 | diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S |
5929 | index c98a48c861fd..8a10c9a9e2b5 100644 |
5930 | --- a/arch/x86/xen/xen-asm_64.S |
5931 | +++ b/arch/x86/xen/xen-asm_64.S |
5932 | @@ -30,7 +30,7 @@ xen_pv_trap debug |
5933 | xen_pv_trap xendebug |
5934 | xen_pv_trap int3 |
5935 | xen_pv_trap xenint3 |
5936 | -xen_pv_trap nmi |
5937 | +xen_pv_trap xennmi |
5938 | xen_pv_trap overflow |
5939 | xen_pv_trap bounds |
5940 | xen_pv_trap invalid_op |
5941 | diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S |
5942 | index b5b8d7f43557..497cc55a0c16 100644 |
5943 | --- a/arch/x86/xen/xen-head.S |
5944 | +++ b/arch/x86/xen/xen-head.S |
5945 | @@ -10,6 +10,7 @@ |
5946 | #include <asm/boot.h> |
5947 | #include <asm/asm.h> |
5948 | #include <asm/page_types.h> |
5949 | +#include <asm/unwind_hints.h> |
5950 | |
5951 | #include <xen/interface/elfnote.h> |
5952 | #include <xen/interface/features.h> |
5953 | @@ -20,6 +21,7 @@ |
5954 | #ifdef CONFIG_XEN_PV |
5955 | __INIT |
5956 | ENTRY(startup_xen) |
5957 | + UNWIND_HINT_EMPTY |
5958 | cld |
5959 | |
5960 | /* Clear .bss */ |
5961 | @@ -34,21 +36,24 @@ ENTRY(startup_xen) |
5962 | mov $init_thread_union+THREAD_SIZE, %_ASM_SP |
5963 | |
5964 | jmp xen_start_kernel |
5965 | - |
5966 | +END(startup_xen) |
5967 | __FINIT |
5968 | #endif |
5969 | |
5970 | .pushsection .text |
5971 | .balign PAGE_SIZE |
5972 | ENTRY(hypercall_page) |
5973 | - .skip PAGE_SIZE |
5974 | + .rept (PAGE_SIZE / 32) |
5975 | + UNWIND_HINT_EMPTY |
5976 | + .skip 32 |
5977 | + .endr |
5978 | |
5979 | #define HYPERCALL(n) \ |
5980 | .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ |
5981 | .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 |
5982 | #include <asm/xen-hypercalls.h> |
5983 | #undef HYPERCALL |
5984 | - |
5985 | +END(hypercall_page) |
5986 | .popsection |
5987 | |
5988 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
5989 | diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c |
5990 | index a4783da90ba8..0f860cf0d56d 100644 |
5991 | --- a/block/bfq-iosched.c |
5992 | +++ b/block/bfq-iosched.c |
5993 | @@ -108,6 +108,7 @@ |
5994 | #include "blk-mq-tag.h" |
5995 | #include "blk-mq-sched.h" |
5996 | #include "bfq-iosched.h" |
5997 | +#include "blk-wbt.h" |
5998 | |
5999 | #define BFQ_BFQQ_FNS(name) \ |
6000 | void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ |
6001 | @@ -4775,7 +4776,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) |
6002 | bfq_init_root_group(bfqd->root_group, bfqd); |
6003 | bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); |
6004 | |
6005 | - |
6006 | + wbt_disable_default(q); |
6007 | return 0; |
6008 | |
6009 | out_free: |
6010 | diff --git a/block/blk-wbt.c b/block/blk-wbt.c |
6011 | index 6a9a0f03a67b..e59d59c11ebb 100644 |
6012 | --- a/block/blk-wbt.c |
6013 | +++ b/block/blk-wbt.c |
6014 | @@ -654,7 +654,7 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on) |
6015 | } |
6016 | |
6017 | /* |
6018 | - * Disable wbt, if enabled by default. Only called from CFQ. |
6019 | + * Disable wbt, if enabled by default. |
6020 | */ |
6021 | void wbt_disable_default(struct request_queue *q) |
6022 | { |
6023 | diff --git a/crypto/lrw.c b/crypto/lrw.c |
6024 | index a8bfae4451bf..eb681e9fe574 100644 |
6025 | --- a/crypto/lrw.c |
6026 | +++ b/crypto/lrw.c |
6027 | @@ -610,8 +610,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) |
6028 | ecb_name[len - 1] = 0; |
6029 | |
6030 | if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, |
6031 | - "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) |
6032 | - return -ENAMETOOLONG; |
6033 | + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) { |
6034 | + err = -ENAMETOOLONG; |
6035 | + goto err_drop_spawn; |
6036 | + } |
6037 | } |
6038 | |
6039 | inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; |
6040 | diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c |
6041 | index 3c3a37b8503b..572b6c7303ed 100644 |
6042 | --- a/drivers/acpi/apei/ghes.c |
6043 | +++ b/drivers/acpi/apei/ghes.c |
6044 | @@ -51,6 +51,7 @@ |
6045 | #include <acpi/actbl1.h> |
6046 | #include <acpi/ghes.h> |
6047 | #include <acpi/apei.h> |
6048 | +#include <asm/fixmap.h> |
6049 | #include <asm/tlbflush.h> |
6050 | #include <ras/ras_event.h> |
6051 | |
6052 | @@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex); |
6053 | * Because the memory area used to transfer hardware error information |
6054 | * from BIOS to Linux can be determined only in NMI, IRQ or timer |
6055 | * handler, but general ioremap can not be used in atomic context, so |
6056 | - * a special version of atomic ioremap is implemented for that. |
6057 | + * the fixmap is used instead. |
6058 | */ |
6059 | |
6060 | /* |
6061 | @@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex); |
6062 | /* virtual memory area for atomic ioremap */ |
6063 | static struct vm_struct *ghes_ioremap_area; |
6064 | /* |
6065 | - * These 2 spinlock is used to prevent atomic ioremap virtual memory |
6066 | - * area from being mapped simultaneously. |
6067 | + * These 2 spinlocks are used to prevent the fixmap entries from being used |
6068 | + * simultaneously. |
6069 | */ |
6070 | static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); |
6071 | static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); |
6072 | @@ -159,52 +160,36 @@ static void ghes_ioremap_exit(void) |
6073 | |
6074 | static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) |
6075 | { |
6076 | - unsigned long vaddr; |
6077 | phys_addr_t paddr; |
6078 | pgprot_t prot; |
6079 | |
6080 | - vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); |
6081 | - |
6082 | paddr = pfn << PAGE_SHIFT; |
6083 | prot = arch_apei_get_mem_attribute(paddr); |
6084 | - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); |
6085 | + __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot); |
6086 | |
6087 | - return (void __iomem *)vaddr; |
6088 | + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI); |
6089 | } |
6090 | |
6091 | static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) |
6092 | { |
6093 | - unsigned long vaddr, paddr; |
6094 | + phys_addr_t paddr; |
6095 | pgprot_t prot; |
6096 | |
6097 | - vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); |
6098 | - |
6099 | paddr = pfn << PAGE_SHIFT; |
6100 | prot = arch_apei_get_mem_attribute(paddr); |
6101 | + __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot); |
6102 | |
6103 | - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); |
6104 | - |
6105 | - return (void __iomem *)vaddr; |
6106 | + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ); |
6107 | } |
6108 | |
6109 | -static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) |
6110 | +static void ghes_iounmap_nmi(void) |
6111 | { |
6112 | - unsigned long vaddr = (unsigned long __force)vaddr_ptr; |
6113 | - void *base = ghes_ioremap_area->addr; |
6114 | - |
6115 | - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); |
6116 | - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); |
6117 | - arch_apei_flush_tlb_one(vaddr); |
6118 | + clear_fixmap(FIX_APEI_GHES_NMI); |
6119 | } |
6120 | |
6121 | -static void ghes_iounmap_irq(void __iomem *vaddr_ptr) |
6122 | +static void ghes_iounmap_irq(void) |
6123 | { |
6124 | - unsigned long vaddr = (unsigned long __force)vaddr_ptr; |
6125 | - void *base = ghes_ioremap_area->addr; |
6126 | - |
6127 | - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); |
6128 | - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); |
6129 | - arch_apei_flush_tlb_one(vaddr); |
6130 | + clear_fixmap(FIX_APEI_GHES_IRQ); |
6131 | } |
6132 | |
6133 | static int ghes_estatus_pool_init(void) |
6134 | @@ -360,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, |
6135 | paddr += trunk; |
6136 | buffer += trunk; |
6137 | if (in_nmi) { |
6138 | - ghes_iounmap_nmi(vaddr); |
6139 | + ghes_iounmap_nmi(); |
6140 | raw_spin_unlock(&ghes_ioremap_lock_nmi); |
6141 | } else { |
6142 | - ghes_iounmap_irq(vaddr); |
6143 | + ghes_iounmap_irq(); |
6144 | spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); |
6145 | } |
6146 | } |
6147 | @@ -851,17 +836,8 @@ static void ghes_sea_remove(struct ghes *ghes) |
6148 | synchronize_rcu(); |
6149 | } |
6150 | #else /* CONFIG_ACPI_APEI_SEA */ |
6151 | -static inline void ghes_sea_add(struct ghes *ghes) |
6152 | -{ |
6153 | - pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", |
6154 | - ghes->generic->header.source_id); |
6155 | -} |
6156 | - |
6157 | -static inline void ghes_sea_remove(struct ghes *ghes) |
6158 | -{ |
6159 | - pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", |
6160 | - ghes->generic->header.source_id); |
6161 | -} |
6162 | +static inline void ghes_sea_add(struct ghes *ghes) { } |
6163 | +static inline void ghes_sea_remove(struct ghes *ghes) { } |
6164 | #endif /* CONFIG_ACPI_APEI_SEA */ |
6165 | |
6166 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI |
6167 | @@ -1063,23 +1039,9 @@ static void ghes_nmi_init_cxt(void) |
6168 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); |
6169 | } |
6170 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ |
6171 | -static inline void ghes_nmi_add(struct ghes *ghes) |
6172 | -{ |
6173 | - pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", |
6174 | - ghes->generic->header.source_id); |
6175 | - BUG(); |
6176 | -} |
6177 | - |
6178 | -static inline void ghes_nmi_remove(struct ghes *ghes) |
6179 | -{ |
6180 | - pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", |
6181 | - ghes->generic->header.source_id); |
6182 | - BUG(); |
6183 | -} |
6184 | - |
6185 | -static inline void ghes_nmi_init_cxt(void) |
6186 | -{ |
6187 | -} |
6188 | +static inline void ghes_nmi_add(struct ghes *ghes) { } |
6189 | +static inline void ghes_nmi_remove(struct ghes *ghes) { } |
6190 | +static inline void ghes_nmi_init_cxt(void) { } |
6191 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ |
6192 | |
6193 | static int ghes_probe(struct platform_device *ghes_dev) |
6194 | diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c |
6195 | index a6de32530693..0459b1204694 100644 |
6196 | --- a/drivers/base/power/opp/core.c |
6197 | +++ b/drivers/base/power/opp/core.c |
6198 | @@ -296,7 +296,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) |
6199 | opp_table = _find_opp_table(dev); |
6200 | if (IS_ERR(opp_table)) { |
6201 | count = PTR_ERR(opp_table); |
6202 | - dev_err(dev, "%s: OPP table not found (%d)\n", |
6203 | + dev_dbg(dev, "%s: OPP table not found (%d)\n", |
6204 | __func__, count); |
6205 | return count; |
6206 | } |
6207 | diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c |
6208 | index e2540113d0da..73d2d88ddc03 100644 |
6209 | --- a/drivers/bluetooth/hci_bcm.c |
6210 | +++ b/drivers/bluetooth/hci_bcm.c |
6211 | @@ -68,7 +68,7 @@ struct bcm_device { |
6212 | u32 init_speed; |
6213 | u32 oper_speed; |
6214 | int irq; |
6215 | - u8 irq_polarity; |
6216 | + bool irq_active_low; |
6217 | |
6218 | #ifdef CONFIG_PM |
6219 | struct hci_uart *hu; |
6220 | @@ -213,7 +213,9 @@ static int bcm_request_irq(struct bcm_data *bcm) |
6221 | } |
6222 | |
6223 | err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake, |
6224 | - IRQF_TRIGGER_RISING, "host_wake", bdev); |
6225 | + bdev->irq_active_low ? IRQF_TRIGGER_FALLING : |
6226 | + IRQF_TRIGGER_RISING, |
6227 | + "host_wake", bdev); |
6228 | if (err) |
6229 | goto unlock; |
6230 | |
6231 | @@ -253,7 +255,7 @@ static int bcm_setup_sleep(struct hci_uart *hu) |
6232 | struct sk_buff *skb; |
6233 | struct bcm_set_sleep_mode sleep_params = default_sleep_params; |
6234 | |
6235 | - sleep_params.host_wake_active = !bcm->dev->irq_polarity; |
6236 | + sleep_params.host_wake_active = !bcm->dev->irq_active_low; |
6237 | |
6238 | skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params), |
6239 | &sleep_params, HCI_INIT_TIMEOUT); |
6240 | @@ -690,10 +692,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = { |
6241 | }; |
6242 | |
6243 | #ifdef CONFIG_ACPI |
6244 | -static u8 acpi_active_low = ACPI_ACTIVE_LOW; |
6245 | - |
6246 | /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */ |
6247 | -static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { |
6248 | +static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = { |
6249 | { |
6250 | .ident = "Asus T100TA", |
6251 | .matches = { |
6252 | @@ -701,7 +701,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { |
6253 | "ASUSTeK COMPUTER INC."), |
6254 | DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), |
6255 | }, |
6256 | - .driver_data = &acpi_active_low, |
6257 | }, |
6258 | { |
6259 | .ident = "Asus T100CHI", |
6260 | @@ -710,7 +709,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { |
6261 | "ASUSTeK COMPUTER INC."), |
6262 | DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"), |
6263 | }, |
6264 | - .driver_data = &acpi_active_low, |
6265 | }, |
6266 | { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ |
6267 | .ident = "Lenovo ThinkPad 8", |
6268 | @@ -718,7 +716,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { |
6269 | DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), |
6270 | DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), |
6271 | }, |
6272 | - .driver_data = &acpi_active_low, |
6273 | }, |
6274 | { } |
6275 | }; |
6276 | @@ -733,13 +730,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data) |
6277 | switch (ares->type) { |
6278 | case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: |
6279 | irq = &ares->data.extended_irq; |
6280 | - dev->irq_polarity = irq->polarity; |
6281 | + dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW; |
6282 | break; |
6283 | |
6284 | case ACPI_RESOURCE_TYPE_GPIO: |
6285 | gpio = &ares->data.gpio; |
6286 | if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) |
6287 | - dev->irq_polarity = gpio->polarity; |
6288 | + dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW; |
6289 | break; |
6290 | |
6291 | case ACPI_RESOURCE_TYPE_SERIAL_BUS: |
6292 | @@ -834,11 +831,11 @@ static int bcm_acpi_probe(struct bcm_device *dev) |
6293 | return ret; |
6294 | acpi_dev_free_resource_list(&resources); |
6295 | |
6296 | - dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table); |
6297 | + dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table); |
6298 | if (dmi_id) { |
6299 | bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low", |
6300 | dmi_id->ident); |
6301 | - dev->irq_polarity = *(u8 *)dmi_id->driver_data; |
6302 | + dev->irq_active_low = true; |
6303 | } |
6304 | |
6305 | return 0; |
6306 | diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c |
6307 | index 6e2403805784..6aef3bde10d7 100644 |
6308 | --- a/drivers/bluetooth/hci_ldisc.c |
6309 | +++ b/drivers/bluetooth/hci_ldisc.c |
6310 | @@ -41,6 +41,7 @@ |
6311 | #include <linux/ioctl.h> |
6312 | #include <linux/skbuff.h> |
6313 | #include <linux/firmware.h> |
6314 | +#include <linux/serdev.h> |
6315 | |
6316 | #include <net/bluetooth/bluetooth.h> |
6317 | #include <net/bluetooth/hci_core.h> |
6318 | @@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) |
6319 | unsigned int set = 0; |
6320 | unsigned int clear = 0; |
6321 | |
6322 | + if (hu->serdev) { |
6323 | + serdev_device_set_flow_control(hu->serdev, !enable); |
6324 | + serdev_device_set_rts(hu->serdev, !enable); |
6325 | + return; |
6326 | + } |
6327 | + |
6328 | if (enable) { |
6329 | /* Disable hardware flow control */ |
6330 | ktermios = tty->termios; |
6331 | diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c |
6332 | index ab9e850b3707..2f385a57cd91 100644 |
6333 | --- a/drivers/clk/sunxi-ng/ccu-sun5i.c |
6334 | +++ b/drivers/clk/sunxi-ng/ccu-sun5i.c |
6335 | @@ -982,8 +982,8 @@ static void __init sun5i_ccu_init(struct device_node *node, |
6336 | |
6337 | /* Force the PLL-Audio-1x divider to 4 */ |
6338 | val = readl(reg + SUN5I_PLL_AUDIO_REG); |
6339 | - val &= ~GENMASK(19, 16); |
6340 | - writel(val | (3 << 16), reg + SUN5I_PLL_AUDIO_REG); |
6341 | + val &= ~GENMASK(29, 26); |
6342 | + writel(val | (3 << 26), reg + SUN5I_PLL_AUDIO_REG); |
6343 | |
6344 | /* |
6345 | * Use the peripheral PLL as the AHB parent, instead of CPU / |
6346 | diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c |
6347 | index 8af434815fba..241fb13f1c06 100644 |
6348 | --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c |
6349 | +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c |
6350 | @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, |
6351 | 0x150, 0, 4, 24, 2, BIT(31), |
6352 | CLK_SET_RATE_PARENT); |
6353 | |
6354 | -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); |
6355 | +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0); |
6356 | |
6357 | static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); |
6358 | |
6359 | diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c |
6360 | index a32158e8f2e3..84a5e7f17f6f 100644 |
6361 | --- a/drivers/clk/sunxi-ng/ccu_nm.c |
6362 | +++ b/drivers/clk/sunxi-ng/ccu_nm.c |
6363 | @@ -99,6 +99,9 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate, |
6364 | struct ccu_nm *nm = hw_to_ccu_nm(hw); |
6365 | struct _ccu_nm _nm; |
6366 | |
6367 | + if (ccu_frac_helper_has_rate(&nm->common, &nm->frac, rate)) |
6368 | + return rate; |
6369 | + |
6370 | _nm.min_n = nm->n.min ?: 1; |
6371 | _nm.max_n = nm->n.max ?: 1 << nm->n.width; |
6372 | _nm.min_m = 1; |
6373 | diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c |
6374 | index 484cc8909d5c..ed4df58a855e 100644 |
6375 | --- a/drivers/cpuidle/cpuidle.c |
6376 | +++ b/drivers/cpuidle/cpuidle.c |
6377 | @@ -208,6 +208,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, |
6378 | return -EBUSY; |
6379 | } |
6380 | target_state = &drv->states[index]; |
6381 | + broadcast = false; |
6382 | } |
6383 | |
6384 | /* Take note of the planned idle state. */ |
6385 | diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h |
6386 | index ecfdcfe3698d..4f41d6da5acc 100644 |
6387 | --- a/drivers/crypto/amcc/crypto4xx_core.h |
6388 | +++ b/drivers/crypto/amcc/crypto4xx_core.h |
6389 | @@ -34,12 +34,12 @@ |
6390 | #define PPC405EX_CE_RESET 0x00000008 |
6391 | |
6392 | #define CRYPTO4XX_CRYPTO_PRIORITY 300 |
6393 | -#define PPC4XX_LAST_PD 63 |
6394 | -#define PPC4XX_NUM_PD 64 |
6395 | -#define PPC4XX_LAST_GD 1023 |
6396 | +#define PPC4XX_NUM_PD 256 |
6397 | +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1) |
6398 | #define PPC4XX_NUM_GD 1024 |
6399 | -#define PPC4XX_LAST_SD 63 |
6400 | -#define PPC4XX_NUM_SD 64 |
6401 | +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1) |
6402 | +#define PPC4XX_NUM_SD 256 |
6403 | +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1) |
6404 | #define PPC4XX_SD_BUFFER_SIZE 2048 |
6405 | |
6406 | #define PD_ENTRY_INUSE 1 |
6407 | diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c |
6408 | index 0ef9011a1856..02a50929af67 100644 |
6409 | --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c |
6410 | +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c |
6411 | @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, |
6412 | { |
6413 | u8 data; |
6414 | int ret = 0; |
6415 | + int retry; |
6416 | |
6417 | if (!mode) { |
6418 | DRM_ERROR("NULL input\n"); |
6419 | @@ -417,10 +418,19 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, |
6420 | } |
6421 | |
6422 | /* Read Status: i2c over aux */ |
6423 | - ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_LSPCON_CURRENT_MODE, |
6424 | - &data, sizeof(data)); |
6425 | + for (retry = 0; retry < 6; retry++) { |
6426 | + if (retry) |
6427 | + usleep_range(500, 1000); |
6428 | + |
6429 | + ret = drm_dp_dual_mode_read(adapter, |
6430 | + DP_DUAL_MODE_LSPCON_CURRENT_MODE, |
6431 | + &data, sizeof(data)); |
6432 | + if (!ret) |
6433 | + break; |
6434 | + } |
6435 | + |
6436 | if (ret < 0) { |
6437 | - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n"); |
6438 | + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n"); |
6439 | return -EFAULT; |
6440 | } |
6441 | |
6442 | diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c |
6443 | index d1e0dc908048..04796d7d0fdb 100644 |
6444 | --- a/drivers/gpu/drm/vc4/vc4_dsi.c |
6445 | +++ b/drivers/gpu/drm/vc4/vc4_dsi.c |
6446 | @@ -866,7 +866,8 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, |
6447 | adjusted_mode->clock = pixel_clock_hz / 1000 + 1; |
6448 | |
6449 | /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */ |
6450 | - adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); |
6451 | + adjusted_mode->htotal = adjusted_mode->clock * mode->htotal / |
6452 | + mode->clock; |
6453 | adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; |
6454 | adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; |
6455 | |
6456 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c |
6457 | index 937801ac2fe0..2cd134dd94d2 100644 |
6458 | --- a/drivers/hv/vmbus_drv.c |
6459 | +++ b/drivers/hv/vmbus_drv.c |
6460 | @@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void) |
6461 | { |
6462 | int ret, t; |
6463 | |
6464 | - if (x86_hyper != &x86_hyper_ms_hyperv) |
6465 | + if (x86_hyper_type != X86_HYPER_MS_HYPERV) |
6466 | return -ENODEV; |
6467 | |
6468 | init_completion(&probe_event); |
6469 | diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c |
6470 | index 752856b3a849..379de1829cdb 100644 |
6471 | --- a/drivers/iio/accel/st_accel_core.c |
6472 | +++ b/drivers/iio/accel/st_accel_core.c |
6473 | @@ -164,7 +164,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6474 | .mask_int2 = 0x00, |
6475 | .addr_ihl = 0x25, |
6476 | .mask_ihl = 0x02, |
6477 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6478 | + .stat_drdy = { |
6479 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6480 | + .mask = 0x07, |
6481 | + }, |
6482 | }, |
6483 | .sim = { |
6484 | .addr = 0x23, |
6485 | @@ -236,7 +239,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6486 | .mask_ihl = 0x80, |
6487 | .addr_od = 0x22, |
6488 | .mask_od = 0x40, |
6489 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6490 | + .stat_drdy = { |
6491 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6492 | + .mask = 0x07, |
6493 | + }, |
6494 | }, |
6495 | .sim = { |
6496 | .addr = 0x23, |
6497 | @@ -318,7 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6498 | .mask_int2 = 0x00, |
6499 | .addr_ihl = 0x23, |
6500 | .mask_ihl = 0x40, |
6501 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6502 | + .stat_drdy = { |
6503 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6504 | + .mask = 0x07, |
6505 | + }, |
6506 | .ig1 = { |
6507 | .en_addr = 0x23, |
6508 | .en_mask = 0x08, |
6509 | @@ -389,7 +398,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6510 | .drdy_irq = { |
6511 | .addr = 0x21, |
6512 | .mask_int1 = 0x04, |
6513 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6514 | + .stat_drdy = { |
6515 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6516 | + .mask = 0x07, |
6517 | + }, |
6518 | }, |
6519 | .sim = { |
6520 | .addr = 0x21, |
6521 | @@ -451,7 +463,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6522 | .mask_ihl = 0x80, |
6523 | .addr_od = 0x22, |
6524 | .mask_od = 0x40, |
6525 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6526 | + .stat_drdy = { |
6527 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6528 | + .mask = 0x07, |
6529 | + }, |
6530 | }, |
6531 | .sim = { |
6532 | .addr = 0x21, |
6533 | @@ -569,7 +584,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6534 | .drdy_irq = { |
6535 | .addr = 0x21, |
6536 | .mask_int1 = 0x04, |
6537 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6538 | + .stat_drdy = { |
6539 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6540 | + .mask = 0x07, |
6541 | + }, |
6542 | }, |
6543 | .sim = { |
6544 | .addr = 0x21, |
6545 | @@ -640,7 +658,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { |
6546 | .mask_int2 = 0x00, |
6547 | .addr_ihl = 0x25, |
6548 | .mask_ihl = 0x02, |
6549 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6550 | + .stat_drdy = { |
6551 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6552 | + .mask = 0x07, |
6553 | + }, |
6554 | }, |
6555 | .sim = { |
6556 | .addr = 0x23, |
6557 | diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c |
6558 | index 02e833b14db0..34115f05d5c4 100644 |
6559 | --- a/drivers/iio/common/st_sensors/st_sensors_core.c |
6560 | +++ b/drivers/iio/common/st_sensors/st_sensors_core.c |
6561 | @@ -470,7 +470,7 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable) |
6562 | * different one. Take into account irq status register |
6563 | * to understand if irq trigger can be properly supported |
6564 | */ |
6565 | - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) |
6566 | + if (sdata->sensor_settings->drdy_irq.stat_drdy.addr) |
6567 | sdata->hw_irq_trigger = enable; |
6568 | return 0; |
6569 | } |
6570 | diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c |
6571 | index fa73e6795359..fdcc5a891958 100644 |
6572 | --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c |
6573 | +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c |
6574 | @@ -31,7 +31,7 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev, |
6575 | int ret; |
6576 | |
6577 | /* How would I know if I can't check it? */ |
6578 | - if (!sdata->sensor_settings->drdy_irq.addr_stat_drdy) |
6579 | + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) |
6580 | return -EINVAL; |
6581 | |
6582 | /* No scan mask, no interrupt */ |
6583 | @@ -39,23 +39,15 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev, |
6584 | return 0; |
6585 | |
6586 | ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, |
6587 | - sdata->sensor_settings->drdy_irq.addr_stat_drdy, |
6588 | + sdata->sensor_settings->drdy_irq.stat_drdy.addr, |
6589 | &status); |
6590 | if (ret < 0) { |
6591 | dev_err(sdata->dev, |
6592 | "error checking samples available\n"); |
6593 | return ret; |
6594 | } |
6595 | - /* |
6596 | - * the lower bits of .active_scan_mask[0] is directly mapped |
6597 | - * to the channels on the sensor: either bit 0 for |
6598 | - * one-dimensional sensors, or e.g. x,y,z for accelerometers, |
6599 | - * gyroscopes or magnetometers. No sensor use more than 3 |
6600 | - * channels, so cut the other status bits here. |
6601 | - */ |
6602 | - status &= 0x07; |
6603 | |
6604 | - if (status & (u8)indio_dev->active_scan_mask[0]) |
6605 | + if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask) |
6606 | return 1; |
6607 | |
6608 | return 0; |
6609 | @@ -212,7 +204,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, |
6610 | * it was "our" interrupt. |
6611 | */ |
6612 | if (sdata->int_pin_open_drain && |
6613 | - sdata->sensor_settings->drdy_irq.addr_stat_drdy) |
6614 | + sdata->sensor_settings->drdy_irq.stat_drdy.addr) |
6615 | irq_trig |= IRQF_SHARED; |
6616 | |
6617 | err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev), |
6618 | diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c |
6619 | index e366422e8512..2536a8400c98 100644 |
6620 | --- a/drivers/iio/gyro/st_gyro_core.c |
6621 | +++ b/drivers/iio/gyro/st_gyro_core.c |
6622 | @@ -118,7 +118,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { |
6623 | * drain settings, but only for INT1 and not |
6624 | * for the DRDY line on INT2. |
6625 | */ |
6626 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6627 | + .stat_drdy = { |
6628 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6629 | + .mask = 0x07, |
6630 | + }, |
6631 | }, |
6632 | .multi_read_bit = true, |
6633 | .bootime = 2, |
6634 | @@ -188,7 +191,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { |
6635 | * drain settings, but only for INT1 and not |
6636 | * for the DRDY line on INT2. |
6637 | */ |
6638 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6639 | + .stat_drdy = { |
6640 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6641 | + .mask = 0x07, |
6642 | + }, |
6643 | }, |
6644 | .multi_read_bit = true, |
6645 | .bootime = 2, |
6646 | @@ -253,7 +259,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { |
6647 | * drain settings, but only for INT1 and not |
6648 | * for the DRDY line on INT2. |
6649 | */ |
6650 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6651 | + .stat_drdy = { |
6652 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6653 | + .mask = 0x07, |
6654 | + }, |
6655 | }, |
6656 | .multi_read_bit = true, |
6657 | .bootime = 2, |
6658 | diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c |
6659 | index 08aafba4481c..19031a7bce23 100644 |
6660 | --- a/drivers/iio/magnetometer/st_magn_core.c |
6661 | +++ b/drivers/iio/magnetometer/st_magn_core.c |
6662 | @@ -317,7 +317,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { |
6663 | }, |
6664 | .drdy_irq = { |
6665 | /* drdy line is routed drdy pin */ |
6666 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6667 | + .stat_drdy = { |
6668 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6669 | + .mask = 0x07, |
6670 | + }, |
6671 | }, |
6672 | .multi_read_bit = true, |
6673 | .bootime = 2, |
6674 | @@ -361,7 +364,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { |
6675 | .drdy_irq = { |
6676 | .addr = 0x62, |
6677 | .mask_int1 = 0x01, |
6678 | - .addr_stat_drdy = 0x67, |
6679 | + .stat_drdy = { |
6680 | + .addr = 0x67, |
6681 | + .mask = 0x07, |
6682 | + }, |
6683 | }, |
6684 | .multi_read_bit = false, |
6685 | .bootime = 2, |
6686 | diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c |
6687 | index 34611a8ea2ce..ea075fcd5a6f 100644 |
6688 | --- a/drivers/iio/pressure/st_pressure_core.c |
6689 | +++ b/drivers/iio/pressure/st_pressure_core.c |
6690 | @@ -287,7 +287,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { |
6691 | .mask_ihl = 0x80, |
6692 | .addr_od = 0x22, |
6693 | .mask_od = 0x40, |
6694 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6695 | + .stat_drdy = { |
6696 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6697 | + .mask = 0x03, |
6698 | + }, |
6699 | }, |
6700 | .multi_read_bit = true, |
6701 | .bootime = 2, |
6702 | @@ -395,7 +398,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { |
6703 | .mask_ihl = 0x80, |
6704 | .addr_od = 0x22, |
6705 | .mask_od = 0x40, |
6706 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6707 | + .stat_drdy = { |
6708 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6709 | + .mask = 0x03, |
6710 | + }, |
6711 | }, |
6712 | .multi_read_bit = true, |
6713 | .bootime = 2, |
6714 | @@ -454,7 +460,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { |
6715 | .mask_ihl = 0x80, |
6716 | .addr_od = 0x12, |
6717 | .mask_od = 0x40, |
6718 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, |
6719 | + .stat_drdy = { |
6720 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, |
6721 | + .mask = 0x03, |
6722 | + }, |
6723 | }, |
6724 | .multi_read_bit = false, |
6725 | .bootime = 2, |
6726 | diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c |
6727 | index 747efd1ae5a6..8208c30f03c5 100644 |
6728 | --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c |
6729 | +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c |
6730 | @@ -1001,6 +1001,11 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) |
6731 | } |
6732 | } |
6733 | |
6734 | + if (!ne) { |
6735 | + dev_err(dev, "Reseved loop qp is absent!\n"); |
6736 | + goto free_work; |
6737 | + } |
6738 | + |
6739 | do { |
6740 | ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); |
6741 | if (ret < 0) { |
6742 | diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c |
6743 | index c1b5f38f31a5..3b4916680018 100644 |
6744 | --- a/drivers/infiniband/sw/rxe/rxe_pool.c |
6745 | +++ b/drivers/infiniband/sw/rxe/rxe_pool.c |
6746 | @@ -404,6 +404,8 @@ void *rxe_alloc(struct rxe_pool *pool) |
6747 | elem = kmem_cache_zalloc(pool_cache(pool), |
6748 | (pool->flags & RXE_POOL_ATOMIC) ? |
6749 | GFP_ATOMIC : GFP_KERNEL); |
6750 | + if (!elem) |
6751 | + return NULL; |
6752 | |
6753 | elem->pool = pool; |
6754 | kref_init(&elem->ref_cnt); |
6755 | diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c |
6756 | index afa938bd26d6..a72278e9cd27 100644 |
6757 | --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c |
6758 | +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c |
6759 | @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) |
6760 | rcu_assign_pointer(adapter->mactbl, NULL); |
6761 | synchronize_rcu(); |
6762 | opa_vnic_free_mac_tbl(mactbl); |
6763 | + adapter->info.vport.mac_tbl_digest = 0; |
6764 | mutex_unlock(&adapter->mactbl_lock); |
6765 | } |
6766 | |
6767 | diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c |
6768 | index c2733964379c..9655cc3aa3a0 100644 |
6769 | --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c |
6770 | +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c |
6771 | @@ -348,7 +348,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, |
6772 | void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, |
6773 | struct opa_veswport_iface_macs *macs) |
6774 | { |
6775 | - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0; |
6776 | + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0; |
6777 | struct netdev_hw_addr *ha; |
6778 | |
6779 | start_idx = be16_to_cpu(macs->start_idx); |
6780 | @@ -359,8 +359,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, |
6781 | |
6782 | /* Do not include EM specified MAC address */ |
6783 | if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, |
6784 | - ARRAY_SIZE(adapter->info.vport.base_mac_addr))) |
6785 | + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) { |
6786 | + em_macs++; |
6787 | continue; |
6788 | + } |
6789 | |
6790 | if (start_idx > idx++) |
6791 | continue; |
6792 | @@ -383,7 +385,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, |
6793 | } |
6794 | |
6795 | tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + |
6796 | - netdev_uc_count(adapter->netdev); |
6797 | + netdev_uc_count(adapter->netdev) - em_macs; |
6798 | macs->tot_macs_in_lst = cpu_to_be16(tot_macs); |
6799 | macs->num_macs_in_msg = cpu_to_be16(count); |
6800 | macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); |
6801 | diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c |
6802 | index 0f586780ceb4..1ae5c1ef3f5b 100644 |
6803 | --- a/drivers/input/mouse/vmmouse.c |
6804 | +++ b/drivers/input/mouse/vmmouse.c |
6805 | @@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse) |
6806 | /* |
6807 | * Array of supported hypervisors. |
6808 | */ |
6809 | -static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = { |
6810 | - &x86_hyper_vmware, |
6811 | -#ifdef CONFIG_KVM_GUEST |
6812 | - &x86_hyper_kvm, |
6813 | -#endif |
6814 | +static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = { |
6815 | + X86_HYPER_VMWARE, |
6816 | + X86_HYPER_KVM, |
6817 | }; |
6818 | |
6819 | /** |
6820 | @@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void) |
6821 | int i; |
6822 | |
6823 | for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++) |
6824 | - if (vmmouse_supported_hypervisors[i] == x86_hyper) |
6825 | + if (vmmouse_supported_hypervisors[i] == x86_hyper_type) |
6826 | return true; |
6827 | |
6828 | return false; |
6829 | diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c |
6830 | index 905729191d3e..78183f90820e 100644 |
6831 | --- a/drivers/leds/leds-pca955x.c |
6832 | +++ b/drivers/leds/leds-pca955x.c |
6833 | @@ -61,6 +61,10 @@ |
6834 | #define PCA955X_LS_BLINK0 0x2 /* Blink at PWM0 rate */ |
6835 | #define PCA955X_LS_BLINK1 0x3 /* Blink at PWM1 rate */ |
6836 | |
6837 | +#define PCA955X_GPIO_INPUT LED_OFF |
6838 | +#define PCA955X_GPIO_HIGH LED_OFF |
6839 | +#define PCA955X_GPIO_LOW LED_FULL |
6840 | + |
6841 | enum pca955x_type { |
6842 | pca9550, |
6843 | pca9551, |
6844 | @@ -329,9 +333,9 @@ static int pca955x_set_value(struct gpio_chip *gc, unsigned int offset, |
6845 | struct pca955x_led *led = &pca955x->leds[offset]; |
6846 | |
6847 | if (val) |
6848 | - return pca955x_led_set(&led->led_cdev, LED_FULL); |
6849 | - else |
6850 | - return pca955x_led_set(&led->led_cdev, LED_OFF); |
6851 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_HIGH); |
6852 | + |
6853 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_LOW); |
6854 | } |
6855 | |
6856 | static void pca955x_gpio_set_value(struct gpio_chip *gc, unsigned int offset, |
6857 | @@ -355,8 +359,11 @@ static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset) |
6858 | static int pca955x_gpio_direction_input(struct gpio_chip *gc, |
6859 | unsigned int offset) |
6860 | { |
6861 | - /* To use as input ensure pin is not driven */ |
6862 | - return pca955x_set_value(gc, offset, 0); |
6863 | + struct pca955x *pca955x = gpiochip_get_data(gc); |
6864 | + struct pca955x_led *led = &pca955x->leds[offset]; |
6865 | + |
6866 | + /* To use as input ensure pin is not driven. */ |
6867 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_INPUT); |
6868 | } |
6869 | |
6870 | static int pca955x_gpio_direction_output(struct gpio_chip *gc, |
6871 | diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c |
6872 | index 35e82b14ded7..ddf0a4341ae2 100644 |
6873 | --- a/drivers/md/dm-mpath.c |
6874 | +++ b/drivers/md/dm-mpath.c |
6875 | @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m, |
6876 | |
6877 | pgpath = path_to_pgpath(path); |
6878 | |
6879 | - if (unlikely(lockless_dereference(m->current_pg) != pg)) { |
6880 | + if (unlikely(READ_ONCE(m->current_pg) != pg)) { |
6881 | /* Only update current_pgpath if pg changed */ |
6882 | spin_lock_irqsave(&m->lock, flags); |
6883 | m->current_pgpath = pgpath; |
6884 | @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) |
6885 | } |
6886 | |
6887 | /* Were we instructed to switch PG? */ |
6888 | - if (lockless_dereference(m->next_pg)) { |
6889 | + if (READ_ONCE(m->next_pg)) { |
6890 | spin_lock_irqsave(&m->lock, flags); |
6891 | pg = m->next_pg; |
6892 | if (!pg) { |
6893 | @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) |
6894 | |
6895 | /* Don't change PG until it has no remaining paths */ |
6896 | check_current_pg: |
6897 | - pg = lockless_dereference(m->current_pg); |
6898 | + pg = READ_ONCE(m->current_pg); |
6899 | if (pg) { |
6900 | pgpath = choose_path_in_pg(m, pg, nr_bytes); |
6901 | if (!IS_ERR_OR_NULL(pgpath)) |
6902 | @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, |
6903 | struct request *clone; |
6904 | |
6905 | /* Do we need to select a new pgpath? */ |
6906 | - pgpath = lockless_dereference(m->current_pgpath); |
6907 | + pgpath = READ_ONCE(m->current_pgpath); |
6908 | if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) |
6909 | pgpath = choose_pgpath(m, nr_bytes); |
6910 | |
6911 | @@ -533,7 +533,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m |
6912 | bool queue_io; |
6913 | |
6914 | /* Do we need to select a new pgpath? */ |
6915 | - pgpath = lockless_dereference(m->current_pgpath); |
6916 | + pgpath = READ_ONCE(m->current_pgpath); |
6917 | queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); |
6918 | if (!pgpath || !queue_io) |
6919 | pgpath = choose_pgpath(m, nr_bytes); |
6920 | @@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, |
6921 | struct pgpath *current_pgpath; |
6922 | int r; |
6923 | |
6924 | - current_pgpath = lockless_dereference(m->current_pgpath); |
6925 | + current_pgpath = READ_ONCE(m->current_pgpath); |
6926 | if (!current_pgpath) |
6927 | current_pgpath = choose_pgpath(m, 0); |
6928 | |
6929 | @@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, |
6930 | } |
6931 | |
6932 | if (r == -ENOTCONN) { |
6933 | - if (!lockless_dereference(m->current_pg)) { |
6934 | + if (!READ_ONCE(m->current_pg)) { |
6935 | /* Path status changed, redo selection */ |
6936 | (void) choose_pgpath(m, 0); |
6937 | } |
6938 | @@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_target *ti) |
6939 | return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); |
6940 | |
6941 | /* Guess which priority_group will be used at next mapping time */ |
6942 | - pg = lockless_dereference(m->current_pg); |
6943 | - next_pg = lockless_dereference(m->next_pg); |
6944 | - if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg)) |
6945 | + pg = READ_ONCE(m->current_pg); |
6946 | + next_pg = READ_ONCE(m->next_pg); |
6947 | + if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) |
6948 | pg = next_pg; |
6949 | |
6950 | if (!pg) { |
6951 | diff --git a/drivers/md/md.c b/drivers/md/md.c |
6952 | index 98ea86309ceb..6bf093cef958 100644 |
6953 | --- a/drivers/md/md.c |
6954 | +++ b/drivers/md/md.c |
6955 | @@ -7468,8 +7468,8 @@ void md_wakeup_thread(struct md_thread *thread) |
6956 | { |
6957 | if (thread) { |
6958 | pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); |
6959 | - if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags)) |
6960 | - wake_up(&thread->wqueue); |
6961 | + set_bit(THREAD_WAKEUP, &thread->flags); |
6962 | + wake_up(&thread->wqueue); |
6963 | } |
6964 | } |
6965 | EXPORT_SYMBOL(md_wakeup_thread); |
6966 | diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c |
6967 | index eda38cbe8530..41f2a9f6851d 100644 |
6968 | --- a/drivers/misc/pti.c |
6969 | +++ b/drivers/misc/pti.c |
6970 | @@ -32,7 +32,7 @@ |
6971 | #include <linux/pci.h> |
6972 | #include <linux/mutex.h> |
6973 | #include <linux/miscdevice.h> |
6974 | -#include <linux/pti.h> |
6975 | +#include <linux/intel-pti.h> |
6976 | #include <linux/slab.h> |
6977 | #include <linux/uaccess.h> |
6978 | |
6979 | diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c |
6980 | index 1e688bfec567..9047c0a529b2 100644 |
6981 | --- a/drivers/misc/vmw_balloon.c |
6982 | +++ b/drivers/misc/vmw_balloon.c |
6983 | @@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void) |
6984 | * Check if we are running on VMware's hypervisor and bail out |
6985 | * if we are not. |
6986 | */ |
6987 | - if (x86_hyper != &x86_hyper_vmware) |
6988 | + if (x86_hyper_type != X86_HYPER_VMWARE) |
6989 | return -ENODEV; |
6990 | |
6991 | for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; |
6992 | diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c |
6993 | index c66abd476023..3b0db01ead1f 100644 |
6994 | --- a/drivers/net/ethernet/ibm/ibmvnic.c |
6995 | +++ b/drivers/net/ethernet/ibm/ibmvnic.c |
6996 | @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev) |
6997 | } |
6998 | |
6999 | rc = __ibmvnic_open(netdev); |
7000 | + netif_carrier_on(netdev); |
7001 | mutex_unlock(&adapter->reset_lock); |
7002 | |
7003 | return rc; |
7004 | @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) |
7005 | if (rc) |
7006 | goto ibmvnic_init_fail; |
7007 | |
7008 | + netif_carrier_off(netdev); |
7009 | rc = register_netdev(netdev); |
7010 | if (rc) { |
7011 | dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); |
7012 | diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h |
7013 | index 689c413b7782..d2f9a2dd76a2 100644 |
7014 | --- a/drivers/net/ethernet/intel/fm10k/fm10k.h |
7015 | +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h |
7016 | @@ -526,8 +526,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); |
7017 | int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); |
7018 | int fm10k_ndo_set_vf_vlan(struct net_device *netdev, |
7019 | int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); |
7020 | -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, |
7021 | - int unused); |
7022 | +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, |
7023 | + int __always_unused min_rate, int max_rate); |
7024 | int fm10k_ndo_get_vf_config(struct net_device *netdev, |
7025 | int vf_idx, struct ifla_vf_info *ivi); |
7026 | |
7027 | diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c |
7028 | index 5f4dac0d36ef..e72fd52bacfe 100644 |
7029 | --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c |
7030 | +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c |
7031 | @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) |
7032 | struct fm10k_mbx_info *mbx = &vf_info->mbx; |
7033 | u16 glort = vf_info->glort; |
7034 | |
7035 | + /* process the SM mailbox first to drain outgoing messages */ |
7036 | + hw->mbx.ops.process(hw, &hw->mbx); |
7037 | + |
7038 | /* verify port mapping is valid, if not reset port */ |
7039 | if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) |
7040 | hw->iov.ops.reset_lport(hw, vf_info); |
7041 | @@ -482,7 +485,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, |
7042 | } |
7043 | |
7044 | int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, |
7045 | - int __always_unused unused, int rate) |
7046 | + int __always_unused min_rate, int max_rate) |
7047 | { |
7048 | struct fm10k_intfc *interface = netdev_priv(netdev); |
7049 | struct fm10k_iov_data *iov_data = interface->iov_data; |
7050 | @@ -493,14 +496,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, |
7051 | return -EINVAL; |
7052 | |
7053 | /* rate limit cannot be less than 10Mbs or greater than link speed */ |
7054 | - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) |
7055 | + if (max_rate && |
7056 | + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) |
7057 | return -EINVAL; |
7058 | |
7059 | /* store values */ |
7060 | - iov_data->vf_info[vf_idx].rate = rate; |
7061 | + iov_data->vf_info[vf_idx].rate = max_rate; |
7062 | |
7063 | /* update hardware configuration */ |
7064 | - hw->iov.ops.configure_tc(hw, vf_idx, rate); |
7065 | + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); |
7066 | |
7067 | return 0; |
7068 | } |
7069 | diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c |
7070 | index ea20aacd5e1d..b2cde9b16d82 100644 |
7071 | --- a/drivers/net/ethernet/intel/i40e/i40e_main.c |
7072 | +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c |
7073 | @@ -2874,14 +2874,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) |
7074 | static void i40e_config_xps_tx_ring(struct i40e_ring *ring) |
7075 | { |
7076 | struct i40e_vsi *vsi = ring->vsi; |
7077 | + int cpu; |
7078 | |
7079 | if (!ring->q_vector || !ring->netdev) |
7080 | return; |
7081 | |
7082 | if ((vsi->tc_config.numtc <= 1) && |
7083 | !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) { |
7084 | - netif_set_xps_queue(ring->netdev, |
7085 | - get_cpu_mask(ring->q_vector->v_idx), |
7086 | + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); |
7087 | + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), |
7088 | ring->queue_index); |
7089 | } |
7090 | |
7091 | @@ -3471,6 +3472,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) |
7092 | int tx_int_idx = 0; |
7093 | int vector, err; |
7094 | int irq_num; |
7095 | + int cpu; |
7096 | |
7097 | for (vector = 0; vector < q_vectors; vector++) { |
7098 | struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; |
7099 | @@ -3506,10 +3508,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) |
7100 | q_vector->affinity_notify.notify = i40e_irq_affinity_notify; |
7101 | q_vector->affinity_notify.release = i40e_irq_affinity_release; |
7102 | irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); |
7103 | - /* get_cpu_mask returns a static constant mask with |
7104 | - * a permanent lifetime so it's ok to use here. |
7105 | + /* Spread affinity hints out across online CPUs. |
7106 | + * |
7107 | + * get_cpu_mask returns a static constant mask with |
7108 | + * a permanent lifetime so it's ok to pass to |
7109 | + * irq_set_affinity_hint without making a copy. |
7110 | */ |
7111 | - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); |
7112 | + cpu = cpumask_local_spread(q_vector->v_idx, -1); |
7113 | + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); |
7114 | } |
7115 | |
7116 | vsi->irqs_ready = true; |
7117 | diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |
7118 | index 4d1e670f490e..e368b0237a1b 100644 |
7119 | --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |
7120 | +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |
7121 | @@ -1008,8 +1008,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) |
7122 | set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); |
7123 | clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); |
7124 | /* Do not notify the client during VF init */ |
7125 | - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, |
7126 | - &vf->vf_states)) |
7127 | + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, |
7128 | + &vf->vf_states)) |
7129 | i40e_notify_client_of_vf_reset(pf, abs_vf_id); |
7130 | vf->num_vlan = 0; |
7131 | } |
7132 | @@ -2779,6 +2779,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) |
7133 | struct i40e_mac_filter *f; |
7134 | struct i40e_vf *vf; |
7135 | int ret = 0; |
7136 | + struct hlist_node *h; |
7137 | int bkt; |
7138 | |
7139 | /* validate the request */ |
7140 | @@ -2817,7 +2818,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) |
7141 | /* Delete all the filters for this VSI - we're going to kill it |
7142 | * anyway. |
7143 | */ |
7144 | - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) |
7145 | + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) |
7146 | __i40e_del_filter(vsi, f); |
7147 | |
7148 | spin_unlock_bh(&vsi->mac_filter_hash_lock); |
7149 | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c |
7150 | index 1825d956bb00..1ccad6f30ebf 100644 |
7151 | --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c |
7152 | +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c |
7153 | @@ -546,6 +546,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) |
7154 | unsigned int vector, q_vectors; |
7155 | unsigned int rx_int_idx = 0, tx_int_idx = 0; |
7156 | int irq_num, err; |
7157 | + int cpu; |
7158 | |
7159 | i40evf_irq_disable(adapter); |
7160 | /* Decrement for Other and TCP Timer vectors */ |
7161 | @@ -584,10 +585,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) |
7162 | q_vector->affinity_notify.release = |
7163 | i40evf_irq_affinity_release; |
7164 | irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); |
7165 | - /* get_cpu_mask returns a static constant mask with |
7166 | - * a permanent lifetime so it's ok to use here. |
7167 | + /* Spread the IRQ affinity hints across online CPUs. Note that |
7168 | + * get_cpu_mask returns a mask with a permanent lifetime so |
7169 | + * it's safe to use as a hint for irq_set_affinity_hint. |
7170 | */ |
7171 | - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); |
7172 | + cpu = cpumask_local_spread(q_vector->v_idx, -1); |
7173 | + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); |
7174 | } |
7175 | |
7176 | return 0; |
7177 | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c |
7178 | index b0031c5ff767..667dbc7d4a4e 100644 |
7179 | --- a/drivers/net/ethernet/intel/igb/igb_main.c |
7180 | +++ b/drivers/net/ethernet/intel/igb/igb_main.c |
7181 | @@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter) |
7182 | /* Setup and initialize a copy of the hw vlan table array */ |
7183 | adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), |
7184 | GFP_ATOMIC); |
7185 | + if (!adapter->shadow_vfta) |
7186 | + return -ENOMEM; |
7187 | |
7188 | /* This call may decrease the number of queues */ |
7189 | if (igb_init_interrupt_scheme(adapter, true)) { |
7190 | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |
7191 | index 6e6ab6f6875e..64429a14c630 100644 |
7192 | --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |
7193 | +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c |
7194 | @@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, |
7195 | fw_cmd.ver_build = build; |
7196 | fw_cmd.ver_sub = sub; |
7197 | fw_cmd.hdr.checksum = 0; |
7198 | - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, |
7199 | - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); |
7200 | fw_cmd.pad = 0; |
7201 | fw_cmd.pad2 = 0; |
7202 | + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, |
7203 | + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); |
7204 | |
7205 | for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { |
7206 | ret_val = ixgbe_host_interface_command(hw, &fw_cmd, |
7207 | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c |
7208 | index 19fbb2f28ea4..8a85217845ae 100644 |
7209 | --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c |
7210 | +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c |
7211 | @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, |
7212 | /* convert offset from words to bytes */ |
7213 | buffer.address = cpu_to_be32((offset + current_word) * 2); |
7214 | buffer.length = cpu_to_be16(words_to_read * 2); |
7215 | + buffer.pad2 = 0; |
7216 | + buffer.pad3 = 0; |
7217 | |
7218 | status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), |
7219 | IXGBE_HI_COMMAND_TIMEOUT); |
7220 | diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c |
7221 | index c1e52b9dc58d..5f93e6add563 100644 |
7222 | --- a/drivers/net/phy/at803x.c |
7223 | +++ b/drivers/net/phy/at803x.c |
7224 | @@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev, |
7225 | mac = (const u8 *) ndev->dev_addr; |
7226 | |
7227 | if (!is_valid_ether_addr(mac)) |
7228 | - return -EFAULT; |
7229 | + return -EINVAL; |
7230 | |
7231 | for (i = 0; i < 3; i++) { |
7232 | phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, |
7233 | diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c |
7234 | index ac41c8be9200..0fd8e164339c 100644 |
7235 | --- a/drivers/pci/iov.c |
7236 | +++ b/drivers/pci/iov.c |
7237 | @@ -162,7 +162,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) |
7238 | |
7239 | pci_device_add(virtfn, virtfn->bus); |
7240 | |
7241 | - pci_bus_add_device(virtfn); |
7242 | sprintf(buf, "virtfn%u", id); |
7243 | rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); |
7244 | if (rc) |
7245 | @@ -173,6 +172,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) |
7246 | |
7247 | kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); |
7248 | |
7249 | + pci_bus_add_device(virtfn); |
7250 | + |
7251 | return 0; |
7252 | |
7253 | failed2: |
7254 | diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c |
7255 | index 6078dfc11b11..74f1c57ab93b 100644 |
7256 | --- a/drivers/pci/pci.c |
7257 | +++ b/drivers/pci/pci.c |
7258 | @@ -4356,6 +4356,10 @@ static bool pci_bus_resetable(struct pci_bus *bus) |
7259 | { |
7260 | struct pci_dev *dev; |
7261 | |
7262 | + |
7263 | + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)) |
7264 | + return false; |
7265 | + |
7266 | list_for_each_entry(dev, &bus->devices, bus_list) { |
7267 | if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || |
7268 | (dev->subordinate && !pci_bus_resetable(dev->subordinate))) |
7269 | diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c |
7270 | index 890efcc574cb..744805232155 100644 |
7271 | --- a/drivers/pci/pcie/aer/aerdrv_core.c |
7272 | +++ b/drivers/pci/pcie/aer/aerdrv_core.c |
7273 | @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, |
7274 | * If the error is reported by an end point, we think this |
7275 | * error is related to the upstream link of the end point. |
7276 | */ |
7277 | - pci_walk_bus(dev->bus, cb, &result_data); |
7278 | + if (state == pci_channel_io_normal) |
7279 | + /* |
7280 | + * the error is non fatal so the bus is ok, just invoke |
7281 | + * the callback for the function that logged the error. |
7282 | + */ |
7283 | + cb(dev, &result_data); |
7284 | + else |
7285 | + pci_walk_bus(dev->bus, cb, &result_data); |
7286 | } |
7287 | |
7288 | return result_data.result; |
7289 | diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c |
7290 | index f3796164329e..d4aeac3477f5 100644 |
7291 | --- a/drivers/platform/x86/asus-wireless.c |
7292 | +++ b/drivers/platform/x86/asus-wireless.c |
7293 | @@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event) |
7294 | return; |
7295 | } |
7296 | input_report_key(data->idev, KEY_RFKILL, 1); |
7297 | + input_sync(data->idev); |
7298 | input_report_key(data->idev, KEY_RFKILL, 0); |
7299 | input_sync(data->idev); |
7300 | } |
7301 | diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c |
7302 | index 8cec9a02c0b8..9eb32ead63db 100644 |
7303 | --- a/drivers/rtc/interface.c |
7304 | +++ b/drivers/rtc/interface.c |
7305 | @@ -779,7 +779,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) |
7306 | } |
7307 | |
7308 | timerqueue_add(&rtc->timerqueue, &timer->node); |
7309 | - if (!next) { |
7310 | + if (!next || ktime_before(timer->node.expires, next->expires)) { |
7311 | struct rtc_wkalrm alarm; |
7312 | int err; |
7313 | alarm.time = rtc_ktime_to_tm(timer->node.expires); |
7314 | diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c |
7315 | index e1687e19c59f..a30f24cb6c83 100644 |
7316 | --- a/drivers/rtc/rtc-pl031.c |
7317 | +++ b/drivers/rtc/rtc-pl031.c |
7318 | @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev) |
7319 | |
7320 | dev_pm_clear_wake_irq(&adev->dev); |
7321 | device_init_wakeup(&adev->dev, false); |
7322 | - free_irq(adev->irq[0], ldata); |
7323 | + if (adev->irq[0]) |
7324 | + free_irq(adev->irq[0], ldata); |
7325 | rtc_device_unregister(ldata->rtc); |
7326 | iounmap(ldata->base); |
7327 | kfree(ldata); |
7328 | @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) |
7329 | goto out_no_rtc; |
7330 | } |
7331 | |
7332 | - if (request_irq(adev->irq[0], pl031_interrupt, |
7333 | - vendor->irqflags, "rtc-pl031", ldata)) { |
7334 | - ret = -EIO; |
7335 | - goto out_no_irq; |
7336 | + if (adev->irq[0]) { |
7337 | + ret = request_irq(adev->irq[0], pl031_interrupt, |
7338 | + vendor->irqflags, "rtc-pl031", ldata); |
7339 | + if (ret) |
7340 | + goto out_no_irq; |
7341 | + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); |
7342 | } |
7343 | - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); |
7344 | return 0; |
7345 | |
7346 | out_no_irq: |
7347 | diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c |
7348 | index 1d02cf9fe06c..30d5f0ef29bb 100644 |
7349 | --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c |
7350 | +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c |
7351 | @@ -1575,6 +1575,7 @@ static void release_offload_resources(struct cxgbi_sock *csk) |
7352 | csk, csk->state, csk->flags, csk->tid); |
7353 | |
7354 | cxgbi_sock_free_cpl_skbs(csk); |
7355 | + cxgbi_sock_purge_write_queue(csk); |
7356 | if (csk->wr_cred != csk->wr_max_cred) { |
7357 | cxgbi_sock_purge_wr_queue(csk); |
7358 | cxgbi_sock_reset_wr_list(csk); |
7359 | diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c |
7360 | index 499df9d17339..d9a03beb76a4 100644 |
7361 | --- a/drivers/scsi/lpfc/lpfc_hbadisc.c |
7362 | +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c |
7363 | @@ -4983,7 +4983,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) |
7364 | lpfc_cancel_retry_delay_tmo(vport, ndlp); |
7365 | if ((ndlp->nlp_flag & NLP_DEFER_RM) && |
7366 | !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) && |
7367 | - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { |
7368 | + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) && |
7369 | + phba->sli_rev != LPFC_SLI_REV4) { |
7370 | /* For this case we need to cleanup the default rpi |
7371 | * allocated by the firmware. |
7372 | */ |
7373 | diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h |
7374 | index 1db0a38683f4..2b145966c73f 100644 |
7375 | --- a/drivers/scsi/lpfc/lpfc_hw4.h |
7376 | +++ b/drivers/scsi/lpfc/lpfc_hw4.h |
7377 | @@ -3636,7 +3636,7 @@ struct lpfc_mbx_get_port_name { |
7378 | #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4 |
7379 | #define MB_CQE_STATUS_DMA_FAILED 0x5 |
7380 | |
7381 | -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8 |
7382 | +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1 |
7383 | struct lpfc_mbx_wr_object { |
7384 | struct mbox_header header; |
7385 | union { |
7386 | diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c |
7387 | index 3c5b054a56ac..7ac1a067d780 100644 |
7388 | --- a/drivers/scsi/lpfc/lpfc_nvmet.c |
7389 | +++ b/drivers/scsi/lpfc/lpfc_nvmet.c |
7390 | @@ -1464,6 +1464,7 @@ static struct lpfc_nvmet_ctxbuf * |
7391 | lpfc_nvmet_replenish_context(struct lpfc_hba *phba, |
7392 | struct lpfc_nvmet_ctx_info *current_infop) |
7393 | { |
7394 | +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) |
7395 | struct lpfc_nvmet_ctxbuf *ctx_buf = NULL; |
7396 | struct lpfc_nvmet_ctx_info *get_infop; |
7397 | int i; |
7398 | @@ -1511,6 +1512,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba, |
7399 | get_infop = get_infop->nvmet_ctx_next_cpu; |
7400 | } |
7401 | |
7402 | +#endif |
7403 | /* Nothing found, all contexts for the MRQ are in-flight */ |
7404 | return NULL; |
7405 | } |
7406 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
7407 | index 22998cbd538f..33ff691878e2 100644 |
7408 | --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
7409 | +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
7410 | @@ -4804,6 +4804,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) |
7411 | } else if (log_info == VIRTUAL_IO_FAILED_RETRY) { |
7412 | scmd->result = DID_RESET << 16; |
7413 | break; |
7414 | + } else if ((scmd->device->channel == RAID_CHANNEL) && |
7415 | + (scsi_state == (MPI2_SCSI_STATE_TERMINATED | |
7416 | + MPI2_SCSI_STATE_NO_SCSI_STATUS))) { |
7417 | + scmd->result = DID_RESET << 16; |
7418 | + break; |
7419 | } |
7420 | scmd->result = DID_SOFT_ERROR << 16; |
7421 | break; |
7422 | diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c |
7423 | index 3f4148c92308..0f538b8c3a07 100644 |
7424 | --- a/drivers/staging/greybus/light.c |
7425 | +++ b/drivers/staging/greybus/light.c |
7426 | @@ -925,6 +925,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel) |
7427 | return; |
7428 | |
7429 | led_classdev_unregister(cdev); |
7430 | + kfree(cdev->name); |
7431 | + cdev->name = NULL; |
7432 | channel->led = NULL; |
7433 | } |
7434 | |
7435 | diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c |
7436 | index 7952357df9c8..edb6e4e9ef3a 100644 |
7437 | --- a/drivers/tee/optee/core.c |
7438 | +++ b/drivers/tee/optee/core.c |
7439 | @@ -590,7 +590,6 @@ static int __init optee_driver_init(void) |
7440 | return -ENODEV; |
7441 | |
7442 | np = of_find_matching_node(fw_np, optee_match); |
7443 | - of_node_put(fw_np); |
7444 | if (!np) |
7445 | return -ENODEV; |
7446 | |
7447 | diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c |
7448 | index bd3572c41585..6d8906d65476 100644 |
7449 | --- a/drivers/thermal/hisi_thermal.c |
7450 | +++ b/drivers/thermal/hisi_thermal.c |
7451 | @@ -35,8 +35,9 @@ |
7452 | #define TEMP0_RST_MSK (0x1C) |
7453 | #define TEMP0_VALUE (0x28) |
7454 | |
7455 | -#define HISI_TEMP_BASE (-60) |
7456 | +#define HISI_TEMP_BASE (-60000) |
7457 | #define HISI_TEMP_RESET (100000) |
7458 | +#define HISI_TEMP_STEP (784) |
7459 | |
7460 | #define HISI_MAX_SENSORS 4 |
7461 | |
7462 | @@ -61,19 +62,38 @@ struct hisi_thermal_data { |
7463 | void __iomem *regs; |
7464 | }; |
7465 | |
7466 | -/* in millicelsius */ |
7467 | -static inline int _step_to_temp(int step) |
7468 | +/* |
7469 | + * The temperature computation on the tsensor is as follows: |
7470 | + * Unit: millidegree Celsius |
7471 | + * Step: 200/255 (0.7843) |
7472 | + * Temperature base: -60°C |
7473 | + * |
7474 | + * The register is programmed in temperature steps, every step is 784 |
7475 | + * millidegree and begins at -60 000 m°C |
7476 | + * |
7477 | + * The temperature from the steps: |
7478 | + * |
7479 | + * Temp = TempBase + (steps x 784) |
7480 | + * |
7481 | + * and the steps from the temperature: |
7482 | + * |
7483 | + * steps = (Temp - TempBase) / 784 |
7484 | + * |
7485 | + */ |
7486 | +static inline int hisi_thermal_step_to_temp(int step) |
7487 | { |
7488 | - /* |
7489 | - * Every step equals (1 * 200) / 255 celsius, and finally |
7490 | - * need convert to millicelsius. |
7491 | - */ |
7492 | - return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255)); |
7493 | + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); |
7494 | +} |
7495 | + |
7496 | +static inline long hisi_thermal_temp_to_step(long temp) |
7497 | +{ |
7498 | + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; |
7499 | } |
7500 | |
7501 | -static inline long _temp_to_step(long temp) |
7502 | +static inline long hisi_thermal_round_temp(int temp) |
7503 | { |
7504 | - return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000; |
7505 | + return hisi_thermal_step_to_temp( |
7506 | + hisi_thermal_temp_to_step(temp)); |
7507 | } |
7508 | |
7509 | static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, |
7510 | @@ -99,7 +119,7 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, |
7511 | usleep_range(3000, 5000); |
7512 | |
7513 | val = readl(data->regs + TEMP0_VALUE); |
7514 | - val = _step_to_temp(val); |
7515 | + val = hisi_thermal_step_to_temp(val); |
7516 | |
7517 | mutex_unlock(&data->thermal_lock); |
7518 | |
7519 | @@ -126,10 +146,11 @@ static void hisi_thermal_enable_bind_irq_sensor |
7520 | writel((sensor->id << 12), data->regs + TEMP0_CFG); |
7521 | |
7522 | /* enable for interrupt */ |
7523 | - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, |
7524 | + writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, |
7525 | data->regs + TEMP0_TH); |
7526 | |
7527 | - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH); |
7528 | + writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), |
7529 | + data->regs + TEMP0_RST_TH); |
7530 | |
7531 | /* enable module */ |
7532 | writel(0x1, data->regs + TEMP0_RST_MSK); |
7533 | @@ -230,7 +251,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) |
7534 | sensor = &data->sensors[data->irq_bind_sensor]; |
7535 | |
7536 | dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", |
7537 | - sensor->thres_temp / 1000); |
7538 | + sensor->thres_temp); |
7539 | mutex_unlock(&data->thermal_lock); |
7540 | |
7541 | for (i = 0; i < HISI_MAX_SENSORS; i++) { |
7542 | @@ -269,7 +290,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, |
7543 | |
7544 | for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { |
7545 | if (trip[i].type == THERMAL_TRIP_PASSIVE) { |
7546 | - sensor->thres_temp = trip[i].temperature; |
7547 | + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature); |
7548 | break; |
7549 | } |
7550 | } |
7551 | @@ -317,15 +338,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) |
7552 | if (data->irq < 0) |
7553 | return data->irq; |
7554 | |
7555 | - ret = devm_request_threaded_irq(&pdev->dev, data->irq, |
7556 | - hisi_thermal_alarm_irq, |
7557 | - hisi_thermal_alarm_irq_thread, |
7558 | - 0, "hisi_thermal", data); |
7559 | - if (ret < 0) { |
7560 | - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); |
7561 | - return ret; |
7562 | - } |
7563 | - |
7564 | platform_set_drvdata(pdev, data); |
7565 | |
7566 | data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); |
7567 | @@ -345,8 +357,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) |
7568 | } |
7569 | |
7570 | hisi_thermal_enable_bind_irq_sensor(data); |
7571 | - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED, |
7572 | - &data->irq_enabled); |
7573 | + data->irq_enabled = true; |
7574 | |
7575 | for (i = 0; i < HISI_MAX_SENSORS; ++i) { |
7576 | ret = hisi_thermal_register_sensor(pdev, data, |
7577 | @@ -358,6 +369,17 @@ static int hisi_thermal_probe(struct platform_device *pdev) |
7578 | hisi_thermal_toggle_sensor(&data->sensors[i], true); |
7579 | } |
7580 | |
7581 | + ret = devm_request_threaded_irq(&pdev->dev, data->irq, |
7582 | + hisi_thermal_alarm_irq, |
7583 | + hisi_thermal_alarm_irq_thread, |
7584 | + 0, "hisi_thermal", data); |
7585 | + if (ret < 0) { |
7586 | + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); |
7587 | + return ret; |
7588 | + } |
7589 | + |
7590 | + enable_irq(data->irq); |
7591 | + |
7592 | return 0; |
7593 | } |
7594 | |
7595 | diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c |
7596 | index 5628fe114347..91335e6de88a 100644 |
7597 | --- a/drivers/vfio/pci/vfio_pci_config.c |
7598 | +++ b/drivers/vfio/pci/vfio_pci_config.c |
7599 | @@ -849,11 +849,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) |
7600 | |
7601 | /* |
7602 | * Allow writes to device control fields, except devctl_phantom, |
7603 | - * which could confuse IOMMU, and the ARI bit in devctl2, which |
7604 | + * which could confuse IOMMU, MPS, which can break communication |
7605 | + * with other physical devices, and the ARI bit in devctl2, which |
7606 | * is set at probe time. FLR gets virtualized via our writefn. |
7607 | */ |
7608 | p_setw(perm, PCI_EXP_DEVCTL, |
7609 | - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); |
7610 | + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, |
7611 | + ~PCI_EXP_DEVCTL_PHANTOM); |
7612 | p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); |
7613 | return 0; |
7614 | } |
7615 | diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c |
7616 | index 9bd17682655a..1c2289ddd555 100644 |
7617 | --- a/drivers/video/backlight/pwm_bl.c |
7618 | +++ b/drivers/video/backlight/pwm_bl.c |
7619 | @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) |
7620 | static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness) |
7621 | { |
7622 | unsigned int lth = pb->lth_brightness; |
7623 | - int duty_cycle; |
7624 | + u64 duty_cycle; |
7625 | |
7626 | if (pb->levels) |
7627 | duty_cycle = pb->levels[brightness]; |
7628 | else |
7629 | duty_cycle = brightness; |
7630 | |
7631 | - return (duty_cycle * (pb->period - lth) / pb->scale) + lth; |
7632 | + duty_cycle *= pb->period - lth; |
7633 | + do_div(duty_cycle, pb->scale); |
7634 | + |
7635 | + return duty_cycle + lth; |
7636 | } |
7637 | |
7638 | static int pwm_backlight_update_status(struct backlight_device *bl) |
7639 | diff --git a/fs/dcache.c b/fs/dcache.c |
7640 | index f90141387f01..34c852af215c 100644 |
7641 | --- a/fs/dcache.c |
7642 | +++ b/fs/dcache.c |
7643 | @@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c |
7644 | { |
7645 | /* |
7646 | * Be careful about RCU walk racing with rename: |
7647 | - * use 'lockless_dereference' to fetch the name pointer. |
7648 | + * use 'READ_ONCE' to fetch the name pointer. |
7649 | * |
7650 | * NOTE! Even if a rename will mean that the length |
7651 | * was not loaded atomically, we don't care. The |
7652 | @@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c |
7653 | * early because the data cannot match (there can |
7654 | * be no NUL in the ct/tcount data) |
7655 | */ |
7656 | - const unsigned char *cs = lockless_dereference(dentry->d_name.name); |
7657 | + const unsigned char *cs = READ_ONCE(dentry->d_name.name); |
7658 | |
7659 | return dentry_string_cmp(cs, ct, tcount); |
7660 | } |
7661 | diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h |
7662 | index 25d9b5adcd42..36b49bd09264 100644 |
7663 | --- a/fs/overlayfs/ovl_entry.h |
7664 | +++ b/fs/overlayfs/ovl_entry.h |
7665 | @@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode) |
7666 | |
7667 | static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) |
7668 | { |
7669 | - return lockless_dereference(oi->__upperdentry); |
7670 | + return READ_ONCE(oi->__upperdentry); |
7671 | } |
7672 | diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c |
7673 | index b2c7f33e08fc..d94a51dc4e32 100644 |
7674 | --- a/fs/overlayfs/readdir.c |
7675 | +++ b/fs/overlayfs/readdir.c |
7676 | @@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, |
7677 | if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { |
7678 | struct inode *inode = file_inode(file); |
7679 | |
7680 | - realfile = lockless_dereference(od->upperfile); |
7681 | + realfile = READ_ONCE(od->upperfile); |
7682 | if (!realfile) { |
7683 | struct path upperpath; |
7684 | |
7685 | diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h |
7686 | index e549bff87c5b..353f52fdc35e 100644 |
7687 | --- a/include/asm-generic/vmlinux.lds.h |
7688 | +++ b/include/asm-generic/vmlinux.lds.h |
7689 | @@ -688,7 +688,7 @@ |
7690 | #define BUG_TABLE |
7691 | #endif |
7692 | |
7693 | -#ifdef CONFIG_ORC_UNWINDER |
7694 | +#ifdef CONFIG_UNWINDER_ORC |
7695 | #define ORC_UNWIND_TABLE \ |
7696 | . = ALIGN(4); \ |
7697 | .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ |
7698 | diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h |
7699 | index b8d200f60a40..73bec75b74c8 100644 |
7700 | --- a/include/linux/bpf_verifier.h |
7701 | +++ b/include/linux/bpf_verifier.h |
7702 | @@ -15,11 +15,11 @@ |
7703 | * In practice this is far bigger than any realistic pointer offset; this limit |
7704 | * ensures that umax_value + (int)off + (int)size cannot overflow a u64. |
7705 | */ |
7706 | -#define BPF_MAX_VAR_OFF (1ULL << 31) |
7707 | +#define BPF_MAX_VAR_OFF (1 << 29) |
7708 | /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures |
7709 | * that converting umax_value to int cannot overflow. |
7710 | */ |
7711 | -#define BPF_MAX_VAR_SIZ INT_MAX |
7712 | +#define BPF_MAX_VAR_SIZ (1 << 29) |
7713 | |
7714 | /* Liveness marks, used for registers and spilled-regs (in stack slots). |
7715 | * Read marks propagate upwards until they find a write mark; they record that |
7716 | @@ -110,7 +110,7 @@ struct bpf_insn_aux_data { |
7717 | struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ |
7718 | }; |
7719 | int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ |
7720 | - int converted_op_size; /* the valid value width after perceived conversion */ |
7721 | + bool seen; /* this insn was processed by the verifier */ |
7722 | }; |
7723 | |
7724 | #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ |
7725 | diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h |
7726 | index 780b1242bf24..3b609edffa8f 100644 |
7727 | --- a/include/linux/compiler-clang.h |
7728 | +++ b/include/linux/compiler-clang.h |
7729 | @@ -1,5 +1,5 @@ |
7730 | /* SPDX-License-Identifier: GPL-2.0 */ |
7731 | -#ifndef __LINUX_COMPILER_H |
7732 | +#ifndef __LINUX_COMPILER_TYPES_H |
7733 | #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." |
7734 | #endif |
7735 | |
7736 | diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h |
7737 | index bb78e5bdff26..2272ded07496 100644 |
7738 | --- a/include/linux/compiler-gcc.h |
7739 | +++ b/include/linux/compiler-gcc.h |
7740 | @@ -1,5 +1,5 @@ |
7741 | /* SPDX-License-Identifier: GPL-2.0 */ |
7742 | -#ifndef __LINUX_COMPILER_H |
7743 | +#ifndef __LINUX_COMPILER_TYPES_H |
7744 | #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." |
7745 | #endif |
7746 | |
7747 | diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h |
7748 | index 523d1b74550f..bfa08160db3a 100644 |
7749 | --- a/include/linux/compiler-intel.h |
7750 | +++ b/include/linux/compiler-intel.h |
7751 | @@ -1,5 +1,5 @@ |
7752 | /* SPDX-License-Identifier: GPL-2.0 */ |
7753 | -#ifndef __LINUX_COMPILER_H |
7754 | +#ifndef __LINUX_COMPILER_TYPES_H |
7755 | #error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead." |
7756 | #endif |
7757 | |
7758 | diff --git a/include/linux/compiler.h b/include/linux/compiler.h |
7759 | index 202710420d6d..fab5dc250c61 100644 |
7760 | --- a/include/linux/compiler.h |
7761 | +++ b/include/linux/compiler.h |
7762 | @@ -2,111 +2,12 @@ |
7763 | #ifndef __LINUX_COMPILER_H |
7764 | #define __LINUX_COMPILER_H |
7765 | |
7766 | -#ifndef __ASSEMBLY__ |
7767 | +#include <linux/compiler_types.h> |
7768 | |
7769 | -#ifdef __CHECKER__ |
7770 | -# define __user __attribute__((noderef, address_space(1))) |
7771 | -# define __kernel __attribute__((address_space(0))) |
7772 | -# define __safe __attribute__((safe)) |
7773 | -# define __force __attribute__((force)) |
7774 | -# define __nocast __attribute__((nocast)) |
7775 | -# define __iomem __attribute__((noderef, address_space(2))) |
7776 | -# define __must_hold(x) __attribute__((context(x,1,1))) |
7777 | -# define __acquires(x) __attribute__((context(x,0,1))) |
7778 | -# define __releases(x) __attribute__((context(x,1,0))) |
7779 | -# define __acquire(x) __context__(x,1) |
7780 | -# define __release(x) __context__(x,-1) |
7781 | -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) |
7782 | -# define __percpu __attribute__((noderef, address_space(3))) |
7783 | -# define __rcu __attribute__((noderef, address_space(4))) |
7784 | -# define __private __attribute__((noderef)) |
7785 | -extern void __chk_user_ptr(const volatile void __user *); |
7786 | -extern void __chk_io_ptr(const volatile void __iomem *); |
7787 | -# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) |
7788 | -#else /* __CHECKER__ */ |
7789 | -# ifdef STRUCTLEAK_PLUGIN |
7790 | -# define __user __attribute__((user)) |
7791 | -# else |
7792 | -# define __user |
7793 | -# endif |
7794 | -# define __kernel |
7795 | -# define __safe |
7796 | -# define __force |
7797 | -# define __nocast |
7798 | -# define __iomem |
7799 | -# define __chk_user_ptr(x) (void)0 |
7800 | -# define __chk_io_ptr(x) (void)0 |
7801 | -# define __builtin_warning(x, y...) (1) |
7802 | -# define __must_hold(x) |
7803 | -# define __acquires(x) |
7804 | -# define __releases(x) |
7805 | -# define __acquire(x) (void)0 |
7806 | -# define __release(x) (void)0 |
7807 | -# define __cond_lock(x,c) (c) |
7808 | -# define __percpu |
7809 | -# define __rcu |
7810 | -# define __private |
7811 | -# define ACCESS_PRIVATE(p, member) ((p)->member) |
7812 | -#endif /* __CHECKER__ */ |
7813 | - |
7814 | -/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ |
7815 | -#define ___PASTE(a,b) a##b |
7816 | -#define __PASTE(a,b) ___PASTE(a,b) |
7817 | +#ifndef __ASSEMBLY__ |
7818 | |
7819 | #ifdef __KERNEL__ |
7820 | |
7821 | -#ifdef __GNUC__ |
7822 | -#include <linux/compiler-gcc.h> |
7823 | -#endif |
7824 | - |
7825 | -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) |
7826 | -#define notrace __attribute__((hotpatch(0,0))) |
7827 | -#else |
7828 | -#define notrace __attribute__((no_instrument_function)) |
7829 | -#endif |
7830 | - |
7831 | -/* Intel compiler defines __GNUC__. So we will overwrite implementations |
7832 | - * coming from above header files here |
7833 | - */ |
7834 | -#ifdef __INTEL_COMPILER |
7835 | -# include <linux/compiler-intel.h> |
7836 | -#endif |
7837 | - |
7838 | -/* Clang compiler defines __GNUC__. So we will overwrite implementations |
7839 | - * coming from above header files here |
7840 | - */ |
7841 | -#ifdef __clang__ |
7842 | -#include <linux/compiler-clang.h> |
7843 | -#endif |
7844 | - |
7845 | -/* |
7846 | - * Generic compiler-dependent macros required for kernel |
7847 | - * build go below this comment. Actual compiler/compiler version |
7848 | - * specific implementations come from the above header files |
7849 | - */ |
7850 | - |
7851 | -struct ftrace_branch_data { |
7852 | - const char *func; |
7853 | - const char *file; |
7854 | - unsigned line; |
7855 | - union { |
7856 | - struct { |
7857 | - unsigned long correct; |
7858 | - unsigned long incorrect; |
7859 | - }; |
7860 | - struct { |
7861 | - unsigned long miss; |
7862 | - unsigned long hit; |
7863 | - }; |
7864 | - unsigned long miss_hit[2]; |
7865 | - }; |
7866 | -}; |
7867 | - |
7868 | -struct ftrace_likely_data { |
7869 | - struct ftrace_branch_data data; |
7870 | - unsigned long constant; |
7871 | -}; |
7872 | - |
7873 | /* |
7874 | * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code |
7875 | * to disable branch tracing on a per file basis. |
7876 | @@ -333,6 +234,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s |
7877 | * with an explicit memory barrier or atomic instruction that provides the |
7878 | * required ordering. |
7879 | */ |
7880 | +#include <asm/barrier.h> |
7881 | |
7882 | #define __READ_ONCE(x, check) \ |
7883 | ({ \ |
7884 | @@ -341,6 +243,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s |
7885 | __read_once_size(&(x), __u.__c, sizeof(x)); \ |
7886 | else \ |
7887 | __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ |
7888 | + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ |
7889 | __u.__val; \ |
7890 | }) |
7891 | #define READ_ONCE(x) __READ_ONCE(x, 1) |
7892 | @@ -363,167 +266,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s |
7893 | |
7894 | #endif /* __ASSEMBLY__ */ |
7895 | |
7896 | -#ifdef __KERNEL__ |
7897 | -/* |
7898 | - * Allow us to mark functions as 'deprecated' and have gcc emit a nice |
7899 | - * warning for each use, in hopes of speeding the functions removal. |
7900 | - * Usage is: |
7901 | - * int __deprecated foo(void) |
7902 | - */ |
7903 | -#ifndef __deprecated |
7904 | -# define __deprecated /* unimplemented */ |
7905 | -#endif |
7906 | - |
7907 | -#ifdef MODULE |
7908 | -#define __deprecated_for_modules __deprecated |
7909 | -#else |
7910 | -#define __deprecated_for_modules |
7911 | -#endif |
7912 | - |
7913 | -#ifndef __must_check |
7914 | -#define __must_check |
7915 | -#endif |
7916 | - |
7917 | -#ifndef CONFIG_ENABLE_MUST_CHECK |
7918 | -#undef __must_check |
7919 | -#define __must_check |
7920 | -#endif |
7921 | -#ifndef CONFIG_ENABLE_WARN_DEPRECATED |
7922 | -#undef __deprecated |
7923 | -#undef __deprecated_for_modules |
7924 | -#define __deprecated |
7925 | -#define __deprecated_for_modules |
7926 | -#endif |
7927 | - |
7928 | -#ifndef __malloc |
7929 | -#define __malloc |
7930 | -#endif |
7931 | - |
7932 | -/* |
7933 | - * Allow us to avoid 'defined but not used' warnings on functions and data, |
7934 | - * as well as force them to be emitted to the assembly file. |
7935 | - * |
7936 | - * As of gcc 3.4, static functions that are not marked with attribute((used)) |
7937 | - * may be elided from the assembly file. As of gcc 3.4, static data not so |
7938 | - * marked will not be elided, but this may change in a future gcc version. |
7939 | - * |
7940 | - * NOTE: Because distributions shipped with a backported unit-at-a-time |
7941 | - * compiler in gcc 3.3, we must define __used to be __attribute__((used)) |
7942 | - * for gcc >=3.3 instead of 3.4. |
7943 | - * |
7944 | - * In prior versions of gcc, such functions and data would be emitted, but |
7945 | - * would be warned about except with attribute((unused)). |
7946 | - * |
7947 | - * Mark functions that are referenced only in inline assembly as __used so |
7948 | - * the code is emitted even though it appears to be unreferenced. |
7949 | - */ |
7950 | -#ifndef __used |
7951 | -# define __used /* unimplemented */ |
7952 | -#endif |
7953 | - |
7954 | -#ifndef __maybe_unused |
7955 | -# define __maybe_unused /* unimplemented */ |
7956 | -#endif |
7957 | - |
7958 | -#ifndef __always_unused |
7959 | -# define __always_unused /* unimplemented */ |
7960 | -#endif |
7961 | - |
7962 | -#ifndef noinline |
7963 | -#define noinline |
7964 | -#endif |
7965 | - |
7966 | -/* |
7967 | - * Rather then using noinline to prevent stack consumption, use |
7968 | - * noinline_for_stack instead. For documentation reasons. |
7969 | - */ |
7970 | -#define noinline_for_stack noinline |
7971 | - |
7972 | -#ifndef __always_inline |
7973 | -#define __always_inline inline |
7974 | -#endif |
7975 | - |
7976 | -#endif /* __KERNEL__ */ |
7977 | - |
7978 | -/* |
7979 | - * From the GCC manual: |
7980 | - * |
7981 | - * Many functions do not examine any values except their arguments, |
7982 | - * and have no effects except the return value. Basically this is |
7983 | - * just slightly more strict class than the `pure' attribute above, |
7984 | - * since function is not allowed to read global memory. |
7985 | - * |
7986 | - * Note that a function that has pointer arguments and examines the |
7987 | - * data pointed to must _not_ be declared `const'. Likewise, a |
7988 | - * function that calls a non-`const' function usually must not be |
7989 | - * `const'. It does not make sense for a `const' function to return |
7990 | - * `void'. |
7991 | - */ |
7992 | -#ifndef __attribute_const__ |
7993 | -# define __attribute_const__ /* unimplemented */ |
7994 | -#endif |
7995 | - |
7996 | -#ifndef __designated_init |
7997 | -# define __designated_init |
7998 | -#endif |
7999 | - |
8000 | -#ifndef __latent_entropy |
8001 | -# define __latent_entropy |
8002 | -#endif |
8003 | - |
8004 | -#ifndef __randomize_layout |
8005 | -# define __randomize_layout __designated_init |
8006 | -#endif |
8007 | - |
8008 | -#ifndef __no_randomize_layout |
8009 | -# define __no_randomize_layout |
8010 | -#endif |
8011 | - |
8012 | -#ifndef randomized_struct_fields_start |
8013 | -# define randomized_struct_fields_start |
8014 | -# define randomized_struct_fields_end |
8015 | -#endif |
8016 | - |
8017 | -/* |
8018 | - * Tell gcc if a function is cold. The compiler will assume any path |
8019 | - * directly leading to the call is unlikely. |
8020 | - */ |
8021 | - |
8022 | -#ifndef __cold |
8023 | -#define __cold |
8024 | -#endif |
8025 | - |
8026 | -/* Simple shorthand for a section definition */ |
8027 | -#ifndef __section |
8028 | -# define __section(S) __attribute__ ((__section__(#S))) |
8029 | -#endif |
8030 | - |
8031 | -#ifndef __visible |
8032 | -#define __visible |
8033 | -#endif |
8034 | - |
8035 | -#ifndef __nostackprotector |
8036 | -# define __nostackprotector |
8037 | -#endif |
8038 | - |
8039 | -/* |
8040 | - * Assume alignment of return value. |
8041 | - */ |
8042 | -#ifndef __assume_aligned |
8043 | -#define __assume_aligned(a, ...) |
8044 | -#endif |
8045 | - |
8046 | - |
8047 | -/* Are two types/vars the same type (ignoring qualifiers)? */ |
8048 | -#ifndef __same_type |
8049 | -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) |
8050 | -#endif |
8051 | - |
8052 | -/* Is this type a native word size -- useful for atomic operations */ |
8053 | -#ifndef __native_word |
8054 | -# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) |
8055 | -#endif |
8056 | - |
8057 | /* Compile time object size, -1 for unknown */ |
8058 | #ifndef __compiletime_object_size |
8059 | # define __compiletime_object_size(obj) -1 |
8060 | diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h |
8061 | new file mode 100644 |
8062 | index 000000000000..6b79a9bba9a7 |
8063 | --- /dev/null |
8064 | +++ b/include/linux/compiler_types.h |
8065 | @@ -0,0 +1,274 @@ |
8066 | +#ifndef __LINUX_COMPILER_TYPES_H |
8067 | +#define __LINUX_COMPILER_TYPES_H |
8068 | + |
8069 | +#ifndef __ASSEMBLY__ |
8070 | + |
8071 | +#ifdef __CHECKER__ |
8072 | +# define __user __attribute__((noderef, address_space(1))) |
8073 | +# define __kernel __attribute__((address_space(0))) |
8074 | +# define __safe __attribute__((safe)) |
8075 | +# define __force __attribute__((force)) |
8076 | +# define __nocast __attribute__((nocast)) |
8077 | +# define __iomem __attribute__((noderef, address_space(2))) |
8078 | +# define __must_hold(x) __attribute__((context(x,1,1))) |
8079 | +# define __acquires(x) __attribute__((context(x,0,1))) |
8080 | +# define __releases(x) __attribute__((context(x,1,0))) |
8081 | +# define __acquire(x) __context__(x,1) |
8082 | +# define __release(x) __context__(x,-1) |
8083 | +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) |
8084 | +# define __percpu __attribute__((noderef, address_space(3))) |
8085 | +# define __rcu __attribute__((noderef, address_space(4))) |
8086 | +# define __private __attribute__((noderef)) |
8087 | +extern void __chk_user_ptr(const volatile void __user *); |
8088 | +extern void __chk_io_ptr(const volatile void __iomem *); |
8089 | +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) |
8090 | +#else /* __CHECKER__ */ |
8091 | +# ifdef STRUCTLEAK_PLUGIN |
8092 | +# define __user __attribute__((user)) |
8093 | +# else |
8094 | +# define __user |
8095 | +# endif |
8096 | +# define __kernel |
8097 | +# define __safe |
8098 | +# define __force |
8099 | +# define __nocast |
8100 | +# define __iomem |
8101 | +# define __chk_user_ptr(x) (void)0 |
8102 | +# define __chk_io_ptr(x) (void)0 |
8103 | +# define __builtin_warning(x, y...) (1) |
8104 | +# define __must_hold(x) |
8105 | +# define __acquires(x) |
8106 | +# define __releases(x) |
8107 | +# define __acquire(x) (void)0 |
8108 | +# define __release(x) (void)0 |
8109 | +# define __cond_lock(x,c) (c) |
8110 | +# define __percpu |
8111 | +# define __rcu |
8112 | +# define __private |
8113 | +# define ACCESS_PRIVATE(p, member) ((p)->member) |
8114 | +#endif /* __CHECKER__ */ |
8115 | + |
8116 | +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ |
8117 | +#define ___PASTE(a,b) a##b |
8118 | +#define __PASTE(a,b) ___PASTE(a,b) |
8119 | + |
8120 | +#ifdef __KERNEL__ |
8121 | + |
8122 | +#ifdef __GNUC__ |
8123 | +#include <linux/compiler-gcc.h> |
8124 | +#endif |
8125 | + |
8126 | +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) |
8127 | +#define notrace __attribute__((hotpatch(0,0))) |
8128 | +#else |
8129 | +#define notrace __attribute__((no_instrument_function)) |
8130 | +#endif |
8131 | + |
8132 | +/* Intel compiler defines __GNUC__. So we will overwrite implementations |
8133 | + * coming from above header files here |
8134 | + */ |
8135 | +#ifdef __INTEL_COMPILER |
8136 | +# include <linux/compiler-intel.h> |
8137 | +#endif |
8138 | + |
8139 | +/* Clang compiler defines __GNUC__. So we will overwrite implementations |
8140 | + * coming from above header files here |
8141 | + */ |
8142 | +#ifdef __clang__ |
8143 | +#include <linux/compiler-clang.h> |
8144 | +#endif |
8145 | + |
8146 | +/* |
8147 | + * Generic compiler-dependent macros required for kernel |
8148 | + * build go below this comment. Actual compiler/compiler version |
8149 | + * specific implementations come from the above header files |
8150 | + */ |
8151 | + |
8152 | +struct ftrace_branch_data { |
8153 | + const char *func; |
8154 | + const char *file; |
8155 | + unsigned line; |
8156 | + union { |
8157 | + struct { |
8158 | + unsigned long correct; |
8159 | + unsigned long incorrect; |
8160 | + }; |
8161 | + struct { |
8162 | + unsigned long miss; |
8163 | + unsigned long hit; |
8164 | + }; |
8165 | + unsigned long miss_hit[2]; |
8166 | + }; |
8167 | +}; |
8168 | + |
8169 | +struct ftrace_likely_data { |
8170 | + struct ftrace_branch_data data; |
8171 | + unsigned long constant; |
8172 | +}; |
8173 | + |
8174 | +#endif /* __KERNEL__ */ |
8175 | + |
8176 | +#endif /* __ASSEMBLY__ */ |
8177 | + |
8178 | +#ifdef __KERNEL__ |
8179 | +/* |
8180 | + * Allow us to mark functions as 'deprecated' and have gcc emit a nice |
8181 | + * warning for each use, in hopes of speeding the function's removal. |
8182 | + * Usage is: |
8183 | + * int __deprecated foo(void) |
8184 | + */ |
8185 | +#ifndef __deprecated |
8186 | +# define __deprecated /* unimplemented */ |
8187 | +#endif |
8188 | + |
8189 | +#ifdef MODULE |
8190 | +#define __deprecated_for_modules __deprecated |
8191 | +#else |
8192 | +#define __deprecated_for_modules |
8193 | +#endif |
8194 | + |
8195 | +#ifndef __must_check |
8196 | +#define __must_check |
8197 | +#endif |
8198 | + |
8199 | +#ifndef CONFIG_ENABLE_MUST_CHECK |
8200 | +#undef __must_check |
8201 | +#define __must_check |
8202 | +#endif |
8203 | +#ifndef CONFIG_ENABLE_WARN_DEPRECATED |
8204 | +#undef __deprecated |
8205 | +#undef __deprecated_for_modules |
8206 | +#define __deprecated |
8207 | +#define __deprecated_for_modules |
8208 | +#endif |
8209 | + |
8210 | +#ifndef __malloc |
8211 | +#define __malloc |
8212 | +#endif |
8213 | + |
8214 | +/* |
8215 | + * Allow us to avoid 'defined but not used' warnings on functions and data, |
8216 | + * as well as force them to be emitted to the assembly file. |
8217 | + * |
8218 | + * As of gcc 3.4, static functions that are not marked with attribute((used)) |
8219 | + * may be elided from the assembly file. As of gcc 3.4, static data not so |
8220 | + * marked will not be elided, but this may change in a future gcc version. |
8221 | + * |
8222 | + * NOTE: Because distributions shipped with a backported unit-at-a-time |
8223 | + * compiler in gcc 3.3, we must define __used to be __attribute__((used)) |
8224 | + * for gcc >=3.3 instead of 3.4. |
8225 | + * |
8226 | + * In prior versions of gcc, such functions and data would be emitted, but |
8227 | + * would be warned about except with attribute((unused)). |
8228 | + * |
8229 | + * Mark functions that are referenced only in inline assembly as __used so |
8230 | + * the code is emitted even though it appears to be unreferenced. |
8231 | + */ |
8232 | +#ifndef __used |
8233 | +# define __used /* unimplemented */ |
8234 | +#endif |
8235 | + |
8236 | +#ifndef __maybe_unused |
8237 | +# define __maybe_unused /* unimplemented */ |
8238 | +#endif |
8239 | + |
8240 | +#ifndef __always_unused |
8241 | +# define __always_unused /* unimplemented */ |
8242 | +#endif |
8243 | + |
8244 | +#ifndef noinline |
8245 | +#define noinline |
8246 | +#endif |
8247 | + |
8248 | +/* |
8249 | + * Rather than using noinline to prevent stack consumption, use |
8250 | + * noinline_for_stack instead. For documentation reasons. |
8251 | + */ |
8252 | +#define noinline_for_stack noinline |
8253 | + |
8254 | +#ifndef __always_inline |
8255 | +#define __always_inline inline |
8256 | +#endif |
8257 | + |
8258 | +#endif /* __KERNEL__ */ |
8259 | + |
8260 | +/* |
8261 | + * From the GCC manual: |
8262 | + * |
8263 | + * Many functions do not examine any values except their arguments, |
8264 | + * and have no effects except the return value. Basically this is |
8265 | + * just slightly more strict class than the `pure' attribute above, |
8266 | + * since function is not allowed to read global memory. |
8267 | + * |
8268 | + * Note that a function that has pointer arguments and examines the |
8269 | + * data pointed to must _not_ be declared `const'. Likewise, a |
8270 | + * function that calls a non-`const' function usually must not be |
8271 | + * `const'. It does not make sense for a `const' function to return |
8272 | + * `void'. |
8273 | + */ |
8274 | +#ifndef __attribute_const__ |
8275 | +# define __attribute_const__ /* unimplemented */ |
8276 | +#endif |
8277 | + |
8278 | +#ifndef __designated_init |
8279 | +# define __designated_init |
8280 | +#endif |
8281 | + |
8282 | +#ifndef __latent_entropy |
8283 | +# define __latent_entropy |
8284 | +#endif |
8285 | + |
8286 | +#ifndef __randomize_layout |
8287 | +# define __randomize_layout __designated_init |
8288 | +#endif |
8289 | + |
8290 | +#ifndef __no_randomize_layout |
8291 | +# define __no_randomize_layout |
8292 | +#endif |
8293 | + |
8294 | +#ifndef randomized_struct_fields_start |
8295 | +# define randomized_struct_fields_start |
8296 | +# define randomized_struct_fields_end |
8297 | +#endif |
8298 | + |
8299 | +/* |
8300 | + * Tell gcc if a function is cold. The compiler will assume any path |
8301 | + * directly leading to the call is unlikely. |
8302 | + */ |
8303 | + |
8304 | +#ifndef __cold |
8305 | +#define __cold |
8306 | +#endif |
8307 | + |
8308 | +/* Simple shorthand for a section definition */ |
8309 | +#ifndef __section |
8310 | +# define __section(S) __attribute__ ((__section__(#S))) |
8311 | +#endif |
8312 | + |
8313 | +#ifndef __visible |
8314 | +#define __visible |
8315 | +#endif |
8316 | + |
8317 | +#ifndef __nostackprotector |
8318 | +# define __nostackprotector |
8319 | +#endif |
8320 | + |
8321 | +/* |
8322 | + * Assume alignment of return value. |
8323 | + */ |
8324 | +#ifndef __assume_aligned |
8325 | +#define __assume_aligned(a, ...) |
8326 | +#endif |
8327 | + |
8328 | + |
8329 | +/* Are two types/vars the same type (ignoring qualifiers)? */ |
8330 | +#ifndef __same_type |
8331 | +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) |
8332 | +#endif |
8333 | + |
8334 | +/* Is this type a native word size -- useful for atomic operations */ |
8335 | +#ifndef __native_word |
8336 | +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) |
8337 | +#endif |
8338 | + |
8339 | +#endif /* __LINUX_COMPILER_TYPES_H */ |
8340 | diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h |
8341 | index b4054fd5b6f6..b19563f9a8eb 100644 |
8342 | --- a/include/linux/hypervisor.h |
8343 | +++ b/include/linux/hypervisor.h |
8344 | @@ -7,8 +7,12 @@ |
8345 | * Juergen Gross <jgross@suse.com> |
8346 | */ |
8347 | |
8348 | -#ifdef CONFIG_HYPERVISOR_GUEST |
8349 | -#include <asm/hypervisor.h> |
8350 | +#ifdef CONFIG_X86 |
8351 | +#include <asm/x86_init.h> |
8352 | +static inline void hypervisor_pin_vcpu(int cpu) |
8353 | +{ |
8354 | + x86_platform.hyper.pin_vcpu(cpu); |
8355 | +} |
8356 | #else |
8357 | static inline void hypervisor_pin_vcpu(int cpu) |
8358 | { |
8359 | diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h |
8360 | index 7b0fa8b5c120..ce0ef1c0a30a 100644 |
8361 | --- a/include/linux/iio/common/st_sensors.h |
8362 | +++ b/include/linux/iio/common/st_sensors.h |
8363 | @@ -139,7 +139,7 @@ struct st_sensor_das { |
8364 | * @mask_ihl: mask to enable/disable active low on the INT lines. |
8365 | * @addr_od: address to enable/disable Open Drain on the INT lines. |
8366 | * @mask_od: mask to enable/disable Open Drain on the INT lines. |
8367 | - * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt |
8368 | + * struct stat_drdy - status register of DRDY (data ready) interrupt. |
8369 | * struct ig1 - represents the Interrupt Generator 1 of sensors. |
8370 | * @en_addr: address of the enable ig1 register. |
8371 | * @en_mask: mask to write the on/off value for enable. |
8372 | @@ -152,7 +152,10 @@ struct st_sensor_data_ready_irq { |
8373 | u8 mask_ihl; |
8374 | u8 addr_od; |
8375 | u8 mask_od; |
8376 | - u8 addr_stat_drdy; |
8377 | + struct { |
8378 | + u8 addr; |
8379 | + u8 mask; |
8380 | + } stat_drdy; |
8381 | struct { |
8382 | u8 en_addr; |
8383 | u8 en_mask; |
8384 | diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h |
8385 | new file mode 100644 |
8386 | index 000000000000..2710d72de3c9 |
8387 | --- /dev/null |
8388 | +++ b/include/linux/intel-pti.h |
8389 | @@ -0,0 +1,43 @@ |
8390 | +/* |
8391 | + * Copyright (C) Intel 2011 |
8392 | + * |
8393 | + * This program is free software; you can redistribute it and/or modify |
8394 | + * it under the terms of the GNU General Public License version 2 as |
8395 | + * published by the Free Software Foundation. |
8396 | + * |
8397 | + * This program is distributed in the hope that it will be useful, |
8398 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
8399 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
8400 | + * GNU General Public License for more details. |
8401 | + * |
8402 | + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
8403 | + * |
8404 | + * The PTI (Parallel Trace Interface) driver directs trace data routed from |
8405 | + * various parts in the system out through the Intel Penwell PTI port and |
8406 | + * out of the mobile device for analysis with a debugging tool |
8407 | + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, |
8408 | + * compact JTAG, standard. |
8409 | + * |
8410 | + * This header file will allow other parts of the OS to use the |
8411 | + * interface to write out it's contents for debugging a mobile system. |
8412 | + */ |
8413 | + |
8414 | +#ifndef LINUX_INTEL_PTI_H_ |
8415 | +#define LINUX_INTEL_PTI_H_ |
8416 | + |
8417 | +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ |
8418 | +#define PTI_LASTDWORD_DTS 0x30 |
8419 | + |
8420 | +/* basic structure used as a write address to the PTI HW */ |
8421 | +struct pti_masterchannel { |
8422 | + u8 master; |
8423 | + u8 channel; |
8424 | +}; |
8425 | + |
8426 | +/* the following functions are defined in misc/pti.c */ |
8427 | +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); |
8428 | +struct pti_masterchannel *pti_request_masterchannel(u8 type, |
8429 | + const char *thread_name); |
8430 | +void pti_release_masterchannel(struct pti_masterchannel *mc); |
8431 | + |
8432 | +#endif /* LINUX_INTEL_PTI_H_ */ |
8433 | diff --git a/include/linux/linkage.h b/include/linux/linkage.h |
8434 | index 2e6f90bd52aa..f68db9e450eb 100644 |
8435 | --- a/include/linux/linkage.h |
8436 | +++ b/include/linux/linkage.h |
8437 | @@ -2,7 +2,7 @@ |
8438 | #ifndef _LINUX_LINKAGE_H |
8439 | #define _LINUX_LINKAGE_H |
8440 | |
8441 | -#include <linux/compiler.h> |
8442 | +#include <linux/compiler_types.h> |
8443 | #include <linux/stringify.h> |
8444 | #include <linux/export.h> |
8445 | #include <asm/linkage.h> |
8446 | diff --git a/include/linux/mm.h b/include/linux/mm.h |
8447 | index db647d428100..f50deada0f5c 100644 |
8448 | --- a/include/linux/mm.h |
8449 | +++ b/include/linux/mm.h |
8450 | @@ -2510,7 +2510,7 @@ void vmemmap_populate_print_last(void); |
8451 | void vmemmap_free(unsigned long start, unsigned long end); |
8452 | #endif |
8453 | void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, |
8454 | - unsigned long size); |
8455 | + unsigned long nr_pages); |
8456 | |
8457 | enum mf_flags { |
8458 | MF_COUNT_INCREASED = 1 << 0, |
8459 | diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h |
8460 | index 18b06983131a..f0938257ee6d 100644 |
8461 | --- a/include/linux/mmzone.h |
8462 | +++ b/include/linux/mmzone.h |
8463 | @@ -1152,13 +1152,17 @@ struct mem_section { |
8464 | #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) |
8465 | |
8466 | #ifdef CONFIG_SPARSEMEM_EXTREME |
8467 | -extern struct mem_section *mem_section[NR_SECTION_ROOTS]; |
8468 | +extern struct mem_section **mem_section; |
8469 | #else |
8470 | extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; |
8471 | #endif |
8472 | |
8473 | static inline struct mem_section *__nr_to_section(unsigned long nr) |
8474 | { |
8475 | +#ifdef CONFIG_SPARSEMEM_EXTREME |
8476 | + if (!mem_section) |
8477 | + return NULL; |
8478 | +#endif |
8479 | if (!mem_section[SECTION_NR_TO_ROOT(nr)]) |
8480 | return NULL; |
8481 | return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; |
8482 | diff --git a/include/linux/pti.h b/include/linux/pti.h |
8483 | deleted file mode 100644 |
8484 | index b3ea01a3197e..000000000000 |
8485 | --- a/include/linux/pti.h |
8486 | +++ /dev/null |
8487 | @@ -1,43 +0,0 @@ |
8488 | -/* |
8489 | - * Copyright (C) Intel 2011 |
8490 | - * |
8491 | - * This program is free software; you can redistribute it and/or modify |
8492 | - * it under the terms of the GNU General Public License version 2 as |
8493 | - * published by the Free Software Foundation. |
8494 | - * |
8495 | - * This program is distributed in the hope that it will be useful, |
8496 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of |
8497 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
8498 | - * GNU General Public License for more details. |
8499 | - * |
8500 | - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
8501 | - * |
8502 | - * The PTI (Parallel Trace Interface) driver directs trace data routed from |
8503 | - * various parts in the system out through the Intel Penwell PTI port and |
8504 | - * out of the mobile device for analysis with a debugging tool |
8505 | - * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, |
8506 | - * compact JTAG, standard. |
8507 | - * |
8508 | - * This header file will allow other parts of the OS to use the |
8509 | - * interface to write out it's contents for debugging a mobile system. |
8510 | - */ |
8511 | - |
8512 | -#ifndef PTI_H_ |
8513 | -#define PTI_H_ |
8514 | - |
8515 | -/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ |
8516 | -#define PTI_LASTDWORD_DTS 0x30 |
8517 | - |
8518 | -/* basic structure used as a write address to the PTI HW */ |
8519 | -struct pti_masterchannel { |
8520 | - u8 master; |
8521 | - u8 channel; |
8522 | -}; |
8523 | - |
8524 | -/* the following functions are defined in misc/pti.c */ |
8525 | -void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); |
8526 | -struct pti_masterchannel *pti_request_masterchannel(u8 type, |
8527 | - const char *thread_name); |
8528 | -void pti_release_masterchannel(struct pti_masterchannel *mc); |
8529 | - |
8530 | -#endif /*PTI_H_*/ |
8531 | diff --git a/include/linux/rculist.h b/include/linux/rculist.h |
8532 | index c2cdd45a880a..127f534fec94 100644 |
8533 | --- a/include/linux/rculist.h |
8534 | +++ b/include/linux/rculist.h |
8535 | @@ -275,7 +275,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, |
8536 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). |
8537 | */ |
8538 | #define list_entry_rcu(ptr, type, member) \ |
8539 | - container_of(lockless_dereference(ptr), type, member) |
8540 | + container_of(READ_ONCE(ptr), type, member) |
8541 | |
8542 | /* |
8543 | * Where are list_empty_rcu() and list_first_entry_rcu()? |
8544 | @@ -368,7 +368,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, |
8545 | * example is when items are added to the list, but never deleted. |
8546 | */ |
8547 | #define list_entry_lockless(ptr, type, member) \ |
8548 | - container_of((typeof(ptr))lockless_dereference(ptr), type, member) |
8549 | + container_of((typeof(ptr))READ_ONCE(ptr), type, member) |
8550 | |
8551 | /** |
8552 | * list_for_each_entry_lockless - iterate over rcu list of given type |
8553 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h |
8554 | index 1a9f70d44af9..a6ddc42f87a5 100644 |
8555 | --- a/include/linux/rcupdate.h |
8556 | +++ b/include/linux/rcupdate.h |
8557 | @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { } |
8558 | #define __rcu_dereference_check(p, c, space) \ |
8559 | ({ \ |
8560 | /* Dependency order vs. p above. */ \ |
8561 | - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ |
8562 | + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ |
8563 | RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ |
8564 | rcu_dereference_sparse(p, space); \ |
8565 | ((typeof(*p) __force __kernel *)(________p1)); \ |
8566 | @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { } |
8567 | #define rcu_dereference_raw(p) \ |
8568 | ({ \ |
8569 | /* Dependency order vs. p above. */ \ |
8570 | - typeof(p) ________p1 = lockless_dereference(p); \ |
8571 | + typeof(p) ________p1 = READ_ONCE(p); \ |
8572 | ((typeof(*p) __force __kernel *)(________p1)); \ |
8573 | }) |
8574 | |
8575 | diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h |
8576 | index f65b92e0e1f9..ee8220f8dcf5 100644 |
8577 | --- a/include/uapi/linux/stddef.h |
8578 | +++ b/include/uapi/linux/stddef.h |
8579 | @@ -1,5 +1,5 @@ |
8580 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
8581 | -#include <linux/compiler.h> |
8582 | +#include <linux/compiler_types.h> |
8583 | |
8584 | #ifndef __always_inline |
8585 | #define __always_inline inline |
8586 | diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c |
8587 | index c48ca2a34b5e..c5ff809e86d0 100644 |
8588 | --- a/kernel/bpf/verifier.c |
8589 | +++ b/kernel/bpf/verifier.c |
8590 | @@ -1061,6 +1061,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, |
8591 | break; |
8592 | case PTR_TO_STACK: |
8593 | pointer_desc = "stack "; |
8594 | + /* The stack spill tracking logic in check_stack_write() |
8595 | + * and check_stack_read() relies on stack accesses being |
8596 | + * aligned. |
8597 | + */ |
8598 | + strict = true; |
8599 | break; |
8600 | default: |
8601 | break; |
8602 | @@ -1068,6 +1073,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, |
8603 | return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict); |
8604 | } |
8605 | |
8606 | +/* truncate register to smaller size (in bytes) |
8607 | + * must be called with size < BPF_REG_SIZE |
8608 | + */ |
8609 | +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) |
8610 | +{ |
8611 | + u64 mask; |
8612 | + |
8613 | + /* clear high bits in bit representation */ |
8614 | + reg->var_off = tnum_cast(reg->var_off, size); |
8615 | + |
8616 | + /* fix arithmetic bounds */ |
8617 | + mask = ((u64)1 << (size * 8)) - 1; |
8618 | + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { |
8619 | + reg->umin_value &= mask; |
8620 | + reg->umax_value &= mask; |
8621 | + } else { |
8622 | + reg->umin_value = 0; |
8623 | + reg->umax_value = mask; |
8624 | + } |
8625 | + reg->smin_value = reg->umin_value; |
8626 | + reg->smax_value = reg->umax_value; |
8627 | +} |
8628 | + |
8629 | /* check whether memory at (regno + off) is accessible for t = (read | write) |
8630 | * if t==write, value_regno is a register which value is stored into memory |
8631 | * if t==read, value_regno is a register which will receive the value from memory |
8632 | @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn |
8633 | if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && |
8634 | state->regs[value_regno].type == SCALAR_VALUE) { |
8635 | /* b/h/w load zero-extends, mark upper bits as known 0 */ |
8636 | - state->regs[value_regno].var_off = tnum_cast( |
8637 | - state->regs[value_regno].var_off, size); |
8638 | - __update_reg_bounds(&state->regs[value_regno]); |
8639 | + coerce_reg_to_size(&state->regs[value_regno], size); |
8640 | } |
8641 | return err; |
8642 | } |
8643 | @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, |
8644 | tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); |
8645 | verbose("invalid variable stack read R%d var_off=%s\n", |
8646 | regno, tn_buf); |
8647 | + return -EACCES; |
8648 | } |
8649 | off = regs[regno].off + regs[regno].var_off.value; |
8650 | if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || |
8651 | @@ -1742,14 +1769,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) |
8652 | return 0; |
8653 | } |
8654 | |
8655 | -static void coerce_reg_to_32(struct bpf_reg_state *reg) |
8656 | -{ |
8657 | - /* clear high 32 bits */ |
8658 | - reg->var_off = tnum_cast(reg->var_off, 4); |
8659 | - /* Update bounds */ |
8660 | - __update_reg_bounds(reg); |
8661 | -} |
8662 | - |
8663 | static bool signed_add_overflows(s64 a, s64 b) |
8664 | { |
8665 | /* Do the add in u64, where overflow is well-defined */ |
8666 | @@ -1770,6 +1789,41 @@ static bool signed_sub_overflows(s64 a, s64 b) |
8667 | return res > a; |
8668 | } |
8669 | |
8670 | +static bool check_reg_sane_offset(struct bpf_verifier_env *env, |
8671 | + const struct bpf_reg_state *reg, |
8672 | + enum bpf_reg_type type) |
8673 | +{ |
8674 | + bool known = tnum_is_const(reg->var_off); |
8675 | + s64 val = reg->var_off.value; |
8676 | + s64 smin = reg->smin_value; |
8677 | + |
8678 | + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { |
8679 | + verbose("math between %s pointer and %lld is not allowed\n", |
8680 | + reg_type_str[type], val); |
8681 | + return false; |
8682 | + } |
8683 | + |
8684 | + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { |
8685 | + verbose("%s pointer offset %d is not allowed\n", |
8686 | + reg_type_str[type], reg->off); |
8687 | + return false; |
8688 | + } |
8689 | + |
8690 | + if (smin == S64_MIN) { |
8691 | + verbose("math between %s pointer and register with unbounded min value is not allowed\n", |
8692 | + reg_type_str[type]); |
8693 | + return false; |
8694 | + } |
8695 | + |
8696 | + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { |
8697 | + verbose("value %lld makes %s pointer be out of bounds\n", |
8698 | + smin, reg_type_str[type]); |
8699 | + return false; |
8700 | + } |
8701 | + |
8702 | + return true; |
8703 | +} |
8704 | + |
8705 | /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. |
8706 | * Caller should also handle BPF_MOV case separately. |
8707 | * If we return -EACCES, caller may want to try again treating pointer as a |
8708 | @@ -1835,6 +1889,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, |
8709 | dst_reg->type = ptr_reg->type; |
8710 | dst_reg->id = ptr_reg->id; |
8711 | |
8712 | + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || |
8713 | + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) |
8714 | + return -EINVAL; |
8715 | + |
8716 | switch (opcode) { |
8717 | case BPF_ADD: |
8718 | /* We can take a fixed offset as long as it doesn't overflow |
8719 | @@ -1965,12 +2023,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, |
8720 | return -EACCES; |
8721 | } |
8722 | |
8723 | + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) |
8724 | + return -EINVAL; |
8725 | + |
8726 | __update_reg_bounds(dst_reg); |
8727 | __reg_deduce_bounds(dst_reg); |
8728 | __reg_bound_offset(dst_reg); |
8729 | return 0; |
8730 | } |
8731 | |
8732 | +/* WARNING: This function does calculations on 64-bit values, but the actual |
8733 | + * execution may occur on 32-bit values. Therefore, things like bitshifts |
8734 | + * need extra checks in the 32-bit case. |
8735 | + */ |
8736 | static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8737 | struct bpf_insn *insn, |
8738 | struct bpf_reg_state *dst_reg, |
8739 | @@ -1981,12 +2046,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8740 | bool src_known, dst_known; |
8741 | s64 smin_val, smax_val; |
8742 | u64 umin_val, umax_val; |
8743 | + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; |
8744 | |
8745 | - if (BPF_CLASS(insn->code) != BPF_ALU64) { |
8746 | - /* 32-bit ALU ops are (32,32)->64 */ |
8747 | - coerce_reg_to_32(dst_reg); |
8748 | - coerce_reg_to_32(&src_reg); |
8749 | - } |
8750 | smin_val = src_reg.smin_value; |
8751 | smax_val = src_reg.smax_value; |
8752 | umin_val = src_reg.umin_value; |
8753 | @@ -1994,6 +2055,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8754 | src_known = tnum_is_const(src_reg.var_off); |
8755 | dst_known = tnum_is_const(dst_reg->var_off); |
8756 | |
8757 | + if (!src_known && |
8758 | + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { |
8759 | + __mark_reg_unknown(dst_reg); |
8760 | + return 0; |
8761 | + } |
8762 | + |
8763 | switch (opcode) { |
8764 | case BPF_ADD: |
8765 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || |
8766 | @@ -2122,9 +2189,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8767 | __update_reg_bounds(dst_reg); |
8768 | break; |
8769 | case BPF_LSH: |
8770 | - if (umax_val > 63) { |
8771 | - /* Shifts greater than 63 are undefined. This includes |
8772 | - * shifts by a negative number. |
8773 | + if (umax_val >= insn_bitness) { |
8774 | + /* Shifts greater than 31 or 63 are undefined. |
8775 | + * This includes shifts by a negative number. |
8776 | */ |
8777 | mark_reg_unknown(regs, insn->dst_reg); |
8778 | break; |
8779 | @@ -2150,27 +2217,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8780 | __update_reg_bounds(dst_reg); |
8781 | break; |
8782 | case BPF_RSH: |
8783 | - if (umax_val > 63) { |
8784 | - /* Shifts greater than 63 are undefined. This includes |
8785 | - * shifts by a negative number. |
8786 | + if (umax_val >= insn_bitness) { |
8787 | + /* Shifts greater than 31 or 63 are undefined. |
8788 | + * This includes shifts by a negative number. |
8789 | */ |
8790 | mark_reg_unknown(regs, insn->dst_reg); |
8791 | break; |
8792 | } |
8793 | - /* BPF_RSH is an unsigned shift, so make the appropriate casts */ |
8794 | - if (dst_reg->smin_value < 0) { |
8795 | - if (umin_val) { |
8796 | - /* Sign bit will be cleared */ |
8797 | - dst_reg->smin_value = 0; |
8798 | - } else { |
8799 | - /* Lost sign bit information */ |
8800 | - dst_reg->smin_value = S64_MIN; |
8801 | - dst_reg->smax_value = S64_MAX; |
8802 | - } |
8803 | - } else { |
8804 | - dst_reg->smin_value = |
8805 | - (u64)(dst_reg->smin_value) >> umax_val; |
8806 | - } |
8807 | + /* BPF_RSH is an unsigned shift. If the value in dst_reg might |
8808 | + * be negative, then either: |
8809 | + * 1) src_reg might be zero, so the sign bit of the result is |
8810 | + * unknown, so we lose our signed bounds |
8811 | + * 2) it's known negative, thus the unsigned bounds capture the |
8812 | + * signed bounds |
8813 | + * 3) the signed bounds cross zero, so they tell us nothing |
8814 | + * about the result |
8815 | + * If the value in dst_reg is known nonnegative, then again the |
8816 | + * unsigned bounts capture the signed bounds. |
8817 | + * Thus, in all cases it suffices to blow away our signed bounds |
8818 | + * and rely on inferring new ones from the unsigned bounds and |
8819 | + * var_off of the result. |
8820 | + */ |
8821 | + dst_reg->smin_value = S64_MIN; |
8822 | + dst_reg->smax_value = S64_MAX; |
8823 | if (src_known) |
8824 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, |
8825 | umin_val); |
8826 | @@ -2186,6 +2255,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, |
8827 | break; |
8828 | } |
8829 | |
8830 | + if (BPF_CLASS(insn->code) != BPF_ALU64) { |
8831 | + /* 32-bit ALU ops are (32,32)->32 */ |
8832 | + coerce_reg_to_size(dst_reg, 4); |
8833 | + coerce_reg_to_size(&src_reg, 4); |
8834 | + } |
8835 | + |
8836 | __reg_deduce_bounds(dst_reg); |
8837 | __reg_bound_offset(dst_reg); |
8838 | return 0; |
8839 | @@ -2362,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) |
8840 | return -EACCES; |
8841 | } |
8842 | mark_reg_unknown(regs, insn->dst_reg); |
8843 | - /* high 32 bits are known zero. */ |
8844 | - regs[insn->dst_reg].var_off = tnum_cast( |
8845 | - regs[insn->dst_reg].var_off, 4); |
8846 | - __update_reg_bounds(®s[insn->dst_reg]); |
8847 | + coerce_reg_to_size(®s[insn->dst_reg], 4); |
8848 | } |
8849 | } else { |
8850 | /* case: R = imm |
8851 | * remember the value we stored into this reg |
8852 | */ |
8853 | regs[insn->dst_reg].type = SCALAR_VALUE; |
8854 | - __mark_reg_known(regs + insn->dst_reg, insn->imm); |
8855 | + if (BPF_CLASS(insn->code) == BPF_ALU64) { |
8856 | + __mark_reg_known(regs + insn->dst_reg, |
8857 | + insn->imm); |
8858 | + } else { |
8859 | + __mark_reg_known(regs + insn->dst_reg, |
8860 | + (u32)insn->imm); |
8861 | + } |
8862 | } |
8863 | |
8864 | } else if (opcode > BPF_END) { |
8865 | @@ -3307,15 +3385,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, |
8866 | return range_within(rold, rcur) && |
8867 | tnum_in(rold->var_off, rcur->var_off); |
8868 | } else { |
8869 | - /* if we knew anything about the old value, we're not |
8870 | - * equal, because we can't know anything about the |
8871 | - * scalar value of the pointer in the new value. |
8872 | + /* We're trying to use a pointer in place of a scalar. |
8873 | + * Even if the scalar was unbounded, this could lead to |
8874 | + * pointer leaks because scalars are allowed to leak |
8875 | + * while pointers are not. We could make this safe in |
8876 | + * special cases if root is calling us, but it's |
8877 | + * probably not worth the hassle. |
8878 | */ |
8879 | - return rold->umin_value == 0 && |
8880 | - rold->umax_value == U64_MAX && |
8881 | - rold->smin_value == S64_MIN && |
8882 | - rold->smax_value == S64_MAX && |
8883 | - tnum_is_unknown(rold->var_off); |
8884 | + return false; |
8885 | } |
8886 | case PTR_TO_MAP_VALUE: |
8887 | /* If the new min/max/var_off satisfy the old ones and |
8888 | @@ -3665,6 +3742,7 @@ static int do_check(struct bpf_verifier_env *env) |
8889 | if (err) |
8890 | return err; |
8891 | |
8892 | + env->insn_aux_data[insn_idx].seen = true; |
8893 | if (class == BPF_ALU || class == BPF_ALU64) { |
8894 | err = check_alu_op(env, insn); |
8895 | if (err) |
8896 | @@ -3855,6 +3933,7 @@ static int do_check(struct bpf_verifier_env *env) |
8897 | return err; |
8898 | |
8899 | insn_idx++; |
8900 | + env->insn_aux_data[insn_idx].seen = true; |
8901 | } else { |
8902 | verbose("invalid BPF_LD mode\n"); |
8903 | return -EINVAL; |
8904 | @@ -4035,6 +4114,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, |
8905 | u32 off, u32 cnt) |
8906 | { |
8907 | struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; |
8908 | + int i; |
8909 | |
8910 | if (cnt == 1) |
8911 | return 0; |
8912 | @@ -4044,6 +4124,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, |
8913 | memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); |
8914 | memcpy(new_data + off + cnt - 1, old_data + off, |
8915 | sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); |
8916 | + for (i = off; i < off + cnt - 1; i++) |
8917 | + new_data[i].seen = true; |
8918 | env->insn_aux_data = new_data; |
8919 | vfree(old_data); |
8920 | return 0; |
8921 | @@ -4062,6 +4144,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of |
8922 | return new_prog; |
8923 | } |
8924 | |
8925 | +/* The verifier does more data flow analysis than llvm and will not explore |
8926 | + * branches that are dead at run time. Malicious programs can have dead code |
8927 | + * too. Therefore replace all dead at-run-time code with nops. |
8928 | + */ |
8929 | +static void sanitize_dead_code(struct bpf_verifier_env *env) |
8930 | +{ |
8931 | + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; |
8932 | + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0); |
8933 | + struct bpf_insn *insn = env->prog->insnsi; |
8934 | + const int insn_cnt = env->prog->len; |
8935 | + int i; |
8936 | + |
8937 | + for (i = 0; i < insn_cnt; i++) { |
8938 | + if (aux_data[i].seen) |
8939 | + continue; |
8940 | + memcpy(insn + i, &nop, sizeof(nop)); |
8941 | + } |
8942 | +} |
8943 | + |
8944 | /* convert load instructions that access fields of 'struct __sk_buff' |
8945 | * into sequence of instructions that access fields of 'struct sk_buff' |
8946 | */ |
8947 | @@ -4378,6 +4479,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) |
8948 | while (pop_stack(env, NULL) >= 0); |
8949 | free_states(env); |
8950 | |
8951 | + if (ret == 0) |
8952 | + sanitize_dead_code(env); |
8953 | + |
8954 | if (ret == 0) |
8955 | /* program is valid, convert *(u32*)(ctx + off) accesses */ |
8956 | ret = convert_ctx_accesses(env); |
8957 | diff --git a/kernel/events/core.c b/kernel/events/core.c |
8958 | index 4f1d4bfc607a..24ebad5567b4 100644 |
8959 | --- a/kernel/events/core.c |
8960 | +++ b/kernel/events/core.c |
8961 | @@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struct perf_event *event) |
8962 | * indeed free this event, otherwise we need to serialize on |
8963 | * owner->perf_event_mutex. |
8964 | */ |
8965 | - owner = lockless_dereference(event->owner); |
8966 | + owner = READ_ONCE(event->owner); |
8967 | if (owner) { |
8968 | /* |
8969 | * Since delayed_put_task_struct() also drops the last |
8970 | @@ -4330,7 +4330,7 @@ int perf_event_release_kernel(struct perf_event *event) |
8971 | * Cannot change, child events are not migrated, see the |
8972 | * comment with perf_event_ctx_lock_nested(). |
8973 | */ |
8974 | - ctx = lockless_dereference(child->ctx); |
8975 | + ctx = READ_ONCE(child->ctx); |
8976 | /* |
8977 | * Since child_mutex nests inside ctx::mutex, we must jump |
8978 | * through hoops. We start by grabbing a reference on the ctx. |
8979 | diff --git a/kernel/seccomp.c b/kernel/seccomp.c |
8980 | index 418a1c045933..5f0dfb2abb8d 100644 |
8981 | --- a/kernel/seccomp.c |
8982 | +++ b/kernel/seccomp.c |
8983 | @@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, |
8984 | u32 ret = SECCOMP_RET_ALLOW; |
8985 | /* Make sure cross-thread synced filter points somewhere sane. */ |
8986 | struct seccomp_filter *f = |
8987 | - lockless_dereference(current->seccomp.filter); |
8988 | + READ_ONCE(current->seccomp.filter); |
8989 | |
8990 | /* Ensure unexpected behavior doesn't result in failing open. */ |
8991 | if (unlikely(WARN_ON(f == NULL))) |
8992 | diff --git a/kernel/task_work.c b/kernel/task_work.c |
8993 | index 5718b3ea202a..0fef395662a6 100644 |
8994 | --- a/kernel/task_work.c |
8995 | +++ b/kernel/task_work.c |
8996 | @@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) |
8997 | * we raced with task_work_run(), *pprev == NULL/exited. |
8998 | */ |
8999 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
9000 | - while ((work = lockless_dereference(*pprev))) { |
9001 | + while ((work = READ_ONCE(*pprev))) { |
9002 | if (work->func != func) |
9003 | pprev = &work->next; |
9004 | else if (cmpxchg(pprev, work, work->next) == work) |
9005 | diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c |
9006 | index dc498b605d5d..6350f64d5aa4 100644 |
9007 | --- a/kernel/trace/bpf_trace.c |
9008 | +++ b/kernel/trace/bpf_trace.c |
9009 | @@ -293,14 +293,13 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { |
9010 | .arg2_type = ARG_ANYTHING, |
9011 | }; |
9012 | |
9013 | -static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); |
9014 | +static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); |
9015 | |
9016 | static __always_inline u64 |
9017 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, |
9018 | - u64 flags, struct perf_raw_record *raw) |
9019 | + u64 flags, struct perf_sample_data *sd) |
9020 | { |
9021 | struct bpf_array *array = container_of(map, struct bpf_array, map); |
9022 | - struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); |
9023 | unsigned int cpu = smp_processor_id(); |
9024 | u64 index = flags & BPF_F_INDEX_MASK; |
9025 | struct bpf_event_entry *ee; |
9026 | @@ -323,8 +322,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, |
9027 | if (unlikely(event->oncpu != cpu)) |
9028 | return -EOPNOTSUPP; |
9029 | |
9030 | - perf_sample_data_init(sd, 0, 0); |
9031 | - sd->raw = raw; |
9032 | perf_event_output(event, sd, regs); |
9033 | return 0; |
9034 | } |
9035 | @@ -332,6 +329,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, |
9036 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, |
9037 | u64, flags, void *, data, u64, size) |
9038 | { |
9039 | + struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); |
9040 | struct perf_raw_record raw = { |
9041 | .frag = { |
9042 | .size = size, |
9043 | @@ -342,7 +340,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, |
9044 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) |
9045 | return -EINVAL; |
9046 | |
9047 | - return __bpf_perf_event_output(regs, map, flags, &raw); |
9048 | + perf_sample_data_init(sd, 0, 0); |
9049 | + sd->raw = &raw; |
9050 | + |
9051 | + return __bpf_perf_event_output(regs, map, flags, sd); |
9052 | } |
9053 | |
9054 | static const struct bpf_func_proto bpf_perf_event_output_proto = { |
9055 | @@ -357,10 +358,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { |
9056 | }; |
9057 | |
9058 | static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); |
9059 | +static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); |
9060 | |
9061 | u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, |
9062 | void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) |
9063 | { |
9064 | + struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); |
9065 | struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); |
9066 | struct perf_raw_frag frag = { |
9067 | .copy = ctx_copy, |
9068 | @@ -378,8 +381,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, |
9069 | }; |
9070 | |
9071 | perf_fetch_caller_regs(regs); |
9072 | + perf_sample_data_init(sd, 0, 0); |
9073 | + sd->raw = &raw; |
9074 | |
9075 | - return __bpf_perf_event_output(regs, map, flags, &raw); |
9076 | + return __bpf_perf_event_output(regs, map, flags, sd); |
9077 | } |
9078 | |
9079 | BPF_CALL_0(bpf_get_current_task) |
9080 | diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c |
9081 | index 1c21d0e2a145..7eb975a2d0e1 100644 |
9082 | --- a/kernel/trace/trace_events_hist.c |
9083 | +++ b/kernel/trace/trace_events_hist.c |
9084 | @@ -450,7 +450,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, |
9085 | } |
9086 | |
9087 | field = trace_find_event_field(file->event_call, field_name); |
9088 | - if (!field) { |
9089 | + if (!field || !field->size) { |
9090 | ret = -EINVAL; |
9091 | goto out; |
9092 | } |
9093 | @@ -548,7 +548,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, |
9094 | } |
9095 | |
9096 | field = trace_find_event_field(file->event_call, field_name); |
9097 | - if (!field) { |
9098 | + if (!field || !field->size) { |
9099 | ret = -EINVAL; |
9100 | goto out; |
9101 | } |
9102 | diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug |
9103 | index dfdad67d8f6c..ff21b4dbb392 100644 |
9104 | --- a/lib/Kconfig.debug |
9105 | +++ b/lib/Kconfig.debug |
9106 | @@ -376,7 +376,7 @@ config STACK_VALIDATION |
9107 | that runtime stack traces are more reliable. |
9108 | |
9109 | This is also a prerequisite for generation of ORC unwind data, which |
9110 | - is needed for CONFIG_ORC_UNWINDER. |
9111 | + is needed for CONFIG_UNWINDER_ORC. |
9112 | |
9113 | For more information, see |
9114 | tools/objtool/Documentation/stack-validation.txt. |
9115 | diff --git a/mm/slab.h b/mm/slab.h |
9116 | index 028cdc7df67e..86d7c7d860f9 100644 |
9117 | --- a/mm/slab.h |
9118 | +++ b/mm/slab.h |
9119 | @@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx) |
9120 | * memcg_caches issues a write barrier to match this (see |
9121 | * memcg_create_kmem_cache()). |
9122 | */ |
9123 | - cachep = lockless_dereference(arr->entries[idx]); |
9124 | + cachep = READ_ONCE(arr->entries[idx]); |
9125 | rcu_read_unlock(); |
9126 | |
9127 | return cachep; |
9128 | diff --git a/mm/sparse.c b/mm/sparse.c |
9129 | index 4900707ae146..60805abf98af 100644 |
9130 | --- a/mm/sparse.c |
9131 | +++ b/mm/sparse.c |
9132 | @@ -23,8 +23,7 @@ |
9133 | * 1) mem_section - memory sections, mem_map's for valid memory |
9134 | */ |
9135 | #ifdef CONFIG_SPARSEMEM_EXTREME |
9136 | -struct mem_section *mem_section[NR_SECTION_ROOTS] |
9137 | - ____cacheline_internodealigned_in_smp; |
9138 | +struct mem_section **mem_section; |
9139 | #else |
9140 | struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] |
9141 | ____cacheline_internodealigned_in_smp; |
9142 | @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) |
9143 | int __section_nr(struct mem_section* ms) |
9144 | { |
9145 | unsigned long root_nr; |
9146 | - struct mem_section* root; |
9147 | + struct mem_section *root = NULL; |
9148 | |
9149 | for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { |
9150 | root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); |
9151 | @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms) |
9152 | break; |
9153 | } |
9154 | |
9155 | - VM_BUG_ON(root_nr == NR_SECTION_ROOTS); |
9156 | + VM_BUG_ON(!root); |
9157 | |
9158 | return (root_nr * SECTIONS_PER_ROOT) + (ms - root); |
9159 | } |
9160 | @@ -208,6 +207,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) |
9161 | { |
9162 | unsigned long pfn; |
9163 | |
9164 | +#ifdef CONFIG_SPARSEMEM_EXTREME |
9165 | + if (unlikely(!mem_section)) { |
9166 | + unsigned long size, align; |
9167 | + |
9168 | + size = sizeof(struct mem_section) * NR_SECTION_ROOTS; |
9169 | + align = 1 << (INTERNODE_CACHE_SHIFT); |
9170 | + mem_section = memblock_virt_alloc(size, align); |
9171 | + } |
9172 | +#endif |
9173 | + |
9174 | start &= PAGE_SECTION_MASK; |
9175 | mminit_validate_memmodel_limits(&start, &end); |
9176 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
9177 | @@ -330,11 +339,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
9178 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) |
9179 | { |
9180 | unsigned long usemap_snr, pgdat_snr; |
9181 | - static unsigned long old_usemap_snr = NR_MEM_SECTIONS; |
9182 | - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS; |
9183 | + static unsigned long old_usemap_snr; |
9184 | + static unsigned long old_pgdat_snr; |
9185 | struct pglist_data *pgdat = NODE_DATA(nid); |
9186 | int usemap_nid; |
9187 | |
9188 | + /* First call */ |
9189 | + if (!old_usemap_snr) { |
9190 | + old_usemap_snr = NR_MEM_SECTIONS; |
9191 | + old_pgdat_snr = NR_MEM_SECTIONS; |
9192 | + } |
9193 | + |
9194 | usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); |
9195 | pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); |
9196 | if (usemap_snr == pgdat_snr) |
9197 | diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c |
9198 | index 467e44d7587d..045331204097 100644 |
9199 | --- a/net/ipv4/ip_gre.c |
9200 | +++ b/net/ipv4/ip_gre.c |
9201 | @@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, |
9202 | if (gre_handle_offloads(skb, false)) |
9203 | goto err_free_rt; |
9204 | |
9205 | - if (skb->len > dev->mtu) { |
9206 | - pskb_trim(skb, dev->mtu); |
9207 | + if (skb->len > dev->mtu + dev->hard_header_len) { |
9208 | + pskb_trim(skb, dev->mtu + dev->hard_header_len); |
9209 | truncate = true; |
9210 | } |
9211 | |
9212 | @@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, |
9213 | if (skb_cow_head(skb, dev->needed_headroom)) |
9214 | goto free_skb; |
9215 | |
9216 | - if (skb->len - dev->hard_header_len > dev->mtu) { |
9217 | - pskb_trim(skb, dev->mtu); |
9218 | + if (skb->len > dev->mtu + dev->hard_header_len) { |
9219 | + pskb_trim(skb, dev->mtu + dev->hard_header_len); |
9220 | truncate = true; |
9221 | } |
9222 | |
9223 | diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c |
9224 | index 218cfcc77650..ee113ff15fd0 100644 |
9225 | --- a/net/ipv4/tcp_vegas.c |
9226 | +++ b/net/ipv4/tcp_vegas.c |
9227 | @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); |
9228 | |
9229 | static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) |
9230 | { |
9231 | - return min(tp->snd_ssthresh, tp->snd_cwnd-1); |
9232 | + return min(tp->snd_ssthresh, tp->snd_cwnd); |
9233 | } |
9234 | |
9235 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) |
9236 | diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c |
9237 | index 8a1c846d3df9..2ec39404c449 100644 |
9238 | --- a/net/ipv6/addrconf.c |
9239 | +++ b/net/ipv6/addrconf.c |
9240 | @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { |
9241 | .disable_policy = 0, |
9242 | }; |
9243 | |
9244 | -/* Check if a valid qdisc is available */ |
9245 | -static inline bool addrconf_qdisc_ok(const struct net_device *dev) |
9246 | +/* Check if link is ready: is it up and is a valid qdisc available */ |
9247 | +static inline bool addrconf_link_ready(const struct net_device *dev) |
9248 | { |
9249 | - return !qdisc_tx_is_noop(dev); |
9250 | + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); |
9251 | } |
9252 | |
9253 | static void addrconf_del_rs_timer(struct inet6_dev *idev) |
9254 | @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) |
9255 | |
9256 | ndev->token = in6addr_any; |
9257 | |
9258 | - if (netif_running(dev) && addrconf_qdisc_ok(dev)) |
9259 | + if (netif_running(dev) && addrconf_link_ready(dev)) |
9260 | ndev->if_flags |= IF_READY; |
9261 | |
9262 | ipv6_mc_init_dev(ndev); |
9263 | @@ -3404,7 +3404,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, |
9264 | /* restore routes for permanent addresses */ |
9265 | addrconf_permanent_addr(dev); |
9266 | |
9267 | - if (!addrconf_qdisc_ok(dev)) { |
9268 | + if (!addrconf_link_ready(dev)) { |
9269 | /* device is not ready yet. */ |
9270 | pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", |
9271 | dev->name); |
9272 | @@ -3419,7 +3419,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, |
9273 | run_pending = 1; |
9274 | } |
9275 | } else if (event == NETDEV_CHANGE) { |
9276 | - if (!addrconf_qdisc_ok(dev)) { |
9277 | + if (!addrconf_link_ready(dev)) { |
9278 | /* device is still not ready. */ |
9279 | break; |
9280 | } |
9281 | diff --git a/net/ipv6/route.c b/net/ipv6/route.c |
9282 | index 598efa8cfe25..76b47682f77f 100644 |
9283 | --- a/net/ipv6/route.c |
9284 | +++ b/net/ipv6/route.c |
9285 | @@ -1055,7 +1055,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) |
9286 | |
9287 | static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) |
9288 | { |
9289 | - struct fib6_table *table = rt->rt6i_table; |
9290 | struct rt6_info *pcpu_rt, *prev, **p; |
9291 | |
9292 | pcpu_rt = ip6_rt_pcpu_alloc(rt); |
9293 | @@ -1066,28 +1065,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) |
9294 | return net->ipv6.ip6_null_entry; |
9295 | } |
9296 | |
9297 | - read_lock_bh(&table->tb6_lock); |
9298 | - if (rt->rt6i_pcpu) { |
9299 | - p = this_cpu_ptr(rt->rt6i_pcpu); |
9300 | - prev = cmpxchg(p, NULL, pcpu_rt); |
9301 | - if (prev) { |
9302 | - /* If someone did it before us, return prev instead */ |
9303 | - dst_release_immediate(&pcpu_rt->dst); |
9304 | - pcpu_rt = prev; |
9305 | - } |
9306 | - } else { |
9307 | - /* rt has been removed from the fib6 tree |
9308 | - * before we have a chance to acquire the read_lock. |
9309 | - * In this case, don't brother to create a pcpu rt |
9310 | - * since rt is going away anyway. The next |
9311 | - * dst_check() will trigger a re-lookup. |
9312 | - */ |
9313 | + dst_hold(&pcpu_rt->dst); |
9314 | + p = this_cpu_ptr(rt->rt6i_pcpu); |
9315 | + prev = cmpxchg(p, NULL, pcpu_rt); |
9316 | + if (prev) { |
9317 | + /* If someone did it before us, return prev instead */ |
9318 | + /* release refcnt taken by ip6_rt_pcpu_alloc() */ |
9319 | dst_release_immediate(&pcpu_rt->dst); |
9320 | - pcpu_rt = rt; |
9321 | + /* release refcnt taken by above dst_hold() */ |
9322 | + dst_release_immediate(&pcpu_rt->dst); |
9323 | + dst_hold(&prev->dst); |
9324 | + pcpu_rt = prev; |
9325 | } |
9326 | - dst_hold(&pcpu_rt->dst); |
9327 | + |
9328 | rt6_dst_from_metrics_check(pcpu_rt); |
9329 | - read_unlock_bh(&table->tb6_lock); |
9330 | return pcpu_rt; |
9331 | } |
9332 | |
9333 | @@ -1177,19 +1168,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, |
9334 | if (pcpu_rt) { |
9335 | read_unlock_bh(&table->tb6_lock); |
9336 | } else { |
9337 | - /* We have to do the read_unlock first |
9338 | - * because rt6_make_pcpu_route() may trigger |
9339 | - * ip6_dst_gc() which will take the write_lock. |
9340 | - */ |
9341 | - dst_hold(&rt->dst); |
9342 | - read_unlock_bh(&table->tb6_lock); |
9343 | - pcpu_rt = rt6_make_pcpu_route(rt); |
9344 | - dst_release(&rt->dst); |
9345 | + /* atomic_inc_not_zero() is needed when using rcu */ |
9346 | + if (atomic_inc_not_zero(&rt->rt6i_ref)) { |
9347 | + /* We have to do the read_unlock first |
9348 | + * because rt6_make_pcpu_route() may trigger |
9349 | + * ip6_dst_gc() which will take the write_lock. |
9350 | + * |
9351 | + * No dst_hold() on rt is needed because grabbing |
9352 | + * rt->rt6i_ref makes sure rt can't be released. |
9353 | + */ |
9354 | + read_unlock_bh(&table->tb6_lock); |
9355 | + pcpu_rt = rt6_make_pcpu_route(rt); |
9356 | + rt6_release(rt); |
9357 | + } else { |
9358 | + /* rt is already removed from tree */ |
9359 | + read_unlock_bh(&table->tb6_lock); |
9360 | + pcpu_rt = net->ipv6.ip6_null_entry; |
9361 | + dst_hold(&pcpu_rt->dst); |
9362 | + } |
9363 | } |
9364 | |
9365 | trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); |
9366 | return pcpu_rt; |
9367 | - |
9368 | } |
9369 | } |
9370 | EXPORT_SYMBOL_GPL(ip6_pol_route); |
9371 | diff --git a/net/sctp/stream.c b/net/sctp/stream.c |
9372 | index fa8371ff05c4..724adf2786a2 100644 |
9373 | --- a/net/sctp/stream.c |
9374 | +++ b/net/sctp/stream.c |
9375 | @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, |
9376 | { |
9377 | int i; |
9378 | |
9379 | + gfp |= __GFP_NOWARN; |
9380 | + |
9381 | /* Initial stream->out size may be very big, so free it and alloc |
9382 | - * a new one with new outcnt to save memory. |
9383 | + * a new one with new outcnt to save memory if needed. |
9384 | */ |
9385 | + if (outcnt == stream->outcnt) |
9386 | + goto in; |
9387 | + |
9388 | kfree(stream->out); |
9389 | |
9390 | stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); |
9391 | @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, |
9392 | for (i = 0; i < stream->outcnt; i++) |
9393 | stream->out[i].state = SCTP_STREAM_OPEN; |
9394 | |
9395 | +in: |
9396 | if (!incnt) |
9397 | return 0; |
9398 | |
9399 | diff --git a/scripts/Makefile.build b/scripts/Makefile.build |
9400 | index bb831d49bcfd..e63af4e19382 100644 |
9401 | --- a/scripts/Makefile.build |
9402 | +++ b/scripts/Makefile.build |
9403 | @@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1) |
9404 | |
9405 | __objtool_obj := $(objtree)/tools/objtool/objtool |
9406 | |
9407 | -objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check) |
9408 | +objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) |
9409 | |
9410 | ifndef CONFIG_FRAME_POINTER |
9411 | objtool_args += --no-fp |
9412 | diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh |
9413 | index 4d1ea96e8794..a18bca720995 100755 |
9414 | --- a/scripts/headers_install.sh |
9415 | +++ b/scripts/headers_install.sh |
9416 | @@ -34,7 +34,7 @@ do |
9417 | sed -r \ |
9418 | -e 's/([ \t(])(__user|__force|__iomem)[ \t]/\1/g' \ |
9419 | -e 's/__attribute_const__([ \t]|$)/\1/g' \ |
9420 | - -e 's@^#include <linux/compiler.h>@@' \ |
9421 | + -e 's@^#include <linux/compiler(|_types).h>@@' \ |
9422 | -e 's/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g' \ |
9423 | -e 's/(^|[ \t(])(inline|asm|volatile)([ \t(]|$)/\1__\2__\3/g' \ |
9424 | -e 's@#(ifndef|define|endif[ \t]*/[*])[ \t]*_UAPI@#\1 @' \ |
9425 | diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c |
9426 | index 549c269acc7d..18933bf6473f 100644 |
9427 | --- a/sound/soc/codecs/msm8916-wcd-analog.c |
9428 | +++ b/sound/soc/codecs/msm8916-wcd-analog.c |
9429 | @@ -104,7 +104,7 @@ |
9430 | #define CDC_A_MICB_1_VAL (0xf141) |
9431 | #define MICB_MIN_VAL 1600 |
9432 | #define MICB_STEP_SIZE 50 |
9433 | -#define MICB_VOLTAGE_REGVAL(v) ((v - MICB_MIN_VAL)/MICB_STEP_SIZE) |
9434 | +#define MICB_VOLTAGE_REGVAL(v) (((v - MICB_MIN_VAL)/MICB_STEP_SIZE) << 3) |
9435 | #define MICB_1_VAL_MICB_OUT_VAL_MASK GENMASK(7, 3) |
9436 | #define MICB_1_VAL_MICB_OUT_VAL_V2P70V ((0x16) << 3) |
9437 | #define MICB_1_VAL_MICB_OUT_VAL_V1P80V ((0x4) << 3) |
9438 | @@ -349,8 +349,9 @@ static void pm8916_wcd_analog_micbias_enable(struct snd_soc_codec *codec) |
9439 | | MICB_1_CTL_EXT_PRECHARG_EN_ENABLE); |
9440 | |
9441 | if (wcd->micbias_mv) { |
9442 | - snd_soc_write(codec, CDC_A_MICB_1_VAL, |
9443 | - MICB_VOLTAGE_REGVAL(wcd->micbias_mv)); |
9444 | + snd_soc_update_bits(codec, CDC_A_MICB_1_VAL, |
9445 | + MICB_1_VAL_MICB_OUT_VAL_MASK, |
9446 | + MICB_VOLTAGE_REGVAL(wcd->micbias_mv)); |
9447 | /* |
9448 | * Special headset needs MICBIAS as 2.7V so wait for |
9449 | * 50 msec for the MICBIAS to reach 2.7 volts. |
9450 | @@ -1241,6 +1242,8 @@ static const struct of_device_id pm8916_wcd_analog_spmi_match_table[] = { |
9451 | { } |
9452 | }; |
9453 | |
9454 | +MODULE_DEVICE_TABLE(of, pm8916_wcd_analog_spmi_match_table); |
9455 | + |
9456 | static struct platform_driver pm8916_wcd_analog_spmi_driver = { |
9457 | .driver = { |
9458 | .name = "qcom,pm8916-wcd-spmi-codec", |
9459 | diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c |
9460 | index 23b0f0f6ec9c..2fc8a6372206 100644 |
9461 | --- a/sound/soc/img/img-parallel-out.c |
9462 | +++ b/sound/soc/img/img-parallel-out.c |
9463 | @@ -164,9 +164,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) |
9464 | return -EINVAL; |
9465 | } |
9466 | |
9467 | + pm_runtime_get_sync(prl->dev); |
9468 | reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL); |
9469 | reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set; |
9470 | img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL); |
9471 | + pm_runtime_put(prl->dev); |
9472 | |
9473 | return 0; |
9474 | } |
9475 | diff --git a/tools/objtool/check.c b/tools/objtool/check.c |
9476 | index c0e26ad1fa7e..9b341584eb1b 100644 |
9477 | --- a/tools/objtool/check.c |
9478 | +++ b/tools/objtool/check.c |
9479 | @@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, |
9480 | if (insn->dead_end) |
9481 | return 0; |
9482 | |
9483 | - insn = next_insn; |
9484 | - if (!insn) { |
9485 | + if (!next_insn) { |
9486 | + if (state.cfa.base == CFI_UNDEFINED) |
9487 | + return 0; |
9488 | WARN("%s: unexpected end of section", sec->name); |
9489 | return 1; |
9490 | } |
9491 | + |
9492 | + insn = next_insn; |
9493 | } |
9494 | |
9495 | return 0; |
9496 | diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c |
9497 | index 31e0f9143840..07f329919828 100644 |
9498 | --- a/tools/objtool/objtool.c |
9499 | +++ b/tools/objtool/objtool.c |
9500 | @@ -70,7 +70,7 @@ static void cmd_usage(void) |
9501 | |
9502 | printf("\n"); |
9503 | |
9504 | - exit(1); |
9505 | + exit(129); |
9506 | } |
9507 | |
9508 | static void handle_options(int *argc, const char ***argv) |
9509 | @@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv) |
9510 | break; |
9511 | } else { |
9512 | fprintf(stderr, "Unknown option: %s\n", cmd); |
9513 | - fprintf(stderr, "\n Usage: %s\n", |
9514 | - objtool_usage_string); |
9515 | - exit(1); |
9516 | + cmd_usage(); |
9517 | } |
9518 | |
9519 | (*argv)++; |
9520 | diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c |
9521 | index 64ae21f64489..7a2d221c4702 100644 |
9522 | --- a/tools/testing/selftests/bpf/test_verifier.c |
9523 | +++ b/tools/testing/selftests/bpf/test_verifier.c |
9524 | @@ -606,7 +606,6 @@ static struct bpf_test tests[] = { |
9525 | }, |
9526 | .errstr = "misaligned stack access", |
9527 | .result = REJECT, |
9528 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, |
9529 | }, |
9530 | { |
9531 | "invalid map_fd for function call", |
9532 | @@ -1797,7 +1796,6 @@ static struct bpf_test tests[] = { |
9533 | }, |
9534 | .result = REJECT, |
9535 | .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", |
9536 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, |
9537 | }, |
9538 | { |
9539 | "PTR_TO_STACK store/load - bad alignment on reg", |
9540 | @@ -1810,7 +1808,6 @@ static struct bpf_test tests[] = { |
9541 | }, |
9542 | .result = REJECT, |
9543 | .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", |
9544 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, |
9545 | }, |
9546 | { |
9547 | "PTR_TO_STACK store/load - out of bounds low", |
9548 | @@ -6115,7 +6112,7 @@ static struct bpf_test tests[] = { |
9549 | BPF_EXIT_INSN(), |
9550 | }, |
9551 | .fixup_map1 = { 3 }, |
9552 | - .errstr = "R0 min value is negative", |
9553 | + .errstr = "unbounded min value", |
9554 | .result = REJECT, |
9555 | }, |
9556 | { |
9557 | @@ -6139,7 +6136,7 @@ static struct bpf_test tests[] = { |
9558 | BPF_EXIT_INSN(), |
9559 | }, |
9560 | .fixup_map1 = { 3 }, |
9561 | - .errstr = "R0 min value is negative", |
9562 | + .errstr = "unbounded min value", |
9563 | .result = REJECT, |
9564 | }, |
9565 | { |
9566 | @@ -6165,7 +6162,7 @@ static struct bpf_test tests[] = { |
9567 | BPF_EXIT_INSN(), |
9568 | }, |
9569 | .fixup_map1 = { 3 }, |
9570 | - .errstr = "R8 invalid mem access 'inv'", |
9571 | + .errstr = "unbounded min value", |
9572 | .result = REJECT, |
9573 | }, |
9574 | { |
9575 | @@ -6190,7 +6187,7 @@ static struct bpf_test tests[] = { |
9576 | BPF_EXIT_INSN(), |
9577 | }, |
9578 | .fixup_map1 = { 3 }, |
9579 | - .errstr = "R8 invalid mem access 'inv'", |
9580 | + .errstr = "unbounded min value", |
9581 | .result = REJECT, |
9582 | }, |
9583 | { |
9584 | @@ -6238,7 +6235,7 @@ static struct bpf_test tests[] = { |
9585 | BPF_EXIT_INSN(), |
9586 | }, |
9587 | .fixup_map1 = { 3 }, |
9588 | - .errstr = "R0 min value is negative", |
9589 | + .errstr = "unbounded min value", |
9590 | .result = REJECT, |
9591 | }, |
9592 | { |
9593 | @@ -6309,7 +6306,7 @@ static struct bpf_test tests[] = { |
9594 | BPF_EXIT_INSN(), |
9595 | }, |
9596 | .fixup_map1 = { 3 }, |
9597 | - .errstr = "R0 min value is negative", |
9598 | + .errstr = "unbounded min value", |
9599 | .result = REJECT, |
9600 | }, |
9601 | { |
9602 | @@ -6360,7 +6357,7 @@ static struct bpf_test tests[] = { |
9603 | BPF_EXIT_INSN(), |
9604 | }, |
9605 | .fixup_map1 = { 3 }, |
9606 | - .errstr = "R0 min value is negative", |
9607 | + .errstr = "unbounded min value", |
9608 | .result = REJECT, |
9609 | }, |
9610 | { |
9611 | @@ -6387,7 +6384,7 @@ static struct bpf_test tests[] = { |
9612 | BPF_EXIT_INSN(), |
9613 | }, |
9614 | .fixup_map1 = { 3 }, |
9615 | - .errstr = "R0 min value is negative", |
9616 | + .errstr = "unbounded min value", |
9617 | .result = REJECT, |
9618 | }, |
9619 | { |
9620 | @@ -6413,7 +6410,7 @@ static struct bpf_test tests[] = { |
9621 | BPF_EXIT_INSN(), |
9622 | }, |
9623 | .fixup_map1 = { 3 }, |
9624 | - .errstr = "R0 min value is negative", |
9625 | + .errstr = "unbounded min value", |
9626 | .result = REJECT, |
9627 | }, |
9628 | { |
9629 | @@ -6442,7 +6439,7 @@ static struct bpf_test tests[] = { |
9630 | BPF_EXIT_INSN(), |
9631 | }, |
9632 | .fixup_map1 = { 3 }, |
9633 | - .errstr = "R0 min value is negative", |
9634 | + .errstr = "unbounded min value", |
9635 | .result = REJECT, |
9636 | }, |
9637 | { |
9638 | @@ -6472,7 +6469,7 @@ static struct bpf_test tests[] = { |
9639 | BPF_JMP_IMM(BPF_JA, 0, 0, -7), |
9640 | }, |
9641 | .fixup_map1 = { 4 }, |
9642 | - .errstr = "R0 min value is negative", |
9643 | + .errstr = "unbounded min value", |
9644 | .result = REJECT, |
9645 | }, |
9646 | { |
9647 | @@ -6500,8 +6497,7 @@ static struct bpf_test tests[] = { |
9648 | BPF_EXIT_INSN(), |
9649 | }, |
9650 | .fixup_map1 = { 3 }, |
9651 | - .errstr_unpriv = "R0 pointer comparison prohibited", |
9652 | - .errstr = "R0 min value is negative", |
9653 | + .errstr = "unbounded min value", |
9654 | .result = REJECT, |
9655 | .result_unpriv = REJECT, |
9656 | }, |
9657 | @@ -6556,6 +6552,462 @@ static struct bpf_test tests[] = { |
9658 | .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", |
9659 | .result = REJECT, |
9660 | }, |
9661 | + { |
9662 | + "bounds check based on zero-extended MOV", |
9663 | + .insns = { |
9664 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9665 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9666 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9667 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9668 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9669 | + BPF_FUNC_map_lookup_elem), |
9670 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), |
9671 | + /* r2 = 0x0000'0000'ffff'ffff */ |
9672 | + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), |
9673 | + /* r2 = 0 */ |
9674 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), |
9675 | + /* no-op */ |
9676 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), |
9677 | + /* access at offset 0 */ |
9678 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9679 | + /* exit */ |
9680 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9681 | + BPF_EXIT_INSN(), |
9682 | + }, |
9683 | + .fixup_map1 = { 3 }, |
9684 | + .result = ACCEPT |
9685 | + }, |
9686 | + { |
9687 | + "bounds check based on sign-extended MOV. test1", |
9688 | + .insns = { |
9689 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9690 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9691 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9692 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9693 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9694 | + BPF_FUNC_map_lookup_elem), |
9695 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), |
9696 | + /* r2 = 0xffff'ffff'ffff'ffff */ |
9697 | + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), |
9698 | + /* r2 = 0xffff'ffff */ |
9699 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), |
9700 | + /* r0 = <oob pointer> */ |
9701 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), |
9702 | + /* access to OOB pointer */ |
9703 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9704 | + /* exit */ |
9705 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9706 | + BPF_EXIT_INSN(), |
9707 | + }, |
9708 | + .fixup_map1 = { 3 }, |
9709 | + .errstr = "map_value pointer and 4294967295", |
9710 | + .result = REJECT |
9711 | + }, |
9712 | + { |
9713 | + "bounds check based on sign-extended MOV. test2", |
9714 | + .insns = { |
9715 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9716 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9717 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9718 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9719 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9720 | + BPF_FUNC_map_lookup_elem), |
9721 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), |
9722 | + /* r2 = 0xffff'ffff'ffff'ffff */ |
9723 | + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), |
9724 | + /* r2 = 0xfff'ffff */ |
9725 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), |
9726 | + /* r0 = <oob pointer> */ |
9727 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), |
9728 | + /* access to OOB pointer */ |
9729 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9730 | + /* exit */ |
9731 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9732 | + BPF_EXIT_INSN(), |
9733 | + }, |
9734 | + .fixup_map1 = { 3 }, |
9735 | + .errstr = "R0 min value is outside of the array range", |
9736 | + .result = REJECT |
9737 | + }, |
9738 | + { |
9739 | + "bounds check based on reg_off + var_off + insn_off. test1", |
9740 | + .insns = { |
9741 | + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, |
9742 | + offsetof(struct __sk_buff, mark)), |
9743 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9744 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9745 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9746 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9747 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9748 | + BPF_FUNC_map_lookup_elem), |
9749 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), |
9750 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), |
9751 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), |
9752 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), |
9753 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), |
9754 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), |
9755 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9756 | + BPF_EXIT_INSN(), |
9757 | + }, |
9758 | + .fixup_map1 = { 4 }, |
9759 | + .errstr = "value_size=8 off=1073741825", |
9760 | + .result = REJECT, |
9761 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, |
9762 | + }, |
9763 | + { |
9764 | + "bounds check based on reg_off + var_off + insn_off. test2", |
9765 | + .insns = { |
9766 | + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, |
9767 | + offsetof(struct __sk_buff, mark)), |
9768 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9769 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9770 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9771 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9772 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9773 | + BPF_FUNC_map_lookup_elem), |
9774 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), |
9775 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), |
9776 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), |
9777 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), |
9778 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), |
9779 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), |
9780 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9781 | + BPF_EXIT_INSN(), |
9782 | + }, |
9783 | + .fixup_map1 = { 4 }, |
9784 | + .errstr = "value 1073741823", |
9785 | + .result = REJECT, |
9786 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, |
9787 | + }, |
9788 | + { |
9789 | + "bounds check after truncation of non-boundary-crossing range", |
9790 | + .insns = { |
9791 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9792 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9793 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9794 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9795 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9796 | + BPF_FUNC_map_lookup_elem), |
9797 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), |
9798 | + /* r1 = [0x00, 0xff] */ |
9799 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
9800 | + BPF_MOV64_IMM(BPF_REG_2, 1), |
9801 | + /* r2 = 0x10'0000'0000 */ |
9802 | + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), |
9803 | + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ |
9804 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), |
9805 | + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ |
9806 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), |
9807 | + /* r1 = [0x00, 0xff] */ |
9808 | + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), |
9809 | + /* r1 = 0 */ |
9810 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), |
9811 | + /* no-op */ |
9812 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9813 | + /* access at offset 0 */ |
9814 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9815 | + /* exit */ |
9816 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9817 | + BPF_EXIT_INSN(), |
9818 | + }, |
9819 | + .fixup_map1 = { 3 }, |
9820 | + .result = ACCEPT |
9821 | + }, |
9822 | + { |
9823 | + "bounds check after truncation of boundary-crossing range (1)", |
9824 | + .insns = { |
9825 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9826 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9827 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9828 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9829 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9830 | + BPF_FUNC_map_lookup_elem), |
9831 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), |
9832 | + /* r1 = [0x00, 0xff] */ |
9833 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
9834 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), |
9835 | + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ |
9836 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), |
9837 | + /* r1 = [0xffff'ff80, 0xffff'ffff] or |
9838 | + * [0x0000'0000, 0x0000'007f] |
9839 | + */ |
9840 | + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), |
9841 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), |
9842 | + /* r1 = [0x00, 0xff] or |
9843 | + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] |
9844 | + */ |
9845 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), |
9846 | + /* r1 = 0 or |
9847 | + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] |
9848 | + */ |
9849 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), |
9850 | + /* no-op or OOB pointer computation */ |
9851 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9852 | + /* potentially OOB access */ |
9853 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9854 | + /* exit */ |
9855 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9856 | + BPF_EXIT_INSN(), |
9857 | + }, |
9858 | + .fixup_map1 = { 3 }, |
9859 | + /* not actually fully unbounded, but the bound is very high */ |
9860 | + .errstr = "R0 unbounded memory access", |
9861 | + .result = REJECT |
9862 | + }, |
9863 | + { |
9864 | + "bounds check after truncation of boundary-crossing range (2)", |
9865 | + .insns = { |
9866 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9867 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9868 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9869 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9870 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9871 | + BPF_FUNC_map_lookup_elem), |
9872 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), |
9873 | + /* r1 = [0x00, 0xff] */ |
9874 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
9875 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), |
9876 | + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ |
9877 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), |
9878 | + /* r1 = [0xffff'ff80, 0xffff'ffff] or |
9879 | + * [0x0000'0000, 0x0000'007f] |
9880 | + * difference to previous test: truncation via MOV32 |
9881 | + * instead of ALU32. |
9882 | + */ |
9883 | + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), |
9884 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), |
9885 | + /* r1 = [0x00, 0xff] or |
9886 | + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] |
9887 | + */ |
9888 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), |
9889 | + /* r1 = 0 or |
9890 | + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] |
9891 | + */ |
9892 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), |
9893 | + /* no-op or OOB pointer computation */ |
9894 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9895 | + /* potentially OOB access */ |
9896 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9897 | + /* exit */ |
9898 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9899 | + BPF_EXIT_INSN(), |
9900 | + }, |
9901 | + .fixup_map1 = { 3 }, |
9902 | + /* not actually fully unbounded, but the bound is very high */ |
9903 | + .errstr = "R0 unbounded memory access", |
9904 | + .result = REJECT |
9905 | + }, |
9906 | + { |
9907 | + "bounds check after wrapping 32-bit addition", |
9908 | + .insns = { |
9909 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9910 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9911 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9912 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9913 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9914 | + BPF_FUNC_map_lookup_elem), |
9915 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), |
9916 | + /* r1 = 0x7fff'ffff */ |
9917 | + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), |
9918 | + /* r1 = 0xffff'fffe */ |
9919 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), |
9920 | + /* r1 = 0 */ |
9921 | + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), |
9922 | + /* no-op */ |
9923 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9924 | + /* access at offset 0 */ |
9925 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9926 | + /* exit */ |
9927 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9928 | + BPF_EXIT_INSN(), |
9929 | + }, |
9930 | + .fixup_map1 = { 3 }, |
9931 | + .result = ACCEPT |
9932 | + }, |
9933 | + { |
9934 | + "bounds check after shift with oversized count operand", |
9935 | + .insns = { |
9936 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9937 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9938 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9939 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9940 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9941 | + BPF_FUNC_map_lookup_elem), |
9942 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), |
9943 | + BPF_MOV64_IMM(BPF_REG_2, 32), |
9944 | + BPF_MOV64_IMM(BPF_REG_1, 1), |
9945 | + /* r1 = (u32)1 << (u32)32 = ? */ |
9946 | + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), |
9947 | + /* r1 = [0x0000, 0xffff] */ |
9948 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), |
9949 | + /* computes unknown pointer, potentially OOB */ |
9950 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9951 | + /* potentially OOB access */ |
9952 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9953 | + /* exit */ |
9954 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9955 | + BPF_EXIT_INSN(), |
9956 | + }, |
9957 | + .fixup_map1 = { 3 }, |
9958 | + .errstr = "R0 max value is outside of the array range", |
9959 | + .result = REJECT |
9960 | + }, |
9961 | + { |
9962 | + "bounds check after right shift of maybe-negative number", |
9963 | + .insns = { |
9964 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9965 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9966 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9967 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9968 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9969 | + BPF_FUNC_map_lookup_elem), |
9970 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), |
9971 | + /* r1 = [0x00, 0xff] */ |
9972 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
9973 | + /* r1 = [-0x01, 0xfe] */ |
9974 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), |
9975 | + /* r1 = 0 or 0xff'ffff'ffff'ffff */ |
9976 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), |
9977 | + /* r1 = 0 or 0xffff'ffff'ffff */ |
9978 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), |
9979 | + /* computes unknown pointer, potentially OOB */ |
9980 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
9981 | + /* potentially OOB access */ |
9982 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), |
9983 | + /* exit */ |
9984 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
9985 | + BPF_EXIT_INSN(), |
9986 | + }, |
9987 | + .fixup_map1 = { 3 }, |
9988 | + .errstr = "R0 unbounded memory access", |
9989 | + .result = REJECT |
9990 | + }, |
9991 | + { |
9992 | + "bounds check map access with off+size signed 32bit overflow. test1", |
9993 | + .insns = { |
9994 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
9995 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
9996 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
9997 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
9998 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
9999 | + BPF_FUNC_map_lookup_elem), |
10000 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), |
10001 | + BPF_EXIT_INSN(), |
10002 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), |
10003 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), |
10004 | + BPF_JMP_A(0), |
10005 | + BPF_EXIT_INSN(), |
10006 | + }, |
10007 | + .fixup_map1 = { 3 }, |
10008 | + .errstr = "map_value pointer and 2147483646", |
10009 | + .result = REJECT |
10010 | + }, |
10011 | + { |
10012 | + "bounds check map access with off+size signed 32bit overflow. test2", |
10013 | + .insns = { |
10014 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10015 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
10016 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
10017 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10018 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10019 | + BPF_FUNC_map_lookup_elem), |
10020 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), |
10021 | + BPF_EXIT_INSN(), |
10022 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), |
10023 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), |
10024 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), |
10025 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), |
10026 | + BPF_JMP_A(0), |
10027 | + BPF_EXIT_INSN(), |
10028 | + }, |
10029 | + .fixup_map1 = { 3 }, |
10030 | + .errstr = "pointer offset 1073741822", |
10031 | + .result = REJECT |
10032 | + }, |
10033 | + { |
10034 | + "bounds check map access with off+size signed 32bit overflow. test3", |
10035 | + .insns = { |
10036 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10037 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
10038 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
10039 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10040 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10041 | + BPF_FUNC_map_lookup_elem), |
10042 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), |
10043 | + BPF_EXIT_INSN(), |
10044 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), |
10045 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), |
10046 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), |
10047 | + BPF_JMP_A(0), |
10048 | + BPF_EXIT_INSN(), |
10049 | + }, |
10050 | + .fixup_map1 = { 3 }, |
10051 | + .errstr = "pointer offset -1073741822", |
10052 | + .result = REJECT |
10053 | + }, |
10054 | + { |
10055 | + "bounds check map access with off+size signed 32bit overflow. test4", |
10056 | + .insns = { |
10057 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10058 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
10059 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
10060 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10061 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10062 | + BPF_FUNC_map_lookup_elem), |
10063 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), |
10064 | + BPF_EXIT_INSN(), |
10065 | + BPF_MOV64_IMM(BPF_REG_1, 1000000), |
10066 | + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), |
10067 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), |
10068 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), |
10069 | + BPF_JMP_A(0), |
10070 | + BPF_EXIT_INSN(), |
10071 | + }, |
10072 | + .fixup_map1 = { 3 }, |
10073 | + .errstr = "map_value pointer and 1000000000000", |
10074 | + .result = REJECT |
10075 | + }, |
10076 | + { |
10077 | + "pointer/scalar confusion in state equality check (way 1)", |
10078 | + .insns = { |
10079 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10080 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
10081 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
10082 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10083 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10084 | + BPF_FUNC_map_lookup_elem), |
10085 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), |
10086 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), |
10087 | + BPF_JMP_A(1), |
10088 | + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), |
10089 | + BPF_JMP_A(0), |
10090 | + BPF_EXIT_INSN(), |
10091 | + }, |
10092 | + .fixup_map1 = { 3 }, |
10093 | + .result = ACCEPT, |
10094 | + .result_unpriv = REJECT, |
10095 | + .errstr_unpriv = "R0 leaks addr as return value" |
10096 | + }, |
10097 | + { |
10098 | + "pointer/scalar confusion in state equality check (way 2)", |
10099 | + .insns = { |
10100 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10101 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
10102 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
10103 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10104 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10105 | + BPF_FUNC_map_lookup_elem), |
10106 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), |
10107 | + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), |
10108 | + BPF_JMP_A(1), |
10109 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), |
10110 | + BPF_EXIT_INSN(), |
10111 | + }, |
10112 | + .fixup_map1 = { 3 }, |
10113 | + .result = ACCEPT, |
10114 | + .result_unpriv = REJECT, |
10115 | + .errstr_unpriv = "R0 leaks addr as return value" |
10116 | + }, |
10117 | { |
10118 | "variable-offset ctx access", |
10119 | .insns = { |
10120 | @@ -6597,6 +7049,71 @@ static struct bpf_test tests[] = { |
10121 | .result = REJECT, |
10122 | .prog_type = BPF_PROG_TYPE_LWT_IN, |
10123 | }, |
10124 | + { |
10125 | + "indirect variable-offset stack access", |
10126 | + .insns = { |
10127 | + /* Fill the top 8 bytes of the stack */ |
10128 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
10129 | + /* Get an unknown value */ |
10130 | + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), |
10131 | + /* Make it small and 4-byte aligned */ |
10132 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), |
10133 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), |
10134 | + /* add it to fp. We now have either fp-4 or fp-8, but |
10135 | + * we don't know which |
10136 | + */ |
10137 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), |
10138 | + /* dereference it indirectly */ |
10139 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
10140 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
10141 | + BPF_FUNC_map_lookup_elem), |
10142 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
10143 | + BPF_EXIT_INSN(), |
10144 | + }, |
10145 | + .fixup_map1 = { 5 }, |
10146 | + .errstr = "variable stack read R2", |
10147 | + .result = REJECT, |
10148 | + .prog_type = BPF_PROG_TYPE_LWT_IN, |
10149 | + }, |
10150 | + { |
10151 | + "direct stack access with 32-bit wraparound. test1", |
10152 | + .insns = { |
10153 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), |
10154 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), |
10155 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), |
10156 | + BPF_MOV32_IMM(BPF_REG_0, 0), |
10157 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
10158 | + BPF_EXIT_INSN() |
10159 | + }, |
10160 | + .errstr = "fp pointer and 2147483647", |
10161 | + .result = REJECT |
10162 | + }, |
10163 | + { |
10164 | + "direct stack access with 32-bit wraparound. test2", |
10165 | + .insns = { |
10166 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), |
10167 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), |
10168 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), |
10169 | + BPF_MOV32_IMM(BPF_REG_0, 0), |
10170 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
10171 | + BPF_EXIT_INSN() |
10172 | + }, |
10173 | + .errstr = "fp pointer and 1073741823", |
10174 | + .result = REJECT |
10175 | + }, |
10176 | + { |
10177 | + "direct stack access with 32-bit wraparound. test3", |
10178 | + .insns = { |
10179 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), |
10180 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), |
10181 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), |
10182 | + BPF_MOV32_IMM(BPF_REG_0, 0), |
10183 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), |
10184 | + BPF_EXIT_INSN() |
10185 | + }, |
10186 | + .errstr = "fp pointer offset 1073741822", |
10187 | + .result = REJECT |
10188 | + }, |
10189 | { |
10190 | "liveness pruning and write screening", |
10191 | .insns = { |
10192 | diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c |
10193 | index 2afc41a3730f..66e5ce5b91f0 100644 |
10194 | --- a/tools/testing/selftests/x86/ldt_gdt.c |
10195 | +++ b/tools/testing/selftests/x86/ldt_gdt.c |
10196 | @@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt, |
10197 | } |
10198 | } |
10199 | |
10200 | -static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, |
10201 | - bool oldmode) |
10202 | +static bool install_valid_mode(const struct user_desc *d, uint32_t ar, |
10203 | + bool oldmode, bool ldt) |
10204 | { |
10205 | - int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, |
10206 | - desc, sizeof(*desc)); |
10207 | - if (ret < -1) |
10208 | - errno = -ret; |
10209 | + struct user_desc desc = *d; |
10210 | + int ret; |
10211 | + |
10212 | + if (!ldt) { |
10213 | +#ifndef __i386__ |
10214 | + /* No point testing set_thread_area in a 64-bit build */ |
10215 | + return false; |
10216 | +#endif |
10217 | + if (!gdt_entry_num) |
10218 | + return false; |
10219 | + desc.entry_number = gdt_entry_num; |
10220 | + |
10221 | + ret = syscall(SYS_set_thread_area, &desc); |
10222 | + } else { |
10223 | + ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, |
10224 | + &desc, sizeof(desc)); |
10225 | + |
10226 | + if (ret < -1) |
10227 | + errno = -ret; |
10228 | + |
10229 | + if (ret != 0 && errno == ENOSYS) { |
10230 | + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); |
10231 | + return false; |
10232 | + } |
10233 | + } |
10234 | + |
10235 | if (ret == 0) { |
10236 | - uint32_t limit = desc->limit; |
10237 | - if (desc->limit_in_pages) |
10238 | + uint32_t limit = desc.limit; |
10239 | + if (desc.limit_in_pages) |
10240 | limit = (limit << 12) + 4095; |
10241 | - check_valid_segment(desc->entry_number, 1, ar, limit, true); |
10242 | + check_valid_segment(desc.entry_number, ldt, ar, limit, true); |
10243 | return true; |
10244 | - } else if (errno == ENOSYS) { |
10245 | - printf("[OK]\tmodify_ldt returned -ENOSYS\n"); |
10246 | - return false; |
10247 | } else { |
10248 | - if (desc->seg_32bit) { |
10249 | - printf("[FAIL]\tUnexpected modify_ldt failure %d\n", |
10250 | + if (desc.seg_32bit) { |
10251 | + printf("[FAIL]\tUnexpected %s failure %d\n", |
10252 | + ldt ? "modify_ldt" : "set_thread_area", |
10253 | errno); |
10254 | nerrs++; |
10255 | return false; |
10256 | } else { |
10257 | - printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); |
10258 | + printf("[OK]\t%s rejected 16 bit segment\n", |
10259 | + ldt ? "modify_ldt" : "set_thread_area"); |
10260 | return false; |
10261 | } |
10262 | } |
10263 | @@ -168,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, |
10264 | |
10265 | static bool install_valid(const struct user_desc *desc, uint32_t ar) |
10266 | { |
10267 | - return install_valid_mode(desc, ar, false); |
10268 | + bool ret = install_valid_mode(desc, ar, false, true); |
10269 | + |
10270 | + if (desc->contents <= 1 && desc->seg_32bit && |
10271 | + !desc->seg_not_present) { |
10272 | + /* Should work in the GDT, too. */ |
10273 | + install_valid_mode(desc, ar, false, false); |
10274 | + } |
10275 | + |
10276 | + return ret; |
10277 | } |
10278 | |
10279 | static void install_invalid(const struct user_desc *desc, bool oldmode) |
10280 | diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
10281 | index 484e8820c382..2447d7c017e7 100644 |
10282 | --- a/virt/kvm/kvm_main.c |
10283 | +++ b/virt/kvm/kvm_main.c |
10284 | @@ -4018,7 +4018,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, |
10285 | if (!vcpu_align) |
10286 | vcpu_align = __alignof__(struct kvm_vcpu); |
10287 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, |
10288 | - 0, NULL); |
10289 | + SLAB_ACCOUNT, NULL); |
10290 | if (!kvm_vcpu_cache) { |
10291 | r = -ENOMEM; |
10292 | goto out_free_3; |