Annotation of /trunk/kernel-alx/patches-4.14/0108-4.14.9-all-fixes.patch
Parent Directory | Revision Log
Revision 3238 -
(hide annotations)
(download)
Fri Nov 9 12:14:58 2018 UTC (5 years, 10 months ago) by niro
File size: 356090 byte(s)
-added up to patches-4.14.79
1 | niro | 3238 | diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt |
2 | index af0c9a4c65a6..cd4b29be29af 100644 | ||
3 | --- a/Documentation/x86/orc-unwinder.txt | ||
4 | +++ b/Documentation/x86/orc-unwinder.txt | ||
5 | @@ -4,7 +4,7 @@ ORC unwinder | ||
6 | Overview | ||
7 | -------- | ||
8 | |||
9 | -The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is | ||
10 | +The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is | ||
11 | similar in concept to a DWARF unwinder. The difference is that the | ||
12 | format of the ORC data is much simpler than DWARF, which in turn allows | ||
13 | the ORC unwinder to be much simpler and faster. | ||
14 | diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt | ||
15 | index b0798e281aa6..3448e675b462 100644 | ||
16 | --- a/Documentation/x86/x86_64/mm.txt | ||
17 | +++ b/Documentation/x86/x86_64/mm.txt | ||
18 | @@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space | ||
19 | ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole | ||
20 | ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB) | ||
21 | ... unused hole ... | ||
22 | -ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB) | ||
23 | +ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB) | ||
24 | ... unused hole ... | ||
25 | ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks | ||
26 | ... unused hole ... | ||
27 | diff --git a/Makefile b/Makefile | ||
28 | index 97b5ae76ac8c..ed2132c6d286 100644 | ||
29 | --- a/Makefile | ||
30 | +++ b/Makefile | ||
31 | @@ -1,7 +1,7 @@ | ||
32 | # SPDX-License-Identifier: GPL-2.0 | ||
33 | VERSION = 4 | ||
34 | PATCHLEVEL = 14 | ||
35 | -SUBLEVEL = 8 | ||
36 | +SUBLEVEL = 9 | ||
37 | EXTRAVERSION = | ||
38 | NAME = Petit Gorille | ||
39 | |||
40 | @@ -935,8 +935,8 @@ ifdef CONFIG_STACK_VALIDATION | ||
41 | ifeq ($(has_libelf),1) | ||
42 | objtool_target := tools/objtool FORCE | ||
43 | else | ||
44 | - ifdef CONFIG_ORC_UNWINDER | ||
45 | - $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") | ||
46 | + ifdef CONFIG_UNWINDER_ORC | ||
47 | + $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") | ||
48 | else | ||
49 | $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel") | ||
50 | endif | ||
51 | diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig | ||
52 | index 8c2a2619971b..f1d7834990ec 100644 | ||
53 | --- a/arch/arm/configs/exynos_defconfig | ||
54 | +++ b/arch/arm/configs/exynos_defconfig | ||
55 | @@ -244,7 +244,7 @@ CONFIG_USB_STORAGE_ONETOUCH=m | ||
56 | CONFIG_USB_STORAGE_KARMA=m | ||
57 | CONFIG_USB_STORAGE_CYPRESS_ATACB=m | ||
58 | CONFIG_USB_STORAGE_ENE_UB6250=m | ||
59 | -CONFIG_USB_UAS=m | ||
60 | +CONFIG_USB_UAS=y | ||
61 | CONFIG_USB_DWC3=y | ||
62 | CONFIG_USB_DWC2=y | ||
63 | CONFIG_USB_HSIC_USB3503=y | ||
64 | diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h | ||
65 | index e9c9a117bd25..c7cdbb43ae7c 100644 | ||
66 | --- a/arch/arm/include/asm/ptrace.h | ||
67 | +++ b/arch/arm/include/asm/ptrace.h | ||
68 | @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); | ||
69 | /* | ||
70 | * kprobe-based event tracer support | ||
71 | */ | ||
72 | -#include <linux/stddef.h> | ||
73 | -#include <linux/types.h> | ||
74 | +#include <linux/compiler.h> | ||
75 | #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) | ||
76 | |||
77 | extern int regs_query_register_offset(const char *name); | ||
78 | diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h | ||
79 | index caf86be815ba..4052ec39e8db 100644 | ||
80 | --- a/arch/arm64/include/asm/fixmap.h | ||
81 | +++ b/arch/arm64/include/asm/fixmap.h | ||
82 | @@ -51,6 +51,13 @@ enum fixed_addresses { | ||
83 | |||
84 | FIX_EARLYCON_MEM_BASE, | ||
85 | FIX_TEXT_POKE0, | ||
86 | + | ||
87 | +#ifdef CONFIG_ACPI_APEI_GHES | ||
88 | + /* Used for GHES mapping from assorted contexts */ | ||
89 | + FIX_APEI_GHES_IRQ, | ||
90 | + FIX_APEI_GHES_NMI, | ||
91 | +#endif /* CONFIG_ACPI_APEI_GHES */ | ||
92 | + | ||
93 | __end_of_permanent_fixed_addresses, | ||
94 | |||
95 | /* | ||
96 | diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c | ||
97 | index 57190f384f63..ce848ff84edd 100644 | ||
98 | --- a/arch/powerpc/kernel/watchdog.c | ||
99 | +++ b/arch/powerpc/kernel/watchdog.c | ||
100 | @@ -276,9 +276,12 @@ void arch_touch_nmi_watchdog(void) | ||
101 | { | ||
102 | unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; | ||
103 | int cpu = smp_processor_id(); | ||
104 | + u64 tb = get_tb(); | ||
105 | |||
106 | - if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) | ||
107 | - watchdog_timer_interrupt(cpu); | ||
108 | + if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { | ||
109 | + per_cpu(wd_timer_tb, cpu) = tb; | ||
110 | + wd_smp_clear_cpu_pending(cpu, tb); | ||
111 | + } | ||
112 | } | ||
113 | EXPORT_SYMBOL(arch_touch_nmi_watchdog); | ||
114 | |||
115 | diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c | ||
116 | index a66e64b0b251..5d115bd32539 100644 | ||
117 | --- a/arch/powerpc/net/bpf_jit_comp64.c | ||
118 | +++ b/arch/powerpc/net/bpf_jit_comp64.c | ||
119 | @@ -762,7 +762,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | ||
120 | func = (u8 *) __bpf_call_base + imm; | ||
121 | |||
122 | /* Save skb pointer if we need to re-cache skb data */ | ||
123 | - if (bpf_helper_changes_pkt_data(func)) | ||
124 | + if ((ctx->seen & SEEN_SKB) && | ||
125 | + bpf_helper_changes_pkt_data(func)) | ||
126 | PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); | ||
127 | |||
128 | bpf_jit_emit_func_call(image, ctx, (u64)func); | ||
129 | @@ -771,7 +772,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | ||
130 | PPC_MR(b2p[BPF_REG_0], 3); | ||
131 | |||
132 | /* refresh skb cache */ | ||
133 | - if (bpf_helper_changes_pkt_data(func)) { | ||
134 | + if ((ctx->seen & SEEN_SKB) && | ||
135 | + bpf_helper_changes_pkt_data(func)) { | ||
136 | /* reload skb pointer to r3 */ | ||
137 | PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); | ||
138 | bpf_jit_emit_skb_loads(image, ctx); | ||
139 | diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c | ||
140 | index c008083fbc4f..2c8b325591cc 100644 | ||
141 | --- a/arch/powerpc/xmon/xmon.c | ||
142 | +++ b/arch/powerpc/xmon/xmon.c | ||
143 | @@ -530,14 +530,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi) | ||
144 | |||
145 | waiting: | ||
146 | secondary = 1; | ||
147 | + spin_begin(); | ||
148 | while (secondary && !xmon_gate) { | ||
149 | if (in_xmon == 0) { | ||
150 | - if (fromipi) | ||
151 | + if (fromipi) { | ||
152 | + spin_end(); | ||
153 | goto leave; | ||
154 | + } | ||
155 | secondary = test_and_set_bit(0, &in_xmon); | ||
156 | } | ||
157 | - barrier(); | ||
158 | + spin_cpu_relax(); | ||
159 | + touch_nmi_watchdog(); | ||
160 | } | ||
161 | + spin_end(); | ||
162 | |||
163 | if (!secondary && !xmon_gate) { | ||
164 | /* we are the first cpu to come in */ | ||
165 | @@ -568,21 +573,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi) | ||
166 | mb(); | ||
167 | xmon_gate = 1; | ||
168 | barrier(); | ||
169 | + touch_nmi_watchdog(); | ||
170 | } | ||
171 | |||
172 | cmdloop: | ||
173 | while (in_xmon) { | ||
174 | if (secondary) { | ||
175 | + spin_begin(); | ||
176 | if (cpu == xmon_owner) { | ||
177 | if (!test_and_set_bit(0, &xmon_taken)) { | ||
178 | secondary = 0; | ||
179 | + spin_end(); | ||
180 | continue; | ||
181 | } | ||
182 | /* missed it */ | ||
183 | while (cpu == xmon_owner) | ||
184 | - barrier(); | ||
185 | + spin_cpu_relax(); | ||
186 | } | ||
187 | - barrier(); | ||
188 | + spin_cpu_relax(); | ||
189 | + touch_nmi_watchdog(); | ||
190 | } else { | ||
191 | cmd = cmds(regs); | ||
192 | if (cmd != 0) { | ||
193 | diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c | ||
194 | index b15cd2f0320f..33e2785f6842 100644 | ||
195 | --- a/arch/s390/net/bpf_jit_comp.c | ||
196 | +++ b/arch/s390/net/bpf_jit_comp.c | ||
197 | @@ -55,8 +55,7 @@ struct bpf_jit { | ||
198 | #define SEEN_LITERAL 8 /* code uses literals */ | ||
199 | #define SEEN_FUNC 16 /* calls C functions */ | ||
200 | #define SEEN_TAIL_CALL 32 /* code uses tail calls */ | ||
201 | -#define SEEN_SKB_CHANGE 64 /* code changes skb data */ | ||
202 | -#define SEEN_REG_AX 128 /* code uses constant blinding */ | ||
203 | +#define SEEN_REG_AX 64 /* code uses constant blinding */ | ||
204 | #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) | ||
205 | |||
206 | /* | ||
207 | @@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit) | ||
208 | EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, | ||
209 | REG_15, 152); | ||
210 | } | ||
211 | - if (jit->seen & SEEN_SKB) | ||
212 | + if (jit->seen & SEEN_SKB) { | ||
213 | emit_load_skb_data_hlen(jit); | ||
214 | - if (jit->seen & SEEN_SKB_CHANGE) | ||
215 | /* stg %b1,ST_OFF_SKBP(%r0,%r15) */ | ||
216 | EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15, | ||
217 | STK_OFF_SKBP); | ||
218 | + } | ||
219 | } | ||
220 | |||
221 | /* | ||
222 | @@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i | ||
223 | EMIT2(0x0d00, REG_14, REG_W1); | ||
224 | /* lgr %b0,%r2: load return value into %b0 */ | ||
225 | EMIT4(0xb9040000, BPF_REG_0, REG_2); | ||
226 | - if (bpf_helper_changes_pkt_data((void *)func)) { | ||
227 | - jit->seen |= SEEN_SKB_CHANGE; | ||
228 | + if ((jit->seen & SEEN_SKB) && | ||
229 | + bpf_helper_changes_pkt_data((void *)func)) { | ||
230 | /* lg %b1,ST_OFF_SKBP(%r15) */ | ||
231 | EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0, | ||
232 | REG_15, STK_OFF_SKBP); | ||
233 | diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h | ||
234 | index 6a339a78f4f4..71dd82b43cc5 100644 | ||
235 | --- a/arch/sparc/include/asm/ptrace.h | ||
236 | +++ b/arch/sparc/include/asm/ptrace.h | ||
237 | @@ -7,6 +7,7 @@ | ||
238 | #if defined(__sparc__) && defined(__arch64__) | ||
239 | #ifndef __ASSEMBLY__ | ||
240 | |||
241 | +#include <linux/compiler.h> | ||
242 | #include <linux/threads.h> | ||
243 | #include <asm/switch_to.h> | ||
244 | |||
245 | diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c | ||
246 | index 5765e7e711f7..ff5f9cb3039a 100644 | ||
247 | --- a/arch/sparc/net/bpf_jit_comp_64.c | ||
248 | +++ b/arch/sparc/net/bpf_jit_comp_64.c | ||
249 | @@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) | ||
250 | u8 *func = ((u8 *)__bpf_call_base) + imm; | ||
251 | |||
252 | ctx->saw_call = true; | ||
253 | + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
254 | + emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx); | ||
255 | |||
256 | emit_call((u32 *)func, ctx); | ||
257 | emit_nop(ctx); | ||
258 | |||
259 | emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx); | ||
260 | |||
261 | - if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind) | ||
262 | - load_skb_regs(ctx, bpf2sparc[BPF_REG_6]); | ||
263 | + if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func)) | ||
264 | + load_skb_regs(ctx, L7); | ||
265 | break; | ||
266 | } | ||
267 | |||
268 | diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild | ||
269 | index 50a32c33d729..73c57f614c9e 100644 | ||
270 | --- a/arch/um/include/asm/Kbuild | ||
271 | +++ b/arch/um/include/asm/Kbuild | ||
272 | @@ -1,4 +1,5 @@ | ||
273 | generic-y += barrier.h | ||
274 | +generic-y += bpf_perf_event.h | ||
275 | generic-y += bug.h | ||
276 | generic-y += clkdev.h | ||
277 | generic-y += current.h | ||
278 | diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h | ||
279 | index 390572daa40d..b3f5865a92c9 100644 | ||
280 | --- a/arch/um/include/shared/init.h | ||
281 | +++ b/arch/um/include/shared/init.h | ||
282 | @@ -41,7 +41,7 @@ | ||
283 | typedef int (*initcall_t)(void); | ||
284 | typedef void (*exitcall_t)(void); | ||
285 | |||
286 | -#include <linux/compiler.h> | ||
287 | +#include <linux/compiler_types.h> | ||
288 | |||
289 | /* These are for everybody (although not all archs will actually | ||
290 | discard it in modules) */ | ||
291 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | ||
292 | index 9bceea6a5852..48646160eb83 100644 | ||
293 | --- a/arch/x86/Kconfig | ||
294 | +++ b/arch/x86/Kconfig | ||
295 | @@ -108,7 +108,7 @@ config X86 | ||
296 | select HAVE_ARCH_AUDITSYSCALL | ||
297 | select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE | ||
298 | select HAVE_ARCH_JUMP_LABEL | ||
299 | - select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP | ||
300 | + select HAVE_ARCH_KASAN if X86_64 | ||
301 | select HAVE_ARCH_KGDB | ||
302 | select HAVE_ARCH_KMEMCHECK | ||
303 | select HAVE_ARCH_MMAP_RND_BITS if MMU | ||
304 | @@ -171,7 +171,7 @@ config X86 | ||
305 | select HAVE_PERF_USER_STACK_DUMP | ||
306 | select HAVE_RCU_TABLE_FREE | ||
307 | select HAVE_REGS_AND_STACK_ACCESS_API | ||
308 | - select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION | ||
309 | + select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION | ||
310 | select HAVE_STACK_VALIDATION if X86_64 | ||
311 | select HAVE_SYSCALL_TRACEPOINTS | ||
312 | select HAVE_UNSTABLE_SCHED_CLOCK | ||
313 | @@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC | ||
314 | config KASAN_SHADOW_OFFSET | ||
315 | hex | ||
316 | depends on KASAN | ||
317 | - default 0xdff8000000000000 if X86_5LEVEL | ||
318 | default 0xdffffc0000000000 | ||
319 | |||
320 | config HAVE_INTEL_TXT | ||
321 | diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug | ||
322 | index 90b123056f4b..6293a8768a91 100644 | ||
323 | --- a/arch/x86/Kconfig.debug | ||
324 | +++ b/arch/x86/Kconfig.debug | ||
325 | @@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG | ||
326 | |||
327 | choice | ||
328 | prompt "Choose kernel unwinder" | ||
329 | - default FRAME_POINTER_UNWINDER | ||
330 | + default UNWINDER_ORC if X86_64 | ||
331 | + default UNWINDER_FRAME_POINTER if X86_32 | ||
332 | ---help--- | ||
333 | This determines which method will be used for unwinding kernel stack | ||
334 | traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack, | ||
335 | livepatch, lockdep, and more. | ||
336 | |||
337 | -config FRAME_POINTER_UNWINDER | ||
338 | - bool "Frame pointer unwinder" | ||
339 | - select FRAME_POINTER | ||
340 | - ---help--- | ||
341 | - This option enables the frame pointer unwinder for unwinding kernel | ||
342 | - stack traces. | ||
343 | - | ||
344 | - The unwinder itself is fast and it uses less RAM than the ORC | ||
345 | - unwinder, but the kernel text size will grow by ~3% and the kernel's | ||
346 | - overall performance will degrade by roughly 5-10%. | ||
347 | - | ||
348 | - This option is recommended if you want to use the livepatch | ||
349 | - consistency model, as this is currently the only way to get a | ||
350 | - reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). | ||
351 | - | ||
352 | -config ORC_UNWINDER | ||
353 | +config UNWINDER_ORC | ||
354 | bool "ORC unwinder" | ||
355 | depends on X86_64 | ||
356 | select STACK_VALIDATION | ||
357 | @@ -396,7 +382,22 @@ config ORC_UNWINDER | ||
358 | Enabling this option will increase the kernel's runtime memory usage | ||
359 | by roughly 2-4MB, depending on your kernel config. | ||
360 | |||
361 | -config GUESS_UNWINDER | ||
362 | +config UNWINDER_FRAME_POINTER | ||
363 | + bool "Frame pointer unwinder" | ||
364 | + select FRAME_POINTER | ||
365 | + ---help--- | ||
366 | + This option enables the frame pointer unwinder for unwinding kernel | ||
367 | + stack traces. | ||
368 | + | ||
369 | + The unwinder itself is fast and it uses less RAM than the ORC | ||
370 | + unwinder, but the kernel text size will grow by ~3% and the kernel's | ||
371 | + overall performance will degrade by roughly 5-10%. | ||
372 | + | ||
373 | + This option is recommended if you want to use the livepatch | ||
374 | + consistency model, as this is currently the only way to get a | ||
375 | + reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE). | ||
376 | + | ||
377 | +config UNWINDER_GUESS | ||
378 | bool "Guess unwinder" | ||
379 | depends on EXPERT | ||
380 | ---help--- | ||
381 | @@ -411,7 +412,7 @@ config GUESS_UNWINDER | ||
382 | endchoice | ||
383 | |||
384 | config FRAME_POINTER | ||
385 | - depends on !ORC_UNWINDER && !GUESS_UNWINDER | ||
386 | + depends on !UNWINDER_ORC && !UNWINDER_GUESS | ||
387 | bool | ||
388 | |||
389 | endmenu | ||
390 | diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config | ||
391 | index 550cd5012b73..66c9e2aab16c 100644 | ||
392 | --- a/arch/x86/configs/tiny.config | ||
393 | +++ b/arch/x86/configs/tiny.config | ||
394 | @@ -1,5 +1,5 @@ | ||
395 | CONFIG_NOHIGHMEM=y | ||
396 | # CONFIG_HIGHMEM4G is not set | ||
397 | # CONFIG_HIGHMEM64G is not set | ||
398 | -CONFIG_GUESS_UNWINDER=y | ||
399 | -# CONFIG_FRAME_POINTER_UNWINDER is not set | ||
400 | +CONFIG_UNWINDER_GUESS=y | ||
401 | +# CONFIG_UNWINDER_FRAME_POINTER is not set | ||
402 | diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig | ||
403 | index 4a4b16e56d35..e32fc1f274d8 100644 | ||
404 | --- a/arch/x86/configs/x86_64_defconfig | ||
405 | +++ b/arch/x86/configs/x86_64_defconfig | ||
406 | @@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y | ||
407 | # CONFIG_DEBUG_RODATA_TEST is not set | ||
408 | CONFIG_DEBUG_BOOT_PARAMS=y | ||
409 | CONFIG_OPTIMIZE_INLINING=y | ||
410 | +CONFIG_UNWINDER_ORC=y | ||
411 | CONFIG_SECURITY=y | ||
412 | CONFIG_SECURITY_NETWORK=y | ||
413 | CONFIG_SECURITY_SELINUX=y | ||
414 | diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h | ||
415 | index 6e160031cfea..3fd8bc560fae 100644 | ||
416 | --- a/arch/x86/entry/calling.h | ||
417 | +++ b/arch/x86/entry/calling.h | ||
418 | @@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with | ||
419 | UNWIND_HINT_REGS offset=\offset | ||
420 | .endm | ||
421 | |||
422 | - .macro RESTORE_EXTRA_REGS offset=0 | ||
423 | - movq 0*8+\offset(%rsp), %r15 | ||
424 | - movq 1*8+\offset(%rsp), %r14 | ||
425 | - movq 2*8+\offset(%rsp), %r13 | ||
426 | - movq 3*8+\offset(%rsp), %r12 | ||
427 | - movq 4*8+\offset(%rsp), %rbp | ||
428 | - movq 5*8+\offset(%rsp), %rbx | ||
429 | - UNWIND_HINT_REGS offset=\offset extra=0 | ||
430 | - .endm | ||
431 | - | ||
432 | - .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 | ||
433 | - .if \rstor_r11 | ||
434 | - movq 6*8(%rsp), %r11 | ||
435 | - .endif | ||
436 | - .if \rstor_r8910 | ||
437 | - movq 7*8(%rsp), %r10 | ||
438 | - movq 8*8(%rsp), %r9 | ||
439 | - movq 9*8(%rsp), %r8 | ||
440 | - .endif | ||
441 | - .if \rstor_rax | ||
442 | - movq 10*8(%rsp), %rax | ||
443 | - .endif | ||
444 | - .if \rstor_rcx | ||
445 | - movq 11*8(%rsp), %rcx | ||
446 | - .endif | ||
447 | - .if \rstor_rdx | ||
448 | - movq 12*8(%rsp), %rdx | ||
449 | - .endif | ||
450 | - movq 13*8(%rsp), %rsi | ||
451 | - movq 14*8(%rsp), %rdi | ||
452 | - UNWIND_HINT_IRET_REGS offset=16*8 | ||
453 | - .endm | ||
454 | - .macro RESTORE_C_REGS | ||
455 | - RESTORE_C_REGS_HELPER 1,1,1,1,1 | ||
456 | - .endm | ||
457 | - .macro RESTORE_C_REGS_EXCEPT_RAX | ||
458 | - RESTORE_C_REGS_HELPER 0,1,1,1,1 | ||
459 | - .endm | ||
460 | - .macro RESTORE_C_REGS_EXCEPT_RCX | ||
461 | - RESTORE_C_REGS_HELPER 1,0,1,1,1 | ||
462 | - .endm | ||
463 | - .macro RESTORE_C_REGS_EXCEPT_R11 | ||
464 | - RESTORE_C_REGS_HELPER 1,1,0,1,1 | ||
465 | - .endm | ||
466 | - .macro RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
467 | - RESTORE_C_REGS_HELPER 1,0,0,1,1 | ||
468 | - .endm | ||
469 | - | ||
470 | - .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 | ||
471 | - subq $-(15*8+\addskip), %rsp | ||
472 | + .macro POP_EXTRA_REGS | ||
473 | + popq %r15 | ||
474 | + popq %r14 | ||
475 | + popq %r13 | ||
476 | + popq %r12 | ||
477 | + popq %rbp | ||
478 | + popq %rbx | ||
479 | + .endm | ||
480 | + | ||
481 | + .macro POP_C_REGS | ||
482 | + popq %r11 | ||
483 | + popq %r10 | ||
484 | + popq %r9 | ||
485 | + popq %r8 | ||
486 | + popq %rax | ||
487 | + popq %rcx | ||
488 | + popq %rdx | ||
489 | + popq %rsi | ||
490 | + popq %rdi | ||
491 | .endm | ||
492 | |||
493 | .macro icebp | ||
494 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | ||
495 | index 4838037f97f6..bd8b57a5c874 100644 | ||
496 | --- a/arch/x86/entry/entry_32.S | ||
497 | +++ b/arch/x86/entry/entry_32.S | ||
498 | @@ -941,7 +941,8 @@ ENTRY(debug) | ||
499 | movl %esp, %eax # pt_regs pointer | ||
500 | |||
501 | /* Are we currently on the SYSENTER stack? */ | ||
502 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | ||
503 | + movl PER_CPU_VAR(cpu_entry_area), %ecx | ||
504 | + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx | ||
505 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | ||
506 | cmpl $SIZEOF_SYSENTER_stack, %ecx | ||
507 | jb .Ldebug_from_sysenter_stack | ||
508 | @@ -984,7 +985,8 @@ ENTRY(nmi) | ||
509 | movl %esp, %eax # pt_regs pointer | ||
510 | |||
511 | /* Are we currently on the SYSENTER stack? */ | ||
512 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | ||
513 | + movl PER_CPU_VAR(cpu_entry_area), %ecx | ||
514 | + addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx | ||
515 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | ||
516 | cmpl $SIZEOF_SYSENTER_stack, %ecx | ||
517 | jb .Lnmi_from_sysenter_stack | ||
518 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | ||
519 | index 2e956afe272c..6abe3fcaece9 100644 | ||
520 | --- a/arch/x86/entry/entry_64.S | ||
521 | +++ b/arch/x86/entry/entry_64.S | ||
522 | @@ -136,6 +136,64 @@ END(native_usergs_sysret64) | ||
523 | * with them due to bugs in both AMD and Intel CPUs. | ||
524 | */ | ||
525 | |||
526 | + .pushsection .entry_trampoline, "ax" | ||
527 | + | ||
528 | +/* | ||
529 | + * The code in here gets remapped into cpu_entry_area's trampoline. This means | ||
530 | + * that the assembler and linker have the wrong idea as to where this code | ||
531 | + * lives (and, in fact, it's mapped more than once, so it's not even at a | ||
532 | + * fixed address). So we can't reference any symbols outside the entry | ||
533 | + * trampoline and expect it to work. | ||
534 | + * | ||
535 | + * Instead, we carefully abuse %rip-relative addressing. | ||
536 | + * _entry_trampoline(%rip) refers to the start of the remapped entry | ||
537 | + * trampoline. We can thus find cpu_entry_area with this macro: | ||
538 | + */ | ||
539 | + | ||
540 | +#define CPU_ENTRY_AREA \ | ||
541 | + _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip) | ||
542 | + | ||
543 | +/* The top word of the SYSENTER stack is hot and is usable as scratch space. */ | ||
544 | +#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \ | ||
545 | + SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA | ||
546 | + | ||
547 | +ENTRY(entry_SYSCALL_64_trampoline) | ||
548 | + UNWIND_HINT_EMPTY | ||
549 | + swapgs | ||
550 | + | ||
551 | + /* Stash the user RSP. */ | ||
552 | + movq %rsp, RSP_SCRATCH | ||
553 | + | ||
554 | + /* Load the top of the task stack into RSP */ | ||
555 | + movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp | ||
556 | + | ||
557 | + /* Start building the simulated IRET frame. */ | ||
558 | + pushq $__USER_DS /* pt_regs->ss */ | ||
559 | + pushq RSP_SCRATCH /* pt_regs->sp */ | ||
560 | + pushq %r11 /* pt_regs->flags */ | ||
561 | + pushq $__USER_CS /* pt_regs->cs */ | ||
562 | + pushq %rcx /* pt_regs->ip */ | ||
563 | + | ||
564 | + /* | ||
565 | + * x86 lacks a near absolute jump, and we can't jump to the real | ||
566 | + * entry text with a relative jump. We could push the target | ||
567 | + * address and then use retq, but this destroys the pipeline on | ||
568 | + * many CPUs (wasting over 20 cycles on Sandy Bridge). Instead, | ||
569 | + * spill RDI and restore it in a second-stage trampoline. | ||
570 | + */ | ||
571 | + pushq %rdi | ||
572 | + movq $entry_SYSCALL_64_stage2, %rdi | ||
573 | + jmp *%rdi | ||
574 | +END(entry_SYSCALL_64_trampoline) | ||
575 | + | ||
576 | + .popsection | ||
577 | + | ||
578 | +ENTRY(entry_SYSCALL_64_stage2) | ||
579 | + UNWIND_HINT_EMPTY | ||
580 | + popq %rdi | ||
581 | + jmp entry_SYSCALL_64_after_hwframe | ||
582 | +END(entry_SYSCALL_64_stage2) | ||
583 | + | ||
584 | ENTRY(entry_SYSCALL_64) | ||
585 | UNWIND_HINT_EMPTY | ||
586 | /* | ||
587 | @@ -221,10 +279,9 @@ entry_SYSCALL_64_fastpath: | ||
588 | TRACE_IRQS_ON /* user mode is traced as IRQs on */ | ||
589 | movq RIP(%rsp), %rcx | ||
590 | movq EFLAGS(%rsp), %r11 | ||
591 | - RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
592 | - movq RSP(%rsp), %rsp | ||
593 | + addq $6*8, %rsp /* skip extra regs -- they were preserved */ | ||
594 | UNWIND_HINT_EMPTY | ||
595 | - USERGS_SYSRET64 | ||
596 | + jmp .Lpop_c_regs_except_rcx_r11_and_sysret | ||
597 | |||
598 | 1: | ||
599 | /* | ||
600 | @@ -246,17 +303,18 @@ entry_SYSCALL64_slow_path: | ||
601 | call do_syscall_64 /* returns with IRQs disabled */ | ||
602 | |||
603 | return_from_SYSCALL_64: | ||
604 | - RESTORE_EXTRA_REGS | ||
605 | TRACE_IRQS_IRETQ /* we're about to change IF */ | ||
606 | |||
607 | /* | ||
608 | * Try to use SYSRET instead of IRET if we're returning to | ||
609 | - * a completely clean 64-bit userspace context. | ||
610 | + * a completely clean 64-bit userspace context. If we're not, | ||
611 | + * go to the slow exit path. | ||
612 | */ | ||
613 | movq RCX(%rsp), %rcx | ||
614 | movq RIP(%rsp), %r11 | ||
615 | - cmpq %rcx, %r11 /* RCX == RIP */ | ||
616 | - jne opportunistic_sysret_failed | ||
617 | + | ||
618 | + cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */ | ||
619 | + jne swapgs_restore_regs_and_return_to_usermode | ||
620 | |||
621 | /* | ||
622 | * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP | ||
623 | @@ -274,14 +332,14 @@ return_from_SYSCALL_64: | ||
624 | |||
625 | /* If this changed %rcx, it was not canonical */ | ||
626 | cmpq %rcx, %r11 | ||
627 | - jne opportunistic_sysret_failed | ||
628 | + jne swapgs_restore_regs_and_return_to_usermode | ||
629 | |||
630 | cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */ | ||
631 | - jne opportunistic_sysret_failed | ||
632 | + jne swapgs_restore_regs_and_return_to_usermode | ||
633 | |||
634 | movq R11(%rsp), %r11 | ||
635 | cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */ | ||
636 | - jne opportunistic_sysret_failed | ||
637 | + jne swapgs_restore_regs_and_return_to_usermode | ||
638 | |||
639 | /* | ||
640 | * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot | ||
641 | @@ -302,12 +360,12 @@ return_from_SYSCALL_64: | ||
642 | * would never get past 'stuck_here'. | ||
643 | */ | ||
644 | testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 | ||
645 | - jnz opportunistic_sysret_failed | ||
646 | + jnz swapgs_restore_regs_and_return_to_usermode | ||
647 | |||
648 | /* nothing to check for RSP */ | ||
649 | |||
650 | cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */ | ||
651 | - jne opportunistic_sysret_failed | ||
652 | + jne swapgs_restore_regs_and_return_to_usermode | ||
653 | |||
654 | /* | ||
655 | * We win! This label is here just for ease of understanding | ||
656 | @@ -315,14 +373,36 @@ return_from_SYSCALL_64: | ||
657 | */ | ||
658 | syscall_return_via_sysret: | ||
659 | /* rcx and r11 are already restored (see code above) */ | ||
660 | - RESTORE_C_REGS_EXCEPT_RCX_R11 | ||
661 | - movq RSP(%rsp), %rsp | ||
662 | UNWIND_HINT_EMPTY | ||
663 | - USERGS_SYSRET64 | ||
664 | + POP_EXTRA_REGS | ||
665 | +.Lpop_c_regs_except_rcx_r11_and_sysret: | ||
666 | + popq %rsi /* skip r11 */ | ||
667 | + popq %r10 | ||
668 | + popq %r9 | ||
669 | + popq %r8 | ||
670 | + popq %rax | ||
671 | + popq %rsi /* skip rcx */ | ||
672 | + popq %rdx | ||
673 | + popq %rsi | ||
674 | |||
675 | -opportunistic_sysret_failed: | ||
676 | - SWAPGS | ||
677 | - jmp restore_c_regs_and_iret | ||
678 | + /* | ||
679 | + * Now all regs are restored except RSP and RDI. | ||
680 | + * Save old stack pointer and switch to trampoline stack. | ||
681 | + */ | ||
682 | + movq %rsp, %rdi | ||
683 | + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp | ||
684 | + | ||
685 | + pushq RSP-RDI(%rdi) /* RSP */ | ||
686 | + pushq (%rdi) /* RDI */ | ||
687 | + | ||
688 | + /* | ||
689 | + * We are on the trampoline stack. All regs except RDI are live. | ||
690 | + * We can do future final exit work right here. | ||
691 | + */ | ||
692 | + | ||
693 | + popq %rdi | ||
694 | + popq %rsp | ||
695 | + USERGS_SYSRET64 | ||
696 | END(entry_SYSCALL_64) | ||
697 | |||
698 | ENTRY(stub_ptregs_64) | ||
699 | @@ -423,8 +503,7 @@ ENTRY(ret_from_fork) | ||
700 | movq %rsp, %rdi | ||
701 | call syscall_return_slowpath /* returns with IRQs disabled */ | ||
702 | TRACE_IRQS_ON /* user mode is traced as IRQS on */ | ||
703 | - SWAPGS | ||
704 | - jmp restore_regs_and_iret | ||
705 | + jmp swapgs_restore_regs_and_return_to_usermode | ||
706 | |||
707 | 1: | ||
708 | /* kernel thread */ | ||
709 | @@ -457,12 +536,13 @@ END(irq_entries_start) | ||
710 | |||
711 | .macro DEBUG_ENTRY_ASSERT_IRQS_OFF | ||
712 | #ifdef CONFIG_DEBUG_ENTRY | ||
713 | - pushfq | ||
714 | - testl $X86_EFLAGS_IF, (%rsp) | ||
715 | + pushq %rax | ||
716 | + SAVE_FLAGS(CLBR_RAX) | ||
717 | + testl $X86_EFLAGS_IF, %eax | ||
718 | jz .Lokay_\@ | ||
719 | ud2 | ||
720 | .Lokay_\@: | ||
721 | - addq $8, %rsp | ||
722 | + popq %rax | ||
723 | #endif | ||
724 | .endm | ||
725 | |||
726 | @@ -554,6 +634,13 @@ END(irq_entries_start) | ||
727 | /* 0(%rsp): ~(interrupt number) */ | ||
728 | .macro interrupt func | ||
729 | cld | ||
730 | + | ||
731 | + testb $3, CS-ORIG_RAX(%rsp) | ||
732 | + jz 1f | ||
733 | + SWAPGS | ||
734 | + call switch_to_thread_stack | ||
735 | +1: | ||
736 | + | ||
737 | ALLOC_PT_GPREGS_ON_STACK | ||
738 | SAVE_C_REGS | ||
739 | SAVE_EXTRA_REGS | ||
740 | @@ -563,12 +650,8 @@ END(irq_entries_start) | ||
741 | jz 1f | ||
742 | |||
743 | /* | ||
744 | - * IRQ from user mode. Switch to kernel gsbase and inform context | ||
745 | - * tracking that we're in kernel mode. | ||
746 | - */ | ||
747 | - SWAPGS | ||
748 | - | ||
749 | - /* | ||
750 | + * IRQ from user mode. | ||
751 | + * | ||
752 | * We need to tell lockdep that IRQs are off. We can't do this until | ||
753 | * we fix gsbase, and we should do it before enter_from_user_mode | ||
754 | * (which can take locks). Since TRACE_IRQS_OFF idempotent, | ||
755 | @@ -612,8 +695,52 @@ GLOBAL(retint_user) | ||
756 | mov %rsp,%rdi | ||
757 | call prepare_exit_to_usermode | ||
758 | TRACE_IRQS_IRETQ | ||
759 | + | ||
760 | +GLOBAL(swapgs_restore_regs_and_return_to_usermode) | ||
761 | +#ifdef CONFIG_DEBUG_ENTRY | ||
762 | + /* Assert that pt_regs indicates user mode. */ | ||
763 | + testb $3, CS(%rsp) | ||
764 | + jnz 1f | ||
765 | + ud2 | ||
766 | +1: | ||
767 | +#endif | ||
768 | + POP_EXTRA_REGS | ||
769 | + popq %r11 | ||
770 | + popq %r10 | ||
771 | + popq %r9 | ||
772 | + popq %r8 | ||
773 | + popq %rax | ||
774 | + popq %rcx | ||
775 | + popq %rdx | ||
776 | + popq %rsi | ||
777 | + | ||
778 | + /* | ||
779 | + * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS. | ||
780 | + * Save old stack pointer and switch to trampoline stack. | ||
781 | + */ | ||
782 | + movq %rsp, %rdi | ||
783 | + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp | ||
784 | + | ||
785 | + /* Copy the IRET frame to the trampoline stack. */ | ||
786 | + pushq 6*8(%rdi) /* SS */ | ||
787 | + pushq 5*8(%rdi) /* RSP */ | ||
788 | + pushq 4*8(%rdi) /* EFLAGS */ | ||
789 | + pushq 3*8(%rdi) /* CS */ | ||
790 | + pushq 2*8(%rdi) /* RIP */ | ||
791 | + | ||
792 | + /* Push user RDI on the trampoline stack. */ | ||
793 | + pushq (%rdi) | ||
794 | + | ||
795 | + /* | ||
796 | + * We are on the trampoline stack. All regs except RDI are live. | ||
797 | + * We can do future final exit work right here. | ||
798 | + */ | ||
799 | + | ||
800 | + /* Restore RDI. */ | ||
801 | + popq %rdi | ||
802 | SWAPGS | ||
803 | - jmp restore_regs_and_iret | ||
804 | + INTERRUPT_RETURN | ||
805 | + | ||
806 | |||
807 | /* Returning to kernel space */ | ||
808 | retint_kernel: | ||
809 | @@ -633,15 +760,17 @@ retint_kernel: | ||
810 | */ | ||
811 | TRACE_IRQS_IRETQ | ||
812 | |||
813 | -/* | ||
814 | - * At this label, code paths which return to kernel and to user, | ||
815 | - * which come from interrupts/exception and from syscalls, merge. | ||
816 | - */ | ||
817 | -GLOBAL(restore_regs_and_iret) | ||
818 | - RESTORE_EXTRA_REGS | ||
819 | -restore_c_regs_and_iret: | ||
820 | - RESTORE_C_REGS | ||
821 | - REMOVE_PT_GPREGS_FROM_STACK 8 | ||
822 | +GLOBAL(restore_regs_and_return_to_kernel) | ||
823 | +#ifdef CONFIG_DEBUG_ENTRY | ||
824 | + /* Assert that pt_regs indicates kernel mode. */ | ||
825 | + testb $3, CS(%rsp) | ||
826 | + jz 1f | ||
827 | + ud2 | ||
828 | +1: | ||
829 | +#endif | ||
830 | + POP_EXTRA_REGS | ||
831 | + POP_C_REGS | ||
832 | + addq $8, %rsp /* skip regs->orig_ax */ | ||
833 | INTERRUPT_RETURN | ||
834 | |||
835 | ENTRY(native_iret) | ||
836 | @@ -805,7 +934,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt | ||
837 | /* | ||
838 | * Exception entry points. | ||
839 | */ | ||
840 | -#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) | ||
841 | +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) | ||
842 | + | ||
843 | +/* | ||
844 | + * Switch to the thread stack. This is called with the IRET frame and | ||
845 | + * orig_ax on the stack. (That is, RDI..R12 are not on the stack and | ||
846 | + * space has not been allocated for them.) | ||
847 | + */ | ||
848 | +ENTRY(switch_to_thread_stack) | ||
849 | + UNWIND_HINT_FUNC | ||
850 | + | ||
851 | + pushq %rdi | ||
852 | + movq %rsp, %rdi | ||
853 | + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | ||
854 | + UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI | ||
855 | + | ||
856 | + pushq 7*8(%rdi) /* regs->ss */ | ||
857 | + pushq 6*8(%rdi) /* regs->rsp */ | ||
858 | + pushq 5*8(%rdi) /* regs->eflags */ | ||
859 | + pushq 4*8(%rdi) /* regs->cs */ | ||
860 | + pushq 3*8(%rdi) /* regs->ip */ | ||
861 | + pushq 2*8(%rdi) /* regs->orig_ax */ | ||
862 | + pushq 8(%rdi) /* return address */ | ||
863 | + UNWIND_HINT_FUNC | ||
864 | + | ||
865 | + movq (%rdi), %rdi | ||
866 | + ret | ||
867 | +END(switch_to_thread_stack) | ||
868 | |||
869 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 | ||
870 | ENTRY(\sym) | ||
871 | @@ -818,17 +973,18 @@ ENTRY(\sym) | ||
872 | |||
873 | ASM_CLAC | ||
874 | |||
875 | - .ifeq \has_error_code | ||
876 | + .if \has_error_code == 0 | ||
877 | pushq $-1 /* ORIG_RAX: no syscall to restart */ | ||
878 | .endif | ||
879 | |||
880 | ALLOC_PT_GPREGS_ON_STACK | ||
881 | |||
882 | - .if \paranoid | ||
883 | - .if \paranoid == 1 | ||
884 | + .if \paranoid < 2 | ||
885 | testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ | ||
886 | - jnz 1f | ||
887 | + jnz .Lfrom_usermode_switch_stack_\@ | ||
888 | .endif | ||
889 | + | ||
890 | + .if \paranoid | ||
891 | call paranoid_entry | ||
892 | .else | ||
893 | call error_entry | ||
894 | @@ -870,20 +1026,15 @@ ENTRY(\sym) | ||
895 | jmp error_exit | ||
896 | .endif | ||
897 | |||
898 | - .if \paranoid == 1 | ||
899 | + .if \paranoid < 2 | ||
900 | /* | ||
901 | - * Paranoid entry from userspace. Switch stacks and treat it | ||
902 | + * Entry from userspace. Switch stacks and treat it | ||
903 | * as a normal entry. This means that paranoid handlers | ||
904 | * run in real process context if user_mode(regs). | ||
905 | */ | ||
906 | -1: | ||
907 | +.Lfrom_usermode_switch_stack_\@: | ||
908 | call error_entry | ||
909 | |||
910 | - | ||
911 | - movq %rsp, %rdi /* pt_regs pointer */ | ||
912 | - call sync_regs | ||
913 | - movq %rax, %rsp /* switch stack */ | ||
914 | - | ||
915 | movq %rsp, %rdi /* pt_regs pointer */ | ||
916 | |||
917 | .if \has_error_code | ||
918 | @@ -1059,6 +1210,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK | ||
919 | idtentry stack_segment do_stack_segment has_error_code=1 | ||
920 | |||
921 | #ifdef CONFIG_XEN | ||
922 | +idtentry xennmi do_nmi has_error_code=0 | ||
923 | idtentry xendebug do_debug has_error_code=0 | ||
924 | idtentry xenint3 do_int3 has_error_code=0 | ||
925 | #endif | ||
926 | @@ -1112,17 +1264,14 @@ ENTRY(paranoid_exit) | ||
927 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
928 | TRACE_IRQS_OFF_DEBUG | ||
929 | testl %ebx, %ebx /* swapgs needed? */ | ||
930 | - jnz paranoid_exit_no_swapgs | ||
931 | + jnz .Lparanoid_exit_no_swapgs | ||
932 | TRACE_IRQS_IRETQ | ||
933 | SWAPGS_UNSAFE_STACK | ||
934 | - jmp paranoid_exit_restore | ||
935 | -paranoid_exit_no_swapgs: | ||
936 | + jmp .Lparanoid_exit_restore | ||
937 | +.Lparanoid_exit_no_swapgs: | ||
938 | TRACE_IRQS_IRETQ_DEBUG | ||
939 | -paranoid_exit_restore: | ||
940 | - RESTORE_EXTRA_REGS | ||
941 | - RESTORE_C_REGS | ||
942 | - REMOVE_PT_GPREGS_FROM_STACK 8 | ||
943 | - INTERRUPT_RETURN | ||
944 | +.Lparanoid_exit_restore: | ||
945 | + jmp restore_regs_and_return_to_kernel | ||
946 | END(paranoid_exit) | ||
947 | |||
948 | /* | ||
949 | @@ -1146,6 +1295,14 @@ ENTRY(error_entry) | ||
950 | SWAPGS | ||
951 | |||
952 | .Lerror_entry_from_usermode_after_swapgs: | ||
953 | + /* Put us onto the real thread stack. */ | ||
954 | + popq %r12 /* save return addr in %r12 */ | ||
955 | + movq %rsp, %rdi /* arg0 = pt_regs pointer */ | ||
956 | + call sync_regs | ||
957 | + movq %rax, %rsp /* switch stack */ | ||
958 | + ENCODE_FRAME_POINTER | ||
959 | + pushq %r12 | ||
960 | + | ||
961 | /* | ||
962 | * We need to tell lockdep that IRQs are off. We can't do this until | ||
963 | * we fix gsbase, and we should do it before enter_from_user_mode | ||
964 | @@ -1223,10 +1380,13 @@ ENTRY(error_exit) | ||
965 | jmp retint_user | ||
966 | END(error_exit) | ||
967 | |||
968 | -/* Runs on exception stack */ | ||
969 | -/* XXX: broken on Xen PV */ | ||
970 | +/* | ||
971 | + * Runs on exception stack. Xen PV does not go through this path at all, | ||
972 | + * so we can use real assembly here. | ||
973 | + */ | ||
974 | ENTRY(nmi) | ||
975 | UNWIND_HINT_IRET_REGS | ||
976 | + | ||
977 | /* | ||
978 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | ||
979 | * the iretq it performs will take us out of NMI context. | ||
980 | @@ -1284,7 +1444,7 @@ ENTRY(nmi) | ||
981 | * stacks lest we corrupt the "NMI executing" variable. | ||
982 | */ | ||
983 | |||
984 | - SWAPGS_UNSAFE_STACK | ||
985 | + swapgs | ||
986 | cld | ||
987 | movq %rsp, %rdx | ||
988 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | ||
989 | @@ -1328,8 +1488,7 @@ ENTRY(nmi) | ||
990 | * Return back to user mode. We must *not* do the normal exit | ||
991 | * work, because we don't want to enable interrupts. | ||
992 | */ | ||
993 | - SWAPGS | ||
994 | - jmp restore_regs_and_iret | ||
995 | + jmp swapgs_restore_regs_and_return_to_usermode | ||
996 | |||
997 | .Lnmi_from_kernel: | ||
998 | /* | ||
999 | @@ -1450,7 +1609,7 @@ nested_nmi_out: | ||
1000 | popq %rdx | ||
1001 | |||
1002 | /* We are returning to kernel mode, so this cannot result in a fault. */ | ||
1003 | - INTERRUPT_RETURN | ||
1004 | + iretq | ||
1005 | |||
1006 | first_nmi: | ||
1007 | /* Restore rdx. */ | ||
1008 | @@ -1481,7 +1640,7 @@ first_nmi: | ||
1009 | pushfq /* RFLAGS */ | ||
1010 | pushq $__KERNEL_CS /* CS */ | ||
1011 | pushq $1f /* RIP */ | ||
1012 | - INTERRUPT_RETURN /* continues at repeat_nmi below */ | ||
1013 | + iretq /* continues at repeat_nmi below */ | ||
1014 | UNWIND_HINT_IRET_REGS | ||
1015 | 1: | ||
1016 | #endif | ||
1017 | @@ -1544,29 +1703,34 @@ end_repeat_nmi: | ||
1018 | nmi_swapgs: | ||
1019 | SWAPGS_UNSAFE_STACK | ||
1020 | nmi_restore: | ||
1021 | - RESTORE_EXTRA_REGS | ||
1022 | - RESTORE_C_REGS | ||
1023 | + POP_EXTRA_REGS | ||
1024 | + POP_C_REGS | ||
1025 | |||
1026 | - /* Point RSP at the "iret" frame. */ | ||
1027 | - REMOVE_PT_GPREGS_FROM_STACK 6*8 | ||
1028 | + /* | ||
1029 | + * Skip orig_ax and the "outermost" frame to point RSP | ||
1030 | + * at the "iret" frame. | ||
1031 | + */ | ||
1032 | + addq $6*8, %rsp | ||
1033 | |||
1034 | /* | ||
1035 | * Clear "NMI executing". Set DF first so that we can easily | ||
1036 | * distinguish the remaining code between here and IRET from | ||
1037 | - * the SYSCALL entry and exit paths. On a native kernel, we | ||
1038 | - * could just inspect RIP, but, on paravirt kernels, | ||
1039 | - * INTERRUPT_RETURN can translate into a jump into a | ||
1040 | - * hypercall page. | ||
1041 | + * the SYSCALL entry and exit paths. | ||
1042 | + * | ||
1043 | + * We arguably should just inspect RIP instead, but I (Andy) wrote | ||
1044 | + * this code when I had the misapprehension that Xen PV supported | ||
1045 | + * NMIs, and Xen PV would break that approach. | ||
1046 | */ | ||
1047 | std | ||
1048 | movq $0, 5*8(%rsp) /* clear "NMI executing" */ | ||
1049 | |||
1050 | /* | ||
1051 | - * INTERRUPT_RETURN reads the "iret" frame and exits the NMI | ||
1052 | - * stack in a single instruction. We are returning to kernel | ||
1053 | - * mode, so this cannot result in a fault. | ||
1054 | + * iretq reads the "iret" frame and exits the NMI stack in a | ||
1055 | + * single instruction. We are returning to kernel mode, so this | ||
1056 | + * cannot result in a fault. Similarly, we don't need to worry | ||
1057 | + * about espfix64 on the way back to kernel mode. | ||
1058 | */ | ||
1059 | - INTERRUPT_RETURN | ||
1060 | + iretq | ||
1061 | END(nmi) | ||
1062 | |||
1063 | ENTRY(ignore_sysret) | ||
1064 | diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S | ||
1065 | index b5c7a56ed256..95ad40eb7eff 100644 | ||
1066 | --- a/arch/x86/entry/entry_64_compat.S | ||
1067 | +++ b/arch/x86/entry/entry_64_compat.S | ||
1068 | @@ -48,7 +48,7 @@ | ||
1069 | */ | ||
1070 | ENTRY(entry_SYSENTER_compat) | ||
1071 | /* Interrupts are off on entry. */ | ||
1072 | - SWAPGS_UNSAFE_STACK | ||
1073 | + SWAPGS | ||
1074 | movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp | ||
1075 | |||
1076 | /* | ||
1077 | @@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat) | ||
1078 | */ | ||
1079 | movl %eax, %eax | ||
1080 | |||
1081 | - /* Construct struct pt_regs on stack (iret frame is already on stack) */ | ||
1082 | pushq %rax /* pt_regs->orig_ax */ | ||
1083 | + | ||
1084 | + /* switch to thread stack expects orig_ax to be pushed */ | ||
1085 | + call switch_to_thread_stack | ||
1086 | + | ||
1087 | pushq %rdi /* pt_regs->di */ | ||
1088 | pushq %rsi /* pt_regs->si */ | ||
1089 | pushq %rdx /* pt_regs->dx */ | ||
1090 | @@ -337,8 +340,7 @@ ENTRY(entry_INT80_compat) | ||
1091 | |||
1092 | /* Go back to user mode. */ | ||
1093 | TRACE_IRQS_ON | ||
1094 | - SWAPGS | ||
1095 | - jmp restore_regs_and_iret | ||
1096 | + jmp swapgs_restore_regs_and_return_to_usermode | ||
1097 | END(entry_INT80_compat) | ||
1098 | |||
1099 | ENTRY(stub32_clone) | ||
1100 | diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile | ||
1101 | index 331f1dca5085..6fb9b57ed5ba 100644 | ||
1102 | --- a/arch/x86/entry/syscalls/Makefile | ||
1103 | +++ b/arch/x86/entry/syscalls/Makefile | ||
1104 | @@ -1,6 +1,6 @@ | ||
1105 | # SPDX-License-Identifier: GPL-2.0 | ||
1106 | -out := $(obj)/../../include/generated/asm | ||
1107 | -uapi := $(obj)/../../include/generated/uapi/asm | ||
1108 | +out := arch/$(SRCARCH)/include/generated/asm | ||
1109 | +uapi := arch/$(SRCARCH)/include/generated/uapi/asm | ||
1110 | |||
1111 | # Create output directory if not already present | ||
1112 | _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ | ||
1113 | diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c | ||
1114 | index 80534d3c2480..589af1eec7c1 100644 | ||
1115 | --- a/arch/x86/events/core.c | ||
1116 | +++ b/arch/x86/events/core.c | ||
1117 | @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) | ||
1118 | struct ldt_struct *ldt; | ||
1119 | |||
1120 | /* IRQs are off, so this synchronizes with smp_store_release */ | ||
1121 | - ldt = lockless_dereference(current->active_mm->context.ldt); | ||
1122 | + ldt = READ_ONCE(current->active_mm->context.ldt); | ||
1123 | if (!ldt || idx >= ldt->nr_entries) | ||
1124 | return 0; | ||
1125 | |||
1126 | diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c | ||
1127 | index f94855000d4e..09c26a4f139c 100644 | ||
1128 | --- a/arch/x86/events/intel/core.c | ||
1129 | +++ b/arch/x86/events/intel/core.c | ||
1130 | @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) | ||
1131 | |||
1132 | if (event->attr.use_clockid) | ||
1133 | flags &= ~PERF_SAMPLE_TIME; | ||
1134 | + if (!event->attr.exclude_kernel) | ||
1135 | + flags &= ~PERF_SAMPLE_REGS_USER; | ||
1136 | + if (event->attr.sample_regs_user & ~PEBS_REGS) | ||
1137 | + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); | ||
1138 | return flags; | ||
1139 | } | ||
1140 | |||
1141 | diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h | ||
1142 | index 4196f81ec0e1..f7aaadf9331f 100644 | ||
1143 | --- a/arch/x86/events/perf_event.h | ||
1144 | +++ b/arch/x86/events/perf_event.h | ||
1145 | @@ -85,13 +85,15 @@ struct amd_nb { | ||
1146 | * Flags PEBS can handle without an PMI. | ||
1147 | * | ||
1148 | * TID can only be handled by flushing at context switch. | ||
1149 | + * REGS_USER can be handled for events limited to ring 3. | ||
1150 | * | ||
1151 | */ | ||
1152 | #define PEBS_FREERUNNING_FLAGS \ | ||
1153 | (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ | ||
1154 | PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ | ||
1155 | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ | ||
1156 | - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) | ||
1157 | + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ | ||
1158 | + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) | ||
1159 | |||
1160 | /* | ||
1161 | * A debug store configuration. | ||
1162 | @@ -110,6 +112,26 @@ struct debug_store { | ||
1163 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
1164 | }; | ||
1165 | |||
1166 | +#define PEBS_REGS \ | ||
1167 | + (PERF_REG_X86_AX | \ | ||
1168 | + PERF_REG_X86_BX | \ | ||
1169 | + PERF_REG_X86_CX | \ | ||
1170 | + PERF_REG_X86_DX | \ | ||
1171 | + PERF_REG_X86_DI | \ | ||
1172 | + PERF_REG_X86_SI | \ | ||
1173 | + PERF_REG_X86_SP | \ | ||
1174 | + PERF_REG_X86_BP | \ | ||
1175 | + PERF_REG_X86_IP | \ | ||
1176 | + PERF_REG_X86_FLAGS | \ | ||
1177 | + PERF_REG_X86_R8 | \ | ||
1178 | + PERF_REG_X86_R9 | \ | ||
1179 | + PERF_REG_X86_R10 | \ | ||
1180 | + PERF_REG_X86_R11 | \ | ||
1181 | + PERF_REG_X86_R12 | \ | ||
1182 | + PERF_REG_X86_R13 | \ | ||
1183 | + PERF_REG_X86_R14 | \ | ||
1184 | + PERF_REG_X86_R15) | ||
1185 | + | ||
1186 | /* | ||
1187 | * Per register state. | ||
1188 | */ | ||
1189 | diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c | ||
1190 | index a5db63f728a2..a0b86cf486e0 100644 | ||
1191 | --- a/arch/x86/hyperv/hv_init.c | ||
1192 | +++ b/arch/x86/hyperv/hv_init.c | ||
1193 | @@ -113,7 +113,7 @@ void hyperv_init(void) | ||
1194 | u64 guest_id; | ||
1195 | union hv_x64_msr_hypercall_contents hypercall_msr; | ||
1196 | |||
1197 | - if (x86_hyper != &x86_hyper_ms_hyperv) | ||
1198 | + if (x86_hyper_type != X86_HYPER_MS_HYPERV) | ||
1199 | return; | ||
1200 | |||
1201 | /* Allocate percpu VP index */ | ||
1202 | diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h | ||
1203 | index 5b0579abb398..3ac991d81e74 100644 | ||
1204 | --- a/arch/x86/include/asm/archrandom.h | ||
1205 | +++ b/arch/x86/include/asm/archrandom.h | ||
1206 | @@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v) | ||
1207 | bool ok; | ||
1208 | unsigned int retry = RDRAND_RETRY_LOOPS; | ||
1209 | do { | ||
1210 | - asm volatile(RDRAND_LONG "\n\t" | ||
1211 | + asm volatile(RDRAND_LONG | ||
1212 | CC_SET(c) | ||
1213 | : CC_OUT(c) (ok), "=a" (*v)); | ||
1214 | if (ok) | ||
1215 | @@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v) | ||
1216 | bool ok; | ||
1217 | unsigned int retry = RDRAND_RETRY_LOOPS; | ||
1218 | do { | ||
1219 | - asm volatile(RDRAND_INT "\n\t" | ||
1220 | + asm volatile(RDRAND_INT | ||
1221 | CC_SET(c) | ||
1222 | : CC_OUT(c) (ok), "=a" (*v)); | ||
1223 | if (ok) | ||
1224 | @@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v) | ||
1225 | static inline bool rdseed_long(unsigned long *v) | ||
1226 | { | ||
1227 | bool ok; | ||
1228 | - asm volatile(RDSEED_LONG "\n\t" | ||
1229 | + asm volatile(RDSEED_LONG | ||
1230 | CC_SET(c) | ||
1231 | : CC_OUT(c) (ok), "=a" (*v)); | ||
1232 | return ok; | ||
1233 | @@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v) | ||
1234 | static inline bool rdseed_int(unsigned int *v) | ||
1235 | { | ||
1236 | bool ok; | ||
1237 | - asm volatile(RDSEED_INT "\n\t" | ||
1238 | + asm volatile(RDSEED_INT | ||
1239 | CC_SET(c) | ||
1240 | : CC_OUT(c) (ok), "=a" (*v)); | ||
1241 | return ok; | ||
1242 | diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h | ||
1243 | index 2bcf47314959..3fa039855b8f 100644 | ||
1244 | --- a/arch/x86/include/asm/bitops.h | ||
1245 | +++ b/arch/x86/include/asm/bitops.h | ||
1246 | @@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) | ||
1247 | static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) | ||
1248 | { | ||
1249 | bool negative; | ||
1250 | - asm volatile(LOCK_PREFIX "andb %2,%1\n\t" | ||
1251 | + asm volatile(LOCK_PREFIX "andb %2,%1" | ||
1252 | CC_SET(s) | ||
1253 | : CC_OUT(s) (negative), ADDR | ||
1254 | : "ir" ((char) ~(1 << nr)) : "memory"); | ||
1255 | @@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * | ||
1256 | { | ||
1257 | bool oldbit; | ||
1258 | |||
1259 | - asm("bts %2,%1\n\t" | ||
1260 | + asm("bts %2,%1" | ||
1261 | CC_SET(c) | ||
1262 | : CC_OUT(c) (oldbit), ADDR | ||
1263 | : "Ir" (nr)); | ||
1264 | @@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long | ||
1265 | { | ||
1266 | bool oldbit; | ||
1267 | |||
1268 | - asm volatile("btr %2,%1\n\t" | ||
1269 | + asm volatile("btr %2,%1" | ||
1270 | CC_SET(c) | ||
1271 | : CC_OUT(c) (oldbit), ADDR | ||
1272 | : "Ir" (nr)); | ||
1273 | @@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon | ||
1274 | { | ||
1275 | bool oldbit; | ||
1276 | |||
1277 | - asm volatile("btc %2,%1\n\t" | ||
1278 | + asm volatile("btc %2,%1" | ||
1279 | CC_SET(c) | ||
1280 | : CC_OUT(c) (oldbit), ADDR | ||
1281 | : "Ir" (nr) : "memory"); | ||
1282 | @@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l | ||
1283 | { | ||
1284 | bool oldbit; | ||
1285 | |||
1286 | - asm volatile("bt %2,%1\n\t" | ||
1287 | + asm volatile("bt %2,%1" | ||
1288 | CC_SET(c) | ||
1289 | : CC_OUT(c) (oldbit) | ||
1290 | : "m" (*(unsigned long *)addr), "Ir" (nr)); | ||
1291 | diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h | ||
1292 | index 70bc1df580b2..2cbd75dd2fd3 100644 | ||
1293 | --- a/arch/x86/include/asm/compat.h | ||
1294 | +++ b/arch/x86/include/asm/compat.h | ||
1295 | @@ -7,6 +7,7 @@ | ||
1296 | */ | ||
1297 | #include <linux/types.h> | ||
1298 | #include <linux/sched.h> | ||
1299 | +#include <linux/sched/task_stack.h> | ||
1300 | #include <asm/processor.h> | ||
1301 | #include <asm/user32.h> | ||
1302 | #include <asm/unistd.h> | ||
1303 | diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h | ||
1304 | index 0dfa68438e80..ea9a7dde62e5 100644 | ||
1305 | --- a/arch/x86/include/asm/cpufeature.h | ||
1306 | +++ b/arch/x86/include/asm/cpufeature.h | ||
1307 | @@ -126,16 +126,17 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; | ||
1308 | #define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit) | ||
1309 | |||
1310 | #define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability)) | ||
1311 | -#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) | ||
1312 | -#define setup_clear_cpu_cap(bit) do { \ | ||
1313 | - clear_cpu_cap(&boot_cpu_data, bit); \ | ||
1314 | - set_bit(bit, (unsigned long *)cpu_caps_cleared); \ | ||
1315 | -} while (0) | ||
1316 | + | ||
1317 | +extern void setup_clear_cpu_cap(unsigned int bit); | ||
1318 | +extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); | ||
1319 | + | ||
1320 | #define setup_force_cpu_cap(bit) do { \ | ||
1321 | set_cpu_cap(&boot_cpu_data, bit); \ | ||
1322 | set_bit(bit, (unsigned long *)cpu_caps_set); \ | ||
1323 | } while (0) | ||
1324 | |||
1325 | +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) | ||
1326 | + | ||
1327 | #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) | ||
1328 | /* | ||
1329 | * Static testing of CPU features. Used the same as boot_cpu_has(). | ||
1330 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h | ||
1331 | index 793690fbda36..800104c8a3ed 100644 | ||
1332 | --- a/arch/x86/include/asm/cpufeatures.h | ||
1333 | +++ b/arch/x86/include/asm/cpufeatures.h | ||
1334 | @@ -13,173 +13,176 @@ | ||
1335 | /* | ||
1336 | * Defines x86 CPU feature bits | ||
1337 | */ | ||
1338 | -#define NCAPINTS 18 /* N 32-bit words worth of info */ | ||
1339 | -#define NBUGINTS 1 /* N 32-bit bug flags */ | ||
1340 | +#define NCAPINTS 18 /* N 32-bit words worth of info */ | ||
1341 | +#define NBUGINTS 1 /* N 32-bit bug flags */ | ||
1342 | |||
1343 | /* | ||
1344 | * Note: If the comment begins with a quoted string, that string is used | ||
1345 | * in /proc/cpuinfo instead of the macro name. If the string is "", | ||
1346 | * this feature bit is not displayed in /proc/cpuinfo at all. | ||
1347 | + * | ||
1348 | + * When adding new features here that depend on other features, | ||
1349 | + * please update the table in kernel/cpu/cpuid-deps.c as well. | ||
1350 | */ | ||
1351 | |||
1352 | -/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ | ||
1353 | -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ | ||
1354 | -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ | ||
1355 | -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ | ||
1356 | -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ | ||
1357 | -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ | ||
1358 | -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ | ||
1359 | -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ | ||
1360 | -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ | ||
1361 | -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ | ||
1362 | -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ | ||
1363 | -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ | ||
1364 | -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ | ||
1365 | -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ | ||
1366 | -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ | ||
1367 | -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */ | ||
1368 | - /* (plus FCMOVcc, FCOMI with FPU) */ | ||
1369 | -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ | ||
1370 | -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ | ||
1371 | -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ | ||
1372 | -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ | ||
1373 | -#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ | ||
1374 | -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ | ||
1375 | -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ | ||
1376 | -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ | ||
1377 | -#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ | ||
1378 | -#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ | ||
1379 | -#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ | ||
1380 | -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ | ||
1381 | -#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ | ||
1382 | -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ | ||
1383 | -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ | ||
1384 | +/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ | ||
1385 | +#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ | ||
1386 | +#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ | ||
1387 | +#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ | ||
1388 | +#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ | ||
1389 | +#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ | ||
1390 | +#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ | ||
1391 | +#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ | ||
1392 | +#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ | ||
1393 | +#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ | ||
1394 | +#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ | ||
1395 | +#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ | ||
1396 | +#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ | ||
1397 | +#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ | ||
1398 | +#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ | ||
1399 | +#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ | ||
1400 | +#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ | ||
1401 | +#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ | ||
1402 | +#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ | ||
1403 | +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ | ||
1404 | +#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ | ||
1405 | +#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ | ||
1406 | +#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ | ||
1407 | +#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ | ||
1408 | +#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ | ||
1409 | +#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ | ||
1410 | +#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ | ||
1411 | +#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ | ||
1412 | +#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ | ||
1413 | +#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ | ||
1414 | +#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ | ||
1415 | |||
1416 | /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ | ||
1417 | /* Don't duplicate feature flags which are redundant with Intel! */ | ||
1418 | -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ | ||
1419 | -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */ | ||
1420 | -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ | ||
1421 | -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ | ||
1422 | -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ | ||
1423 | -#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ | ||
1424 | -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ | ||
1425 | -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */ | ||
1426 | -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */ | ||
1427 | -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */ | ||
1428 | +#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ | ||
1429 | +#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ | ||
1430 | +#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ | ||
1431 | +#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ | ||
1432 | +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ | ||
1433 | +#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ | ||
1434 | +#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ | ||
1435 | +#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ | ||
1436 | +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ | ||
1437 | +#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ | ||
1438 | |||
1439 | /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ | ||
1440 | -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ | ||
1441 | -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ | ||
1442 | -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ | ||
1443 | +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ | ||
1444 | +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ | ||
1445 | +#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ | ||
1446 | |||
1447 | /* Other features, Linux-defined mapping, word 3 */ | ||
1448 | /* This range is used for feature bits which conflict or are synthesized */ | ||
1449 | -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ | ||
1450 | -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ | ||
1451 | -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ | ||
1452 | -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ | ||
1453 | -/* cpu types for specific tunings: */ | ||
1454 | -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ | ||
1455 | -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ | ||
1456 | -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ | ||
1457 | -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ | ||
1458 | -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ | ||
1459 | -#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */ | ||
1460 | -#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */ | ||
1461 | -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ | ||
1462 | -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ | ||
1463 | -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ | ||
1464 | -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */ | ||
1465 | -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */ | ||
1466 | -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ | ||
1467 | -#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ | ||
1468 | -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ | ||
1469 | -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ | ||
1470 | -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ | ||
1471 | -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ | ||
1472 | -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ | ||
1473 | -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ | ||
1474 | -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ | ||
1475 | -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ | ||
1476 | -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ | ||
1477 | -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ | ||
1478 | -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ | ||
1479 | -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | ||
1480 | -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ | ||
1481 | +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ | ||
1482 | +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ | ||
1483 | +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ | ||
1484 | +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ | ||
1485 | + | ||
1486 | +/* CPU types for specific tunings: */ | ||
1487 | +#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ | ||
1488 | +#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ | ||
1489 | +#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ | ||
1490 | +#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ | ||
1491 | +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ | ||
1492 | +#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ | ||
1493 | +#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ | ||
1494 | +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ | ||
1495 | +#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ | ||
1496 | +#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ | ||
1497 | +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ | ||
1498 | +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ | ||
1499 | +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ | ||
1500 | +#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */ | ||
1501 | +#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ | ||
1502 | +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ | ||
1503 | +#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ | ||
1504 | +#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ | ||
1505 | +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ | ||
1506 | +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ | ||
1507 | +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ | ||
1508 | +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ | ||
1509 | +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ | ||
1510 | +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ | ||
1511 | +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ | ||
1512 | +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ | ||
1513 | +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ | ||
1514 | |||
1515 | -/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | ||
1516 | -#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ | ||
1517 | -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ | ||
1518 | -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ | ||
1519 | -#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */ | ||
1520 | -#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */ | ||
1521 | -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ | ||
1522 | -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */ | ||
1523 | -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ | ||
1524 | -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ | ||
1525 | -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ | ||
1526 | -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ | ||
1527 | -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ | ||
1528 | -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ | ||
1529 | -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */ | ||
1530 | -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ | ||
1531 | -#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */ | ||
1532 | -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ | ||
1533 | -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ | ||
1534 | -#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ | ||
1535 | -#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ | ||
1536 | -#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */ | ||
1537 | -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ | ||
1538 | -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ | ||
1539 | -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */ | ||
1540 | -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ | ||
1541 | -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | ||
1542 | -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */ | ||
1543 | -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ | ||
1544 | -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */ | ||
1545 | -#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */ | ||
1546 | -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ | ||
1547 | +/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ | ||
1548 | +#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ | ||
1549 | +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ | ||
1550 | +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ | ||
1551 | +#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ | ||
1552 | +#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ | ||
1553 | +#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ | ||
1554 | +#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ | ||
1555 | +#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ | ||
1556 | +#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ | ||
1557 | +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ | ||
1558 | +#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ | ||
1559 | +#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ | ||
1560 | +#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ | ||
1561 | +#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ | ||
1562 | +#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ | ||
1563 | +#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ | ||
1564 | +#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ | ||
1565 | +#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ | ||
1566 | +#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ | ||
1567 | +#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ | ||
1568 | +#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ | ||
1569 | +#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ | ||
1570 | +#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ | ||
1571 | +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ | ||
1572 | +#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ | ||
1573 | +#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ | ||
1574 | +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ | ||
1575 | +#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ | ||
1576 | +#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ | ||
1577 | +#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ | ||
1578 | +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ | ||
1579 | |||
1580 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | ||
1581 | -#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ | ||
1582 | -#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ | ||
1583 | -#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ | ||
1584 | -#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ | ||
1585 | -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ | ||
1586 | -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ | ||
1587 | -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ | ||
1588 | -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ | ||
1589 | -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ | ||
1590 | -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ | ||
1591 | +#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ | ||
1592 | +#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ | ||
1593 | +#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ | ||
1594 | +#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ | ||
1595 | +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ | ||
1596 | +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ | ||
1597 | +#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ | ||
1598 | +#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ | ||
1599 | +#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ | ||
1600 | +#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ | ||
1601 | |||
1602 | -/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ | ||
1603 | -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ | ||
1604 | -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ | ||
1605 | -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */ | ||
1606 | -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ | ||
1607 | -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ | ||
1608 | -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ | ||
1609 | -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ | ||
1610 | -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ | ||
1611 | -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ | ||
1612 | -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ | ||
1613 | -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ | ||
1614 | -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ | ||
1615 | -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ | ||
1616 | -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ | ||
1617 | -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ | ||
1618 | -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ | ||
1619 | -#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */ | ||
1620 | -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ | ||
1621 | -#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */ | ||
1622 | -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */ | ||
1623 | -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ | ||
1624 | -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ | ||
1625 | -#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ | ||
1626 | -#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ | ||
1627 | -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ | ||
1628 | -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ | ||
1629 | +/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ | ||
1630 | +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ | ||
1631 | +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ | ||
1632 | +#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ | ||
1633 | +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ | ||
1634 | +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ | ||
1635 | +#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ | ||
1636 | +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ | ||
1637 | +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ | ||
1638 | +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ | ||
1639 | +#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ | ||
1640 | +#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ | ||
1641 | +#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ | ||
1642 | +#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ | ||
1643 | +#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ | ||
1644 | +#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ | ||
1645 | +#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ | ||
1646 | +#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ | ||
1647 | +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ | ||
1648 | +#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ | ||
1649 | +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ | ||
1650 | +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ | ||
1651 | +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ | ||
1652 | +#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ | ||
1653 | +#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ | ||
1654 | +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ | ||
1655 | +#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ | ||
1656 | |||
1657 | /* | ||
1658 | * Auxiliary flags: Linux defined - For features scattered in various | ||
1659 | @@ -187,146 +190,155 @@ | ||
1660 | * | ||
1661 | * Reuse free bits when adding new feature flags! | ||
1662 | */ | ||
1663 | -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ | ||
1664 | -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ | ||
1665 | -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ | ||
1666 | -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | ||
1667 | -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ | ||
1668 | -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ | ||
1669 | -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ | ||
1670 | +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ | ||
1671 | +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ | ||
1672 | +#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ | ||
1673 | +#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ | ||
1674 | +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ | ||
1675 | +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ | ||
1676 | +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ | ||
1677 | |||
1678 | -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | ||
1679 | -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
1680 | -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ | ||
1681 | +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | ||
1682 | +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | ||
1683 | +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ | ||
1684 | |||
1685 | -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ | ||
1686 | -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | ||
1687 | -#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ | ||
1688 | -#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ | ||
1689 | +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ | ||
1690 | +#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ | ||
1691 | +#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ | ||
1692 | +#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ | ||
1693 | |||
1694 | -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ | ||
1695 | +#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ | ||
1696 | |||
1697 | /* Virtualization flags: Linux defined, word 8 */ | ||
1698 | -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
1699 | -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ | ||
1700 | -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ | ||
1701 | -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ | ||
1702 | -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ | ||
1703 | +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ | ||
1704 | +#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ | ||
1705 | +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ | ||
1706 | +#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ | ||
1707 | +#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ | ||
1708 | |||
1709 | -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ | ||
1710 | -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ | ||
1711 | +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ | ||
1712 | +#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ | ||
1713 | |||
1714 | |||
1715 | -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ | ||
1716 | -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ | ||
1717 | -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */ | ||
1718 | -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ | ||
1719 | -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ | ||
1720 | -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ | ||
1721 | -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ | ||
1722 | -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
1723 | -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | ||
1724 | -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ | ||
1725 | -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ | ||
1726 | -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ | ||
1727 | -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ | ||
1728 | -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ | ||
1729 | -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ | ||
1730 | -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ | ||
1731 | -#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | ||
1732 | -#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ | ||
1733 | -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | ||
1734 | -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ | ||
1735 | -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | ||
1736 | -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
1737 | -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | ||
1738 | -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | ||
1739 | -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | ||
1740 | -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ | ||
1741 | -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ | ||
1742 | -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ | ||
1743 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ | ||
1744 | +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ | ||
1745 | +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ | ||
1746 | +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ | ||
1747 | +#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ | ||
1748 | +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ | ||
1749 | +#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ | ||
1750 | +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ | ||
1751 | +#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ | ||
1752 | +#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ | ||
1753 | +#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ | ||
1754 | +#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ | ||
1755 | +#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ | ||
1756 | +#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ | ||
1757 | +#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ | ||
1758 | +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ | ||
1759 | +#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ | ||
1760 | +#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ | ||
1761 | +#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ | ||
1762 | +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ | ||
1763 | +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ | ||
1764 | +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ | ||
1765 | +#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ | ||
1766 | +#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ | ||
1767 | +#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ | ||
1768 | +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ | ||
1769 | +#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ | ||
1770 | +#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ | ||
1771 | |||
1772 | -/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ | ||
1773 | -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ | ||
1774 | -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */ | ||
1775 | -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ | ||
1776 | -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ | ||
1777 | +/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ | ||
1778 | +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ | ||
1779 | +#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ | ||
1780 | +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ | ||
1781 | +#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ | ||
1782 | |||
1783 | -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ | ||
1784 | -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ | ||
1785 | +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ | ||
1786 | +#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ | ||
1787 | |||
1788 | -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ | ||
1789 | -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ | ||
1790 | -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ | ||
1791 | -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ | ||
1792 | +/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ | ||
1793 | +#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ | ||
1794 | +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ | ||
1795 | +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ | ||
1796 | |||
1797 | -/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ | ||
1798 | -#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ | ||
1799 | -#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ | ||
1800 | +/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ | ||
1801 | +#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ | ||
1802 | +#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ | ||
1803 | +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ | ||
1804 | |||
1805 | -/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ | ||
1806 | -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
1807 | -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ | ||
1808 | -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ | ||
1809 | -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ | ||
1810 | -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ | ||
1811 | -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ | ||
1812 | -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ | ||
1813 | -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ | ||
1814 | -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ | ||
1815 | -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ | ||
1816 | +/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ | ||
1817 | +#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ | ||
1818 | +#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ | ||
1819 | +#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ | ||
1820 | +#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ | ||
1821 | +#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ | ||
1822 | +#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ | ||
1823 | +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ | ||
1824 | +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ | ||
1825 | +#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ | ||
1826 | +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ | ||
1827 | |||
1828 | -/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */ | ||
1829 | -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ | ||
1830 | -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ | ||
1831 | -#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ | ||
1832 | -#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ | ||
1833 | -#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ | ||
1834 | -#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ | ||
1835 | -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ | ||
1836 | -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ | ||
1837 | -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ | ||
1838 | -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ | ||
1839 | -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ | ||
1840 | -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ | ||
1841 | -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ | ||
1842 | +/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ | ||
1843 | +#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ | ||
1844 | +#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ | ||
1845 | +#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ | ||
1846 | +#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ | ||
1847 | +#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ | ||
1848 | +#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ | ||
1849 | +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ | ||
1850 | +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ | ||
1851 | +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ | ||
1852 | +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ | ||
1853 | +#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ | ||
1854 | +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ | ||
1855 | +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ | ||
1856 | |||
1857 | -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ | ||
1858 | -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ | ||
1859 | -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ | ||
1860 | -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ | ||
1861 | -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ | ||
1862 | -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ | ||
1863 | -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ | ||
1864 | +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ | ||
1865 | +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ | ||
1866 | +#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ | ||
1867 | +#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ | ||
1868 | +#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ | ||
1869 | +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ | ||
1870 | +#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ | ||
1871 | +#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ | ||
1872 | +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ | ||
1873 | +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ | ||
1874 | +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ | ||
1875 | +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ | ||
1876 | +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ | ||
1877 | +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ | ||
1878 | |||
1879 | -/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ | ||
1880 | -#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ | ||
1881 | -#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */ | ||
1882 | -#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */ | ||
1883 | +/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ | ||
1884 | +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ | ||
1885 | +#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ | ||
1886 | +#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ | ||
1887 | |||
1888 | /* | ||
1889 | * BUG word(s) | ||
1890 | */ | ||
1891 | -#define X86_BUG(x) (NCAPINTS*32 + (x)) | ||
1892 | +#define X86_BUG(x) (NCAPINTS*32 + (x)) | ||
1893 | |||
1894 | -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ | ||
1895 | -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ | ||
1896 | -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ | ||
1897 | -#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ | ||
1898 | -#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ | ||
1899 | -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ | ||
1900 | -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ | ||
1901 | -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ | ||
1902 | -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ | ||
1903 | +#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ | ||
1904 | +#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ | ||
1905 | +#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ | ||
1906 | +#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ | ||
1907 | +#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ | ||
1908 | +#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ | ||
1909 | +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ | ||
1910 | +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ | ||
1911 | +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ | ||
1912 | #ifdef CONFIG_X86_32 | ||
1913 | /* | ||
1914 | * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional | ||
1915 | * to avoid confusion. | ||
1916 | */ | ||
1917 | -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ | ||
1918 | +#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ | ||
1919 | #endif | ||
1920 | -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ | ||
1921 | -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ | ||
1922 | -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ | ||
1923 | -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ | ||
1924 | +#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ | ||
1925 | +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ | ||
1926 | +#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ | ||
1927 | +#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ | ||
1928 | + | ||
1929 | #endif /* _ASM_X86_CPUFEATURES_H */ | ||
1930 | diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h | ||
1931 | index 0a3e808b9123..2ace1f90d138 100644 | ||
1932 | --- a/arch/x86/include/asm/desc.h | ||
1933 | +++ b/arch/x86/include/asm/desc.h | ||
1934 | @@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void) | ||
1935 | return this_cpu_ptr(&gdt_page)->gdt; | ||
1936 | } | ||
1937 | |||
1938 | -/* Get the fixmap index for a specific processor */ | ||
1939 | -static inline unsigned int get_cpu_gdt_ro_index(int cpu) | ||
1940 | -{ | ||
1941 | - return FIX_GDT_REMAP_BEGIN + cpu; | ||
1942 | -} | ||
1943 | - | ||
1944 | /* Provide the fixmap address of the remapped GDT */ | ||
1945 | static inline struct desc_struct *get_cpu_gdt_ro(int cpu) | ||
1946 | { | ||
1947 | - unsigned int idx = get_cpu_gdt_ro_index(cpu); | ||
1948 | - return (struct desc_struct *)__fix_to_virt(idx); | ||
1949 | + return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt; | ||
1950 | } | ||
1951 | |||
1952 | /* Provide the current read-only GDT */ | ||
1953 | @@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, | ||
1954 | #endif | ||
1955 | } | ||
1956 | |||
1957 | -static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) | ||
1958 | +static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr) | ||
1959 | { | ||
1960 | struct desc_struct *d = get_cpu_gdt_rw(cpu); | ||
1961 | tss_desc tss; | ||
1962 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h | ||
1963 | index dcd9fb55e679..94fc4fa14127 100644 | ||
1964 | --- a/arch/x86/include/asm/fixmap.h | ||
1965 | +++ b/arch/x86/include/asm/fixmap.h | ||
1966 | @@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP; | ||
1967 | PAGE_SIZE) | ||
1968 | #endif | ||
1969 | |||
1970 | +/* | ||
1971 | + * cpu_entry_area is a percpu region in the fixmap that contains things | ||
1972 | + * needed by the CPU and early entry/exit code. Real types aren't used | ||
1973 | + * for all fields here to avoid circular header dependencies. | ||
1974 | + * | ||
1975 | + * Every field is a virtual alias of some other allocated backing store. | ||
1976 | + * There is no direct allocation of a struct cpu_entry_area. | ||
1977 | + */ | ||
1978 | +struct cpu_entry_area { | ||
1979 | + char gdt[PAGE_SIZE]; | ||
1980 | + | ||
1981 | + /* | ||
1982 | + * The GDT is just below SYSENTER_stack and thus serves (on x86_64) as | ||
1983 | + * a a read-only guard page. | ||
1984 | + */ | ||
1985 | + struct SYSENTER_stack_page SYSENTER_stack_page; | ||
1986 | + | ||
1987 | + /* | ||
1988 | + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because | ||
1989 | + * we need task switches to work, and task switches write to the TSS. | ||
1990 | + */ | ||
1991 | + struct tss_struct tss; | ||
1992 | + | ||
1993 | + char entry_trampoline[PAGE_SIZE]; | ||
1994 | + | ||
1995 | +#ifdef CONFIG_X86_64 | ||
1996 | + /* | ||
1997 | + * Exception stacks used for IST entries. | ||
1998 | + * | ||
1999 | + * In the future, this should have a separate slot for each stack | ||
2000 | + * with guard pages between them. | ||
2001 | + */ | ||
2002 | + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; | ||
2003 | +#endif | ||
2004 | +}; | ||
2005 | + | ||
2006 | +#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) | ||
2007 | + | ||
2008 | +extern void setup_cpu_entry_areas(void); | ||
2009 | |||
2010 | /* | ||
2011 | * Here we define all the compile-time 'special' virtual | ||
2012 | @@ -101,8 +140,14 @@ enum fixed_addresses { | ||
2013 | FIX_LNW_VRTC, | ||
2014 | #endif | ||
2015 | /* Fixmap entries to remap the GDTs, one per processor. */ | ||
2016 | - FIX_GDT_REMAP_BEGIN, | ||
2017 | - FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1, | ||
2018 | + FIX_CPU_ENTRY_AREA_TOP, | ||
2019 | + FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1, | ||
2020 | + | ||
2021 | +#ifdef CONFIG_ACPI_APEI_GHES | ||
2022 | + /* Used for GHES mapping from assorted contexts */ | ||
2023 | + FIX_APEI_GHES_IRQ, | ||
2024 | + FIX_APEI_GHES_NMI, | ||
2025 | +#endif | ||
2026 | |||
2027 | __end_of_permanent_fixed_addresses, | ||
2028 | |||
2029 | @@ -185,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr, | ||
2030 | void __early_set_fixmap(enum fixed_addresses idx, | ||
2031 | phys_addr_t phys, pgprot_t flags); | ||
2032 | |||
2033 | +static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page) | ||
2034 | +{ | ||
2035 | + BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); | ||
2036 | + | ||
2037 | + return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page; | ||
2038 | +} | ||
2039 | + | ||
2040 | +#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \ | ||
2041 | + BUILD_BUG_ON(offset % PAGE_SIZE != 0); \ | ||
2042 | + __get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \ | ||
2043 | + }) | ||
2044 | + | ||
2045 | +#define get_cpu_entry_area_index(cpu, field) \ | ||
2046 | + __get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field)) | ||
2047 | + | ||
2048 | +static inline struct cpu_entry_area *get_cpu_entry_area(int cpu) | ||
2049 | +{ | ||
2050 | + return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0)); | ||
2051 | +} | ||
2052 | + | ||
2053 | +static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu) | ||
2054 | +{ | ||
2055 | + return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack; | ||
2056 | +} | ||
2057 | + | ||
2058 | #endif /* !__ASSEMBLY__ */ | ||
2059 | #endif /* _ASM_X86_FIXMAP_H */ | ||
2060 | diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h | ||
2061 | index 0ead9dbb9130..96aa6b9884dc 100644 | ||
2062 | --- a/arch/x86/include/asm/hypervisor.h | ||
2063 | +++ b/arch/x86/include/asm/hypervisor.h | ||
2064 | @@ -20,14 +20,22 @@ | ||
2065 | #ifndef _ASM_X86_HYPERVISOR_H | ||
2066 | #define _ASM_X86_HYPERVISOR_H | ||
2067 | |||
2068 | +/* x86 hypervisor types */ | ||
2069 | +enum x86_hypervisor_type { | ||
2070 | + X86_HYPER_NATIVE = 0, | ||
2071 | + X86_HYPER_VMWARE, | ||
2072 | + X86_HYPER_MS_HYPERV, | ||
2073 | + X86_HYPER_XEN_PV, | ||
2074 | + X86_HYPER_XEN_HVM, | ||
2075 | + X86_HYPER_KVM, | ||
2076 | +}; | ||
2077 | + | ||
2078 | #ifdef CONFIG_HYPERVISOR_GUEST | ||
2079 | |||
2080 | #include <asm/kvm_para.h> | ||
2081 | +#include <asm/x86_init.h> | ||
2082 | #include <asm/xen/hypervisor.h> | ||
2083 | |||
2084 | -/* | ||
2085 | - * x86 hypervisor information | ||
2086 | - */ | ||
2087 | struct hypervisor_x86 { | ||
2088 | /* Hypervisor name */ | ||
2089 | const char *name; | ||
2090 | @@ -35,40 +43,27 @@ struct hypervisor_x86 { | ||
2091 | /* Detection routine */ | ||
2092 | uint32_t (*detect)(void); | ||
2093 | |||
2094 | - /* Platform setup (run once per boot) */ | ||
2095 | - void (*init_platform)(void); | ||
2096 | - | ||
2097 | - /* X2APIC detection (run once per boot) */ | ||
2098 | - bool (*x2apic_available)(void); | ||
2099 | + /* Hypervisor type */ | ||
2100 | + enum x86_hypervisor_type type; | ||
2101 | |||
2102 | - /* pin current vcpu to specified physical cpu (run rarely) */ | ||
2103 | - void (*pin_vcpu)(int); | ||
2104 | + /* init time callbacks */ | ||
2105 | + struct x86_hyper_init init; | ||
2106 | |||
2107 | - /* called during init_mem_mapping() to setup early mappings. */ | ||
2108 | - void (*init_mem_mapping)(void); | ||
2109 | + /* runtime callbacks */ | ||
2110 | + struct x86_hyper_runtime runtime; | ||
2111 | }; | ||
2112 | |||
2113 | -extern const struct hypervisor_x86 *x86_hyper; | ||
2114 | - | ||
2115 | -/* Recognized hypervisors */ | ||
2116 | -extern const struct hypervisor_x86 x86_hyper_vmware; | ||
2117 | -extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
2118 | -extern const struct hypervisor_x86 x86_hyper_xen_pv; | ||
2119 | -extern const struct hypervisor_x86 x86_hyper_xen_hvm; | ||
2120 | -extern const struct hypervisor_x86 x86_hyper_kvm; | ||
2121 | - | ||
2122 | +extern enum x86_hypervisor_type x86_hyper_type; | ||
2123 | extern void init_hypervisor_platform(void); | ||
2124 | -extern bool hypervisor_x2apic_available(void); | ||
2125 | -extern void hypervisor_pin_vcpu(int cpu); | ||
2126 | - | ||
2127 | -static inline void hypervisor_init_mem_mapping(void) | ||
2128 | +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) | ||
2129 | { | ||
2130 | - if (x86_hyper && x86_hyper->init_mem_mapping) | ||
2131 | - x86_hyper->init_mem_mapping(); | ||
2132 | + return x86_hyper_type == type; | ||
2133 | } | ||
2134 | #else | ||
2135 | static inline void init_hypervisor_platform(void) { } | ||
2136 | -static inline bool hypervisor_x2apic_available(void) { return false; } | ||
2137 | -static inline void hypervisor_init_mem_mapping(void) { } | ||
2138 | +static inline bool hypervisor_is_type(enum x86_hypervisor_type type) | ||
2139 | +{ | ||
2140 | + return type == X86_HYPER_NATIVE; | ||
2141 | +} | ||
2142 | #endif /* CONFIG_HYPERVISOR_GUEST */ | ||
2143 | #endif /* _ASM_X86_HYPERVISOR_H */ | ||
2144 | diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h | ||
2145 | index c8ef23f2c28f..89f08955fff7 100644 | ||
2146 | --- a/arch/x86/include/asm/irqflags.h | ||
2147 | +++ b/arch/x86/include/asm/irqflags.h | ||
2148 | @@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void) | ||
2149 | swapgs; \ | ||
2150 | sysretl | ||
2151 | |||
2152 | +#ifdef CONFIG_DEBUG_ENTRY | ||
2153 | +#define SAVE_FLAGS(x) pushfq; popq %rax | ||
2154 | +#endif | ||
2155 | #else | ||
2156 | #define INTERRUPT_RETURN iret | ||
2157 | #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit | ||
2158 | diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h | ||
2159 | index f86a8caa561e..395c9631e000 100644 | ||
2160 | --- a/arch/x86/include/asm/kdebug.h | ||
2161 | +++ b/arch/x86/include/asm/kdebug.h | ||
2162 | @@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long); | ||
2163 | extern int __must_check __die(const char *, struct pt_regs *, long); | ||
2164 | extern void show_stack_regs(struct pt_regs *regs); | ||
2165 | extern void __show_regs(struct pt_regs *regs, int all); | ||
2166 | +extern void show_iret_regs(struct pt_regs *regs); | ||
2167 | extern unsigned long oops_begin(void); | ||
2168 | extern void oops_end(unsigned long, struct pt_regs *, int signr); | ||
2169 | |||
2170 | diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h | ||
2171 | index 6699fc441644..6d16d15d09a0 100644 | ||
2172 | --- a/arch/x86/include/asm/mmu_context.h | ||
2173 | +++ b/arch/x86/include/asm/mmu_context.h | ||
2174 | @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) | ||
2175 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
2176 | struct ldt_struct *ldt; | ||
2177 | |||
2178 | - /* lockless_dereference synchronizes with smp_store_release */ | ||
2179 | - ldt = lockless_dereference(mm->context.ldt); | ||
2180 | + /* READ_ONCE synchronizes with smp_store_release */ | ||
2181 | + ldt = READ_ONCE(mm->context.ldt); | ||
2182 | |||
2183 | /* | ||
2184 | * Any change to mm->context.ldt is followed by an IPI to all | ||
2185 | diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h | ||
2186 | index 8546fafa21a9..7948a17febb4 100644 | ||
2187 | --- a/arch/x86/include/asm/module.h | ||
2188 | +++ b/arch/x86/include/asm/module.h | ||
2189 | @@ -6,7 +6,7 @@ | ||
2190 | #include <asm/orc_types.h> | ||
2191 | |||
2192 | struct mod_arch_specific { | ||
2193 | -#ifdef CONFIG_ORC_UNWINDER | ||
2194 | +#ifdef CONFIG_UNWINDER_ORC | ||
2195 | unsigned int num_orcs; | ||
2196 | int *orc_unwind_ip; | ||
2197 | struct orc_entry *orc_unwind; | ||
2198 | diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h | ||
2199 | index fd81228e8037..892df375b615 100644 | ||
2200 | --- a/arch/x86/include/asm/paravirt.h | ||
2201 | +++ b/arch/x86/include/asm/paravirt.h | ||
2202 | @@ -16,10 +16,9 @@ | ||
2203 | #include <linux/cpumask.h> | ||
2204 | #include <asm/frame.h> | ||
2205 | |||
2206 | -static inline void load_sp0(struct tss_struct *tss, | ||
2207 | - struct thread_struct *thread) | ||
2208 | +static inline void load_sp0(unsigned long sp0) | ||
2209 | { | ||
2210 | - PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread); | ||
2211 | + PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); | ||
2212 | } | ||
2213 | |||
2214 | /* The paravirtualized CPUID instruction. */ | ||
2215 | @@ -928,6 +927,15 @@ extern void default_banner(void); | ||
2216 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ | ||
2217 | CLBR_NONE, \ | ||
2218 | jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) | ||
2219 | + | ||
2220 | +#ifdef CONFIG_DEBUG_ENTRY | ||
2221 | +#define SAVE_FLAGS(clobbers) \ | ||
2222 | + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ | ||
2223 | + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ | ||
2224 | + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ | ||
2225 | + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) | ||
2226 | +#endif | ||
2227 | + | ||
2228 | #endif /* CONFIG_X86_32 */ | ||
2229 | |||
2230 | #endif /* __ASSEMBLY__ */ | ||
2231 | diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h | ||
2232 | index 10cc3b9709fe..6ec54d01972d 100644 | ||
2233 | --- a/arch/x86/include/asm/paravirt_types.h | ||
2234 | +++ b/arch/x86/include/asm/paravirt_types.h | ||
2235 | @@ -134,7 +134,7 @@ struct pv_cpu_ops { | ||
2236 | void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); | ||
2237 | void (*free_ldt)(struct desc_struct *ldt, unsigned entries); | ||
2238 | |||
2239 | - void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); | ||
2240 | + void (*load_sp0)(unsigned long sp0); | ||
2241 | |||
2242 | void (*set_iopl_mask)(unsigned mask); | ||
2243 | |||
2244 | diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h | ||
2245 | index 377f1ffd18be..ba3c523aaf16 100644 | ||
2246 | --- a/arch/x86/include/asm/percpu.h | ||
2247 | +++ b/arch/x86/include/asm/percpu.h | ||
2248 | @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, | ||
2249 | { | ||
2250 | bool oldbit; | ||
2251 | |||
2252 | - asm volatile("bt "__percpu_arg(2)",%1\n\t" | ||
2253 | + asm volatile("bt "__percpu_arg(2)",%1" | ||
2254 | CC_SET(c) | ||
2255 | : CC_OUT(c) (oldbit) | ||
2256 | : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); | ||
2257 | diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h | ||
2258 | index 59df7b47a434..9e9b05fc4860 100644 | ||
2259 | --- a/arch/x86/include/asm/pgtable_types.h | ||
2260 | +++ b/arch/x86/include/asm/pgtable_types.h | ||
2261 | @@ -200,10 +200,9 @@ enum page_cache_mode { | ||
2262 | |||
2263 | #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) | ||
2264 | |||
2265 | -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | ||
2266 | - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC) | ||
2267 | #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ | ||
2268 | _PAGE_DIRTY | _PAGE_ENC) | ||
2269 | +#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER) | ||
2270 | |||
2271 | #define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC) | ||
2272 | #define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC) | ||
2273 | diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h | ||
2274 | index bdac19ab2488..da943411d3d8 100644 | ||
2275 | --- a/arch/x86/include/asm/processor.h | ||
2276 | +++ b/arch/x86/include/asm/processor.h | ||
2277 | @@ -162,9 +162,9 @@ enum cpuid_regs_idx { | ||
2278 | extern struct cpuinfo_x86 boot_cpu_data; | ||
2279 | extern struct cpuinfo_x86 new_cpu_data; | ||
2280 | |||
2281 | -extern struct tss_struct doublefault_tss; | ||
2282 | -extern __u32 cpu_caps_cleared[NCAPINTS]; | ||
2283 | -extern __u32 cpu_caps_set[NCAPINTS]; | ||
2284 | +extern struct x86_hw_tss doublefault_tss; | ||
2285 | +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; | ||
2286 | +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS]; | ||
2287 | |||
2288 | #ifdef CONFIG_SMP | ||
2289 | DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); | ||
2290 | @@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir) | ||
2291 | write_cr3(__sme_pa(pgdir)); | ||
2292 | } | ||
2293 | |||
2294 | +/* | ||
2295 | + * Note that while the legacy 'TSS' name comes from 'Task State Segment', | ||
2296 | + * on modern x86 CPUs the TSS also holds information important to 64-bit mode, | ||
2297 | + * unrelated to the task-switch mechanism: | ||
2298 | + */ | ||
2299 | #ifdef CONFIG_X86_32 | ||
2300 | /* This is the TSS defined by the hardware. */ | ||
2301 | struct x86_hw_tss { | ||
2302 | @@ -304,7 +309,13 @@ struct x86_hw_tss { | ||
2303 | struct x86_hw_tss { | ||
2304 | u32 reserved1; | ||
2305 | u64 sp0; | ||
2306 | + | ||
2307 | + /* | ||
2308 | + * We store cpu_current_top_of_stack in sp1 so it's always accessible. | ||
2309 | + * Linux does not use ring 1, so sp1 is not otherwise needed. | ||
2310 | + */ | ||
2311 | u64 sp1; | ||
2312 | + | ||
2313 | u64 sp2; | ||
2314 | u64 reserved2; | ||
2315 | u64 ist[7]; | ||
2316 | @@ -322,12 +333,22 @@ struct x86_hw_tss { | ||
2317 | #define IO_BITMAP_BITS 65536 | ||
2318 | #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) | ||
2319 | #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) | ||
2320 | -#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) | ||
2321 | +#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) | ||
2322 | #define INVALID_IO_BITMAP_OFFSET 0x8000 | ||
2323 | |||
2324 | +struct SYSENTER_stack { | ||
2325 | + unsigned long words[64]; | ||
2326 | +}; | ||
2327 | + | ||
2328 | +struct SYSENTER_stack_page { | ||
2329 | + struct SYSENTER_stack stack; | ||
2330 | +} __aligned(PAGE_SIZE); | ||
2331 | + | ||
2332 | struct tss_struct { | ||
2333 | /* | ||
2334 | - * The hardware state: | ||
2335 | + * The fixed hardware portion. This must not cross a page boundary | ||
2336 | + * at risk of violating the SDM's advice and potentially triggering | ||
2337 | + * errata. | ||
2338 | */ | ||
2339 | struct x86_hw_tss x86_tss; | ||
2340 | |||
2341 | @@ -338,18 +359,9 @@ struct tss_struct { | ||
2342 | * be within the limit. | ||
2343 | */ | ||
2344 | unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; | ||
2345 | +} __aligned(PAGE_SIZE); | ||
2346 | |||
2347 | -#ifdef CONFIG_X86_32 | ||
2348 | - /* | ||
2349 | - * Space for the temporary SYSENTER stack. | ||
2350 | - */ | ||
2351 | - unsigned long SYSENTER_stack_canary; | ||
2352 | - unsigned long SYSENTER_stack[64]; | ||
2353 | -#endif | ||
2354 | - | ||
2355 | -} ____cacheline_aligned; | ||
2356 | - | ||
2357 | -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); | ||
2358 | +DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); | ||
2359 | |||
2360 | /* | ||
2361 | * sizeof(unsigned long) coming from an extra "long" at the end | ||
2362 | @@ -363,6 +375,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); | ||
2363 | |||
2364 | #ifdef CONFIG_X86_32 | ||
2365 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | ||
2366 | +#else | ||
2367 | +/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */ | ||
2368 | +#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 | ||
2369 | #endif | ||
2370 | |||
2371 | /* | ||
2372 | @@ -431,7 +446,9 @@ typedef struct { | ||
2373 | struct thread_struct { | ||
2374 | /* Cached TLS descriptors: */ | ||
2375 | struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; | ||
2376 | +#ifdef CONFIG_X86_32 | ||
2377 | unsigned long sp0; | ||
2378 | +#endif | ||
2379 | unsigned long sp; | ||
2380 | #ifdef CONFIG_X86_32 | ||
2381 | unsigned long sysenter_cs; | ||
2382 | @@ -518,16 +535,9 @@ static inline void native_set_iopl_mask(unsigned mask) | ||
2383 | } | ||
2384 | |||
2385 | static inline void | ||
2386 | -native_load_sp0(struct tss_struct *tss, struct thread_struct *thread) | ||
2387 | +native_load_sp0(unsigned long sp0) | ||
2388 | { | ||
2389 | - tss->x86_tss.sp0 = thread->sp0; | ||
2390 | -#ifdef CONFIG_X86_32 | ||
2391 | - /* Only happens when SEP is enabled, no need to test "SEP"arately: */ | ||
2392 | - if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { | ||
2393 | - tss->x86_tss.ss1 = thread->sysenter_cs; | ||
2394 | - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
2395 | - } | ||
2396 | -#endif | ||
2397 | + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); | ||
2398 | } | ||
2399 | |||
2400 | static inline void native_swapgs(void) | ||
2401 | @@ -539,12 +549,18 @@ static inline void native_swapgs(void) | ||
2402 | |||
2403 | static inline unsigned long current_top_of_stack(void) | ||
2404 | { | ||
2405 | -#ifdef CONFIG_X86_64 | ||
2406 | - return this_cpu_read_stable(cpu_tss.x86_tss.sp0); | ||
2407 | -#else | ||
2408 | - /* sp0 on x86_32 is special in and around vm86 mode. */ | ||
2409 | + /* | ||
2410 | + * We can't read directly from tss.sp0: sp0 on x86_32 is special in | ||
2411 | + * and around vm86 mode and sp0 on x86_64 is special because of the | ||
2412 | + * entry trampoline. | ||
2413 | + */ | ||
2414 | return this_cpu_read_stable(cpu_current_top_of_stack); | ||
2415 | -#endif | ||
2416 | +} | ||
2417 | + | ||
2418 | +static inline bool on_thread_stack(void) | ||
2419 | +{ | ||
2420 | + return (unsigned long)(current_top_of_stack() - | ||
2421 | + current_stack_pointer) < THREAD_SIZE; | ||
2422 | } | ||
2423 | |||
2424 | #ifdef CONFIG_PARAVIRT | ||
2425 | @@ -552,10 +568,9 @@ static inline unsigned long current_top_of_stack(void) | ||
2426 | #else | ||
2427 | #define __cpuid native_cpuid | ||
2428 | |||
2429 | -static inline void load_sp0(struct tss_struct *tss, | ||
2430 | - struct thread_struct *thread) | ||
2431 | +static inline void load_sp0(unsigned long sp0) | ||
2432 | { | ||
2433 | - native_load_sp0(tss, thread); | ||
2434 | + native_load_sp0(sp0); | ||
2435 | } | ||
2436 | |||
2437 | #define set_iopl_mask native_set_iopl_mask | ||
2438 | @@ -804,6 +819,15 @@ static inline void spin_lock_prefetch(const void *x) | ||
2439 | #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ | ||
2440 | TOP_OF_KERNEL_STACK_PADDING) | ||
2441 | |||
2442 | +#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1)) | ||
2443 | + | ||
2444 | +#define task_pt_regs(task) \ | ||
2445 | +({ \ | ||
2446 | + unsigned long __ptr = (unsigned long)task_stack_page(task); \ | ||
2447 | + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ | ||
2448 | + ((struct pt_regs *)__ptr) - 1; \ | ||
2449 | +}) | ||
2450 | + | ||
2451 | #ifdef CONFIG_X86_32 | ||
2452 | /* | ||
2453 | * User space process size: 3GB (default). | ||
2454 | @@ -823,23 +847,6 @@ static inline void spin_lock_prefetch(const void *x) | ||
2455 | .addr_limit = KERNEL_DS, \ | ||
2456 | } | ||
2457 | |||
2458 | -/* | ||
2459 | - * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. | ||
2460 | - * This is necessary to guarantee that the entire "struct pt_regs" | ||
2461 | - * is accessible even if the CPU haven't stored the SS/ESP registers | ||
2462 | - * on the stack (interrupt gate does not save these registers | ||
2463 | - * when switching to the same priv ring). | ||
2464 | - * Therefore beware: accessing the ss/esp fields of the | ||
2465 | - * "struct pt_regs" is possible, but they may contain the | ||
2466 | - * completely wrong values. | ||
2467 | - */ | ||
2468 | -#define task_pt_regs(task) \ | ||
2469 | -({ \ | ||
2470 | - unsigned long __ptr = (unsigned long)task_stack_page(task); \ | ||
2471 | - __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ | ||
2472 | - ((struct pt_regs *)__ptr) - 1; \ | ||
2473 | -}) | ||
2474 | - | ||
2475 | #define KSTK_ESP(task) (task_pt_regs(task)->sp) | ||
2476 | |||
2477 | #else | ||
2478 | @@ -873,11 +880,9 @@ static inline void spin_lock_prefetch(const void *x) | ||
2479 | #define STACK_TOP_MAX TASK_SIZE_MAX | ||
2480 | |||
2481 | #define INIT_THREAD { \ | ||
2482 | - .sp0 = TOP_OF_INIT_STACK, \ | ||
2483 | .addr_limit = KERNEL_DS, \ | ||
2484 | } | ||
2485 | |||
2486 | -#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) | ||
2487 | extern unsigned long KSTK_ESP(struct task_struct *task); | ||
2488 | |||
2489 | #endif /* CONFIG_X86_64 */ | ||
2490 | diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h | ||
2491 | index c0e3c45cf6ab..14131dd06b29 100644 | ||
2492 | --- a/arch/x86/include/asm/ptrace.h | ||
2493 | +++ b/arch/x86/include/asm/ptrace.h | ||
2494 | @@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs) | ||
2495 | #endif | ||
2496 | } | ||
2497 | |||
2498 | -#ifdef CONFIG_X86_64 | ||
2499 | static inline bool user_64bit_mode(struct pt_regs *regs) | ||
2500 | { | ||
2501 | +#ifdef CONFIG_X86_64 | ||
2502 | #ifndef CONFIG_PARAVIRT | ||
2503 | /* | ||
2504 | * On non-paravirt systems, this is the only long mode CPL 3 | ||
2505 | @@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs) | ||
2506 | /* Headers are too twisted for this to go in paravirt.h. */ | ||
2507 | return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs; | ||
2508 | #endif | ||
2509 | +#else /* !CONFIG_X86_64 */ | ||
2510 | + return false; | ||
2511 | +#endif | ||
2512 | } | ||
2513 | |||
2514 | +#ifdef CONFIG_X86_64 | ||
2515 | #define current_user_stack_pointer() current_pt_regs()->sp | ||
2516 | #define compat_user_stack_pointer() current_pt_regs()->sp | ||
2517 | #endif | ||
2518 | diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h | ||
2519 | index d8f3a6ae9f6c..f91c365e57c3 100644 | ||
2520 | --- a/arch/x86/include/asm/rmwcc.h | ||
2521 | +++ b/arch/x86/include/asm/rmwcc.h | ||
2522 | @@ -29,7 +29,7 @@ cc_label: \ | ||
2523 | #define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \ | ||
2524 | do { \ | ||
2525 | bool c; \ | ||
2526 | - asm volatile (fullop ";" CC_SET(cc) \ | ||
2527 | + asm volatile (fullop CC_SET(cc) \ | ||
2528 | : [counter] "+m" (var), CC_OUT(cc) (c) \ | ||
2529 | : __VA_ARGS__ : clobbers); \ | ||
2530 | return c; \ | ||
2531 | diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h | ||
2532 | index 8da111b3c342..f8062bfd43a0 100644 | ||
2533 | --- a/arch/x86/include/asm/stacktrace.h | ||
2534 | +++ b/arch/x86/include/asm/stacktrace.h | ||
2535 | @@ -16,6 +16,7 @@ enum stack_type { | ||
2536 | STACK_TYPE_TASK, | ||
2537 | STACK_TYPE_IRQ, | ||
2538 | STACK_TYPE_SOFTIRQ, | ||
2539 | + STACK_TYPE_SYSENTER, | ||
2540 | STACK_TYPE_EXCEPTION, | ||
2541 | STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1, | ||
2542 | }; | ||
2543 | @@ -28,6 +29,8 @@ struct stack_info { | ||
2544 | bool in_task_stack(unsigned long *stack, struct task_struct *task, | ||
2545 | struct stack_info *info); | ||
2546 | |||
2547 | +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info); | ||
2548 | + | ||
2549 | int get_stack_info(unsigned long *stack, struct task_struct *task, | ||
2550 | struct stack_info *info, unsigned long *visit_mask); | ||
2551 | |||
2552 | diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h | ||
2553 | index 899084b70412..9b6df68d8fd1 100644 | ||
2554 | --- a/arch/x86/include/asm/switch_to.h | ||
2555 | +++ b/arch/x86/include/asm/switch_to.h | ||
2556 | @@ -2,6 +2,8 @@ | ||
2557 | #ifndef _ASM_X86_SWITCH_TO_H | ||
2558 | #define _ASM_X86_SWITCH_TO_H | ||
2559 | |||
2560 | +#include <linux/sched/task_stack.h> | ||
2561 | + | ||
2562 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | ||
2563 | |||
2564 | struct task_struct *__switch_to_asm(struct task_struct *prev, | ||
2565 | @@ -73,4 +75,28 @@ do { \ | ||
2566 | ((last) = __switch_to_asm((prev), (next))); \ | ||
2567 | } while (0) | ||
2568 | |||
2569 | +#ifdef CONFIG_X86_32 | ||
2570 | +static inline void refresh_sysenter_cs(struct thread_struct *thread) | ||
2571 | +{ | ||
2572 | + /* Only happens when SEP is enabled, no need to test "SEP"arately: */ | ||
2573 | + if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs)) | ||
2574 | + return; | ||
2575 | + | ||
2576 | + this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs); | ||
2577 | + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
2578 | +} | ||
2579 | +#endif | ||
2580 | + | ||
2581 | +/* This is used when switching tasks or entering/exiting vm86 mode. */ | ||
2582 | +static inline void update_sp0(struct task_struct *task) | ||
2583 | +{ | ||
2584 | + /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */ | ||
2585 | +#ifdef CONFIG_X86_32 | ||
2586 | + load_sp0(task->thread.sp0); | ||
2587 | +#else | ||
2588 | + if (static_cpu_has(X86_FEATURE_XENPV)) | ||
2589 | + load_sp0(task_top_of_stack(task)); | ||
2590 | +#endif | ||
2591 | +} | ||
2592 | + | ||
2593 | #endif /* _ASM_X86_SWITCH_TO_H */ | ||
2594 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | ||
2595 | index 70f425947dc5..00223333821a 100644 | ||
2596 | --- a/arch/x86/include/asm/thread_info.h | ||
2597 | +++ b/arch/x86/include/asm/thread_info.h | ||
2598 | @@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack, | ||
2599 | #else /* !__ASSEMBLY__ */ | ||
2600 | |||
2601 | #ifdef CONFIG_X86_64 | ||
2602 | -# define cpu_current_top_of_stack (cpu_tss + TSS_sp0) | ||
2603 | +# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1) | ||
2604 | #endif | ||
2605 | |||
2606 | #endif | ||
2607 | diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h | ||
2608 | index fa60398bbc3a..069c04be1507 100644 | ||
2609 | --- a/arch/x86/include/asm/trace/fpu.h | ||
2610 | +++ b/arch/x86/include/asm/trace/fpu.h | ||
2611 | @@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu, | ||
2612 | ) | ||
2613 | ); | ||
2614 | |||
2615 | -DEFINE_EVENT(x86_fpu, x86_fpu_state, | ||
2616 | - TP_PROTO(struct fpu *fpu), | ||
2617 | - TP_ARGS(fpu) | ||
2618 | -); | ||
2619 | - | ||
2620 | DEFINE_EVENT(x86_fpu, x86_fpu_before_save, | ||
2621 | TP_PROTO(struct fpu *fpu), | ||
2622 | TP_ARGS(fpu) | ||
2623 | @@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, | ||
2624 | TP_ARGS(fpu) | ||
2625 | ); | ||
2626 | |||
2627 | -DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state, | ||
2628 | - TP_PROTO(struct fpu *fpu), | ||
2629 | - TP_ARGS(fpu) | ||
2630 | -); | ||
2631 | - | ||
2632 | DEFINE_EVENT(x86_fpu, x86_fpu_init_state, | ||
2633 | TP_PROTO(struct fpu *fpu), | ||
2634 | TP_ARGS(fpu) | ||
2635 | diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h | ||
2636 | index b0cced97a6ce..31051f35cbb7 100644 | ||
2637 | --- a/arch/x86/include/asm/traps.h | ||
2638 | +++ b/arch/x86/include/asm/traps.h | ||
2639 | @@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void); | ||
2640 | |||
2641 | #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV) | ||
2642 | asmlinkage void xen_divide_error(void); | ||
2643 | +asmlinkage void xen_xennmi(void); | ||
2644 | asmlinkage void xen_xendebug(void); | ||
2645 | asmlinkage void xen_xenint3(void); | ||
2646 | -asmlinkage void xen_nmi(void); | ||
2647 | asmlinkage void xen_overflow(void); | ||
2648 | asmlinkage void xen_bounds(void); | ||
2649 | asmlinkage void xen_invalid_op(void); | ||
2650 | @@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); | ||
2651 | dotraplinkage void do_stack_segment(struct pt_regs *, long); | ||
2652 | #ifdef CONFIG_X86_64 | ||
2653 | dotraplinkage void do_double_fault(struct pt_regs *, long); | ||
2654 | -asmlinkage struct pt_regs *sync_regs(struct pt_regs *); | ||
2655 | #endif | ||
2656 | dotraplinkage void do_general_protection(struct pt_regs *, long); | ||
2657 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); | ||
2658 | @@ -145,4 +144,22 @@ enum { | ||
2659 | X86_TRAP_IRET = 32, /* 32, IRET Exception */ | ||
2660 | }; | ||
2661 | |||
2662 | +/* | ||
2663 | + * Page fault error code bits: | ||
2664 | + * | ||
2665 | + * bit 0 == 0: no page found 1: protection fault | ||
2666 | + * bit 1 == 0: read access 1: write access | ||
2667 | + * bit 2 == 0: kernel-mode access 1: user-mode access | ||
2668 | + * bit 3 == 1: use of reserved bit detected | ||
2669 | + * bit 4 == 1: fault was an instruction fetch | ||
2670 | + * bit 5 == 1: protection keys block access | ||
2671 | + */ | ||
2672 | +enum x86_pf_error_code { | ||
2673 | + X86_PF_PROT = 1 << 0, | ||
2674 | + X86_PF_WRITE = 1 << 1, | ||
2675 | + X86_PF_USER = 1 << 2, | ||
2676 | + X86_PF_RSVD = 1 << 3, | ||
2677 | + X86_PF_INSTR = 1 << 4, | ||
2678 | + X86_PF_PK = 1 << 5, | ||
2679 | +}; | ||
2680 | #endif /* _ASM_X86_TRAPS_H */ | ||
2681 | diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h | ||
2682 | index 87adc0d38c4a..c1688c2d0a12 100644 | ||
2683 | --- a/arch/x86/include/asm/unwind.h | ||
2684 | +++ b/arch/x86/include/asm/unwind.h | ||
2685 | @@ -7,17 +7,20 @@ | ||
2686 | #include <asm/ptrace.h> | ||
2687 | #include <asm/stacktrace.h> | ||
2688 | |||
2689 | +#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) | ||
2690 | +#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) | ||
2691 | + | ||
2692 | struct unwind_state { | ||
2693 | struct stack_info stack_info; | ||
2694 | unsigned long stack_mask; | ||
2695 | struct task_struct *task; | ||
2696 | int graph_idx; | ||
2697 | bool error; | ||
2698 | -#if defined(CONFIG_ORC_UNWINDER) | ||
2699 | +#if defined(CONFIG_UNWINDER_ORC) | ||
2700 | bool signal, full_regs; | ||
2701 | unsigned long sp, bp, ip; | ||
2702 | struct pt_regs *regs; | ||
2703 | -#elif defined(CONFIG_FRAME_POINTER_UNWINDER) | ||
2704 | +#elif defined(CONFIG_UNWINDER_FRAME_POINTER) | ||
2705 | bool got_irq; | ||
2706 | unsigned long *bp, *orig_sp, ip; | ||
2707 | struct pt_regs *regs; | ||
2708 | @@ -51,7 +54,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task, | ||
2709 | __unwind_start(state, task, regs, first_frame); | ||
2710 | } | ||
2711 | |||
2712 | -#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER) | ||
2713 | +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) | ||
2714 | +/* | ||
2715 | + * WARNING: The entire pt_regs may not be safe to dereference. In some cases, | ||
2716 | + * only the iret frame registers are accessible. Use with caution! | ||
2717 | + */ | ||
2718 | static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) | ||
2719 | { | ||
2720 | if (unwind_done(state)) | ||
2721 | @@ -66,7 +73,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state) | ||
2722 | } | ||
2723 | #endif | ||
2724 | |||
2725 | -#ifdef CONFIG_ORC_UNWINDER | ||
2726 | +#ifdef CONFIG_UNWINDER_ORC | ||
2727 | void unwind_init(void); | ||
2728 | void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, | ||
2729 | void *orc, size_t orc_size); | ||
2730 | diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h | ||
2731 | index 8a1ebf9540dd..ad15a0fda917 100644 | ||
2732 | --- a/arch/x86/include/asm/x86_init.h | ||
2733 | +++ b/arch/x86/include/asm/x86_init.h | ||
2734 | @@ -114,6 +114,18 @@ struct x86_init_pci { | ||
2735 | void (*fixup_irqs)(void); | ||
2736 | }; | ||
2737 | |||
2738 | +/** | ||
2739 | + * struct x86_hyper_init - x86 hypervisor init functions | ||
2740 | + * @init_platform: platform setup | ||
2741 | + * @x2apic_available: X2APIC detection | ||
2742 | + * @init_mem_mapping: setup early mappings during init_mem_mapping() | ||
2743 | + */ | ||
2744 | +struct x86_hyper_init { | ||
2745 | + void (*init_platform)(void); | ||
2746 | + bool (*x2apic_available)(void); | ||
2747 | + void (*init_mem_mapping)(void); | ||
2748 | +}; | ||
2749 | + | ||
2750 | /** | ||
2751 | * struct x86_init_ops - functions for platform specific setup | ||
2752 | * | ||
2753 | @@ -127,6 +139,7 @@ struct x86_init_ops { | ||
2754 | struct x86_init_timers timers; | ||
2755 | struct x86_init_iommu iommu; | ||
2756 | struct x86_init_pci pci; | ||
2757 | + struct x86_hyper_init hyper; | ||
2758 | }; | ||
2759 | |||
2760 | /** | ||
2761 | @@ -199,6 +212,15 @@ struct x86_legacy_features { | ||
2762 | struct x86_legacy_devices devices; | ||
2763 | }; | ||
2764 | |||
2765 | +/** | ||
2766 | + * struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks | ||
2767 | + * | ||
2768 | + * @pin_vcpu: pin current vcpu to specified physical cpu (run rarely) | ||
2769 | + */ | ||
2770 | +struct x86_hyper_runtime { | ||
2771 | + void (*pin_vcpu)(int cpu); | ||
2772 | +}; | ||
2773 | + | ||
2774 | /** | ||
2775 | * struct x86_platform_ops - platform specific runtime functions | ||
2776 | * @calibrate_cpu: calibrate CPU | ||
2777 | @@ -218,6 +240,7 @@ struct x86_legacy_features { | ||
2778 | * possible in x86_early_init_platform_quirks() by | ||
2779 | * only using the current x86_hardware_subarch | ||
2780 | * semantics. | ||
2781 | + * @hyper: x86 hypervisor specific runtime callbacks | ||
2782 | */ | ||
2783 | struct x86_platform_ops { | ||
2784 | unsigned long (*calibrate_cpu)(void); | ||
2785 | @@ -233,6 +256,7 @@ struct x86_platform_ops { | ||
2786 | void (*apic_post_init)(void); | ||
2787 | struct x86_legacy_features legacy; | ||
2788 | void (*set_legacy_features)(void); | ||
2789 | + struct x86_hyper_runtime hyper; | ||
2790 | }; | ||
2791 | |||
2792 | struct pci_dev; | ||
2793 | diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h | ||
2794 | index 6f3355399665..53b4ca55ebb6 100644 | ||
2795 | --- a/arch/x86/include/uapi/asm/processor-flags.h | ||
2796 | +++ b/arch/x86/include/uapi/asm/processor-flags.h | ||
2797 | @@ -152,5 +152,8 @@ | ||
2798 | #define CX86_ARR_BASE 0xc4 | ||
2799 | #define CX86_RCR_BASE 0xdc | ||
2800 | |||
2801 | +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
2802 | + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
2803 | + X86_CR0_PG) | ||
2804 | |||
2805 | #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */ | ||
2806 | diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile | ||
2807 | index 5f70044340ff..295abaa58add 100644 | ||
2808 | --- a/arch/x86/kernel/Makefile | ||
2809 | +++ b/arch/x86/kernel/Makefile | ||
2810 | @@ -25,9 +25,9 @@ endif | ||
2811 | KASAN_SANITIZE_head$(BITS).o := n | ||
2812 | KASAN_SANITIZE_dumpstack.o := n | ||
2813 | KASAN_SANITIZE_dumpstack_$(BITS).o := n | ||
2814 | -KASAN_SANITIZE_stacktrace.o := n | ||
2815 | +KASAN_SANITIZE_stacktrace.o := n | ||
2816 | +KASAN_SANITIZE_paravirt.o := n | ||
2817 | |||
2818 | -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y | ||
2819 | OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y | ||
2820 | OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y | ||
2821 | OBJECT_FILES_NON_STANDARD_test_nx.o := y | ||
2822 | @@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | ||
2823 | obj-$(CONFIG_TRACING) += tracepoint.o | ||
2824 | obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o | ||
2825 | |||
2826 | -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o | ||
2827 | -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o | ||
2828 | -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o | ||
2829 | +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o | ||
2830 | +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o | ||
2831 | +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o | ||
2832 | |||
2833 | ### | ||
2834 | # 64 bit specific files | ||
2835 | diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c | ||
2836 | index ff891772c9f8..89c7c8569e5e 100644 | ||
2837 | --- a/arch/x86/kernel/apic/apic.c | ||
2838 | +++ b/arch/x86/kernel/apic/apic.c | ||
2839 | @@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode) | ||
2840 | * under KVM | ||
2841 | */ | ||
2842 | if (max_physical_apicid > 255 || | ||
2843 | - !hypervisor_x2apic_available()) { | ||
2844 | + !x86_init.hyper.x2apic_available()) { | ||
2845 | pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); | ||
2846 | x2apic_disable(); | ||
2847 | return; | ||
2848 | diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c | ||
2849 | index 0d57bb9079c9..c0b694810ff4 100644 | ||
2850 | --- a/arch/x86/kernel/apic/x2apic_uv_x.c | ||
2851 | +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | ||
2852 | @@ -920,9 +920,8 @@ static __init void uv_rtc_init(void) | ||
2853 | /* | ||
2854 | * percpu heartbeat timer | ||
2855 | */ | ||
2856 | -static void uv_heartbeat(unsigned long ignored) | ||
2857 | +static void uv_heartbeat(struct timer_list *timer) | ||
2858 | { | ||
2859 | - struct timer_list *timer = &uv_scir_info->timer; | ||
2860 | unsigned char bits = uv_scir_info->state; | ||
2861 | |||
2862 | /* Flip heartbeat bit: */ | ||
2863 | @@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu) | ||
2864 | struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; | ||
2865 | |||
2866 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | ||
2867 | - setup_pinned_timer(timer, uv_heartbeat, cpu); | ||
2868 | + timer_setup(timer, uv_heartbeat, TIMER_PINNED); | ||
2869 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | ||
2870 | add_timer_on(timer, cpu); | ||
2871 | uv_cpu_scir_info(cpu)->enabled = 1; | ||
2872 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | ||
2873 | index 8ea78275480d..cd360a5e0dca 100644 | ||
2874 | --- a/arch/x86/kernel/asm-offsets.c | ||
2875 | +++ b/arch/x86/kernel/asm-offsets.c | ||
2876 | @@ -93,4 +93,10 @@ void common(void) { | ||
2877 | |||
2878 | BLANK(); | ||
2879 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | ||
2880 | + | ||
2881 | + /* Layout info for cpu_entry_area */ | ||
2882 | + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); | ||
2883 | + OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline); | ||
2884 | + OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page); | ||
2885 | + DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack)); | ||
2886 | } | ||
2887 | diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c | ||
2888 | index dedf428b20b6..7d20d9c0b3d6 100644 | ||
2889 | --- a/arch/x86/kernel/asm-offsets_32.c | ||
2890 | +++ b/arch/x86/kernel/asm-offsets_32.c | ||
2891 | @@ -47,13 +47,8 @@ void foo(void) | ||
2892 | BLANK(); | ||
2893 | |||
2894 | /* Offset from the sysenter stack to tss.sp0 */ | ||
2895 | - DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - | ||
2896 | - offsetofend(struct tss_struct, SYSENTER_stack)); | ||
2897 | - | ||
2898 | - /* Offset from cpu_tss to SYSENTER_stack */ | ||
2899 | - OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); | ||
2900 | - /* Size of SYSENTER_stack */ | ||
2901 | - DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); | ||
2902 | + DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) - | ||
2903 | + offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack)); | ||
2904 | |||
2905 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
2906 | BLANK(); | ||
2907 | diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c | ||
2908 | index 630212fa9b9d..bf51e51d808d 100644 | ||
2909 | --- a/arch/x86/kernel/asm-offsets_64.c | ||
2910 | +++ b/arch/x86/kernel/asm-offsets_64.c | ||
2911 | @@ -23,6 +23,9 @@ int main(void) | ||
2912 | #ifdef CONFIG_PARAVIRT | ||
2913 | OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); | ||
2914 | OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); | ||
2915 | +#ifdef CONFIG_DEBUG_ENTRY | ||
2916 | + OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl); | ||
2917 | +#endif | ||
2918 | BLANK(); | ||
2919 | #endif | ||
2920 | |||
2921 | @@ -63,6 +66,7 @@ int main(void) | ||
2922 | |||
2923 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | ||
2924 | OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); | ||
2925 | + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); | ||
2926 | BLANK(); | ||
2927 | |||
2928 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
2929 | diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile | ||
2930 | index c60922a66385..90cb82dbba57 100644 | ||
2931 | --- a/arch/x86/kernel/cpu/Makefile | ||
2932 | +++ b/arch/x86/kernel/cpu/Makefile | ||
2933 | @@ -23,6 +23,7 @@ obj-y += rdrand.o | ||
2934 | obj-y += match.o | ||
2935 | obj-y += bugs.o | ||
2936 | obj-$(CONFIG_CPU_FREQ) += aperfmperf.o | ||
2937 | +obj-y += cpuid-deps.o | ||
2938 | |||
2939 | obj-$(CONFIG_PROC_FS) += proc.o | ||
2940 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o | ||
2941 | diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c | ||
2942 | index d58184b7cd44..bcb75dc97d44 100644 | ||
2943 | --- a/arch/x86/kernel/cpu/amd.c | ||
2944 | +++ b/arch/x86/kernel/cpu/amd.c | ||
2945 | @@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c) | ||
2946 | case 0x17: init_amd_zn(c); break; | ||
2947 | } | ||
2948 | |||
2949 | - /* Enable workaround for FXSAVE leak */ | ||
2950 | - if (c->x86 >= 6) | ||
2951 | + /* | ||
2952 | + * Enable workaround for FXSAVE leak on CPUs | ||
2953 | + * without a XSaveErPtr feature | ||
2954 | + */ | ||
2955 | + if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR))) | ||
2956 | set_cpu_bug(c, X86_BUG_FXSAVE_LEAK); | ||
2957 | |||
2958 | cpu_detect_cache_sizes(c); | ||
2959 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | ||
2960 | index c9176bae7fd8..034900623adf 100644 | ||
2961 | --- a/arch/x86/kernel/cpu/common.c | ||
2962 | +++ b/arch/x86/kernel/cpu/common.c | ||
2963 | @@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) | ||
2964 | return NULL; /* Not found */ | ||
2965 | } | ||
2966 | |||
2967 | -__u32 cpu_caps_cleared[NCAPINTS]; | ||
2968 | -__u32 cpu_caps_set[NCAPINTS]; | ||
2969 | +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS]; | ||
2970 | +__u32 cpu_caps_set[NCAPINTS + NBUGINTS]; | ||
2971 | |||
2972 | void load_percpu_segment(int cpu) | ||
2973 | { | ||
2974 | @@ -466,27 +466,116 @@ void load_percpu_segment(int cpu) | ||
2975 | load_stack_canary_segment(); | ||
2976 | } | ||
2977 | |||
2978 | -/* Setup the fixmap mapping only once per-processor */ | ||
2979 | -static inline void setup_fixmap_gdt(int cpu) | ||
2980 | +#ifdef CONFIG_X86_32 | ||
2981 | +/* The 32-bit entry code needs to find cpu_entry_area. */ | ||
2982 | +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); | ||
2983 | +#endif | ||
2984 | + | ||
2985 | +#ifdef CONFIG_X86_64 | ||
2986 | +/* | ||
2987 | + * Special IST stacks which the CPU switches to when it calls | ||
2988 | + * an IST-marked descriptor entry. Up to 7 stacks (hardware | ||
2989 | + * limit), all of them are 4K, except the debug stack which | ||
2990 | + * is 8K. | ||
2991 | + */ | ||
2992 | +static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | ||
2993 | + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | ||
2994 | + [DEBUG_STACK - 1] = DEBUG_STKSZ | ||
2995 | +}; | ||
2996 | + | ||
2997 | +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | ||
2998 | + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | ||
2999 | +#endif | ||
3000 | + | ||
3001 | +static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page, | ||
3002 | + SYSENTER_stack_storage); | ||
3003 | + | ||
3004 | +static void __init | ||
3005 | +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) | ||
3006 | +{ | ||
3007 | + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) | ||
3008 | + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); | ||
3009 | +} | ||
3010 | + | ||
3011 | +/* Setup the fixmap mappings only once per-processor */ | ||
3012 | +static void __init setup_cpu_entry_area(int cpu) | ||
3013 | { | ||
3014 | #ifdef CONFIG_X86_64 | ||
3015 | - /* On 64-bit systems, we use a read-only fixmap GDT. */ | ||
3016 | - pgprot_t prot = PAGE_KERNEL_RO; | ||
3017 | + extern char _entry_trampoline[]; | ||
3018 | + | ||
3019 | + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ | ||
3020 | + pgprot_t gdt_prot = PAGE_KERNEL_RO; | ||
3021 | + pgprot_t tss_prot = PAGE_KERNEL_RO; | ||
3022 | #else | ||
3023 | /* | ||
3024 | * On native 32-bit systems, the GDT cannot be read-only because | ||
3025 | * our double fault handler uses a task gate, and entering through | ||
3026 | - * a task gate needs to change an available TSS to busy. If the GDT | ||
3027 | - * is read-only, that will triple fault. | ||
3028 | + * a task gate needs to change an available TSS to busy. If the | ||
3029 | + * GDT is read-only, that will triple fault. The TSS cannot be | ||
3030 | + * read-only because the CPU writes to it on task switches. | ||
3031 | * | ||
3032 | - * On Xen PV, the GDT must be read-only because the hypervisor requires | ||
3033 | - * it. | ||
3034 | + * On Xen PV, the GDT must be read-only because the hypervisor | ||
3035 | + * requires it. | ||
3036 | */ | ||
3037 | - pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ? | ||
3038 | + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? | ||
3039 | PAGE_KERNEL_RO : PAGE_KERNEL; | ||
3040 | + pgprot_t tss_prot = PAGE_KERNEL; | ||
3041 | +#endif | ||
3042 | + | ||
3043 | + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); | ||
3044 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page), | ||
3045 | + per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1, | ||
3046 | + PAGE_KERNEL); | ||
3047 | + | ||
3048 | + /* | ||
3049 | + * The Intel SDM says (Volume 3, 7.2.1): | ||
3050 | + * | ||
3051 | + * Avoid placing a page boundary in the part of the TSS that the | ||
3052 | + * processor reads during a task switch (the first 104 bytes). The | ||
3053 | + * processor may not correctly perform address translations if a | ||
3054 | + * boundary occurs in this area. During a task switch, the processor | ||
3055 | + * reads and writes into the first 104 bytes of each TSS (using | ||
3056 | + * contiguous physical addresses beginning with the physical address | ||
3057 | + * of the first byte of the TSS). So, after TSS access begins, if | ||
3058 | + * part of the 104 bytes is not physically contiguous, the processor | ||
3059 | + * will access incorrect information without generating a page-fault | ||
3060 | + * exception. | ||
3061 | + * | ||
3062 | + * There are also a lot of errata involving the TSS spanning a page | ||
3063 | + * boundary. Assert that we're not doing that. | ||
3064 | + */ | ||
3065 | + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | ||
3066 | + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | ||
3067 | + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | ||
3068 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), | ||
3069 | + &per_cpu(cpu_tss_rw, cpu), | ||
3070 | + sizeof(struct tss_struct) / PAGE_SIZE, | ||
3071 | + tss_prot); | ||
3072 | + | ||
3073 | +#ifdef CONFIG_X86_32 | ||
3074 | + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); | ||
3075 | #endif | ||
3076 | |||
3077 | - __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot); | ||
3078 | +#ifdef CONFIG_X86_64 | ||
3079 | + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | ||
3080 | + BUILD_BUG_ON(sizeof(exception_stacks) != | ||
3081 | + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); | ||
3082 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), | ||
3083 | + &per_cpu(exception_stacks, cpu), | ||
3084 | + sizeof(exception_stacks) / PAGE_SIZE, | ||
3085 | + PAGE_KERNEL); | ||
3086 | + | ||
3087 | + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), | ||
3088 | + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); | ||
3089 | +#endif | ||
3090 | +} | ||
3091 | + | ||
3092 | +void __init setup_cpu_entry_areas(void) | ||
3093 | +{ | ||
3094 | + unsigned int cpu; | ||
3095 | + | ||
3096 | + for_each_possible_cpu(cpu) | ||
3097 | + setup_cpu_entry_area(cpu); | ||
3098 | } | ||
3099 | |||
3100 | /* Load the original GDT from the per-cpu structure */ | ||
3101 | @@ -723,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c) | ||
3102 | { | ||
3103 | int i; | ||
3104 | |||
3105 | - for (i = 0; i < NCAPINTS; i++) { | ||
3106 | + for (i = 0; i < NCAPINTS + NBUGINTS; i++) { | ||
3107 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; | ||
3108 | c->x86_capability[i] |= cpu_caps_set[i]; | ||
3109 | } | ||
3110 | @@ -1225,7 +1314,7 @@ void enable_sep_cpu(void) | ||
3111 | return; | ||
3112 | |||
3113 | cpu = get_cpu(); | ||
3114 | - tss = &per_cpu(cpu_tss, cpu); | ||
3115 | + tss = &per_cpu(cpu_tss_rw, cpu); | ||
3116 | |||
3117 | /* | ||
3118 | * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- | ||
3119 | @@ -1234,11 +1323,7 @@ void enable_sep_cpu(void) | ||
3120 | |||
3121 | tss->x86_tss.ss1 = __KERNEL_CS; | ||
3122 | wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); | ||
3123 | - | ||
3124 | - wrmsr(MSR_IA32_SYSENTER_ESP, | ||
3125 | - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), | ||
3126 | - 0); | ||
3127 | - | ||
3128 | + wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0); | ||
3129 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); | ||
3130 | |||
3131 | put_cpu(); | ||
3132 | @@ -1301,18 +1386,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) | ||
3133 | pr_cont(")\n"); | ||
3134 | } | ||
3135 | |||
3136 | -static __init int setup_disablecpuid(char *arg) | ||
3137 | +/* | ||
3138 | + * clearcpuid= was already parsed in fpu__init_parse_early_param. | ||
3139 | + * But we need to keep a dummy __setup around otherwise it would | ||
3140 | + * show up as an environment variable for init. | ||
3141 | + */ | ||
3142 | +static __init int setup_clearcpuid(char *arg) | ||
3143 | { | ||
3144 | - int bit; | ||
3145 | - | ||
3146 | - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) | ||
3147 | - setup_clear_cpu_cap(bit); | ||
3148 | - else | ||
3149 | - return 0; | ||
3150 | - | ||
3151 | return 1; | ||
3152 | } | ||
3153 | -__setup("clearcpuid=", setup_disablecpuid); | ||
3154 | +__setup("clearcpuid=", setup_clearcpuid); | ||
3155 | |||
3156 | #ifdef CONFIG_X86_64 | ||
3157 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | ||
3158 | @@ -1334,25 +1417,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; | ||
3159 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; | ||
3160 | EXPORT_PER_CPU_SYMBOL(__preempt_count); | ||
3161 | |||
3162 | -/* | ||
3163 | - * Special IST stacks which the CPU switches to when it calls | ||
3164 | - * an IST-marked descriptor entry. Up to 7 stacks (hardware | ||
3165 | - * limit), all of them are 4K, except the debug stack which | ||
3166 | - * is 8K. | ||
3167 | - */ | ||
3168 | -static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | ||
3169 | - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | ||
3170 | - [DEBUG_STACK - 1] = DEBUG_STKSZ | ||
3171 | -}; | ||
3172 | - | ||
3173 | -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | ||
3174 | - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | ||
3175 | - | ||
3176 | /* May not be marked __init: used by software suspend */ | ||
3177 | void syscall_init(void) | ||
3178 | { | ||
3179 | + extern char _entry_trampoline[]; | ||
3180 | + extern char entry_SYSCALL_64_trampoline[]; | ||
3181 | + | ||
3182 | + int cpu = smp_processor_id(); | ||
3183 | + unsigned long SYSCALL64_entry_trampoline = | ||
3184 | + (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline + | ||
3185 | + (entry_SYSCALL_64_trampoline - _entry_trampoline); | ||
3186 | + | ||
3187 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); | ||
3188 | - wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); | ||
3189 | + wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline); | ||
3190 | |||
3191 | #ifdef CONFIG_IA32_EMULATION | ||
3192 | wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); | ||
3193 | @@ -1363,7 +1440,7 @@ void syscall_init(void) | ||
3194 | * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). | ||
3195 | */ | ||
3196 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | ||
3197 | - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); | ||
3198 | + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); | ||
3199 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); | ||
3200 | #else | ||
3201 | wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); | ||
3202 | @@ -1507,7 +1584,7 @@ void cpu_init(void) | ||
3203 | if (cpu) | ||
3204 | load_ucode_ap(); | ||
3205 | |||
3206 | - t = &per_cpu(cpu_tss, cpu); | ||
3207 | + t = &per_cpu(cpu_tss_rw, cpu); | ||
3208 | oist = &per_cpu(orig_ist, cpu); | ||
3209 | |||
3210 | #ifdef CONFIG_NUMA | ||
3211 | @@ -1546,7 +1623,7 @@ void cpu_init(void) | ||
3212 | * set up and load the per-CPU TSS | ||
3213 | */ | ||
3214 | if (!oist->ist[0]) { | ||
3215 | - char *estacks = per_cpu(exception_stacks, cpu); | ||
3216 | + char *estacks = get_cpu_entry_area(cpu)->exception_stacks; | ||
3217 | |||
3218 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | ||
3219 | estacks += exception_stack_sizes[v]; | ||
3220 | @@ -1557,7 +1634,7 @@ void cpu_init(void) | ||
3221 | } | ||
3222 | } | ||
3223 | |||
3224 | - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
3225 | + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
3226 | |||
3227 | /* | ||
3228 | * <= is required because the CPU will access up to | ||
3229 | @@ -1572,9 +1649,14 @@ void cpu_init(void) | ||
3230 | initialize_tlbstate_and_flush(); | ||
3231 | enter_lazy_tlb(&init_mm, me); | ||
3232 | |||
3233 | - load_sp0(t, &current->thread); | ||
3234 | - set_tss_desc(cpu, t); | ||
3235 | + /* | ||
3236 | + * Initialize the TSS. sp0 points to the entry trampoline stack | ||
3237 | + * regardless of what task is running. | ||
3238 | + */ | ||
3239 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | ||
3240 | load_TR_desc(); | ||
3241 | + load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1)); | ||
3242 | + | ||
3243 | load_mm_ldt(&init_mm); | ||
3244 | |||
3245 | clear_all_debug_regs(); | ||
3246 | @@ -1585,7 +1667,6 @@ void cpu_init(void) | ||
3247 | if (is_uv_system()) | ||
3248 | uv_cpu_init(); | ||
3249 | |||
3250 | - setup_fixmap_gdt(cpu); | ||
3251 | load_fixmap_gdt(cpu); | ||
3252 | } | ||
3253 | |||
3254 | @@ -1595,8 +1676,7 @@ void cpu_init(void) | ||
3255 | { | ||
3256 | int cpu = smp_processor_id(); | ||
3257 | struct task_struct *curr = current; | ||
3258 | - struct tss_struct *t = &per_cpu(cpu_tss, cpu); | ||
3259 | - struct thread_struct *thread = &curr->thread; | ||
3260 | + struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu); | ||
3261 | |||
3262 | wait_for_master_cpu(cpu); | ||
3263 | |||
3264 | @@ -1627,12 +1707,16 @@ void cpu_init(void) | ||
3265 | initialize_tlbstate_and_flush(); | ||
3266 | enter_lazy_tlb(&init_mm, curr); | ||
3267 | |||
3268 | - load_sp0(t, thread); | ||
3269 | - set_tss_desc(cpu, t); | ||
3270 | + /* | ||
3271 | + * Initialize the TSS. Don't bother initializing sp0, as the initial | ||
3272 | + * task never enters user mode. | ||
3273 | + */ | ||
3274 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | ||
3275 | load_TR_desc(); | ||
3276 | + | ||
3277 | load_mm_ldt(&init_mm); | ||
3278 | |||
3279 | - t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | ||
3280 | + t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
3281 | |||
3282 | #ifdef CONFIG_DOUBLEFAULT | ||
3283 | /* Set up doublefault TSS pointer in the GDT */ | ||
3284 | @@ -1644,7 +1728,6 @@ void cpu_init(void) | ||
3285 | |||
3286 | fpu__init_cpu(); | ||
3287 | |||
3288 | - setup_fixmap_gdt(cpu); | ||
3289 | load_fixmap_gdt(cpu); | ||
3290 | } | ||
3291 | #endif | ||
3292 | diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c | ||
3293 | new file mode 100644 | ||
3294 | index 000000000000..904b0a3c4e53 | ||
3295 | --- /dev/null | ||
3296 | +++ b/arch/x86/kernel/cpu/cpuid-deps.c | ||
3297 | @@ -0,0 +1,121 @@ | ||
3298 | +/* Declare dependencies between CPUIDs */ | ||
3299 | +#include <linux/kernel.h> | ||
3300 | +#include <linux/init.h> | ||
3301 | +#include <linux/module.h> | ||
3302 | +#include <asm/cpufeature.h> | ||
3303 | + | ||
3304 | +struct cpuid_dep { | ||
3305 | + unsigned int feature; | ||
3306 | + unsigned int depends; | ||
3307 | +}; | ||
3308 | + | ||
3309 | +/* | ||
3310 | + * Table of CPUID features that depend on others. | ||
3311 | + * | ||
3312 | + * This only includes dependencies that can be usefully disabled, not | ||
3313 | + * features part of the base set (like FPU). | ||
3314 | + * | ||
3315 | + * Note this all is not __init / __initdata because it can be | ||
3316 | + * called from cpu hotplug. It shouldn't do anything in this case, | ||
3317 | + * but it's difficult to tell that to the init reference checker. | ||
3318 | + */ | ||
3319 | +const static struct cpuid_dep cpuid_deps[] = { | ||
3320 | + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, | ||
3321 | + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, | ||
3322 | + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, | ||
3323 | + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, | ||
3324 | + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, | ||
3325 | + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, | ||
3326 | + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, | ||
3327 | + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, | ||
3328 | + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, | ||
3329 | + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, | ||
3330 | + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, | ||
3331 | + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, | ||
3332 | + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, | ||
3333 | + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, | ||
3334 | + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, | ||
3335 | + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, | ||
3336 | + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, | ||
3337 | + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, | ||
3338 | + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, | ||
3339 | + { X86_FEATURE_FMA, X86_FEATURE_AVX }, | ||
3340 | + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, | ||
3341 | + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, | ||
3342 | + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, | ||
3343 | + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, | ||
3344 | + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, | ||
3345 | + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, | ||
3346 | + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, | ||
3347 | + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, | ||
3348 | + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, | ||
3349 | + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, | ||
3350 | + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, | ||
3351 | + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, | ||
3352 | + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, | ||
3353 | + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, | ||
3354 | + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, | ||
3355 | + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, | ||
3356 | + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, | ||
3357 | + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, | ||
3358 | + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, | ||
3359 | + {} | ||
3360 | +}; | ||
3361 | + | ||
3362 | +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) | ||
3363 | +{ | ||
3364 | + /* | ||
3365 | + * Note: This could use the non atomic __*_bit() variants, but the | ||
3366 | + * rest of the cpufeature code uses atomics as well, so keep it for | ||
3367 | + * consistency. Cleanup all of it separately. | ||
3368 | + */ | ||
3369 | + if (!c) { | ||
3370 | + clear_cpu_cap(&boot_cpu_data, feature); | ||
3371 | + set_bit(feature, (unsigned long *)cpu_caps_cleared); | ||
3372 | + } else { | ||
3373 | + clear_bit(feature, (unsigned long *)c->x86_capability); | ||
3374 | + } | ||
3375 | +} | ||
3376 | + | ||
3377 | +/* Take the capabilities and the BUG bits into account */ | ||
3378 | +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) | ||
3379 | + | ||
3380 | +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) | ||
3381 | +{ | ||
3382 | + DECLARE_BITMAP(disable, MAX_FEATURE_BITS); | ||
3383 | + const struct cpuid_dep *d; | ||
3384 | + bool changed; | ||
3385 | + | ||
3386 | + if (WARN_ON(feature >= MAX_FEATURE_BITS)) | ||
3387 | + return; | ||
3388 | + | ||
3389 | + clear_feature(c, feature); | ||
3390 | + | ||
3391 | + /* Collect all features to disable, handling dependencies */ | ||
3392 | + memset(disable, 0, sizeof(disable)); | ||
3393 | + __set_bit(feature, disable); | ||
3394 | + | ||
3395 | + /* Loop until we get a stable state. */ | ||
3396 | + do { | ||
3397 | + changed = false; | ||
3398 | + for (d = cpuid_deps; d->feature; d++) { | ||
3399 | + if (!test_bit(d->depends, disable)) | ||
3400 | + continue; | ||
3401 | + if (__test_and_set_bit(d->feature, disable)) | ||
3402 | + continue; | ||
3403 | + | ||
3404 | + changed = true; | ||
3405 | + clear_feature(c, d->feature); | ||
3406 | + } | ||
3407 | + } while (changed); | ||
3408 | +} | ||
3409 | + | ||
3410 | +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) | ||
3411 | +{ | ||
3412 | + do_clear_cpu_cap(c, feature); | ||
3413 | +} | ||
3414 | + | ||
3415 | +void setup_clear_cpu_cap(unsigned int feature) | ||
3416 | +{ | ||
3417 | + do_clear_cpu_cap(NULL, feature); | ||
3418 | +} | ||
3419 | diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c | ||
3420 | index 4fa90006ac68..bea8d3e24f50 100644 | ||
3421 | --- a/arch/x86/kernel/cpu/hypervisor.c | ||
3422 | +++ b/arch/x86/kernel/cpu/hypervisor.c | ||
3423 | @@ -26,6 +26,12 @@ | ||
3424 | #include <asm/processor.h> | ||
3425 | #include <asm/hypervisor.h> | ||
3426 | |||
3427 | +extern const struct hypervisor_x86 x86_hyper_vmware; | ||
3428 | +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
3429 | +extern const struct hypervisor_x86 x86_hyper_xen_pv; | ||
3430 | +extern const struct hypervisor_x86 x86_hyper_xen_hvm; | ||
3431 | +extern const struct hypervisor_x86 x86_hyper_kvm; | ||
3432 | + | ||
3433 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
3434 | { | ||
3435 | #ifdef CONFIG_XEN_PV | ||
3436 | @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
3437 | #endif | ||
3438 | }; | ||
3439 | |||
3440 | -const struct hypervisor_x86 *x86_hyper; | ||
3441 | -EXPORT_SYMBOL(x86_hyper); | ||
3442 | +enum x86_hypervisor_type x86_hyper_type; | ||
3443 | +EXPORT_SYMBOL(x86_hyper_type); | ||
3444 | |||
3445 | -static inline void __init | ||
3446 | +static inline const struct hypervisor_x86 * __init | ||
3447 | detect_hypervisor_vendor(void) | ||
3448 | { | ||
3449 | - const struct hypervisor_x86 *h, * const *p; | ||
3450 | + const struct hypervisor_x86 *h = NULL, * const *p; | ||
3451 | uint32_t pri, max_pri = 0; | ||
3452 | |||
3453 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { | ||
3454 | - h = *p; | ||
3455 | - pri = h->detect(); | ||
3456 | - if (pri != 0 && pri > max_pri) { | ||
3457 | + pri = (*p)->detect(); | ||
3458 | + if (pri > max_pri) { | ||
3459 | max_pri = pri; | ||
3460 | - x86_hyper = h; | ||
3461 | + h = *p; | ||
3462 | } | ||
3463 | } | ||
3464 | |||
3465 | - if (max_pri) | ||
3466 | - pr_info("Hypervisor detected: %s\n", x86_hyper->name); | ||
3467 | + if (h) | ||
3468 | + pr_info("Hypervisor detected: %s\n", h->name); | ||
3469 | + | ||
3470 | + return h; | ||
3471 | } | ||
3472 | |||
3473 | -void __init init_hypervisor_platform(void) | ||
3474 | +static void __init copy_array(const void *src, void *target, unsigned int size) | ||
3475 | { | ||
3476 | + unsigned int i, n = size / sizeof(void *); | ||
3477 | + const void * const *from = (const void * const *)src; | ||
3478 | + const void **to = (const void **)target; | ||
3479 | |||
3480 | - detect_hypervisor_vendor(); | ||
3481 | - | ||
3482 | - if (!x86_hyper) | ||
3483 | - return; | ||
3484 | - | ||
3485 | - if (x86_hyper->init_platform) | ||
3486 | - x86_hyper->init_platform(); | ||
3487 | + for (i = 0; i < n; i++) | ||
3488 | + if (from[i]) | ||
3489 | + to[i] = from[i]; | ||
3490 | } | ||
3491 | |||
3492 | -bool __init hypervisor_x2apic_available(void) | ||
3493 | +void __init init_hypervisor_platform(void) | ||
3494 | { | ||
3495 | - return x86_hyper && | ||
3496 | - x86_hyper->x2apic_available && | ||
3497 | - x86_hyper->x2apic_available(); | ||
3498 | -} | ||
3499 | + const struct hypervisor_x86 *h; | ||
3500 | |||
3501 | -void hypervisor_pin_vcpu(int cpu) | ||
3502 | -{ | ||
3503 | - if (!x86_hyper) | ||
3504 | + h = detect_hypervisor_vendor(); | ||
3505 | + | ||
3506 | + if (!h) | ||
3507 | return; | ||
3508 | |||
3509 | - if (x86_hyper->pin_vcpu) | ||
3510 | - x86_hyper->pin_vcpu(cpu); | ||
3511 | - else | ||
3512 | - WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); | ||
3513 | + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); | ||
3514 | + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); | ||
3515 | + | ||
3516 | + x86_hyper_type = h->type; | ||
3517 | + x86_init.hyper.init_platform(); | ||
3518 | } | ||
3519 | diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c | ||
3520 | index 236324e83a3a..85eb5fc180c8 100644 | ||
3521 | --- a/arch/x86/kernel/cpu/mshyperv.c | ||
3522 | +++ b/arch/x86/kernel/cpu/mshyperv.c | ||
3523 | @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void) | ||
3524 | #endif | ||
3525 | } | ||
3526 | |||
3527 | -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
3528 | +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
3529 | .name = "Microsoft Hyper-V", | ||
3530 | .detect = ms_hyperv_platform, | ||
3531 | - .init_platform = ms_hyperv_init_platform, | ||
3532 | + .type = X86_HYPER_MS_HYPERV, | ||
3533 | + .init.init_platform = ms_hyperv_init_platform, | ||
3534 | }; | ||
3535 | -EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
3536 | diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c | ||
3537 | index 40ed26852ebd..8e005329648b 100644 | ||
3538 | --- a/arch/x86/kernel/cpu/vmware.c | ||
3539 | +++ b/arch/x86/kernel/cpu/vmware.c | ||
3540 | @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void) | ||
3541 | (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; | ||
3542 | } | ||
3543 | |||
3544 | -const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
3545 | +const __initconst struct hypervisor_x86 x86_hyper_vmware = { | ||
3546 | .name = "VMware", | ||
3547 | .detect = vmware_platform, | ||
3548 | - .init_platform = vmware_platform_setup, | ||
3549 | - .x2apic_available = vmware_legacy_x2apic_available, | ||
3550 | + .type = X86_HYPER_VMWARE, | ||
3551 | + .init.init_platform = vmware_platform_setup, | ||
3552 | + .init.x2apic_available = vmware_legacy_x2apic_available, | ||
3553 | }; | ||
3554 | -EXPORT_SYMBOL(x86_hyper_vmware); | ||
3555 | diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c | ||
3556 | index 0e662c55ae90..0b8cedb20d6d 100644 | ||
3557 | --- a/arch/x86/kernel/doublefault.c | ||
3558 | +++ b/arch/x86/kernel/doublefault.c | ||
3559 | @@ -50,25 +50,23 @@ static void doublefault_fn(void) | ||
3560 | cpu_relax(); | ||
3561 | } | ||
3562 | |||
3563 | -struct tss_struct doublefault_tss __cacheline_aligned = { | ||
3564 | - .x86_tss = { | ||
3565 | - .sp0 = STACK_START, | ||
3566 | - .ss0 = __KERNEL_DS, | ||
3567 | - .ldt = 0, | ||
3568 | - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
3569 | - | ||
3570 | - .ip = (unsigned long) doublefault_fn, | ||
3571 | - /* 0x2 bit is always set */ | ||
3572 | - .flags = X86_EFLAGS_SF | 0x2, | ||
3573 | - .sp = STACK_START, | ||
3574 | - .es = __USER_DS, | ||
3575 | - .cs = __KERNEL_CS, | ||
3576 | - .ss = __KERNEL_DS, | ||
3577 | - .ds = __USER_DS, | ||
3578 | - .fs = __KERNEL_PERCPU, | ||
3579 | - | ||
3580 | - .__cr3 = __pa_nodebug(swapper_pg_dir), | ||
3581 | - } | ||
3582 | +struct x86_hw_tss doublefault_tss __cacheline_aligned = { | ||
3583 | + .sp0 = STACK_START, | ||
3584 | + .ss0 = __KERNEL_DS, | ||
3585 | + .ldt = 0, | ||
3586 | + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
3587 | + | ||
3588 | + .ip = (unsigned long) doublefault_fn, | ||
3589 | + /* 0x2 bit is always set */ | ||
3590 | + .flags = X86_EFLAGS_SF | 0x2, | ||
3591 | + .sp = STACK_START, | ||
3592 | + .es = __USER_DS, | ||
3593 | + .cs = __KERNEL_CS, | ||
3594 | + .ss = __KERNEL_DS, | ||
3595 | + .ds = __USER_DS, | ||
3596 | + .fs = __KERNEL_PERCPU, | ||
3597 | + | ||
3598 | + .__cr3 = __pa_nodebug(swapper_pg_dir), | ||
3599 | }; | ||
3600 | |||
3601 | /* dummy for do_double_fault() call */ | ||
3602 | diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c | ||
3603 | index f13b4c00a5de..bbd6d986e2d0 100644 | ||
3604 | --- a/arch/x86/kernel/dumpstack.c | ||
3605 | +++ b/arch/x86/kernel/dumpstack.c | ||
3606 | @@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, | ||
3607 | return true; | ||
3608 | } | ||
3609 | |||
3610 | +bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) | ||
3611 | +{ | ||
3612 | + struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id()); | ||
3613 | + | ||
3614 | + void *begin = ss; | ||
3615 | + void *end = ss + 1; | ||
3616 | + | ||
3617 | + if ((void *)stack < begin || (void *)stack >= end) | ||
3618 | + return false; | ||
3619 | + | ||
3620 | + info->type = STACK_TYPE_SYSENTER; | ||
3621 | + info->begin = begin; | ||
3622 | + info->end = end; | ||
3623 | + info->next_sp = NULL; | ||
3624 | + | ||
3625 | + return true; | ||
3626 | +} | ||
3627 | + | ||
3628 | static void printk_stack_address(unsigned long address, int reliable, | ||
3629 | char *log_lvl) | ||
3630 | { | ||
3631 | @@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable, | ||
3632 | printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); | ||
3633 | } | ||
3634 | |||
3635 | +void show_iret_regs(struct pt_regs *regs) | ||
3636 | +{ | ||
3637 | + printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); | ||
3638 | + printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, | ||
3639 | + regs->sp, regs->flags); | ||
3640 | +} | ||
3641 | + | ||
3642 | +static void show_regs_safe(struct stack_info *info, struct pt_regs *regs) | ||
3643 | +{ | ||
3644 | + if (on_stack(info, regs, sizeof(*regs))) | ||
3645 | + __show_regs(regs, 0); | ||
3646 | + else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET, | ||
3647 | + IRET_FRAME_SIZE)) { | ||
3648 | + /* | ||
3649 | + * When an interrupt or exception occurs in entry code, the | ||
3650 | + * full pt_regs might not have been saved yet. In that case | ||
3651 | + * just print the iret frame. | ||
3652 | + */ | ||
3653 | + show_iret_regs(regs); | ||
3654 | + } | ||
3655 | +} | ||
3656 | + | ||
3657 | void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
3658 | unsigned long *stack, char *log_lvl) | ||
3659 | { | ||
3660 | @@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
3661 | * - task stack | ||
3662 | * - interrupt stack | ||
3663 | * - HW exception stacks (double fault, nmi, debug, mce) | ||
3664 | + * - SYSENTER stack | ||
3665 | * | ||
3666 | - * x86-32 can have up to three stacks: | ||
3667 | + * x86-32 can have up to four stacks: | ||
3668 | * - task stack | ||
3669 | * - softirq stack | ||
3670 | * - hardirq stack | ||
3671 | + * - SYSENTER stack | ||
3672 | */ | ||
3673 | for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { | ||
3674 | const char *stack_name; | ||
3675 | |||
3676 | - /* | ||
3677 | - * If we overflowed the task stack into a guard page, jump back | ||
3678 | - * to the bottom of the usable stack. | ||
3679 | - */ | ||
3680 | - if (task_stack_page(task) - (void *)stack < PAGE_SIZE) | ||
3681 | - stack = task_stack_page(task); | ||
3682 | - | ||
3683 | - if (get_stack_info(stack, task, &stack_info, &visit_mask)) | ||
3684 | - break; | ||
3685 | + if (get_stack_info(stack, task, &stack_info, &visit_mask)) { | ||
3686 | + /* | ||
3687 | + * We weren't on a valid stack. It's possible that | ||
3688 | + * we overflowed a valid stack into a guard page. | ||
3689 | + * See if the next page up is valid so that we can | ||
3690 | + * generate some kind of backtrace if this happens. | ||
3691 | + */ | ||
3692 | + stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack); | ||
3693 | + if (get_stack_info(stack, task, &stack_info, &visit_mask)) | ||
3694 | + break; | ||
3695 | + } | ||
3696 | |||
3697 | stack_name = stack_type_name(stack_info.type); | ||
3698 | if (stack_name) | ||
3699 | printk("%s <%s>\n", log_lvl, stack_name); | ||
3700 | |||
3701 | - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) | ||
3702 | - __show_regs(regs, 0); | ||
3703 | + if (regs) | ||
3704 | + show_regs_safe(&stack_info, regs); | ||
3705 | |||
3706 | /* | ||
3707 | * Scan the stack, printing any text addresses we find. At the | ||
3708 | @@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
3709 | |||
3710 | /* | ||
3711 | * Don't print regs->ip again if it was already printed | ||
3712 | - * by __show_regs() below. | ||
3713 | + * by show_regs_safe() below. | ||
3714 | */ | ||
3715 | if (regs && stack == &regs->ip) | ||
3716 | goto next; | ||
3717 | @@ -155,8 +199,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
3718 | |||
3719 | /* if the frame has entry regs, print them */ | ||
3720 | regs = unwind_get_entry_regs(&state); | ||
3721 | - if (regs && on_stack(&stack_info, regs, sizeof(*regs))) | ||
3722 | - __show_regs(regs, 0); | ||
3723 | + if (regs) | ||
3724 | + show_regs_safe(&stack_info, regs); | ||
3725 | } | ||
3726 | |||
3727 | if (stack_name) | ||
3728 | diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c | ||
3729 | index daefae83a3aa..5ff13a6b3680 100644 | ||
3730 | --- a/arch/x86/kernel/dumpstack_32.c | ||
3731 | +++ b/arch/x86/kernel/dumpstack_32.c | ||
3732 | @@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type) | ||
3733 | if (type == STACK_TYPE_SOFTIRQ) | ||
3734 | return "SOFTIRQ"; | ||
3735 | |||
3736 | + if (type == STACK_TYPE_SYSENTER) | ||
3737 | + return "SYSENTER"; | ||
3738 | + | ||
3739 | return NULL; | ||
3740 | } | ||
3741 | |||
3742 | @@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, | ||
3743 | if (task != current) | ||
3744 | goto unknown; | ||
3745 | |||
3746 | + if (in_sysenter_stack(stack, info)) | ||
3747 | + goto recursion_check; | ||
3748 | + | ||
3749 | if (in_hardirq_stack(stack, info)) | ||
3750 | goto recursion_check; | ||
3751 | |||
3752 | diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c | ||
3753 | index 88ce2ffdb110..abc828f8c297 100644 | ||
3754 | --- a/arch/x86/kernel/dumpstack_64.c | ||
3755 | +++ b/arch/x86/kernel/dumpstack_64.c | ||
3756 | @@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type) | ||
3757 | if (type == STACK_TYPE_IRQ) | ||
3758 | return "IRQ"; | ||
3759 | |||
3760 | + if (type == STACK_TYPE_SYSENTER) | ||
3761 | + return "SYSENTER"; | ||
3762 | + | ||
3763 | if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST) | ||
3764 | return exception_stack_names[type - STACK_TYPE_EXCEPTION]; | ||
3765 | |||
3766 | @@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task, | ||
3767 | if (in_irq_stack(stack, info)) | ||
3768 | goto recursion_check; | ||
3769 | |||
3770 | + if (in_sysenter_stack(stack, info)) | ||
3771 | + goto recursion_check; | ||
3772 | + | ||
3773 | goto unknown; | ||
3774 | |||
3775 | recursion_check: | ||
3776 | diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c | ||
3777 | index 7affb7e3d9a5..6abd83572b01 100644 | ||
3778 | --- a/arch/x86/kernel/fpu/init.c | ||
3779 | +++ b/arch/x86/kernel/fpu/init.c | ||
3780 | @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) | ||
3781 | */ | ||
3782 | static void __init fpu__init_parse_early_param(void) | ||
3783 | { | ||
3784 | + char arg[32]; | ||
3785 | + char *argptr = arg; | ||
3786 | + int bit; | ||
3787 | + | ||
3788 | if (cmdline_find_option_bool(boot_command_line, "no387")) | ||
3789 | setup_clear_cpu_cap(X86_FEATURE_FPU); | ||
3790 | |||
3791 | @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) | ||
3792 | |||
3793 | if (cmdline_find_option_bool(boot_command_line, "noxsaves")) | ||
3794 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
3795 | + | ||
3796 | + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, | ||
3797 | + sizeof(arg)) && | ||
3798 | + get_option(&argptr, &bit) && | ||
3799 | + bit >= 0 && | ||
3800 | + bit < NCAPINTS * 32) | ||
3801 | + setup_clear_cpu_cap(bit); | ||
3802 | } | ||
3803 | |||
3804 | /* | ||
3805 | diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c | ||
3806 | index f1d5476c9022..87a57b7642d3 100644 | ||
3807 | --- a/arch/x86/kernel/fpu/xstate.c | ||
3808 | +++ b/arch/x86/kernel/fpu/xstate.c | ||
3809 | @@ -15,6 +15,7 @@ | ||
3810 | #include <asm/fpu/xstate.h> | ||
3811 | |||
3812 | #include <asm/tlbflush.h> | ||
3813 | +#include <asm/cpufeature.h> | ||
3814 | |||
3815 | /* | ||
3816 | * Although we spell it out in here, the Processor Trace | ||
3817 | @@ -36,6 +37,19 @@ static const char *xfeature_names[] = | ||
3818 | "unknown xstate feature" , | ||
3819 | }; | ||
3820 | |||
3821 | +static short xsave_cpuid_features[] __initdata = { | ||
3822 | + X86_FEATURE_FPU, | ||
3823 | + X86_FEATURE_XMM, | ||
3824 | + X86_FEATURE_AVX, | ||
3825 | + X86_FEATURE_MPX, | ||
3826 | + X86_FEATURE_MPX, | ||
3827 | + X86_FEATURE_AVX512F, | ||
3828 | + X86_FEATURE_AVX512F, | ||
3829 | + X86_FEATURE_AVX512F, | ||
3830 | + X86_FEATURE_INTEL_PT, | ||
3831 | + X86_FEATURE_PKU, | ||
3832 | +}; | ||
3833 | + | ||
3834 | /* | ||
3835 | * Mask of xstate features supported by the CPU and the kernel: | ||
3836 | */ | ||
3837 | @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size; | ||
3838 | void fpu__xstate_clear_all_cpu_caps(void) | ||
3839 | { | ||
3840 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
3841 | - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
3842 | - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); | ||
3843 | - setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
3844 | - setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
3845 | - setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
3846 | - setup_clear_cpu_cap(X86_FEATURE_AVX512F); | ||
3847 | - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); | ||
3848 | - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | ||
3849 | - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | ||
3850 | - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | ||
3851 | - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); | ||
3852 | - setup_clear_cpu_cap(X86_FEATURE_AVX512BW); | ||
3853 | - setup_clear_cpu_cap(X86_FEATURE_AVX512VL); | ||
3854 | - setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
3855 | - setup_clear_cpu_cap(X86_FEATURE_XGETBV1); | ||
3856 | - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); | ||
3857 | - setup_clear_cpu_cap(X86_FEATURE_PKU); | ||
3858 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); | ||
3859 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); | ||
3860 | - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); | ||
3861 | } | ||
3862 | |||
3863 | /* | ||
3864 | @@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void) | ||
3865 | unsigned int eax, ebx, ecx, edx; | ||
3866 | static int on_boot_cpu __initdata = 1; | ||
3867 | int err; | ||
3868 | + int i; | ||
3869 | |||
3870 | WARN_ON_FPU(!on_boot_cpu); | ||
3871 | on_boot_cpu = 0; | ||
3872 | @@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void) | ||
3873 | goto out_disable; | ||
3874 | } | ||
3875 | |||
3876 | + /* | ||
3877 | + * Clear XSAVE features that are disabled in the normal CPUID. | ||
3878 | + */ | ||
3879 | + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { | ||
3880 | + if (!boot_cpu_has(xsave_cpuid_features[i])) | ||
3881 | + xfeatures_mask &= ~BIT(i); | ||
3882 | + } | ||
3883 | + | ||
3884 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); | ||
3885 | |||
3886 | /* Enable xstate instructions to be able to continue with initialization: */ | ||
3887 | diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S | ||
3888 | index f1d528bb66a6..c29020907886 100644 | ||
3889 | --- a/arch/x86/kernel/head_32.S | ||
3890 | +++ b/arch/x86/kernel/head_32.S | ||
3891 | @@ -212,9 +212,6 @@ ENTRY(startup_32_smp) | ||
3892 | #endif | ||
3893 | |||
3894 | .Ldefault_entry: | ||
3895 | -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
3896 | - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
3897 | - X86_CR0_PG) | ||
3898 | movl $(CR0_STATE & ~X86_CR0_PG),%eax | ||
3899 | movl %eax,%cr0 | ||
3900 | |||
3901 | @@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array) | ||
3902 | # 24(%rsp) error code | ||
3903 | i = 0 | ||
3904 | .rept NUM_EXCEPTION_VECTORS | ||
3905 | - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 | ||
3906 | + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 | ||
3907 | pushl $0 # Dummy error code, to make stack frame uniform | ||
3908 | .endif | ||
3909 | pushl $i # 20(%esp) Vector number | ||
3910 | diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S | ||
3911 | index 6dde3f3fc1f8..7dca675fe78d 100644 | ||
3912 | --- a/arch/x86/kernel/head_64.S | ||
3913 | +++ b/arch/x86/kernel/head_64.S | ||
3914 | @@ -38,11 +38,12 @@ | ||
3915 | * | ||
3916 | */ | ||
3917 | |||
3918 | -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) | ||
3919 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) | ||
3920 | |||
3921 | +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) | ||
3922 | PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) | ||
3923 | PGD_START_KERNEL = pgd_index(__START_KERNEL_map) | ||
3924 | +#endif | ||
3925 | L3_START_KERNEL = pud_index(__START_KERNEL_map) | ||
3926 | |||
3927 | .text | ||
3928 | @@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) | ||
3929 | .code64 | ||
3930 | .globl startup_64 | ||
3931 | startup_64: | ||
3932 | + UNWIND_HINT_EMPTY | ||
3933 | /* | ||
3934 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, | ||
3935 | * and someone has loaded an identity mapped page table | ||
3936 | @@ -89,6 +91,7 @@ startup_64: | ||
3937 | addq $(early_top_pgt - __START_KERNEL_map), %rax | ||
3938 | jmp 1f | ||
3939 | ENTRY(secondary_startup_64) | ||
3940 | + UNWIND_HINT_EMPTY | ||
3941 | /* | ||
3942 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, | ||
3943 | * and someone has loaded a mapped page table. | ||
3944 | @@ -133,6 +136,7 @@ ENTRY(secondary_startup_64) | ||
3945 | movq $1f, %rax | ||
3946 | jmp *%rax | ||
3947 | 1: | ||
3948 | + UNWIND_HINT_EMPTY | ||
3949 | |||
3950 | /* Check if nx is implemented */ | ||
3951 | movl $0x80000001, %eax | ||
3952 | @@ -150,9 +154,6 @@ ENTRY(secondary_startup_64) | ||
3953 | 1: wrmsr /* Make changes effective */ | ||
3954 | |||
3955 | /* Setup cr0 */ | ||
3956 | -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ | ||
3957 | - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ | ||
3958 | - X86_CR0_PG) | ||
3959 | movl $CR0_STATE, %eax | ||
3960 | /* Make changes effective */ | ||
3961 | movq %rax, %cr0 | ||
3962 | @@ -235,7 +236,7 @@ ENTRY(secondary_startup_64) | ||
3963 | pushq %rax # target address in negative space | ||
3964 | lretq | ||
3965 | .Lafter_lret: | ||
3966 | -ENDPROC(secondary_startup_64) | ||
3967 | +END(secondary_startup_64) | ||
3968 | |||
3969 | #include "verify_cpu.S" | ||
3970 | |||
3971 | @@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64) | ||
3972 | */ | ||
3973 | ENTRY(start_cpu0) | ||
3974 | movq initial_stack(%rip), %rsp | ||
3975 | + UNWIND_HINT_EMPTY | ||
3976 | jmp .Ljump_to_C_code | ||
3977 | ENDPROC(start_cpu0) | ||
3978 | #endif | ||
3979 | @@ -266,26 +268,24 @@ ENDPROC(start_cpu0) | ||
3980 | .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS | ||
3981 | __FINITDATA | ||
3982 | |||
3983 | -bad_address: | ||
3984 | - jmp bad_address | ||
3985 | - | ||
3986 | __INIT | ||
3987 | ENTRY(early_idt_handler_array) | ||
3988 | - # 104(%rsp) %rflags | ||
3989 | - # 96(%rsp) %cs | ||
3990 | - # 88(%rsp) %rip | ||
3991 | - # 80(%rsp) error code | ||
3992 | i = 0 | ||
3993 | .rept NUM_EXCEPTION_VECTORS | ||
3994 | - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 | ||
3995 | - pushq $0 # Dummy error code, to make stack frame uniform | ||
3996 | + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 | ||
3997 | + UNWIND_HINT_IRET_REGS | ||
3998 | + pushq $0 # Dummy error code, to make stack frame uniform | ||
3999 | + .else | ||
4000 | + UNWIND_HINT_IRET_REGS offset=8 | ||
4001 | .endif | ||
4002 | pushq $i # 72(%rsp) Vector number | ||
4003 | jmp early_idt_handler_common | ||
4004 | + UNWIND_HINT_IRET_REGS | ||
4005 | i = i + 1 | ||
4006 | .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc | ||
4007 | .endr | ||
4008 | -ENDPROC(early_idt_handler_array) | ||
4009 | + UNWIND_HINT_IRET_REGS offset=16 | ||
4010 | +END(early_idt_handler_array) | ||
4011 | |||
4012 | early_idt_handler_common: | ||
4013 | /* | ||
4014 | @@ -313,6 +313,7 @@ early_idt_handler_common: | ||
4015 | pushq %r13 /* pt_regs->r13 */ | ||
4016 | pushq %r14 /* pt_regs->r14 */ | ||
4017 | pushq %r15 /* pt_regs->r15 */ | ||
4018 | + UNWIND_HINT_REGS | ||
4019 | |||
4020 | cmpq $14,%rsi /* Page fault? */ | ||
4021 | jnz 10f | ||
4022 | @@ -327,8 +328,8 @@ early_idt_handler_common: | ||
4023 | |||
4024 | 20: | ||
4025 | decl early_recursion_flag(%rip) | ||
4026 | - jmp restore_regs_and_iret | ||
4027 | -ENDPROC(early_idt_handler_common) | ||
4028 | + jmp restore_regs_and_return_to_kernel | ||
4029 | +END(early_idt_handler_common) | ||
4030 | |||
4031 | __INITDATA | ||
4032 | |||
4033 | @@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts) | ||
4034 | |||
4035 | .data | ||
4036 | |||
4037 | -#ifndef CONFIG_XEN | ||
4038 | -NEXT_PAGE(init_top_pgt) | ||
4039 | - .fill 512,8,0 | ||
4040 | -#else | ||
4041 | +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) | ||
4042 | NEXT_PAGE(init_top_pgt) | ||
4043 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC | ||
4044 | .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 | ||
4045 | @@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt) | ||
4046 | * Don't set NX because code runs from these pages. | ||
4047 | */ | ||
4048 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
4049 | +#else | ||
4050 | +NEXT_PAGE(init_top_pgt) | ||
4051 | + .fill 512,8,0 | ||
4052 | #endif | ||
4053 | |||
4054 | #ifdef CONFIG_X86_5LEVEL | ||
4055 | @@ -435,7 +436,7 @@ ENTRY(phys_base) | ||
4056 | EXPORT_SYMBOL(phys_base) | ||
4057 | |||
4058 | #include "../../x86/xen/xen-head.S" | ||
4059 | - | ||
4060 | + | ||
4061 | __PAGE_ALIGNED_BSS | ||
4062 | NEXT_PAGE(empty_zero_page) | ||
4063 | .skip PAGE_SIZE | ||
4064 | diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c | ||
4065 | index 3feb648781c4..2f723301eb58 100644 | ||
4066 | --- a/arch/x86/kernel/ioport.c | ||
4067 | +++ b/arch/x86/kernel/ioport.c | ||
4068 | @@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | ||
4069 | * because the ->io_bitmap_max value must match the bitmap | ||
4070 | * contents: | ||
4071 | */ | ||
4072 | - tss = &per_cpu(cpu_tss, get_cpu()); | ||
4073 | + tss = &per_cpu(cpu_tss_rw, get_cpu()); | ||
4074 | |||
4075 | if (turn_on) | ||
4076 | bitmap_clear(t->io_bitmap_ptr, from, num); | ||
4077 | diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c | ||
4078 | index 52089c043160..aa9d51eea9d0 100644 | ||
4079 | --- a/arch/x86/kernel/irq.c | ||
4080 | +++ b/arch/x86/kernel/irq.c | ||
4081 | @@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | ||
4082 | /* high bit used in ret_from_ code */ | ||
4083 | unsigned vector = ~regs->orig_ax; | ||
4084 | |||
4085 | - /* | ||
4086 | - * NB: Unlike exception entries, IRQ entries do not reliably | ||
4087 | - * handle context tracking in the low-level entry code. This is | ||
4088 | - * because syscall entries execute briefly with IRQs on before | ||
4089 | - * updating context tracking state, so we can take an IRQ from | ||
4090 | - * kernel mode with CONTEXT_USER. The low-level entry code only | ||
4091 | - * updates the context if we came from user mode, so we won't | ||
4092 | - * switch to CONTEXT_KERNEL. We'll fix that once the syscall | ||
4093 | - * code is cleaned up enough that we can cleanly defer enabling | ||
4094 | - * IRQs. | ||
4095 | - */ | ||
4096 | - | ||
4097 | entering_irq(); | ||
4098 | |||
4099 | /* entering_irq() tells RCU that we're not quiescent. Check it. */ | ||
4100 | diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c | ||
4101 | index 020efbf5786b..d86e344f5b3d 100644 | ||
4102 | --- a/arch/x86/kernel/irq_64.c | ||
4103 | +++ b/arch/x86/kernel/irq_64.c | ||
4104 | @@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs) | ||
4105 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) | ||
4106 | return; | ||
4107 | |||
4108 | - WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n", | ||
4109 | + WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", | ||
4110 | current->comm, curbase, regs->sp, | ||
4111 | irq_stack_top, irq_stack_bottom, | ||
4112 | - estack_top, estack_bottom); | ||
4113 | + estack_top, estack_bottom, (void *)regs->ip); | ||
4114 | |||
4115 | if (sysctl_panic_on_stackoverflow) | ||
4116 | panic("low stack detected by irq handler - check messages\n"); | ||
4117 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c | ||
4118 | index 8bb9594d0761..a94de09edbed 100644 | ||
4119 | --- a/arch/x86/kernel/kvm.c | ||
4120 | +++ b/arch/x86/kernel/kvm.c | ||
4121 | @@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void) | ||
4122 | return kvm_cpuid_base(); | ||
4123 | } | ||
4124 | |||
4125 | -const struct hypervisor_x86 x86_hyper_kvm __refconst = { | ||
4126 | +const __initconst struct hypervisor_x86 x86_hyper_kvm = { | ||
4127 | .name = "KVM", | ||
4128 | .detect = kvm_detect, | ||
4129 | - .x2apic_available = kvm_para_available, | ||
4130 | + .type = X86_HYPER_KVM, | ||
4131 | + .init.x2apic_available = kvm_para_available, | ||
4132 | }; | ||
4133 | -EXPORT_SYMBOL_GPL(x86_hyper_kvm); | ||
4134 | |||
4135 | static __init int activate_jump_labels(void) | ||
4136 | { | ||
4137 | diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c | ||
4138 | index ae5615b03def..1c1eae961340 100644 | ||
4139 | --- a/arch/x86/kernel/ldt.c | ||
4140 | +++ b/arch/x86/kernel/ldt.c | ||
4141 | @@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) | ||
4142 | static void install_ldt(struct mm_struct *current_mm, | ||
4143 | struct ldt_struct *ldt) | ||
4144 | { | ||
4145 | - /* Synchronizes with lockless_dereference in load_mm_ldt. */ | ||
4146 | + /* Synchronizes with READ_ONCE in load_mm_ldt. */ | ||
4147 | smp_store_release(&current_mm->context.ldt, ldt); | ||
4148 | |||
4149 | /* Activate the LDT for all CPUs using current_mm. */ | ||
4150 | diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c | ||
4151 | index ac0be8283325..9edadabf04f6 100644 | ||
4152 | --- a/arch/x86/kernel/paravirt_patch_64.c | ||
4153 | +++ b/arch/x86/kernel/paravirt_patch_64.c | ||
4154 | @@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); | ||
4155 | DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); | ||
4156 | DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); | ||
4157 | DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); | ||
4158 | -DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)"); | ||
4159 | DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); | ||
4160 | |||
4161 | DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); | ||
4162 | @@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, | ||
4163 | PATCH_SITE(pv_mmu_ops, read_cr2); | ||
4164 | PATCH_SITE(pv_mmu_ops, read_cr3); | ||
4165 | PATCH_SITE(pv_mmu_ops, write_cr3); | ||
4166 | - PATCH_SITE(pv_mmu_ops, flush_tlb_single); | ||
4167 | PATCH_SITE(pv_cpu_ops, wbinvd); | ||
4168 | #if defined(CONFIG_PARAVIRT_SPINLOCKS) | ||
4169 | case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock): | ||
4170 | diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c | ||
4171 | index c67685337c5a..517415978409 100644 | ||
4172 | --- a/arch/x86/kernel/process.c | ||
4173 | +++ b/arch/x86/kernel/process.c | ||
4174 | @@ -47,9 +47,25 @@ | ||
4175 | * section. Since TSS's are completely CPU-local, we want them | ||
4176 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. | ||
4177 | */ | ||
4178 | -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { | ||
4179 | +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = { | ||
4180 | .x86_tss = { | ||
4181 | - .sp0 = TOP_OF_INIT_STACK, | ||
4182 | + /* | ||
4183 | + * .sp0 is only used when entering ring 0 from a lower | ||
4184 | + * privilege level. Since the init task never runs anything | ||
4185 | + * but ring 0 code, there is no need for a valid value here. | ||
4186 | + * Poison it. | ||
4187 | + */ | ||
4188 | + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, | ||
4189 | + | ||
4190 | +#ifdef CONFIG_X86_64 | ||
4191 | + /* | ||
4192 | + * .sp1 is cpu_current_top_of_stack. The init task never | ||
4193 | + * runs user code, but cpu_current_top_of_stack should still | ||
4194 | + * be well defined before the first context switch. | ||
4195 | + */ | ||
4196 | + .sp1 = TOP_OF_INIT_STACK, | ||
4197 | +#endif | ||
4198 | + | ||
4199 | #ifdef CONFIG_X86_32 | ||
4200 | .ss0 = __KERNEL_DS, | ||
4201 | .ss1 = __KERNEL_CS, | ||
4202 | @@ -65,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { | ||
4203 | */ | ||
4204 | .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, | ||
4205 | #endif | ||
4206 | -#ifdef CONFIG_X86_32 | ||
4207 | - .SYSENTER_stack_canary = STACK_END_MAGIC, | ||
4208 | -#endif | ||
4209 | }; | ||
4210 | -EXPORT_PER_CPU_SYMBOL(cpu_tss); | ||
4211 | +EXPORT_PER_CPU_SYMBOL(cpu_tss_rw); | ||
4212 | |||
4213 | DEFINE_PER_CPU(bool, __tss_limit_invalid); | ||
4214 | EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); | ||
4215 | @@ -98,7 +111,7 @@ void exit_thread(struct task_struct *tsk) | ||
4216 | struct fpu *fpu = &t->fpu; | ||
4217 | |||
4218 | if (bp) { | ||
4219 | - struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); | ||
4220 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu()); | ||
4221 | |||
4222 | t->io_bitmap_ptr = NULL; | ||
4223 | clear_thread_flag(TIF_IO_BITMAP); | ||
4224 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | ||
4225 | index 11966251cd42..5224c6099184 100644 | ||
4226 | --- a/arch/x86/kernel/process_32.c | ||
4227 | +++ b/arch/x86/kernel/process_32.c | ||
4228 | @@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
4229 | struct fpu *prev_fpu = &prev->fpu; | ||
4230 | struct fpu *next_fpu = &next->fpu; | ||
4231 | int cpu = smp_processor_id(); | ||
4232 | - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | ||
4233 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); | ||
4234 | |||
4235 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | ||
4236 | |||
4237 | @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
4238 | |||
4239 | /* | ||
4240 | * Reload esp0 and cpu_current_top_of_stack. This changes | ||
4241 | - * current_thread_info(). | ||
4242 | + * current_thread_info(). Refresh the SYSENTER configuration in | ||
4243 | + * case prev or next is vm86. | ||
4244 | */ | ||
4245 | - load_sp0(tss, next); | ||
4246 | + update_sp0(next_p); | ||
4247 | + refresh_sysenter_cs(next); | ||
4248 | this_cpu_write(cpu_current_top_of_stack, | ||
4249 | (unsigned long)task_stack_page(next_p) + | ||
4250 | THREAD_SIZE); | ||
4251 | diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c | ||
4252 | index 302e7b2572d1..c75466232016 100644 | ||
4253 | --- a/arch/x86/kernel/process_64.c | ||
4254 | +++ b/arch/x86/kernel/process_64.c | ||
4255 | @@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all) | ||
4256 | unsigned int fsindex, gsindex; | ||
4257 | unsigned int ds, cs, es; | ||
4258 | |||
4259 | - printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip); | ||
4260 | - printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss, | ||
4261 | - regs->sp, regs->flags); | ||
4262 | + show_iret_regs(regs); | ||
4263 | + | ||
4264 | if (regs->orig_ax != -1) | ||
4265 | pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax); | ||
4266 | else | ||
4267 | @@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all) | ||
4268 | printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", | ||
4269 | regs->r13, regs->r14, regs->r15); | ||
4270 | |||
4271 | + if (!all) | ||
4272 | + return; | ||
4273 | + | ||
4274 | asm("movl %%ds,%0" : "=r" (ds)); | ||
4275 | asm("movl %%cs,%0" : "=r" (cs)); | ||
4276 | asm("movl %%es,%0" : "=r" (es)); | ||
4277 | @@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all) | ||
4278 | rdmsrl(MSR_GS_BASE, gs); | ||
4279 | rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); | ||
4280 | |||
4281 | - if (!all) | ||
4282 | - return; | ||
4283 | - | ||
4284 | cr0 = read_cr0(); | ||
4285 | cr2 = read_cr2(); | ||
4286 | cr3 = __read_cr3(); | ||
4287 | @@ -274,7 +273,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, | ||
4288 | struct inactive_task_frame *frame; | ||
4289 | struct task_struct *me = current; | ||
4290 | |||
4291 | - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; | ||
4292 | childregs = task_pt_regs(p); | ||
4293 | fork_frame = container_of(childregs, struct fork_frame, regs); | ||
4294 | frame = &fork_frame->frame; | ||
4295 | @@ -401,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
4296 | struct fpu *prev_fpu = &prev->fpu; | ||
4297 | struct fpu *next_fpu = &next->fpu; | ||
4298 | int cpu = smp_processor_id(); | ||
4299 | - struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | ||
4300 | + struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); | ||
4301 | |||
4302 | WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && | ||
4303 | this_cpu_read(irq_count) != -1); | ||
4304 | @@ -463,9 +461,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
4305 | * Switch the PDA and FPU contexts. | ||
4306 | */ | ||
4307 | this_cpu_write(current_task, next_p); | ||
4308 | + this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); | ||
4309 | |||
4310 | - /* Reload esp0 and ss1. This changes current_thread_info(). */ | ||
4311 | - load_sp0(tss, next); | ||
4312 | + /* Reload sp0. */ | ||
4313 | + update_sp0(next_p); | ||
4314 | |||
4315 | /* | ||
4316 | * Now maybe reload the debug registers and handle I/O bitmaps | ||
4317 | diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c | ||
4318 | index 5e0453f18a57..142126ab5aae 100644 | ||
4319 | --- a/arch/x86/kernel/smpboot.c | ||
4320 | +++ b/arch/x86/kernel/smpboot.c | ||
4321 | @@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) | ||
4322 | #ifdef CONFIG_X86_32 | ||
4323 | /* Stack for startup_32 can be just as for start_secondary onwards */ | ||
4324 | irq_ctx_init(cpu); | ||
4325 | - per_cpu(cpu_current_top_of_stack, cpu) = | ||
4326 | - (unsigned long)task_stack_page(idle) + THREAD_SIZE; | ||
4327 | + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); | ||
4328 | #else | ||
4329 | initial_gs = per_cpu_offset(cpu); | ||
4330 | #endif | ||
4331 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c | ||
4332 | index 5a6b8f809792..74136fd16f49 100644 | ||
4333 | --- a/arch/x86/kernel/traps.c | ||
4334 | +++ b/arch/x86/kernel/traps.c | ||
4335 | @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) | ||
4336 | * will catch asm bugs and any attempt to use ist_preempt_enable | ||
4337 | * from double_fault. | ||
4338 | */ | ||
4339 | - BUG_ON((unsigned long)(current_top_of_stack() - | ||
4340 | - current_stack_pointer) >= THREAD_SIZE); | ||
4341 | + BUG_ON(!on_thread_stack()); | ||
4342 | |||
4343 | preempt_enable_no_resched(); | ||
4344 | } | ||
4345 | @@ -349,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
4346 | |||
4347 | /* | ||
4348 | * If IRET takes a non-IST fault on the espfix64 stack, then we | ||
4349 | - * end up promoting it to a doublefault. In that case, modify | ||
4350 | - * the stack to make it look like we just entered the #GP | ||
4351 | - * handler from user space, similar to bad_iret. | ||
4352 | + * end up promoting it to a doublefault. In that case, take | ||
4353 | + * advantage of the fact that we're not using the normal (TSS.sp0) | ||
4354 | + * stack right now. We can write a fake #GP(0) frame at TSS.sp0 | ||
4355 | + * and then modify our own IRET frame so that, when we return, | ||
4356 | + * we land directly at the #GP(0) vector with the stack already | ||
4357 | + * set up according to its expectations. | ||
4358 | + * | ||
4359 | + * The net result is that our #GP handler will think that we | ||
4360 | + * entered from usermode with the bad user context. | ||
4361 | * | ||
4362 | * No need for ist_enter here because we don't use RCU. | ||
4363 | */ | ||
4364 | @@ -359,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
4365 | regs->cs == __KERNEL_CS && | ||
4366 | regs->ip == (unsigned long)native_irq_return_iret) | ||
4367 | { | ||
4368 | - struct pt_regs *normal_regs = task_pt_regs(current); | ||
4369 | + struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; | ||
4370 | |||
4371 | - /* Fake a #GP(0) from userspace. */ | ||
4372 | - memmove(&normal_regs->ip, (void *)regs->sp, 5*8); | ||
4373 | - normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ | ||
4374 | + /* | ||
4375 | + * regs->sp points to the failing IRET frame on the | ||
4376 | + * ESPFIX64 stack. Copy it to the entry stack. This fills | ||
4377 | + * in gpregs->ss through gpregs->ip. | ||
4378 | + * | ||
4379 | + */ | ||
4380 | + memmove(&gpregs->ip, (void *)regs->sp, 5*8); | ||
4381 | + gpregs->orig_ax = 0; /* Missing (lost) #GP error code */ | ||
4382 | + | ||
4383 | + /* | ||
4384 | + * Adjust our frame so that we return straight to the #GP | ||
4385 | + * vector with the expected RSP value. This is safe because | ||
4386 | + * we won't enable interupts or schedule before we invoke | ||
4387 | + * general_protection, so nothing will clobber the stack | ||
4388 | + * frame we just set up. | ||
4389 | + */ | ||
4390 | regs->ip = (unsigned long)general_protection; | ||
4391 | - regs->sp = (unsigned long)&normal_regs->orig_ax; | ||
4392 | + regs->sp = (unsigned long)&gpregs->orig_ax; | ||
4393 | |||
4394 | return; | ||
4395 | } | ||
4396 | @@ -390,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
4397 | * | ||
4398 | * Processors update CR2 whenever a page fault is detected. If a | ||
4399 | * second page fault occurs while an earlier page fault is being | ||
4400 | - * deliv- ered, the faulting linear address of the second fault will | ||
4401 | + * delivered, the faulting linear address of the second fault will | ||
4402 | * overwrite the contents of CR2 (replacing the previous | ||
4403 | * address). These updates to CR2 occur even if the page fault | ||
4404 | * results in a double fault or occurs during the delivery of a | ||
4405 | @@ -601,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3); | ||
4406 | |||
4407 | #ifdef CONFIG_X86_64 | ||
4408 | /* | ||
4409 | - * Help handler running on IST stack to switch off the IST stack if the | ||
4410 | - * interrupted code was in user mode. The actual stack switch is done in | ||
4411 | - * entry_64.S | ||
4412 | + * Help handler running on a per-cpu (IST or entry trampoline) stack | ||
4413 | + * to switch to the normal thread stack if the interrupted code was in | ||
4414 | + * user mode. The actual stack switch is done in entry_64.S | ||
4415 | */ | ||
4416 | asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
4417 | { | ||
4418 | - struct pt_regs *regs = task_pt_regs(current); | ||
4419 | - *regs = *eregs; | ||
4420 | + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; | ||
4421 | + if (regs != eregs) | ||
4422 | + *regs = *eregs; | ||
4423 | return regs; | ||
4424 | } | ||
4425 | NOKPROBE_SYMBOL(sync_regs); | ||
4426 | @@ -624,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) | ||
4427 | /* | ||
4428 | * This is called from entry_64.S early in handling a fault | ||
4429 | * caused by a bad iret to user mode. To handle the fault | ||
4430 | - * correctly, we want move our stack frame to task_pt_regs | ||
4431 | - * and we want to pretend that the exception came from the | ||
4432 | - * iret target. | ||
4433 | + * correctly, we want to move our stack frame to where it would | ||
4434 | + * be had we entered directly on the entry stack (rather than | ||
4435 | + * just below the IRET frame) and we want to pretend that the | ||
4436 | + * exception came from the IRET target. | ||
4437 | */ | ||
4438 | struct bad_iret_stack *new_stack = | ||
4439 | - container_of(task_pt_regs(current), | ||
4440 | - struct bad_iret_stack, regs); | ||
4441 | + (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; | ||
4442 | |||
4443 | /* Copy the IRET target to the new stack. */ | ||
4444 | memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); | ||
4445 | @@ -795,14 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) | ||
4446 | debug_stack_usage_dec(); | ||
4447 | |||
4448 | exit: | ||
4449 | -#if defined(CONFIG_X86_32) | ||
4450 | - /* | ||
4451 | - * This is the most likely code path that involves non-trivial use | ||
4452 | - * of the SYSENTER stack. Check that we haven't overrun it. | ||
4453 | - */ | ||
4454 | - WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC, | ||
4455 | - "Overran or corrupted SYSENTER stack\n"); | ||
4456 | -#endif | ||
4457 | ist_exit(regs); | ||
4458 | } | ||
4459 | NOKPROBE_SYMBOL(do_debug); | ||
4460 | @@ -929,6 +940,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | ||
4461 | |||
4462 | void __init trap_init(void) | ||
4463 | { | ||
4464 | + /* Init cpu_entry_area before IST entries are set up */ | ||
4465 | + setup_cpu_entry_areas(); | ||
4466 | + | ||
4467 | idt_setup_traps(); | ||
4468 | |||
4469 | /* | ||
4470 | diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c | ||
4471 | index a3f973b2c97a..be86a865087a 100644 | ||
4472 | --- a/arch/x86/kernel/unwind_orc.c | ||
4473 | +++ b/arch/x86/kernel/unwind_orc.c | ||
4474 | @@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) | ||
4475 | return NULL; | ||
4476 | } | ||
4477 | |||
4478 | -static bool stack_access_ok(struct unwind_state *state, unsigned long addr, | ||
4479 | +static bool stack_access_ok(struct unwind_state *state, unsigned long _addr, | ||
4480 | size_t len) | ||
4481 | { | ||
4482 | struct stack_info *info = &state->stack_info; | ||
4483 | + void *addr = (void *)_addr; | ||
4484 | |||
4485 | - /* | ||
4486 | - * If the address isn't on the current stack, switch to the next one. | ||
4487 | - * | ||
4488 | - * We may have to traverse multiple stacks to deal with the possibility | ||
4489 | - * that info->next_sp could point to an empty stack and the address | ||
4490 | - * could be on a subsequent stack. | ||
4491 | - */ | ||
4492 | - while (!on_stack(info, (void *)addr, len)) | ||
4493 | - if (get_stack_info(info->next_sp, state->task, info, | ||
4494 | - &state->stack_mask)) | ||
4495 | - return false; | ||
4496 | + if (!on_stack(info, addr, len) && | ||
4497 | + (get_stack_info(addr, state->task, info, &state->stack_mask))) | ||
4498 | + return false; | ||
4499 | |||
4500 | return true; | ||
4501 | } | ||
4502 | @@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, | ||
4503 | return true; | ||
4504 | } | ||
4505 | |||
4506 | -#define REGS_SIZE (sizeof(struct pt_regs)) | ||
4507 | -#define SP_OFFSET (offsetof(struct pt_regs, sp)) | ||
4508 | -#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip)) | ||
4509 | -#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip)) | ||
4510 | - | ||
4511 | static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, | ||
4512 | - unsigned long *ip, unsigned long *sp, bool full) | ||
4513 | + unsigned long *ip, unsigned long *sp) | ||
4514 | { | ||
4515 | - size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE; | ||
4516 | - size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET; | ||
4517 | - struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE); | ||
4518 | - | ||
4519 | - if (IS_ENABLED(CONFIG_X86_64)) { | ||
4520 | - if (!stack_access_ok(state, addr, regs_size)) | ||
4521 | - return false; | ||
4522 | + struct pt_regs *regs = (struct pt_regs *)addr; | ||
4523 | |||
4524 | - *ip = regs->ip; | ||
4525 | - *sp = regs->sp; | ||
4526 | + /* x86-32 support will be more complicated due to the &regs->sp hack */ | ||
4527 | + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32)); | ||
4528 | |||
4529 | - return true; | ||
4530 | - } | ||
4531 | - | ||
4532 | - if (!stack_access_ok(state, addr, sp_offset)) | ||
4533 | + if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) | ||
4534 | return false; | ||
4535 | |||
4536 | *ip = regs->ip; | ||
4537 | + *sp = regs->sp; | ||
4538 | + return true; | ||
4539 | +} | ||
4540 | |||
4541 | - if (user_mode(regs)) { | ||
4542 | - if (!stack_access_ok(state, addr + sp_offset, | ||
4543 | - REGS_SIZE - SP_OFFSET)) | ||
4544 | - return false; | ||
4545 | +static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr, | ||
4546 | + unsigned long *ip, unsigned long *sp) | ||
4547 | +{ | ||
4548 | + struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET; | ||
4549 | |||
4550 | - *sp = regs->sp; | ||
4551 | - } else | ||
4552 | - *sp = (unsigned long)&regs->sp; | ||
4553 | + if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) | ||
4554 | + return false; | ||
4555 | |||
4556 | + *ip = regs->ip; | ||
4557 | + *sp = regs->sp; | ||
4558 | return true; | ||
4559 | } | ||
4560 | |||
4561 | @@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state) | ||
4562 | unsigned long ip_p, sp, orig_ip, prev_sp = state->sp; | ||
4563 | enum stack_type prev_type = state->stack_info.type; | ||
4564 | struct orc_entry *orc; | ||
4565 | - struct pt_regs *ptregs; | ||
4566 | bool indirect = false; | ||
4567 | |||
4568 | if (unwind_done(state)) | ||
4569 | @@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state) | ||
4570 | break; | ||
4571 | |||
4572 | case ORC_TYPE_REGS: | ||
4573 | - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) { | ||
4574 | + if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) { | ||
4575 | orc_warn("can't dereference registers at %p for ip %pB\n", | ||
4576 | (void *)sp, (void *)orig_ip); | ||
4577 | goto done; | ||
4578 | @@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state) | ||
4579 | break; | ||
4580 | |||
4581 | case ORC_TYPE_REGS_IRET: | ||
4582 | - if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) { | ||
4583 | + if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) { | ||
4584 | orc_warn("can't dereference iret registers at %p for ip %pB\n", | ||
4585 | (void *)sp, (void *)orig_ip); | ||
4586 | goto done; | ||
4587 | } | ||
4588 | |||
4589 | - ptregs = container_of((void *)sp, struct pt_regs, ip); | ||
4590 | - if ((unsigned long)ptregs >= prev_sp && | ||
4591 | - on_stack(&state->stack_info, ptregs, REGS_SIZE)) { | ||
4592 | - state->regs = ptregs; | ||
4593 | - state->full_regs = false; | ||
4594 | - } else | ||
4595 | - state->regs = NULL; | ||
4596 | - | ||
4597 | + state->regs = (void *)sp - IRET_FRAME_OFFSET; | ||
4598 | + state->full_regs = false; | ||
4599 | state->signal = true; | ||
4600 | break; | ||
4601 | |||
4602 | @@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, | ||
4603 | } | ||
4604 | |||
4605 | if (get_stack_info((unsigned long *)state->sp, state->task, | ||
4606 | - &state->stack_info, &state->stack_mask)) | ||
4607 | - return; | ||
4608 | + &state->stack_info, &state->stack_mask)) { | ||
4609 | + /* | ||
4610 | + * We weren't on a valid stack. It's possible that | ||
4611 | + * we overflowed a valid stack into a guard page. | ||
4612 | + * See if the next page up is valid so that we can | ||
4613 | + * generate some kind of backtrace if this happens. | ||
4614 | + */ | ||
4615 | + void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); | ||
4616 | + if (get_stack_info(next_page, state->task, &state->stack_info, | ||
4617 | + &state->stack_mask)) | ||
4618 | + return; | ||
4619 | + } | ||
4620 | |||
4621 | /* | ||
4622 | * The caller can provide the address of the first frame directly | ||
4623 | diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S | ||
4624 | index 014ea59aa153..3d3c2f71f617 100644 | ||
4625 | --- a/arch/x86/kernel/verify_cpu.S | ||
4626 | +++ b/arch/x86/kernel/verify_cpu.S | ||
4627 | @@ -33,7 +33,7 @@ | ||
4628 | #include <asm/cpufeatures.h> | ||
4629 | #include <asm/msr-index.h> | ||
4630 | |||
4631 | -verify_cpu: | ||
4632 | +ENTRY(verify_cpu) | ||
4633 | pushf # Save caller passed flags | ||
4634 | push $0 # Kill any dangerous flags | ||
4635 | popf | ||
4636 | @@ -139,3 +139,4 @@ verify_cpu: | ||
4637 | popf # Restore caller passed flags | ||
4638 | xorl %eax, %eax | ||
4639 | ret | ||
4640 | +ENDPROC(verify_cpu) | ||
4641 | diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c | ||
4642 | index 68244742ecb0..5edb27f1a2c4 100644 | ||
4643 | --- a/arch/x86/kernel/vm86_32.c | ||
4644 | +++ b/arch/x86/kernel/vm86_32.c | ||
4645 | @@ -55,6 +55,7 @@ | ||
4646 | #include <asm/irq.h> | ||
4647 | #include <asm/traps.h> | ||
4648 | #include <asm/vm86.h> | ||
4649 | +#include <asm/switch_to.h> | ||
4650 | |||
4651 | /* | ||
4652 | * Known problems: | ||
4653 | @@ -94,7 +95,6 @@ | ||
4654 | |||
4655 | void save_v86_state(struct kernel_vm86_regs *regs, int retval) | ||
4656 | { | ||
4657 | - struct tss_struct *tss; | ||
4658 | struct task_struct *tsk = current; | ||
4659 | struct vm86plus_struct __user *user; | ||
4660 | struct vm86 *vm86 = current->thread.vm86; | ||
4661 | @@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) | ||
4662 | do_exit(SIGSEGV); | ||
4663 | } | ||
4664 | |||
4665 | - tss = &per_cpu(cpu_tss, get_cpu()); | ||
4666 | + preempt_disable(); | ||
4667 | tsk->thread.sp0 = vm86->saved_sp0; | ||
4668 | tsk->thread.sysenter_cs = __KERNEL_CS; | ||
4669 | - load_sp0(tss, &tsk->thread); | ||
4670 | + update_sp0(tsk); | ||
4671 | + refresh_sysenter_cs(&tsk->thread); | ||
4672 | vm86->saved_sp0 = 0; | ||
4673 | - put_cpu(); | ||
4674 | + preempt_enable(); | ||
4675 | |||
4676 | memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); | ||
4677 | |||
4678 | @@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) | ||
4679 | |||
4680 | static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | ||
4681 | { | ||
4682 | - struct tss_struct *tss; | ||
4683 | struct task_struct *tsk = current; | ||
4684 | struct vm86 *vm86 = tsk->thread.vm86; | ||
4685 | struct kernel_vm86_regs vm86regs; | ||
4686 | @@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) | ||
4687 | vm86->saved_sp0 = tsk->thread.sp0; | ||
4688 | lazy_save_gs(vm86->regs32.gs); | ||
4689 | |||
4690 | - tss = &per_cpu(cpu_tss, get_cpu()); | ||
4691 | /* make room for real-mode segments */ | ||
4692 | + preempt_disable(); | ||
4693 | tsk->thread.sp0 += 16; | ||
4694 | |||
4695 | - if (static_cpu_has(X86_FEATURE_SEP)) | ||
4696 | + if (static_cpu_has(X86_FEATURE_SEP)) { | ||
4697 | tsk->thread.sysenter_cs = 0; | ||
4698 | + refresh_sysenter_cs(&tsk->thread); | ||
4699 | + } | ||
4700 | |||
4701 | - load_sp0(tss, &tsk->thread); | ||
4702 | - put_cpu(); | ||
4703 | + update_sp0(tsk); | ||
4704 | + preempt_enable(); | ||
4705 | |||
4706 | if (vm86->flags & VM86_SCREEN_BITMAP) | ||
4707 | mark_screen_rdonly(tsk->mm); | ||
4708 | diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S | ||
4709 | index a4009fb9be87..d2a8b5a24a44 100644 | ||
4710 | --- a/arch/x86/kernel/vmlinux.lds.S | ||
4711 | +++ b/arch/x86/kernel/vmlinux.lds.S | ||
4712 | @@ -107,6 +107,15 @@ SECTIONS | ||
4713 | SOFTIRQENTRY_TEXT | ||
4714 | *(.fixup) | ||
4715 | *(.gnu.warning) | ||
4716 | + | ||
4717 | +#ifdef CONFIG_X86_64 | ||
4718 | + . = ALIGN(PAGE_SIZE); | ||
4719 | + _entry_trampoline = .; | ||
4720 | + *(.entry_trampoline) | ||
4721 | + . = ALIGN(PAGE_SIZE); | ||
4722 | + ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); | ||
4723 | +#endif | ||
4724 | + | ||
4725 | /* End of text section */ | ||
4726 | _etext = .; | ||
4727 | } :text = 0x9090 | ||
4728 | diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c | ||
4729 | index a088b2c47f73..5b2d10c1973a 100644 | ||
4730 | --- a/arch/x86/kernel/x86_init.c | ||
4731 | +++ b/arch/x86/kernel/x86_init.c | ||
4732 | @@ -28,6 +28,8 @@ void x86_init_noop(void) { } | ||
4733 | void __init x86_init_uint_noop(unsigned int unused) { } | ||
4734 | int __init iommu_init_noop(void) { return 0; } | ||
4735 | void iommu_shutdown_noop(void) { } | ||
4736 | +bool __init bool_x86_init_noop(void) { return false; } | ||
4737 | +void x86_op_int_noop(int cpu) { } | ||
4738 | |||
4739 | /* | ||
4740 | * The platform setup functions are preset with the default functions | ||
4741 | @@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = { | ||
4742 | .init_irq = x86_default_pci_init_irq, | ||
4743 | .fixup_irqs = x86_default_pci_fixup_irqs, | ||
4744 | }, | ||
4745 | + | ||
4746 | + .hyper = { | ||
4747 | + .init_platform = x86_init_noop, | ||
4748 | + .x2apic_available = bool_x86_init_noop, | ||
4749 | + .init_mem_mapping = x86_init_noop, | ||
4750 | + }, | ||
4751 | }; | ||
4752 | |||
4753 | struct x86_cpuinit_ops x86_cpuinit = { | ||
4754 | @@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { | ||
4755 | .get_nmi_reason = default_get_nmi_reason, | ||
4756 | .save_sched_clock_state = tsc_save_sched_clock_state, | ||
4757 | .restore_sched_clock_state = tsc_restore_sched_clock_state, | ||
4758 | + .hyper.pin_vcpu = x86_op_int_noop, | ||
4759 | }; | ||
4760 | |||
4761 | EXPORT_SYMBOL_GPL(x86_platform); | ||
4762 | diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c | ||
4763 | index 7a69cf053711..13ebeedcec07 100644 | ||
4764 | --- a/arch/x86/kvm/mmu.c | ||
4765 | +++ b/arch/x86/kvm/mmu.c | ||
4766 | @@ -5476,13 +5476,13 @@ int kvm_mmu_module_init(void) | ||
4767 | |||
4768 | pte_list_desc_cache = kmem_cache_create("pte_list_desc", | ||
4769 | sizeof(struct pte_list_desc), | ||
4770 | - 0, 0, NULL); | ||
4771 | + 0, SLAB_ACCOUNT, NULL); | ||
4772 | if (!pte_list_desc_cache) | ||
4773 | goto nomem; | ||
4774 | |||
4775 | mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", | ||
4776 | sizeof(struct kvm_mmu_page), | ||
4777 | - 0, 0, NULL); | ||
4778 | + 0, SLAB_ACCOUNT, NULL); | ||
4779 | if (!mmu_page_header_cache) | ||
4780 | goto nomem; | ||
4781 | |||
4782 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c | ||
4783 | index bc5921c1e2f2..47d9432756f3 100644 | ||
4784 | --- a/arch/x86/kvm/vmx.c | ||
4785 | +++ b/arch/x86/kvm/vmx.c | ||
4786 | @@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
4787 | * processors. See 22.2.4. | ||
4788 | */ | ||
4789 | vmcs_writel(HOST_TR_BASE, | ||
4790 | - (unsigned long)this_cpu_ptr(&cpu_tss)); | ||
4791 | + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); | ||
4792 | vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ | ||
4793 | |||
4794 | /* | ||
4795 | diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c | ||
4796 | index 553f8fd23cc4..4846eff7e4c8 100644 | ||
4797 | --- a/arch/x86/lib/delay.c | ||
4798 | +++ b/arch/x86/lib/delay.c | ||
4799 | @@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops) | ||
4800 | delay = min_t(u64, MWAITX_MAX_LOOPS, loops); | ||
4801 | |||
4802 | /* | ||
4803 | - * Use cpu_tss as a cacheline-aligned, seldomly | ||
4804 | + * Use cpu_tss_rw as a cacheline-aligned, seldomly | ||
4805 | * accessed per-cpu variable as the monitor target. | ||
4806 | */ | ||
4807 | - __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0); | ||
4808 | + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); | ||
4809 | |||
4810 | /* | ||
4811 | * AMD, like Intel, supports the EAX hint and EAX=0xf | ||
4812 | diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c | ||
4813 | index b0ff378650a9..3109ba6c6ede 100644 | ||
4814 | --- a/arch/x86/mm/fault.c | ||
4815 | +++ b/arch/x86/mm/fault.c | ||
4816 | @@ -29,26 +29,6 @@ | ||
4817 | #define CREATE_TRACE_POINTS | ||
4818 | #include <asm/trace/exceptions.h> | ||
4819 | |||
4820 | -/* | ||
4821 | - * Page fault error code bits: | ||
4822 | - * | ||
4823 | - * bit 0 == 0: no page found 1: protection fault | ||
4824 | - * bit 1 == 0: read access 1: write access | ||
4825 | - * bit 2 == 0: kernel-mode access 1: user-mode access | ||
4826 | - * bit 3 == 1: use of reserved bit detected | ||
4827 | - * bit 4 == 1: fault was an instruction fetch | ||
4828 | - * bit 5 == 1: protection keys block access | ||
4829 | - */ | ||
4830 | -enum x86_pf_error_code { | ||
4831 | - | ||
4832 | - PF_PROT = 1 << 0, | ||
4833 | - PF_WRITE = 1 << 1, | ||
4834 | - PF_USER = 1 << 2, | ||
4835 | - PF_RSVD = 1 << 3, | ||
4836 | - PF_INSTR = 1 << 4, | ||
4837 | - PF_PK = 1 << 5, | ||
4838 | -}; | ||
4839 | - | ||
4840 | /* | ||
4841 | * Returns 0 if mmiotrace is disabled, or if the fault is not | ||
4842 | * handled by mmiotrace: | ||
4843 | @@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | ||
4844 | * If it was a exec (instruction fetch) fault on NX page, then | ||
4845 | * do not ignore the fault: | ||
4846 | */ | ||
4847 | - if (error_code & PF_INSTR) | ||
4848 | + if (error_code & X86_PF_INSTR) | ||
4849 | return 0; | ||
4850 | |||
4851 | instr = (void *)convert_ip_to_linear(current, regs); | ||
4852 | @@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) | ||
4853 | * siginfo so userspace can discover which protection key was set | ||
4854 | * on the PTE. | ||
4855 | * | ||
4856 | - * If we get here, we know that the hardware signaled a PF_PK | ||
4857 | + * If we get here, we know that the hardware signaled a X86_PF_PK | ||
4858 | * fault and that there was a VMA once we got in the fault | ||
4859 | * handler. It does *not* guarantee that the VMA we find here | ||
4860 | * was the one that we faulted on. | ||
4861 | @@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) | ||
4862 | /* | ||
4863 | * force_sig_info_fault() is called from a number of | ||
4864 | * contexts, some of which have a VMA and some of which | ||
4865 | - * do not. The PF_PK handing happens after we have a | ||
4866 | + * do not. The X86_PF_PK handing happens after we have a | ||
4867 | * valid VMA, so we should never reach this without a | ||
4868 | * valid VMA. | ||
4869 | */ | ||
4870 | @@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, | ||
4871 | if (!oops_may_print()) | ||
4872 | return; | ||
4873 | |||
4874 | - if (error_code & PF_INSTR) { | ||
4875 | + if (error_code & X86_PF_INSTR) { | ||
4876 | unsigned int level; | ||
4877 | pgd_t *pgd; | ||
4878 | pte_t *pte; | ||
4879 | @@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | ||
4880 | */ | ||
4881 | if (current->thread.sig_on_uaccess_err && signal) { | ||
4882 | tsk->thread.trap_nr = X86_TRAP_PF; | ||
4883 | - tsk->thread.error_code = error_code | PF_USER; | ||
4884 | + tsk->thread.error_code = error_code | X86_PF_USER; | ||
4885 | tsk->thread.cr2 = address; | ||
4886 | |||
4887 | /* XXX: hwpoison faults will set the wrong code. */ | ||
4888 | @@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | ||
4889 | struct task_struct *tsk = current; | ||
4890 | |||
4891 | /* User mode accesses just cause a SIGSEGV */ | ||
4892 | - if (error_code & PF_USER) { | ||
4893 | + if (error_code & X86_PF_USER) { | ||
4894 | /* | ||
4895 | * It's possible to have interrupts off here: | ||
4896 | */ | ||
4897 | @@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | ||
4898 | * Instruction fetch faults in the vsyscall page might need | ||
4899 | * emulation. | ||
4900 | */ | ||
4901 | - if (unlikely((error_code & PF_INSTR) && | ||
4902 | + if (unlikely((error_code & X86_PF_INSTR) && | ||
4903 | ((address & ~0xfff) == VSYSCALL_ADDR))) { | ||
4904 | if (emulate_vsyscall(regs, address)) | ||
4905 | return; | ||
4906 | @@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, | ||
4907 | * are always protection faults. | ||
4908 | */ | ||
4909 | if (address >= TASK_SIZE_MAX) | ||
4910 | - error_code |= PF_PROT; | ||
4911 | + error_code |= X86_PF_PROT; | ||
4912 | |||
4913 | if (likely(show_unhandled_signals)) | ||
4914 | show_signal_msg(regs, error_code, address, tsk); | ||
4915 | @@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, | ||
4916 | |||
4917 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | ||
4918 | return false; | ||
4919 | - if (error_code & PF_PK) | ||
4920 | + if (error_code & X86_PF_PK) | ||
4921 | return true; | ||
4922 | /* this checks permission keys on the VMA: */ | ||
4923 | - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), | ||
4924 | - (error_code & PF_INSTR), foreign)) | ||
4925 | + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), | ||
4926 | + (error_code & X86_PF_INSTR), foreign)) | ||
4927 | return true; | ||
4928 | return false; | ||
4929 | } | ||
4930 | @@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, | ||
4931 | int code = BUS_ADRERR; | ||
4932 | |||
4933 | /* Kernel mode? Handle exceptions or die: */ | ||
4934 | - if (!(error_code & PF_USER)) { | ||
4935 | + if (!(error_code & X86_PF_USER)) { | ||
4936 | no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); | ||
4937 | return; | ||
4938 | } | ||
4939 | @@ -1053,14 +1033,14 @@ static noinline void | ||
4940 | mm_fault_error(struct pt_regs *regs, unsigned long error_code, | ||
4941 | unsigned long address, u32 *pkey, unsigned int fault) | ||
4942 | { | ||
4943 | - if (fatal_signal_pending(current) && !(error_code & PF_USER)) { | ||
4944 | + if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { | ||
4945 | no_context(regs, error_code, address, 0, 0); | ||
4946 | return; | ||
4947 | } | ||
4948 | |||
4949 | if (fault & VM_FAULT_OOM) { | ||
4950 | /* Kernel mode? Handle exceptions or die: */ | ||
4951 | - if (!(error_code & PF_USER)) { | ||
4952 | + if (!(error_code & X86_PF_USER)) { | ||
4953 | no_context(regs, error_code, address, | ||
4954 | SIGSEGV, SEGV_MAPERR); | ||
4955 | return; | ||
4956 | @@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | ||
4957 | |||
4958 | static int spurious_fault_check(unsigned long error_code, pte_t *pte) | ||
4959 | { | ||
4960 | - if ((error_code & PF_WRITE) && !pte_write(*pte)) | ||
4961 | + if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) | ||
4962 | return 0; | ||
4963 | |||
4964 | - if ((error_code & PF_INSTR) && !pte_exec(*pte)) | ||
4965 | + if ((error_code & X86_PF_INSTR) && !pte_exec(*pte)) | ||
4966 | return 0; | ||
4967 | /* | ||
4968 | * Note: We do not do lazy flushing on protection key | ||
4969 | - * changes, so no spurious fault will ever set PF_PK. | ||
4970 | + * changes, so no spurious fault will ever set X86_PF_PK. | ||
4971 | */ | ||
4972 | - if ((error_code & PF_PK)) | ||
4973 | + if ((error_code & X86_PF_PK)) | ||
4974 | return 1; | ||
4975 | |||
4976 | return 1; | ||
4977 | @@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address) | ||
4978 | * change, so user accesses are not expected to cause spurious | ||
4979 | * faults. | ||
4980 | */ | ||
4981 | - if (error_code != (PF_WRITE | PF_PROT) | ||
4982 | - && error_code != (PF_INSTR | PF_PROT)) | ||
4983 | + if (error_code != (X86_PF_WRITE | X86_PF_PROT) && | ||
4984 | + error_code != (X86_PF_INSTR | X86_PF_PROT)) | ||
4985 | return 0; | ||
4986 | |||
4987 | pgd = init_mm.pgd + pgd_index(address); | ||
4988 | @@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) | ||
4989 | * always an unconditional error and can never result in | ||
4990 | * a follow-up action to resolve the fault, like a COW. | ||
4991 | */ | ||
4992 | - if (error_code & PF_PK) | ||
4993 | + if (error_code & X86_PF_PK) | ||
4994 | return 1; | ||
4995 | |||
4996 | /* | ||
4997 | * Make sure to check the VMA so that we do not perform | ||
4998 | - * faults just to hit a PF_PK as soon as we fill in a | ||
4999 | + * faults just to hit a X86_PF_PK as soon as we fill in a | ||
5000 | * page. | ||
5001 | */ | ||
5002 | - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), | ||
5003 | - (error_code & PF_INSTR), foreign)) | ||
5004 | + if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE), | ||
5005 | + (error_code & X86_PF_INSTR), foreign)) | ||
5006 | return 1; | ||
5007 | |||
5008 | - if (error_code & PF_WRITE) { | ||
5009 | + if (error_code & X86_PF_WRITE) { | ||
5010 | /* write, present and write, not present: */ | ||
5011 | if (unlikely(!(vma->vm_flags & VM_WRITE))) | ||
5012 | return 1; | ||
5013 | @@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) | ||
5014 | } | ||
5015 | |||
5016 | /* read, present: */ | ||
5017 | - if (unlikely(error_code & PF_PROT)) | ||
5018 | + if (unlikely(error_code & X86_PF_PROT)) | ||
5019 | return 1; | ||
5020 | |||
5021 | /* read, not present: */ | ||
5022 | @@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | ||
5023 | if (!static_cpu_has(X86_FEATURE_SMAP)) | ||
5024 | return false; | ||
5025 | |||
5026 | - if (error_code & PF_USER) | ||
5027 | + if (error_code & X86_PF_USER) | ||
5028 | return false; | ||
5029 | |||
5030 | if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) | ||
5031 | @@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5032 | * protection error (error_code & 9) == 0. | ||
5033 | */ | ||
5034 | if (unlikely(fault_in_kernel_space(address))) { | ||
5035 | - if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { | ||
5036 | + if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { | ||
5037 | if (vmalloc_fault(address) >= 0) | ||
5038 | return; | ||
5039 | |||
5040 | @@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5041 | if (unlikely(kprobes_fault(regs))) | ||
5042 | return; | ||
5043 | |||
5044 | - if (unlikely(error_code & PF_RSVD)) | ||
5045 | + if (unlikely(error_code & X86_PF_RSVD)) | ||
5046 | pgtable_bad(regs, error_code, address); | ||
5047 | |||
5048 | if (unlikely(smap_violation(error_code, regs))) { | ||
5049 | @@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5050 | */ | ||
5051 | if (user_mode(regs)) { | ||
5052 | local_irq_enable(); | ||
5053 | - error_code |= PF_USER; | ||
5054 | + error_code |= X86_PF_USER; | ||
5055 | flags |= FAULT_FLAG_USER; | ||
5056 | } else { | ||
5057 | if (regs->flags & X86_EFLAGS_IF) | ||
5058 | @@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5059 | |||
5060 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); | ||
5061 | |||
5062 | - if (error_code & PF_WRITE) | ||
5063 | + if (error_code & X86_PF_WRITE) | ||
5064 | flags |= FAULT_FLAG_WRITE; | ||
5065 | - if (error_code & PF_INSTR) | ||
5066 | + if (error_code & X86_PF_INSTR) | ||
5067 | flags |= FAULT_FLAG_INSTRUCTION; | ||
5068 | |||
5069 | /* | ||
5070 | @@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5071 | * space check, thus avoiding the deadlock: | ||
5072 | */ | ||
5073 | if (unlikely(!down_read_trylock(&mm->mmap_sem))) { | ||
5074 | - if ((error_code & PF_USER) == 0 && | ||
5075 | + if (!(error_code & X86_PF_USER) && | ||
5076 | !search_exception_tables(regs->ip)) { | ||
5077 | bad_area_nosemaphore(regs, error_code, address, NULL); | ||
5078 | return; | ||
5079 | @@ -1409,7 +1389,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, | ||
5080 | bad_area(regs, error_code, address); | ||
5081 | return; | ||
5082 | } | ||
5083 | - if (error_code & PF_USER) { | ||
5084 | + if (error_code & X86_PF_USER) { | ||
5085 | /* | ||
5086 | * Accessing the stack below %sp is always a bug. | ||
5087 | * The large cushion allows instructions like enter | ||
5088 | diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c | ||
5089 | index af5c1ed21d43..a22c2b95e513 100644 | ||
5090 | --- a/arch/x86/mm/init.c | ||
5091 | +++ b/arch/x86/mm/init.c | ||
5092 | @@ -671,7 +671,7 @@ void __init init_mem_mapping(void) | ||
5093 | load_cr3(swapper_pg_dir); | ||
5094 | __flush_tlb_all(); | ||
5095 | |||
5096 | - hypervisor_init_mem_mapping(); | ||
5097 | + x86_init.hyper.init_mem_mapping(); | ||
5098 | |||
5099 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); | ||
5100 | } | ||
5101 | diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c | ||
5102 | index 048fbe8fc274..adcea90a2046 100644 | ||
5103 | --- a/arch/x86/mm/init_64.c | ||
5104 | +++ b/arch/x86/mm/init_64.c | ||
5105 | @@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) | ||
5106 | |||
5107 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) | ||
5108 | void register_page_bootmem_memmap(unsigned long section_nr, | ||
5109 | - struct page *start_page, unsigned long size) | ||
5110 | + struct page *start_page, unsigned long nr_pages) | ||
5111 | { | ||
5112 | unsigned long addr = (unsigned long)start_page; | ||
5113 | - unsigned long end = (unsigned long)(start_page + size); | ||
5114 | + unsigned long end = (unsigned long)(start_page + nr_pages); | ||
5115 | unsigned long next; | ||
5116 | pgd_t *pgd; | ||
5117 | p4d_t *p4d; | ||
5118 | pud_t *pud; | ||
5119 | pmd_t *pmd; | ||
5120 | - unsigned int nr_pages; | ||
5121 | + unsigned int nr_pmd_pages; | ||
5122 | struct page *page; | ||
5123 | |||
5124 | for (; addr < end; addr = next) { | ||
5125 | @@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, | ||
5126 | if (pmd_none(*pmd)) | ||
5127 | continue; | ||
5128 | |||
5129 | - nr_pages = 1 << (get_order(PMD_SIZE)); | ||
5130 | + nr_pmd_pages = 1 << get_order(PMD_SIZE); | ||
5131 | page = pmd_page(*pmd); | ||
5132 | - while (nr_pages--) | ||
5133 | + while (nr_pmd_pages--) | ||
5134 | get_page_bootmem(section_nr, page++, | ||
5135 | SECTION_INFO); | ||
5136 | } | ||
5137 | diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c | ||
5138 | index 8f5be3eb40dd..9ec70d780f1f 100644 | ||
5139 | --- a/arch/x86/mm/kasan_init_64.c | ||
5140 | +++ b/arch/x86/mm/kasan_init_64.c | ||
5141 | @@ -4,19 +4,150 @@ | ||
5142 | #include <linux/bootmem.h> | ||
5143 | #include <linux/kasan.h> | ||
5144 | #include <linux/kdebug.h> | ||
5145 | +#include <linux/memblock.h> | ||
5146 | #include <linux/mm.h> | ||
5147 | #include <linux/sched.h> | ||
5148 | #include <linux/sched/task.h> | ||
5149 | #include <linux/vmalloc.h> | ||
5150 | |||
5151 | #include <asm/e820/types.h> | ||
5152 | +#include <asm/pgalloc.h> | ||
5153 | #include <asm/tlbflush.h> | ||
5154 | #include <asm/sections.h> | ||
5155 | #include <asm/pgtable.h> | ||
5156 | |||
5157 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; | ||
5158 | |||
5159 | -static int __init map_range(struct range *range) | ||
5160 | +static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); | ||
5161 | + | ||
5162 | +static __init void *early_alloc(size_t size, int nid) | ||
5163 | +{ | ||
5164 | + return memblock_virt_alloc_try_nid_nopanic(size, size, | ||
5165 | + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); | ||
5166 | +} | ||
5167 | + | ||
5168 | +static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, | ||
5169 | + unsigned long end, int nid) | ||
5170 | +{ | ||
5171 | + pte_t *pte; | ||
5172 | + | ||
5173 | + if (pmd_none(*pmd)) { | ||
5174 | + void *p; | ||
5175 | + | ||
5176 | + if (boot_cpu_has(X86_FEATURE_PSE) && | ||
5177 | + ((end - addr) == PMD_SIZE) && | ||
5178 | + IS_ALIGNED(addr, PMD_SIZE)) { | ||
5179 | + p = early_alloc(PMD_SIZE, nid); | ||
5180 | + if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) | ||
5181 | + return; | ||
5182 | + else if (p) | ||
5183 | + memblock_free(__pa(p), PMD_SIZE); | ||
5184 | + } | ||
5185 | + | ||
5186 | + p = early_alloc(PAGE_SIZE, nid); | ||
5187 | + pmd_populate_kernel(&init_mm, pmd, p); | ||
5188 | + } | ||
5189 | + | ||
5190 | + pte = pte_offset_kernel(pmd, addr); | ||
5191 | + do { | ||
5192 | + pte_t entry; | ||
5193 | + void *p; | ||
5194 | + | ||
5195 | + if (!pte_none(*pte)) | ||
5196 | + continue; | ||
5197 | + | ||
5198 | + p = early_alloc(PAGE_SIZE, nid); | ||
5199 | + entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); | ||
5200 | + set_pte_at(&init_mm, addr, pte, entry); | ||
5201 | + } while (pte++, addr += PAGE_SIZE, addr != end); | ||
5202 | +} | ||
5203 | + | ||
5204 | +static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, | ||
5205 | + unsigned long end, int nid) | ||
5206 | +{ | ||
5207 | + pmd_t *pmd; | ||
5208 | + unsigned long next; | ||
5209 | + | ||
5210 | + if (pud_none(*pud)) { | ||
5211 | + void *p; | ||
5212 | + | ||
5213 | + if (boot_cpu_has(X86_FEATURE_GBPAGES) && | ||
5214 | + ((end - addr) == PUD_SIZE) && | ||
5215 | + IS_ALIGNED(addr, PUD_SIZE)) { | ||
5216 | + p = early_alloc(PUD_SIZE, nid); | ||
5217 | + if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) | ||
5218 | + return; | ||
5219 | + else if (p) | ||
5220 | + memblock_free(__pa(p), PUD_SIZE); | ||
5221 | + } | ||
5222 | + | ||
5223 | + p = early_alloc(PAGE_SIZE, nid); | ||
5224 | + pud_populate(&init_mm, pud, p); | ||
5225 | + } | ||
5226 | + | ||
5227 | + pmd = pmd_offset(pud, addr); | ||
5228 | + do { | ||
5229 | + next = pmd_addr_end(addr, end); | ||
5230 | + if (!pmd_large(*pmd)) | ||
5231 | + kasan_populate_pmd(pmd, addr, next, nid); | ||
5232 | + } while (pmd++, addr = next, addr != end); | ||
5233 | +} | ||
5234 | + | ||
5235 | +static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, | ||
5236 | + unsigned long end, int nid) | ||
5237 | +{ | ||
5238 | + pud_t *pud; | ||
5239 | + unsigned long next; | ||
5240 | + | ||
5241 | + if (p4d_none(*p4d)) { | ||
5242 | + void *p = early_alloc(PAGE_SIZE, nid); | ||
5243 | + | ||
5244 | + p4d_populate(&init_mm, p4d, p); | ||
5245 | + } | ||
5246 | + | ||
5247 | + pud = pud_offset(p4d, addr); | ||
5248 | + do { | ||
5249 | + next = pud_addr_end(addr, end); | ||
5250 | + if (!pud_large(*pud)) | ||
5251 | + kasan_populate_pud(pud, addr, next, nid); | ||
5252 | + } while (pud++, addr = next, addr != end); | ||
5253 | +} | ||
5254 | + | ||
5255 | +static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, | ||
5256 | + unsigned long end, int nid) | ||
5257 | +{ | ||
5258 | + void *p; | ||
5259 | + p4d_t *p4d; | ||
5260 | + unsigned long next; | ||
5261 | + | ||
5262 | + if (pgd_none(*pgd)) { | ||
5263 | + p = early_alloc(PAGE_SIZE, nid); | ||
5264 | + pgd_populate(&init_mm, pgd, p); | ||
5265 | + } | ||
5266 | + | ||
5267 | + p4d = p4d_offset(pgd, addr); | ||
5268 | + do { | ||
5269 | + next = p4d_addr_end(addr, end); | ||
5270 | + kasan_populate_p4d(p4d, addr, next, nid); | ||
5271 | + } while (p4d++, addr = next, addr != end); | ||
5272 | +} | ||
5273 | + | ||
5274 | +static void __init kasan_populate_shadow(unsigned long addr, unsigned long end, | ||
5275 | + int nid) | ||
5276 | +{ | ||
5277 | + pgd_t *pgd; | ||
5278 | + unsigned long next; | ||
5279 | + | ||
5280 | + addr = addr & PAGE_MASK; | ||
5281 | + end = round_up(end, PAGE_SIZE); | ||
5282 | + pgd = pgd_offset_k(addr); | ||
5283 | + do { | ||
5284 | + next = pgd_addr_end(addr, end); | ||
5285 | + kasan_populate_pgd(pgd, addr, next, nid); | ||
5286 | + } while (pgd++, addr = next, addr != end); | ||
5287 | +} | ||
5288 | + | ||
5289 | +static void __init map_range(struct range *range) | ||
5290 | { | ||
5291 | unsigned long start; | ||
5292 | unsigned long end; | ||
5293 | @@ -24,15 +155,17 @@ static int __init map_range(struct range *range) | ||
5294 | start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); | ||
5295 | end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); | ||
5296 | |||
5297 | - return vmemmap_populate(start, end, NUMA_NO_NODE); | ||
5298 | + kasan_populate_shadow(start, end, early_pfn_to_nid(range->start)); | ||
5299 | } | ||
5300 | |||
5301 | static void __init clear_pgds(unsigned long start, | ||
5302 | unsigned long end) | ||
5303 | { | ||
5304 | pgd_t *pgd; | ||
5305 | + /* See comment in kasan_init() */ | ||
5306 | + unsigned long pgd_end = end & PGDIR_MASK; | ||
5307 | |||
5308 | - for (; start < end; start += PGDIR_SIZE) { | ||
5309 | + for (; start < pgd_end; start += PGDIR_SIZE) { | ||
5310 | pgd = pgd_offset_k(start); | ||
5311 | /* | ||
5312 | * With folded p4d, pgd_clear() is nop, use p4d_clear() | ||
5313 | @@ -43,29 +176,61 @@ static void __init clear_pgds(unsigned long start, | ||
5314 | else | ||
5315 | pgd_clear(pgd); | ||
5316 | } | ||
5317 | + | ||
5318 | + pgd = pgd_offset_k(start); | ||
5319 | + for (; start < end; start += P4D_SIZE) | ||
5320 | + p4d_clear(p4d_offset(pgd, start)); | ||
5321 | +} | ||
5322 | + | ||
5323 | +static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) | ||
5324 | +{ | ||
5325 | + unsigned long p4d; | ||
5326 | + | ||
5327 | + if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | ||
5328 | + return (p4d_t *)pgd; | ||
5329 | + | ||
5330 | + p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; | ||
5331 | + p4d += __START_KERNEL_map - phys_base; | ||
5332 | + return (p4d_t *)p4d + p4d_index(addr); | ||
5333 | +} | ||
5334 | + | ||
5335 | +static void __init kasan_early_p4d_populate(pgd_t *pgd, | ||
5336 | + unsigned long addr, | ||
5337 | + unsigned long end) | ||
5338 | +{ | ||
5339 | + pgd_t pgd_entry; | ||
5340 | + p4d_t *p4d, p4d_entry; | ||
5341 | + unsigned long next; | ||
5342 | + | ||
5343 | + if (pgd_none(*pgd)) { | ||
5344 | + pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d)); | ||
5345 | + set_pgd(pgd, pgd_entry); | ||
5346 | + } | ||
5347 | + | ||
5348 | + p4d = early_p4d_offset(pgd, addr); | ||
5349 | + do { | ||
5350 | + next = p4d_addr_end(addr, end); | ||
5351 | + | ||
5352 | + if (!p4d_none(*p4d)) | ||
5353 | + continue; | ||
5354 | + | ||
5355 | + p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud)); | ||
5356 | + set_p4d(p4d, p4d_entry); | ||
5357 | + } while (p4d++, addr = next, addr != end && p4d_none(*p4d)); | ||
5358 | } | ||
5359 | |||
5360 | static void __init kasan_map_early_shadow(pgd_t *pgd) | ||
5361 | { | ||
5362 | - int i; | ||
5363 | - unsigned long start = KASAN_SHADOW_START; | ||
5364 | + /* See comment in kasan_init() */ | ||
5365 | + unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK; | ||
5366 | unsigned long end = KASAN_SHADOW_END; | ||
5367 | + unsigned long next; | ||
5368 | |||
5369 | - for (i = pgd_index(start); start < end; i++) { | ||
5370 | - switch (CONFIG_PGTABLE_LEVELS) { | ||
5371 | - case 4: | ||
5372 | - pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) | | ||
5373 | - _KERNPG_TABLE); | ||
5374 | - break; | ||
5375 | - case 5: | ||
5376 | - pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) | | ||
5377 | - _KERNPG_TABLE); | ||
5378 | - break; | ||
5379 | - default: | ||
5380 | - BUILD_BUG(); | ||
5381 | - } | ||
5382 | - start += PGDIR_SIZE; | ||
5383 | - } | ||
5384 | + pgd += pgd_index(addr); | ||
5385 | + do { | ||
5386 | + next = pgd_addr_end(addr, end); | ||
5387 | + kasan_early_p4d_populate(pgd, addr, next); | ||
5388 | + } while (pgd++, addr = next, addr != end); | ||
5389 | } | ||
5390 | |||
5391 | #ifdef CONFIG_KASAN_INLINE | ||
5392 | @@ -102,7 +267,7 @@ void __init kasan_early_init(void) | ||
5393 | for (i = 0; i < PTRS_PER_PUD; i++) | ||
5394 | kasan_zero_pud[i] = __pud(pud_val); | ||
5395 | |||
5396 | - for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++) | ||
5397 | + for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) | ||
5398 | kasan_zero_p4d[i] = __p4d(p4d_val); | ||
5399 | |||
5400 | kasan_map_early_shadow(early_top_pgt); | ||
5401 | @@ -112,37 +277,76 @@ void __init kasan_early_init(void) | ||
5402 | void __init kasan_init(void) | ||
5403 | { | ||
5404 | int i; | ||
5405 | + void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; | ||
5406 | |||
5407 | #ifdef CONFIG_KASAN_INLINE | ||
5408 | register_die_notifier(&kasan_die_notifier); | ||
5409 | #endif | ||
5410 | |||
5411 | memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); | ||
5412 | + | ||
5413 | + /* | ||
5414 | + * We use the same shadow offset for 4- and 5-level paging to | ||
5415 | + * facilitate boot-time switching between paging modes. | ||
5416 | + * As result in 5-level paging mode KASAN_SHADOW_START and | ||
5417 | + * KASAN_SHADOW_END are not aligned to PGD boundary. | ||
5418 | + * | ||
5419 | + * KASAN_SHADOW_START doesn't share PGD with anything else. | ||
5420 | + * We claim whole PGD entry to make things easier. | ||
5421 | + * | ||
5422 | + * KASAN_SHADOW_END lands in the last PGD entry and it collides with | ||
5423 | + * bunch of things like kernel code, modules, EFI mapping, etc. | ||
5424 | + * We need to take extra steps to not overwrite them. | ||
5425 | + */ | ||
5426 | + if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
5427 | + void *ptr; | ||
5428 | + | ||
5429 | + ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); | ||
5430 | + memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table)); | ||
5431 | + set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)], | ||
5432 | + __pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE)); | ||
5433 | + } | ||
5434 | + | ||
5435 | load_cr3(early_top_pgt); | ||
5436 | __flush_tlb_all(); | ||
5437 | |||
5438 | - clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); | ||
5439 | + clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END); | ||
5440 | |||
5441 | - kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, | ||
5442 | + kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK), | ||
5443 | kasan_mem_to_shadow((void *)PAGE_OFFSET)); | ||
5444 | |||
5445 | for (i = 0; i < E820_MAX_ENTRIES; i++) { | ||
5446 | if (pfn_mapped[i].end == 0) | ||
5447 | break; | ||
5448 | |||
5449 | - if (map_range(&pfn_mapped[i])) | ||
5450 | - panic("kasan: unable to allocate shadow!"); | ||
5451 | + map_range(&pfn_mapped[i]); | ||
5452 | } | ||
5453 | + | ||
5454 | kasan_populate_zero_shadow( | ||
5455 | kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), | ||
5456 | kasan_mem_to_shadow((void *)__START_KERNEL_map)); | ||
5457 | |||
5458 | - vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), | ||
5459 | - (unsigned long)kasan_mem_to_shadow(_end), | ||
5460 | - NUMA_NO_NODE); | ||
5461 | + kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), | ||
5462 | + (unsigned long)kasan_mem_to_shadow(_end), | ||
5463 | + early_pfn_to_nid(__pa(_stext))); | ||
5464 | + | ||
5465 | + shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM); | ||
5466 | + shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); | ||
5467 | + shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin, | ||
5468 | + PAGE_SIZE); | ||
5469 | + | ||
5470 | + shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE); | ||
5471 | + shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); | ||
5472 | + shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end, | ||
5473 | + PAGE_SIZE); | ||
5474 | |||
5475 | kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), | ||
5476 | - (void *)KASAN_SHADOW_END); | ||
5477 | + shadow_cpu_entry_begin); | ||
5478 | + | ||
5479 | + kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, | ||
5480 | + (unsigned long)shadow_cpu_entry_end, 0); | ||
5481 | + | ||
5482 | + kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END); | ||
5483 | |||
5484 | load_cr3(init_top_pgt); | ||
5485 | __flush_tlb_all(); | ||
5486 | diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c | ||
5487 | index 84fcfde53f8f..04d5157fe7f8 100644 | ||
5488 | --- a/arch/x86/power/cpu.c | ||
5489 | +++ b/arch/x86/power/cpu.c | ||
5490 | @@ -160,17 +160,19 @@ static void do_fpu_end(void) | ||
5491 | static void fix_processor_context(void) | ||
5492 | { | ||
5493 | int cpu = smp_processor_id(); | ||
5494 | - struct tss_struct *t = &per_cpu(cpu_tss, cpu); | ||
5495 | #ifdef CONFIG_X86_64 | ||
5496 | struct desc_struct *desc = get_cpu_gdt_rw(cpu); | ||
5497 | tss_desc tss; | ||
5498 | #endif | ||
5499 | - set_tss_desc(cpu, t); /* | ||
5500 | - * This just modifies memory; should not be | ||
5501 | - * necessary. But... This is necessary, because | ||
5502 | - * 386 hardware has concept of busy TSS or some | ||
5503 | - * similar stupidity. | ||
5504 | - */ | ||
5505 | + | ||
5506 | + /* | ||
5507 | + * We need to reload TR, which requires that we change the | ||
5508 | + * GDT entry to indicate "available" first. | ||
5509 | + * | ||
5510 | + * XXX: This could probably all be replaced by a call to | ||
5511 | + * force_reload_TR(). | ||
5512 | + */ | ||
5513 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | ||
5514 | |||
5515 | #ifdef CONFIG_X86_64 | ||
5516 | memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); | ||
5517 | diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c | ||
5518 | index de503c225ae1..754d5391d9fa 100644 | ||
5519 | --- a/arch/x86/xen/enlighten_hvm.c | ||
5520 | +++ b/arch/x86/xen/enlighten_hvm.c | ||
5521 | @@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void) | ||
5522 | return xen_cpuid_base(); | ||
5523 | } | ||
5524 | |||
5525 | -const struct hypervisor_x86 x86_hyper_xen_hvm = { | ||
5526 | +const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = { | ||
5527 | .name = "Xen HVM", | ||
5528 | .detect = xen_platform_hvm, | ||
5529 | - .init_platform = xen_hvm_guest_init, | ||
5530 | - .pin_vcpu = xen_pin_vcpu, | ||
5531 | - .x2apic_available = xen_x2apic_para_available, | ||
5532 | - .init_mem_mapping = xen_hvm_init_mem_mapping, | ||
5533 | + .type = X86_HYPER_XEN_HVM, | ||
5534 | + .init.init_platform = xen_hvm_guest_init, | ||
5535 | + .init.x2apic_available = xen_x2apic_para_available, | ||
5536 | + .init.init_mem_mapping = xen_hvm_init_mem_mapping, | ||
5537 | + .runtime.pin_vcpu = xen_pin_vcpu, | ||
5538 | }; | ||
5539 | -EXPORT_SYMBOL(x86_hyper_xen_hvm); | ||
5540 | diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c | ||
5541 | index d4396e27b1fb..ae3a071e1d0f 100644 | ||
5542 | --- a/arch/x86/xen/enlighten_pv.c | ||
5543 | +++ b/arch/x86/xen/enlighten_pv.c | ||
5544 | @@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = { | ||
5545 | #ifdef CONFIG_X86_MCE | ||
5546 | { machine_check, xen_machine_check, true }, | ||
5547 | #endif | ||
5548 | - { nmi, xen_nmi, true }, | ||
5549 | + { nmi, xen_xennmi, true }, | ||
5550 | { overflow, xen_overflow, false }, | ||
5551 | #ifdef CONFIG_IA32_EMULATION | ||
5552 | { entry_INT80_compat, xen_entry_INT80_compat, false }, | ||
5553 | @@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry, | ||
5554 | } | ||
5555 | } | ||
5556 | |||
5557 | -static void xen_load_sp0(struct tss_struct *tss, | ||
5558 | - struct thread_struct *thread) | ||
5559 | +static void xen_load_sp0(unsigned long sp0) | ||
5560 | { | ||
5561 | struct multicall_space mcs; | ||
5562 | |||
5563 | mcs = xen_mc_entry(0); | ||
5564 | - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); | ||
5565 | + MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0); | ||
5566 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
5567 | - tss->x86_tss.sp0 = thread->sp0; | ||
5568 | + this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); | ||
5569 | } | ||
5570 | |||
5571 | void xen_set_iopl_mask(unsigned mask) | ||
5572 | @@ -1460,9 +1459,9 @@ static uint32_t __init xen_platform_pv(void) | ||
5573 | return 0; | ||
5574 | } | ||
5575 | |||
5576 | -const struct hypervisor_x86 x86_hyper_xen_pv = { | ||
5577 | +const __initconst struct hypervisor_x86 x86_hyper_xen_pv = { | ||
5578 | .name = "Xen PV", | ||
5579 | .detect = xen_platform_pv, | ||
5580 | - .pin_vcpu = xen_pin_vcpu, | ||
5581 | + .type = X86_HYPER_XEN_PV, | ||
5582 | + .runtime.pin_vcpu = xen_pin_vcpu, | ||
5583 | }; | ||
5584 | -EXPORT_SYMBOL(x86_hyper_xen_pv); | ||
5585 | diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c | ||
5586 | index 71495f1a86d7..c2454237fa67 100644 | ||
5587 | --- a/arch/x86/xen/mmu_pv.c | ||
5588 | +++ b/arch/x86/xen/mmu_pv.c | ||
5589 | @@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) | ||
5590 | } | ||
5591 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | ||
5592 | |||
5593 | -#if CONFIG_PGTABLE_LEVELS == 4 | ||
5594 | +#ifdef CONFIG_X86_64 | ||
5595 | __visible pudval_t xen_pud_val(pud_t pud) | ||
5596 | { | ||
5597 | return pte_mfn_to_pfn(pud.pud); | ||
5598 | @@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) | ||
5599 | |||
5600 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
5601 | } | ||
5602 | -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ | ||
5603 | +#endif /* CONFIG_X86_64 */ | ||
5604 | |||
5605 | static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, | ||
5606 | int (*func)(struct mm_struct *mm, struct page *, enum pt_level), | ||
5607 | @@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d, | ||
5608 | int (*func)(struct mm_struct *mm, struct page *, enum pt_level), | ||
5609 | bool last, unsigned long limit) | ||
5610 | { | ||
5611 | - int i, nr, flush = 0; | ||
5612 | + int flush = 0; | ||
5613 | + pud_t *pud; | ||
5614 | |||
5615 | - nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D; | ||
5616 | - for (i = 0; i < nr; i++) { | ||
5617 | - pud_t *pud; | ||
5618 | |||
5619 | - if (p4d_none(p4d[i])) | ||
5620 | - continue; | ||
5621 | + if (p4d_none(*p4d)) | ||
5622 | + return flush; | ||
5623 | |||
5624 | - pud = pud_offset(&p4d[i], 0); | ||
5625 | - if (PTRS_PER_PUD > 1) | ||
5626 | - flush |= (*func)(mm, virt_to_page(pud), PT_PUD); | ||
5627 | - flush |= xen_pud_walk(mm, pud, func, | ||
5628 | - last && i == nr - 1, limit); | ||
5629 | - } | ||
5630 | + pud = pud_offset(p4d, 0); | ||
5631 | + if (PTRS_PER_PUD > 1) | ||
5632 | + flush |= (*func)(mm, virt_to_page(pud), PT_PUD); | ||
5633 | + flush |= xen_pud_walk(mm, pud, func, last, limit); | ||
5634 | return flush; | ||
5635 | } | ||
5636 | |||
5637 | @@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, | ||
5638 | continue; | ||
5639 | |||
5640 | p4d = p4d_offset(&pgd[i], 0); | ||
5641 | - if (PTRS_PER_P4D > 1) | ||
5642 | - flush |= (*func)(mm, virt_to_page(p4d), PT_P4D); | ||
5643 | flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit); | ||
5644 | } | ||
5645 | |||
5646 | @@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr) | ||
5647 | { | ||
5648 | pgd_t *pgd; | ||
5649 | p4d_t *p4d; | ||
5650 | - unsigned int i; | ||
5651 | bool unpin; | ||
5652 | |||
5653 | unpin = (vaddr == 2 * PGDIR_SIZE); | ||
5654 | vaddr &= PMD_MASK; | ||
5655 | pgd = pgd_offset_k(vaddr); | ||
5656 | p4d = p4d_offset(pgd, 0); | ||
5657 | - for (i = 0; i < PTRS_PER_P4D; i++) { | ||
5658 | - if (p4d_none(p4d[i])) | ||
5659 | - continue; | ||
5660 | - xen_cleanmfnmap_p4d(p4d + i, unpin); | ||
5661 | - } | ||
5662 | - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
5663 | - set_pgd(pgd, __pgd(0)); | ||
5664 | - xen_cleanmfnmap_free_pgtbl(p4d, unpin); | ||
5665 | - } | ||
5666 | + if (!p4d_none(*p4d)) | ||
5667 | + xen_cleanmfnmap_p4d(p4d, unpin); | ||
5668 | } | ||
5669 | |||
5670 | static void __init xen_pagetable_p2m_free(void) | ||
5671 | @@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn) | ||
5672 | xen_release_ptpage(pfn, PT_PMD); | ||
5673 | } | ||
5674 | |||
5675 | -#if CONFIG_PGTABLE_LEVELS >= 4 | ||
5676 | +#ifdef CONFIG_X86_64 | ||
5677 | static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) | ||
5678 | { | ||
5679 | xen_alloc_ptpage(mm, pfn, PT_PUD); | ||
5680 | @@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr) | ||
5681 | */ | ||
5682 | void __init xen_relocate_p2m(void) | ||
5683 | { | ||
5684 | - phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys; | ||
5685 | + phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys; | ||
5686 | unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end; | ||
5687 | - int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d; | ||
5688 | + int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud; | ||
5689 | pte_t *pt; | ||
5690 | pmd_t *pmd; | ||
5691 | pud_t *pud; | ||
5692 | - p4d_t *p4d = NULL; | ||
5693 | pgd_t *pgd; | ||
5694 | unsigned long *new_p2m; | ||
5695 | int save_pud; | ||
5696 | @@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void) | ||
5697 | n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT; | ||
5698 | n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT; | ||
5699 | n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT; | ||
5700 | - if (PTRS_PER_P4D > 1) | ||
5701 | - n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT; | ||
5702 | - else | ||
5703 | - n_p4d = 0; | ||
5704 | - n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d; | ||
5705 | + n_frames = n_pte + n_pt + n_pmd + n_pud; | ||
5706 | |||
5707 | new_area = xen_find_free_area(PFN_PHYS(n_frames)); | ||
5708 | if (!new_area) { | ||
5709 | @@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void) | ||
5710 | * To avoid any possible virtual address collision, just use | ||
5711 | * 2 * PUD_SIZE for the new area. | ||
5712 | */ | ||
5713 | - p4d_phys = new_area; | ||
5714 | - pud_phys = p4d_phys + PFN_PHYS(n_p4d); | ||
5715 | + pud_phys = new_area; | ||
5716 | pmd_phys = pud_phys + PFN_PHYS(n_pud); | ||
5717 | pt_phys = pmd_phys + PFN_PHYS(n_pmd); | ||
5718 | p2m_pfn = PFN_DOWN(pt_phys) + n_pt; | ||
5719 | |||
5720 | pgd = __va(read_cr3_pa()); | ||
5721 | new_p2m = (unsigned long *)(2 * PGDIR_SIZE); | ||
5722 | - idx_p4d = 0; | ||
5723 | save_pud = n_pud; | ||
5724 | - do { | ||
5725 | - if (n_p4d > 0) { | ||
5726 | - p4d = early_memremap(p4d_phys, PAGE_SIZE); | ||
5727 | - clear_page(p4d); | ||
5728 | - n_pud = min(save_pud, PTRS_PER_P4D); | ||
5729 | - } | ||
5730 | - for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { | ||
5731 | - pud = early_memremap(pud_phys, PAGE_SIZE); | ||
5732 | - clear_page(pud); | ||
5733 | - for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); | ||
5734 | - idx_pmd++) { | ||
5735 | - pmd = early_memremap(pmd_phys, PAGE_SIZE); | ||
5736 | - clear_page(pmd); | ||
5737 | - for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); | ||
5738 | - idx_pt++) { | ||
5739 | - pt = early_memremap(pt_phys, PAGE_SIZE); | ||
5740 | - clear_page(pt); | ||
5741 | - for (idx_pte = 0; | ||
5742 | - idx_pte < min(n_pte, PTRS_PER_PTE); | ||
5743 | - idx_pte++) { | ||
5744 | - set_pte(pt + idx_pte, | ||
5745 | - pfn_pte(p2m_pfn, PAGE_KERNEL)); | ||
5746 | - p2m_pfn++; | ||
5747 | - } | ||
5748 | - n_pte -= PTRS_PER_PTE; | ||
5749 | - early_memunmap(pt, PAGE_SIZE); | ||
5750 | - make_lowmem_page_readonly(__va(pt_phys)); | ||
5751 | - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, | ||
5752 | - PFN_DOWN(pt_phys)); | ||
5753 | - set_pmd(pmd + idx_pt, | ||
5754 | - __pmd(_PAGE_TABLE | pt_phys)); | ||
5755 | - pt_phys += PAGE_SIZE; | ||
5756 | + for (idx_pud = 0; idx_pud < n_pud; idx_pud++) { | ||
5757 | + pud = early_memremap(pud_phys, PAGE_SIZE); | ||
5758 | + clear_page(pud); | ||
5759 | + for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD); | ||
5760 | + idx_pmd++) { | ||
5761 | + pmd = early_memremap(pmd_phys, PAGE_SIZE); | ||
5762 | + clear_page(pmd); | ||
5763 | + for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD); | ||
5764 | + idx_pt++) { | ||
5765 | + pt = early_memremap(pt_phys, PAGE_SIZE); | ||
5766 | + clear_page(pt); | ||
5767 | + for (idx_pte = 0; | ||
5768 | + idx_pte < min(n_pte, PTRS_PER_PTE); | ||
5769 | + idx_pte++) { | ||
5770 | + set_pte(pt + idx_pte, | ||
5771 | + pfn_pte(p2m_pfn, PAGE_KERNEL)); | ||
5772 | + p2m_pfn++; | ||
5773 | } | ||
5774 | - n_pt -= PTRS_PER_PMD; | ||
5775 | - early_memunmap(pmd, PAGE_SIZE); | ||
5776 | - make_lowmem_page_readonly(__va(pmd_phys)); | ||
5777 | - pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, | ||
5778 | - PFN_DOWN(pmd_phys)); | ||
5779 | - set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); | ||
5780 | - pmd_phys += PAGE_SIZE; | ||
5781 | + n_pte -= PTRS_PER_PTE; | ||
5782 | + early_memunmap(pt, PAGE_SIZE); | ||
5783 | + make_lowmem_page_readonly(__va(pt_phys)); | ||
5784 | + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, | ||
5785 | + PFN_DOWN(pt_phys)); | ||
5786 | + set_pmd(pmd + idx_pt, | ||
5787 | + __pmd(_PAGE_TABLE | pt_phys)); | ||
5788 | + pt_phys += PAGE_SIZE; | ||
5789 | } | ||
5790 | - n_pmd -= PTRS_PER_PUD; | ||
5791 | - early_memunmap(pud, PAGE_SIZE); | ||
5792 | - make_lowmem_page_readonly(__va(pud_phys)); | ||
5793 | - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); | ||
5794 | - if (n_p4d > 0) | ||
5795 | - set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys)); | ||
5796 | - else | ||
5797 | - set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); | ||
5798 | - pud_phys += PAGE_SIZE; | ||
5799 | - } | ||
5800 | - if (n_p4d > 0) { | ||
5801 | - save_pud -= PTRS_PER_P4D; | ||
5802 | - early_memunmap(p4d, PAGE_SIZE); | ||
5803 | - make_lowmem_page_readonly(__va(p4d_phys)); | ||
5804 | - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys)); | ||
5805 | - set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys)); | ||
5806 | - p4d_phys += PAGE_SIZE; | ||
5807 | + n_pt -= PTRS_PER_PMD; | ||
5808 | + early_memunmap(pmd, PAGE_SIZE); | ||
5809 | + make_lowmem_page_readonly(__va(pmd_phys)); | ||
5810 | + pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE, | ||
5811 | + PFN_DOWN(pmd_phys)); | ||
5812 | + set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys)); | ||
5813 | + pmd_phys += PAGE_SIZE; | ||
5814 | } | ||
5815 | - } while (++idx_p4d < n_p4d); | ||
5816 | + n_pmd -= PTRS_PER_PUD; | ||
5817 | + early_memunmap(pud, PAGE_SIZE); | ||
5818 | + make_lowmem_page_readonly(__va(pud_phys)); | ||
5819 | + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys)); | ||
5820 | + set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys)); | ||
5821 | + pud_phys += PAGE_SIZE; | ||
5822 | + } | ||
5823 | |||
5824 | /* Now copy the old p2m info to the new area. */ | ||
5825 | memcpy(new_p2m, xen_p2m_addr, size); | ||
5826 | @@ -2311,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | ||
5827 | #endif | ||
5828 | case FIX_TEXT_POKE0: | ||
5829 | case FIX_TEXT_POKE1: | ||
5830 | - case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END: | ||
5831 | + case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM: | ||
5832 | /* All local page mappings */ | ||
5833 | pte = pfn_pte(phys, prot); | ||
5834 | break; | ||
5835 | @@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void) | ||
5836 | pv_mmu_ops.set_pte = xen_set_pte; | ||
5837 | pv_mmu_ops.set_pmd = xen_set_pmd; | ||
5838 | pv_mmu_ops.set_pud = xen_set_pud; | ||
5839 | -#if CONFIG_PGTABLE_LEVELS >= 4 | ||
5840 | +#ifdef CONFIG_X86_64 | ||
5841 | pv_mmu_ops.set_p4d = xen_set_p4d; | ||
5842 | #endif | ||
5843 | |||
5844 | @@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void) | ||
5845 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | ||
5846 | pv_mmu_ops.release_pte = xen_release_pte; | ||
5847 | pv_mmu_ops.release_pmd = xen_release_pmd; | ||
5848 | -#if CONFIG_PGTABLE_LEVELS >= 4 | ||
5849 | +#ifdef CONFIG_X86_64 | ||
5850 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | ||
5851 | pv_mmu_ops.release_pud = xen_release_pud; | ||
5852 | #endif | ||
5853 | @@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | ||
5854 | .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), | ||
5855 | .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), | ||
5856 | |||
5857 | -#if CONFIG_PGTABLE_LEVELS >= 4 | ||
5858 | +#ifdef CONFIG_X86_64 | ||
5859 | .pud_val = PV_CALLEE_SAVE(xen_pud_val), | ||
5860 | .make_pud = PV_CALLEE_SAVE(xen_make_pud), | ||
5861 | .set_p4d = xen_set_p4d_hyper, | ||
5862 | |||
5863 | .alloc_pud = xen_alloc_pmd_init, | ||
5864 | .release_pud = xen_release_pmd_init, | ||
5865 | -#endif /* CONFIG_PGTABLE_LEVELS == 4 */ | ||
5866 | +#endif /* CONFIG_X86_64 */ | ||
5867 | |||
5868 | .activate_mm = xen_activate_mm, | ||
5869 | .dup_mmap = xen_dup_mmap, | ||
5870 | diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c | ||
5871 | index 05f91ce9b55e..c0c756c76afe 100644 | ||
5872 | --- a/arch/x86/xen/smp_pv.c | ||
5873 | +++ b/arch/x86/xen/smp_pv.c | ||
5874 | @@ -14,6 +14,7 @@ | ||
5875 | * single-threaded. | ||
5876 | */ | ||
5877 | #include <linux/sched.h> | ||
5878 | +#include <linux/sched/task_stack.h> | ||
5879 | #include <linux/err.h> | ||
5880 | #include <linux/slab.h> | ||
5881 | #include <linux/smp.h> | ||
5882 | @@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | ||
5883 | #endif | ||
5884 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | ||
5885 | |||
5886 | + /* | ||
5887 | + * Bring up the CPU in cpu_bringup_and_idle() with the stack | ||
5888 | + * pointing just below where pt_regs would be if it were a normal | ||
5889 | + * kernel entry. | ||
5890 | + */ | ||
5891 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | ||
5892 | ctxt->flags = VGCF_IN_KERNEL; | ||
5893 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | ||
5894 | ctxt->user_regs.ds = __USER_DS; | ||
5895 | ctxt->user_regs.es = __USER_DS; | ||
5896 | ctxt->user_regs.ss = __KERNEL_DS; | ||
5897 | + ctxt->user_regs.cs = __KERNEL_CS; | ||
5898 | + ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle); | ||
5899 | |||
5900 | xen_copy_trap_info(ctxt->trap_ctxt); | ||
5901 | |||
5902 | @@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | ||
5903 | ctxt->gdt_frames[0] = gdt_mfn; | ||
5904 | ctxt->gdt_ents = GDT_ENTRIES; | ||
5905 | |||
5906 | + /* | ||
5907 | + * Set SS:SP that Xen will use when entering guest kernel mode | ||
5908 | + * from guest user mode. Subsequent calls to load_sp0() can | ||
5909 | + * change this value. | ||
5910 | + */ | ||
5911 | ctxt->kernel_ss = __KERNEL_DS; | ||
5912 | - ctxt->kernel_sp = idle->thread.sp0; | ||
5913 | + ctxt->kernel_sp = task_top_of_stack(idle); | ||
5914 | |||
5915 | #ifdef CONFIG_X86_32 | ||
5916 | ctxt->event_callback_cs = __KERNEL_CS; | ||
5917 | @@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | ||
5918 | (unsigned long)xen_hypervisor_callback; | ||
5919 | ctxt->failsafe_callback_eip = | ||
5920 | (unsigned long)xen_failsafe_callback; | ||
5921 | - ctxt->user_regs.cs = __KERNEL_CS; | ||
5922 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
5923 | |||
5924 | - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | ||
5925 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); | ||
5926 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) | ||
5927 | BUG(); | ||
5928 | diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S | ||
5929 | index c98a48c861fd..8a10c9a9e2b5 100644 | ||
5930 | --- a/arch/x86/xen/xen-asm_64.S | ||
5931 | +++ b/arch/x86/xen/xen-asm_64.S | ||
5932 | @@ -30,7 +30,7 @@ xen_pv_trap debug | ||
5933 | xen_pv_trap xendebug | ||
5934 | xen_pv_trap int3 | ||
5935 | xen_pv_trap xenint3 | ||
5936 | -xen_pv_trap nmi | ||
5937 | +xen_pv_trap xennmi | ||
5938 | xen_pv_trap overflow | ||
5939 | xen_pv_trap bounds | ||
5940 | xen_pv_trap invalid_op | ||
5941 | diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S | ||
5942 | index b5b8d7f43557..497cc55a0c16 100644 | ||
5943 | --- a/arch/x86/xen/xen-head.S | ||
5944 | +++ b/arch/x86/xen/xen-head.S | ||
5945 | @@ -10,6 +10,7 @@ | ||
5946 | #include <asm/boot.h> | ||
5947 | #include <asm/asm.h> | ||
5948 | #include <asm/page_types.h> | ||
5949 | +#include <asm/unwind_hints.h> | ||
5950 | |||
5951 | #include <xen/interface/elfnote.h> | ||
5952 | #include <xen/interface/features.h> | ||
5953 | @@ -20,6 +21,7 @@ | ||
5954 | #ifdef CONFIG_XEN_PV | ||
5955 | __INIT | ||
5956 | ENTRY(startup_xen) | ||
5957 | + UNWIND_HINT_EMPTY | ||
5958 | cld | ||
5959 | |||
5960 | /* Clear .bss */ | ||
5961 | @@ -34,21 +36,24 @@ ENTRY(startup_xen) | ||
5962 | mov $init_thread_union+THREAD_SIZE, %_ASM_SP | ||
5963 | |||
5964 | jmp xen_start_kernel | ||
5965 | - | ||
5966 | +END(startup_xen) | ||
5967 | __FINIT | ||
5968 | #endif | ||
5969 | |||
5970 | .pushsection .text | ||
5971 | .balign PAGE_SIZE | ||
5972 | ENTRY(hypercall_page) | ||
5973 | - .skip PAGE_SIZE | ||
5974 | + .rept (PAGE_SIZE / 32) | ||
5975 | + UNWIND_HINT_EMPTY | ||
5976 | + .skip 32 | ||
5977 | + .endr | ||
5978 | |||
5979 | #define HYPERCALL(n) \ | ||
5980 | .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ | ||
5981 | .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 | ||
5982 | #include <asm/xen-hypercalls.h> | ||
5983 | #undef HYPERCALL | ||
5984 | - | ||
5985 | +END(hypercall_page) | ||
5986 | .popsection | ||
5987 | |||
5988 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | ||
5989 | diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c | ||
5990 | index a4783da90ba8..0f860cf0d56d 100644 | ||
5991 | --- a/block/bfq-iosched.c | ||
5992 | +++ b/block/bfq-iosched.c | ||
5993 | @@ -108,6 +108,7 @@ | ||
5994 | #include "blk-mq-tag.h" | ||
5995 | #include "blk-mq-sched.h" | ||
5996 | #include "bfq-iosched.h" | ||
5997 | +#include "blk-wbt.h" | ||
5998 | |||
5999 | #define BFQ_BFQQ_FNS(name) \ | ||
6000 | void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \ | ||
6001 | @@ -4775,7 +4776,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) | ||
6002 | bfq_init_root_group(bfqd->root_group, bfqd); | ||
6003 | bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group); | ||
6004 | |||
6005 | - | ||
6006 | + wbt_disable_default(q); | ||
6007 | return 0; | ||
6008 | |||
6009 | out_free: | ||
6010 | diff --git a/block/blk-wbt.c b/block/blk-wbt.c | ||
6011 | index 6a9a0f03a67b..e59d59c11ebb 100644 | ||
6012 | --- a/block/blk-wbt.c | ||
6013 | +++ b/block/blk-wbt.c | ||
6014 | @@ -654,7 +654,7 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on) | ||
6015 | } | ||
6016 | |||
6017 | /* | ||
6018 | - * Disable wbt, if enabled by default. Only called from CFQ. | ||
6019 | + * Disable wbt, if enabled by default. | ||
6020 | */ | ||
6021 | void wbt_disable_default(struct request_queue *q) | ||
6022 | { | ||
6023 | diff --git a/crypto/lrw.c b/crypto/lrw.c | ||
6024 | index a8bfae4451bf..eb681e9fe574 100644 | ||
6025 | --- a/crypto/lrw.c | ||
6026 | +++ b/crypto/lrw.c | ||
6027 | @@ -610,8 +610,10 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) | ||
6028 | ecb_name[len - 1] = 0; | ||
6029 | |||
6030 | if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, | ||
6031 | - "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) | ||
6032 | - return -ENAMETOOLONG; | ||
6033 | + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) { | ||
6034 | + err = -ENAMETOOLONG; | ||
6035 | + goto err_drop_spawn; | ||
6036 | + } | ||
6037 | } | ||
6038 | |||
6039 | inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; | ||
6040 | diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c | ||
6041 | index 3c3a37b8503b..572b6c7303ed 100644 | ||
6042 | --- a/drivers/acpi/apei/ghes.c | ||
6043 | +++ b/drivers/acpi/apei/ghes.c | ||
6044 | @@ -51,6 +51,7 @@ | ||
6045 | #include <acpi/actbl1.h> | ||
6046 | #include <acpi/ghes.h> | ||
6047 | #include <acpi/apei.h> | ||
6048 | +#include <asm/fixmap.h> | ||
6049 | #include <asm/tlbflush.h> | ||
6050 | #include <ras/ras_event.h> | ||
6051 | |||
6052 | @@ -112,7 +113,7 @@ static DEFINE_MUTEX(ghes_list_mutex); | ||
6053 | * Because the memory area used to transfer hardware error information | ||
6054 | * from BIOS to Linux can be determined only in NMI, IRQ or timer | ||
6055 | * handler, but general ioremap can not be used in atomic context, so | ||
6056 | - * a special version of atomic ioremap is implemented for that. | ||
6057 | + * the fixmap is used instead. | ||
6058 | */ | ||
6059 | |||
6060 | /* | ||
6061 | @@ -126,8 +127,8 @@ static DEFINE_MUTEX(ghes_list_mutex); | ||
6062 | /* virtual memory area for atomic ioremap */ | ||
6063 | static struct vm_struct *ghes_ioremap_area; | ||
6064 | /* | ||
6065 | - * These 2 spinlock is used to prevent atomic ioremap virtual memory | ||
6066 | - * area from being mapped simultaneously. | ||
6067 | + * These 2 spinlocks are used to prevent the fixmap entries from being used | ||
6068 | + * simultaneously. | ||
6069 | */ | ||
6070 | static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); | ||
6071 | static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); | ||
6072 | @@ -159,52 +160,36 @@ static void ghes_ioremap_exit(void) | ||
6073 | |||
6074 | static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) | ||
6075 | { | ||
6076 | - unsigned long vaddr; | ||
6077 | phys_addr_t paddr; | ||
6078 | pgprot_t prot; | ||
6079 | |||
6080 | - vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); | ||
6081 | - | ||
6082 | paddr = pfn << PAGE_SHIFT; | ||
6083 | prot = arch_apei_get_mem_attribute(paddr); | ||
6084 | - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); | ||
6085 | + __set_fixmap(FIX_APEI_GHES_NMI, paddr, prot); | ||
6086 | |||
6087 | - return (void __iomem *)vaddr; | ||
6088 | + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_NMI); | ||
6089 | } | ||
6090 | |||
6091 | static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) | ||
6092 | { | ||
6093 | - unsigned long vaddr, paddr; | ||
6094 | + phys_addr_t paddr; | ||
6095 | pgprot_t prot; | ||
6096 | |||
6097 | - vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); | ||
6098 | - | ||
6099 | paddr = pfn << PAGE_SHIFT; | ||
6100 | prot = arch_apei_get_mem_attribute(paddr); | ||
6101 | + __set_fixmap(FIX_APEI_GHES_IRQ, paddr, prot); | ||
6102 | |||
6103 | - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); | ||
6104 | - | ||
6105 | - return (void __iomem *)vaddr; | ||
6106 | + return (void __iomem *) fix_to_virt(FIX_APEI_GHES_IRQ); | ||
6107 | } | ||
6108 | |||
6109 | -static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) | ||
6110 | +static void ghes_iounmap_nmi(void) | ||
6111 | { | ||
6112 | - unsigned long vaddr = (unsigned long __force)vaddr_ptr; | ||
6113 | - void *base = ghes_ioremap_area->addr; | ||
6114 | - | ||
6115 | - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); | ||
6116 | - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | ||
6117 | - arch_apei_flush_tlb_one(vaddr); | ||
6118 | + clear_fixmap(FIX_APEI_GHES_NMI); | ||
6119 | } | ||
6120 | |||
6121 | -static void ghes_iounmap_irq(void __iomem *vaddr_ptr) | ||
6122 | +static void ghes_iounmap_irq(void) | ||
6123 | { | ||
6124 | - unsigned long vaddr = (unsigned long __force)vaddr_ptr; | ||
6125 | - void *base = ghes_ioremap_area->addr; | ||
6126 | - | ||
6127 | - BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); | ||
6128 | - unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | ||
6129 | - arch_apei_flush_tlb_one(vaddr); | ||
6130 | + clear_fixmap(FIX_APEI_GHES_IRQ); | ||
6131 | } | ||
6132 | |||
6133 | static int ghes_estatus_pool_init(void) | ||
6134 | @@ -360,10 +345,10 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, | ||
6135 | paddr += trunk; | ||
6136 | buffer += trunk; | ||
6137 | if (in_nmi) { | ||
6138 | - ghes_iounmap_nmi(vaddr); | ||
6139 | + ghes_iounmap_nmi(); | ||
6140 | raw_spin_unlock(&ghes_ioremap_lock_nmi); | ||
6141 | } else { | ||
6142 | - ghes_iounmap_irq(vaddr); | ||
6143 | + ghes_iounmap_irq(); | ||
6144 | spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); | ||
6145 | } | ||
6146 | } | ||
6147 | @@ -851,17 +836,8 @@ static void ghes_sea_remove(struct ghes *ghes) | ||
6148 | synchronize_rcu(); | ||
6149 | } | ||
6150 | #else /* CONFIG_ACPI_APEI_SEA */ | ||
6151 | -static inline void ghes_sea_add(struct ghes *ghes) | ||
6152 | -{ | ||
6153 | - pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", | ||
6154 | - ghes->generic->header.source_id); | ||
6155 | -} | ||
6156 | - | ||
6157 | -static inline void ghes_sea_remove(struct ghes *ghes) | ||
6158 | -{ | ||
6159 | - pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", | ||
6160 | - ghes->generic->header.source_id); | ||
6161 | -} | ||
6162 | +static inline void ghes_sea_add(struct ghes *ghes) { } | ||
6163 | +static inline void ghes_sea_remove(struct ghes *ghes) { } | ||
6164 | #endif /* CONFIG_ACPI_APEI_SEA */ | ||
6165 | |||
6166 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | ||
6167 | @@ -1063,23 +1039,9 @@ static void ghes_nmi_init_cxt(void) | ||
6168 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | ||
6169 | } | ||
6170 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ | ||
6171 | -static inline void ghes_nmi_add(struct ghes *ghes) | ||
6172 | -{ | ||
6173 | - pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", | ||
6174 | - ghes->generic->header.source_id); | ||
6175 | - BUG(); | ||
6176 | -} | ||
6177 | - | ||
6178 | -static inline void ghes_nmi_remove(struct ghes *ghes) | ||
6179 | -{ | ||
6180 | - pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", | ||
6181 | - ghes->generic->header.source_id); | ||
6182 | - BUG(); | ||
6183 | -} | ||
6184 | - | ||
6185 | -static inline void ghes_nmi_init_cxt(void) | ||
6186 | -{ | ||
6187 | -} | ||
6188 | +static inline void ghes_nmi_add(struct ghes *ghes) { } | ||
6189 | +static inline void ghes_nmi_remove(struct ghes *ghes) { } | ||
6190 | +static inline void ghes_nmi_init_cxt(void) { } | ||
6191 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ | ||
6192 | |||
6193 | static int ghes_probe(struct platform_device *ghes_dev) | ||
6194 | diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c | ||
6195 | index a6de32530693..0459b1204694 100644 | ||
6196 | --- a/drivers/base/power/opp/core.c | ||
6197 | +++ b/drivers/base/power/opp/core.c | ||
6198 | @@ -296,7 +296,7 @@ int dev_pm_opp_get_opp_count(struct device *dev) | ||
6199 | opp_table = _find_opp_table(dev); | ||
6200 | if (IS_ERR(opp_table)) { | ||
6201 | count = PTR_ERR(opp_table); | ||
6202 | - dev_err(dev, "%s: OPP table not found (%d)\n", | ||
6203 | + dev_dbg(dev, "%s: OPP table not found (%d)\n", | ||
6204 | __func__, count); | ||
6205 | return count; | ||
6206 | } | ||
6207 | diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c | ||
6208 | index e2540113d0da..73d2d88ddc03 100644 | ||
6209 | --- a/drivers/bluetooth/hci_bcm.c | ||
6210 | +++ b/drivers/bluetooth/hci_bcm.c | ||
6211 | @@ -68,7 +68,7 @@ struct bcm_device { | ||
6212 | u32 init_speed; | ||
6213 | u32 oper_speed; | ||
6214 | int irq; | ||
6215 | - u8 irq_polarity; | ||
6216 | + bool irq_active_low; | ||
6217 | |||
6218 | #ifdef CONFIG_PM | ||
6219 | struct hci_uart *hu; | ||
6220 | @@ -213,7 +213,9 @@ static int bcm_request_irq(struct bcm_data *bcm) | ||
6221 | } | ||
6222 | |||
6223 | err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake, | ||
6224 | - IRQF_TRIGGER_RISING, "host_wake", bdev); | ||
6225 | + bdev->irq_active_low ? IRQF_TRIGGER_FALLING : | ||
6226 | + IRQF_TRIGGER_RISING, | ||
6227 | + "host_wake", bdev); | ||
6228 | if (err) | ||
6229 | goto unlock; | ||
6230 | |||
6231 | @@ -253,7 +255,7 @@ static int bcm_setup_sleep(struct hci_uart *hu) | ||
6232 | struct sk_buff *skb; | ||
6233 | struct bcm_set_sleep_mode sleep_params = default_sleep_params; | ||
6234 | |||
6235 | - sleep_params.host_wake_active = !bcm->dev->irq_polarity; | ||
6236 | + sleep_params.host_wake_active = !bcm->dev->irq_active_low; | ||
6237 | |||
6238 | skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params), | ||
6239 | &sleep_params, HCI_INIT_TIMEOUT); | ||
6240 | @@ -690,10 +692,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = { | ||
6241 | }; | ||
6242 | |||
6243 | #ifdef CONFIG_ACPI | ||
6244 | -static u8 acpi_active_low = ACPI_ACTIVE_LOW; | ||
6245 | - | ||
6246 | /* IRQ polarity of some chipsets are not defined correctly in ACPI table. */ | ||
6247 | -static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { | ||
6248 | +static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = { | ||
6249 | { | ||
6250 | .ident = "Asus T100TA", | ||
6251 | .matches = { | ||
6252 | @@ -701,7 +701,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { | ||
6253 | "ASUSTeK COMPUTER INC."), | ||
6254 | DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), | ||
6255 | }, | ||
6256 | - .driver_data = &acpi_active_low, | ||
6257 | }, | ||
6258 | { | ||
6259 | .ident = "Asus T100CHI", | ||
6260 | @@ -710,7 +709,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { | ||
6261 | "ASUSTeK COMPUTER INC."), | ||
6262 | DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"), | ||
6263 | }, | ||
6264 | - .driver_data = &acpi_active_low, | ||
6265 | }, | ||
6266 | { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ | ||
6267 | .ident = "Lenovo ThinkPad 8", | ||
6268 | @@ -718,7 +716,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { | ||
6269 | DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), | ||
6270 | DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), | ||
6271 | }, | ||
6272 | - .driver_data = &acpi_active_low, | ||
6273 | }, | ||
6274 | { } | ||
6275 | }; | ||
6276 | @@ -733,13 +730,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data) | ||
6277 | switch (ares->type) { | ||
6278 | case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: | ||
6279 | irq = &ares->data.extended_irq; | ||
6280 | - dev->irq_polarity = irq->polarity; | ||
6281 | + dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW; | ||
6282 | break; | ||
6283 | |||
6284 | case ACPI_RESOURCE_TYPE_GPIO: | ||
6285 | gpio = &ares->data.gpio; | ||
6286 | if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) | ||
6287 | - dev->irq_polarity = gpio->polarity; | ||
6288 | + dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW; | ||
6289 | break; | ||
6290 | |||
6291 | case ACPI_RESOURCE_TYPE_SERIAL_BUS: | ||
6292 | @@ -834,11 +831,11 @@ static int bcm_acpi_probe(struct bcm_device *dev) | ||
6293 | return ret; | ||
6294 | acpi_dev_free_resource_list(&resources); | ||
6295 | |||
6296 | - dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table); | ||
6297 | + dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table); | ||
6298 | if (dmi_id) { | ||
6299 | bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low", | ||
6300 | dmi_id->ident); | ||
6301 | - dev->irq_polarity = *(u8 *)dmi_id->driver_data; | ||
6302 | + dev->irq_active_low = true; | ||
6303 | } | ||
6304 | |||
6305 | return 0; | ||
6306 | diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c | ||
6307 | index 6e2403805784..6aef3bde10d7 100644 | ||
6308 | --- a/drivers/bluetooth/hci_ldisc.c | ||
6309 | +++ b/drivers/bluetooth/hci_ldisc.c | ||
6310 | @@ -41,6 +41,7 @@ | ||
6311 | #include <linux/ioctl.h> | ||
6312 | #include <linux/skbuff.h> | ||
6313 | #include <linux/firmware.h> | ||
6314 | +#include <linux/serdev.h> | ||
6315 | |||
6316 | #include <net/bluetooth/bluetooth.h> | ||
6317 | #include <net/bluetooth/hci_core.h> | ||
6318 | @@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable) | ||
6319 | unsigned int set = 0; | ||
6320 | unsigned int clear = 0; | ||
6321 | |||
6322 | + if (hu->serdev) { | ||
6323 | + serdev_device_set_flow_control(hu->serdev, !enable); | ||
6324 | + serdev_device_set_rts(hu->serdev, !enable); | ||
6325 | + return; | ||
6326 | + } | ||
6327 | + | ||
6328 | if (enable) { | ||
6329 | /* Disable hardware flow control */ | ||
6330 | ktermios = tty->termios; | ||
6331 | diff --git a/drivers/clk/sunxi-ng/ccu-sun5i.c b/drivers/clk/sunxi-ng/ccu-sun5i.c | ||
6332 | index ab9e850b3707..2f385a57cd91 100644 | ||
6333 | --- a/drivers/clk/sunxi-ng/ccu-sun5i.c | ||
6334 | +++ b/drivers/clk/sunxi-ng/ccu-sun5i.c | ||
6335 | @@ -982,8 +982,8 @@ static void __init sun5i_ccu_init(struct device_node *node, | ||
6336 | |||
6337 | /* Force the PLL-Audio-1x divider to 4 */ | ||
6338 | val = readl(reg + SUN5I_PLL_AUDIO_REG); | ||
6339 | - val &= ~GENMASK(19, 16); | ||
6340 | - writel(val | (3 << 16), reg + SUN5I_PLL_AUDIO_REG); | ||
6341 | + val &= ~GENMASK(29, 26); | ||
6342 | + writel(val | (3 << 26), reg + SUN5I_PLL_AUDIO_REG); | ||
6343 | |||
6344 | /* | ||
6345 | * Use the peripheral PLL as the AHB parent, instead of CPU / | ||
6346 | diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c | ||
6347 | index 8af434815fba..241fb13f1c06 100644 | ||
6348 | --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c | ||
6349 | +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c | ||
6350 | @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, | ||
6351 | 0x150, 0, 4, 24, 2, BIT(31), | ||
6352 | CLK_SET_RATE_PARENT); | ||
6353 | |||
6354 | -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); | ||
6355 | +static SUNXI_CCU_GATE(hdmi_ddc_clk, "ddc", "osc24M", 0x150, BIT(30), 0); | ||
6356 | |||
6357 | static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); | ||
6358 | |||
6359 | diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c | ||
6360 | index a32158e8f2e3..84a5e7f17f6f 100644 | ||
6361 | --- a/drivers/clk/sunxi-ng/ccu_nm.c | ||
6362 | +++ b/drivers/clk/sunxi-ng/ccu_nm.c | ||
6363 | @@ -99,6 +99,9 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate, | ||
6364 | struct ccu_nm *nm = hw_to_ccu_nm(hw); | ||
6365 | struct _ccu_nm _nm; | ||
6366 | |||
6367 | + if (ccu_frac_helper_has_rate(&nm->common, &nm->frac, rate)) | ||
6368 | + return rate; | ||
6369 | + | ||
6370 | _nm.min_n = nm->n.min ?: 1; | ||
6371 | _nm.max_n = nm->n.max ?: 1 << nm->n.width; | ||
6372 | _nm.min_m = 1; | ||
6373 | diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c | ||
6374 | index 484cc8909d5c..ed4df58a855e 100644 | ||
6375 | --- a/drivers/cpuidle/cpuidle.c | ||
6376 | +++ b/drivers/cpuidle/cpuidle.c | ||
6377 | @@ -208,6 +208,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, | ||
6378 | return -EBUSY; | ||
6379 | } | ||
6380 | target_state = &drv->states[index]; | ||
6381 | + broadcast = false; | ||
6382 | } | ||
6383 | |||
6384 | /* Take note of the planned idle state. */ | ||
6385 | diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h | ||
6386 | index ecfdcfe3698d..4f41d6da5acc 100644 | ||
6387 | --- a/drivers/crypto/amcc/crypto4xx_core.h | ||
6388 | +++ b/drivers/crypto/amcc/crypto4xx_core.h | ||
6389 | @@ -34,12 +34,12 @@ | ||
6390 | #define PPC405EX_CE_RESET 0x00000008 | ||
6391 | |||
6392 | #define CRYPTO4XX_CRYPTO_PRIORITY 300 | ||
6393 | -#define PPC4XX_LAST_PD 63 | ||
6394 | -#define PPC4XX_NUM_PD 64 | ||
6395 | -#define PPC4XX_LAST_GD 1023 | ||
6396 | +#define PPC4XX_NUM_PD 256 | ||
6397 | +#define PPC4XX_LAST_PD (PPC4XX_NUM_PD - 1) | ||
6398 | #define PPC4XX_NUM_GD 1024 | ||
6399 | -#define PPC4XX_LAST_SD 63 | ||
6400 | -#define PPC4XX_NUM_SD 64 | ||
6401 | +#define PPC4XX_LAST_GD (PPC4XX_NUM_GD - 1) | ||
6402 | +#define PPC4XX_NUM_SD 256 | ||
6403 | +#define PPC4XX_LAST_SD (PPC4XX_NUM_SD - 1) | ||
6404 | #define PPC4XX_SD_BUFFER_SIZE 2048 | ||
6405 | |||
6406 | #define PD_ENTRY_INUSE 1 | ||
6407 | diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c | ||
6408 | index 0ef9011a1856..02a50929af67 100644 | ||
6409 | --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c | ||
6410 | +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c | ||
6411 | @@ -410,6 +410,7 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, | ||
6412 | { | ||
6413 | u8 data; | ||
6414 | int ret = 0; | ||
6415 | + int retry; | ||
6416 | |||
6417 | if (!mode) { | ||
6418 | DRM_ERROR("NULL input\n"); | ||
6419 | @@ -417,10 +418,19 @@ int drm_lspcon_get_mode(struct i2c_adapter *adapter, | ||
6420 | } | ||
6421 | |||
6422 | /* Read Status: i2c over aux */ | ||
6423 | - ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_LSPCON_CURRENT_MODE, | ||
6424 | - &data, sizeof(data)); | ||
6425 | + for (retry = 0; retry < 6; retry++) { | ||
6426 | + if (retry) | ||
6427 | + usleep_range(500, 1000); | ||
6428 | + | ||
6429 | + ret = drm_dp_dual_mode_read(adapter, | ||
6430 | + DP_DUAL_MODE_LSPCON_CURRENT_MODE, | ||
6431 | + &data, sizeof(data)); | ||
6432 | + if (!ret) | ||
6433 | + break; | ||
6434 | + } | ||
6435 | + | ||
6436 | if (ret < 0) { | ||
6437 | - DRM_ERROR("LSPCON read(0x80, 0x41) failed\n"); | ||
6438 | + DRM_DEBUG_KMS("LSPCON read(0x80, 0x41) failed\n"); | ||
6439 | return -EFAULT; | ||
6440 | } | ||
6441 | |||
6442 | diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c | ||
6443 | index d1e0dc908048..04796d7d0fdb 100644 | ||
6444 | --- a/drivers/gpu/drm/vc4/vc4_dsi.c | ||
6445 | +++ b/drivers/gpu/drm/vc4/vc4_dsi.c | ||
6446 | @@ -866,7 +866,8 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, | ||
6447 | adjusted_mode->clock = pixel_clock_hz / 1000 + 1; | ||
6448 | |||
6449 | /* Given the new pixel clock, adjust HFP to keep vrefresh the same. */ | ||
6450 | - adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); | ||
6451 | + adjusted_mode->htotal = adjusted_mode->clock * mode->htotal / | ||
6452 | + mode->clock; | ||
6453 | adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; | ||
6454 | adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; | ||
6455 | |||
6456 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | ||
6457 | index 937801ac2fe0..2cd134dd94d2 100644 | ||
6458 | --- a/drivers/hv/vmbus_drv.c | ||
6459 | +++ b/drivers/hv/vmbus_drv.c | ||
6460 | @@ -1534,7 +1534,7 @@ static int __init hv_acpi_init(void) | ||
6461 | { | ||
6462 | int ret, t; | ||
6463 | |||
6464 | - if (x86_hyper != &x86_hyper_ms_hyperv) | ||
6465 | + if (x86_hyper_type != X86_HYPER_MS_HYPERV) | ||
6466 | return -ENODEV; | ||
6467 | |||
6468 | init_completion(&probe_event); | ||
6469 | diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c | ||
6470 | index 752856b3a849..379de1829cdb 100644 | ||
6471 | --- a/drivers/iio/accel/st_accel_core.c | ||
6472 | +++ b/drivers/iio/accel/st_accel_core.c | ||
6473 | @@ -164,7 +164,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6474 | .mask_int2 = 0x00, | ||
6475 | .addr_ihl = 0x25, | ||
6476 | .mask_ihl = 0x02, | ||
6477 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6478 | + .stat_drdy = { | ||
6479 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6480 | + .mask = 0x07, | ||
6481 | + }, | ||
6482 | }, | ||
6483 | .sim = { | ||
6484 | .addr = 0x23, | ||
6485 | @@ -236,7 +239,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6486 | .mask_ihl = 0x80, | ||
6487 | .addr_od = 0x22, | ||
6488 | .mask_od = 0x40, | ||
6489 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6490 | + .stat_drdy = { | ||
6491 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6492 | + .mask = 0x07, | ||
6493 | + }, | ||
6494 | }, | ||
6495 | .sim = { | ||
6496 | .addr = 0x23, | ||
6497 | @@ -318,7 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6498 | .mask_int2 = 0x00, | ||
6499 | .addr_ihl = 0x23, | ||
6500 | .mask_ihl = 0x40, | ||
6501 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6502 | + .stat_drdy = { | ||
6503 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6504 | + .mask = 0x07, | ||
6505 | + }, | ||
6506 | .ig1 = { | ||
6507 | .en_addr = 0x23, | ||
6508 | .en_mask = 0x08, | ||
6509 | @@ -389,7 +398,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6510 | .drdy_irq = { | ||
6511 | .addr = 0x21, | ||
6512 | .mask_int1 = 0x04, | ||
6513 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6514 | + .stat_drdy = { | ||
6515 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6516 | + .mask = 0x07, | ||
6517 | + }, | ||
6518 | }, | ||
6519 | .sim = { | ||
6520 | .addr = 0x21, | ||
6521 | @@ -451,7 +463,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6522 | .mask_ihl = 0x80, | ||
6523 | .addr_od = 0x22, | ||
6524 | .mask_od = 0x40, | ||
6525 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6526 | + .stat_drdy = { | ||
6527 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6528 | + .mask = 0x07, | ||
6529 | + }, | ||
6530 | }, | ||
6531 | .sim = { | ||
6532 | .addr = 0x21, | ||
6533 | @@ -569,7 +584,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6534 | .drdy_irq = { | ||
6535 | .addr = 0x21, | ||
6536 | .mask_int1 = 0x04, | ||
6537 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6538 | + .stat_drdy = { | ||
6539 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6540 | + .mask = 0x07, | ||
6541 | + }, | ||
6542 | }, | ||
6543 | .sim = { | ||
6544 | .addr = 0x21, | ||
6545 | @@ -640,7 +658,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { | ||
6546 | .mask_int2 = 0x00, | ||
6547 | .addr_ihl = 0x25, | ||
6548 | .mask_ihl = 0x02, | ||
6549 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6550 | + .stat_drdy = { | ||
6551 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6552 | + .mask = 0x07, | ||
6553 | + }, | ||
6554 | }, | ||
6555 | .sim = { | ||
6556 | .addr = 0x23, | ||
6557 | diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c | ||
6558 | index 02e833b14db0..34115f05d5c4 100644 | ||
6559 | --- a/drivers/iio/common/st_sensors/st_sensors_core.c | ||
6560 | +++ b/drivers/iio/common/st_sensors/st_sensors_core.c | ||
6561 | @@ -470,7 +470,7 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable) | ||
6562 | * different one. Take into account irq status register | ||
6563 | * to understand if irq trigger can be properly supported | ||
6564 | */ | ||
6565 | - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) | ||
6566 | + if (sdata->sensor_settings->drdy_irq.stat_drdy.addr) | ||
6567 | sdata->hw_irq_trigger = enable; | ||
6568 | return 0; | ||
6569 | } | ||
6570 | diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c | ||
6571 | index fa73e6795359..fdcc5a891958 100644 | ||
6572 | --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c | ||
6573 | +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c | ||
6574 | @@ -31,7 +31,7 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev, | ||
6575 | int ret; | ||
6576 | |||
6577 | /* How would I know if I can't check it? */ | ||
6578 | - if (!sdata->sensor_settings->drdy_irq.addr_stat_drdy) | ||
6579 | + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) | ||
6580 | return -EINVAL; | ||
6581 | |||
6582 | /* No scan mask, no interrupt */ | ||
6583 | @@ -39,23 +39,15 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev, | ||
6584 | return 0; | ||
6585 | |||
6586 | ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, | ||
6587 | - sdata->sensor_settings->drdy_irq.addr_stat_drdy, | ||
6588 | + sdata->sensor_settings->drdy_irq.stat_drdy.addr, | ||
6589 | &status); | ||
6590 | if (ret < 0) { | ||
6591 | dev_err(sdata->dev, | ||
6592 | "error checking samples available\n"); | ||
6593 | return ret; | ||
6594 | } | ||
6595 | - /* | ||
6596 | - * the lower bits of .active_scan_mask[0] is directly mapped | ||
6597 | - * to the channels on the sensor: either bit 0 for | ||
6598 | - * one-dimensional sensors, or e.g. x,y,z for accelerometers, | ||
6599 | - * gyroscopes or magnetometers. No sensor use more than 3 | ||
6600 | - * channels, so cut the other status bits here. | ||
6601 | - */ | ||
6602 | - status &= 0x07; | ||
6603 | |||
6604 | - if (status & (u8)indio_dev->active_scan_mask[0]) | ||
6605 | + if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask) | ||
6606 | return 1; | ||
6607 | |||
6608 | return 0; | ||
6609 | @@ -212,7 +204,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, | ||
6610 | * it was "our" interrupt. | ||
6611 | */ | ||
6612 | if (sdata->int_pin_open_drain && | ||
6613 | - sdata->sensor_settings->drdy_irq.addr_stat_drdy) | ||
6614 | + sdata->sensor_settings->drdy_irq.stat_drdy.addr) | ||
6615 | irq_trig |= IRQF_SHARED; | ||
6616 | |||
6617 | err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev), | ||
6618 | diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c | ||
6619 | index e366422e8512..2536a8400c98 100644 | ||
6620 | --- a/drivers/iio/gyro/st_gyro_core.c | ||
6621 | +++ b/drivers/iio/gyro/st_gyro_core.c | ||
6622 | @@ -118,7 +118,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { | ||
6623 | * drain settings, but only for INT1 and not | ||
6624 | * for the DRDY line on INT2. | ||
6625 | */ | ||
6626 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6627 | + .stat_drdy = { | ||
6628 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6629 | + .mask = 0x07, | ||
6630 | + }, | ||
6631 | }, | ||
6632 | .multi_read_bit = true, | ||
6633 | .bootime = 2, | ||
6634 | @@ -188,7 +191,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { | ||
6635 | * drain settings, but only for INT1 and not | ||
6636 | * for the DRDY line on INT2. | ||
6637 | */ | ||
6638 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6639 | + .stat_drdy = { | ||
6640 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6641 | + .mask = 0x07, | ||
6642 | + }, | ||
6643 | }, | ||
6644 | .multi_read_bit = true, | ||
6645 | .bootime = 2, | ||
6646 | @@ -253,7 +259,10 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { | ||
6647 | * drain settings, but only for INT1 and not | ||
6648 | * for the DRDY line on INT2. | ||
6649 | */ | ||
6650 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6651 | + .stat_drdy = { | ||
6652 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6653 | + .mask = 0x07, | ||
6654 | + }, | ||
6655 | }, | ||
6656 | .multi_read_bit = true, | ||
6657 | .bootime = 2, | ||
6658 | diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c | ||
6659 | index 08aafba4481c..19031a7bce23 100644 | ||
6660 | --- a/drivers/iio/magnetometer/st_magn_core.c | ||
6661 | +++ b/drivers/iio/magnetometer/st_magn_core.c | ||
6662 | @@ -317,7 +317,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { | ||
6663 | }, | ||
6664 | .drdy_irq = { | ||
6665 | /* drdy line is routed drdy pin */ | ||
6666 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6667 | + .stat_drdy = { | ||
6668 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6669 | + .mask = 0x07, | ||
6670 | + }, | ||
6671 | }, | ||
6672 | .multi_read_bit = true, | ||
6673 | .bootime = 2, | ||
6674 | @@ -361,7 +364,10 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { | ||
6675 | .drdy_irq = { | ||
6676 | .addr = 0x62, | ||
6677 | .mask_int1 = 0x01, | ||
6678 | - .addr_stat_drdy = 0x67, | ||
6679 | + .stat_drdy = { | ||
6680 | + .addr = 0x67, | ||
6681 | + .mask = 0x07, | ||
6682 | + }, | ||
6683 | }, | ||
6684 | .multi_read_bit = false, | ||
6685 | .bootime = 2, | ||
6686 | diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c | ||
6687 | index 34611a8ea2ce..ea075fcd5a6f 100644 | ||
6688 | --- a/drivers/iio/pressure/st_pressure_core.c | ||
6689 | +++ b/drivers/iio/pressure/st_pressure_core.c | ||
6690 | @@ -287,7 +287,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { | ||
6691 | .mask_ihl = 0x80, | ||
6692 | .addr_od = 0x22, | ||
6693 | .mask_od = 0x40, | ||
6694 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6695 | + .stat_drdy = { | ||
6696 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6697 | + .mask = 0x03, | ||
6698 | + }, | ||
6699 | }, | ||
6700 | .multi_read_bit = true, | ||
6701 | .bootime = 2, | ||
6702 | @@ -395,7 +398,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { | ||
6703 | .mask_ihl = 0x80, | ||
6704 | .addr_od = 0x22, | ||
6705 | .mask_od = 0x40, | ||
6706 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6707 | + .stat_drdy = { | ||
6708 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6709 | + .mask = 0x03, | ||
6710 | + }, | ||
6711 | }, | ||
6712 | .multi_read_bit = true, | ||
6713 | .bootime = 2, | ||
6714 | @@ -454,7 +460,10 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { | ||
6715 | .mask_ihl = 0x80, | ||
6716 | .addr_od = 0x12, | ||
6717 | .mask_od = 0x40, | ||
6718 | - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6719 | + .stat_drdy = { | ||
6720 | + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, | ||
6721 | + .mask = 0x03, | ||
6722 | + }, | ||
6723 | }, | ||
6724 | .multi_read_bit = false, | ||
6725 | .bootime = 2, | ||
6726 | diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c | ||
6727 | index 747efd1ae5a6..8208c30f03c5 100644 | ||
6728 | --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c | ||
6729 | +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c | ||
6730 | @@ -1001,6 +1001,11 @@ static void hns_roce_v1_mr_free_work_fn(struct work_struct *work) | ||
6731 | } | ||
6732 | } | ||
6733 | |||
6734 | + if (!ne) { | ||
6735 | + dev_err(dev, "Reseved loop qp is absent!\n"); | ||
6736 | + goto free_work; | ||
6737 | + } | ||
6738 | + | ||
6739 | do { | ||
6740 | ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc); | ||
6741 | if (ret < 0) { | ||
6742 | diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c | ||
6743 | index c1b5f38f31a5..3b4916680018 100644 | ||
6744 | --- a/drivers/infiniband/sw/rxe/rxe_pool.c | ||
6745 | +++ b/drivers/infiniband/sw/rxe/rxe_pool.c | ||
6746 | @@ -404,6 +404,8 @@ void *rxe_alloc(struct rxe_pool *pool) | ||
6747 | elem = kmem_cache_zalloc(pool_cache(pool), | ||
6748 | (pool->flags & RXE_POOL_ATOMIC) ? | ||
6749 | GFP_ATOMIC : GFP_KERNEL); | ||
6750 | + if (!elem) | ||
6751 | + return NULL; | ||
6752 | |||
6753 | elem->pool = pool; | ||
6754 | kref_init(&elem->ref_cnt); | ||
6755 | diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | ||
6756 | index afa938bd26d6..a72278e9cd27 100644 | ||
6757 | --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | ||
6758 | +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | ||
6759 | @@ -139,6 +139,7 @@ void opa_vnic_release_mac_tbl(struct opa_vnic_adapter *adapter) | ||
6760 | rcu_assign_pointer(adapter->mactbl, NULL); | ||
6761 | synchronize_rcu(); | ||
6762 | opa_vnic_free_mac_tbl(mactbl); | ||
6763 | + adapter->info.vport.mac_tbl_digest = 0; | ||
6764 | mutex_unlock(&adapter->mactbl_lock); | ||
6765 | } | ||
6766 | |||
6767 | diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | ||
6768 | index c2733964379c..9655cc3aa3a0 100644 | ||
6769 | --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | ||
6770 | +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c | ||
6771 | @@ -348,7 +348,7 @@ void opa_vnic_query_mcast_macs(struct opa_vnic_adapter *adapter, | ||
6772 | void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, | ||
6773 | struct opa_veswport_iface_macs *macs) | ||
6774 | { | ||
6775 | - u16 start_idx, tot_macs, num_macs, idx = 0, count = 0; | ||
6776 | + u16 start_idx, tot_macs, num_macs, idx = 0, count = 0, em_macs = 0; | ||
6777 | struct netdev_hw_addr *ha; | ||
6778 | |||
6779 | start_idx = be16_to_cpu(macs->start_idx); | ||
6780 | @@ -359,8 +359,10 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, | ||
6781 | |||
6782 | /* Do not include EM specified MAC address */ | ||
6783 | if (!memcmp(adapter->info.vport.base_mac_addr, ha->addr, | ||
6784 | - ARRAY_SIZE(adapter->info.vport.base_mac_addr))) | ||
6785 | + ARRAY_SIZE(adapter->info.vport.base_mac_addr))) { | ||
6786 | + em_macs++; | ||
6787 | continue; | ||
6788 | + } | ||
6789 | |||
6790 | if (start_idx > idx++) | ||
6791 | continue; | ||
6792 | @@ -383,7 +385,7 @@ void opa_vnic_query_ucast_macs(struct opa_vnic_adapter *adapter, | ||
6793 | } | ||
6794 | |||
6795 | tot_macs = netdev_hw_addr_list_count(&adapter->netdev->dev_addrs) + | ||
6796 | - netdev_uc_count(adapter->netdev); | ||
6797 | + netdev_uc_count(adapter->netdev) - em_macs; | ||
6798 | macs->tot_macs_in_lst = cpu_to_be16(tot_macs); | ||
6799 | macs->num_macs_in_msg = cpu_to_be16(count); | ||
6800 | macs->gen_count = cpu_to_be16(adapter->info.vport.uc_macs_gen_count); | ||
6801 | diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c | ||
6802 | index 0f586780ceb4..1ae5c1ef3f5b 100644 | ||
6803 | --- a/drivers/input/mouse/vmmouse.c | ||
6804 | +++ b/drivers/input/mouse/vmmouse.c | ||
6805 | @@ -316,11 +316,9 @@ static int vmmouse_enable(struct psmouse *psmouse) | ||
6806 | /* | ||
6807 | * Array of supported hypervisors. | ||
6808 | */ | ||
6809 | -static const struct hypervisor_x86 *vmmouse_supported_hypervisors[] = { | ||
6810 | - &x86_hyper_vmware, | ||
6811 | -#ifdef CONFIG_KVM_GUEST | ||
6812 | - &x86_hyper_kvm, | ||
6813 | -#endif | ||
6814 | +static enum x86_hypervisor_type vmmouse_supported_hypervisors[] = { | ||
6815 | + X86_HYPER_VMWARE, | ||
6816 | + X86_HYPER_KVM, | ||
6817 | }; | ||
6818 | |||
6819 | /** | ||
6820 | @@ -331,7 +329,7 @@ static bool vmmouse_check_hypervisor(void) | ||
6821 | int i; | ||
6822 | |||
6823 | for (i = 0; i < ARRAY_SIZE(vmmouse_supported_hypervisors); i++) | ||
6824 | - if (vmmouse_supported_hypervisors[i] == x86_hyper) | ||
6825 | + if (vmmouse_supported_hypervisors[i] == x86_hyper_type) | ||
6826 | return true; | ||
6827 | |||
6828 | return false; | ||
6829 | diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c | ||
6830 | index 905729191d3e..78183f90820e 100644 | ||
6831 | --- a/drivers/leds/leds-pca955x.c | ||
6832 | +++ b/drivers/leds/leds-pca955x.c | ||
6833 | @@ -61,6 +61,10 @@ | ||
6834 | #define PCA955X_LS_BLINK0 0x2 /* Blink at PWM0 rate */ | ||
6835 | #define PCA955X_LS_BLINK1 0x3 /* Blink at PWM1 rate */ | ||
6836 | |||
6837 | +#define PCA955X_GPIO_INPUT LED_OFF | ||
6838 | +#define PCA955X_GPIO_HIGH LED_OFF | ||
6839 | +#define PCA955X_GPIO_LOW LED_FULL | ||
6840 | + | ||
6841 | enum pca955x_type { | ||
6842 | pca9550, | ||
6843 | pca9551, | ||
6844 | @@ -329,9 +333,9 @@ static int pca955x_set_value(struct gpio_chip *gc, unsigned int offset, | ||
6845 | struct pca955x_led *led = &pca955x->leds[offset]; | ||
6846 | |||
6847 | if (val) | ||
6848 | - return pca955x_led_set(&led->led_cdev, LED_FULL); | ||
6849 | - else | ||
6850 | - return pca955x_led_set(&led->led_cdev, LED_OFF); | ||
6851 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_HIGH); | ||
6852 | + | ||
6853 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_LOW); | ||
6854 | } | ||
6855 | |||
6856 | static void pca955x_gpio_set_value(struct gpio_chip *gc, unsigned int offset, | ||
6857 | @@ -355,8 +359,11 @@ static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset) | ||
6858 | static int pca955x_gpio_direction_input(struct gpio_chip *gc, | ||
6859 | unsigned int offset) | ||
6860 | { | ||
6861 | - /* To use as input ensure pin is not driven */ | ||
6862 | - return pca955x_set_value(gc, offset, 0); | ||
6863 | + struct pca955x *pca955x = gpiochip_get_data(gc); | ||
6864 | + struct pca955x_led *led = &pca955x->leds[offset]; | ||
6865 | + | ||
6866 | + /* To use as input ensure pin is not driven. */ | ||
6867 | + return pca955x_led_set(&led->led_cdev, PCA955X_GPIO_INPUT); | ||
6868 | } | ||
6869 | |||
6870 | static int pca955x_gpio_direction_output(struct gpio_chip *gc, | ||
6871 | diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c | ||
6872 | index 35e82b14ded7..ddf0a4341ae2 100644 | ||
6873 | --- a/drivers/md/dm-mpath.c | ||
6874 | +++ b/drivers/md/dm-mpath.c | ||
6875 | @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m, | ||
6876 | |||
6877 | pgpath = path_to_pgpath(path); | ||
6878 | |||
6879 | - if (unlikely(lockless_dereference(m->current_pg) != pg)) { | ||
6880 | + if (unlikely(READ_ONCE(m->current_pg) != pg)) { | ||
6881 | /* Only update current_pgpath if pg changed */ | ||
6882 | spin_lock_irqsave(&m->lock, flags); | ||
6883 | m->current_pgpath = pgpath; | ||
6884 | @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) | ||
6885 | } | ||
6886 | |||
6887 | /* Were we instructed to switch PG? */ | ||
6888 | - if (lockless_dereference(m->next_pg)) { | ||
6889 | + if (READ_ONCE(m->next_pg)) { | ||
6890 | spin_lock_irqsave(&m->lock, flags); | ||
6891 | pg = m->next_pg; | ||
6892 | if (!pg) { | ||
6893 | @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) | ||
6894 | |||
6895 | /* Don't change PG until it has no remaining paths */ | ||
6896 | check_current_pg: | ||
6897 | - pg = lockless_dereference(m->current_pg); | ||
6898 | + pg = READ_ONCE(m->current_pg); | ||
6899 | if (pg) { | ||
6900 | pgpath = choose_path_in_pg(m, pg, nr_bytes); | ||
6901 | if (!IS_ERR_OR_NULL(pgpath)) | ||
6902 | @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, | ||
6903 | struct request *clone; | ||
6904 | |||
6905 | /* Do we need to select a new pgpath? */ | ||
6906 | - pgpath = lockless_dereference(m->current_pgpath); | ||
6907 | + pgpath = READ_ONCE(m->current_pgpath); | ||
6908 | if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) | ||
6909 | pgpath = choose_pgpath(m, nr_bytes); | ||
6910 | |||
6911 | @@ -533,7 +533,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m | ||
6912 | bool queue_io; | ||
6913 | |||
6914 | /* Do we need to select a new pgpath? */ | ||
6915 | - pgpath = lockless_dereference(m->current_pgpath); | ||
6916 | + pgpath = READ_ONCE(m->current_pgpath); | ||
6917 | queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); | ||
6918 | if (!pgpath || !queue_io) | ||
6919 | pgpath = choose_pgpath(m, nr_bytes); | ||
6920 | @@ -1802,7 +1802,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, | ||
6921 | struct pgpath *current_pgpath; | ||
6922 | int r; | ||
6923 | |||
6924 | - current_pgpath = lockless_dereference(m->current_pgpath); | ||
6925 | + current_pgpath = READ_ONCE(m->current_pgpath); | ||
6926 | if (!current_pgpath) | ||
6927 | current_pgpath = choose_pgpath(m, 0); | ||
6928 | |||
6929 | @@ -1824,7 +1824,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, | ||
6930 | } | ||
6931 | |||
6932 | if (r == -ENOTCONN) { | ||
6933 | - if (!lockless_dereference(m->current_pg)) { | ||
6934 | + if (!READ_ONCE(m->current_pg)) { | ||
6935 | /* Path status changed, redo selection */ | ||
6936 | (void) choose_pgpath(m, 0); | ||
6937 | } | ||
6938 | @@ -1893,9 +1893,9 @@ static int multipath_busy(struct dm_target *ti) | ||
6939 | return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); | ||
6940 | |||
6941 | /* Guess which priority_group will be used at next mapping time */ | ||
6942 | - pg = lockless_dereference(m->current_pg); | ||
6943 | - next_pg = lockless_dereference(m->next_pg); | ||
6944 | - if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg)) | ||
6945 | + pg = READ_ONCE(m->current_pg); | ||
6946 | + next_pg = READ_ONCE(m->next_pg); | ||
6947 | + if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) | ||
6948 | pg = next_pg; | ||
6949 | |||
6950 | if (!pg) { | ||
6951 | diff --git a/drivers/md/md.c b/drivers/md/md.c | ||
6952 | index 98ea86309ceb..6bf093cef958 100644 | ||
6953 | --- a/drivers/md/md.c | ||
6954 | +++ b/drivers/md/md.c | ||
6955 | @@ -7468,8 +7468,8 @@ void md_wakeup_thread(struct md_thread *thread) | ||
6956 | { | ||
6957 | if (thread) { | ||
6958 | pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm); | ||
6959 | - if (!test_and_set_bit(THREAD_WAKEUP, &thread->flags)) | ||
6960 | - wake_up(&thread->wqueue); | ||
6961 | + set_bit(THREAD_WAKEUP, &thread->flags); | ||
6962 | + wake_up(&thread->wqueue); | ||
6963 | } | ||
6964 | } | ||
6965 | EXPORT_SYMBOL(md_wakeup_thread); | ||
6966 | diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c | ||
6967 | index eda38cbe8530..41f2a9f6851d 100644 | ||
6968 | --- a/drivers/misc/pti.c | ||
6969 | +++ b/drivers/misc/pti.c | ||
6970 | @@ -32,7 +32,7 @@ | ||
6971 | #include <linux/pci.h> | ||
6972 | #include <linux/mutex.h> | ||
6973 | #include <linux/miscdevice.h> | ||
6974 | -#include <linux/pti.h> | ||
6975 | +#include <linux/intel-pti.h> | ||
6976 | #include <linux/slab.h> | ||
6977 | #include <linux/uaccess.h> | ||
6978 | |||
6979 | diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c | ||
6980 | index 1e688bfec567..9047c0a529b2 100644 | ||
6981 | --- a/drivers/misc/vmw_balloon.c | ||
6982 | +++ b/drivers/misc/vmw_balloon.c | ||
6983 | @@ -1271,7 +1271,7 @@ static int __init vmballoon_init(void) | ||
6984 | * Check if we are running on VMware's hypervisor and bail out | ||
6985 | * if we are not. | ||
6986 | */ | ||
6987 | - if (x86_hyper != &x86_hyper_vmware) | ||
6988 | + if (x86_hyper_type != X86_HYPER_VMWARE) | ||
6989 | return -ENODEV; | ||
6990 | |||
6991 | for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; | ||
6992 | diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c | ||
6993 | index c66abd476023..3b0db01ead1f 100644 | ||
6994 | --- a/drivers/net/ethernet/ibm/ibmvnic.c | ||
6995 | +++ b/drivers/net/ethernet/ibm/ibmvnic.c | ||
6996 | @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev) | ||
6997 | } | ||
6998 | |||
6999 | rc = __ibmvnic_open(netdev); | ||
7000 | + netif_carrier_on(netdev); | ||
7001 | mutex_unlock(&adapter->reset_lock); | ||
7002 | |||
7003 | return rc; | ||
7004 | @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) | ||
7005 | if (rc) | ||
7006 | goto ibmvnic_init_fail; | ||
7007 | |||
7008 | + netif_carrier_off(netdev); | ||
7009 | rc = register_netdev(netdev); | ||
7010 | if (rc) { | ||
7011 | dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); | ||
7012 | diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h | ||
7013 | index 689c413b7782..d2f9a2dd76a2 100644 | ||
7014 | --- a/drivers/net/ethernet/intel/fm10k/fm10k.h | ||
7015 | +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h | ||
7016 | @@ -526,8 +526,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); | ||
7017 | int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); | ||
7018 | int fm10k_ndo_set_vf_vlan(struct net_device *netdev, | ||
7019 | int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); | ||
7020 | -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, | ||
7021 | - int unused); | ||
7022 | +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, | ||
7023 | + int __always_unused min_rate, int max_rate); | ||
7024 | int fm10k_ndo_get_vf_config(struct net_device *netdev, | ||
7025 | int vf_idx, struct ifla_vf_info *ivi); | ||
7026 | |||
7027 | diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c | ||
7028 | index 5f4dac0d36ef..e72fd52bacfe 100644 | ||
7029 | --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c | ||
7030 | +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c | ||
7031 | @@ -126,6 +126,9 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) | ||
7032 | struct fm10k_mbx_info *mbx = &vf_info->mbx; | ||
7033 | u16 glort = vf_info->glort; | ||
7034 | |||
7035 | + /* process the SM mailbox first to drain outgoing messages */ | ||
7036 | + hw->mbx.ops.process(hw, &hw->mbx); | ||
7037 | + | ||
7038 | /* verify port mapping is valid, if not reset port */ | ||
7039 | if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) | ||
7040 | hw->iov.ops.reset_lport(hw, vf_info); | ||
7041 | @@ -482,7 +485,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, | ||
7042 | } | ||
7043 | |||
7044 | int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, | ||
7045 | - int __always_unused unused, int rate) | ||
7046 | + int __always_unused min_rate, int max_rate) | ||
7047 | { | ||
7048 | struct fm10k_intfc *interface = netdev_priv(netdev); | ||
7049 | struct fm10k_iov_data *iov_data = interface->iov_data; | ||
7050 | @@ -493,14 +496,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, | ||
7051 | return -EINVAL; | ||
7052 | |||
7053 | /* rate limit cannot be less than 10Mbs or greater than link speed */ | ||
7054 | - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) | ||
7055 | + if (max_rate && | ||
7056 | + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) | ||
7057 | return -EINVAL; | ||
7058 | |||
7059 | /* store values */ | ||
7060 | - iov_data->vf_info[vf_idx].rate = rate; | ||
7061 | + iov_data->vf_info[vf_idx].rate = max_rate; | ||
7062 | |||
7063 | /* update hardware configuration */ | ||
7064 | - hw->iov.ops.configure_tc(hw, vf_idx, rate); | ||
7065 | + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); | ||
7066 | |||
7067 | return 0; | ||
7068 | } | ||
7069 | diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c | ||
7070 | index ea20aacd5e1d..b2cde9b16d82 100644 | ||
7071 | --- a/drivers/net/ethernet/intel/i40e/i40e_main.c | ||
7072 | +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c | ||
7073 | @@ -2874,14 +2874,15 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) | ||
7074 | static void i40e_config_xps_tx_ring(struct i40e_ring *ring) | ||
7075 | { | ||
7076 | struct i40e_vsi *vsi = ring->vsi; | ||
7077 | + int cpu; | ||
7078 | |||
7079 | if (!ring->q_vector || !ring->netdev) | ||
7080 | return; | ||
7081 | |||
7082 | if ((vsi->tc_config.numtc <= 1) && | ||
7083 | !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) { | ||
7084 | - netif_set_xps_queue(ring->netdev, | ||
7085 | - get_cpu_mask(ring->q_vector->v_idx), | ||
7086 | + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); | ||
7087 | + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), | ||
7088 | ring->queue_index); | ||
7089 | } | ||
7090 | |||
7091 | @@ -3471,6 +3472,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) | ||
7092 | int tx_int_idx = 0; | ||
7093 | int vector, err; | ||
7094 | int irq_num; | ||
7095 | + int cpu; | ||
7096 | |||
7097 | for (vector = 0; vector < q_vectors; vector++) { | ||
7098 | struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; | ||
7099 | @@ -3506,10 +3508,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) | ||
7100 | q_vector->affinity_notify.notify = i40e_irq_affinity_notify; | ||
7101 | q_vector->affinity_notify.release = i40e_irq_affinity_release; | ||
7102 | irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); | ||
7103 | - /* get_cpu_mask returns a static constant mask with | ||
7104 | - * a permanent lifetime so it's ok to use here. | ||
7105 | + /* Spread affinity hints out across online CPUs. | ||
7106 | + * | ||
7107 | + * get_cpu_mask returns a static constant mask with | ||
7108 | + * a permanent lifetime so it's ok to pass to | ||
7109 | + * irq_set_affinity_hint without making a copy. | ||
7110 | */ | ||
7111 | - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); | ||
7112 | + cpu = cpumask_local_spread(q_vector->v_idx, -1); | ||
7113 | + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); | ||
7114 | } | ||
7115 | |||
7116 | vsi->irqs_ready = true; | ||
7117 | diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | ||
7118 | index 4d1e670f490e..e368b0237a1b 100644 | ||
7119 | --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | ||
7120 | +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | ||
7121 | @@ -1008,8 +1008,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) | ||
7122 | set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); | ||
7123 | clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); | ||
7124 | /* Do not notify the client during VF init */ | ||
7125 | - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, | ||
7126 | - &vf->vf_states)) | ||
7127 | + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, | ||
7128 | + &vf->vf_states)) | ||
7129 | i40e_notify_client_of_vf_reset(pf, abs_vf_id); | ||
7130 | vf->num_vlan = 0; | ||
7131 | } | ||
7132 | @@ -2779,6 +2779,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) | ||
7133 | struct i40e_mac_filter *f; | ||
7134 | struct i40e_vf *vf; | ||
7135 | int ret = 0; | ||
7136 | + struct hlist_node *h; | ||
7137 | int bkt; | ||
7138 | |||
7139 | /* validate the request */ | ||
7140 | @@ -2817,7 +2818,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) | ||
7141 | /* Delete all the filters for this VSI - we're going to kill it | ||
7142 | * anyway. | ||
7143 | */ | ||
7144 | - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) | ||
7145 | + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) | ||
7146 | __i40e_del_filter(vsi, f); | ||
7147 | |||
7148 | spin_unlock_bh(&vsi->mac_filter_hash_lock); | ||
7149 | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c | ||
7150 | index 1825d956bb00..1ccad6f30ebf 100644 | ||
7151 | --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c | ||
7152 | +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c | ||
7153 | @@ -546,6 +546,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) | ||
7154 | unsigned int vector, q_vectors; | ||
7155 | unsigned int rx_int_idx = 0, tx_int_idx = 0; | ||
7156 | int irq_num, err; | ||
7157 | + int cpu; | ||
7158 | |||
7159 | i40evf_irq_disable(adapter); | ||
7160 | /* Decrement for Other and TCP Timer vectors */ | ||
7161 | @@ -584,10 +585,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) | ||
7162 | q_vector->affinity_notify.release = | ||
7163 | i40evf_irq_affinity_release; | ||
7164 | irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); | ||
7165 | - /* get_cpu_mask returns a static constant mask with | ||
7166 | - * a permanent lifetime so it's ok to use here. | ||
7167 | + /* Spread the IRQ affinity hints across online CPUs. Note that | ||
7168 | + * get_cpu_mask returns a mask with a permanent lifetime so | ||
7169 | + * it's safe to use as a hint for irq_set_affinity_hint. | ||
7170 | */ | ||
7171 | - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); | ||
7172 | + cpu = cpumask_local_spread(q_vector->v_idx, -1); | ||
7173 | + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); | ||
7174 | } | ||
7175 | |||
7176 | return 0; | ||
7177 | diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c | ||
7178 | index b0031c5ff767..667dbc7d4a4e 100644 | ||
7179 | --- a/drivers/net/ethernet/intel/igb/igb_main.c | ||
7180 | +++ b/drivers/net/ethernet/intel/igb/igb_main.c | ||
7181 | @@ -3162,6 +3162,8 @@ static int igb_sw_init(struct igb_adapter *adapter) | ||
7182 | /* Setup and initialize a copy of the hw vlan table array */ | ||
7183 | adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), | ||
7184 | GFP_ATOMIC); | ||
7185 | + if (!adapter->shadow_vfta) | ||
7186 | + return -ENOMEM; | ||
7187 | |||
7188 | /* This call may decrease the number of queues */ | ||
7189 | if (igb_init_interrupt_scheme(adapter, true)) { | ||
7190 | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | ||
7191 | index 6e6ab6f6875e..64429a14c630 100644 | ||
7192 | --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | ||
7193 | +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | ||
7194 | @@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, | ||
7195 | fw_cmd.ver_build = build; | ||
7196 | fw_cmd.ver_sub = sub; | ||
7197 | fw_cmd.hdr.checksum = 0; | ||
7198 | - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, | ||
7199 | - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); | ||
7200 | fw_cmd.pad = 0; | ||
7201 | fw_cmd.pad2 = 0; | ||
7202 | + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, | ||
7203 | + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); | ||
7204 | |||
7205 | for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { | ||
7206 | ret_val = ixgbe_host_interface_command(hw, &fw_cmd, | ||
7207 | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | ||
7208 | index 19fbb2f28ea4..8a85217845ae 100644 | ||
7209 | --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | ||
7210 | +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | ||
7211 | @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, | ||
7212 | /* convert offset from words to bytes */ | ||
7213 | buffer.address = cpu_to_be32((offset + current_word) * 2); | ||
7214 | buffer.length = cpu_to_be16(words_to_read * 2); | ||
7215 | + buffer.pad2 = 0; | ||
7216 | + buffer.pad3 = 0; | ||
7217 | |||
7218 | status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), | ||
7219 | IXGBE_HI_COMMAND_TIMEOUT); | ||
7220 | diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c | ||
7221 | index c1e52b9dc58d..5f93e6add563 100644 | ||
7222 | --- a/drivers/net/phy/at803x.c | ||
7223 | +++ b/drivers/net/phy/at803x.c | ||
7224 | @@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev, | ||
7225 | mac = (const u8 *) ndev->dev_addr; | ||
7226 | |||
7227 | if (!is_valid_ether_addr(mac)) | ||
7228 | - return -EFAULT; | ||
7229 | + return -EINVAL; | ||
7230 | |||
7231 | for (i = 0; i < 3; i++) { | ||
7232 | phy_write(phydev, AT803X_MMD_ACCESS_CONTROL, | ||
7233 | diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c | ||
7234 | index ac41c8be9200..0fd8e164339c 100644 | ||
7235 | --- a/drivers/pci/iov.c | ||
7236 | +++ b/drivers/pci/iov.c | ||
7237 | @@ -162,7 +162,6 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) | ||
7238 | |||
7239 | pci_device_add(virtfn, virtfn->bus); | ||
7240 | |||
7241 | - pci_bus_add_device(virtfn); | ||
7242 | sprintf(buf, "virtfn%u", id); | ||
7243 | rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); | ||
7244 | if (rc) | ||
7245 | @@ -173,6 +172,8 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) | ||
7246 | |||
7247 | kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); | ||
7248 | |||
7249 | + pci_bus_add_device(virtfn); | ||
7250 | + | ||
7251 | return 0; | ||
7252 | |||
7253 | failed2: | ||
7254 | diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c | ||
7255 | index 6078dfc11b11..74f1c57ab93b 100644 | ||
7256 | --- a/drivers/pci/pci.c | ||
7257 | +++ b/drivers/pci/pci.c | ||
7258 | @@ -4356,6 +4356,10 @@ static bool pci_bus_resetable(struct pci_bus *bus) | ||
7259 | { | ||
7260 | struct pci_dev *dev; | ||
7261 | |||
7262 | + | ||
7263 | + if (bus->self && (bus->self->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET)) | ||
7264 | + return false; | ||
7265 | + | ||
7266 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
7267 | if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || | ||
7268 | (dev->subordinate && !pci_bus_resetable(dev->subordinate))) | ||
7269 | diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c | ||
7270 | index 890efcc574cb..744805232155 100644 | ||
7271 | --- a/drivers/pci/pcie/aer/aerdrv_core.c | ||
7272 | +++ b/drivers/pci/pcie/aer/aerdrv_core.c | ||
7273 | @@ -390,7 +390,14 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, | ||
7274 | * If the error is reported by an end point, we think this | ||
7275 | * error is related to the upstream link of the end point. | ||
7276 | */ | ||
7277 | - pci_walk_bus(dev->bus, cb, &result_data); | ||
7278 | + if (state == pci_channel_io_normal) | ||
7279 | + /* | ||
7280 | + * the error is non fatal so the bus is ok, just invoke | ||
7281 | + * the callback for the function that logged the error. | ||
7282 | + */ | ||
7283 | + cb(dev, &result_data); | ||
7284 | + else | ||
7285 | + pci_walk_bus(dev->bus, cb, &result_data); | ||
7286 | } | ||
7287 | |||
7288 | return result_data.result; | ||
7289 | diff --git a/drivers/platform/x86/asus-wireless.c b/drivers/platform/x86/asus-wireless.c | ||
7290 | index f3796164329e..d4aeac3477f5 100644 | ||
7291 | --- a/drivers/platform/x86/asus-wireless.c | ||
7292 | +++ b/drivers/platform/x86/asus-wireless.c | ||
7293 | @@ -118,6 +118,7 @@ static void asus_wireless_notify(struct acpi_device *adev, u32 event) | ||
7294 | return; | ||
7295 | } | ||
7296 | input_report_key(data->idev, KEY_RFKILL, 1); | ||
7297 | + input_sync(data->idev); | ||
7298 | input_report_key(data->idev, KEY_RFKILL, 0); | ||
7299 | input_sync(data->idev); | ||
7300 | } | ||
7301 | diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c | ||
7302 | index 8cec9a02c0b8..9eb32ead63db 100644 | ||
7303 | --- a/drivers/rtc/interface.c | ||
7304 | +++ b/drivers/rtc/interface.c | ||
7305 | @@ -779,7 +779,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) | ||
7306 | } | ||
7307 | |||
7308 | timerqueue_add(&rtc->timerqueue, &timer->node); | ||
7309 | - if (!next) { | ||
7310 | + if (!next || ktime_before(timer->node.expires, next->expires)) { | ||
7311 | struct rtc_wkalrm alarm; | ||
7312 | int err; | ||
7313 | alarm.time = rtc_ktime_to_tm(timer->node.expires); | ||
7314 | diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c | ||
7315 | index e1687e19c59f..a30f24cb6c83 100644 | ||
7316 | --- a/drivers/rtc/rtc-pl031.c | ||
7317 | +++ b/drivers/rtc/rtc-pl031.c | ||
7318 | @@ -308,7 +308,8 @@ static int pl031_remove(struct amba_device *adev) | ||
7319 | |||
7320 | dev_pm_clear_wake_irq(&adev->dev); | ||
7321 | device_init_wakeup(&adev->dev, false); | ||
7322 | - free_irq(adev->irq[0], ldata); | ||
7323 | + if (adev->irq[0]) | ||
7324 | + free_irq(adev->irq[0], ldata); | ||
7325 | rtc_device_unregister(ldata->rtc); | ||
7326 | iounmap(ldata->base); | ||
7327 | kfree(ldata); | ||
7328 | @@ -381,12 +382,13 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) | ||
7329 | goto out_no_rtc; | ||
7330 | } | ||
7331 | |||
7332 | - if (request_irq(adev->irq[0], pl031_interrupt, | ||
7333 | - vendor->irqflags, "rtc-pl031", ldata)) { | ||
7334 | - ret = -EIO; | ||
7335 | - goto out_no_irq; | ||
7336 | + if (adev->irq[0]) { | ||
7337 | + ret = request_irq(adev->irq[0], pl031_interrupt, | ||
7338 | + vendor->irqflags, "rtc-pl031", ldata); | ||
7339 | + if (ret) | ||
7340 | + goto out_no_irq; | ||
7341 | + dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); | ||
7342 | } | ||
7343 | - dev_pm_set_wake_irq(&adev->dev, adev->irq[0]); | ||
7344 | return 0; | ||
7345 | |||
7346 | out_no_irq: | ||
7347 | diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | ||
7348 | index 1d02cf9fe06c..30d5f0ef29bb 100644 | ||
7349 | --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | ||
7350 | +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | ||
7351 | @@ -1575,6 +1575,7 @@ static void release_offload_resources(struct cxgbi_sock *csk) | ||
7352 | csk, csk->state, csk->flags, csk->tid); | ||
7353 | |||
7354 | cxgbi_sock_free_cpl_skbs(csk); | ||
7355 | + cxgbi_sock_purge_write_queue(csk); | ||
7356 | if (csk->wr_cred != csk->wr_max_cred) { | ||
7357 | cxgbi_sock_purge_wr_queue(csk); | ||
7358 | cxgbi_sock_reset_wr_list(csk); | ||
7359 | diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c | ||
7360 | index 499df9d17339..d9a03beb76a4 100644 | ||
7361 | --- a/drivers/scsi/lpfc/lpfc_hbadisc.c | ||
7362 | +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c | ||
7363 | @@ -4983,7 +4983,8 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) | ||
7364 | lpfc_cancel_retry_delay_tmo(vport, ndlp); | ||
7365 | if ((ndlp->nlp_flag & NLP_DEFER_RM) && | ||
7366 | !(ndlp->nlp_flag & NLP_REG_LOGIN_SEND) && | ||
7367 | - !(ndlp->nlp_flag & NLP_RPI_REGISTERED)) { | ||
7368 | + !(ndlp->nlp_flag & NLP_RPI_REGISTERED) && | ||
7369 | + phba->sli_rev != LPFC_SLI_REV4) { | ||
7370 | /* For this case we need to cleanup the default rpi | ||
7371 | * allocated by the firmware. | ||
7372 | */ | ||
7373 | diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h | ||
7374 | index 1db0a38683f4..2b145966c73f 100644 | ||
7375 | --- a/drivers/scsi/lpfc/lpfc_hw4.h | ||
7376 | +++ b/drivers/scsi/lpfc/lpfc_hw4.h | ||
7377 | @@ -3636,7 +3636,7 @@ struct lpfc_mbx_get_port_name { | ||
7378 | #define MB_CEQ_STATUS_QUEUE_FLUSHING 0x4 | ||
7379 | #define MB_CQE_STATUS_DMA_FAILED 0x5 | ||
7380 | |||
7381 | -#define LPFC_MBX_WR_CONFIG_MAX_BDE 8 | ||
7382 | +#define LPFC_MBX_WR_CONFIG_MAX_BDE 1 | ||
7383 | struct lpfc_mbx_wr_object { | ||
7384 | struct mbox_header header; | ||
7385 | union { | ||
7386 | diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c | ||
7387 | index 3c5b054a56ac..7ac1a067d780 100644 | ||
7388 | --- a/drivers/scsi/lpfc/lpfc_nvmet.c | ||
7389 | +++ b/drivers/scsi/lpfc/lpfc_nvmet.c | ||
7390 | @@ -1464,6 +1464,7 @@ static struct lpfc_nvmet_ctxbuf * | ||
7391 | lpfc_nvmet_replenish_context(struct lpfc_hba *phba, | ||
7392 | struct lpfc_nvmet_ctx_info *current_infop) | ||
7393 | { | ||
7394 | +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) | ||
7395 | struct lpfc_nvmet_ctxbuf *ctx_buf = NULL; | ||
7396 | struct lpfc_nvmet_ctx_info *get_infop; | ||
7397 | int i; | ||
7398 | @@ -1511,6 +1512,7 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba, | ||
7399 | get_infop = get_infop->nvmet_ctx_next_cpu; | ||
7400 | } | ||
7401 | |||
7402 | +#endif | ||
7403 | /* Nothing found, all contexts for the MRQ are in-flight */ | ||
7404 | return NULL; | ||
7405 | } | ||
7406 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
7407 | index 22998cbd538f..33ff691878e2 100644 | ||
7408 | --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
7409 | +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
7410 | @@ -4804,6 +4804,11 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) | ||
7411 | } else if (log_info == VIRTUAL_IO_FAILED_RETRY) { | ||
7412 | scmd->result = DID_RESET << 16; | ||
7413 | break; | ||
7414 | + } else if ((scmd->device->channel == RAID_CHANNEL) && | ||
7415 | + (scsi_state == (MPI2_SCSI_STATE_TERMINATED | | ||
7416 | + MPI2_SCSI_STATE_NO_SCSI_STATUS))) { | ||
7417 | + scmd->result = DID_RESET << 16; | ||
7418 | + break; | ||
7419 | } | ||
7420 | scmd->result = DID_SOFT_ERROR << 16; | ||
7421 | break; | ||
7422 | diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c | ||
7423 | index 3f4148c92308..0f538b8c3a07 100644 | ||
7424 | --- a/drivers/staging/greybus/light.c | ||
7425 | +++ b/drivers/staging/greybus/light.c | ||
7426 | @@ -925,6 +925,8 @@ static void __gb_lights_led_unregister(struct gb_channel *channel) | ||
7427 | return; | ||
7428 | |||
7429 | led_classdev_unregister(cdev); | ||
7430 | + kfree(cdev->name); | ||
7431 | + cdev->name = NULL; | ||
7432 | channel->led = NULL; | ||
7433 | } | ||
7434 | |||
7435 | diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c | ||
7436 | index 7952357df9c8..edb6e4e9ef3a 100644 | ||
7437 | --- a/drivers/tee/optee/core.c | ||
7438 | +++ b/drivers/tee/optee/core.c | ||
7439 | @@ -590,7 +590,6 @@ static int __init optee_driver_init(void) | ||
7440 | return -ENODEV; | ||
7441 | |||
7442 | np = of_find_matching_node(fw_np, optee_match); | ||
7443 | - of_node_put(fw_np); | ||
7444 | if (!np) | ||
7445 | return -ENODEV; | ||
7446 | |||
7447 | diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c | ||
7448 | index bd3572c41585..6d8906d65476 100644 | ||
7449 | --- a/drivers/thermal/hisi_thermal.c | ||
7450 | +++ b/drivers/thermal/hisi_thermal.c | ||
7451 | @@ -35,8 +35,9 @@ | ||
7452 | #define TEMP0_RST_MSK (0x1C) | ||
7453 | #define TEMP0_VALUE (0x28) | ||
7454 | |||
7455 | -#define HISI_TEMP_BASE (-60) | ||
7456 | +#define HISI_TEMP_BASE (-60000) | ||
7457 | #define HISI_TEMP_RESET (100000) | ||
7458 | +#define HISI_TEMP_STEP (784) | ||
7459 | |||
7460 | #define HISI_MAX_SENSORS 4 | ||
7461 | |||
7462 | @@ -61,19 +62,38 @@ struct hisi_thermal_data { | ||
7463 | void __iomem *regs; | ||
7464 | }; | ||
7465 | |||
7466 | -/* in millicelsius */ | ||
7467 | -static inline int _step_to_temp(int step) | ||
7468 | +/* | ||
7469 | + * The temperature computation on the tsensor is as follow: | ||
7470 | + * Unit: millidegree Celsius | ||
7471 | + * Step: 255/200 (0.7843) | ||
7472 | + * Temperature base: -60°C | ||
7473 | + * | ||
7474 | + * The register is programmed in temperature steps, every step is 784 | ||
7475 | + * millidegree and begins at -60 000 m°C | ||
7476 | + * | ||
7477 | + * The temperature from the steps: | ||
7478 | + * | ||
7479 | + * Temp = TempBase + (steps x 784) | ||
7480 | + * | ||
7481 | + * and the steps from the temperature: | ||
7482 | + * | ||
7483 | + * steps = (Temp - TempBase) / 784 | ||
7484 | + * | ||
7485 | + */ | ||
7486 | +static inline int hisi_thermal_step_to_temp(int step) | ||
7487 | { | ||
7488 | - /* | ||
7489 | - * Every step equals (1 * 200) / 255 celsius, and finally | ||
7490 | - * need convert to millicelsius. | ||
7491 | - */ | ||
7492 | - return (HISI_TEMP_BASE * 1000 + (step * 200000 / 255)); | ||
7493 | + return HISI_TEMP_BASE + (step * HISI_TEMP_STEP); | ||
7494 | +} | ||
7495 | + | ||
7496 | +static inline long hisi_thermal_temp_to_step(long temp) | ||
7497 | +{ | ||
7498 | + return (temp - HISI_TEMP_BASE) / HISI_TEMP_STEP; | ||
7499 | } | ||
7500 | |||
7501 | -static inline long _temp_to_step(long temp) | ||
7502 | +static inline long hisi_thermal_round_temp(int temp) | ||
7503 | { | ||
7504 | - return ((temp - HISI_TEMP_BASE * 1000) * 255) / 200000; | ||
7505 | + return hisi_thermal_step_to_temp( | ||
7506 | + hisi_thermal_temp_to_step(temp)); | ||
7507 | } | ||
7508 | |||
7509 | static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, | ||
7510 | @@ -99,7 +119,7 @@ static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, | ||
7511 | usleep_range(3000, 5000); | ||
7512 | |||
7513 | val = readl(data->regs + TEMP0_VALUE); | ||
7514 | - val = _step_to_temp(val); | ||
7515 | + val = hisi_thermal_step_to_temp(val); | ||
7516 | |||
7517 | mutex_unlock(&data->thermal_lock); | ||
7518 | |||
7519 | @@ -126,10 +146,11 @@ static void hisi_thermal_enable_bind_irq_sensor | ||
7520 | writel((sensor->id << 12), data->regs + TEMP0_CFG); | ||
7521 | |||
7522 | /* enable for interrupt */ | ||
7523 | - writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, | ||
7524 | + writel(hisi_thermal_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, | ||
7525 | data->regs + TEMP0_TH); | ||
7526 | |||
7527 | - writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH); | ||
7528 | + writel(hisi_thermal_temp_to_step(HISI_TEMP_RESET), | ||
7529 | + data->regs + TEMP0_RST_TH); | ||
7530 | |||
7531 | /* enable module */ | ||
7532 | writel(0x1, data->regs + TEMP0_RST_MSK); | ||
7533 | @@ -230,7 +251,7 @@ static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) | ||
7534 | sensor = &data->sensors[data->irq_bind_sensor]; | ||
7535 | |||
7536 | dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", | ||
7537 | - sensor->thres_temp / 1000); | ||
7538 | + sensor->thres_temp); | ||
7539 | mutex_unlock(&data->thermal_lock); | ||
7540 | |||
7541 | for (i = 0; i < HISI_MAX_SENSORS; i++) { | ||
7542 | @@ -269,7 +290,7 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev, | ||
7543 | |||
7544 | for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { | ||
7545 | if (trip[i].type == THERMAL_TRIP_PASSIVE) { | ||
7546 | - sensor->thres_temp = trip[i].temperature; | ||
7547 | + sensor->thres_temp = hisi_thermal_round_temp(trip[i].temperature); | ||
7548 | break; | ||
7549 | } | ||
7550 | } | ||
7551 | @@ -317,15 +338,6 @@ static int hisi_thermal_probe(struct platform_device *pdev) | ||
7552 | if (data->irq < 0) | ||
7553 | return data->irq; | ||
7554 | |||
7555 | - ret = devm_request_threaded_irq(&pdev->dev, data->irq, | ||
7556 | - hisi_thermal_alarm_irq, | ||
7557 | - hisi_thermal_alarm_irq_thread, | ||
7558 | - 0, "hisi_thermal", data); | ||
7559 | - if (ret < 0) { | ||
7560 | - dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); | ||
7561 | - return ret; | ||
7562 | - } | ||
7563 | - | ||
7564 | platform_set_drvdata(pdev, data); | ||
7565 | |||
7566 | data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); | ||
7567 | @@ -345,8 +357,7 @@ static int hisi_thermal_probe(struct platform_device *pdev) | ||
7568 | } | ||
7569 | |||
7570 | hisi_thermal_enable_bind_irq_sensor(data); | ||
7571 | - irq_get_irqchip_state(data->irq, IRQCHIP_STATE_MASKED, | ||
7572 | - &data->irq_enabled); | ||
7573 | + data->irq_enabled = true; | ||
7574 | |||
7575 | for (i = 0; i < HISI_MAX_SENSORS; ++i) { | ||
7576 | ret = hisi_thermal_register_sensor(pdev, data, | ||
7577 | @@ -358,6 +369,17 @@ static int hisi_thermal_probe(struct platform_device *pdev) | ||
7578 | hisi_thermal_toggle_sensor(&data->sensors[i], true); | ||
7579 | } | ||
7580 | |||
7581 | + ret = devm_request_threaded_irq(&pdev->dev, data->irq, | ||
7582 | + hisi_thermal_alarm_irq, | ||
7583 | + hisi_thermal_alarm_irq_thread, | ||
7584 | + 0, "hisi_thermal", data); | ||
7585 | + if (ret < 0) { | ||
7586 | + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); | ||
7587 | + return ret; | ||
7588 | + } | ||
7589 | + | ||
7590 | + enable_irq(data->irq); | ||
7591 | + | ||
7592 | return 0; | ||
7593 | } | ||
7594 | |||
7595 | diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c | ||
7596 | index 5628fe114347..91335e6de88a 100644 | ||
7597 | --- a/drivers/vfio/pci/vfio_pci_config.c | ||
7598 | +++ b/drivers/vfio/pci/vfio_pci_config.c | ||
7599 | @@ -849,11 +849,13 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) | ||
7600 | |||
7601 | /* | ||
7602 | * Allow writes to device control fields, except devctl_phantom, | ||
7603 | - * which could confuse IOMMU, and the ARI bit in devctl2, which | ||
7604 | + * which could confuse IOMMU, MPS, which can break communication | ||
7605 | + * with other physical devices, and the ARI bit in devctl2, which | ||
7606 | * is set at probe time. FLR gets virtualized via our writefn. | ||
7607 | */ | ||
7608 | p_setw(perm, PCI_EXP_DEVCTL, | ||
7609 | - PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); | ||
7610 | + PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_PAYLOAD, | ||
7611 | + ~PCI_EXP_DEVCTL_PHANTOM); | ||
7612 | p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); | ||
7613 | return 0; | ||
7614 | } | ||
7615 | diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c | ||
7616 | index 9bd17682655a..1c2289ddd555 100644 | ||
7617 | --- a/drivers/video/backlight/pwm_bl.c | ||
7618 | +++ b/drivers/video/backlight/pwm_bl.c | ||
7619 | @@ -79,14 +79,17 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb) | ||
7620 | static int compute_duty_cycle(struct pwm_bl_data *pb, int brightness) | ||
7621 | { | ||
7622 | unsigned int lth = pb->lth_brightness; | ||
7623 | - int duty_cycle; | ||
7624 | + u64 duty_cycle; | ||
7625 | |||
7626 | if (pb->levels) | ||
7627 | duty_cycle = pb->levels[brightness]; | ||
7628 | else | ||
7629 | duty_cycle = brightness; | ||
7630 | |||
7631 | - return (duty_cycle * (pb->period - lth) / pb->scale) + lth; | ||
7632 | + duty_cycle *= pb->period - lth; | ||
7633 | + do_div(duty_cycle, pb->scale); | ||
7634 | + | ||
7635 | + return duty_cycle + lth; | ||
7636 | } | ||
7637 | |||
7638 | static int pwm_backlight_update_status(struct backlight_device *bl) | ||
7639 | diff --git a/fs/dcache.c b/fs/dcache.c | ||
7640 | index f90141387f01..34c852af215c 100644 | ||
7641 | --- a/fs/dcache.c | ||
7642 | +++ b/fs/dcache.c | ||
7643 | @@ -231,7 +231,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c | ||
7644 | { | ||
7645 | /* | ||
7646 | * Be careful about RCU walk racing with rename: | ||
7647 | - * use 'lockless_dereference' to fetch the name pointer. | ||
7648 | + * use 'READ_ONCE' to fetch the name pointer. | ||
7649 | * | ||
7650 | * NOTE! Even if a rename will mean that the length | ||
7651 | * was not loaded atomically, we don't care. The | ||
7652 | @@ -245,7 +245,7 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c | ||
7653 | * early because the data cannot match (there can | ||
7654 | * be no NUL in the ct/tcount data) | ||
7655 | */ | ||
7656 | - const unsigned char *cs = lockless_dereference(dentry->d_name.name); | ||
7657 | + const unsigned char *cs = READ_ONCE(dentry->d_name.name); | ||
7658 | |||
7659 | return dentry_string_cmp(cs, ct, tcount); | ||
7660 | } | ||
7661 | diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h | ||
7662 | index 25d9b5adcd42..36b49bd09264 100644 | ||
7663 | --- a/fs/overlayfs/ovl_entry.h | ||
7664 | +++ b/fs/overlayfs/ovl_entry.h | ||
7665 | @@ -77,5 +77,5 @@ static inline struct ovl_inode *OVL_I(struct inode *inode) | ||
7666 | |||
7667 | static inline struct dentry *ovl_upperdentry_dereference(struct ovl_inode *oi) | ||
7668 | { | ||
7669 | - return lockless_dereference(oi->__upperdentry); | ||
7670 | + return READ_ONCE(oi->__upperdentry); | ||
7671 | } | ||
7672 | diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c | ||
7673 | index b2c7f33e08fc..d94a51dc4e32 100644 | ||
7674 | --- a/fs/overlayfs/readdir.c | ||
7675 | +++ b/fs/overlayfs/readdir.c | ||
7676 | @@ -757,7 +757,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, | ||
7677 | if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { | ||
7678 | struct inode *inode = file_inode(file); | ||
7679 | |||
7680 | - realfile = lockless_dereference(od->upperfile); | ||
7681 | + realfile = READ_ONCE(od->upperfile); | ||
7682 | if (!realfile) { | ||
7683 | struct path upperpath; | ||
7684 | |||
7685 | diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h | ||
7686 | index e549bff87c5b..353f52fdc35e 100644 | ||
7687 | --- a/include/asm-generic/vmlinux.lds.h | ||
7688 | +++ b/include/asm-generic/vmlinux.lds.h | ||
7689 | @@ -688,7 +688,7 @@ | ||
7690 | #define BUG_TABLE | ||
7691 | #endif | ||
7692 | |||
7693 | -#ifdef CONFIG_ORC_UNWINDER | ||
7694 | +#ifdef CONFIG_UNWINDER_ORC | ||
7695 | #define ORC_UNWIND_TABLE \ | ||
7696 | . = ALIGN(4); \ | ||
7697 | .orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \ | ||
7698 | diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h | ||
7699 | index b8d200f60a40..73bec75b74c8 100644 | ||
7700 | --- a/include/linux/bpf_verifier.h | ||
7701 | +++ b/include/linux/bpf_verifier.h | ||
7702 | @@ -15,11 +15,11 @@ | ||
7703 | * In practice this is far bigger than any realistic pointer offset; this limit | ||
7704 | * ensures that umax_value + (int)off + (int)size cannot overflow a u64. | ||
7705 | */ | ||
7706 | -#define BPF_MAX_VAR_OFF (1ULL << 31) | ||
7707 | +#define BPF_MAX_VAR_OFF (1 << 29) | ||
7708 | /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures | ||
7709 | * that converting umax_value to int cannot overflow. | ||
7710 | */ | ||
7711 | -#define BPF_MAX_VAR_SIZ INT_MAX | ||
7712 | +#define BPF_MAX_VAR_SIZ (1 << 29) | ||
7713 | |||
7714 | /* Liveness marks, used for registers and spilled-regs (in stack slots). | ||
7715 | * Read marks propagate upwards until they find a write mark; they record that | ||
7716 | @@ -110,7 +110,7 @@ struct bpf_insn_aux_data { | ||
7717 | struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ | ||
7718 | }; | ||
7719 | int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ | ||
7720 | - int converted_op_size; /* the valid value width after perceived conversion */ | ||
7721 | + bool seen; /* this insn was processed by the verifier */ | ||
7722 | }; | ||
7723 | |||
7724 | #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ | ||
7725 | diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h | ||
7726 | index 780b1242bf24..3b609edffa8f 100644 | ||
7727 | --- a/include/linux/compiler-clang.h | ||
7728 | +++ b/include/linux/compiler-clang.h | ||
7729 | @@ -1,5 +1,5 @@ | ||
7730 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
7731 | -#ifndef __LINUX_COMPILER_H | ||
7732 | +#ifndef __LINUX_COMPILER_TYPES_H | ||
7733 | #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." | ||
7734 | #endif | ||
7735 | |||
7736 | diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h | ||
7737 | index bb78e5bdff26..2272ded07496 100644 | ||
7738 | --- a/include/linux/compiler-gcc.h | ||
7739 | +++ b/include/linux/compiler-gcc.h | ||
7740 | @@ -1,5 +1,5 @@ | ||
7741 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
7742 | -#ifndef __LINUX_COMPILER_H | ||
7743 | +#ifndef __LINUX_COMPILER_TYPES_H | ||
7744 | #error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." | ||
7745 | #endif | ||
7746 | |||
7747 | diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h | ||
7748 | index 523d1b74550f..bfa08160db3a 100644 | ||
7749 | --- a/include/linux/compiler-intel.h | ||
7750 | +++ b/include/linux/compiler-intel.h | ||
7751 | @@ -1,5 +1,5 @@ | ||
7752 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
7753 | -#ifndef __LINUX_COMPILER_H | ||
7754 | +#ifndef __LINUX_COMPILER_TYPES_H | ||
7755 | #error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead." | ||
7756 | #endif | ||
7757 | |||
7758 | diff --git a/include/linux/compiler.h b/include/linux/compiler.h | ||
7759 | index 202710420d6d..fab5dc250c61 100644 | ||
7760 | --- a/include/linux/compiler.h | ||
7761 | +++ b/include/linux/compiler.h | ||
7762 | @@ -2,111 +2,12 @@ | ||
7763 | #ifndef __LINUX_COMPILER_H | ||
7764 | #define __LINUX_COMPILER_H | ||
7765 | |||
7766 | -#ifndef __ASSEMBLY__ | ||
7767 | +#include <linux/compiler_types.h> | ||
7768 | |||
7769 | -#ifdef __CHECKER__ | ||
7770 | -# define __user __attribute__((noderef, address_space(1))) | ||
7771 | -# define __kernel __attribute__((address_space(0))) | ||
7772 | -# define __safe __attribute__((safe)) | ||
7773 | -# define __force __attribute__((force)) | ||
7774 | -# define __nocast __attribute__((nocast)) | ||
7775 | -# define __iomem __attribute__((noderef, address_space(2))) | ||
7776 | -# define __must_hold(x) __attribute__((context(x,1,1))) | ||
7777 | -# define __acquires(x) __attribute__((context(x,0,1))) | ||
7778 | -# define __releases(x) __attribute__((context(x,1,0))) | ||
7779 | -# define __acquire(x) __context__(x,1) | ||
7780 | -# define __release(x) __context__(x,-1) | ||
7781 | -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) | ||
7782 | -# define __percpu __attribute__((noderef, address_space(3))) | ||
7783 | -# define __rcu __attribute__((noderef, address_space(4))) | ||
7784 | -# define __private __attribute__((noderef)) | ||
7785 | -extern void __chk_user_ptr(const volatile void __user *); | ||
7786 | -extern void __chk_io_ptr(const volatile void __iomem *); | ||
7787 | -# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) | ||
7788 | -#else /* __CHECKER__ */ | ||
7789 | -# ifdef STRUCTLEAK_PLUGIN | ||
7790 | -# define __user __attribute__((user)) | ||
7791 | -# else | ||
7792 | -# define __user | ||
7793 | -# endif | ||
7794 | -# define __kernel | ||
7795 | -# define __safe | ||
7796 | -# define __force | ||
7797 | -# define __nocast | ||
7798 | -# define __iomem | ||
7799 | -# define __chk_user_ptr(x) (void)0 | ||
7800 | -# define __chk_io_ptr(x) (void)0 | ||
7801 | -# define __builtin_warning(x, y...) (1) | ||
7802 | -# define __must_hold(x) | ||
7803 | -# define __acquires(x) | ||
7804 | -# define __releases(x) | ||
7805 | -# define __acquire(x) (void)0 | ||
7806 | -# define __release(x) (void)0 | ||
7807 | -# define __cond_lock(x,c) (c) | ||
7808 | -# define __percpu | ||
7809 | -# define __rcu | ||
7810 | -# define __private | ||
7811 | -# define ACCESS_PRIVATE(p, member) ((p)->member) | ||
7812 | -#endif /* __CHECKER__ */ | ||
7813 | - | ||
7814 | -/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ | ||
7815 | -#define ___PASTE(a,b) a##b | ||
7816 | -#define __PASTE(a,b) ___PASTE(a,b) | ||
7817 | +#ifndef __ASSEMBLY__ | ||
7818 | |||
7819 | #ifdef __KERNEL__ | ||
7820 | |||
7821 | -#ifdef __GNUC__ | ||
7822 | -#include <linux/compiler-gcc.h> | ||
7823 | -#endif | ||
7824 | - | ||
7825 | -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) | ||
7826 | -#define notrace __attribute__((hotpatch(0,0))) | ||
7827 | -#else | ||
7828 | -#define notrace __attribute__((no_instrument_function)) | ||
7829 | -#endif | ||
7830 | - | ||
7831 | -/* Intel compiler defines __GNUC__. So we will overwrite implementations | ||
7832 | - * coming from above header files here | ||
7833 | - */ | ||
7834 | -#ifdef __INTEL_COMPILER | ||
7835 | -# include <linux/compiler-intel.h> | ||
7836 | -#endif | ||
7837 | - | ||
7838 | -/* Clang compiler defines __GNUC__. So we will overwrite implementations | ||
7839 | - * coming from above header files here | ||
7840 | - */ | ||
7841 | -#ifdef __clang__ | ||
7842 | -#include <linux/compiler-clang.h> | ||
7843 | -#endif | ||
7844 | - | ||
7845 | -/* | ||
7846 | - * Generic compiler-dependent macros required for kernel | ||
7847 | - * build go below this comment. Actual compiler/compiler version | ||
7848 | - * specific implementations come from the above header files | ||
7849 | - */ | ||
7850 | - | ||
7851 | -struct ftrace_branch_data { | ||
7852 | - const char *func; | ||
7853 | - const char *file; | ||
7854 | - unsigned line; | ||
7855 | - union { | ||
7856 | - struct { | ||
7857 | - unsigned long correct; | ||
7858 | - unsigned long incorrect; | ||
7859 | - }; | ||
7860 | - struct { | ||
7861 | - unsigned long miss; | ||
7862 | - unsigned long hit; | ||
7863 | - }; | ||
7864 | - unsigned long miss_hit[2]; | ||
7865 | - }; | ||
7866 | -}; | ||
7867 | - | ||
7868 | -struct ftrace_likely_data { | ||
7869 | - struct ftrace_branch_data data; | ||
7870 | - unsigned long constant; | ||
7871 | -}; | ||
7872 | - | ||
7873 | /* | ||
7874 | * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code | ||
7875 | * to disable branch tracing on a per file basis. | ||
7876 | @@ -333,6 +234,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s | ||
7877 | * with an explicit memory barrier or atomic instruction that provides the | ||
7878 | * required ordering. | ||
7879 | */ | ||
7880 | +#include <asm/barrier.h> | ||
7881 | |||
7882 | #define __READ_ONCE(x, check) \ | ||
7883 | ({ \ | ||
7884 | @@ -341,6 +243,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s | ||
7885 | __read_once_size(&(x), __u.__c, sizeof(x)); \ | ||
7886 | else \ | ||
7887 | __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ | ||
7888 | + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ | ||
7889 | __u.__val; \ | ||
7890 | }) | ||
7891 | #define READ_ONCE(x) __READ_ONCE(x, 1) | ||
7892 | @@ -363,167 +266,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s | ||
7893 | |||
7894 | #endif /* __ASSEMBLY__ */ | ||
7895 | |||
7896 | -#ifdef __KERNEL__ | ||
7897 | -/* | ||
7898 | - * Allow us to mark functions as 'deprecated' and have gcc emit a nice | ||
7899 | - * warning for each use, in hopes of speeding the functions removal. | ||
7900 | - * Usage is: | ||
7901 | - * int __deprecated foo(void) | ||
7902 | - */ | ||
7903 | -#ifndef __deprecated | ||
7904 | -# define __deprecated /* unimplemented */ | ||
7905 | -#endif | ||
7906 | - | ||
7907 | -#ifdef MODULE | ||
7908 | -#define __deprecated_for_modules __deprecated | ||
7909 | -#else | ||
7910 | -#define __deprecated_for_modules | ||
7911 | -#endif | ||
7912 | - | ||
7913 | -#ifndef __must_check | ||
7914 | -#define __must_check | ||
7915 | -#endif | ||
7916 | - | ||
7917 | -#ifndef CONFIG_ENABLE_MUST_CHECK | ||
7918 | -#undef __must_check | ||
7919 | -#define __must_check | ||
7920 | -#endif | ||
7921 | -#ifndef CONFIG_ENABLE_WARN_DEPRECATED | ||
7922 | -#undef __deprecated | ||
7923 | -#undef __deprecated_for_modules | ||
7924 | -#define __deprecated | ||
7925 | -#define __deprecated_for_modules | ||
7926 | -#endif | ||
7927 | - | ||
7928 | -#ifndef __malloc | ||
7929 | -#define __malloc | ||
7930 | -#endif | ||
7931 | - | ||
7932 | -/* | ||
7933 | - * Allow us to avoid 'defined but not used' warnings on functions and data, | ||
7934 | - * as well as force them to be emitted to the assembly file. | ||
7935 | - * | ||
7936 | - * As of gcc 3.4, static functions that are not marked with attribute((used)) | ||
7937 | - * may be elided from the assembly file. As of gcc 3.4, static data not so | ||
7938 | - * marked will not be elided, but this may change in a future gcc version. | ||
7939 | - * | ||
7940 | - * NOTE: Because distributions shipped with a backported unit-at-a-time | ||
7941 | - * compiler in gcc 3.3, we must define __used to be __attribute__((used)) | ||
7942 | - * for gcc >=3.3 instead of 3.4. | ||
7943 | - * | ||
7944 | - * In prior versions of gcc, such functions and data would be emitted, but | ||
7945 | - * would be warned about except with attribute((unused)). | ||
7946 | - * | ||
7947 | - * Mark functions that are referenced only in inline assembly as __used so | ||
7948 | - * the code is emitted even though it appears to be unreferenced. | ||
7949 | - */ | ||
7950 | -#ifndef __used | ||
7951 | -# define __used /* unimplemented */ | ||
7952 | -#endif | ||
7953 | - | ||
7954 | -#ifndef __maybe_unused | ||
7955 | -# define __maybe_unused /* unimplemented */ | ||
7956 | -#endif | ||
7957 | - | ||
7958 | -#ifndef __always_unused | ||
7959 | -# define __always_unused /* unimplemented */ | ||
7960 | -#endif | ||
7961 | - | ||
7962 | -#ifndef noinline | ||
7963 | -#define noinline | ||
7964 | -#endif | ||
7965 | - | ||
7966 | -/* | ||
7967 | - * Rather then using noinline to prevent stack consumption, use | ||
7968 | - * noinline_for_stack instead. For documentation reasons. | ||
7969 | - */ | ||
7970 | -#define noinline_for_stack noinline | ||
7971 | - | ||
7972 | -#ifndef __always_inline | ||
7973 | -#define __always_inline inline | ||
7974 | -#endif | ||
7975 | - | ||
7976 | -#endif /* __KERNEL__ */ | ||
7977 | - | ||
7978 | -/* | ||
7979 | - * From the GCC manual: | ||
7980 | - * | ||
7981 | - * Many functions do not examine any values except their arguments, | ||
7982 | - * and have no effects except the return value. Basically this is | ||
7983 | - * just slightly more strict class than the `pure' attribute above, | ||
7984 | - * since function is not allowed to read global memory. | ||
7985 | - * | ||
7986 | - * Note that a function that has pointer arguments and examines the | ||
7987 | - * data pointed to must _not_ be declared `const'. Likewise, a | ||
7988 | - * function that calls a non-`const' function usually must not be | ||
7989 | - * `const'. It does not make sense for a `const' function to return | ||
7990 | - * `void'. | ||
7991 | - */ | ||
7992 | -#ifndef __attribute_const__ | ||
7993 | -# define __attribute_const__ /* unimplemented */ | ||
7994 | -#endif | ||
7995 | - | ||
7996 | -#ifndef __designated_init | ||
7997 | -# define __designated_init | ||
7998 | -#endif | ||
7999 | - | ||
8000 | -#ifndef __latent_entropy | ||
8001 | -# define __latent_entropy | ||
8002 | -#endif | ||
8003 | - | ||
8004 | -#ifndef __randomize_layout | ||
8005 | -# define __randomize_layout __designated_init | ||
8006 | -#endif | ||
8007 | - | ||
8008 | -#ifndef __no_randomize_layout | ||
8009 | -# define __no_randomize_layout | ||
8010 | -#endif | ||
8011 | - | ||
8012 | -#ifndef randomized_struct_fields_start | ||
8013 | -# define randomized_struct_fields_start | ||
8014 | -# define randomized_struct_fields_end | ||
8015 | -#endif | ||
8016 | - | ||
8017 | -/* | ||
8018 | - * Tell gcc if a function is cold. The compiler will assume any path | ||
8019 | - * directly leading to the call is unlikely. | ||
8020 | - */ | ||
8021 | - | ||
8022 | -#ifndef __cold | ||
8023 | -#define __cold | ||
8024 | -#endif | ||
8025 | - | ||
8026 | -/* Simple shorthand for a section definition */ | ||
8027 | -#ifndef __section | ||
8028 | -# define __section(S) __attribute__ ((__section__(#S))) | ||
8029 | -#endif | ||
8030 | - | ||
8031 | -#ifndef __visible | ||
8032 | -#define __visible | ||
8033 | -#endif | ||
8034 | - | ||
8035 | -#ifndef __nostackprotector | ||
8036 | -# define __nostackprotector | ||
8037 | -#endif | ||
8038 | - | ||
8039 | -/* | ||
8040 | - * Assume alignment of return value. | ||
8041 | - */ | ||
8042 | -#ifndef __assume_aligned | ||
8043 | -#define __assume_aligned(a, ...) | ||
8044 | -#endif | ||
8045 | - | ||
8046 | - | ||
8047 | -/* Are two types/vars the same type (ignoring qualifiers)? */ | ||
8048 | -#ifndef __same_type | ||
8049 | -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) | ||
8050 | -#endif | ||
8051 | - | ||
8052 | -/* Is this type a native word size -- useful for atomic operations */ | ||
8053 | -#ifndef __native_word | ||
8054 | -# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) | ||
8055 | -#endif | ||
8056 | - | ||
8057 | /* Compile time object size, -1 for unknown */ | ||
8058 | #ifndef __compiletime_object_size | ||
8059 | # define __compiletime_object_size(obj) -1 | ||
8060 | diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h | ||
8061 | new file mode 100644 | ||
8062 | index 000000000000..6b79a9bba9a7 | ||
8063 | --- /dev/null | ||
8064 | +++ b/include/linux/compiler_types.h | ||
8065 | @@ -0,0 +1,274 @@ | ||
8066 | +#ifndef __LINUX_COMPILER_TYPES_H | ||
8067 | +#define __LINUX_COMPILER_TYPES_H | ||
8068 | + | ||
8069 | +#ifndef __ASSEMBLY__ | ||
8070 | + | ||
8071 | +#ifdef __CHECKER__ | ||
8072 | +# define __user __attribute__((noderef, address_space(1))) | ||
8073 | +# define __kernel __attribute__((address_space(0))) | ||
8074 | +# define __safe __attribute__((safe)) | ||
8075 | +# define __force __attribute__((force)) | ||
8076 | +# define __nocast __attribute__((nocast)) | ||
8077 | +# define __iomem __attribute__((noderef, address_space(2))) | ||
8078 | +# define __must_hold(x) __attribute__((context(x,1,1))) | ||
8079 | +# define __acquires(x) __attribute__((context(x,0,1))) | ||
8080 | +# define __releases(x) __attribute__((context(x,1,0))) | ||
8081 | +# define __acquire(x) __context__(x,1) | ||
8082 | +# define __release(x) __context__(x,-1) | ||
8083 | +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) | ||
8084 | +# define __percpu __attribute__((noderef, address_space(3))) | ||
8085 | +# define __rcu __attribute__((noderef, address_space(4))) | ||
8086 | +# define __private __attribute__((noderef)) | ||
8087 | +extern void __chk_user_ptr(const volatile void __user *); | ||
8088 | +extern void __chk_io_ptr(const volatile void __iomem *); | ||
8089 | +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) | ||
8090 | +#else /* __CHECKER__ */ | ||
8091 | +# ifdef STRUCTLEAK_PLUGIN | ||
8092 | +# define __user __attribute__((user)) | ||
8093 | +# else | ||
8094 | +# define __user | ||
8095 | +# endif | ||
8096 | +# define __kernel | ||
8097 | +# define __safe | ||
8098 | +# define __force | ||
8099 | +# define __nocast | ||
8100 | +# define __iomem | ||
8101 | +# define __chk_user_ptr(x) (void)0 | ||
8102 | +# define __chk_io_ptr(x) (void)0 | ||
8103 | +# define __builtin_warning(x, y...) (1) | ||
8104 | +# define __must_hold(x) | ||
8105 | +# define __acquires(x) | ||
8106 | +# define __releases(x) | ||
8107 | +# define __acquire(x) (void)0 | ||
8108 | +# define __release(x) (void)0 | ||
8109 | +# define __cond_lock(x,c) (c) | ||
8110 | +# define __percpu | ||
8111 | +# define __rcu | ||
8112 | +# define __private | ||
8113 | +# define ACCESS_PRIVATE(p, member) ((p)->member) | ||
8114 | +#endif /* __CHECKER__ */ | ||
8115 | + | ||
8116 | +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ | ||
8117 | +#define ___PASTE(a,b) a##b | ||
8118 | +#define __PASTE(a,b) ___PASTE(a,b) | ||
8119 | + | ||
8120 | +#ifdef __KERNEL__ | ||
8121 | + | ||
8122 | +#ifdef __GNUC__ | ||
8123 | +#include <linux/compiler-gcc.h> | ||
8124 | +#endif | ||
8125 | + | ||
8126 | +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) | ||
8127 | +#define notrace __attribute__((hotpatch(0,0))) | ||
8128 | +#else | ||
8129 | +#define notrace __attribute__((no_instrument_function)) | ||
8130 | +#endif | ||
8131 | + | ||
8132 | +/* Intel compiler defines __GNUC__. So we will overwrite implementations | ||
8133 | + * coming from above header files here | ||
8134 | + */ | ||
8135 | +#ifdef __INTEL_COMPILER | ||
8136 | +# include <linux/compiler-intel.h> | ||
8137 | +#endif | ||
8138 | + | ||
8139 | +/* Clang compiler defines __GNUC__. So we will overwrite implementations | ||
8140 | + * coming from above header files here | ||
8141 | + */ | ||
8142 | +#ifdef __clang__ | ||
8143 | +#include <linux/compiler-clang.h> | ||
8144 | +#endif | ||
8145 | + | ||
8146 | +/* | ||
8147 | + * Generic compiler-dependent macros required for kernel | ||
8148 | + * build go below this comment. Actual compiler/compiler version | ||
8149 | + * specific implementations come from the above header files | ||
8150 | + */ | ||
8151 | + | ||
8152 | +struct ftrace_branch_data { | ||
8153 | + const char *func; | ||
8154 | + const char *file; | ||
8155 | + unsigned line; | ||
8156 | + union { | ||
8157 | + struct { | ||
8158 | + unsigned long correct; | ||
8159 | + unsigned long incorrect; | ||
8160 | + }; | ||
8161 | + struct { | ||
8162 | + unsigned long miss; | ||
8163 | + unsigned long hit; | ||
8164 | + }; | ||
8165 | + unsigned long miss_hit[2]; | ||
8166 | + }; | ||
8167 | +}; | ||
8168 | + | ||
8169 | +struct ftrace_likely_data { | ||
8170 | + struct ftrace_branch_data data; | ||
8171 | + unsigned long constant; | ||
8172 | +}; | ||
8173 | + | ||
8174 | +#endif /* __KERNEL__ */ | ||
8175 | + | ||
8176 | +#endif /* __ASSEMBLY__ */ | ||
8177 | + | ||
8178 | +#ifdef __KERNEL__ | ||
8179 | +/* | ||
8180 | + * Allow us to mark functions as 'deprecated' and have gcc emit a nice | ||
8181 | + * warning for each use, in hopes of speeding the functions removal. | ||
8182 | + * Usage is: | ||
8183 | + * int __deprecated foo(void) | ||
8184 | + */ | ||
8185 | +#ifndef __deprecated | ||
8186 | +# define __deprecated /* unimplemented */ | ||
8187 | +#endif | ||
8188 | + | ||
8189 | +#ifdef MODULE | ||
8190 | +#define __deprecated_for_modules __deprecated | ||
8191 | +#else | ||
8192 | +#define __deprecated_for_modules | ||
8193 | +#endif | ||
8194 | + | ||
8195 | +#ifndef __must_check | ||
8196 | +#define __must_check | ||
8197 | +#endif | ||
8198 | + | ||
8199 | +#ifndef CONFIG_ENABLE_MUST_CHECK | ||
8200 | +#undef __must_check | ||
8201 | +#define __must_check | ||
8202 | +#endif | ||
8203 | +#ifndef CONFIG_ENABLE_WARN_DEPRECATED | ||
8204 | +#undef __deprecated | ||
8205 | +#undef __deprecated_for_modules | ||
8206 | +#define __deprecated | ||
8207 | +#define __deprecated_for_modules | ||
8208 | +#endif | ||
8209 | + | ||
8210 | +#ifndef __malloc | ||
8211 | +#define __malloc | ||
8212 | +#endif | ||
8213 | + | ||
8214 | +/* | ||
8215 | + * Allow us to avoid 'defined but not used' warnings on functions and data, | ||
8216 | + * as well as force them to be emitted to the assembly file. | ||
8217 | + * | ||
8218 | + * As of gcc 3.4, static functions that are not marked with attribute((used)) | ||
8219 | + * may be elided from the assembly file. As of gcc 3.4, static data not so | ||
8220 | + * marked will not be elided, but this may change in a future gcc version. | ||
8221 | + * | ||
8222 | + * NOTE: Because distributions shipped with a backported unit-at-a-time | ||
8223 | + * compiler in gcc 3.3, we must define __used to be __attribute__((used)) | ||
8224 | + * for gcc >=3.3 instead of 3.4. | ||
8225 | + * | ||
8226 | + * In prior versions of gcc, such functions and data would be emitted, but | ||
8227 | + * would be warned about except with attribute((unused)). | ||
8228 | + * | ||
8229 | + * Mark functions that are referenced only in inline assembly as __used so | ||
8230 | + * the code is emitted even though it appears to be unreferenced. | ||
8231 | + */ | ||
8232 | +#ifndef __used | ||
8233 | +# define __used /* unimplemented */ | ||
8234 | +#endif | ||
8235 | + | ||
8236 | +#ifndef __maybe_unused | ||
8237 | +# define __maybe_unused /* unimplemented */ | ||
8238 | +#endif | ||
8239 | + | ||
8240 | +#ifndef __always_unused | ||
8241 | +# define __always_unused /* unimplemented */ | ||
8242 | +#endif | ||
8243 | + | ||
8244 | +#ifndef noinline | ||
8245 | +#define noinline | ||
8246 | +#endif | ||
8247 | + | ||
8248 | +/* | ||
8249 | + * Rather then using noinline to prevent stack consumption, use | ||
8250 | + * noinline_for_stack instead. For documentation reasons. | ||
8251 | + */ | ||
8252 | +#define noinline_for_stack noinline | ||
8253 | + | ||
8254 | +#ifndef __always_inline | ||
8255 | +#define __always_inline inline | ||
8256 | +#endif | ||
8257 | + | ||
8258 | +#endif /* __KERNEL__ */ | ||
8259 | + | ||
8260 | +/* | ||
8261 | + * From the GCC manual: | ||
8262 | + * | ||
8263 | + * Many functions do not examine any values except their arguments, | ||
8264 | + * and have no effects except the return value. Basically this is | ||
8265 | + * just slightly more strict class than the `pure' attribute above, | ||
8266 | + * since function is not allowed to read global memory. | ||
8267 | + * | ||
8268 | + * Note that a function that has pointer arguments and examines the | ||
8269 | + * data pointed to must _not_ be declared `const'. Likewise, a | ||
8270 | + * function that calls a non-`const' function usually must not be | ||
8271 | + * `const'. It does not make sense for a `const' function to return | ||
8272 | + * `void'. | ||
8273 | + */ | ||
8274 | +#ifndef __attribute_const__ | ||
8275 | +# define __attribute_const__ /* unimplemented */ | ||
8276 | +#endif | ||
8277 | + | ||
8278 | +#ifndef __designated_init | ||
8279 | +# define __designated_init | ||
8280 | +#endif | ||
8281 | + | ||
8282 | +#ifndef __latent_entropy | ||
8283 | +# define __latent_entropy | ||
8284 | +#endif | ||
8285 | + | ||
8286 | +#ifndef __randomize_layout | ||
8287 | +# define __randomize_layout __designated_init | ||
8288 | +#endif | ||
8289 | + | ||
8290 | +#ifndef __no_randomize_layout | ||
8291 | +# define __no_randomize_layout | ||
8292 | +#endif | ||
8293 | + | ||
8294 | +#ifndef randomized_struct_fields_start | ||
8295 | +# define randomized_struct_fields_start | ||
8296 | +# define randomized_struct_fields_end | ||
8297 | +#endif | ||
8298 | + | ||
8299 | +/* | ||
8300 | + * Tell gcc if a function is cold. The compiler will assume any path | ||
8301 | + * directly leading to the call is unlikely. | ||
8302 | + */ | ||
8303 | + | ||
8304 | +#ifndef __cold | ||
8305 | +#define __cold | ||
8306 | +#endif | ||
8307 | + | ||
8308 | +/* Simple shorthand for a section definition */ | ||
8309 | +#ifndef __section | ||
8310 | +# define __section(S) __attribute__ ((__section__(#S))) | ||
8311 | +#endif | ||
8312 | + | ||
8313 | +#ifndef __visible | ||
8314 | +#define __visible | ||
8315 | +#endif | ||
8316 | + | ||
8317 | +#ifndef __nostackprotector | ||
8318 | +# define __nostackprotector | ||
8319 | +#endif | ||
8320 | + | ||
8321 | +/* | ||
8322 | + * Assume alignment of return value. | ||
8323 | + */ | ||
8324 | +#ifndef __assume_aligned | ||
8325 | +#define __assume_aligned(a, ...) | ||
8326 | +#endif | ||
8327 | + | ||
8328 | + | ||
8329 | +/* Are two types/vars the same type (ignoring qualifiers)? */ | ||
8330 | +#ifndef __same_type | ||
8331 | +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) | ||
8332 | +#endif | ||
8333 | + | ||
8334 | +/* Is this type a native word size -- useful for atomic operations */ | ||
8335 | +#ifndef __native_word | ||
8336 | +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) | ||
8337 | +#endif | ||
8338 | + | ||
8339 | +#endif /* __LINUX_COMPILER_TYPES_H */ | ||
8340 | diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h | ||
8341 | index b4054fd5b6f6..b19563f9a8eb 100644 | ||
8342 | --- a/include/linux/hypervisor.h | ||
8343 | +++ b/include/linux/hypervisor.h | ||
8344 | @@ -7,8 +7,12 @@ | ||
8345 | * Juergen Gross <jgross@suse.com> | ||
8346 | */ | ||
8347 | |||
8348 | -#ifdef CONFIG_HYPERVISOR_GUEST | ||
8349 | -#include <asm/hypervisor.h> | ||
8350 | +#ifdef CONFIG_X86 | ||
8351 | +#include <asm/x86_init.h> | ||
8352 | +static inline void hypervisor_pin_vcpu(int cpu) | ||
8353 | +{ | ||
8354 | + x86_platform.hyper.pin_vcpu(cpu); | ||
8355 | +} | ||
8356 | #else | ||
8357 | static inline void hypervisor_pin_vcpu(int cpu) | ||
8358 | { | ||
8359 | diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h | ||
8360 | index 7b0fa8b5c120..ce0ef1c0a30a 100644 | ||
8361 | --- a/include/linux/iio/common/st_sensors.h | ||
8362 | +++ b/include/linux/iio/common/st_sensors.h | ||
8363 | @@ -139,7 +139,7 @@ struct st_sensor_das { | ||
8364 | * @mask_ihl: mask to enable/disable active low on the INT lines. | ||
8365 | * @addr_od: address to enable/disable Open Drain on the INT lines. | ||
8366 | * @mask_od: mask to enable/disable Open Drain on the INT lines. | ||
8367 | - * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt | ||
8368 | + * struct stat_drdy - status register of DRDY (data ready) interrupt. | ||
8369 | * struct ig1 - represents the Interrupt Generator 1 of sensors. | ||
8370 | * @en_addr: address of the enable ig1 register. | ||
8371 | * @en_mask: mask to write the on/off value for enable. | ||
8372 | @@ -152,7 +152,10 @@ struct st_sensor_data_ready_irq { | ||
8373 | u8 mask_ihl; | ||
8374 | u8 addr_od; | ||
8375 | u8 mask_od; | ||
8376 | - u8 addr_stat_drdy; | ||
8377 | + struct { | ||
8378 | + u8 addr; | ||
8379 | + u8 mask; | ||
8380 | + } stat_drdy; | ||
8381 | struct { | ||
8382 | u8 en_addr; | ||
8383 | u8 en_mask; | ||
8384 | diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h | ||
8385 | new file mode 100644 | ||
8386 | index 000000000000..2710d72de3c9 | ||
8387 | --- /dev/null | ||
8388 | +++ b/include/linux/intel-pti.h | ||
8389 | @@ -0,0 +1,43 @@ | ||
8390 | +/* | ||
8391 | + * Copyright (C) Intel 2011 | ||
8392 | + * | ||
8393 | + * This program is free software; you can redistribute it and/or modify | ||
8394 | + * it under the terms of the GNU General Public License version 2 as | ||
8395 | + * published by the Free Software Foundation. | ||
8396 | + * | ||
8397 | + * This program is distributed in the hope that it will be useful, | ||
8398 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8399 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
8400 | + * GNU General Public License for more details. | ||
8401 | + * | ||
8402 | + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
8403 | + * | ||
8404 | + * The PTI (Parallel Trace Interface) driver directs trace data routed from | ||
8405 | + * various parts in the system out through the Intel Penwell PTI port and | ||
8406 | + * out of the mobile device for analysis with a debugging tool | ||
8407 | + * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, | ||
8408 | + * compact JTAG, standard. | ||
8409 | + * | ||
8410 | + * This header file will allow other parts of the OS to use the | ||
8411 | + * interface to write out it's contents for debugging a mobile system. | ||
8412 | + */ | ||
8413 | + | ||
8414 | +#ifndef LINUX_INTEL_PTI_H_ | ||
8415 | +#define LINUX_INTEL_PTI_H_ | ||
8416 | + | ||
8417 | +/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ | ||
8418 | +#define PTI_LASTDWORD_DTS 0x30 | ||
8419 | + | ||
8420 | +/* basic structure used as a write address to the PTI HW */ | ||
8421 | +struct pti_masterchannel { | ||
8422 | + u8 master; | ||
8423 | + u8 channel; | ||
8424 | +}; | ||
8425 | + | ||
8426 | +/* the following functions are defined in misc/pti.c */ | ||
8427 | +void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); | ||
8428 | +struct pti_masterchannel *pti_request_masterchannel(u8 type, | ||
8429 | + const char *thread_name); | ||
8430 | +void pti_release_masterchannel(struct pti_masterchannel *mc); | ||
8431 | + | ||
8432 | +#endif /* LINUX_INTEL_PTI_H_ */ | ||
8433 | diff --git a/include/linux/linkage.h b/include/linux/linkage.h | ||
8434 | index 2e6f90bd52aa..f68db9e450eb 100644 | ||
8435 | --- a/include/linux/linkage.h | ||
8436 | +++ b/include/linux/linkage.h | ||
8437 | @@ -2,7 +2,7 @@ | ||
8438 | #ifndef _LINUX_LINKAGE_H | ||
8439 | #define _LINUX_LINKAGE_H | ||
8440 | |||
8441 | -#include <linux/compiler.h> | ||
8442 | +#include <linux/compiler_types.h> | ||
8443 | #include <linux/stringify.h> | ||
8444 | #include <linux/export.h> | ||
8445 | #include <asm/linkage.h> | ||
8446 | diff --git a/include/linux/mm.h b/include/linux/mm.h | ||
8447 | index db647d428100..f50deada0f5c 100644 | ||
8448 | --- a/include/linux/mm.h | ||
8449 | +++ b/include/linux/mm.h | ||
8450 | @@ -2510,7 +2510,7 @@ void vmemmap_populate_print_last(void); | ||
8451 | void vmemmap_free(unsigned long start, unsigned long end); | ||
8452 | #endif | ||
8453 | void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, | ||
8454 | - unsigned long size); | ||
8455 | + unsigned long nr_pages); | ||
8456 | |||
8457 | enum mf_flags { | ||
8458 | MF_COUNT_INCREASED = 1 << 0, | ||
8459 | diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h | ||
8460 | index 18b06983131a..f0938257ee6d 100644 | ||
8461 | --- a/include/linux/mmzone.h | ||
8462 | +++ b/include/linux/mmzone.h | ||
8463 | @@ -1152,13 +1152,17 @@ struct mem_section { | ||
8464 | #define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) | ||
8465 | |||
8466 | #ifdef CONFIG_SPARSEMEM_EXTREME | ||
8467 | -extern struct mem_section *mem_section[NR_SECTION_ROOTS]; | ||
8468 | +extern struct mem_section **mem_section; | ||
8469 | #else | ||
8470 | extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; | ||
8471 | #endif | ||
8472 | |||
8473 | static inline struct mem_section *__nr_to_section(unsigned long nr) | ||
8474 | { | ||
8475 | +#ifdef CONFIG_SPARSEMEM_EXTREME | ||
8476 | + if (!mem_section) | ||
8477 | + return NULL; | ||
8478 | +#endif | ||
8479 | if (!mem_section[SECTION_NR_TO_ROOT(nr)]) | ||
8480 | return NULL; | ||
8481 | return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; | ||
8482 | diff --git a/include/linux/pti.h b/include/linux/pti.h | ||
8483 | deleted file mode 100644 | ||
8484 | index b3ea01a3197e..000000000000 | ||
8485 | --- a/include/linux/pti.h | ||
8486 | +++ /dev/null | ||
8487 | @@ -1,43 +0,0 @@ | ||
8488 | -/* | ||
8489 | - * Copyright (C) Intel 2011 | ||
8490 | - * | ||
8491 | - * This program is free software; you can redistribute it and/or modify | ||
8492 | - * it under the terms of the GNU General Public License version 2 as | ||
8493 | - * published by the Free Software Foundation. | ||
8494 | - * | ||
8495 | - * This program is distributed in the hope that it will be useful, | ||
8496 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
8497 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
8498 | - * GNU General Public License for more details. | ||
8499 | - * | ||
8500 | - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
8501 | - * | ||
8502 | - * The PTI (Parallel Trace Interface) driver directs trace data routed from | ||
8503 | - * various parts in the system out through the Intel Penwell PTI port and | ||
8504 | - * out of the mobile device for analysis with a debugging tool | ||
8505 | - * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7, | ||
8506 | - * compact JTAG, standard. | ||
8507 | - * | ||
8508 | - * This header file will allow other parts of the OS to use the | ||
8509 | - * interface to write out it's contents for debugging a mobile system. | ||
8510 | - */ | ||
8511 | - | ||
8512 | -#ifndef PTI_H_ | ||
8513 | -#define PTI_H_ | ||
8514 | - | ||
8515 | -/* offset for last dword of any PTI message. Part of MIPI P1149.7 */ | ||
8516 | -#define PTI_LASTDWORD_DTS 0x30 | ||
8517 | - | ||
8518 | -/* basic structure used as a write address to the PTI HW */ | ||
8519 | -struct pti_masterchannel { | ||
8520 | - u8 master; | ||
8521 | - u8 channel; | ||
8522 | -}; | ||
8523 | - | ||
8524 | -/* the following functions are defined in misc/pti.c */ | ||
8525 | -void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count); | ||
8526 | -struct pti_masterchannel *pti_request_masterchannel(u8 type, | ||
8527 | - const char *thread_name); | ||
8528 | -void pti_release_masterchannel(struct pti_masterchannel *mc); | ||
8529 | - | ||
8530 | -#endif /*PTI_H_*/ | ||
8531 | diff --git a/include/linux/rculist.h b/include/linux/rculist.h | ||
8532 | index c2cdd45a880a..127f534fec94 100644 | ||
8533 | --- a/include/linux/rculist.h | ||
8534 | +++ b/include/linux/rculist.h | ||
8535 | @@ -275,7 +275,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, | ||
8536 | * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). | ||
8537 | */ | ||
8538 | #define list_entry_rcu(ptr, type, member) \ | ||
8539 | - container_of(lockless_dereference(ptr), type, member) | ||
8540 | + container_of(READ_ONCE(ptr), type, member) | ||
8541 | |||
8542 | /* | ||
8543 | * Where are list_empty_rcu() and list_first_entry_rcu()? | ||
8544 | @@ -368,7 +368,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, | ||
8545 | * example is when items are added to the list, but never deleted. | ||
8546 | */ | ||
8547 | #define list_entry_lockless(ptr, type, member) \ | ||
8548 | - container_of((typeof(ptr))lockless_dereference(ptr), type, member) | ||
8549 | + container_of((typeof(ptr))READ_ONCE(ptr), type, member) | ||
8550 | |||
8551 | /** | ||
8552 | * list_for_each_entry_lockless - iterate over rcu list of given type | ||
8553 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | ||
8554 | index 1a9f70d44af9..a6ddc42f87a5 100644 | ||
8555 | --- a/include/linux/rcupdate.h | ||
8556 | +++ b/include/linux/rcupdate.h | ||
8557 | @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { } | ||
8558 | #define __rcu_dereference_check(p, c, space) \ | ||
8559 | ({ \ | ||
8560 | /* Dependency order vs. p above. */ \ | ||
8561 | - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ | ||
8562 | + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ | ||
8563 | RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ | ||
8564 | rcu_dereference_sparse(p, space); \ | ||
8565 | ((typeof(*p) __force __kernel *)(________p1)); \ | ||
8566 | @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { } | ||
8567 | #define rcu_dereference_raw(p) \ | ||
8568 | ({ \ | ||
8569 | /* Dependency order vs. p above. */ \ | ||
8570 | - typeof(p) ________p1 = lockless_dereference(p); \ | ||
8571 | + typeof(p) ________p1 = READ_ONCE(p); \ | ||
8572 | ((typeof(*p) __force __kernel *)(________p1)); \ | ||
8573 | }) | ||
8574 | |||
8575 | diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h | ||
8576 | index f65b92e0e1f9..ee8220f8dcf5 100644 | ||
8577 | --- a/include/uapi/linux/stddef.h | ||
8578 | +++ b/include/uapi/linux/stddef.h | ||
8579 | @@ -1,5 +1,5 @@ | ||
8580 | /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | ||
8581 | -#include <linux/compiler.h> | ||
8582 | +#include <linux/compiler_types.h> | ||
8583 | |||
8584 | #ifndef __always_inline | ||
8585 | #define __always_inline inline | ||
8586 | diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c | ||
8587 | index c48ca2a34b5e..c5ff809e86d0 100644 | ||
8588 | --- a/kernel/bpf/verifier.c | ||
8589 | +++ b/kernel/bpf/verifier.c | ||
8590 | @@ -1061,6 +1061,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, | ||
8591 | break; | ||
8592 | case PTR_TO_STACK: | ||
8593 | pointer_desc = "stack "; | ||
8594 | + /* The stack spill tracking logic in check_stack_write() | ||
8595 | + * and check_stack_read() relies on stack accesses being | ||
8596 | + * aligned. | ||
8597 | + */ | ||
8598 | + strict = true; | ||
8599 | break; | ||
8600 | default: | ||
8601 | break; | ||
8602 | @@ -1068,6 +1073,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, | ||
8603 | return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict); | ||
8604 | } | ||
8605 | |||
8606 | +/* truncate register to smaller size (in bytes) | ||
8607 | + * must be called with size < BPF_REG_SIZE | ||
8608 | + */ | ||
8609 | +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) | ||
8610 | +{ | ||
8611 | + u64 mask; | ||
8612 | + | ||
8613 | + /* clear high bits in bit representation */ | ||
8614 | + reg->var_off = tnum_cast(reg->var_off, size); | ||
8615 | + | ||
8616 | + /* fix arithmetic bounds */ | ||
8617 | + mask = ((u64)1 << (size * 8)) - 1; | ||
8618 | + if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) { | ||
8619 | + reg->umin_value &= mask; | ||
8620 | + reg->umax_value &= mask; | ||
8621 | + } else { | ||
8622 | + reg->umin_value = 0; | ||
8623 | + reg->umax_value = mask; | ||
8624 | + } | ||
8625 | + reg->smin_value = reg->umin_value; | ||
8626 | + reg->smax_value = reg->umax_value; | ||
8627 | +} | ||
8628 | + | ||
8629 | /* check whether memory at (regno + off) is accessible for t = (read | write) | ||
8630 | * if t==write, value_regno is a register which value is stored into memory | ||
8631 | * if t==read, value_regno is a register which will receive the value from memory | ||
8632 | @@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | ||
8633 | if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && | ||
8634 | state->regs[value_regno].type == SCALAR_VALUE) { | ||
8635 | /* b/h/w load zero-extends, mark upper bits as known 0 */ | ||
8636 | - state->regs[value_regno].var_off = tnum_cast( | ||
8637 | - state->regs[value_regno].var_off, size); | ||
8638 | - __update_reg_bounds(&state->regs[value_regno]); | ||
8639 | + coerce_reg_to_size(&state->regs[value_regno], size); | ||
8640 | } | ||
8641 | return err; | ||
8642 | } | ||
8643 | @@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, | ||
8644 | tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); | ||
8645 | verbose("invalid variable stack read R%d var_off=%s\n", | ||
8646 | regno, tn_buf); | ||
8647 | + return -EACCES; | ||
8648 | } | ||
8649 | off = regs[regno].off + regs[regno].var_off.value; | ||
8650 | if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || | ||
8651 | @@ -1742,14 +1769,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | ||
8652 | return 0; | ||
8653 | } | ||
8654 | |||
8655 | -static void coerce_reg_to_32(struct bpf_reg_state *reg) | ||
8656 | -{ | ||
8657 | - /* clear high 32 bits */ | ||
8658 | - reg->var_off = tnum_cast(reg->var_off, 4); | ||
8659 | - /* Update bounds */ | ||
8660 | - __update_reg_bounds(reg); | ||
8661 | -} | ||
8662 | - | ||
8663 | static bool signed_add_overflows(s64 a, s64 b) | ||
8664 | { | ||
8665 | /* Do the add in u64, where overflow is well-defined */ | ||
8666 | @@ -1770,6 +1789,41 @@ static bool signed_sub_overflows(s64 a, s64 b) | ||
8667 | return res > a; | ||
8668 | } | ||
8669 | |||
8670 | +static bool check_reg_sane_offset(struct bpf_verifier_env *env, | ||
8671 | + const struct bpf_reg_state *reg, | ||
8672 | + enum bpf_reg_type type) | ||
8673 | +{ | ||
8674 | + bool known = tnum_is_const(reg->var_off); | ||
8675 | + s64 val = reg->var_off.value; | ||
8676 | + s64 smin = reg->smin_value; | ||
8677 | + | ||
8678 | + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { | ||
8679 | + verbose("math between %s pointer and %lld is not allowed\n", | ||
8680 | + reg_type_str[type], val); | ||
8681 | + return false; | ||
8682 | + } | ||
8683 | + | ||
8684 | + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { | ||
8685 | + verbose("%s pointer offset %d is not allowed\n", | ||
8686 | + reg_type_str[type], reg->off); | ||
8687 | + return false; | ||
8688 | + } | ||
8689 | + | ||
8690 | + if (smin == S64_MIN) { | ||
8691 | + verbose("math between %s pointer and register with unbounded min value is not allowed\n", | ||
8692 | + reg_type_str[type]); | ||
8693 | + return false; | ||
8694 | + } | ||
8695 | + | ||
8696 | + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { | ||
8697 | + verbose("value %lld makes %s pointer be out of bounds\n", | ||
8698 | + smin, reg_type_str[type]); | ||
8699 | + return false; | ||
8700 | + } | ||
8701 | + | ||
8702 | + return true; | ||
8703 | +} | ||
8704 | + | ||
8705 | /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. | ||
8706 | * Caller should also handle BPF_MOV case separately. | ||
8707 | * If we return -EACCES, caller may want to try again treating pointer as a | ||
8708 | @@ -1835,6 +1889,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | ||
8709 | dst_reg->type = ptr_reg->type; | ||
8710 | dst_reg->id = ptr_reg->id; | ||
8711 | |||
8712 | + if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || | ||
8713 | + !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) | ||
8714 | + return -EINVAL; | ||
8715 | + | ||
8716 | switch (opcode) { | ||
8717 | case BPF_ADD: | ||
8718 | /* We can take a fixed offset as long as it doesn't overflow | ||
8719 | @@ -1965,12 +2023,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, | ||
8720 | return -EACCES; | ||
8721 | } | ||
8722 | |||
8723 | + if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) | ||
8724 | + return -EINVAL; | ||
8725 | + | ||
8726 | __update_reg_bounds(dst_reg); | ||
8727 | __reg_deduce_bounds(dst_reg); | ||
8728 | __reg_bound_offset(dst_reg); | ||
8729 | return 0; | ||
8730 | } | ||
8731 | |||
8732 | +/* WARNING: This function does calculations on 64-bit values, but the actual | ||
8733 | + * execution may occur on 32-bit values. Therefore, things like bitshifts | ||
8734 | + * need extra checks in the 32-bit case. | ||
8735 | + */ | ||
8736 | static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8737 | struct bpf_insn *insn, | ||
8738 | struct bpf_reg_state *dst_reg, | ||
8739 | @@ -1981,12 +2046,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8740 | bool src_known, dst_known; | ||
8741 | s64 smin_val, smax_val; | ||
8742 | u64 umin_val, umax_val; | ||
8743 | + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; | ||
8744 | |||
8745 | - if (BPF_CLASS(insn->code) != BPF_ALU64) { | ||
8746 | - /* 32-bit ALU ops are (32,32)->64 */ | ||
8747 | - coerce_reg_to_32(dst_reg); | ||
8748 | - coerce_reg_to_32(&src_reg); | ||
8749 | - } | ||
8750 | smin_val = src_reg.smin_value; | ||
8751 | smax_val = src_reg.smax_value; | ||
8752 | umin_val = src_reg.umin_value; | ||
8753 | @@ -1994,6 +2055,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8754 | src_known = tnum_is_const(src_reg.var_off); | ||
8755 | dst_known = tnum_is_const(dst_reg->var_off); | ||
8756 | |||
8757 | + if (!src_known && | ||
8758 | + opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { | ||
8759 | + __mark_reg_unknown(dst_reg); | ||
8760 | + return 0; | ||
8761 | + } | ||
8762 | + | ||
8763 | switch (opcode) { | ||
8764 | case BPF_ADD: | ||
8765 | if (signed_add_overflows(dst_reg->smin_value, smin_val) || | ||
8766 | @@ -2122,9 +2189,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8767 | __update_reg_bounds(dst_reg); | ||
8768 | break; | ||
8769 | case BPF_LSH: | ||
8770 | - if (umax_val > 63) { | ||
8771 | - /* Shifts greater than 63 are undefined. This includes | ||
8772 | - * shifts by a negative number. | ||
8773 | + if (umax_val >= insn_bitness) { | ||
8774 | + /* Shifts greater than 31 or 63 are undefined. | ||
8775 | + * This includes shifts by a negative number. | ||
8776 | */ | ||
8777 | mark_reg_unknown(regs, insn->dst_reg); | ||
8778 | break; | ||
8779 | @@ -2150,27 +2217,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8780 | __update_reg_bounds(dst_reg); | ||
8781 | break; | ||
8782 | case BPF_RSH: | ||
8783 | - if (umax_val > 63) { | ||
8784 | - /* Shifts greater than 63 are undefined. This includes | ||
8785 | - * shifts by a negative number. | ||
8786 | + if (umax_val >= insn_bitness) { | ||
8787 | + /* Shifts greater than 31 or 63 are undefined. | ||
8788 | + * This includes shifts by a negative number. | ||
8789 | */ | ||
8790 | mark_reg_unknown(regs, insn->dst_reg); | ||
8791 | break; | ||
8792 | } | ||
8793 | - /* BPF_RSH is an unsigned shift, so make the appropriate casts */ | ||
8794 | - if (dst_reg->smin_value < 0) { | ||
8795 | - if (umin_val) { | ||
8796 | - /* Sign bit will be cleared */ | ||
8797 | - dst_reg->smin_value = 0; | ||
8798 | - } else { | ||
8799 | - /* Lost sign bit information */ | ||
8800 | - dst_reg->smin_value = S64_MIN; | ||
8801 | - dst_reg->smax_value = S64_MAX; | ||
8802 | - } | ||
8803 | - } else { | ||
8804 | - dst_reg->smin_value = | ||
8805 | - (u64)(dst_reg->smin_value) >> umax_val; | ||
8806 | - } | ||
8807 | + /* BPF_RSH is an unsigned shift. If the value in dst_reg might | ||
8808 | + * be negative, then either: | ||
8809 | + * 1) src_reg might be zero, so the sign bit of the result is | ||
8810 | + * unknown, so we lose our signed bounds | ||
8811 | + * 2) it's known negative, thus the unsigned bounds capture the | ||
8812 | + * signed bounds | ||
8813 | + * 3) the signed bounds cross zero, so they tell us nothing | ||
8814 | + * about the result | ||
8815 | + * If the value in dst_reg is known nonnegative, then again the | ||
8816 | + * unsigned bounts capture the signed bounds. | ||
8817 | + * Thus, in all cases it suffices to blow away our signed bounds | ||
8818 | + * and rely on inferring new ones from the unsigned bounds and | ||
8819 | + * var_off of the result. | ||
8820 | + */ | ||
8821 | + dst_reg->smin_value = S64_MIN; | ||
8822 | + dst_reg->smax_value = S64_MAX; | ||
8823 | if (src_known) | ||
8824 | dst_reg->var_off = tnum_rshift(dst_reg->var_off, | ||
8825 | umin_val); | ||
8826 | @@ -2186,6 +2255,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, | ||
8827 | break; | ||
8828 | } | ||
8829 | |||
8830 | + if (BPF_CLASS(insn->code) != BPF_ALU64) { | ||
8831 | + /* 32-bit ALU ops are (32,32)->32 */ | ||
8832 | + coerce_reg_to_size(dst_reg, 4); | ||
8833 | + coerce_reg_to_size(&src_reg, 4); | ||
8834 | + } | ||
8835 | + | ||
8836 | __reg_deduce_bounds(dst_reg); | ||
8837 | __reg_bound_offset(dst_reg); | ||
8838 | return 0; | ||
8839 | @@ -2362,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) | ||
8840 | return -EACCES; | ||
8841 | } | ||
8842 | mark_reg_unknown(regs, insn->dst_reg); | ||
8843 | - /* high 32 bits are known zero. */ | ||
8844 | - regs[insn->dst_reg].var_off = tnum_cast( | ||
8845 | - regs[insn->dst_reg].var_off, 4); | ||
8846 | - __update_reg_bounds(&regs[insn->dst_reg]); | ||
8847 | + coerce_reg_to_size(&regs[insn->dst_reg], 4); | ||
8848 | } | ||
8849 | } else { | ||
8850 | /* case: R = imm | ||
8851 | * remember the value we stored into this reg | ||
8852 | */ | ||
8853 | regs[insn->dst_reg].type = SCALAR_VALUE; | ||
8854 | - __mark_reg_known(regs + insn->dst_reg, insn->imm); | ||
8855 | + if (BPF_CLASS(insn->code) == BPF_ALU64) { | ||
8856 | + __mark_reg_known(regs + insn->dst_reg, | ||
8857 | + insn->imm); | ||
8858 | + } else { | ||
8859 | + __mark_reg_known(regs + insn->dst_reg, | ||
8860 | + (u32)insn->imm); | ||
8861 | + } | ||
8862 | } | ||
8863 | |||
8864 | } else if (opcode > BPF_END) { | ||
8865 | @@ -3307,15 +3385,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, | ||
8866 | return range_within(rold, rcur) && | ||
8867 | tnum_in(rold->var_off, rcur->var_off); | ||
8868 | } else { | ||
8869 | - /* if we knew anything about the old value, we're not | ||
8870 | - * equal, because we can't know anything about the | ||
8871 | - * scalar value of the pointer in the new value. | ||
8872 | + /* We're trying to use a pointer in place of a scalar. | ||
8873 | + * Even if the scalar was unbounded, this could lead to | ||
8874 | + * pointer leaks because scalars are allowed to leak | ||
8875 | + * while pointers are not. We could make this safe in | ||
8876 | + * special cases if root is calling us, but it's | ||
8877 | + * probably not worth the hassle. | ||
8878 | */ | ||
8879 | - return rold->umin_value == 0 && | ||
8880 | - rold->umax_value == U64_MAX && | ||
8881 | - rold->smin_value == S64_MIN && | ||
8882 | - rold->smax_value == S64_MAX && | ||
8883 | - tnum_is_unknown(rold->var_off); | ||
8884 | + return false; | ||
8885 | } | ||
8886 | case PTR_TO_MAP_VALUE: | ||
8887 | /* If the new min/max/var_off satisfy the old ones and | ||
8888 | @@ -3665,6 +3742,7 @@ static int do_check(struct bpf_verifier_env *env) | ||
8889 | if (err) | ||
8890 | return err; | ||
8891 | |||
8892 | + env->insn_aux_data[insn_idx].seen = true; | ||
8893 | if (class == BPF_ALU || class == BPF_ALU64) { | ||
8894 | err = check_alu_op(env, insn); | ||
8895 | if (err) | ||
8896 | @@ -3855,6 +3933,7 @@ static int do_check(struct bpf_verifier_env *env) | ||
8897 | return err; | ||
8898 | |||
8899 | insn_idx++; | ||
8900 | + env->insn_aux_data[insn_idx].seen = true; | ||
8901 | } else { | ||
8902 | verbose("invalid BPF_LD mode\n"); | ||
8903 | return -EINVAL; | ||
8904 | @@ -4035,6 +4114,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, | ||
8905 | u32 off, u32 cnt) | ||
8906 | { | ||
8907 | struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; | ||
8908 | + int i; | ||
8909 | |||
8910 | if (cnt == 1) | ||
8911 | return 0; | ||
8912 | @@ -4044,6 +4124,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len, | ||
8913 | memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off); | ||
8914 | memcpy(new_data + off + cnt - 1, old_data + off, | ||
8915 | sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); | ||
8916 | + for (i = off; i < off + cnt - 1; i++) | ||
8917 | + new_data[i].seen = true; | ||
8918 | env->insn_aux_data = new_data; | ||
8919 | vfree(old_data); | ||
8920 | return 0; | ||
8921 | @@ -4062,6 +4144,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of | ||
8922 | return new_prog; | ||
8923 | } | ||
8924 | |||
8925 | +/* The verifier does more data flow analysis than llvm and will not explore | ||
8926 | + * branches that are dead at run time. Malicious programs can have dead code | ||
8927 | + * too. Therefore replace all dead at-run-time code with nops. | ||
8928 | + */ | ||
8929 | +static void sanitize_dead_code(struct bpf_verifier_env *env) | ||
8930 | +{ | ||
8931 | + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; | ||
8932 | + struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0); | ||
8933 | + struct bpf_insn *insn = env->prog->insnsi; | ||
8934 | + const int insn_cnt = env->prog->len; | ||
8935 | + int i; | ||
8936 | + | ||
8937 | + for (i = 0; i < insn_cnt; i++) { | ||
8938 | + if (aux_data[i].seen) | ||
8939 | + continue; | ||
8940 | + memcpy(insn + i, &nop, sizeof(nop)); | ||
8941 | + } | ||
8942 | +} | ||
8943 | + | ||
8944 | /* convert load instructions that access fields of 'struct __sk_buff' | ||
8945 | * into sequence of instructions that access fields of 'struct sk_buff' | ||
8946 | */ | ||
8947 | @@ -4378,6 +4479,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) | ||
8948 | while (pop_stack(env, NULL) >= 0); | ||
8949 | free_states(env); | ||
8950 | |||
8951 | + if (ret == 0) | ||
8952 | + sanitize_dead_code(env); | ||
8953 | + | ||
8954 | if (ret == 0) | ||
8955 | /* program is valid, convert *(u32*)(ctx + off) accesses */ | ||
8956 | ret = convert_ctx_accesses(env); | ||
8957 | diff --git a/kernel/events/core.c b/kernel/events/core.c | ||
8958 | index 4f1d4bfc607a..24ebad5567b4 100644 | ||
8959 | --- a/kernel/events/core.c | ||
8960 | +++ b/kernel/events/core.c | ||
8961 | @@ -4233,7 +4233,7 @@ static void perf_remove_from_owner(struct perf_event *event) | ||
8962 | * indeed free this event, otherwise we need to serialize on | ||
8963 | * owner->perf_event_mutex. | ||
8964 | */ | ||
8965 | - owner = lockless_dereference(event->owner); | ||
8966 | + owner = READ_ONCE(event->owner); | ||
8967 | if (owner) { | ||
8968 | /* | ||
8969 | * Since delayed_put_task_struct() also drops the last | ||
8970 | @@ -4330,7 +4330,7 @@ int perf_event_release_kernel(struct perf_event *event) | ||
8971 | * Cannot change, child events are not migrated, see the | ||
8972 | * comment with perf_event_ctx_lock_nested(). | ||
8973 | */ | ||
8974 | - ctx = lockless_dereference(child->ctx); | ||
8975 | + ctx = READ_ONCE(child->ctx); | ||
8976 | /* | ||
8977 | * Since child_mutex nests inside ctx::mutex, we must jump | ||
8978 | * through hoops. We start by grabbing a reference on the ctx. | ||
8979 | diff --git a/kernel/seccomp.c b/kernel/seccomp.c | ||
8980 | index 418a1c045933..5f0dfb2abb8d 100644 | ||
8981 | --- a/kernel/seccomp.c | ||
8982 | +++ b/kernel/seccomp.c | ||
8983 | @@ -190,7 +190,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, | ||
8984 | u32 ret = SECCOMP_RET_ALLOW; | ||
8985 | /* Make sure cross-thread synced filter points somewhere sane. */ | ||
8986 | struct seccomp_filter *f = | ||
8987 | - lockless_dereference(current->seccomp.filter); | ||
8988 | + READ_ONCE(current->seccomp.filter); | ||
8989 | |||
8990 | /* Ensure unexpected behavior doesn't result in failing open. */ | ||
8991 | if (unlikely(WARN_ON(f == NULL))) | ||
8992 | diff --git a/kernel/task_work.c b/kernel/task_work.c | ||
8993 | index 5718b3ea202a..0fef395662a6 100644 | ||
8994 | --- a/kernel/task_work.c | ||
8995 | +++ b/kernel/task_work.c | ||
8996 | @@ -68,7 +68,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) | ||
8997 | * we raced with task_work_run(), *pprev == NULL/exited. | ||
8998 | */ | ||
8999 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
9000 | - while ((work = lockless_dereference(*pprev))) { | ||
9001 | + while ((work = READ_ONCE(*pprev))) { | ||
9002 | if (work->func != func) | ||
9003 | pprev = &work->next; | ||
9004 | else if (cmpxchg(pprev, work, work->next) == work) | ||
9005 | diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c | ||
9006 | index dc498b605d5d..6350f64d5aa4 100644 | ||
9007 | --- a/kernel/trace/bpf_trace.c | ||
9008 | +++ b/kernel/trace/bpf_trace.c | ||
9009 | @@ -293,14 +293,13 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = { | ||
9010 | .arg2_type = ARG_ANYTHING, | ||
9011 | }; | ||
9012 | |||
9013 | -static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); | ||
9014 | +static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); | ||
9015 | |||
9016 | static __always_inline u64 | ||
9017 | __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | ||
9018 | - u64 flags, struct perf_raw_record *raw) | ||
9019 | + u64 flags, struct perf_sample_data *sd) | ||
9020 | { | ||
9021 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
9022 | - struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd); | ||
9023 | unsigned int cpu = smp_processor_id(); | ||
9024 | u64 index = flags & BPF_F_INDEX_MASK; | ||
9025 | struct bpf_event_entry *ee; | ||
9026 | @@ -323,8 +322,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | ||
9027 | if (unlikely(event->oncpu != cpu)) | ||
9028 | return -EOPNOTSUPP; | ||
9029 | |||
9030 | - perf_sample_data_init(sd, 0, 0); | ||
9031 | - sd->raw = raw; | ||
9032 | perf_event_output(event, sd, regs); | ||
9033 | return 0; | ||
9034 | } | ||
9035 | @@ -332,6 +329,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, | ||
9036 | BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, | ||
9037 | u64, flags, void *, data, u64, size) | ||
9038 | { | ||
9039 | + struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); | ||
9040 | struct perf_raw_record raw = { | ||
9041 | .frag = { | ||
9042 | .size = size, | ||
9043 | @@ -342,7 +340,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, | ||
9044 | if (unlikely(flags & ~(BPF_F_INDEX_MASK))) | ||
9045 | return -EINVAL; | ||
9046 | |||
9047 | - return __bpf_perf_event_output(regs, map, flags, &raw); | ||
9048 | + perf_sample_data_init(sd, 0, 0); | ||
9049 | + sd->raw = &raw; | ||
9050 | + | ||
9051 | + return __bpf_perf_event_output(regs, map, flags, sd); | ||
9052 | } | ||
9053 | |||
9054 | static const struct bpf_func_proto bpf_perf_event_output_proto = { | ||
9055 | @@ -357,10 +358,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = { | ||
9056 | }; | ||
9057 | |||
9058 | static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); | ||
9059 | +static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd); | ||
9060 | |||
9061 | u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | ||
9062 | void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) | ||
9063 | { | ||
9064 | + struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd); | ||
9065 | struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); | ||
9066 | struct perf_raw_frag frag = { | ||
9067 | .copy = ctx_copy, | ||
9068 | @@ -378,8 +381,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, | ||
9069 | }; | ||
9070 | |||
9071 | perf_fetch_caller_regs(regs); | ||
9072 | + perf_sample_data_init(sd, 0, 0); | ||
9073 | + sd->raw = &raw; | ||
9074 | |||
9075 | - return __bpf_perf_event_output(regs, map, flags, &raw); | ||
9076 | + return __bpf_perf_event_output(regs, map, flags, sd); | ||
9077 | } | ||
9078 | |||
9079 | BPF_CALL_0(bpf_get_current_task) | ||
9080 | diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c | ||
9081 | index 1c21d0e2a145..7eb975a2d0e1 100644 | ||
9082 | --- a/kernel/trace/trace_events_hist.c | ||
9083 | +++ b/kernel/trace/trace_events_hist.c | ||
9084 | @@ -450,7 +450,7 @@ static int create_val_field(struct hist_trigger_data *hist_data, | ||
9085 | } | ||
9086 | |||
9087 | field = trace_find_event_field(file->event_call, field_name); | ||
9088 | - if (!field) { | ||
9089 | + if (!field || !field->size) { | ||
9090 | ret = -EINVAL; | ||
9091 | goto out; | ||
9092 | } | ||
9093 | @@ -548,7 +548,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, | ||
9094 | } | ||
9095 | |||
9096 | field = trace_find_event_field(file->event_call, field_name); | ||
9097 | - if (!field) { | ||
9098 | + if (!field || !field->size) { | ||
9099 | ret = -EINVAL; | ||
9100 | goto out; | ||
9101 | } | ||
9102 | diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug | ||
9103 | index dfdad67d8f6c..ff21b4dbb392 100644 | ||
9104 | --- a/lib/Kconfig.debug | ||
9105 | +++ b/lib/Kconfig.debug | ||
9106 | @@ -376,7 +376,7 @@ config STACK_VALIDATION | ||
9107 | that runtime stack traces are more reliable. | ||
9108 | |||
9109 | This is also a prerequisite for generation of ORC unwind data, which | ||
9110 | - is needed for CONFIG_ORC_UNWINDER. | ||
9111 | + is needed for CONFIG_UNWINDER_ORC. | ||
9112 | |||
9113 | For more information, see | ||
9114 | tools/objtool/Documentation/stack-validation.txt. | ||
9115 | diff --git a/mm/slab.h b/mm/slab.h | ||
9116 | index 028cdc7df67e..86d7c7d860f9 100644 | ||
9117 | --- a/mm/slab.h | ||
9118 | +++ b/mm/slab.h | ||
9119 | @@ -259,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx) | ||
9120 | * memcg_caches issues a write barrier to match this (see | ||
9121 | * memcg_create_kmem_cache()). | ||
9122 | */ | ||
9123 | - cachep = lockless_dereference(arr->entries[idx]); | ||
9124 | + cachep = READ_ONCE(arr->entries[idx]); | ||
9125 | rcu_read_unlock(); | ||
9126 | |||
9127 | return cachep; | ||
9128 | diff --git a/mm/sparse.c b/mm/sparse.c | ||
9129 | index 4900707ae146..60805abf98af 100644 | ||
9130 | --- a/mm/sparse.c | ||
9131 | +++ b/mm/sparse.c | ||
9132 | @@ -23,8 +23,7 @@ | ||
9133 | * 1) mem_section - memory sections, mem_map's for valid memory | ||
9134 | */ | ||
9135 | #ifdef CONFIG_SPARSEMEM_EXTREME | ||
9136 | -struct mem_section *mem_section[NR_SECTION_ROOTS] | ||
9137 | - ____cacheline_internodealigned_in_smp; | ||
9138 | +struct mem_section **mem_section; | ||
9139 | #else | ||
9140 | struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] | ||
9141 | ____cacheline_internodealigned_in_smp; | ||
9142 | @@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid) | ||
9143 | int __section_nr(struct mem_section* ms) | ||
9144 | { | ||
9145 | unsigned long root_nr; | ||
9146 | - struct mem_section* root; | ||
9147 | + struct mem_section *root = NULL; | ||
9148 | |||
9149 | for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) { | ||
9150 | root = __nr_to_section(root_nr * SECTIONS_PER_ROOT); | ||
9151 | @@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms) | ||
9152 | break; | ||
9153 | } | ||
9154 | |||
9155 | - VM_BUG_ON(root_nr == NR_SECTION_ROOTS); | ||
9156 | + VM_BUG_ON(!root); | ||
9157 | |||
9158 | return (root_nr * SECTIONS_PER_ROOT) + (ms - root); | ||
9159 | } | ||
9160 | @@ -208,6 +207,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) | ||
9161 | { | ||
9162 | unsigned long pfn; | ||
9163 | |||
9164 | +#ifdef CONFIG_SPARSEMEM_EXTREME | ||
9165 | + if (unlikely(!mem_section)) { | ||
9166 | + unsigned long size, align; | ||
9167 | + | ||
9168 | + size = sizeof(struct mem_section) * NR_SECTION_ROOTS; | ||
9169 | + align = 1 << (INTERNODE_CACHE_SHIFT); | ||
9170 | + mem_section = memblock_virt_alloc(size, align); | ||
9171 | + } | ||
9172 | +#endif | ||
9173 | + | ||
9174 | start &= PAGE_SECTION_MASK; | ||
9175 | mminit_validate_memmodel_limits(&start, &end); | ||
9176 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { | ||
9177 | @@ -330,11 +339,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, | ||
9178 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | ||
9179 | { | ||
9180 | unsigned long usemap_snr, pgdat_snr; | ||
9181 | - static unsigned long old_usemap_snr = NR_MEM_SECTIONS; | ||
9182 | - static unsigned long old_pgdat_snr = NR_MEM_SECTIONS; | ||
9183 | + static unsigned long old_usemap_snr; | ||
9184 | + static unsigned long old_pgdat_snr; | ||
9185 | struct pglist_data *pgdat = NODE_DATA(nid); | ||
9186 | int usemap_nid; | ||
9187 | |||
9188 | + /* First call */ | ||
9189 | + if (!old_usemap_snr) { | ||
9190 | + old_usemap_snr = NR_MEM_SECTIONS; | ||
9191 | + old_pgdat_snr = NR_MEM_SECTIONS; | ||
9192 | + } | ||
9193 | + | ||
9194 | usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); | ||
9195 | pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); | ||
9196 | if (usemap_snr == pgdat_snr) | ||
9197 | diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c | ||
9198 | index 467e44d7587d..045331204097 100644 | ||
9199 | --- a/net/ipv4/ip_gre.c | ||
9200 | +++ b/net/ipv4/ip_gre.c | ||
9201 | @@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, | ||
9202 | if (gre_handle_offloads(skb, false)) | ||
9203 | goto err_free_rt; | ||
9204 | |||
9205 | - if (skb->len > dev->mtu) { | ||
9206 | - pskb_trim(skb, dev->mtu); | ||
9207 | + if (skb->len > dev->mtu + dev->hard_header_len) { | ||
9208 | + pskb_trim(skb, dev->mtu + dev->hard_header_len); | ||
9209 | truncate = true; | ||
9210 | } | ||
9211 | |||
9212 | @@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, | ||
9213 | if (skb_cow_head(skb, dev->needed_headroom)) | ||
9214 | goto free_skb; | ||
9215 | |||
9216 | - if (skb->len - dev->hard_header_len > dev->mtu) { | ||
9217 | - pskb_trim(skb, dev->mtu); | ||
9218 | + if (skb->len > dev->mtu + dev->hard_header_len) { | ||
9219 | + pskb_trim(skb, dev->mtu + dev->hard_header_len); | ||
9220 | truncate = true; | ||
9221 | } | ||
9222 | |||
9223 | diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c | ||
9224 | index 218cfcc77650..ee113ff15fd0 100644 | ||
9225 | --- a/net/ipv4/tcp_vegas.c | ||
9226 | +++ b/net/ipv4/tcp_vegas.c | ||
9227 | @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); | ||
9228 | |||
9229 | static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) | ||
9230 | { | ||
9231 | - return min(tp->snd_ssthresh, tp->snd_cwnd-1); | ||
9232 | + return min(tp->snd_ssthresh, tp->snd_cwnd); | ||
9233 | } | ||
9234 | |||
9235 | static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) | ||
9236 | diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c | ||
9237 | index 8a1c846d3df9..2ec39404c449 100644 | ||
9238 | --- a/net/ipv6/addrconf.c | ||
9239 | +++ b/net/ipv6/addrconf.c | ||
9240 | @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { | ||
9241 | .disable_policy = 0, | ||
9242 | }; | ||
9243 | |||
9244 | -/* Check if a valid qdisc is available */ | ||
9245 | -static inline bool addrconf_qdisc_ok(const struct net_device *dev) | ||
9246 | +/* Check if link is ready: is it up and is a valid qdisc available */ | ||
9247 | +static inline bool addrconf_link_ready(const struct net_device *dev) | ||
9248 | { | ||
9249 | - return !qdisc_tx_is_noop(dev); | ||
9250 | + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); | ||
9251 | } | ||
9252 | |||
9253 | static void addrconf_del_rs_timer(struct inet6_dev *idev) | ||
9254 | @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) | ||
9255 | |||
9256 | ndev->token = in6addr_any; | ||
9257 | |||
9258 | - if (netif_running(dev) && addrconf_qdisc_ok(dev)) | ||
9259 | + if (netif_running(dev) && addrconf_link_ready(dev)) | ||
9260 | ndev->if_flags |= IF_READY; | ||
9261 | |||
9262 | ipv6_mc_init_dev(ndev); | ||
9263 | @@ -3404,7 +3404,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, | ||
9264 | /* restore routes for permanent addresses */ | ||
9265 | addrconf_permanent_addr(dev); | ||
9266 | |||
9267 | - if (!addrconf_qdisc_ok(dev)) { | ||
9268 | + if (!addrconf_link_ready(dev)) { | ||
9269 | /* device is not ready yet. */ | ||
9270 | pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", | ||
9271 | dev->name); | ||
9272 | @@ -3419,7 +3419,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, | ||
9273 | run_pending = 1; | ||
9274 | } | ||
9275 | } else if (event == NETDEV_CHANGE) { | ||
9276 | - if (!addrconf_qdisc_ok(dev)) { | ||
9277 | + if (!addrconf_link_ready(dev)) { | ||
9278 | /* device is still not ready. */ | ||
9279 | break; | ||
9280 | } | ||
9281 | diff --git a/net/ipv6/route.c b/net/ipv6/route.c | ||
9282 | index 598efa8cfe25..76b47682f77f 100644 | ||
9283 | --- a/net/ipv6/route.c | ||
9284 | +++ b/net/ipv6/route.c | ||
9285 | @@ -1055,7 +1055,6 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) | ||
9286 | |||
9287 | static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) | ||
9288 | { | ||
9289 | - struct fib6_table *table = rt->rt6i_table; | ||
9290 | struct rt6_info *pcpu_rt, *prev, **p; | ||
9291 | |||
9292 | pcpu_rt = ip6_rt_pcpu_alloc(rt); | ||
9293 | @@ -1066,28 +1065,20 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) | ||
9294 | return net->ipv6.ip6_null_entry; | ||
9295 | } | ||
9296 | |||
9297 | - read_lock_bh(&table->tb6_lock); | ||
9298 | - if (rt->rt6i_pcpu) { | ||
9299 | - p = this_cpu_ptr(rt->rt6i_pcpu); | ||
9300 | - prev = cmpxchg(p, NULL, pcpu_rt); | ||
9301 | - if (prev) { | ||
9302 | - /* If someone did it before us, return prev instead */ | ||
9303 | - dst_release_immediate(&pcpu_rt->dst); | ||
9304 | - pcpu_rt = prev; | ||
9305 | - } | ||
9306 | - } else { | ||
9307 | - /* rt has been removed from the fib6 tree | ||
9308 | - * before we have a chance to acquire the read_lock. | ||
9309 | - * In this case, don't brother to create a pcpu rt | ||
9310 | - * since rt is going away anyway. The next | ||
9311 | - * dst_check() will trigger a re-lookup. | ||
9312 | - */ | ||
9313 | + dst_hold(&pcpu_rt->dst); | ||
9314 | + p = this_cpu_ptr(rt->rt6i_pcpu); | ||
9315 | + prev = cmpxchg(p, NULL, pcpu_rt); | ||
9316 | + if (prev) { | ||
9317 | + /* If someone did it before us, return prev instead */ | ||
9318 | + /* release refcnt taken by ip6_rt_pcpu_alloc() */ | ||
9319 | dst_release_immediate(&pcpu_rt->dst); | ||
9320 | - pcpu_rt = rt; | ||
9321 | + /* release refcnt taken by above dst_hold() */ | ||
9322 | + dst_release_immediate(&pcpu_rt->dst); | ||
9323 | + dst_hold(&prev->dst); | ||
9324 | + pcpu_rt = prev; | ||
9325 | } | ||
9326 | - dst_hold(&pcpu_rt->dst); | ||
9327 | + | ||
9328 | rt6_dst_from_metrics_check(pcpu_rt); | ||
9329 | - read_unlock_bh(&table->tb6_lock); | ||
9330 | return pcpu_rt; | ||
9331 | } | ||
9332 | |||
9333 | @@ -1177,19 +1168,28 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, | ||
9334 | if (pcpu_rt) { | ||
9335 | read_unlock_bh(&table->tb6_lock); | ||
9336 | } else { | ||
9337 | - /* We have to do the read_unlock first | ||
9338 | - * because rt6_make_pcpu_route() may trigger | ||
9339 | - * ip6_dst_gc() which will take the write_lock. | ||
9340 | - */ | ||
9341 | - dst_hold(&rt->dst); | ||
9342 | - read_unlock_bh(&table->tb6_lock); | ||
9343 | - pcpu_rt = rt6_make_pcpu_route(rt); | ||
9344 | - dst_release(&rt->dst); | ||
9345 | + /* atomic_inc_not_zero() is needed when using rcu */ | ||
9346 | + if (atomic_inc_not_zero(&rt->rt6i_ref)) { | ||
9347 | + /* We have to do the read_unlock first | ||
9348 | + * because rt6_make_pcpu_route() may trigger | ||
9349 | + * ip6_dst_gc() which will take the write_lock. | ||
9350 | + * | ||
9351 | + * No dst_hold() on rt is needed because grabbing | ||
9352 | + * rt->rt6i_ref makes sure rt can't be released. | ||
9353 | + */ | ||
9354 | + read_unlock_bh(&table->tb6_lock); | ||
9355 | + pcpu_rt = rt6_make_pcpu_route(rt); | ||
9356 | + rt6_release(rt); | ||
9357 | + } else { | ||
9358 | + /* rt is already removed from tree */ | ||
9359 | + read_unlock_bh(&table->tb6_lock); | ||
9360 | + pcpu_rt = net->ipv6.ip6_null_entry; | ||
9361 | + dst_hold(&pcpu_rt->dst); | ||
9362 | + } | ||
9363 | } | ||
9364 | |||
9365 | trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); | ||
9366 | return pcpu_rt; | ||
9367 | - | ||
9368 | } | ||
9369 | } | ||
9370 | EXPORT_SYMBOL_GPL(ip6_pol_route); | ||
9371 | diff --git a/net/sctp/stream.c b/net/sctp/stream.c | ||
9372 | index fa8371ff05c4..724adf2786a2 100644 | ||
9373 | --- a/net/sctp/stream.c | ||
9374 | +++ b/net/sctp/stream.c | ||
9375 | @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, | ||
9376 | { | ||
9377 | int i; | ||
9378 | |||
9379 | + gfp |= __GFP_NOWARN; | ||
9380 | + | ||
9381 | /* Initial stream->out size may be very big, so free it and alloc | ||
9382 | - * a new one with new outcnt to save memory. | ||
9383 | + * a new one with new outcnt to save memory if needed. | ||
9384 | */ | ||
9385 | + if (outcnt == stream->outcnt) | ||
9386 | + goto in; | ||
9387 | + | ||
9388 | kfree(stream->out); | ||
9389 | |||
9390 | stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); | ||
9391 | @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, | ||
9392 | for (i = 0; i < stream->outcnt; i++) | ||
9393 | stream->out[i].state = SCTP_STREAM_OPEN; | ||
9394 | |||
9395 | +in: | ||
9396 | if (!incnt) | ||
9397 | return 0; | ||
9398 | |||
9399 | diff --git a/scripts/Makefile.build b/scripts/Makefile.build | ||
9400 | index bb831d49bcfd..e63af4e19382 100644 | ||
9401 | --- a/scripts/Makefile.build | ||
9402 | +++ b/scripts/Makefile.build | ||
9403 | @@ -259,7 +259,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1) | ||
9404 | |||
9405 | __objtool_obj := $(objtree)/tools/objtool/objtool | ||
9406 | |||
9407 | -objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check) | ||
9408 | +objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check) | ||
9409 | |||
9410 | ifndef CONFIG_FRAME_POINTER | ||
9411 | objtool_args += --no-fp | ||
9412 | diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh | ||
9413 | index 4d1ea96e8794..a18bca720995 100755 | ||
9414 | --- a/scripts/headers_install.sh | ||
9415 | +++ b/scripts/headers_install.sh | ||
9416 | @@ -34,7 +34,7 @@ do | ||
9417 | sed -r \ | ||
9418 | -e 's/([ \t(])(__user|__force|__iomem)[ \t]/\1/g' \ | ||
9419 | -e 's/__attribute_const__([ \t]|$)/\1/g' \ | ||
9420 | - -e 's@^#include <linux/compiler.h>@@' \ | ||
9421 | + -e 's@^#include <linux/compiler(|_types).h>@@' \ | ||
9422 | -e 's/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g' \ | ||
9423 | -e 's/(^|[ \t(])(inline|asm|volatile)([ \t(]|$)/\1__\2__\3/g' \ | ||
9424 | -e 's@#(ifndef|define|endif[ \t]*/[*])[ \t]*_UAPI@#\1 @' \ | ||
9425 | diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c | ||
9426 | index 549c269acc7d..18933bf6473f 100644 | ||
9427 | --- a/sound/soc/codecs/msm8916-wcd-analog.c | ||
9428 | +++ b/sound/soc/codecs/msm8916-wcd-analog.c | ||
9429 | @@ -104,7 +104,7 @@ | ||
9430 | #define CDC_A_MICB_1_VAL (0xf141) | ||
9431 | #define MICB_MIN_VAL 1600 | ||
9432 | #define MICB_STEP_SIZE 50 | ||
9433 | -#define MICB_VOLTAGE_REGVAL(v) ((v - MICB_MIN_VAL)/MICB_STEP_SIZE) | ||
9434 | +#define MICB_VOLTAGE_REGVAL(v) (((v - MICB_MIN_VAL)/MICB_STEP_SIZE) << 3) | ||
9435 | #define MICB_1_VAL_MICB_OUT_VAL_MASK GENMASK(7, 3) | ||
9436 | #define MICB_1_VAL_MICB_OUT_VAL_V2P70V ((0x16) << 3) | ||
9437 | #define MICB_1_VAL_MICB_OUT_VAL_V1P80V ((0x4) << 3) | ||
9438 | @@ -349,8 +349,9 @@ static void pm8916_wcd_analog_micbias_enable(struct snd_soc_codec *codec) | ||
9439 | | MICB_1_CTL_EXT_PRECHARG_EN_ENABLE); | ||
9440 | |||
9441 | if (wcd->micbias_mv) { | ||
9442 | - snd_soc_write(codec, CDC_A_MICB_1_VAL, | ||
9443 | - MICB_VOLTAGE_REGVAL(wcd->micbias_mv)); | ||
9444 | + snd_soc_update_bits(codec, CDC_A_MICB_1_VAL, | ||
9445 | + MICB_1_VAL_MICB_OUT_VAL_MASK, | ||
9446 | + MICB_VOLTAGE_REGVAL(wcd->micbias_mv)); | ||
9447 | /* | ||
9448 | * Special headset needs MICBIAS as 2.7V so wait for | ||
9449 | * 50 msec for the MICBIAS to reach 2.7 volts. | ||
9450 | @@ -1241,6 +1242,8 @@ static const struct of_device_id pm8916_wcd_analog_spmi_match_table[] = { | ||
9451 | { } | ||
9452 | }; | ||
9453 | |||
9454 | +MODULE_DEVICE_TABLE(of, pm8916_wcd_analog_spmi_match_table); | ||
9455 | + | ||
9456 | static struct platform_driver pm8916_wcd_analog_spmi_driver = { | ||
9457 | .driver = { | ||
9458 | .name = "qcom,pm8916-wcd-spmi-codec", | ||
9459 | diff --git a/sound/soc/img/img-parallel-out.c b/sound/soc/img/img-parallel-out.c | ||
9460 | index 23b0f0f6ec9c..2fc8a6372206 100644 | ||
9461 | --- a/sound/soc/img/img-parallel-out.c | ||
9462 | +++ b/sound/soc/img/img-parallel-out.c | ||
9463 | @@ -164,9 +164,11 @@ static int img_prl_out_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) | ||
9464 | return -EINVAL; | ||
9465 | } | ||
9466 | |||
9467 | + pm_runtime_get_sync(prl->dev); | ||
9468 | reg = img_prl_out_readl(prl, IMG_PRL_OUT_CTL); | ||
9469 | reg = (reg & ~IMG_PRL_OUT_CTL_EDGE_MASK) | control_set; | ||
9470 | img_prl_out_writel(prl, reg, IMG_PRL_OUT_CTL); | ||
9471 | + pm_runtime_put(prl->dev); | ||
9472 | |||
9473 | return 0; | ||
9474 | } | ||
9475 | diff --git a/tools/objtool/check.c b/tools/objtool/check.c | ||
9476 | index c0e26ad1fa7e..9b341584eb1b 100644 | ||
9477 | --- a/tools/objtool/check.c | ||
9478 | +++ b/tools/objtool/check.c | ||
9479 | @@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, | ||
9480 | if (insn->dead_end) | ||
9481 | return 0; | ||
9482 | |||
9483 | - insn = next_insn; | ||
9484 | - if (!insn) { | ||
9485 | + if (!next_insn) { | ||
9486 | + if (state.cfa.base == CFI_UNDEFINED) | ||
9487 | + return 0; | ||
9488 | WARN("%s: unexpected end of section", sec->name); | ||
9489 | return 1; | ||
9490 | } | ||
9491 | + | ||
9492 | + insn = next_insn; | ||
9493 | } | ||
9494 | |||
9495 | return 0; | ||
9496 | diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c | ||
9497 | index 31e0f9143840..07f329919828 100644 | ||
9498 | --- a/tools/objtool/objtool.c | ||
9499 | +++ b/tools/objtool/objtool.c | ||
9500 | @@ -70,7 +70,7 @@ static void cmd_usage(void) | ||
9501 | |||
9502 | printf("\n"); | ||
9503 | |||
9504 | - exit(1); | ||
9505 | + exit(129); | ||
9506 | } | ||
9507 | |||
9508 | static void handle_options(int *argc, const char ***argv) | ||
9509 | @@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv) | ||
9510 | break; | ||
9511 | } else { | ||
9512 | fprintf(stderr, "Unknown option: %s\n", cmd); | ||
9513 | - fprintf(stderr, "\n Usage: %s\n", | ||
9514 | - objtool_usage_string); | ||
9515 | - exit(1); | ||
9516 | + cmd_usage(); | ||
9517 | } | ||
9518 | |||
9519 | (*argv)++; | ||
9520 | diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c | ||
9521 | index 64ae21f64489..7a2d221c4702 100644 | ||
9522 | --- a/tools/testing/selftests/bpf/test_verifier.c | ||
9523 | +++ b/tools/testing/selftests/bpf/test_verifier.c | ||
9524 | @@ -606,7 +606,6 @@ static struct bpf_test tests[] = { | ||
9525 | }, | ||
9526 | .errstr = "misaligned stack access", | ||
9527 | .result = REJECT, | ||
9528 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, | ||
9529 | }, | ||
9530 | { | ||
9531 | "invalid map_fd for function call", | ||
9532 | @@ -1797,7 +1796,6 @@ static struct bpf_test tests[] = { | ||
9533 | }, | ||
9534 | .result = REJECT, | ||
9535 | .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", | ||
9536 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, | ||
9537 | }, | ||
9538 | { | ||
9539 | "PTR_TO_STACK store/load - bad alignment on reg", | ||
9540 | @@ -1810,7 +1808,6 @@ static struct bpf_test tests[] = { | ||
9541 | }, | ||
9542 | .result = REJECT, | ||
9543 | .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", | ||
9544 | - .flags = F_LOAD_WITH_STRICT_ALIGNMENT, | ||
9545 | }, | ||
9546 | { | ||
9547 | "PTR_TO_STACK store/load - out of bounds low", | ||
9548 | @@ -6115,7 +6112,7 @@ static struct bpf_test tests[] = { | ||
9549 | BPF_EXIT_INSN(), | ||
9550 | }, | ||
9551 | .fixup_map1 = { 3 }, | ||
9552 | - .errstr = "R0 min value is negative", | ||
9553 | + .errstr = "unbounded min value", | ||
9554 | .result = REJECT, | ||
9555 | }, | ||
9556 | { | ||
9557 | @@ -6139,7 +6136,7 @@ static struct bpf_test tests[] = { | ||
9558 | BPF_EXIT_INSN(), | ||
9559 | }, | ||
9560 | .fixup_map1 = { 3 }, | ||
9561 | - .errstr = "R0 min value is negative", | ||
9562 | + .errstr = "unbounded min value", | ||
9563 | .result = REJECT, | ||
9564 | }, | ||
9565 | { | ||
9566 | @@ -6165,7 +6162,7 @@ static struct bpf_test tests[] = { | ||
9567 | BPF_EXIT_INSN(), | ||
9568 | }, | ||
9569 | .fixup_map1 = { 3 }, | ||
9570 | - .errstr = "R8 invalid mem access 'inv'", | ||
9571 | + .errstr = "unbounded min value", | ||
9572 | .result = REJECT, | ||
9573 | }, | ||
9574 | { | ||
9575 | @@ -6190,7 +6187,7 @@ static struct bpf_test tests[] = { | ||
9576 | BPF_EXIT_INSN(), | ||
9577 | }, | ||
9578 | .fixup_map1 = { 3 }, | ||
9579 | - .errstr = "R8 invalid mem access 'inv'", | ||
9580 | + .errstr = "unbounded min value", | ||
9581 | .result = REJECT, | ||
9582 | }, | ||
9583 | { | ||
9584 | @@ -6238,7 +6235,7 @@ static struct bpf_test tests[] = { | ||
9585 | BPF_EXIT_INSN(), | ||
9586 | }, | ||
9587 | .fixup_map1 = { 3 }, | ||
9588 | - .errstr = "R0 min value is negative", | ||
9589 | + .errstr = "unbounded min value", | ||
9590 | .result = REJECT, | ||
9591 | }, | ||
9592 | { | ||
9593 | @@ -6309,7 +6306,7 @@ static struct bpf_test tests[] = { | ||
9594 | BPF_EXIT_INSN(), | ||
9595 | }, | ||
9596 | .fixup_map1 = { 3 }, | ||
9597 | - .errstr = "R0 min value is negative", | ||
9598 | + .errstr = "unbounded min value", | ||
9599 | .result = REJECT, | ||
9600 | }, | ||
9601 | { | ||
9602 | @@ -6360,7 +6357,7 @@ static struct bpf_test tests[] = { | ||
9603 | BPF_EXIT_INSN(), | ||
9604 | }, | ||
9605 | .fixup_map1 = { 3 }, | ||
9606 | - .errstr = "R0 min value is negative", | ||
9607 | + .errstr = "unbounded min value", | ||
9608 | .result = REJECT, | ||
9609 | }, | ||
9610 | { | ||
9611 | @@ -6387,7 +6384,7 @@ static struct bpf_test tests[] = { | ||
9612 | BPF_EXIT_INSN(), | ||
9613 | }, | ||
9614 | .fixup_map1 = { 3 }, | ||
9615 | - .errstr = "R0 min value is negative", | ||
9616 | + .errstr = "unbounded min value", | ||
9617 | .result = REJECT, | ||
9618 | }, | ||
9619 | { | ||
9620 | @@ -6413,7 +6410,7 @@ static struct bpf_test tests[] = { | ||
9621 | BPF_EXIT_INSN(), | ||
9622 | }, | ||
9623 | .fixup_map1 = { 3 }, | ||
9624 | - .errstr = "R0 min value is negative", | ||
9625 | + .errstr = "unbounded min value", | ||
9626 | .result = REJECT, | ||
9627 | }, | ||
9628 | { | ||
9629 | @@ -6442,7 +6439,7 @@ static struct bpf_test tests[] = { | ||
9630 | BPF_EXIT_INSN(), | ||
9631 | }, | ||
9632 | .fixup_map1 = { 3 }, | ||
9633 | - .errstr = "R0 min value is negative", | ||
9634 | + .errstr = "unbounded min value", | ||
9635 | .result = REJECT, | ||
9636 | }, | ||
9637 | { | ||
9638 | @@ -6472,7 +6469,7 @@ static struct bpf_test tests[] = { | ||
9639 | BPF_JMP_IMM(BPF_JA, 0, 0, -7), | ||
9640 | }, | ||
9641 | .fixup_map1 = { 4 }, | ||
9642 | - .errstr = "R0 min value is negative", | ||
9643 | + .errstr = "unbounded min value", | ||
9644 | .result = REJECT, | ||
9645 | }, | ||
9646 | { | ||
9647 | @@ -6500,8 +6497,7 @@ static struct bpf_test tests[] = { | ||
9648 | BPF_EXIT_INSN(), | ||
9649 | }, | ||
9650 | .fixup_map1 = { 3 }, | ||
9651 | - .errstr_unpriv = "R0 pointer comparison prohibited", | ||
9652 | - .errstr = "R0 min value is negative", | ||
9653 | + .errstr = "unbounded min value", | ||
9654 | .result = REJECT, | ||
9655 | .result_unpriv = REJECT, | ||
9656 | }, | ||
9657 | @@ -6556,6 +6552,462 @@ static struct bpf_test tests[] = { | ||
9658 | .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", | ||
9659 | .result = REJECT, | ||
9660 | }, | ||
9661 | + { | ||
9662 | + "bounds check based on zero-extended MOV", | ||
9663 | + .insns = { | ||
9664 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9665 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9666 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9667 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9668 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9669 | + BPF_FUNC_map_lookup_elem), | ||
9670 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), | ||
9671 | + /* r2 = 0x0000'0000'ffff'ffff */ | ||
9672 | + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff), | ||
9673 | + /* r2 = 0 */ | ||
9674 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), | ||
9675 | + /* no-op */ | ||
9676 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), | ||
9677 | + /* access at offset 0 */ | ||
9678 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9679 | + /* exit */ | ||
9680 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9681 | + BPF_EXIT_INSN(), | ||
9682 | + }, | ||
9683 | + .fixup_map1 = { 3 }, | ||
9684 | + .result = ACCEPT | ||
9685 | + }, | ||
9686 | + { | ||
9687 | + "bounds check based on sign-extended MOV. test1", | ||
9688 | + .insns = { | ||
9689 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9690 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9691 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9692 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9693 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9694 | + BPF_FUNC_map_lookup_elem), | ||
9695 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), | ||
9696 | + /* r2 = 0xffff'ffff'ffff'ffff */ | ||
9697 | + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), | ||
9698 | + /* r2 = 0xffff'ffff */ | ||
9699 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32), | ||
9700 | + /* r0 = <oob pointer> */ | ||
9701 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), | ||
9702 | + /* access to OOB pointer */ | ||
9703 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9704 | + /* exit */ | ||
9705 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9706 | + BPF_EXIT_INSN(), | ||
9707 | + }, | ||
9708 | + .fixup_map1 = { 3 }, | ||
9709 | + .errstr = "map_value pointer and 4294967295", | ||
9710 | + .result = REJECT | ||
9711 | + }, | ||
9712 | + { | ||
9713 | + "bounds check based on sign-extended MOV. test2", | ||
9714 | + .insns = { | ||
9715 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9716 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9717 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9718 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9719 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9720 | + BPF_FUNC_map_lookup_elem), | ||
9721 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), | ||
9722 | + /* r2 = 0xffff'ffff'ffff'ffff */ | ||
9723 | + BPF_MOV64_IMM(BPF_REG_2, 0xffffffff), | ||
9724 | + /* r2 = 0xfff'ffff */ | ||
9725 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36), | ||
9726 | + /* r0 = <oob pointer> */ | ||
9727 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), | ||
9728 | + /* access to OOB pointer */ | ||
9729 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9730 | + /* exit */ | ||
9731 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9732 | + BPF_EXIT_INSN(), | ||
9733 | + }, | ||
9734 | + .fixup_map1 = { 3 }, | ||
9735 | + .errstr = "R0 min value is outside of the array range", | ||
9736 | + .result = REJECT | ||
9737 | + }, | ||
9738 | + { | ||
9739 | + "bounds check based on reg_off + var_off + insn_off. test1", | ||
9740 | + .insns = { | ||
9741 | + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, | ||
9742 | + offsetof(struct __sk_buff, mark)), | ||
9743 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9744 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9745 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9746 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9747 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9748 | + BPF_FUNC_map_lookup_elem), | ||
9749 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), | ||
9750 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), | ||
9751 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1), | ||
9752 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), | ||
9753 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), | ||
9754 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), | ||
9755 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9756 | + BPF_EXIT_INSN(), | ||
9757 | + }, | ||
9758 | + .fixup_map1 = { 4 }, | ||
9759 | + .errstr = "value_size=8 off=1073741825", | ||
9760 | + .result = REJECT, | ||
9761 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
9762 | + }, | ||
9763 | + { | ||
9764 | + "bounds check based on reg_off + var_off + insn_off. test2", | ||
9765 | + .insns = { | ||
9766 | + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, | ||
9767 | + offsetof(struct __sk_buff, mark)), | ||
9768 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9769 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9770 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9771 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9772 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9773 | + BPF_FUNC_map_lookup_elem), | ||
9774 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), | ||
9775 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1), | ||
9776 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1), | ||
9777 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), | ||
9778 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1), | ||
9779 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3), | ||
9780 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9781 | + BPF_EXIT_INSN(), | ||
9782 | + }, | ||
9783 | + .fixup_map1 = { 4 }, | ||
9784 | + .errstr = "value 1073741823", | ||
9785 | + .result = REJECT, | ||
9786 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
9787 | + }, | ||
9788 | + { | ||
9789 | + "bounds check after truncation of non-boundary-crossing range", | ||
9790 | + .insns = { | ||
9791 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9792 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9793 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9794 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9795 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9796 | + BPF_FUNC_map_lookup_elem), | ||
9797 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), | ||
9798 | + /* r1 = [0x00, 0xff] */ | ||
9799 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
9800 | + BPF_MOV64_IMM(BPF_REG_2, 1), | ||
9801 | + /* r2 = 0x10'0000'0000 */ | ||
9802 | + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36), | ||
9803 | + /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ | ||
9804 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), | ||
9805 | + /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ | ||
9806 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), | ||
9807 | + /* r1 = [0x00, 0xff] */ | ||
9808 | + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff), | ||
9809 | + /* r1 = 0 */ | ||
9810 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), | ||
9811 | + /* no-op */ | ||
9812 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9813 | + /* access at offset 0 */ | ||
9814 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9815 | + /* exit */ | ||
9816 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9817 | + BPF_EXIT_INSN(), | ||
9818 | + }, | ||
9819 | + .fixup_map1 = { 3 }, | ||
9820 | + .result = ACCEPT | ||
9821 | + }, | ||
9822 | + { | ||
9823 | + "bounds check after truncation of boundary-crossing range (1)", | ||
9824 | + .insns = { | ||
9825 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9826 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9827 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9828 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9829 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9830 | + BPF_FUNC_map_lookup_elem), | ||
9831 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), | ||
9832 | + /* r1 = [0x00, 0xff] */ | ||
9833 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
9834 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), | ||
9835 | + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ | ||
9836 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), | ||
9837 | + /* r1 = [0xffff'ff80, 0xffff'ffff] or | ||
9838 | + * [0x0000'0000, 0x0000'007f] | ||
9839 | + */ | ||
9840 | + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0), | ||
9841 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), | ||
9842 | + /* r1 = [0x00, 0xff] or | ||
9843 | + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] | ||
9844 | + */ | ||
9845 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), | ||
9846 | + /* r1 = 0 or | ||
9847 | + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] | ||
9848 | + */ | ||
9849 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), | ||
9850 | + /* no-op or OOB pointer computation */ | ||
9851 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9852 | + /* potentially OOB access */ | ||
9853 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9854 | + /* exit */ | ||
9855 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9856 | + BPF_EXIT_INSN(), | ||
9857 | + }, | ||
9858 | + .fixup_map1 = { 3 }, | ||
9859 | + /* not actually fully unbounded, but the bound is very high */ | ||
9860 | + .errstr = "R0 unbounded memory access", | ||
9861 | + .result = REJECT | ||
9862 | + }, | ||
9863 | + { | ||
9864 | + "bounds check after truncation of boundary-crossing range (2)", | ||
9865 | + .insns = { | ||
9866 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9867 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9868 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9869 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9870 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9871 | + BPF_FUNC_map_lookup_elem), | ||
9872 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), | ||
9873 | + /* r1 = [0x00, 0xff] */ | ||
9874 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
9875 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), | ||
9876 | + /* r1 = [0xffff'ff80, 0x1'0000'007f] */ | ||
9877 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), | ||
9878 | + /* r1 = [0xffff'ff80, 0xffff'ffff] or | ||
9879 | + * [0x0000'0000, 0x0000'007f] | ||
9880 | + * difference to previous test: truncation via MOV32 | ||
9881 | + * instead of ALU32. | ||
9882 | + */ | ||
9883 | + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), | ||
9884 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), | ||
9885 | + /* r1 = [0x00, 0xff] or | ||
9886 | + * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] | ||
9887 | + */ | ||
9888 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), | ||
9889 | + /* r1 = 0 or | ||
9890 | + * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] | ||
9891 | + */ | ||
9892 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), | ||
9893 | + /* no-op or OOB pointer computation */ | ||
9894 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9895 | + /* potentially OOB access */ | ||
9896 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9897 | + /* exit */ | ||
9898 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9899 | + BPF_EXIT_INSN(), | ||
9900 | + }, | ||
9901 | + .fixup_map1 = { 3 }, | ||
9902 | + /* not actually fully unbounded, but the bound is very high */ | ||
9903 | + .errstr = "R0 unbounded memory access", | ||
9904 | + .result = REJECT | ||
9905 | + }, | ||
9906 | + { | ||
9907 | + "bounds check after wrapping 32-bit addition", | ||
9908 | + .insns = { | ||
9909 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9910 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9911 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9912 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9913 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9914 | + BPF_FUNC_map_lookup_elem), | ||
9915 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), | ||
9916 | + /* r1 = 0x7fff'ffff */ | ||
9917 | + BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff), | ||
9918 | + /* r1 = 0xffff'fffe */ | ||
9919 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), | ||
9920 | + /* r1 = 0 */ | ||
9921 | + BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2), | ||
9922 | + /* no-op */ | ||
9923 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9924 | + /* access at offset 0 */ | ||
9925 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9926 | + /* exit */ | ||
9927 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9928 | + BPF_EXIT_INSN(), | ||
9929 | + }, | ||
9930 | + .fixup_map1 = { 3 }, | ||
9931 | + .result = ACCEPT | ||
9932 | + }, | ||
9933 | + { | ||
9934 | + "bounds check after shift with oversized count operand", | ||
9935 | + .insns = { | ||
9936 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9937 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9938 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9939 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9940 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9941 | + BPF_FUNC_map_lookup_elem), | ||
9942 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), | ||
9943 | + BPF_MOV64_IMM(BPF_REG_2, 32), | ||
9944 | + BPF_MOV64_IMM(BPF_REG_1, 1), | ||
9945 | + /* r1 = (u32)1 << (u32)32 = ? */ | ||
9946 | + BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2), | ||
9947 | + /* r1 = [0x0000, 0xffff] */ | ||
9948 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff), | ||
9949 | + /* computes unknown pointer, potentially OOB */ | ||
9950 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9951 | + /* potentially OOB access */ | ||
9952 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9953 | + /* exit */ | ||
9954 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9955 | + BPF_EXIT_INSN(), | ||
9956 | + }, | ||
9957 | + .fixup_map1 = { 3 }, | ||
9958 | + .errstr = "R0 max value is outside of the array range", | ||
9959 | + .result = REJECT | ||
9960 | + }, | ||
9961 | + { | ||
9962 | + "bounds check after right shift of maybe-negative number", | ||
9963 | + .insns = { | ||
9964 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9965 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9966 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9967 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9968 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9969 | + BPF_FUNC_map_lookup_elem), | ||
9970 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), | ||
9971 | + /* r1 = [0x00, 0xff] */ | ||
9972 | + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
9973 | + /* r1 = [-0x01, 0xfe] */ | ||
9974 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), | ||
9975 | + /* r1 = 0 or 0xff'ffff'ffff'ffff */ | ||
9976 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), | ||
9977 | + /* r1 = 0 or 0xffff'ffff'ffff */ | ||
9978 | + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), | ||
9979 | + /* computes unknown pointer, potentially OOB */ | ||
9980 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
9981 | + /* potentially OOB access */ | ||
9982 | + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), | ||
9983 | + /* exit */ | ||
9984 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
9985 | + BPF_EXIT_INSN(), | ||
9986 | + }, | ||
9987 | + .fixup_map1 = { 3 }, | ||
9988 | + .errstr = "R0 unbounded memory access", | ||
9989 | + .result = REJECT | ||
9990 | + }, | ||
9991 | + { | ||
9992 | + "bounds check map access with off+size signed 32bit overflow. test1", | ||
9993 | + .insns = { | ||
9994 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
9995 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
9996 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
9997 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
9998 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
9999 | + BPF_FUNC_map_lookup_elem), | ||
10000 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), | ||
10001 | + BPF_EXIT_INSN(), | ||
10002 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe), | ||
10003 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), | ||
10004 | + BPF_JMP_A(0), | ||
10005 | + BPF_EXIT_INSN(), | ||
10006 | + }, | ||
10007 | + .fixup_map1 = { 3 }, | ||
10008 | + .errstr = "map_value pointer and 2147483646", | ||
10009 | + .result = REJECT | ||
10010 | + }, | ||
10011 | + { | ||
10012 | + "bounds check map access with off+size signed 32bit overflow. test2", | ||
10013 | + .insns = { | ||
10014 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10015 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
10016 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
10017 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10018 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10019 | + BPF_FUNC_map_lookup_elem), | ||
10020 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), | ||
10021 | + BPF_EXIT_INSN(), | ||
10022 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), | ||
10023 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), | ||
10024 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff), | ||
10025 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), | ||
10026 | + BPF_JMP_A(0), | ||
10027 | + BPF_EXIT_INSN(), | ||
10028 | + }, | ||
10029 | + .fixup_map1 = { 3 }, | ||
10030 | + .errstr = "pointer offset 1073741822", | ||
10031 | + .result = REJECT | ||
10032 | + }, | ||
10033 | + { | ||
10034 | + "bounds check map access with off+size signed 32bit overflow. test3", | ||
10035 | + .insns = { | ||
10036 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10037 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
10038 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
10039 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10040 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10041 | + BPF_FUNC_map_lookup_elem), | ||
10042 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), | ||
10043 | + BPF_EXIT_INSN(), | ||
10044 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), | ||
10045 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff), | ||
10046 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), | ||
10047 | + BPF_JMP_A(0), | ||
10048 | + BPF_EXIT_INSN(), | ||
10049 | + }, | ||
10050 | + .fixup_map1 = { 3 }, | ||
10051 | + .errstr = "pointer offset -1073741822", | ||
10052 | + .result = REJECT | ||
10053 | + }, | ||
10054 | + { | ||
10055 | + "bounds check map access with off+size signed 32bit overflow. test4", | ||
10056 | + .insns = { | ||
10057 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10058 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
10059 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
10060 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10061 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10062 | + BPF_FUNC_map_lookup_elem), | ||
10063 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), | ||
10064 | + BPF_EXIT_INSN(), | ||
10065 | + BPF_MOV64_IMM(BPF_REG_1, 1000000), | ||
10066 | + BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000), | ||
10067 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), | ||
10068 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2), | ||
10069 | + BPF_JMP_A(0), | ||
10070 | + BPF_EXIT_INSN(), | ||
10071 | + }, | ||
10072 | + .fixup_map1 = { 3 }, | ||
10073 | + .errstr = "map_value pointer and 1000000000000", | ||
10074 | + .result = REJECT | ||
10075 | + }, | ||
10076 | + { | ||
10077 | + "pointer/scalar confusion in state equality check (way 1)", | ||
10078 | + .insns = { | ||
10079 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10080 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
10081 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
10082 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10083 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10084 | + BPF_FUNC_map_lookup_elem), | ||
10085 | + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), | ||
10086 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), | ||
10087 | + BPF_JMP_A(1), | ||
10088 | + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), | ||
10089 | + BPF_JMP_A(0), | ||
10090 | + BPF_EXIT_INSN(), | ||
10091 | + }, | ||
10092 | + .fixup_map1 = { 3 }, | ||
10093 | + .result = ACCEPT, | ||
10094 | + .result_unpriv = REJECT, | ||
10095 | + .errstr_unpriv = "R0 leaks addr as return value" | ||
10096 | + }, | ||
10097 | + { | ||
10098 | + "pointer/scalar confusion in state equality check (way 2)", | ||
10099 | + .insns = { | ||
10100 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10101 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
10102 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
10103 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10104 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10105 | + BPF_FUNC_map_lookup_elem), | ||
10106 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), | ||
10107 | + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), | ||
10108 | + BPF_JMP_A(1), | ||
10109 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), | ||
10110 | + BPF_EXIT_INSN(), | ||
10111 | + }, | ||
10112 | + .fixup_map1 = { 3 }, | ||
10113 | + .result = ACCEPT, | ||
10114 | + .result_unpriv = REJECT, | ||
10115 | + .errstr_unpriv = "R0 leaks addr as return value" | ||
10116 | + }, | ||
10117 | { | ||
10118 | "variable-offset ctx access", | ||
10119 | .insns = { | ||
10120 | @@ -6597,6 +7049,71 @@ static struct bpf_test tests[] = { | ||
10121 | .result = REJECT, | ||
10122 | .prog_type = BPF_PROG_TYPE_LWT_IN, | ||
10123 | }, | ||
10124 | + { | ||
10125 | + "indirect variable-offset stack access", | ||
10126 | + .insns = { | ||
10127 | + /* Fill the top 8 bytes of the stack */ | ||
10128 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
10129 | + /* Get an unknown value */ | ||
10130 | + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), | ||
10131 | + /* Make it small and 4-byte aligned */ | ||
10132 | + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), | ||
10133 | + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), | ||
10134 | + /* add it to fp. We now have either fp-4 or fp-8, but | ||
10135 | + * we don't know which | ||
10136 | + */ | ||
10137 | + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), | ||
10138 | + /* dereference it indirectly */ | ||
10139 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
10140 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
10141 | + BPF_FUNC_map_lookup_elem), | ||
10142 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
10143 | + BPF_EXIT_INSN(), | ||
10144 | + }, | ||
10145 | + .fixup_map1 = { 5 }, | ||
10146 | + .errstr = "variable stack read R2", | ||
10147 | + .result = REJECT, | ||
10148 | + .prog_type = BPF_PROG_TYPE_LWT_IN, | ||
10149 | + }, | ||
10150 | + { | ||
10151 | + "direct stack access with 32-bit wraparound. test1", | ||
10152 | + .insns = { | ||
10153 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), | ||
10154 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), | ||
10155 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff), | ||
10156 | + BPF_MOV32_IMM(BPF_REG_0, 0), | ||
10157 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
10158 | + BPF_EXIT_INSN() | ||
10159 | + }, | ||
10160 | + .errstr = "fp pointer and 2147483647", | ||
10161 | + .result = REJECT | ||
10162 | + }, | ||
10163 | + { | ||
10164 | + "direct stack access with 32-bit wraparound. test2", | ||
10165 | + .insns = { | ||
10166 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), | ||
10167 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), | ||
10168 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff), | ||
10169 | + BPF_MOV32_IMM(BPF_REG_0, 0), | ||
10170 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
10171 | + BPF_EXIT_INSN() | ||
10172 | + }, | ||
10173 | + .errstr = "fp pointer and 1073741823", | ||
10174 | + .result = REJECT | ||
10175 | + }, | ||
10176 | + { | ||
10177 | + "direct stack access with 32-bit wraparound. test3", | ||
10178 | + .insns = { | ||
10179 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), | ||
10180 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), | ||
10181 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff), | ||
10182 | + BPF_MOV32_IMM(BPF_REG_0, 0), | ||
10183 | + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), | ||
10184 | + BPF_EXIT_INSN() | ||
10185 | + }, | ||
10186 | + .errstr = "fp pointer offset 1073741822", | ||
10187 | + .result = REJECT | ||
10188 | + }, | ||
10189 | { | ||
10190 | "liveness pruning and write screening", | ||
10191 | .insns = { | ||
10192 | diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c | ||
10193 | index 2afc41a3730f..66e5ce5b91f0 100644 | ||
10194 | --- a/tools/testing/selftests/x86/ldt_gdt.c | ||
10195 | +++ b/tools/testing/selftests/x86/ldt_gdt.c | ||
10196 | @@ -137,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt, | ||
10197 | } | ||
10198 | } | ||
10199 | |||
10200 | -static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, | ||
10201 | - bool oldmode) | ||
10202 | +static bool install_valid_mode(const struct user_desc *d, uint32_t ar, | ||
10203 | + bool oldmode, bool ldt) | ||
10204 | { | ||
10205 | - int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, | ||
10206 | - desc, sizeof(*desc)); | ||
10207 | - if (ret < -1) | ||
10208 | - errno = -ret; | ||
10209 | + struct user_desc desc = *d; | ||
10210 | + int ret; | ||
10211 | + | ||
10212 | + if (!ldt) { | ||
10213 | +#ifndef __i386__ | ||
10214 | + /* No point testing set_thread_area in a 64-bit build */ | ||
10215 | + return false; | ||
10216 | +#endif | ||
10217 | + if (!gdt_entry_num) | ||
10218 | + return false; | ||
10219 | + desc.entry_number = gdt_entry_num; | ||
10220 | + | ||
10221 | + ret = syscall(SYS_set_thread_area, &desc); | ||
10222 | + } else { | ||
10223 | + ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, | ||
10224 | + &desc, sizeof(desc)); | ||
10225 | + | ||
10226 | + if (ret < -1) | ||
10227 | + errno = -ret; | ||
10228 | + | ||
10229 | + if (ret != 0 && errno == ENOSYS) { | ||
10230 | + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); | ||
10231 | + return false; | ||
10232 | + } | ||
10233 | + } | ||
10234 | + | ||
10235 | if (ret == 0) { | ||
10236 | - uint32_t limit = desc->limit; | ||
10237 | - if (desc->limit_in_pages) | ||
10238 | + uint32_t limit = desc.limit; | ||
10239 | + if (desc.limit_in_pages) | ||
10240 | limit = (limit << 12) + 4095; | ||
10241 | - check_valid_segment(desc->entry_number, 1, ar, limit, true); | ||
10242 | + check_valid_segment(desc.entry_number, ldt, ar, limit, true); | ||
10243 | return true; | ||
10244 | - } else if (errno == ENOSYS) { | ||
10245 | - printf("[OK]\tmodify_ldt returned -ENOSYS\n"); | ||
10246 | - return false; | ||
10247 | } else { | ||
10248 | - if (desc->seg_32bit) { | ||
10249 | - printf("[FAIL]\tUnexpected modify_ldt failure %d\n", | ||
10250 | + if (desc.seg_32bit) { | ||
10251 | + printf("[FAIL]\tUnexpected %s failure %d\n", | ||
10252 | + ldt ? "modify_ldt" : "set_thread_area", | ||
10253 | errno); | ||
10254 | nerrs++; | ||
10255 | return false; | ||
10256 | } else { | ||
10257 | - printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); | ||
10258 | + printf("[OK]\t%s rejected 16 bit segment\n", | ||
10259 | + ldt ? "modify_ldt" : "set_thread_area"); | ||
10260 | return false; | ||
10261 | } | ||
10262 | } | ||
10263 | @@ -168,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, | ||
10264 | |||
10265 | static bool install_valid(const struct user_desc *desc, uint32_t ar) | ||
10266 | { | ||
10267 | - return install_valid_mode(desc, ar, false); | ||
10268 | + bool ret = install_valid_mode(desc, ar, false, true); | ||
10269 | + | ||
10270 | + if (desc->contents <= 1 && desc->seg_32bit && | ||
10271 | + !desc->seg_not_present) { | ||
10272 | + /* Should work in the GDT, too. */ | ||
10273 | + install_valid_mode(desc, ar, false, false); | ||
10274 | + } | ||
10275 | + | ||
10276 | + return ret; | ||
10277 | } | ||
10278 | |||
10279 | static void install_invalid(const struct user_desc *desc, bool oldmode) | ||
10280 | diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c | ||
10281 | index 484e8820c382..2447d7c017e7 100644 | ||
10282 | --- a/virt/kvm/kvm_main.c | ||
10283 | +++ b/virt/kvm/kvm_main.c | ||
10284 | @@ -4018,7 +4018,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, | ||
10285 | if (!vcpu_align) | ||
10286 | vcpu_align = __alignof__(struct kvm_vcpu); | ||
10287 | kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, | ||
10288 | - 0, NULL); | ||
10289 | + SLAB_ACCOUNT, NULL); | ||
10290 | if (!kvm_vcpu_cache) { | ||
10291 | r = -ENOMEM; | ||
10292 | goto out_free_3; |