Magellan Linux

Contents of /trunk/kernel-alx-legacy/patches-4.9/0176-4.9.77-all-fixes.patch



Revision 3608
Fri Aug 14 07:34:29 2020 UTC by niro
File size: 163168 bytes
-added kernel-alx-legacy pkg
1 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2 index 498741737055..dfd56ec7a850 100644
3 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4 +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5 @@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
6 Description: AArch64 CPU registers
7 'identification' directory exposes the CPU ID registers for
8 identifying model and revision of the CPU.
9 +
10 +What: /sys/devices/system/cpu/vulnerabilities
11 + /sys/devices/system/cpu/vulnerabilities/meltdown
12 + /sys/devices/system/cpu/vulnerabilities/spectre_v1
13 + /sys/devices/system/cpu/vulnerabilities/spectre_v2
14 +Date: January 2018
15 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
16 +Description: Information about CPU vulnerabilities
17 +
18 + The files are named after the code names of CPU
19 + vulnerabilities. The output of those files reflects the
20 + state of the CPUs in the system. Possible output values:
21 +
22 + "Not affected" CPU is not affected by the vulnerability
23 + "Vulnerable" CPU is affected and no mitigation in effect
24 + "Mitigation: $M" CPU is affected and mitigation $M is in effect
25 diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
26 index 5d2676d043de..4c2667aa4634 100644
27 --- a/Documentation/kernel-parameters.txt
28 +++ b/Documentation/kernel-parameters.txt
29 @@ -2691,6 +2691,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
30 nosmt [KNL,S390] Disable symmetric multithreading (SMT).
31 Equivalent to smt=1.
32
33 + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
34 + (indirect branch prediction) vulnerability. System may
35 + allow data leaks with this option, which is equivalent
36 + to spectre_v2=off.
37 +
38 noxsave [BUGS=X86] Disables x86 extended register state save
39 and restore using xsave. The kernel will fallback to
40 enabling legacy floating-point and sse state.
41 @@ -2763,8 +2768,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
42
43 nojitter [IA-64] Disables jitter checking for ITC timers.
44
45 - nopti [X86-64] Disable KAISER isolation of kernel from user.
46 -
47 no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
48
49 no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
50 @@ -3327,11 +3330,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
51 pt. [PARIDE]
52 See Documentation/blockdev/paride.txt.
53
54 - pti= [X86_64]
55 - Control KAISER user/kernel address space isolation:
56 - on - enable
57 - off - disable
58 - auto - default setting
59 + pti= [X86_64] Control Page Table Isolation of user and
60 + kernel address spaces. Disabling this feature
61 + removes hardening, but improves performance of
62 + system calls and interrupts.
63 +
64 + on - unconditionally enable
65 + off - unconditionally disable
66 + auto - kernel detects whether your CPU model is
67 + vulnerable to issues that PTI mitigates
68 +
69 + Not specifying this option is equivalent to pti=auto.
70 +
71 + nopti [X86_64]
72 + Equivalent to pti=off
73
74 pty.legacy_count=
75 [KNL] Number of legacy pty's. Overwrites compiled-in
76 @@ -3937,6 +3949,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
77 sonypi.*= [HW] Sony Programmable I/O Control Device driver
78 See Documentation/laptops/sonypi.txt
79
80 + spectre_v2= [X86] Control mitigation of Spectre variant 2
81 + (indirect branch speculation) vulnerability.
82 +
83 + on - unconditionally enable
84 + off - unconditionally disable
85 + auto - kernel detects whether your CPU model is
86 + vulnerable
87 +
88 + Selecting 'on' will, and 'auto' may, choose a
89 + mitigation method at run time according to the
90 + CPU, the available microcode, the setting of the
91 + CONFIG_RETPOLINE configuration option, and the
92 + compiler with which the kernel was built.
93 +
94 + Specific mitigations can also be selected manually:
95 +
96 + retpoline - replace indirect branches
97 + retpoline,generic - google's original retpoline
98 + retpoline,amd - AMD-specific minimal thunk
99 +
100 + Not specifying this option is equivalent to
101 + spectre_v2=auto.
102 +
103 spia_io_base= [HW,MTD]
104 spia_fio_base=
105 spia_pedr=
106 diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
107 new file mode 100644
108 index 000000000000..d11eff61fc9a
109 --- /dev/null
110 +++ b/Documentation/x86/pti.txt
111 @@ -0,0 +1,186 @@
112 +Overview
113 +========
114 +
115 +Page Table Isolation (pti, previously known as KAISER[1]) is a
116 +countermeasure against attacks on the shared user/kernel address
117 +space such as the "Meltdown" approach[2].
118 +
119 +To mitigate this class of attacks, we create an independent set of
120 +page tables for use only when running userspace applications. When
121 +the kernel is entered via syscalls, interrupts or exceptions, the
122 +page tables are switched to the full "kernel" copy. When the system
123 +switches back to user mode, the user copy is used again.
124 +
125 +The userspace page tables contain only a minimal amount of kernel
126 +data: only what is needed to enter/exit the kernel such as the
127 +entry/exit functions themselves and the interrupt descriptor table
128 +(IDT). There are a few strictly unnecessary things that get mapped
129 +such as the first C function when entering an interrupt (see
130 +comments in pti.c).
131 +
132 +This approach helps to ensure that side-channel attacks leveraging
133 +the paging structures do not function when PTI is enabled. It can be
134 +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
135 +Once enabled at compile-time, it can be disabled at boot with the
136 +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
137 +
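As an illustrative aside (not part of the patch), the entry/exit switching described above boils down to keeping two page-table roots per mm and rewriting CR3 at every kernel boundary. A simplified sketch only; the names kernel_cr3/user_cr3 are hypothetical and the real logic lives in the kernel's entry assembly:

  /* Two roots per mm: one full copy, one minimal user copy. */
  unsigned long kernel_cr3;   /* maps kernel + user */
  unsigned long user_cr3;     /* maps user + minimal entry/exit data */

  static inline void write_cr3(unsigned long val)
  {
      /* privileged instruction; this only runs in kernel context */
      asm volatile("mov %0, %%cr3" : : "r" (val) : "memory");
  }

  void on_kernel_entry(void)   /* syscall, interrupt, or exception */
  {
      write_cr3(kernel_cr3);
  }

  void on_kernel_exit(void)    /* return to user mode */
  {
      write_cr3(user_cr3);
  }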
138 +Page Table Management
139 +=====================
140 +
141 +When PTI is enabled, the kernel manages two sets of page tables.
142 +The first set is very similar to the single set which is present in
143 +kernels without PTI. This includes a complete mapping of userspace
144 +that the kernel can use for things like copy_to_user().
145 +
146 +Although _complete_, the user portion of the kernel page tables is
147 +crippled by setting the NX bit in the top level. This ensures
148 +that any missed kernel->user CR3 switch will immediately crash
149 +userspace upon executing its first instruction.
150 +
151 +The userspace page tables map only the kernel data needed to enter
152 +and exit the kernel. This data is entirely contained in the 'struct
153 +cpu_entry_area' structure which is placed in the fixmap which gives
154 +each CPU's copy of the area a compile-time-fixed virtual address.
155 +
156 +For new userspace mappings, the kernel makes the entries in its
157 +page tables like normal. The only difference is when the kernel
158 +makes entries in the top (PGD) level. In addition to setting the
159 +entry in the main kernel PGD, a copy of the entry is made in the
160 +userspace page tables' PGD.
161 +
162 +This sharing at the PGD level also inherently shares all the lower
163 +layers of the page tables. This leaves a single, shared set of
164 +userspace page tables to manage. One PTE to lock, one set of
165 +accessed bits, dirty bits, etc...
166 +
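A sketch of the PGD-level mirroring just described (not part of the patch; set_pgd_both is a hypothetical helper, the real work happens in the kernel's set_pgd() paths):

  typedef struct { unsigned long pgd; } pgd_t;

  /*
   * Install a top-level user entry in both trees. Because sharing
   * happens at the PGD level, everything below (PUDs, PMDs, PTEs) is
   * a single set of tables referenced from both roots.
   */
  static void set_pgd_both(pgd_t *kernel_pgdp, pgd_t *user_pgdp, pgd_t entry)
  {
      *kernel_pgdp = entry;   /* main kernel page tables */
      *user_pgdp = entry;     /* mirrored into the userspace copy */
  }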
167 +Overhead
168 +========
169 +
170 +Protection against side-channel attacks is important. But,
171 +this protection comes at a cost:
172 +
173 +1. Increased Memory Use
174 + a. Each process now needs an order-1 PGD instead of order-0.
175 + (Consumes an additional 4k per process).
176 + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
177 + aligned so that it can be mapped by setting a single PMD
178 + entry. This consumes nearly 2MB of RAM once the kernel
179 + is decompressed, but no space in the kernel image itself.
180 +
181 +2. Runtime Cost
182 + a. CR3 manipulation to switch between the page table copies
183 + must be done at interrupt, syscall, and exception entry
184 + and exit (it can be skipped when the kernel is interrupted,
185 + though.) Moves to CR3 are on the order of a hundred
186 + cycles, and are required at every entry and exit.
187 + b. A "trampoline" must be used for SYSCALL entry. This
188 + trampoline depends on a smaller set of resources than the
189 + non-PTI SYSCALL entry code, so requires mapping fewer
190 + things into the userspace page tables. The downside is
191 + that stacks must be switched at entry time.
192 +  c. Global pages are disabled for all kernel structures not
193 + mapped into both kernel and userspace page tables. This
194 + feature of the MMU allows different processes to share TLB
195 + entries mapping the kernel. Losing the feature means more
196 + TLB misses after a context switch. The actual loss of
197 + performance is very small, however, never exceeding 1%.
198 + d. Process Context IDentifiers (PCID) is a CPU feature that
199 + allows us to skip flushing the entire TLB when switching page
200 + tables by setting a special bit in CR3 when the page tables
201 + are changed. This makes switching the page tables (at context
202 + switch, or kernel entry/exit) cheaper. But, on systems with
203 + PCID support, the context switch code must flush both the user
204 + and kernel entries out of the TLB. The user PCID TLB flush is
205 + deferred until the exit to userspace, minimizing the cost.
206 + See intel.com/sdm for the gory PCID/INVPCID details.
207 + e. The userspace page tables must be populated for each new
208 + process. Even without PTI, the shared kernel mappings
209 + are created by copying top-level (PGD) entries into each
210 + new process. But, with PTI, there are now *two* kernel
211 + mappings: one in the kernel page tables that maps everything
212 + and one for the entry/exit structures. At fork(), we need to
213 + copy both.
214 + f. In addition to the fork()-time copying, there must also
215 + be an update to the userspace PGD any time a set_pgd() is done
216 + on a PGD used to map userspace. This ensures that the kernel
217 + and userspace copies always map the same userspace
218 + memory.
219 + g. On systems without PCID support, each CR3 write flushes
220 + the entire TLB. That means that each syscall, interrupt
221 + or exception flushes the TLB.
222 + h. INVPCID is a TLB-flushing instruction which allows flushing
223 + of TLB entries for non-current PCIDs. Some systems support
224 + PCIDs, but do not support INVPCID. On these systems, addresses
225 + can only be flushed from the TLB for the current PCID. When
226 + flushing a kernel address, we need to flush all PCIDs, so a
227 + single kernel address flush will require a TLB-flushing CR3
228 + write upon the next use of every PCID.
229 +
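Cost 1a above, worked out in numbers (an illustrative aside, not part of the patch):

  #include <stdio.h>

  /* The PGD goes from order-0 (one 4k page) to order-1 (two 4k pages),
   * i.e. one extra 4k page of memory per process. */
  int main(void)
  {
      const unsigned long page_size = 4096;
      unsigned long pgd_no_pti = page_size << 0;  /* order-0 */
      unsigned long pgd_pti    = page_size << 1;  /* order-1 */

      printf("PGD without PTI: %lu bytes\n", pgd_no_pti);
      printf("PGD with PTI:    %lu bytes\n", pgd_pti);
      printf("per-process overhead: %lu bytes\n", pgd_pti - pgd_no_pti);
      return 0;
  }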
230 +Possible Future Work
231 +====================
232 +1. We can be more careful about not actually writing to CR3
233 + unless its value is actually changed.
234 +2. Allow PTI to be enabled/disabled at runtime in addition to the
235 + boot-time switching.
236 +
237 +Testing
238 +========
239 +
240 +To test stability of PTI, the following test procedure is recommended,
241 +ideally doing all of these in parallel:
242 +
243 +1. Set CONFIG_DEBUG_ENTRY=y
244 +2. Run several copies of all of the tools/testing/selftests/x86/ tests
245 + (excluding MPX and protection_keys) in a loop on multiple CPUs for
246 + several minutes. These tests frequently uncover corner cases in the
247 + kernel entry code. In general, old kernels might cause these tests
248 + themselves to crash, but they should never crash the kernel.
249 +3. Run the 'perf' tool in a mode (top or record) that generates many
250 + frequent performance monitoring non-maskable interrupts (see "NMI"
251 + in /proc/interrupts). This exercises the NMI entry/exit code which
252 + is known to trigger bugs in code paths that did not expect to be
253 + interrupted, including nested NMIs. Using "-c" boosts the rate of
254 + NMIs, and using two -c with separate counters encourages nested NMIs
255 + and less deterministic behavior.
256 +
257 + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
258 +
259 +4. Launch a KVM virtual machine.
260 +5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
261 + This has been a lightly-tested code path and needs extra scrutiny.
262 +
263 +Debugging
264 +=========
265 +
266 +Bugs in PTI cause a few different signatures of crashes
267 +that are worth noting here.
268 +
269 + * Failures of the selftests/x86 code. Usually a bug in one of the
270 + more obscure corners of entry_64.S
271 + * Crashes in early boot, especially around CPU bringup. Bugs
272 + in the trampoline code or mappings cause these.
273 + * Crashes at the first interrupt. Caused by bugs in entry_64.S,
274 + like screwing up a page table switch. Also caused by
275 + incorrectly mapping the IRQ handler entry code.
276 + * Crashes at the first NMI. The NMI code is separate from main
277 + interrupt handlers and can have bugs that do not affect
278 + normal interrupts. Also caused by incorrectly mapping NMI
279 + code. NMIs that interrupt the entry code must be very
280 + careful and can be the cause of crashes that show up when
281 + running perf.
282 + * Kernel crashes at the first exit to userspace. entry_64.S
283 + bugs, or failing to map some of the exit code.
284 + * Crashes at first interrupt that interrupts userspace. The paths
285 + in entry_64.S that return to userspace are sometimes separate
286 + from the ones that return to the kernel.
287 + * Double faults: overflowing the kernel stack because of page
288 + faults upon page faults. Caused by touching non-pti-mapped
289 + data in the entry code, or forgetting to switch to kernel
290 + CR3 before calling into C functions which are not pti-mapped.
291 + * Userspace segfaults early in boot, sometimes manifesting
292 + as mount(8) failing to mount the rootfs. These have
293 + tended to be TLB invalidation issues. Usually invalidating
294 + the wrong PCID, or otherwise missing an invalidation.
295 +
296 +1. https://gruss.cc/files/kaiser.pdf
297 +2. https://meltdownattack.com/meltdown.pdf
298 diff --git a/Makefile b/Makefile
299 index 2637f0ed0a07..aba553531d6a 100644
300 --- a/Makefile
301 +++ b/Makefile
302 @@ -1,6 +1,6 @@
303 VERSION = 4
304 PATCHLEVEL = 9
305 -SUBLEVEL = 76
306 +SUBLEVEL = 77
307 EXTRAVERSION =
308 NAME = Roaring Lionus
309
310 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
311 index b6e715fd3c90..dac7ceb1a677 100644
312 --- a/arch/arm/kvm/mmio.c
313 +++ b/arch/arm/kvm/mmio.c
314 @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
315 }
316
317 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
318 - data);
319 + &data);
320 data = vcpu_data_host_to_guest(vcpu, data, len);
321 vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
322 }
323 @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
324 data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
325 len);
326
327 - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
328 + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
329 kvm_mmio_write_buf(data_buf, len, data);
330
331 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
332 data_buf);
333 } else {
334 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
335 - fault_ipa, 0);
336 + fault_ipa, NULL);
337
338 ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
339 data_buf);
340 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
341 index c558bce989cd..6e716a5f1173 100644
342 --- a/arch/mips/kernel/process.c
343 +++ b/arch/mips/kernel/process.c
344 @@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
345 struct task_struct *t;
346 int max_users;
347
348 + /* If nothing to change, return right away, successfully. */
349 + if (value == mips_get_process_fp_mode(task))
350 + return 0;
351 +
352 + /* Only accept a mode change if 64-bit FP enabled for o32. */
353 + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
354 + return -EOPNOTSUPP;
355 +
356 + /* And only for o32 tasks. */
357 + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
358 + return -EOPNOTSUPP;
359 +
360 /* Check the value is valid */
361 if (value & ~known_bits)
362 return -EOPNOTSUPP;
363 diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
364 index 11890e6e4093..0c8ae2cc6380 100644
365 --- a/arch/mips/kernel/ptrace.c
366 +++ b/arch/mips/kernel/ptrace.c
367 @@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
368
369 #endif /* CONFIG_64BIT */
370
371 +/*
372 + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
373 + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
374 + * correspond 1:1 to buffer slots. Only general registers are copied.
375 + */
376 +static int fpr_get_fpa(struct task_struct *target,
377 + unsigned int *pos, unsigned int *count,
378 + void **kbuf, void __user **ubuf)
379 +{
380 + return user_regset_copyout(pos, count, kbuf, ubuf,
381 + &target->thread.fpu,
382 + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
383 +}
384 +
385 +/*
386 + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
387 + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
388 + * general register slots are copied to buffer slots. Only general
389 + * registers are copied.
390 + */
391 +static int fpr_get_msa(struct task_struct *target,
392 + unsigned int *pos, unsigned int *count,
393 + void **kbuf, void __user **ubuf)
394 +{
395 + unsigned int i;
396 + u64 fpr_val;
397 + int err;
398 +
399 + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
400 + for (i = 0; i < NUM_FPU_REGS; i++) {
401 + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
402 + err = user_regset_copyout(pos, count, kbuf, ubuf,
403 + &fpr_val, i * sizeof(elf_fpreg_t),
404 + (i + 1) * sizeof(elf_fpreg_t));
405 + if (err)
406 + return err;
407 + }
408 +
409 + return 0;
410 +}
411 +
412 +/*
413 + * Copy the floating-point context to the supplied NT_PRFPREG buffer.
414 + * Choose the appropriate helper for general registers, and then copy
415 + * the FCSR register separately.
416 + */
417 static int fpr_get(struct task_struct *target,
418 const struct user_regset *regset,
419 unsigned int pos, unsigned int count,
420 void *kbuf, void __user *ubuf)
421 {
422 - unsigned i;
423 + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
424 int err;
425 - u64 fpr_val;
426
427 - /* XXX fcr31 */
428 + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
429 + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
430 + else
431 + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
432 + if (err)
433 + return err;
434
435 - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
436 - return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
437 - &target->thread.fpu,
438 - 0, sizeof(elf_fpregset_t));
439 + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
440 + &target->thread.fpu.fcr31,
441 + fcr31_pos, fcr31_pos + sizeof(u32));
442
443 - for (i = 0; i < NUM_FPU_REGS; i++) {
444 - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
445 - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
446 - &fpr_val, i * sizeof(elf_fpreg_t),
447 - (i + 1) * sizeof(elf_fpreg_t));
448 + return err;
449 +}
450 +
451 +/*
452 + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
453 + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
454 + * context's general register slots. Only general registers are copied.
455 + */
456 +static int fpr_set_fpa(struct task_struct *target,
457 + unsigned int *pos, unsigned int *count,
458 + const void **kbuf, const void __user **ubuf)
459 +{
460 + return user_regset_copyin(pos, count, kbuf, ubuf,
461 + &target->thread.fpu,
462 + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
463 +}
464 +
465 +/*
466 + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
467 + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
468 + * bits only of FP context's general register slots. Only general
469 + * registers are copied.
470 + */
471 +static int fpr_set_msa(struct task_struct *target,
472 + unsigned int *pos, unsigned int *count,
473 + const void **kbuf, const void __user **ubuf)
474 +{
475 + unsigned int i;
476 + u64 fpr_val;
477 + int err;
478 +
479 + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
480 + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
481 + err = user_regset_copyin(pos, count, kbuf, ubuf,
482 + &fpr_val, i * sizeof(elf_fpreg_t),
483 + (i + 1) * sizeof(elf_fpreg_t));
484 if (err)
485 return err;
486 + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
487 }
488
489 return 0;
490 }
491
492 +/*
493 + * Copy the supplied NT_PRFPREG buffer to the floating-point context.
494 + * Choose the appropriate helper for general registers, and then copy
495 + * the FCSR register separately.
496 + *
497 + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
498 + * which is supposed to have been guaranteed by the kernel before
499 + * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
500 + * so that we can safely avoid preinitializing temporaries for
501 + * partial register writes.
502 + */
503 static int fpr_set(struct task_struct *target,
504 const struct user_regset *regset,
505 unsigned int pos, unsigned int count,
506 const void *kbuf, const void __user *ubuf)
507 {
508 - unsigned i;
509 + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
510 + u32 fcr31;
511 int err;
512 - u64 fpr_val;
513
514 - /* XXX fcr31 */
515 + BUG_ON(count % sizeof(elf_fpreg_t));
516 +
517 + if (pos + count > sizeof(elf_fpregset_t))
518 + return -EIO;
519
520 init_fp_ctx(target);
521
522 - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
523 - return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
524 - &target->thread.fpu,
525 - 0, sizeof(elf_fpregset_t));
526 + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
527 + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
528 + else
529 + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
530 + if (err)
531 + return err;
532
533 - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
534 - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
535 + if (count > 0) {
536 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
537 - &fpr_val, i * sizeof(elf_fpreg_t),
538 - (i + 1) * sizeof(elf_fpreg_t));
539 + &fcr31,
540 + fcr31_pos, fcr31_pos + sizeof(u32));
541 if (err)
542 return err;
543 - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
544 +
545 + ptrace_setfcr31(target, fcr31);
546 }
547
548 - return 0;
549 + return err;
550 }
551
552 enum mips_regset {
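For illustration (not part of the patch), here is the userspace view of the NT_PRFPREG regset that the reworked fpr_get()/fpr_set() above implement: 32 doubleword FP register slots followed by FCSR. A hedged sketch; the struct layout is written out by hand rather than taken from a uapi header, and dump_fcsr is a hypothetical helper:

  #include <stdio.h>
  #include <stdint.h>
  #include <sys/types.h>
  #include <sys/ptrace.h>
  #include <sys/uio.h>
  #include <elf.h>                 /* NT_PRFPREG */

  struct mips_nt_prfpreg {
      uint64_t fp_r[32];           /* lower 64 bits of each FP register */
      uint32_t fcr31;              /* FP control/status, copied separately */
      uint32_t pad;
  };

  static int dump_fcsr(pid_t pid)
  {
      struct mips_nt_prfpreg regs;
      struct iovec iov = { .iov_base = &regs, .iov_len = sizeof(regs) };

      /* One regset read; the kernel fills the registers, then FCSR. */
      if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRFPREG, &iov) == -1)
          return -1;

      printf("fcr31 = 0x%08x\n", regs.fcr31);
      return 0;
  }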
553 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
554 index da8156fd3d58..0ca4d12ce95c 100644
555 --- a/arch/x86/Kconfig
556 +++ b/arch/x86/Kconfig
557 @@ -64,6 +64,7 @@ config X86
558 select GENERIC_CLOCKEVENTS_MIN_ADJUST
559 select GENERIC_CMOS_UPDATE
560 select GENERIC_CPU_AUTOPROBE
561 + select GENERIC_CPU_VULNERABILITIES
562 select GENERIC_EARLY_IOREMAP
563 select GENERIC_FIND_FIRST_BIT
564 select GENERIC_IOMAP
565 @@ -407,6 +408,19 @@ config GOLDFISH
566 def_bool y
567 depends on X86_GOLDFISH
568
569 +config RETPOLINE
570 + bool "Avoid speculative indirect branches in kernel"
571 + default y
572 + ---help---
573 + Compile kernel with the retpoline compiler options to guard against
574 + kernel-to-user data leaks by avoiding speculative indirect
575 + branches. Requires a compiler with -mindirect-branch=thunk-extern
576 + support for full protection. The kernel may run slower.
577 +
578 + Without compiler support, at least indirect branches in assembler
579 + code are eliminated. Since this includes the syscall entry path,
580 + it is not entirely pointless.
581 +
582 if X86_32
583 config X86_EXTENDED_PLATFORM
584 bool "Support for extended (non-PC) x86 platforms"
585 diff --git a/arch/x86/Makefile b/arch/x86/Makefile
586 index 2d449337a360..cd22cb8ebd42 100644
587 --- a/arch/x86/Makefile
588 +++ b/arch/x86/Makefile
589 @@ -182,6 +182,14 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
590 KBUILD_CFLAGS += $(mflags-y)
591 KBUILD_AFLAGS += $(mflags-y)
592
593 +# Avoid indirect branches in kernel to deal with Spectre
594 +ifdef CONFIG_RETPOLINE
595 + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
596 + ifneq ($(RETPOLINE_CFLAGS),)
597 + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
598 + endif
599 +endif
600 +
601 archscripts: scripts_basic
602 $(Q)$(MAKE) $(build)=arch/x86/tools relocs
603
604 diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
605 index 383a6f84a060..fa8801b35e51 100644
606 --- a/arch/x86/crypto/aesni-intel_asm.S
607 +++ b/arch/x86/crypto/aesni-intel_asm.S
608 @@ -32,6 +32,7 @@
609 #include <linux/linkage.h>
610 #include <asm/inst.h>
611 #include <asm/frame.h>
612 +#include <asm/nospec-branch.h>
613
614 /*
615 * The following macros are used to move an (un)aligned 16 byte value to/from
616 @@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8)
617 pxor INC, STATE4
618 movdqu IV, 0x30(OUTP)
619
620 - call *%r11
621 + CALL_NOSPEC %r11
622
623 movdqu 0x00(OUTP), INC
624 pxor INC, STATE1
625 @@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8)
626 _aesni_gf128mul_x_ble()
627 movups IV, (IVP)
628
629 - call *%r11
630 + CALL_NOSPEC %r11
631
632 movdqu 0x40(OUTP), INC
633 pxor INC, STATE1
634 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
635 index aa9e8bd163f6..77ff4de2224d 100644
636 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
637 +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
638 @@ -17,6 +17,7 @@
639
640 #include <linux/linkage.h>
641 #include <asm/frame.h>
642 +#include <asm/nospec-branch.h>
643
644 #define CAMELLIA_TABLE_BYTE_LEN 272
645
646 @@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way:
647 vpxor 14 * 16(%rax), %xmm15, %xmm14;
648 vpxor 15 * 16(%rax), %xmm15, %xmm15;
649
650 - call *%r9;
651 + CALL_NOSPEC %r9;
652
653 addq $(16 * 16), %rsp;
654
655 diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
656 index 16186c18656d..7384342fbb41 100644
657 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
658 +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
659 @@ -12,6 +12,7 @@
660
661 #include <linux/linkage.h>
662 #include <asm/frame.h>
663 +#include <asm/nospec-branch.h>
664
665 #define CAMELLIA_TABLE_BYTE_LEN 272
666
667 @@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way:
668 vpxor 14 * 32(%rax), %ymm15, %ymm14;
669 vpxor 15 * 32(%rax), %ymm15, %ymm15;
670
671 - call *%r9;
672 + CALL_NOSPEC %r9;
673
674 addq $(16 * 32), %rsp;
675
676 diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
677 index dc05f010ca9b..174fd4146043 100644
678 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
679 +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
680 @@ -45,6 +45,7 @@
681
682 #include <asm/inst.h>
683 #include <linux/linkage.h>
684 +#include <asm/nospec-branch.h>
685
686 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
687
688 @@ -172,7 +173,7 @@ continue_block:
689 movzxw (bufp, %rax, 2), len
690 lea crc_array(%rip), bufp
691 lea (bufp, len, 1), bufp
692 - jmp *bufp
693 + JMP_NOSPEC bufp
694
695 ################################################################
696 ## 2a) PROCESS FULL BLOCKS:
697 diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
698 index edba8606b99a..bdc9aeaf2e45 100644
699 --- a/arch/x86/entry/entry_32.S
700 +++ b/arch/x86/entry/entry_32.S
701 @@ -45,6 +45,7 @@
702 #include <asm/asm.h>
703 #include <asm/smap.h>
704 #include <asm/export.h>
705 +#include <asm/nospec-branch.h>
706
707 .section .entry.text, "ax"
708
709 @@ -260,7 +261,7 @@ ENTRY(ret_from_fork)
710
711 /* kernel thread */
712 1: movl %edi, %eax
713 - call *%ebx
714 + CALL_NOSPEC %ebx
715 /*
716 * A kernel thread is allowed to return here after successfully
717 * calling do_execve(). Exit to userspace to complete the execve()
718 @@ -984,7 +985,8 @@ trace:
719 movl 0x4(%ebp), %edx
720 subl $MCOUNT_INSN_SIZE, %eax
721
722 - call *ftrace_trace_function
723 + movl ftrace_trace_function, %ecx
724 + CALL_NOSPEC %ecx
725
726 popl %edx
727 popl %ecx
728 @@ -1020,7 +1022,7 @@ return_to_handler:
729 movl %eax, %ecx
730 popl %edx
731 popl %eax
732 - jmp *%ecx
733 + JMP_NOSPEC %ecx
734 #endif
735
736 #ifdef CONFIG_TRACING
737 @@ -1062,7 +1064,7 @@ error_code:
738 movl %ecx, %es
739 TRACE_IRQS_OFF
740 movl %esp, %eax # pt_regs pointer
741 - call *%edi
742 + CALL_NOSPEC %edi
743 jmp ret_from_exception
744 END(page_fault)
745
746 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
747 index af4e58132d91..b9c901ce6582 100644
748 --- a/arch/x86/entry/entry_64.S
749 +++ b/arch/x86/entry/entry_64.S
750 @@ -37,6 +37,7 @@
751 #include <asm/pgtable_types.h>
752 #include <asm/export.h>
753 #include <asm/kaiser.h>
754 +#include <asm/nospec-branch.h>
755 #include <linux/err.h>
756
757 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
758 @@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath:
759 * It might end up jumping to the slow path. If it jumps, RAX
760 * and all argument registers are clobbered.
761 */
762 +#ifdef CONFIG_RETPOLINE
763 + movq sys_call_table(, %rax, 8), %rax
764 + call __x86_indirect_thunk_rax
765 +#else
766 call *sys_call_table(, %rax, 8)
767 +#endif
768 .Lentry_SYSCALL_64_after_fastpath_call:
769
770 movq %rax, RAX(%rsp)
771 @@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64)
772 jmp entry_SYSCALL64_slow_path
773
774 1:
775 - jmp *%rax /* Called from C */
776 + JMP_NOSPEC %rax /* Called from C */
777 END(stub_ptregs_64)
778
779 .macro ptregs_stub func
780 @@ -457,7 +463,7 @@ ENTRY(ret_from_fork)
781 1:
782 /* kernel thread */
783 movq %r12, %rdi
784 - call *%rbx
785 + CALL_NOSPEC %rbx
786 /*
787 * A kernel thread is allowed to return here after successfully
788 * calling do_execve(). Exit to userspace to complete the execve()
789 diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
790 index d4aea31eec03..deca9b9c7923 100644
791 --- a/arch/x86/include/asm/alternative.h
792 +++ b/arch/x86/include/asm/alternative.h
793 @@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
794 ".popsection\n" \
795 ".pushsection .altinstr_replacement, \"ax\"\n" \
796 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
797 - ".popsection"
798 + ".popsection\n"
799
800 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
801 OLDINSTR_2(oldinstr, 1, 2) \
802 @@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
803 ".pushsection .altinstr_replacement, \"ax\"\n" \
804 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
805 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
806 - ".popsection"
807 + ".popsection\n"
808
809 /*
810 * Alternative instructions for different CPU types or capabilities.
811 diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
812 index 44b8762fa0c7..b15aa4083dfd 100644
813 --- a/arch/x86/include/asm/asm-prototypes.h
814 +++ b/arch/x86/include/asm/asm-prototypes.h
815 @@ -10,7 +10,32 @@
816 #include <asm/pgtable.h>
817 #include <asm/special_insns.h>
818 #include <asm/preempt.h>
819 +#include <asm/asm.h>
820
821 #ifndef CONFIG_X86_CMPXCHG64
822 extern void cmpxchg8b_emu(void);
823 #endif
824 +
825 +#ifdef CONFIG_RETPOLINE
826 +#ifdef CONFIG_X86_32
827 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
828 +#else
829 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
830 +INDIRECT_THUNK(8)
831 +INDIRECT_THUNK(9)
832 +INDIRECT_THUNK(10)
833 +INDIRECT_THUNK(11)
834 +INDIRECT_THUNK(12)
835 +INDIRECT_THUNK(13)
836 +INDIRECT_THUNK(14)
837 +INDIRECT_THUNK(15)
838 +#endif
839 +INDIRECT_THUNK(ax)
840 +INDIRECT_THUNK(bx)
841 +INDIRECT_THUNK(cx)
842 +INDIRECT_THUNK(dx)
843 +INDIRECT_THUNK(si)
844 +INDIRECT_THUNK(di)
845 +INDIRECT_THUNK(bp)
846 +INDIRECT_THUNK(sp)
847 +#endif /* CONFIG_RETPOLINE */
848 diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
849 index 7acb51c49fec..00523524edbf 100644
850 --- a/arch/x86/include/asm/asm.h
851 +++ b/arch/x86/include/asm/asm.h
852 @@ -125,4 +125,15 @@
853 /* For C file, we already have NOKPROBE_SYMBOL macro */
854 #endif
855
856 +#ifndef __ASSEMBLY__
857 +/*
858 + * This output constraint should be used for any inline asm which has a "call"
859 + * instruction. Otherwise the asm may be inserted before the frame pointer
860 + * gets set up by the containing function. If you forget to do this, objtool
861 + * may print a "call without frame pointer save/setup" warning.
862 + */
863 +register unsigned long current_stack_pointer asm(_ASM_SP);
864 +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
865 +#endif
866 +
867 #endif /* _ASM_X86_ASM_H */
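A minimal sketch of the new constraint in use (not part of the patch; my_func is a placeholder). Listing the stack pointer as an output ties the asm to the frame setup, so the compiler cannot hoist the call above it:

  extern void my_func(void);

  static inline void call_my_func(void)
  {
      asm volatile("call my_func"
                   : ASM_CALL_CONSTRAINT    /* "+r" (current_stack_pointer) */
                   : /* no inputs */
                   : "memory");
  }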
868 diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
869 index 1d2b69fc0ceb..9ea67a04ff4f 100644
870 --- a/arch/x86/include/asm/cpufeature.h
871 +++ b/arch/x86/include/asm/cpufeature.h
872 @@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
873 set_bit(bit, (unsigned long *)cpu_caps_set); \
874 } while (0)
875
876 +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
877 +
878 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
879 /*
880 * Static testing of CPU features. Used the same as boot_cpu_has().
881 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
882 index 454a37adb823..4467568a531b 100644
883 --- a/arch/x86/include/asm/cpufeatures.h
884 +++ b/arch/x86/include/asm/cpufeatures.h
885 @@ -194,6 +194,9 @@
886 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
887 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
888
889 +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
890 +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
891 +
892 #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
893 #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
894 #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
895 @@ -316,5 +319,8 @@
896 #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
897 #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
898 #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
899 +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
900 +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
901 +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
902
903 #endif /* _ASM_X86_CPUFEATURES_H */
904 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
905 index b601ddac5719..b11c4c072df8 100644
906 --- a/arch/x86/include/asm/msr-index.h
907 +++ b/arch/x86/include/asm/msr-index.h
908 @@ -330,6 +330,9 @@
909 #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
910 #define FAM10H_MMIO_CONF_BASE_SHIFT 20
911 #define MSR_FAM10H_NODE_ID 0xc001100c
912 +#define MSR_F10H_DECFG 0xc0011029
913 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
914 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
915
916 /* K8 MSRs */
917 #define MSR_K8_TOP_MEM1 0xc001001a
918 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
919 new file mode 100644
920 index 000000000000..402a11c803c3
921 --- /dev/null
922 +++ b/arch/x86/include/asm/nospec-branch.h
923 @@ -0,0 +1,214 @@
924 +/* SPDX-License-Identifier: GPL-2.0 */
925 +
926 +#ifndef __NOSPEC_BRANCH_H__
927 +#define __NOSPEC_BRANCH_H__
928 +
929 +#include <asm/alternative.h>
930 +#include <asm/alternative-asm.h>
931 +#include <asm/cpufeatures.h>
932 +
933 +/*
934 + * Fill the CPU return stack buffer.
935 + *
936 + * Each entry in the RSB, if used for a speculative 'ret', contains an
937 + * infinite 'pause; jmp' loop to capture speculative execution.
938 + *
939 + * This is required in various cases for retpoline and IBRS-based
940 + * mitigations for the Spectre variant 2 vulnerability. Sometimes to
941 + * eliminate potentially bogus entries from the RSB, and sometimes
942 + * purely to ensure that it doesn't get empty, which on some CPUs would
943 + * allow predictions from other (unwanted!) sources to be used.
944 + *
945 + * We define a CPP macro such that it can be used from both .S files and
946 + * inline assembly. It's possible to do a .macro and then include that
947 + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
948 + */
949 +
950 +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
951 +#define RSB_FILL_LOOPS 16 /* To avoid underflow */
952 +
953 +/*
954 + * Google experimented with loop-unrolling and this turned out to be
955 + * the optimal version — two calls, each with their own speculation
956 + * trap should their return address end up getting used, in a loop.
957 + */
958 +#define __FILL_RETURN_BUFFER(reg, nr, sp) \
959 + mov $(nr/2), reg; \
960 +771: \
961 + call 772f; \
962 +773: /* speculation trap */ \
963 + pause; \
964 + jmp 773b; \
965 +772: \
966 + call 774f; \
967 +775: /* speculation trap */ \
968 + pause; \
969 + jmp 775b; \
970 +774: \
971 + dec reg; \
972 + jnz 771b; \
973 + add $(BITS_PER_LONG/8) * nr, sp;
974 +
975 +#ifdef __ASSEMBLY__
976 +
977 +/*
978 + * This should be used immediately before a retpoline alternative. It tells
979 + * objtool where the retpolines are so that it can make sense of the control
980 + * flow by just reading the original instruction(s) and ignoring the
981 + * alternatives.
982 + */
983 +.macro ANNOTATE_NOSPEC_ALTERNATIVE
984 + .Lannotate_\@:
985 + .pushsection .discard.nospec
986 + .long .Lannotate_\@ - .
987 + .popsection
988 +.endm
989 +
990 +/*
991 + * These are the bare retpoline primitives for indirect jmp and call.
992 + * Do not use these directly; they only exist to make the ALTERNATIVE
993 + * invocation below less ugly.
994 + */
995 +.macro RETPOLINE_JMP reg:req
996 + call .Ldo_rop_\@
997 +.Lspec_trap_\@:
998 + pause
999 + jmp .Lspec_trap_\@
1000 +.Ldo_rop_\@:
1001 + mov \reg, (%_ASM_SP)
1002 + ret
1003 +.endm
1004 +
1005 +/*
1006 + * This is a wrapper around RETPOLINE_JMP so the called function in reg
1007 + * returns to the instruction after the macro.
1008 + */
1009 +.macro RETPOLINE_CALL reg:req
1010 + jmp .Ldo_call_\@
1011 +.Ldo_retpoline_jmp_\@:
1012 + RETPOLINE_JMP \reg
1013 +.Ldo_call_\@:
1014 + call .Ldo_retpoline_jmp_\@
1015 +.endm
1016 +
1017 +/*
1018 + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
1019 + * indirect jmp/call which may be susceptible to the Spectre variant 2
1020 + * attack.
1021 + */
1022 +.macro JMP_NOSPEC reg:req
1023 +#ifdef CONFIG_RETPOLINE
1024 + ANNOTATE_NOSPEC_ALTERNATIVE
1025 + ALTERNATIVE_2 __stringify(jmp *\reg), \
1026 + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
1027 + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
1028 +#else
1029 + jmp *\reg
1030 +#endif
1031 +.endm
1032 +
1033 +.macro CALL_NOSPEC reg:req
1034 +#ifdef CONFIG_RETPOLINE
1035 + ANNOTATE_NOSPEC_ALTERNATIVE
1036 + ALTERNATIVE_2 __stringify(call *\reg), \
1037 + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
1038 + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
1039 +#else
1040 + call *\reg
1041 +#endif
1042 +.endm
1043 +
1044 + /*
1045 + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
1046 + * monstrosity above, manually.
1047 + */
1048 +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
1049 +#ifdef CONFIG_RETPOLINE
1050 + ANNOTATE_NOSPEC_ALTERNATIVE
1051 + ALTERNATIVE "jmp .Lskip_rsb_\@", \
1052 + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
1053 + \ftr
1054 +.Lskip_rsb_\@:
1055 +#endif
1056 +.endm
1057 +
1058 +#else /* __ASSEMBLY__ */
1059 +
1060 +#define ANNOTATE_NOSPEC_ALTERNATIVE \
1061 + "999:\n\t" \
1062 + ".pushsection .discard.nospec\n\t" \
1063 + ".long 999b - .\n\t" \
1064 + ".popsection\n\t"
1065 +
1066 +#if defined(CONFIG_X86_64) && defined(RETPOLINE)
1067 +
1068 +/*
1069 + * Since the inline asm uses the %V modifier which is only in newer GCC,
1070 + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
1071 + */
1072 +# define CALL_NOSPEC \
1073 + ANNOTATE_NOSPEC_ALTERNATIVE \
1074 + ALTERNATIVE( \
1075 + "call *%[thunk_target]\n", \
1076 + "call __x86_indirect_thunk_%V[thunk_target]\n", \
1077 + X86_FEATURE_RETPOLINE)
1078 +# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
1079 +
1080 +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
1081 +/*
1082 + * For i386 we use the original ret-equivalent retpoline, because
1083 + * otherwise we'll run out of registers. We don't care about CET
1084 + * here, anyway.
1085 + */
1086 +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
1087 + " jmp 904f;\n" \
1088 + " .align 16\n" \
1089 + "901: call 903f;\n" \
1090 + "902: pause;\n" \
1091 + " jmp 902b;\n" \
1092 + " .align 16\n" \
1093 + "903: addl $4, %%esp;\n" \
1094 + " pushl %[thunk_target];\n" \
1095 + " ret;\n" \
1096 + " .align 16\n" \
1097 + "904: call 901b;\n", \
1098 + X86_FEATURE_RETPOLINE)
1099 +
1100 +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1101 +#else /* No retpoline for C / inline asm */
1102 +# define CALL_NOSPEC "call *%[thunk_target]\n"
1103 +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1104 +#endif
1105 +
1106 +/* The Spectre V2 mitigation variants */
1107 +enum spectre_v2_mitigation {
1108 + SPECTRE_V2_NONE,
1109 + SPECTRE_V2_RETPOLINE_MINIMAL,
1110 + SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
1111 + SPECTRE_V2_RETPOLINE_GENERIC,
1112 + SPECTRE_V2_RETPOLINE_AMD,
1113 + SPECTRE_V2_IBRS,
1114 +};
1115 +
1116 +/*
1117 + * On VMEXIT we must ensure that no RSB predictions learned in the guest
1118 + * can be followed in the host, by overwriting the RSB completely. Both
1119 + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
1120 + * CPUs with IBRS_ATT *might* it be avoided.
1121 + */
1122 +static inline void vmexit_fill_RSB(void)
1123 +{
1124 +#ifdef CONFIG_RETPOLINE
1125 + unsigned long loops = RSB_CLEAR_LOOPS / 2;
1126 +
1127 + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
1128 + ALTERNATIVE("jmp 910f",
1129 + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
1130 + X86_FEATURE_RETPOLINE)
1131 + "910:"
1132 + : "=&r" (loops), ASM_CALL_CONSTRAINT
1133 + : "r" (loops) : "memory" );
1134 +#endif
1135 +}
1136 +#endif /* __ASSEMBLY__ */
1137 +#endif /* __NOSPEC_BRANCH_H__ */
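For illustration (not part of the patch), the C-side CALL_NOSPEC/THUNK_TARGET pattern looks like this hypothetical wrapper; the Xen hypercall conversion later in this patch follows the same shape, and the retpoline thunk is patched in by the alternatives machinery when X86_FEATURE_RETPOLINE is set:

  static inline void indirect_call_nospec(void (*func)(void))
  {
      asm volatile(CALL_NOSPEC
                   : ASM_CALL_CONSTRAINT    /* call inside inline asm */
                   : THUNK_TARGET(func)     /* indirect branch target */
                   : "memory");
  }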
1138 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
1139 index b6d425999f99..1178a51b77f3 100644
1140 --- a/arch/x86/include/asm/pgalloc.h
1141 +++ b/arch/x86/include/asm/pgalloc.h
1142 @@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
1143 */
1144 extern gfp_t __userpte_alloc_gfp;
1145
1146 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
1147 +/*
1148 + * Instead of one PGD, we acquire two PGDs. Being order-1, it is
1149 + * both 8k in size and 8k-aligned. That lets us just flip bit 12
1150 + * in a pointer to swap between the two 4k halves.
1151 + */
1152 +#define PGD_ALLOCATION_ORDER 1
1153 +#else
1154 +#define PGD_ALLOCATION_ORDER 0
1155 +#endif
1156 +
1157 /*
1158 * Allocate and free page tables.
1159 */
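The bit-12 trick mentioned in the comment above, as a self-contained sketch (not part of the patch; user_half is a hypothetical helper name):

  #include <stdint.h>

  #define PAGE_SIZE 4096UL

  typedef struct { uint64_t pgd; } pgd_t;

  /* With an 8k-sized, 8k-aligned order-1 PGD pair, XOR-ing the pointer
   * with PAGE_SIZE (flipping bit 12) moves between the kernel half and
   * the user half of the allocation. */
  static inline pgd_t *user_half(pgd_t *kernel_pgd)
  {
      return (pgd_t *)((uintptr_t)kernel_pgd ^ PAGE_SIZE);
  }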
1160 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
1161 index 8cb52ee3ade6..e40b19ca486e 100644
1162 --- a/arch/x86/include/asm/processor.h
1163 +++ b/arch/x86/include/asm/processor.h
1164 @@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
1165 extern struct cpuinfo_x86 new_cpu_data;
1166
1167 extern struct tss_struct doublefault_tss;
1168 -extern __u32 cpu_caps_cleared[NCAPINTS];
1169 -extern __u32 cpu_caps_set[NCAPINTS];
1170 +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
1171 +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
1172
1173 #ifdef CONFIG_SMP
1174 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
1175 diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
1176 index ad6f5eb07a95..bdf9c4c91572 100644
1177 --- a/arch/x86/include/asm/thread_info.h
1178 +++ b/arch/x86/include/asm/thread_info.h
1179 @@ -152,17 +152,6 @@ struct thread_info {
1180 */
1181 #ifndef __ASSEMBLY__
1182
1183 -static inline unsigned long current_stack_pointer(void)
1184 -{
1185 - unsigned long sp;
1186 -#ifdef CONFIG_X86_64
1187 - asm("mov %%rsp,%0" : "=g" (sp));
1188 -#else
1189 - asm("mov %%esp,%0" : "=g" (sp));
1190 -#endif
1191 - return sp;
1192 -}
1193 -
1194 /*
1195 * Walks up the stack frames to make sure that the specified object is
1196 * entirely contained by a single stack frame.
1197 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
1198 index 8b678af866f7..ccdc23d89b60 100644
1199 --- a/arch/x86/include/asm/xen/hypercall.h
1200 +++ b/arch/x86/include/asm/xen/hypercall.h
1201 @@ -44,6 +44,7 @@
1202 #include <asm/page.h>
1203 #include <asm/pgtable.h>
1204 #include <asm/smap.h>
1205 +#include <asm/nospec-branch.h>
1206
1207 #include <xen/interface/xen.h>
1208 #include <xen/interface/sched.h>
1209 @@ -216,9 +217,9 @@ privcmd_call(unsigned call,
1210 __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
1211
1212 stac();
1213 - asm volatile("call *%[call]"
1214 + asm volatile(CALL_NOSPEC
1215 : __HYPERCALL_5PARAM
1216 - : [call] "a" (&hypercall_page[call])
1217 + : [thunk_target] "a" (&hypercall_page[call])
1218 : __HYPERCALL_CLOBBER5);
1219 clac();
1220
1221 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
1222 index 11cc600f4df0..0a1e8a67cc99 100644
1223 --- a/arch/x86/kernel/acpi/boot.c
1224 +++ b/arch/x86/kernel/acpi/boot.c
1225 @@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
1226 #ifdef CONFIG_X86_IO_APIC
1227 #define MP_ISA_BUS 0
1228
1229 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1230 + u8 trigger, u32 gsi);
1231 +
1232 static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1233 u32 gsi)
1234 {
1235 - int ioapic;
1236 - int pin;
1237 - struct mpc_intsrc mp_irq;
1238 -
1239 /*
1240 * Check bus_irq boundary.
1241 */
1242 @@ -350,14 +349,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1243 return;
1244 }
1245
1246 - /*
1247 - * Convert 'gsi' to 'ioapic.pin'.
1248 - */
1249 - ioapic = mp_find_ioapic(gsi);
1250 - if (ioapic < 0)
1251 - return;
1252 - pin = mp_find_ioapic_pin(ioapic, gsi);
1253 -
1254 /*
1255 * TBD: This check is for faulty timer entries, where the override
1256 * erroneously sets the trigger to level, resulting in a HUGE
1257 @@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1258 if ((bus_irq == 0) && (trigger == 3))
1259 trigger = 1;
1260
1261 - mp_irq.type = MP_INTSRC;
1262 - mp_irq.irqtype = mp_INT;
1263 - mp_irq.irqflag = (trigger << 2) | polarity;
1264 - mp_irq.srcbus = MP_ISA_BUS;
1265 - mp_irq.srcbusirq = bus_irq; /* IRQ */
1266 - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
1267 - mp_irq.dstirq = pin; /* INTIN# */
1268 -
1269 - mp_save_irq(&mp_irq);
1270 -
1271 + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
1272 + return;
1273 /*
1274 * Reset default identity mapping if gsi is also an legacy IRQ,
1275 * otherwise there will be more than one entry with the same GSI
1276 @@ -422,6 +405,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1277 return 0;
1278 }
1279
1280 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1281 + u8 trigger, u32 gsi)
1282 +{
1283 + struct mpc_intsrc mp_irq;
1284 + int ioapic, pin;
1285 +
1286 + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
1287 + ioapic = mp_find_ioapic(gsi);
1288 + if (ioapic < 0) {
1289 + pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
1290 + return ioapic;
1291 + }
1292 +
1293 + pin = mp_find_ioapic_pin(ioapic, gsi);
1294 +
1295 + mp_irq.type = MP_INTSRC;
1296 + mp_irq.irqtype = mp_INT;
1297 + mp_irq.irqflag = (trigger << 2) | polarity;
1298 + mp_irq.srcbus = MP_ISA_BUS;
1299 + mp_irq.srcbusirq = bus_irq;
1300 + mp_irq.dstapic = mpc_ioapic_id(ioapic);
1301 + mp_irq.dstirq = pin;
1302 +
1303 + mp_save_irq(&mp_irq);
1304 +
1305 + return 0;
1306 +}
1307 +
1308 static int __init
1309 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
1310 {
1311 @@ -466,7 +477,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
1312 if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
1313 polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
1314
1315 - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1316 + if (bus_irq < NR_IRQS_LEGACY)
1317 + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1318 + else
1319 + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
1320 +
1321 acpi_penalize_sci_irq(bus_irq, trigger, polarity);
1322
1323 /*
1324 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
1325 index 5cb272a7a5a3..10d5a3d6affc 100644
1326 --- a/arch/x86/kernel/alternative.c
1327 +++ b/arch/x86/kernel/alternative.c
1328 @@ -340,9 +340,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
1329 static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
1330 {
1331 unsigned long flags;
1332 + int i;
1333
1334 - if (instr[0] != 0x90)
1335 - return;
1336 + for (i = 0; i < a->padlen; i++) {
1337 + if (instr[i] != 0x90)
1338 + return;
1339 + }
1340
1341 local_irq_save(flags);
1342 add_nops(instr + (a->instrlen - a->padlen), a->padlen);
1343 diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
1344 index 4a8697f7d4ef..33b63670bf09 100644
1345 --- a/arch/x86/kernel/cpu/Makefile
1346 +++ b/arch/x86/kernel/cpu/Makefile
1347 @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
1348 obj-y += common.o
1349 obj-y += rdrand.o
1350 obj-y += match.o
1351 +obj-y += bugs.o
1352
1353 obj-$(CONFIG_PROC_FS) += proc.o
1354 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
1355
1356 -obj-$(CONFIG_X86_32) += bugs.o
1357 -obj-$(CONFIG_X86_64) += bugs_64.o
1358 -
1359 obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
1360 obj-$(CONFIG_CPU_SUP_AMD) += amd.o
1361 obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
1362 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
1363 index 2b4cf04239b6..1b89f0c4251e 100644
1364 --- a/arch/x86/kernel/cpu/amd.c
1365 +++ b/arch/x86/kernel/cpu/amd.c
1366 @@ -782,8 +782,32 @@ static void init_amd(struct cpuinfo_x86 *c)
1367 set_cpu_cap(c, X86_FEATURE_K8);
1368
1369 if (cpu_has(c, X86_FEATURE_XMM2)) {
1370 - /* MFENCE stops RDTSC speculation */
1371 - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1372 + unsigned long long val;
1373 + int ret;
1374 +
1375 + /*
1376 + * A serializing LFENCE has less overhead than MFENCE, so
1377 + * use it for execution serialization. On families which
1378 + * don't have that MSR, LFENCE is already serializing.
1379 + * msr_set_bit() uses the safe accessors, too, even if the MSR
1380 + * is not present.
1381 + */
1382 + msr_set_bit(MSR_F10H_DECFG,
1383 + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
1384 +
1385 + /*
1386 + * Verify that the MSR write was successful (could be running
1387 + * under a hypervisor) and only then assume that LFENCE is
1388 + * serializing.
1389 + */
1390 + ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
1391 + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
1392 + /* A serializing LFENCE stops RDTSC speculation */
1393 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
1394 + } else {
1395 + /* MFENCE stops RDTSC speculation */
1396 + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1397 + }
1398 }
1399
1400 /*
1401 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1402 index 0b6124315441..49d25ddf0e9f 100644
1403 --- a/arch/x86/kernel/cpu/bugs.c
1404 +++ b/arch/x86/kernel/cpu/bugs.c
1405 @@ -9,6 +9,10 @@
1406 */
1407 #include <linux/init.h>
1408 #include <linux/utsname.h>
1409 +#include <linux/cpu.h>
1410 +
1411 +#include <asm/nospec-branch.h>
1412 +#include <asm/cmdline.h>
1413 #include <asm/bugs.h>
1414 #include <asm/processor.h>
1415 #include <asm/processor-flags.h>
1416 @@ -16,23 +20,24 @@
1417 #include <asm/msr.h>
1418 #include <asm/paravirt.h>
1419 #include <asm/alternative.h>
1420 +#include <asm/pgtable.h>
1421 +#include <asm/cacheflush.h>
1422 +
1423 +static void __init spectre_v2_select_mitigation(void);
1424
1425 void __init check_bugs(void)
1426 {
1427 -#ifdef CONFIG_X86_32
1428 - /*
1429 - * Regardless of whether PCID is enumerated, the SDM says
1430 - * that it can't be enabled in 32-bit mode.
1431 - */
1432 - setup_clear_cpu_cap(X86_FEATURE_PCID);
1433 -#endif
1434 -
1435 identify_boot_cpu();
1436 -#ifndef CONFIG_SMP
1437 - pr_info("CPU: ");
1438 - print_cpu_info(&boot_cpu_data);
1439 -#endif
1440
1441 + if (!IS_ENABLED(CONFIG_SMP)) {
1442 + pr_info("CPU: ");
1443 + print_cpu_info(&boot_cpu_data);
1444 + }
1445 +
1446 + /* Select the proper spectre mitigation before patching alternatives */
1447 + spectre_v2_select_mitigation();
1448 +
1449 +#ifdef CONFIG_X86_32
1450 /*
1451 * Check whether we are able to run this kernel safely on SMP.
1452 *
1453 @@ -48,4 +53,194 @@ void __init check_bugs(void)
1454 alternative_instructions();
1455
1456 fpu__init_check_bugs();
1457 +#else /* CONFIG_X86_64 */
1458 + alternative_instructions();
1459 +
1460 + /*
1461 + * Make sure the first 2MB area is not mapped by huge pages
1462 + * There are typically fixed size MTRRs in there and overlapping
1463 + * MTRRs into large pages causes slow downs.
1464 + *
1465 + * Right now we don't do that with gbpages because there seems
1466 + * very little benefit for that case.
1467 + */
1468 + if (!direct_gbpages)
1469 + set_memory_4k((unsigned long)__va(0), 1);
1470 +#endif
1471 +}
1472 +
1473 +/* The kernel command line selection */
1474 +enum spectre_v2_mitigation_cmd {
1475 + SPECTRE_V2_CMD_NONE,
1476 + SPECTRE_V2_CMD_AUTO,
1477 + SPECTRE_V2_CMD_FORCE,
1478 + SPECTRE_V2_CMD_RETPOLINE,
1479 + SPECTRE_V2_CMD_RETPOLINE_GENERIC,
1480 + SPECTRE_V2_CMD_RETPOLINE_AMD,
1481 +};
1482 +
1483 +static const char *spectre_v2_strings[] = {
1484 + [SPECTRE_V2_NONE] = "Vulnerable",
1485 + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
1486 + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
1487 + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
1488 + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
1489 +};
1490 +
1491 +#undef pr_fmt
1492 +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1493 +
1494 +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1495 +
1496 +static void __init spec2_print_if_insecure(const char *reason)
1497 +{
1498 + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1499 + pr_info("%s\n", reason);
1500 +}
1501 +
1502 +static void __init spec2_print_if_secure(const char *reason)
1503 +{
1504 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1505 + pr_info("%s\n", reason);
1506 +}
1507 +
1508 +static inline bool retp_compiler(void)
1509 +{
1510 + return __is_defined(RETPOLINE);
1511 +}
1512 +
1513 +static inline bool match_option(const char *arg, int arglen, const char *opt)
1514 +{
1515 + int len = strlen(opt);
1516 +
1517 + return len == arglen && !strncmp(arg, opt, len);
1518 +}
1519 +
1520 +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1521 +{
1522 + char arg[20];
1523 + int ret;
1524 +
1525 + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1526 + sizeof(arg));
1527 + if (ret > 0) {
1528 + if (match_option(arg, ret, "off")) {
1529 + goto disable;
1530 + } else if (match_option(arg, ret, "on")) {
1531 + spec2_print_if_secure("force enabled on command line.");
1532 + return SPECTRE_V2_CMD_FORCE;
1533 + } else if (match_option(arg, ret, "retpoline")) {
1534 + spec2_print_if_insecure("retpoline selected on command line.");
1535 + return SPECTRE_V2_CMD_RETPOLINE;
1536 + } else if (match_option(arg, ret, "retpoline,amd")) {
1537 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1538 + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1539 + return SPECTRE_V2_CMD_AUTO;
1540 + }
1541 + spec2_print_if_insecure("AMD retpoline selected on command line.");
1542 + return SPECTRE_V2_CMD_RETPOLINE_AMD;
1543 + } else if (match_option(arg, ret, "retpoline,generic")) {
1544 + spec2_print_if_insecure("generic retpoline selected on command line.");
1545 + return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
1546 + } else if (match_option(arg, ret, "auto")) {
1547 + return SPECTRE_V2_CMD_AUTO;
1548 + }
1549 + }
1550 +
1551 + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1552 + return SPECTRE_V2_CMD_AUTO;
1553 +disable:
1554 + spec2_print_if_insecure("disabled on command line.");
1555 + return SPECTRE_V2_CMD_NONE;
1556 }
1557 +
1558 +static void __init spectre_v2_select_mitigation(void)
1559 +{
1560 + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
1561 + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
1562 +
1563 + /*
1564 + * If the CPU is not affected and the command line mode is NONE or AUTO
1565 + * then nothing to do.
1566 + */
1567 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
1568 + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
1569 + return;
1570 +
1571 + switch (cmd) {
1572 + case SPECTRE_V2_CMD_NONE:
1573 + return;
1574 +
1575 + case SPECTRE_V2_CMD_FORCE:
1576 +		/* FALLTHRU */
1577 + case SPECTRE_V2_CMD_AUTO:
1578 + goto retpoline_auto;
1579 +
1580 + case SPECTRE_V2_CMD_RETPOLINE_AMD:
1581 + if (IS_ENABLED(CONFIG_RETPOLINE))
1582 + goto retpoline_amd;
1583 + break;
1584 + case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
1585 + if (IS_ENABLED(CONFIG_RETPOLINE))
1586 + goto retpoline_generic;
1587 + break;
1588 + case SPECTRE_V2_CMD_RETPOLINE:
1589 + if (IS_ENABLED(CONFIG_RETPOLINE))
1590 + goto retpoline_auto;
1591 + break;
1592 + }
1593 +	pr_err("kernel not compiled with retpoline; no mitigation available!\n");
1594 + return;
1595 +
1596 +retpoline_auto:
1597 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
1598 + retpoline_amd:
1599 + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
1600 + pr_err("LFENCE not serializing. Switching to generic retpoline\n");
1601 + goto retpoline_generic;
1602 + }
1603 + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
1604 + SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
1605 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
1606 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1607 + } else {
1608 + retpoline_generic:
1609 + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
1610 + SPECTRE_V2_RETPOLINE_MINIMAL;
1611 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1612 + }
1613 +
1614 + spectre_v2_enabled = mode;
1615 + pr_info("%s\n", spectre_v2_strings[mode]);
1616 +}
1617 +
1618 +#undef pr_fmt
1619 +
1620 +#ifdef CONFIG_SYSFS
1621 +ssize_t cpu_show_meltdown(struct device *dev,
1622 + struct device_attribute *attr, char *buf)
1623 +{
1624 + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1625 + return sprintf(buf, "Not affected\n");
1626 + if (boot_cpu_has(X86_FEATURE_KAISER))
1627 + return sprintf(buf, "Mitigation: PTI\n");
1628 + return sprintf(buf, "Vulnerable\n");
1629 +}
1630 +
1631 +ssize_t cpu_show_spectre_v1(struct device *dev,
1632 + struct device_attribute *attr, char *buf)
1633 +{
1634 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
1635 + return sprintf(buf, "Not affected\n");
1636 + return sprintf(buf, "Vulnerable\n");
1637 +}
1638 +
1639 +ssize_t cpu_show_spectre_v2(struct device *dev,
1640 + struct device_attribute *attr, char *buf)
1641 +{
1642 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1643 + return sprintf(buf, "Not affected\n");
1644 +
1645 + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
1646 +}
1647 +#endif
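
Note on the parser above: match_option() compares by explicit length because cmdline_find_option() hands back a token that is not NUL-terminated at the option boundary, so a plain strcmp() against "retpoline" would also have to worry about the longer "retpoline,amd" token. A minimal userspace sketch of the check (illustrative only; the kernel's cmdline_find_option() is assumed, not reproduced):

#include <stdio.h>
#include <string.h>

static int match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt);

	return len == arglen && !strncmp(arg, opt, len);
}

int main(void)
{
	const char arg[] = "retpoline,amd";
	int arglen = (int)strlen(arg);

	printf("%d\n", match_option(arg, arglen, "retpoline"));     /* 0: length differs */
	printf("%d\n", match_option(arg, arglen, "retpoline,amd")); /* 1: exact token */
	return 0;
}
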
1648 diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
1649 deleted file mode 100644
1650 index a972ac4c7e7d..000000000000
1651 --- a/arch/x86/kernel/cpu/bugs_64.c
1652 +++ /dev/null
1653 @@ -1,33 +0,0 @@
1654 -/*
1655 - * Copyright (C) 1994 Linus Torvalds
1656 - * Copyright (C) 2000 SuSE
1657 - */
1658 -
1659 -#include <linux/kernel.h>
1660 -#include <linux/init.h>
1661 -#include <asm/alternative.h>
1662 -#include <asm/bugs.h>
1663 -#include <asm/processor.h>
1664 -#include <asm/mtrr.h>
1665 -#include <asm/cacheflush.h>
1666 -
1667 -void __init check_bugs(void)
1668 -{
1669 - identify_boot_cpu();
1670 -#if !defined(CONFIG_SMP)
1671 - pr_info("CPU: ");
1672 - print_cpu_info(&boot_cpu_data);
1673 -#endif
1674 - alternative_instructions();
1675 -
1676 - /*
1677 - * Make sure the first 2MB area is not mapped by huge pages
1678 - * There are typically fixed size MTRRs in there and overlapping
1679 - * MTRRs into large pages causes slow downs.
1680 - *
1681 - * Right now we don't do that with gbpages because there seems
1682 - * very little benefit for that case.
1683 - */
1684 - if (!direct_gbpages)
1685 - set_memory_4k((unsigned long)__va(0), 1);
1686 -}
1687 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1688 index 918e44772b04..7b9ae04ddf5d 100644
1689 --- a/arch/x86/kernel/cpu/common.c
1690 +++ b/arch/x86/kernel/cpu/common.c
1691 @@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
1692 return NULL; /* Not found */
1693 }
1694
1695 -__u32 cpu_caps_cleared[NCAPINTS];
1696 -__u32 cpu_caps_set[NCAPINTS];
1697 +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
1698 +__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
1699
1700 void load_percpu_segment(int cpu)
1701 {
1702 @@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
1703 }
1704 }
1705
1706 +static void apply_forced_caps(struct cpuinfo_x86 *c)
1707 +{
1708 + int i;
1709 +
1710 + for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
1711 + c->x86_capability[i] &= ~cpu_caps_cleared[i];
1712 + c->x86_capability[i] |= cpu_caps_set[i];
1713 + }
1714 +}
1715 +
1716 void get_cpu_cap(struct cpuinfo_x86 *c)
1717 {
1718 u32 eax, ebx, ecx, edx;
1719 @@ -872,7 +882,22 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1720 }
1721
1722 setup_force_cpu_cap(X86_FEATURE_ALWAYS);
1723 +
1724 + /* Assume for now that ALL x86 CPUs are insecure */
1725 + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1726 +
1727 + setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1728 + setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1729 +
1730 fpu__init_system(c);
1731 +
1732 +#ifdef CONFIG_X86_32
1733 + /*
1734 + * Regardless of whether PCID is enumerated, the SDM says
1735 + * that it can't be enabled in 32-bit mode.
1736 + */
1737 + setup_clear_cpu_cap(X86_FEATURE_PCID);
1738 +#endif
1739 }
1740
1741 void __init early_cpu_init(void)
1742 @@ -1086,10 +1111,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
1743 this_cpu->c_identify(c);
1744
1745 /* Clear/Set all flags overridden by options, after probe */
1746 - for (i = 0; i < NCAPINTS; i++) {
1747 - c->x86_capability[i] &= ~cpu_caps_cleared[i];
1748 - c->x86_capability[i] |= cpu_caps_set[i];
1749 - }
1750 + apply_forced_caps(c);
1751
1752 #ifdef CONFIG_X86_64
1753 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
1754 @@ -1151,10 +1173,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
1755 * Clear/Set all flags overridden by options, need do it
1756 * before following smp all cpus cap AND.
1757 */
1758 - for (i = 0; i < NCAPINTS; i++) {
1759 - c->x86_capability[i] &= ~cpu_caps_cleared[i];
1760 - c->x86_capability[i] |= cpu_caps_set[i];
1761 - }
1762 + apply_forced_caps(c);
1763
1764 /*
1765 * On SMP, boot_cpu_data holds the common feature set between
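
A standalone sketch of the clear-then-set masking that apply_forced_caps() factors out above (illustrative C, not kernel code): clearing before setting means a bit present in both masks ends up set, and widening the arrays to NCAPINTS + NBUGINTS is what lets forced bug bits such as X86_BUG_CPU_MELTDOWN survive re-probing.

#include <stdio.h>

#define NWORDS 2 /* stand-in for NCAPINTS + NBUGINTS */

static unsigned int caps_cleared[NWORDS] = { 0x0f, 0 };
static unsigned int caps_set[NWORDS]     = { 0x104, 0 };

static void apply_forced_caps(unsigned int *caps)
{
	for (int i = 0; i < NWORDS; i++) {
		caps[i] &= ~caps_cleared[i];
		caps[i] |= caps_set[i];
	}
}

int main(void)
{
	unsigned int caps[NWORDS] = { 0xff, 0 };

	apply_forced_caps(caps);
	printf("%#x\n", caps[0]); /* 0x1f4: low nibble cleared, forced bits set */
	return 0;
}
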
1766 diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
1767 index 13dbcc0f9d03..ac3e636ad586 100644
1768 --- a/arch/x86/kernel/cpu/microcode/intel.c
1769 +++ b/arch/x86/kernel/cpu/microcode/intel.c
1770 @@ -1051,8 +1051,17 @@ static bool is_blacklisted(unsigned int cpu)
1771 {
1772 struct cpuinfo_x86 *c = &cpu_data(cpu);
1773
1774 - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
1775 - pr_err_once("late loading on model 79 is disabled.\n");
1776 + /*
1777 + * Late loading on model 79 with microcode revision less than 0x0b000021
1778 + * may result in a system hang. This behavior is documented in item
1779 + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
1780 + */
1781 + if (c->x86 == 6 &&
1782 + c->x86_model == INTEL_FAM6_BROADWELL_X &&
1783 + c->x86_mask == 0x01 &&
1784 + c->microcode < 0x0b000021) {
1785 + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
1786 + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1787 return true;
1788 }
1789
1790 diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
1791 index 1f38d9a4d9de..2763573ee1d2 100644
1792 --- a/arch/x86/kernel/irq_32.c
1793 +++ b/arch/x86/kernel/irq_32.c
1794 @@ -19,6 +19,7 @@
1795 #include <linux/mm.h>
1796
1797 #include <asm/apic.h>
1798 +#include <asm/nospec-branch.h>
1799
1800 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1801
1802 @@ -54,17 +55,17 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
1803 static void call_on_stack(void *func, void *stack)
1804 {
1805 asm volatile("xchgl %%ebx,%%esp \n"
1806 - "call *%%edi \n"
1807 + CALL_NOSPEC
1808 "movl %%ebx,%%esp \n"
1809 : "=b" (stack)
1810 : "0" (stack),
1811 - "D"(func)
1812 + [thunk_target] "D"(func)
1813 : "memory", "cc", "edx", "ecx", "eax");
1814 }
1815
1816 static inline void *current_stack(void)
1817 {
1818 - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
1819 + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
1820 }
1821
1822 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1823 @@ -88,17 +89,17 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1824
1825 /* Save the next esp at the bottom of the stack */
1826 prev_esp = (u32 *)irqstk;
1827 - *prev_esp = current_stack_pointer();
1828 + *prev_esp = current_stack_pointer;
1829
1830 if (unlikely(overflow))
1831 call_on_stack(print_stack_overflow, isp);
1832
1833 asm volatile("xchgl %%ebx,%%esp \n"
1834 - "call *%%edi \n"
1835 + CALL_NOSPEC
1836 "movl %%ebx,%%esp \n"
1837 : "=a" (arg1), "=b" (isp)
1838 : "0" (desc), "1" (isp),
1839 - "D" (desc->handle_irq)
1840 + [thunk_target] "D" (desc->handle_irq)
1841 : "memory", "cc", "ecx");
1842 return 1;
1843 }
1844 @@ -139,7 +140,7 @@ void do_softirq_own_stack(void)
1845
1846 /* Push the previous esp onto the stack */
1847 prev_esp = (u32 *)irqstk;
1848 - *prev_esp = current_stack_pointer();
1849 + *prev_esp = current_stack_pointer;
1850
1851 call_on_stack(__do_softirq, isp);
1852 }
1853 diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
1854 index 7b0d3da52fb4..287ec3bc141f 100644
1855 --- a/arch/x86/kernel/mcount_64.S
1856 +++ b/arch/x86/kernel/mcount_64.S
1857 @@ -8,7 +8,7 @@
1858 #include <asm/ptrace.h>
1859 #include <asm/ftrace.h>
1860 #include <asm/export.h>
1861 -
1862 +#include <asm/nospec-branch.h>
1863
1864 .code64
1865 .section .entry.text, "ax"
1866 @@ -290,8 +290,9 @@ trace:
1867 * ip and parent ip are used and the list function is called when
1868 * function tracing is enabled.
1869 */
1870 - call *ftrace_trace_function
1871
1872 + movq ftrace_trace_function, %r8
1873 + CALL_NOSPEC %r8
1874 restore_mcount_regs
1875
1876 jmp fgraph_trace
1877 @@ -334,5 +335,5 @@ GLOBAL(return_to_handler)
1878 movq 8(%rsp), %rdx
1879 movq (%rsp), %rax
1880 addq $24, %rsp
1881 - jmp *%rdi
1882 + JMP_NOSPEC %rdi
1883 #endif
1884 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
1885 index bd4e3d4d3625..322f433fbc76 100644
1886 --- a/arch/x86/kernel/traps.c
1887 +++ b/arch/x86/kernel/traps.c
1888 @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
1889 * from double_fault.
1890 */
1891 BUG_ON((unsigned long)(current_top_of_stack() -
1892 - current_stack_pointer()) >= THREAD_SIZE);
1893 + current_stack_pointer) >= THREAD_SIZE);
1894
1895 preempt_enable_no_resched();
1896 }
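
The hunks above (and the matching tlb.c hunk further down) drop the function-call syntax because current_stack_pointer is now read as a global register variable rather than through a helper. A userspace approximation, assuming an x86-64 GCC/Clang target (the kernel's definition uses _ASM_SP; this sketch hardcodes rsp for a 64-bit build):

#include <stdio.h>

register unsigned long current_stack_pointer asm("rsp");

int main(void)
{
	printf("stack pointer: %#lx\n", current_stack_pointer);
	return 0;
}
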
1897 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
1898 index 8148d8ca7930..24af898fb3a6 100644
1899 --- a/arch/x86/kvm/svm.c
1900 +++ b/arch/x86/kvm/svm.c
1901 @@ -44,6 +44,7 @@
1902 #include <asm/debugreg.h>
1903 #include <asm/kvm_para.h>
1904 #include <asm/irq_remapping.h>
1905 +#include <asm/nospec-branch.h>
1906
1907 #include <asm/virtext.h>
1908 #include "trace.h"
1909 @@ -4868,6 +4869,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1910 "mov %%r13, %c[r13](%[svm]) \n\t"
1911 "mov %%r14, %c[r14](%[svm]) \n\t"
1912 "mov %%r15, %c[r15](%[svm]) \n\t"
1913 +#endif
1914 + /*
1915 + * Clear host registers marked as clobbered to prevent
1916 + * speculative use.
1917 + */
1918 + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
1919 + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
1920 + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
1921 + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
1922 + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
1923 +#ifdef CONFIG_X86_64
1924 + "xor %%r8, %%r8 \n\t"
1925 + "xor %%r9, %%r9 \n\t"
1926 + "xor %%r10, %%r10 \n\t"
1927 + "xor %%r11, %%r11 \n\t"
1928 + "xor %%r12, %%r12 \n\t"
1929 + "xor %%r13, %%r13 \n\t"
1930 + "xor %%r14, %%r14 \n\t"
1931 + "xor %%r15, %%r15 \n\t"
1932 #endif
1933 "pop %%" _ASM_BP
1934 :
1935 @@ -4898,6 +4918,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1936 #endif
1937 );
1938
1939 + /* Eliminate branch target predictions from guest mode */
1940 + vmexit_fill_RSB();
1941 +
1942 #ifdef CONFIG_X86_64
1943 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
1944 #else
1945 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1946 index 263e56059fd5..3ca6d15994e4 100644
1947 --- a/arch/x86/kvm/vmx.c
1948 +++ b/arch/x86/kvm/vmx.c
1949 @@ -48,6 +48,7 @@
1950 #include <asm/kexec.h>
1951 #include <asm/apic.h>
1952 #include <asm/irq_remapping.h>
1953 +#include <asm/nospec-branch.h>
1954
1955 #include "trace.h"
1956 #include "pmu.h"
1957 @@ -857,8 +858,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
1958 {
1959 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
1960
1961 - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
1962 - vmcs_field_to_offset_table[field] == 0)
1963 + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
1964 + return -ENOENT;
1965 +
1966 + /*
1967 + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
1968 + * generic mechanism.
1969 + */
1970 + asm("lfence");
1971 +
1972 + if (vmcs_field_to_offset_table[field] == 0)
1973 return -ENOENT;
1974
1975 return vmcs_field_to_offset_table[field];
1976 @@ -8948,6 +8957,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1977 /* Save guest registers, load host registers, keep flags */
1978 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
1979 "pop %0 \n\t"
1980 + "setbe %c[fail](%0)\n\t"
1981 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
1982 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
1983 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
1984 @@ -8964,12 +8974,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1985 "mov %%r13, %c[r13](%0) \n\t"
1986 "mov %%r14, %c[r14](%0) \n\t"
1987 "mov %%r15, %c[r15](%0) \n\t"
1988 + "xor %%r8d, %%r8d \n\t"
1989 + "xor %%r9d, %%r9d \n\t"
1990 + "xor %%r10d, %%r10d \n\t"
1991 + "xor %%r11d, %%r11d \n\t"
1992 + "xor %%r12d, %%r12d \n\t"
1993 + "xor %%r13d, %%r13d \n\t"
1994 + "xor %%r14d, %%r14d \n\t"
1995 + "xor %%r15d, %%r15d \n\t"
1996 #endif
1997 "mov %%cr2, %%" _ASM_AX " \n\t"
1998 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
1999
2000 + "xor %%eax, %%eax \n\t"
2001 + "xor %%ebx, %%ebx \n\t"
2002 + "xor %%esi, %%esi \n\t"
2003 + "xor %%edi, %%edi \n\t"
2004 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
2005 - "setbe %c[fail](%0) \n\t"
2006 ".pushsection .rodata \n\t"
2007 ".global vmx_return \n\t"
2008 "vmx_return: " _ASM_PTR " 2b \n\t"
2009 @@ -9006,6 +9027,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2010 #endif
2011 );
2012
2013 + /* Eliminate branch target predictions from guest mode */
2014 + vmexit_fill_RSB();
2015 +
2016 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
2017 if (debugctlmsr)
2018 update_debugctlmsr(debugctlmsr);
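
The vmcs_field_to_offset() change above splits the bounds check from the table load and puts a serializing lfence between them, so the CPU cannot speculatively index past the array before the branch resolves. The pattern in isolation (illustrative x86 sketch; the generic replacement the FIXME refers to is not part of this patch and is not shown):

#include <stdio.h>

static const short offset_table[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

static short lookup(unsigned long field)
{
	if (field >= sizeof(offset_table) / sizeof(offset_table[0]))
		return -1;

	/* Speculation barrier: no dependent load until the bound check retires. */
	asm volatile("lfence" ::: "memory");

	return offset_table[field];
}

int main(void)
{
	printf("%d\n", lookup(3));  /* 4 */
	printf("%d\n", lookup(99)); /* -1, and never speculatively loaded */
	return 0;
}
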
2019 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
2020 index 73304b1a03cc..d3f80cccb9aa 100644
2021 --- a/arch/x86/kvm/x86.c
2022 +++ b/arch/x86/kvm/x86.c
2023 @@ -4264,7 +4264,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2024 addr, n, v))
2025 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
2026 break;
2027 - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
2028 + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
2029 handled += n;
2030 addr += n;
2031 len -= n;
2032 @@ -4517,7 +4517,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
2033 {
2034 if (vcpu->mmio_read_completed) {
2035 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
2036 - vcpu->mmio_fragments[0].gpa, *(u64 *)val);
2037 + vcpu->mmio_fragments[0].gpa, val);
2038 vcpu->mmio_read_completed = 0;
2039 return 1;
2040 }
2041 @@ -4539,14 +4539,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
2042
2043 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
2044 {
2045 - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
2046 + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
2047 return vcpu_mmio_write(vcpu, gpa, bytes, val);
2048 }
2049
2050 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
2051 void *val, int bytes)
2052 {
2053 - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
2054 + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
2055 return X86EMUL_IO_NEEDED;
2056 }
2057
2058 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
2059 index 34a74131a12c..6bf1898ddf49 100644
2060 --- a/arch/x86/lib/Makefile
2061 +++ b/arch/x86/lib/Makefile
2062 @@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o
2063 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
2064 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
2065 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
2066 +lib-$(CONFIG_RETPOLINE) += retpoline.o
2067
2068 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
2069
2070 diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
2071 index 4d34bb548b41..46e71a74e612 100644
2072 --- a/arch/x86/lib/checksum_32.S
2073 +++ b/arch/x86/lib/checksum_32.S
2074 @@ -29,7 +29,8 @@
2075 #include <asm/errno.h>
2076 #include <asm/asm.h>
2077 #include <asm/export.h>
2078 -
2079 +#include <asm/nospec-branch.h>
2080 +
2081 /*
2082 * computes a partial checksum, e.g. for TCP/UDP fragments
2083 */
2084 @@ -156,7 +157,7 @@ ENTRY(csum_partial)
2085 negl %ebx
2086 lea 45f(%ebx,%ebx,2), %ebx
2087 testl %esi, %esi
2088 - jmp *%ebx
2089 + JMP_NOSPEC %ebx
2090
2091 # Handle 2-byte-aligned regions
2092 20: addw (%esi), %ax
2093 @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
2094 andl $-32,%edx
2095 lea 3f(%ebx,%ebx), %ebx
2096 testl %esi, %esi
2097 - jmp *%ebx
2098 + JMP_NOSPEC %ebx
2099 1: addl $64,%esi
2100 addl $64,%edi
2101 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
2102 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
2103 new file mode 100644
2104 index 000000000000..cb45c6cb465f
2105 --- /dev/null
2106 +++ b/arch/x86/lib/retpoline.S
2107 @@ -0,0 +1,48 @@
2108 +/* SPDX-License-Identifier: GPL-2.0 */
2109 +
2110 +#include <linux/stringify.h>
2111 +#include <linux/linkage.h>
2112 +#include <asm/dwarf2.h>
2113 +#include <asm/cpufeatures.h>
2114 +#include <asm/alternative-asm.h>
2115 +#include <asm/export.h>
2116 +#include <asm/nospec-branch.h>
2117 +
2118 +.macro THUNK reg
2119 + .section .text.__x86.indirect_thunk.\reg
2120 +
2121 +ENTRY(__x86_indirect_thunk_\reg)
2122 + CFI_STARTPROC
2123 + JMP_NOSPEC %\reg
2124 + CFI_ENDPROC
2125 +ENDPROC(__x86_indirect_thunk_\reg)
2126 +.endm
2127 +
2128 +/*
2129 + * Despite being an assembler file we can't just use .irp here
2130 + * because __KSYM_DEPS__ only uses the C preprocessor and would
2131 + * only see one instance of "__x86_indirect_thunk_\reg" rather
2132 + * than one per register with the correct names. So we do it
2133 + * the simple and nasty way...
2134 + */
2135 +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
2136 +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
2137 +
2138 +GENERATE_THUNK(_ASM_AX)
2139 +GENERATE_THUNK(_ASM_BX)
2140 +GENERATE_THUNK(_ASM_CX)
2141 +GENERATE_THUNK(_ASM_DX)
2142 +GENERATE_THUNK(_ASM_SI)
2143 +GENERATE_THUNK(_ASM_DI)
2144 +GENERATE_THUNK(_ASM_BP)
2145 +GENERATE_THUNK(_ASM_SP)
2146 +#ifdef CONFIG_64BIT
2147 +GENERATE_THUNK(r8)
2148 +GENERATE_THUNK(r9)
2149 +GENERATE_THUNK(r10)
2150 +GENERATE_THUNK(r11)
2151 +GENERATE_THUNK(r12)
2152 +GENERATE_THUNK(r13)
2153 +GENERATE_THUNK(r14)
2154 +GENERATE_THUNK(r15)
2155 +#endif
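
These thunks are the landing pads for compiler-generated indirect branches. With a retpoline-capable compiler (assumption: GCC's -mindirect-branch=thunk-extern, which CONFIG_RETPOLINE builds rely on), an ordinary indirect call is emitted as a direct call to __x86_indirect_thunk_<reg> instead of call *%reg:

#include <stdio.h>

static void handler(void)
{
	puts("dispatched");
}

int main(void)
{
	void (*fp)(void) = handler;

	/* Under -mindirect-branch=thunk-extern this compiles to
	 * "call __x86_indirect_thunk_rax" (register allocation
	 * permitting) rather than an indirect "call *%rax".
	 */
	fp();
	return 0;
}
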
2156 diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
2157 index 8f8e5e03d083..a8ade08a9bf5 100644
2158 --- a/arch/x86/mm/kaiser.c
2159 +++ b/arch/x86/mm/kaiser.c
2160 @@ -197,6 +197,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
2161 * requires that not to be #defined to 0): so mask it off here.
2162 */
2163 flags &= ~_PAGE_GLOBAL;
2164 + if (!(__supported_pte_mask & _PAGE_NX))
2165 + flags &= ~_PAGE_NX;
2166
2167 for (; address < end_addr; address += PAGE_SIZE) {
2168 target_address = get_pa_from_mapping(address);
2169 diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
2170 index 5aaec8effc5f..209b9465e97a 100644
2171 --- a/arch/x86/mm/pgtable.c
2172 +++ b/arch/x86/mm/pgtable.c
2173 @@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
2174 }
2175 #else
2176
2177 -/*
2178 - * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
2179 - * both 8k in size and 8k-aligned. That lets us just flip bit 12
2180 - * in a pointer to swap between the two 4k halves.
2181 - */
2182 -#define PGD_ALLOCATION_ORDER kaiser_enabled
2183 -
2184 static inline pgd_t *_pgd_alloc(void)
2185 {
2186 return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
2187 diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
2188 index 41205de487e7..578973ade71b 100644
2189 --- a/arch/x86/mm/tlb.c
2190 +++ b/arch/x86/mm/tlb.c
2191 @@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
2192 * mapped in the new pgd, we'll double-fault. Forcibly
2193 * map it.
2194 */
2195 - unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
2196 + unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
2197
2198 pgd_t *pgd = next->pgd + stack_pgd_index;
2199
2200 diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
2201 index 2f25a363068c..dcb2d9d185a2 100644
2202 --- a/arch/x86/platform/efi/efi_64.c
2203 +++ b/arch/x86/platform/efi/efi_64.c
2204 @@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void)
2205 return 0;
2206
2207 gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
2208 - efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
2209 + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
2210 if (!efi_pgd)
2211 return -ENOMEM;
2212
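
The efi_64.c change depends on PGD_ALLOCATION_ORDER now living in a shared header (the pgtable.c hunk above removes the local copy). As the removed comment explained, the order is 1 when kaiser_enabled, i.e. 2^1 = 2 pages, so the kernel and shadow pgd halves fit in one allocation. A one-liner to make the order-to-pages relation concrete (illustrative):

#include <stdio.h>

int main(void)
{
	for (int order = 0; order <= 1; order++)
		printf("order %d -> %d page(s)\n", order, 1 << order);
	return 0;
}
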
2213 diff --git a/crypto/algapi.c b/crypto/algapi.c
2214 index 1fad2a6b3bbb..5c098ffa7d3d 100644
2215 --- a/crypto/algapi.c
2216 +++ b/crypto/algapi.c
2217 @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
2218
2219 spawn->alg = NULL;
2220 spawns = &inst->alg.cra_users;
2221 +
2222 + /*
2223 + * We may encounter an unregistered instance here, since
2224 + * an instance's spawns are set up prior to the instance
2225 + * being registered. An unregistered instance will have
2226 + * NULL ->cra_users.next, since ->cra_users isn't
2227 + * properly initialized until registration. But an
2228 + * unregistered instance cannot have any users, so treat
2229 + * it the same as ->cra_users being empty.
2230 + */
2231 + if (spawns->next == NULL)
2232 + break;
2233 }
2234 } while ((spawns = crypto_more_spawns(alg, &stack, &top,
2235 &secondary_spawns)));
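
The spawns->next == NULL test above works because a not-yet-registered instance comes from zeroed memory, while registration initializes the list head to point at itself. A minimal sketch of that distinction (illustrative; struct list_head reduced to its two pointers):

#include <stdio.h>
#include <string.h>

struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *head)
{
	head->next = head->prev = head;
}

int main(void)
{
	struct list_head unregistered, registered;

	memset(&unregistered, 0, sizeof(unregistered)); /* as from kzalloc() */
	INIT_LIST_HEAD(&registered);

	printf("unregistered: next == NULL -> %d\n", unregistered.next == NULL);
	printf("registered, empty: next == &head -> %d\n",
	       registered.next == &registered);
	return 0;
}
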
2236 diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
2237 index d02e7c0f5bfd..0651010bba21 100644
2238 --- a/drivers/base/Kconfig
2239 +++ b/drivers/base/Kconfig
2240 @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES
2241 config GENERIC_CPU_AUTOPROBE
2242 bool
2243
2244 +config GENERIC_CPU_VULNERABILITIES
2245 + bool
2246 +
2247 config SOC_BUS
2248 bool
2249
2250 diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
2251 index 4c28e1a09786..56b6c8508a89 100644
2252 --- a/drivers/base/cpu.c
2253 +++ b/drivers/base/cpu.c
2254 @@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void)
2255 #endif
2256 }
2257
2258 +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
2259 +
2260 +ssize_t __weak cpu_show_meltdown(struct device *dev,
2261 + struct device_attribute *attr, char *buf)
2262 +{
2263 + return sprintf(buf, "Not affected\n");
2264 +}
2265 +
2266 +ssize_t __weak cpu_show_spectre_v1(struct device *dev,
2267 + struct device_attribute *attr, char *buf)
2268 +{
2269 + return sprintf(buf, "Not affected\n");
2270 +}
2271 +
2272 +ssize_t __weak cpu_show_spectre_v2(struct device *dev,
2273 + struct device_attribute *attr, char *buf)
2274 +{
2275 + return sprintf(buf, "Not affected\n");
2276 +}
2277 +
2278 +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
2279 +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
2280 +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
2281 +
2282 +static struct attribute *cpu_root_vulnerabilities_attrs[] = {
2283 + &dev_attr_meltdown.attr,
2284 + &dev_attr_spectre_v1.attr,
2285 + &dev_attr_spectre_v2.attr,
2286 + NULL
2287 +};
2288 +
2289 +static const struct attribute_group cpu_root_vulnerabilities_group = {
2290 + .name = "vulnerabilities",
2291 + .attrs = cpu_root_vulnerabilities_attrs,
2292 +};
2293 +
2294 +static void __init cpu_register_vulnerabilities(void)
2295 +{
2296 + if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
2297 + &cpu_root_vulnerabilities_group))
2298 + pr_err("Unable to register CPU vulnerabilities\n");
2299 +}
2300 +
2301 +#else
2302 +static inline void cpu_register_vulnerabilities(void) { }
2303 +#endif
2304 +
2305 void __init cpu_dev_init(void)
2306 {
2307 if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
2308 panic("Failed to register CPU subsystem");
2309
2310 cpu_dev_register_generic();
2311 + cpu_register_vulnerabilities();
2312 }
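
Once registered, the group appears as /sys/devices/system/cpu/vulnerabilities with one read-only file per issue, reporting the strings from the ABI entry ("Not affected", "Vulnerable", or "Mitigation: ..."). A small userspace reader (illustrative; minimal error handling on purpose):

#include <stdio.h>

int main(void)
{
	static const char *const files[] = {
		"/sys/devices/system/cpu/vulnerabilities/meltdown",
		"/sys/devices/system/cpu/vulnerabilities/spectre_v1",
		"/sys/devices/system/cpu/vulnerabilities/spectre_v2",
	};
	char line[128];

	for (unsigned i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		FILE *f = fopen(files[i], "r");

		if (!f)
			continue; /* kernel predates this ABI */
		if (fgets(line, sizeof(line), f))
			printf("%s: %s", files[i], line);
		fclose(f);
	}
	return 0;
}

From a shell, `grep . /sys/devices/system/cpu/vulnerabilities/*` shows the same strings.
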
2313 diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
2314 index 24f4b544d270..e32badd26c8a 100644
2315 --- a/drivers/block/rbd.c
2316 +++ b/drivers/block/rbd.c
2317 @@ -4511,7 +4511,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
2318 segment_size = rbd_obj_bytes(&rbd_dev->header);
2319 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
2320 q->limits.max_sectors = queue_max_hw_sectors(q);
2321 - blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
2322 + blk_queue_max_segments(q, USHRT_MAX);
2323 blk_queue_max_segment_size(q, segment_size);
2324 blk_queue_io_min(q, segment_size);
2325 blk_queue_io_opt(q, segment_size);
2326 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2327 index fefb9d995d2c..81f5a552e32f 100644
2328 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2329 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2330 @@ -2729,6 +2729,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
2331 }
2332
2333 view_type = vmw_view_cmd_to_type(header->id);
2334 + if (view_type == vmw_view_max)
2335 + return -EINVAL;
2336 cmd = container_of(header, typeof(*cmd), header);
2337 ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
2338 user_surface_converter,
2339 diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
2340 index e0a8216ecf2b..13c32eb40738 100644
2341 --- a/drivers/hv/hv.c
2342 +++ b/drivers/hv/hv.c
2343 @@ -31,6 +31,7 @@
2344 #include <linux/clockchips.h>
2345 #include <asm/hyperv.h>
2346 #include <asm/mshyperv.h>
2347 +#include <asm/nospec-branch.h>
2348 #include "hyperv_vmbus.h"
2349
2350 /* The one and only */
2351 @@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
2352 return (u64)ULLONG_MAX;
2353
2354 __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
2355 - __asm__ __volatile__("call *%3" : "=a" (hv_status) :
2356 + __asm__ __volatile__(CALL_NOSPEC :
2357 + "=a" (hv_status) :
2358 "c" (control), "d" (input_address),
2359 - "m" (hypercall_page));
2360 + THUNK_TARGET(hypercall_page));
2361
2362 return hv_status;
2363
2364 @@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
2365 if (!hypercall_page)
2366 return (u64)ULLONG_MAX;
2367
2368 - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
2369 + __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
2370 "=a"(hv_status_lo) : "d" (control_hi),
2371 "a" (control_lo), "b" (input_address_hi),
2372 "c" (input_address_lo), "D"(output_address_hi),
2373 - "S"(output_address_lo), "m" (hypercall_page));
2374 + "S"(output_address_lo),
2375 + THUNK_TARGET(hypercall_page));
2376
2377 return hv_status_lo | ((u64)hv_status_hi << 32);
2378 #endif /* !x86_64 */
2379 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
2380 index b9748970df4a..29ab814693fc 100644
2381 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
2382 +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
2383 @@ -992,8 +992,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
2384 return -ENOMEM;
2385
2386 attr->qp_state = IB_QPS_INIT;
2387 - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
2388 - IB_ACCESS_REMOTE_WRITE;
2389 + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
2390 attr->port_num = ch->sport->port;
2391 attr->pkey_index = 0;
2392
2393 diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
2394 index 7643f72adb1c..3ec647e8b9c6 100644
2395 --- a/drivers/md/dm-bufio.c
2396 +++ b/drivers/md/dm-bufio.c
2397 @@ -1554,7 +1554,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
2398 int l;
2399 struct dm_buffer *b, *tmp;
2400 unsigned long freed = 0;
2401 - unsigned long count = nr_to_scan;
2402 + unsigned long count = c->n_buffers[LIST_CLEAN] +
2403 + c->n_buffers[LIST_DIRTY];
2404 unsigned long retain_target = get_retain_buffers(c);
2405
2406 for (l = 0; l < LIST_SIZE; l++) {
2407 @@ -1591,6 +1592,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
2408 {
2409 struct dm_bufio_client *c;
2410 unsigned long count;
2411 + unsigned long retain_target;
2412
2413 c = container_of(shrink, struct dm_bufio_client, shrinker);
2414 if (sc->gfp_mask & __GFP_FS)
2415 @@ -1599,8 +1601,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
2416 return 0;
2417
2418 count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
2419 + retain_target = get_retain_buffers(c);
2420 dm_bufio_unlock(c);
2421 - return count;
2422 + return (count < retain_target) ? 0 : (count - retain_target);
2423 }
2424
2425 /*
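
dm_bufio_shrink_count() above now reports only what is actually reclaimable, clamping at the retain target so the unsigned subtraction cannot wrap when fewer buffers exist than the target. The arithmetic in isolation (illustrative):

#include <stdio.h>

static unsigned long reclaimable(unsigned long count, unsigned long retain_target)
{
	return (count < retain_target) ? 0 : (count - retain_target);
}

int main(void)
{
	printf("%lu\n", reclaimable(100, 40)); /* 60 buffers offered to the shrinker */
	printf("%lu\n", reclaimable(30, 40));  /* 0, not a wrapped huge count */
	return 0;
}
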
2426 diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
2427 index eea9aea14b00..5d5012337d9e 100644
2428 --- a/drivers/net/can/usb/gs_usb.c
2429 +++ b/drivers/net/can/usb/gs_usb.c
2430 @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
2431 dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
2432 rc);
2433
2434 - return rc;
2435 + return (rc > 0) ? 0 : rc;
2436 }
2437
2438 static void gs_usb_xmit_callback(struct urb *urb)
2439 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
2440 index f3aaca743ea3..8a48656a376b 100644
2441 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
2442 +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
2443 @@ -1364,6 +1364,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
2444 * Checks to see of the link status of the hardware has changed. If a
2445 * change in link status has been detected, then we read the PHY registers
2446 * to get the current speed/duplex if link exists.
2447 + *
2448 + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
2449 + * up).
2450 **/
2451 static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2452 {
2453 @@ -1379,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2454 * Change or Rx Sequence Error interrupt.
2455 */
2456 if (!mac->get_link_status)
2457 - return 0;
2458 + return 1;
2459
2460 /* First we want to see if the MII Status Register reports
2461 * link. If so, then we want to get the current speed/duplex
2462 @@ -1611,10 +1614,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2463 * different link partner.
2464 */
2465 ret_val = e1000e_config_fc_after_link_up(hw);
2466 - if (ret_val)
2467 + if (ret_val) {
2468 e_dbg("Error configuring flow control\n");
2469 + return ret_val;
2470 + }
2471
2472 - return ret_val;
2473 + return 1;
2474 }
2475
2476 static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
2477 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2478 index 9e31a3390154..8aa91ddff287 100644
2479 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2480 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2481 @@ -1328,9 +1328,9 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
2482 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
2483 bool removing)
2484 {
2485 - if (!removing && !nh->should_offload)
2486 + if (!removing)
2487 nh->should_offload = 1;
2488 - else if (removing && nh->offloaded)
2489 + else
2490 nh->should_offload = 0;
2491 nh->update = 1;
2492 }
2493 diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
2494 index 2140dedab712..b6816ae00b7a 100644
2495 --- a/drivers/net/ethernet/renesas/sh_eth.c
2496 +++ b/drivers/net/ethernet/renesas/sh_eth.c
2497 @@ -3087,18 +3087,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
2498 /* ioremap the TSU registers */
2499 if (mdp->cd->tsu) {
2500 struct resource *rtsu;
2501 +
2502 rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
2503 - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
2504 - if (IS_ERR(mdp->tsu_addr)) {
2505 - ret = PTR_ERR(mdp->tsu_addr);
2506 + if (!rtsu) {
2507 + dev_err(&pdev->dev, "no TSU resource\n");
2508 + ret = -ENODEV;
2509 + goto out_release;
2510 + }
2511 + /* We can only request the TSU region for the first port
2512 + * of the two sharing this TSU for the probe to succeed...
2513 + */
2514 + if (devno % 2 == 0 &&
2515 + !devm_request_mem_region(&pdev->dev, rtsu->start,
2516 + resource_size(rtsu),
2517 + dev_name(&pdev->dev))) {
2518 + dev_err(&pdev->dev, "can't request TSU resource.\n");
2519 + ret = -EBUSY;
2520 + goto out_release;
2521 + }
2522 + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
2523 + resource_size(rtsu));
2524 + if (!mdp->tsu_addr) {
2525 + dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
2526 + ret = -ENOMEM;
2527 goto out_release;
2528 }
2529 mdp->port = devno % 2;
2530 ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
2531 }
2532
2533 - /* initialize first or needed device */
2534 - if (!devno || pd->needs_init) {
2535 + /* Need to init only the first port of the two sharing a TSU */
2536 + if (devno % 2 == 0) {
2537 if (mdp->cd->chip_reset)
2538 mdp->cd->chip_reset(ndev);
2539
2540 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2541 index adf61a7b1b01..98bbb91336e4 100644
2542 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2543 +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2544 @@ -280,8 +280,14 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
2545 bool stmmac_eee_init(struct stmmac_priv *priv)
2546 {
2547 unsigned long flags;
2548 + int interface = priv->plat->interface;
2549 bool ret = false;
2550
2551 + if ((interface != PHY_INTERFACE_MODE_MII) &&
2552 + (interface != PHY_INTERFACE_MODE_GMII) &&
2553 + !phy_interface_mode_is_rgmii(interface))
2554 + goto out;
2555 +
2556 /* Using PCS we cannot dial with the phy registers at this stage
2557 * so we do not support extra feature like EEE.
2558 */
2559 diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
2560 index e221bfcee76b..947bea81d924 100644
2561 --- a/drivers/net/usb/cx82310_eth.c
2562 +++ b/drivers/net/usb/cx82310_eth.c
2563 @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
2564 {
2565 int len = skb->len;
2566
2567 - if (skb_headroom(skb) < 2) {
2568 - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
2569 + if (skb_cow_head(skb, 2)) {
2570 dev_kfree_skb_any(skb);
2571 - skb = skb2;
2572 - if (!skb)
2573 - return NULL;
2574 + return NULL;
2575 }
2576 skb_push(skb, 2);
2577
2578 diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
2579 index f33460cec79f..9c257ffedb15 100644
2580 --- a/drivers/net/usb/lan78xx.c
2581 +++ b/drivers/net/usb/lan78xx.c
2582 @@ -2419,14 +2419,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
2583 {
2584 u32 tx_cmd_a, tx_cmd_b;
2585
2586 - if (skb_headroom(skb) < TX_OVERHEAD) {
2587 - struct sk_buff *skb2;
2588 -
2589 - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
2590 + if (skb_cow_head(skb, TX_OVERHEAD)) {
2591 dev_kfree_skb_any(skb);
2592 - skb = skb2;
2593 - if (!skb)
2594 - return NULL;
2595 + return NULL;
2596 }
2597
2598 if (lan78xx_linearize(skb) < 0)
2599 diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
2600 index 9af9799935db..4cb9b11a545a 100644
2601 --- a/drivers/net/usb/smsc75xx.c
2602 +++ b/drivers/net/usb/smsc75xx.c
2603 @@ -2205,13 +2205,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
2604 {
2605 u32 tx_cmd_a, tx_cmd_b;
2606
2607 - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
2608 - struct sk_buff *skb2 =
2609 - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
2610 + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
2611 dev_kfree_skb_any(skb);
2612 - skb = skb2;
2613 - if (!skb)
2614 - return NULL;
2615 + return NULL;
2616 }
2617
2618 tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
2619 diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
2620 index 4a1e9c489f1f..aadfe1d1c37e 100644
2621 --- a/drivers/net/usb/sr9700.c
2622 +++ b/drivers/net/usb/sr9700.c
2623 @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
2624
2625 len = skb->len;
2626
2627 - if (skb_headroom(skb) < SR_TX_OVERHEAD) {
2628 - struct sk_buff *skb2;
2629 -
2630 - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
2631 + if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
2632 dev_kfree_skb_any(skb);
2633 - skb = skb2;
2634 - if (!skb)
2635 - return NULL;
2636 + return NULL;
2637 }
2638
2639 __skb_push(skb, SR_TX_OVERHEAD);
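
The four USB network drivers above (cx82310_eth, lan78xx, smsc75xx, sr9700) all make the same conversion from skb_copy_expand() to skb_cow_head(): instead of cloning into a second skb when headroom is short, the one skb is reworked in place and the frame is simply dropped on allocation failure. A plain-C stand-in for the headroom logic (illustrative; struct buf is a toy, not struct sk_buff):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	unsigned char *head, *data;
	size_t len;
};

static int cow_head(struct buf *b, size_t headroom)
{
	size_t have = (size_t)(b->data - b->head);
	unsigned char *n;

	if (have >= headroom)
		return 0; /* enough room, nothing to do */

	n = malloc(headroom + b->len);
	if (!n)
		return -1; /* caller frees the frame and bails */
	memcpy(n + headroom, b->data, b->len);
	free(b->head);
	b->head = n;
	b->data = n + headroom;
	return 0;
}

int main(void)
{
	unsigned char *p = malloc(16);
	struct buf b = { .head = p, .data = p, .len = 16 };

	memset(p, 0xab, 16);
	printf("cow_head: %d\n", cow_head(&b, 2)); /* 0 on success */
	free(b.head);
	return 0;
}
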
2640 diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
2641 index 0b4c1562420f..ba1fe61e6ea6 100644
2642 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
2643 +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
2644 @@ -548,6 +548,11 @@ static int ath10k_htt_rx_crypto_param_len(struct ath10k *ar,
2645 return IEEE80211_TKIP_IV_LEN;
2646 case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
2647 return IEEE80211_CCMP_HDR_LEN;
2648 + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
2649 + return IEEE80211_CCMP_256_HDR_LEN;
2650 + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
2651 + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
2652 + return IEEE80211_GCMP_HDR_LEN;
2653 case HTT_RX_MPDU_ENCRYPT_WEP128:
2654 case HTT_RX_MPDU_ENCRYPT_WAPI:
2655 break;
2656 @@ -573,6 +578,11 @@ static int ath10k_htt_rx_crypto_tail_len(struct ath10k *ar,
2657 return IEEE80211_TKIP_ICV_LEN;
2658 case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
2659 return IEEE80211_CCMP_MIC_LEN;
2660 + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
2661 + return IEEE80211_CCMP_256_MIC_LEN;
2662 + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
2663 + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
2664 + return IEEE80211_GCMP_MIC_LEN;
2665 case HTT_RX_MPDU_ENCRYPT_WEP128:
2666 case HTT_RX_MPDU_ENCRYPT_WAPI:
2667 break;
2668 @@ -1024,9 +1034,21 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar,
2669 hdr = (void *)msdu->data;
2670
2671 /* Tail */
2672 - if (status->flag & RX_FLAG_IV_STRIPPED)
2673 + if (status->flag & RX_FLAG_IV_STRIPPED) {
2674 skb_trim(msdu, msdu->len -
2675 ath10k_htt_rx_crypto_tail_len(ar, enctype));
2676 + } else {
2677 + /* MIC */
2678 + if ((status->flag & RX_FLAG_MIC_STRIPPED) &&
2679 + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
2680 + skb_trim(msdu, msdu->len - 8);
2681 +
2682 + /* ICV */
2683 + if (status->flag & RX_FLAG_ICV_STRIPPED &&
2684 + enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
2685 + skb_trim(msdu, msdu->len -
2686 + ath10k_htt_rx_crypto_tail_len(ar, enctype));
2687 + }
2688
2689 /* MMIC */
2690 if ((status->flag & RX_FLAG_MMIC_STRIPPED) &&
2691 @@ -1048,7 +1070,8 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar,
2692 static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2693 struct sk_buff *msdu,
2694 struct ieee80211_rx_status *status,
2695 - const u8 first_hdr[64])
2696 + const u8 first_hdr[64],
2697 + enum htt_rx_mpdu_encrypt_type enctype)
2698 {
2699 struct ieee80211_hdr *hdr;
2700 struct htt_rx_desc *rxd;
2701 @@ -1056,6 +1079,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2702 u8 da[ETH_ALEN];
2703 u8 sa[ETH_ALEN];
2704 int l3_pad_bytes;
2705 + int bytes_aligned = ar->hw_params.decap_align_bytes;
2706
2707 /* Delivered decapped frame:
2708 * [nwifi 802.11 header] <-- replaced with 802.11 hdr
2709 @@ -1084,6 +1108,14 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2710 /* push original 802.11 header */
2711 hdr = (struct ieee80211_hdr *)first_hdr;
2712 hdr_len = ieee80211_hdrlen(hdr->frame_control);
2713 +
2714 + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2715 + memcpy(skb_push(msdu,
2716 + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2717 + (void *)hdr + round_up(hdr_len, bytes_aligned),
2718 + ath10k_htt_rx_crypto_param_len(ar, enctype));
2719 + }
2720 +
2721 memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2722
2723 /* original 802.11 header has a different DA and in
2724 @@ -1144,6 +1176,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2725 u8 sa[ETH_ALEN];
2726 int l3_pad_bytes;
2727 struct htt_rx_desc *rxd;
2728 + int bytes_aligned = ar->hw_params.decap_align_bytes;
2729
2730 /* Delivered decapped frame:
2731 * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc
2732 @@ -1172,6 +1205,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2733 /* push original 802.11 header */
2734 hdr = (struct ieee80211_hdr *)first_hdr;
2735 hdr_len = ieee80211_hdrlen(hdr->frame_control);
2736 +
2737 + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2738 + memcpy(skb_push(msdu,
2739 + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2740 + (void *)hdr + round_up(hdr_len, bytes_aligned),
2741 + ath10k_htt_rx_crypto_param_len(ar, enctype));
2742 + }
2743 +
2744 memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2745
2746 /* original 802.11 header has a different DA and in
2747 @@ -1185,12 +1226,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2748 static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar,
2749 struct sk_buff *msdu,
2750 struct ieee80211_rx_status *status,
2751 - const u8 first_hdr[64])
2752 + const u8 first_hdr[64],
2753 + enum htt_rx_mpdu_encrypt_type enctype)
2754 {
2755 struct ieee80211_hdr *hdr;
2756 size_t hdr_len;
2757 int l3_pad_bytes;
2758 struct htt_rx_desc *rxd;
2759 + int bytes_aligned = ar->hw_params.decap_align_bytes;
2760
2761 /* Delivered decapped frame:
2762 * [amsdu header] <-- replaced with 802.11 hdr
2763 @@ -1206,6 +1249,14 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar,
2764
2765 hdr = (struct ieee80211_hdr *)first_hdr;
2766 hdr_len = ieee80211_hdrlen(hdr->frame_control);
2767 +
2768 + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2769 + memcpy(skb_push(msdu,
2770 + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2771 + (void *)hdr + round_up(hdr_len, bytes_aligned),
2772 + ath10k_htt_rx_crypto_param_len(ar, enctype));
2773 + }
2774 +
2775 memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2776 }
2777
2778 @@ -1240,13 +1291,15 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar,
2779 is_decrypted);
2780 break;
2781 case RX_MSDU_DECAP_NATIVE_WIFI:
2782 - ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr);
2783 + ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr,
2784 + enctype);
2785 break;
2786 case RX_MSDU_DECAP_ETHERNET2_DIX:
2787 ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype);
2788 break;
2789 case RX_MSDU_DECAP_8023_SNAP_LLC:
2790 - ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr);
2791 + ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr,
2792 + enctype);
2793 break;
2794 }
2795 }
2796 @@ -1289,7 +1342,8 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu)
2797
2798 static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2799 struct sk_buff_head *amsdu,
2800 - struct ieee80211_rx_status *status)
2801 + struct ieee80211_rx_status *status,
2802 + bool fill_crypt_header)
2803 {
2804 struct sk_buff *first;
2805 struct sk_buff *last;
2806 @@ -1299,7 +1353,6 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2807 enum htt_rx_mpdu_encrypt_type enctype;
2808 u8 first_hdr[64];
2809 u8 *qos;
2810 - size_t hdr_len;
2811 bool has_fcs_err;
2812 bool has_crypto_err;
2813 bool has_tkip_err;
2814 @@ -1324,15 +1377,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2815 * decapped header. It'll be used for undecapping of each MSDU.
2816 */
2817 hdr = (void *)rxd->rx_hdr_status;
2818 - hdr_len = ieee80211_hdrlen(hdr->frame_control);
2819 - memcpy(first_hdr, hdr, hdr_len);
2820 + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN);
2821
2822 /* Each A-MSDU subframe will use the original header as the base and be
2823 * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl.
2824 */
2825 hdr = (void *)first_hdr;
2826 - qos = ieee80211_get_qos_ctl(hdr);
2827 - qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
2828 +
2829 + if (ieee80211_is_data_qos(hdr->frame_control)) {
2830 + qos = ieee80211_get_qos_ctl(hdr);
2831 + qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
2832 + }
2833
2834 /* Some attention flags are valid only in the last MSDU. */
2835 last = skb_peek_tail(amsdu);
2836 @@ -1379,9 +1434,14 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2837 status->flag |= RX_FLAG_DECRYPTED;
2838
2839 if (likely(!is_mgmt))
2840 - status->flag |= RX_FLAG_IV_STRIPPED |
2841 - RX_FLAG_MMIC_STRIPPED;
2842 -}
2843 + status->flag |= RX_FLAG_MMIC_STRIPPED;
2844 +
2845 + if (fill_crypt_header)
2846 + status->flag |= RX_FLAG_MIC_STRIPPED |
2847 + RX_FLAG_ICV_STRIPPED;
2848 + else
2849 + status->flag |= RX_FLAG_IV_STRIPPED;
2850 + }
2851
2852 skb_queue_walk(amsdu, msdu) {
2853 ath10k_htt_rx_h_csum_offload(msdu);
2854 @@ -1397,6 +1457,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2855 if (is_mgmt)
2856 continue;
2857
2858 + if (fill_crypt_header)
2859 + continue;
2860 +
2861 hdr = (void *)msdu->data;
2862 hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
2863 }
2864 @@ -1407,6 +1470,9 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar,
2865 struct ieee80211_rx_status *status)
2866 {
2867 struct sk_buff *msdu;
2868 + struct sk_buff *first_subframe;
2869 +
2870 + first_subframe = skb_peek(amsdu);
2871
2872 while ((msdu = __skb_dequeue(amsdu))) {
2873 /* Setup per-MSDU flags */
2874 @@ -1415,6 +1481,13 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar,
2875 else
2876 status->flag |= RX_FLAG_AMSDU_MORE;
2877
2878 + if (msdu == first_subframe) {
2879 + first_subframe = NULL;
2880 + status->flag &= ~RX_FLAG_ALLOW_SAME_PN;
2881 + } else {
2882 + status->flag |= RX_FLAG_ALLOW_SAME_PN;
2883 + }
2884 +
2885 ath10k_process_rx(ar, status, msdu);
2886 }
2887 }
2888 @@ -1557,7 +1630,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
2889 ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
2890 ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
2891 ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
2892 - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status);
2893 + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true);
2894 ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status);
2895
2896 return num_msdus;
2897 @@ -1892,7 +1965,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
2898 num_msdus += skb_queue_len(&amsdu);
2899 ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
2900 ath10k_htt_rx_h_filter(ar, &amsdu, status);
2901 - ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
2902 + ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false);
2903 ath10k_htt_rx_h_deliver(ar, &amsdu, status);
2904 break;
2905 case -EAGAIN:
2906 diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h
2907 index 034e7a54c5b2..e4878d0044bf 100644
2908 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h
2909 +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h
2910 @@ -239,6 +239,9 @@ enum htt_rx_mpdu_encrypt_type {
2911 HTT_RX_MPDU_ENCRYPT_WAPI = 5,
2912 HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6,
2913 HTT_RX_MPDU_ENCRYPT_NONE = 7,
2914 + HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8,
2915 + HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9,
2916 + HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10,
2917 };
2918
2919 #define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff
2920 diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
2921 index ceeb8c188ef3..00d82e8443bd 100644
2922 --- a/drivers/platform/x86/wmi.c
2923 +++ b/drivers/platform/x86/wmi.c
2924 @@ -848,5 +848,5 @@ static void __exit acpi_wmi_exit(void)
2925 pr_info("Mapper unloaded\n");
2926 }
2927
2928 -subsys_initcall(acpi_wmi_init);
2929 +subsys_initcall_sync(acpi_wmi_init);
2930 module_exit(acpi_wmi_exit);
2931 diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
2932 index 2b770cb0c488..558a66b459fa 100644
2933 --- a/drivers/staging/android/ashmem.c
2934 +++ b/drivers/staging/android/ashmem.c
2935 @@ -774,10 +774,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2936 break;
2937 case ASHMEM_SET_SIZE:
2938 ret = -EINVAL;
2939 + mutex_lock(&ashmem_mutex);
2940 if (!asma->file) {
2941 ret = 0;
2942 asma->size = (size_t)arg;
2943 }
2944 + mutex_unlock(&ashmem_mutex);
2945 break;
2946 case ASHMEM_GET_SIZE:
2947 ret = asma->size;
2948 diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
2949 index 72e926d9868f..04d2b6e25503 100644
2950 --- a/drivers/target/iscsi/iscsi_target.c
2951 +++ b/drivers/target/iscsi/iscsi_target.c
2952 @@ -1940,7 +1940,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2953 struct iscsi_tmr_req *tmr_req;
2954 struct iscsi_tm *hdr;
2955 int out_of_order_cmdsn = 0, ret;
2956 - bool sess_ref = false;
2957 u8 function, tcm_function = TMR_UNKNOWN;
2958
2959 hdr = (struct iscsi_tm *) buf;
2960 @@ -1982,18 +1981,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2961 buf);
2962 }
2963
2964 + transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
2965 + conn->sess->se_sess, 0, DMA_NONE,
2966 + TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
2967 +
2968 + target_get_sess_cmd(&cmd->se_cmd, true);
2969 +
2970 /*
2971 * TASK_REASSIGN for ERL=2 / connection stays inside of
2972 * LIO-Target $FABRIC_MOD
2973 */
2974 if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
2975 - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
2976 - conn->sess->se_sess, 0, DMA_NONE,
2977 - TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
2978 -
2979 - target_get_sess_cmd(&cmd->se_cmd, true);
2980 - sess_ref = true;
2981 -
2982 switch (function) {
2983 case ISCSI_TM_FUNC_ABORT_TASK:
2984 tcm_function = TMR_ABORT_TASK;
2985 @@ -2132,12 +2130,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2986 * For connection recovery, this is also the default action for
2987 * TMR TASK_REASSIGN.
2988 */
2989 - if (sess_ref) {
2990 - pr_debug("Handle TMR, using sess_ref=true check\n");
2991 - target_put_sess_cmd(&cmd->se_cmd);
2992 - }
2993 -
2994 iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
2995 + target_put_sess_cmd(&cmd->se_cmd);
2996 return 0;
2997 }
2998 EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd);
2999 diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
3000 index 27dd1e12f246..14bb2db5273c 100644
3001 --- a/drivers/target/target_core_tmr.c
3002 +++ b/drivers/target/target_core_tmr.c
3003 @@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
3004 spin_unlock(&se_cmd->t_state_lock);
3005 return false;
3006 }
3007 + if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
3008 + if (se_cmd->scsi_status) {
3009 + pr_debug("Attempted to abort io tag: %llu early failure"
3010 + " status: 0x%02x\n", se_cmd->tag,
3011 + se_cmd->scsi_status);
3012 + spin_unlock(&se_cmd->t_state_lock);
3013 + return false;
3014 + }
3015 + }
3016 if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
3017 pr_debug("Attempted to abort io tag: %llu already shutdown,"
3018 " skipping\n", se_cmd->tag);
3019 diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
3020 index 4c0782cb1e94..6f3eccf986c7 100644
3021 --- a/drivers/target/target_core_transport.c
3022 +++ b/drivers/target/target_core_transport.c
3023 @@ -1939,6 +1939,7 @@ void target_execute_cmd(struct se_cmd *cmd)
3024 }
3025
3026 cmd->t_state = TRANSPORT_PROCESSING;
3027 + cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
3028 cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
3029 spin_unlock_irq(&cmd->t_state_lock);
3030
3031 @@ -2592,6 +2593,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
3032 ret = -ESHUTDOWN;
3033 goto out;
3034 }
3035 + se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
3036 list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
3037 out:
3038 spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
3039 diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
3040 index 82eea55a7b5c..3b7d69ca83be 100644
3041 --- a/drivers/usb/host/xhci-mem.c
3042 +++ b/drivers/usb/host/xhci-mem.c
3043 @@ -1086,7 +1086,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
3044
3045 return 1;
3046 fail:
3047 -
3048 + if (dev->eps[0].ring)
3049 + xhci_ring_free(xhci, dev->eps[0].ring);
3050 if (dev->in_ctx)
3051 xhci_free_container_ctx(xhci, dev->in_ctx);
3052 if (dev->out_ctx)
3053 diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
3054 index 8e7737d7ac0a..03be5d574f23 100644
3055 --- a/drivers/usb/misc/usb3503.c
3056 +++ b/drivers/usb/misc/usb3503.c
3057 @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
3058 if (gpio_is_valid(hub->gpio_reset)) {
3059 err = devm_gpio_request_one(dev, hub->gpio_reset,
3060 GPIOF_OUT_INIT_LOW, "usb3503 reset");
3061 + /* Datasheet defines a hardware reset to be at least 100us */
3062 + usleep_range(100, 10000);
3063 if (err) {
3064 dev_err(dev,
3065 "unable to request GPIO %d as reset pin (%d)\n",
3066 diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
3067 index 1a874a1f3890..80b37d214beb 100644
3068 --- a/drivers/usb/mon/mon_bin.c
3069 +++ b/drivers/usb/mon/mon_bin.c
3070 @@ -1002,7 +1002,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
3071 break;
3072
3073 case MON_IOCQ_RING_SIZE:
3074 + mutex_lock(&rp->fetch_lock);
3075 ret = rp->b_size;
3076 + mutex_unlock(&rp->fetch_lock);
3077 break;
3078
3079 case MON_IOCT_RING_SIZE:
3080 @@ -1229,12 +1231,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3081 unsigned long offset, chunk_idx;
3082 struct page *pageptr;
3083
3084 + mutex_lock(&rp->fetch_lock);
3085 offset = vmf->pgoff << PAGE_SHIFT;
3086 - if (offset >= rp->b_size)
3087 + if (offset >= rp->b_size) {
3088 + mutex_unlock(&rp->fetch_lock);
3089 return VM_FAULT_SIGBUS;
3090 + }
3091 chunk_idx = offset / CHUNK_SIZE;
3092 pageptr = rp->b_vec[chunk_idx].pg;
3093 get_page(pageptr);
3094 + mutex_unlock(&rp->fetch_lock);
3095 vmf->page = pageptr;
3096 return 0;
3097 }
3098 diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
3099 index 11ee55e080e5..3178d8afb3e6 100644
3100 --- a/drivers/usb/serial/cp210x.c
3101 +++ b/drivers/usb/serial/cp210x.c
3102 @@ -121,6 +121,7 @@ static const struct usb_device_id id_table[] = {
3103 { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
3104 { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
3105 { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
3106 + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
3107 { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
3108 { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
3109 { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
3110 @@ -171,6 +172,7 @@ static const struct usb_device_id id_table[] = {
3111 { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
3112 { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
3113 { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
3114 + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
3115 { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
3116 { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
3117 { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
3118 diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
3119 index 9f356f7cf7d5..719ec68ae309 100644
3120 --- a/drivers/usb/storage/unusual_uas.h
3121 +++ b/drivers/usb/storage/unusual_uas.h
3122 @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
3123 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3124 US_FL_NO_ATA_1X),
3125
3126 +/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
3127 +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
3128 + "Norelsys",
3129 + "NS1068X",
3130 + USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3131 + US_FL_IGNORE_UAS),
3132 +
3133 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
3134 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
3135 "JMicron",
3136 diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
3137 index e24b24fa0f16..2a5d3180777d 100644
3138 --- a/drivers/usb/usbip/usbip_common.c
3139 +++ b/drivers/usb/usbip/usbip_common.c
3140 @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3141 dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
3142 udev->devnum, udev->devpath, usb_speed_string(udev->speed));
3143
3144 - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
3145 + pr_debug("tt hub ttport %d\n", udev->ttport);
3146
3147 dev_dbg(dev, " ");
3148 for (i = 0; i < 16; i++)
3149 @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3150 }
3151 pr_debug("\n");
3152
3153 - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
3154 -
3155 - dev_dbg(dev,
3156 - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
3157 - &udev->descriptor, udev->config,
3158 - udev->actconfig, udev->rawdescriptors);
3159 + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
3160 + udev->bus->bus_name);
3161
3162 dev_dbg(dev, "have_langid %d, string_langid %d\n",
3163 udev->have_langid, udev->string_langid);
3164 @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb)
3165
3166 dev = &urb->dev->dev;
3167
3168 - dev_dbg(dev, " urb :%p\n", urb);
3169 - dev_dbg(dev, " dev :%p\n", urb->dev);
3170 -
3171 usbip_dump_usb_device(urb->dev);
3172
3173 dev_dbg(dev, " pipe :%08x ", urb->pipe);
3174 @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb)
3175
3176 dev_dbg(dev, " status :%d\n", urb->status);
3177 dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
3178 - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
3179 dev_dbg(dev, " transfer_buffer_length:%d\n",
3180 urb->transfer_buffer_length);
3181 dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
3182 - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
3183
3184 if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
3185 usbip_dump_usb_ctrlrequest(
3186 @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb)
3187 dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
3188 dev_dbg(dev, " interval :%d\n", urb->interval);
3189 dev_dbg(dev, " error_count :%d\n", urb->error_count);
3190 - dev_dbg(dev, " context :%p\n", urb->context);
3191 - dev_dbg(dev, " complete :%p\n", urb->complete);
3192 }
3193 EXPORT_SYMBOL_GPL(usbip_dump_urb);
3194
3195 diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
3196 index e429b59f6f8a..d020e72b3122 100644
3197 --- a/drivers/usb/usbip/vudc_rx.c
3198 +++ b/drivers/usb/usbip/vudc_rx.c
3199 @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
3200 urb_p->new = 1;
3201 urb_p->seqnum = pdu->base.seqnum;
3202
3203 + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
3204 + /* validate packet size and number of packets */
3205 + unsigned int maxp, packets, bytes;
3206 +
3207 + maxp = usb_endpoint_maxp(urb_p->ep->desc);
3208 + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
3209 + bytes = pdu->u.cmd_submit.transfer_buffer_length;
3210 + packets = DIV_ROUND_UP(bytes, maxp);
3211 +
3212 + if (pdu->u.cmd_submit.number_of_packets < 0 ||
3213 + pdu->u.cmd_submit.number_of_packets > packets) {
3214 + dev_err(&udc->gadget.dev,
3215 + "CMD_SUBMIT: isoc invalid num packets %d\n",
3216 + pdu->u.cmd_submit.number_of_packets);
3217 + ret = -EMSGSIZE;
3218 + goto free_urbp;
3219 + }
3220 + }
3221 +
3222 ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
3223 if (ret) {
3224 usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
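
The new isochronous check bounds the attacker-supplied number_of_packets by what the transfer buffer can physically carry: at maxp bytes per packet (times the high-bandwidth multiplier), 'bytes' bytes fit in at most DIV_ROUND_UP(bytes, maxp) packets. The same arithmetic as a standalone program (names illustrative; a valid isoc endpoint guarantees maxp * mult is nonzero):

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    /* 0 if the claimed packet count fits the buffer, -1 otherwise. */
    static int check_isoc_packets(unsigned int maxp, unsigned int mult,
                                  unsigned int bytes, int number_of_packets)
    {
        unsigned int packets = DIV_ROUND_UP(bytes, maxp * mult);

        if (number_of_packets < 0 ||
            (unsigned int)number_of_packets > packets)
            return -1;                      /* -EMSGSIZE in the driver */
        return 0;
    }

    int main(void)
    {
        /* 3000 bytes over 1024-byte packets: at most 3 packets */
        printf("%d\n", check_isoc_packets(1024, 1, 3000, 3));   /* 0  */
        printf("%d\n", check_isoc_packets(1024, 1, 3000, 64));  /* -1 */
        return 0;
    }
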
3225 diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
3226 index 234661782fa0..3ab4c86486a7 100644
3227 --- a/drivers/usb/usbip/vudc_tx.c
3228 +++ b/drivers/usb/usbip/vudc_tx.c
3229 @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3230 memset(&pdu_header, 0, sizeof(pdu_header));
3231 memset(&msg, 0, sizeof(msg));
3232
3233 + if (urb->actual_length > 0 && !urb->transfer_buffer) {
3234 + dev_err(&udc->gadget.dev,
3235 + "urb: actual_length %d transfer_buffer null\n",
3236 + urb->actual_length);
3237 + return -1;
3238 + }
3239 +
3240 if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
3241 iovnum = 2 + urb->number_of_packets;
3242 else
3243 @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3244
3245 /* 1. setup usbip_header */
3246 setup_ret_submit_pdu(&pdu_header, urb_p);
3247 - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
3248 - pdu_header.base.seqnum, urb);
3249 + usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
3250 + pdu_header.base.seqnum);
3251 usbip_header_correct_endian(&pdu_header, 1);
3252
3253 iov[iovnum].iov_base = &pdu_header;
3254 diff --git a/include/linux/bpf.h b/include/linux/bpf.h
3255 index 97498be2ca2e..75ffd3b2149e 100644
3256 --- a/include/linux/bpf.h
3257 +++ b/include/linux/bpf.h
3258 @@ -43,6 +43,7 @@ struct bpf_map {
3259 u32 max_entries;
3260 u32 map_flags;
3261 u32 pages;
3262 + bool unpriv_array;
3263 struct user_struct *user;
3264 const struct bpf_map_ops *ops;
3265 struct work_struct work;
3266 @@ -189,6 +190,7 @@ struct bpf_prog_aux {
3267 struct bpf_array {
3268 struct bpf_map map;
3269 u32 elem_size;
3270 + u32 index_mask;
3271 /* 'ownership' of prog_array is claimed by the first program that
3272 * is going to use this map or by the first program which FD is stored
3273 * in the map to make sure that all callers and callees have the same
3274 diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
3275 index 4c4e9358c146..070fc49e39e2 100644
3276 --- a/include/linux/bpf_verifier.h
3277 +++ b/include/linux/bpf_verifier.h
3278 @@ -67,7 +67,10 @@ struct bpf_verifier_state_list {
3279 };
3280
3281 struct bpf_insn_aux_data {
3282 - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
3283 + union {
3284 + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
3285 + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
3286 + };
3287 bool seen; /* this insn was processed by the verifier */
3288 };
3289
3290 diff --git a/include/linux/cpu.h b/include/linux/cpu.h
3291 index e571128ad99a..2f475ad89a0d 100644
3292 --- a/include/linux/cpu.h
3293 +++ b/include/linux/cpu.h
3294 @@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
3295 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
3296 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
3297
3298 +extern ssize_t cpu_show_meltdown(struct device *dev,
3299 + struct device_attribute *attr, char *buf);
3300 +extern ssize_t cpu_show_spectre_v1(struct device *dev,
3301 + struct device_attribute *attr, char *buf);
3302 +extern ssize_t cpu_show_spectre_v2(struct device *dev,
3303 + struct device_attribute *attr, char *buf);
3304 +
3305 extern __printf(4, 5)
3306 struct device *cpu_device_create(struct device *parent, void *drvdata,
3307 const struct attribute_group **groups,
3308 diff --git a/include/linux/frame.h b/include/linux/frame.h
3309 index e6baaba3f1ae..d772c61c31da 100644
3310 --- a/include/linux/frame.h
3311 +++ b/include/linux/frame.h
3312 @@ -11,7 +11,7 @@
3313 * For more information, see tools/objtool/Documentation/stack-validation.txt.
3314 */
3315 #define STACK_FRAME_NON_STANDARD(func) \
3316 - static void __used __section(__func_stack_frame_non_standard) \
3317 + static void __used __section(.discard.func_stack_frame_non_standard) \
3318 *__func_stack_frame_non_standard_##func = func
3319
3320 #else /* !CONFIG_STACK_VALIDATION */
3321 diff --git a/include/linux/phy.h b/include/linux/phy.h
3322 index a04d69ab7c34..867110c9d707 100644
3323 --- a/include/linux/phy.h
3324 +++ b/include/linux/phy.h
3325 @@ -683,6 +683,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
3326 return phydev->is_internal;
3327 }
3328
3329 +/**
3330 + * phy_interface_mode_is_rgmii - Convenience function for testing if a
3331 + * PHY interface mode is RGMII (all variants)
3332 + * @mode: the phy_interface_t enum
3333 + */
3334 +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
3335 +{
3336 + return mode >= PHY_INTERFACE_MODE_RGMII &&
3337 + mode <= PHY_INTERFACE_MODE_RGMII_TXID;
3338 +};
3339 +
3340 /**
3341 * phy_interface_is_rgmii - Convenience function for testing if a PHY interface
3342 * is RGMII (all variants)
3343 diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
3344 index f2e27e078362..01b3778ba6da 100644
3345 --- a/include/linux/sh_eth.h
3346 +++ b/include/linux/sh_eth.h
3347 @@ -16,7 +16,6 @@ struct sh_eth_plat_data {
3348 unsigned char mac_addr[ETH_ALEN];
3349 unsigned no_ether_link:1;
3350 unsigned ether_link_active_low:1;
3351 - unsigned needs_init:1;
3352 };
3353
3354 #endif
3355 diff --git a/include/net/mac80211.h b/include/net/mac80211.h
3356 index 2c7d876e2a1a..8fd61bc50383 100644
3357 --- a/include/net/mac80211.h
3358 +++ b/include/net/mac80211.h
3359 @@ -1007,7 +1007,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
3360 * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware.
3361 * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame,
3362 * verification has been done by the hardware.
3363 - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame.
3364 + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame.
3365 * If this flag is set, the stack cannot do any replay detection
3366 * hence the driver or hardware will have to do that.
3367 * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this
3368 @@ -1078,6 +1078,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
3369 * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before.
3370 * This is used for AMSDU subframes which can have the same PN as
3371 * the first subframe.
3372 + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
3373 + * be done in the hardware.
3374 */
3375 enum mac80211_rx_flags {
3376 RX_FLAG_MMIC_ERROR = BIT(0),
3377 @@ -1113,6 +1115,7 @@ enum mac80211_rx_flags {
3378 RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31),
3379 RX_FLAG_MIC_STRIPPED = BIT_ULL(32),
3380 RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33),
3381 + RX_FLAG_ICV_STRIPPED = BIT_ULL(34),
3382 };
3383
3384 #define RX_FLAG_STBC_SHIFT 26
3385 diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
3386 index eb3b23b6ec54..30f99ce4c6ce 100644
3387 --- a/include/target/target_core_base.h
3388 +++ b/include/target/target_core_base.h
3389 @@ -493,6 +493,7 @@ struct se_cmd {
3390 #define CMD_T_BUSY (1 << 9)
3391 #define CMD_T_TAS (1 << 10)
3392 #define CMD_T_FABRIC_STOP (1 << 11)
3393 +#define CMD_T_PRE_EXECUTE (1 << 12)
3394 spinlock_t t_state_lock;
3395 struct kref cmd_kref;
3396 struct completion t_transport_stop_comp;
3397 diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
3398 index 8ade3eb6c640..90fce4d6956a 100644
3399 --- a/include/trace/events/kvm.h
3400 +++ b/include/trace/events/kvm.h
3401 @@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq,
3402 { KVM_TRACE_MMIO_WRITE, "write" }
3403
3404 TRACE_EVENT(kvm_mmio,
3405 - TP_PROTO(int type, int len, u64 gpa, u64 val),
3406 + TP_PROTO(int type, int len, u64 gpa, void *val),
3407 TP_ARGS(type, len, gpa, val),
3408
3409 TP_STRUCT__entry(
3410 @@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio,
3411 __entry->type = type;
3412 __entry->len = len;
3413 __entry->gpa = gpa;
3414 - __entry->val = val;
3415 + __entry->val = 0;
3416 + if (val)
3417 + memcpy(&__entry->val, val,
3418 + min_t(u32, sizeof(__entry->val), len));
3419 ),
3420
3421 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
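
kvm_mmio now takes the value by pointer and copies at most sizeof(__entry->val) bytes, so MMIO accesses narrower or wider than u64 neither read past the source nor overflow the trace field, and unused high bytes stay zero. A compilable sketch of the bounded copy (min_t open-coded; the result's byte order is the host's, as in the trace event itself):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    static uint64_t capture_val(const void *val, uint32_t len)
    {
        uint64_t out = 0;
        uint32_t n = len < sizeof(out) ? len : (uint32_t)sizeof(out);

        if (val)
            memcpy(&out, val, n);   /* never reads more than 'len' bytes */
        return out;
    }

    int main(void)
    {
        uint16_t mmio16 = 0xBEEF;

        printf("0x%llx\n", (unsigned long long)capture_val(&mmio16, 2));
        printf("0x%llx\n", (unsigned long long)capture_val(NULL, 8)); /* 0 */
        return 0;
    }
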
3422 diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
3423 index f3721e150d94..9a1e6ed7babc 100644
3424 --- a/kernel/bpf/arraymap.c
3425 +++ b/kernel/bpf/arraymap.c
3426 @@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
3427 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3428 {
3429 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
3430 + u32 elem_size, index_mask, max_entries;
3431 + bool unpriv = !capable(CAP_SYS_ADMIN);
3432 struct bpf_array *array;
3433 - u64 array_size;
3434 - u32 elem_size;
3435 + u64 array_size, mask64;
3436
3437 /* check sanity of attributes */
3438 if (attr->max_entries == 0 || attr->key_size != 4 ||
3439 @@ -63,11 +64,32 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3440
3441 elem_size = round_up(attr->value_size, 8);
3442
3443 + max_entries = attr->max_entries;
3444 +
3445 + /* On 32 bit archs roundup_pow_of_two() with max_entries that has
3446 + * upper most bit set in u32 space is undefined behavior due to
3447 + * resulting 1U << 32, so do it manually here in u64 space.
3448 + */
3449 + mask64 = fls_long(max_entries - 1);
3450 + mask64 = 1ULL << mask64;
3451 + mask64 -= 1;
3452 +
3453 + index_mask = mask64;
3454 + if (unpriv) {
3455 + /* round up array size to nearest power of 2,
3456 + * since cpu will speculate within index_mask limits
3457 + */
3458 + max_entries = index_mask + 1;
3459 + /* Check for overflows. */
3460 + if (max_entries < attr->max_entries)
3461 + return ERR_PTR(-E2BIG);
3462 + }
3463 +
3464 array_size = sizeof(*array);
3465 if (percpu)
3466 - array_size += (u64) attr->max_entries * sizeof(void *);
3467 + array_size += (u64) max_entries * sizeof(void *);
3468 else
3469 - array_size += (u64) attr->max_entries * elem_size;
3470 + array_size += (u64) max_entries * elem_size;
3471
3472 /* make sure there is no u32 overflow later in round_up() */
3473 if (array_size >= U32_MAX - PAGE_SIZE)
3474 @@ -77,6 +99,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3475 array = bpf_map_area_alloc(array_size);
3476 if (!array)
3477 return ERR_PTR(-ENOMEM);
3478 + array->index_mask = index_mask;
3479 + array->map.unpriv_array = unpriv;
3480
3481 /* copy mandatory map attributes */
3482 array->map.map_type = attr->map_type;
3483 @@ -110,7 +134,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
3484 if (unlikely(index >= array->map.max_entries))
3485 return NULL;
3486
3487 - return array->value + array->elem_size * index;
3488 + return array->value + array->elem_size * (index & array->index_mask);
3489 }
3490
3491 /* Called from eBPF program */
3492 @@ -122,7 +146,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
3493 if (unlikely(index >= array->map.max_entries))
3494 return NULL;
3495
3496 - return this_cpu_ptr(array->pptrs[index]);
3497 + return this_cpu_ptr(array->pptrs[index & array->index_mask]);
3498 }
3499
3500 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
3501 @@ -142,7 +166,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
3502 */
3503 size = round_up(map->value_size, 8);
3504 rcu_read_lock();
3505 - pptr = array->pptrs[index];
3506 + pptr = array->pptrs[index & array->index_mask];
3507 for_each_possible_cpu(cpu) {
3508 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
3509 off += size;
3510 @@ -190,10 +214,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
3511 return -EEXIST;
3512
3513 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
3514 - memcpy(this_cpu_ptr(array->pptrs[index]),
3515 + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
3516 value, map->value_size);
3517 else
3518 - memcpy(array->value + array->elem_size * index,
3519 + memcpy(array->value +
3520 + array->elem_size * (index & array->index_mask),
3521 value, map->value_size);
3522 return 0;
3523 }
3524 @@ -227,7 +252,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
3525 */
3526 size = round_up(map->value_size, 8);
3527 rcu_read_lock();
3528 - pptr = array->pptrs[index];
3529 + pptr = array->pptrs[index & array->index_mask];
3530 for_each_possible_cpu(cpu) {
3531 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
3532 off += size;
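
This is the Spectre-v1 hardening for BPF array maps: for unprivileged users the allocation is rounded up to a power of two and every lookup is ANDed with index_mask, so even a speculatively executed out-of-bounds index lands inside the array. The rounding is done in u64 because roundup_pow_of_two() with max_entries = 0x80000000 would hit the undefined 1U << 32 on 32-bit. A standalone sketch of the mask computation (fls64_sketch is an illustrative stand-in for the kernel's fls_long; max_entries > 0 is ensured by the earlier sanity checks):

    #include <stdint.h>
    #include <stdio.h>

    /* Position of the highest set bit, 1-based; 0 when x == 0. */
    static unsigned int fls64_sketch(uint64_t x)
    {
        unsigned int r = 0;

        while (x) {
            r++;
            x >>= 1;
        }
        return r;
    }

    /* index_mask = (next power of two >= max_entries) - 1, in u64 space. */
    static uint32_t index_mask_for(uint32_t max_entries)
    {
        uint64_t mask64 = 1ULL << fls64_sketch(max_entries - 1);

        return (uint32_t)(mask64 - 1);
    }

    int main(void)
    {
        printf("0x%x\n", index_mask_for(6));           /* 0x7        */
        printf("0x%x\n", index_mask_for(0x80000000u)); /* 0x7fffffff */
        return 0;
    }
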
3533 diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
3534 index 72ea91df71c9..91a2d3752007 100644
3535 --- a/kernel/bpf/syscall.c
3536 +++ b/kernel/bpf/syscall.c
3537 @@ -565,57 +565,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
3538 list_add(&tl->list_node, &bpf_prog_types);
3539 }
3540
3541 -/* fixup insn->imm field of bpf_call instructions:
3542 - * if (insn->imm == BPF_FUNC_map_lookup_elem)
3543 - * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
3544 - * else if (insn->imm == BPF_FUNC_map_update_elem)
3545 - * insn->imm = bpf_map_update_elem - __bpf_call_base;
3546 - * else ...
3547 - *
3548 - * this function is called after eBPF program passed verification
3549 - */
3550 -static void fixup_bpf_calls(struct bpf_prog *prog)
3551 -{
3552 - const struct bpf_func_proto *fn;
3553 - int i;
3554 -
3555 - for (i = 0; i < prog->len; i++) {
3556 - struct bpf_insn *insn = &prog->insnsi[i];
3557 -
3558 - if (insn->code == (BPF_JMP | BPF_CALL)) {
3559 - /* we reach here when program has bpf_call instructions
3560 - * and it passed bpf_check(), means that
3561 - * ops->get_func_proto must have been supplied, check it
3562 - */
3563 - BUG_ON(!prog->aux->ops->get_func_proto);
3564 -
3565 - if (insn->imm == BPF_FUNC_get_route_realm)
3566 - prog->dst_needed = 1;
3567 - if (insn->imm == BPF_FUNC_get_prandom_u32)
3568 - bpf_user_rnd_init_once();
3569 - if (insn->imm == BPF_FUNC_tail_call) {
3570 - /* mark bpf_tail_call as different opcode
3571 - * to avoid conditional branch in
3572 - * interpeter for every normal call
3573 - * and to prevent accidental JITing by
3574 - * JIT compiler that doesn't support
3575 - * bpf_tail_call yet
3576 - */
3577 - insn->imm = 0;
3578 - insn->code |= BPF_X;
3579 - continue;
3580 - }
3581 -
3582 - fn = prog->aux->ops->get_func_proto(insn->imm);
3583 - /* all functions that have prototype and verifier allowed
3584 - * programs to call them, must be real in-kernel functions
3585 - */
3586 - BUG_ON(!fn->func);
3587 - insn->imm = fn->func - __bpf_call_base;
3588 - }
3589 - }
3590 -}
3591 -
3592 /* drop refcnt on maps used by eBPF program and free auxilary data */
3593 static void free_used_maps(struct bpf_prog_aux *aux)
3594 {
3595 @@ -808,9 +757,6 @@ static int bpf_prog_load(union bpf_attr *attr)
3596 if (err < 0)
3597 goto free_used_maps;
3598
3599 - /* fixup BPF_CALL->imm field */
3600 - fixup_bpf_calls(prog);
3601 -
3602 /* eBPF program is ready to be JITed */
3603 prog = bpf_prog_select_runtime(prog, &err);
3604 if (err < 0)
3605 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
3606 index d7eeebfafe8d..19c44cf59bb2 100644
3607 --- a/kernel/bpf/verifier.c
3608 +++ b/kernel/bpf/verifier.c
3609 @@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
3610 }
3611 }
3612
3613 -static int check_call(struct bpf_verifier_env *env, int func_id)
3614 +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
3615 {
3616 struct bpf_verifier_state *state = &env->cur_state;
3617 const struct bpf_func_proto *fn = NULL;
3618 @@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
3619 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
3620 if (err)
3621 return err;
3622 + if (func_id == BPF_FUNC_tail_call) {
3623 + if (meta.map_ptr == NULL) {
3624 + verbose("verifier bug\n");
3625 + return -EINVAL;
3626 + }
3627 + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
3628 + }
3629 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
3630 if (err)
3631 return err;
3632 @@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env)
3633 return -EINVAL;
3634 }
3635
3636 - err = check_call(env, insn->imm);
3637 + err = check_call(env, insn->imm, insn_idx);
3638 if (err)
3639 return err;
3640
3641 @@ -3362,6 +3369,81 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
3642 return 0;
3643 }
3644
3645 +/* fixup insn->imm field of bpf_call instructions
3646 + *
3647 + * this function is called after eBPF program passed verification
3648 + */
3649 +static int fixup_bpf_calls(struct bpf_verifier_env *env)
3650 +{
3651 + struct bpf_prog *prog = env->prog;
3652 + struct bpf_insn *insn = prog->insnsi;
3653 + const struct bpf_func_proto *fn;
3654 + const int insn_cnt = prog->len;
3655 + struct bpf_insn insn_buf[16];
3656 + struct bpf_prog *new_prog;
3657 + struct bpf_map *map_ptr;
3658 + int i, cnt, delta = 0;
3659 +
3660 +
3661 + for (i = 0; i < insn_cnt; i++, insn++) {
3662 + if (insn->code != (BPF_JMP | BPF_CALL))
3663 + continue;
3664 +
3665 + if (insn->imm == BPF_FUNC_get_route_realm)
3666 + prog->dst_needed = 1;
3667 + if (insn->imm == BPF_FUNC_get_prandom_u32)
3668 + bpf_user_rnd_init_once();
3669 + if (insn->imm == BPF_FUNC_tail_call) {
3670 + /* mark bpf_tail_call as different opcode to avoid
3671 + * conditional branch in the interpreter for every normal
3672 + * call and to prevent accidental JITing by JIT compiler
3673 + * that doesn't support bpf_tail_call yet
3674 + */
3675 + insn->imm = 0;
3676 + insn->code |= BPF_X;
3677 +
3678 + /* instead of changing every JIT dealing with tail_call
3679 + * emit two extra insns:
3680 + * if (index >= max_entries) goto out;
3681 + * index &= array->index_mask;
3682 + * to avoid out-of-bounds cpu speculation
3683 + */
3684 + map_ptr = env->insn_aux_data[i + delta].map_ptr;
3685 + if (!map_ptr->unpriv_array)
3686 + continue;
3687 + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
3688 + map_ptr->max_entries, 2);
3689 + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
3690 + container_of(map_ptr,
3691 + struct bpf_array,
3692 + map)->index_mask);
3693 + insn_buf[2] = *insn;
3694 + cnt = 3;
3695 + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
3696 + if (!new_prog)
3697 + return -ENOMEM;
3698 +
3699 + delta += cnt - 1;
3700 + env->prog = prog = new_prog;
3701 + insn = new_prog->insnsi + i + delta;
3702 + continue;
3703 + }
3704 +
3705 + fn = prog->aux->ops->get_func_proto(insn->imm);
3706 + /* all functions that have prototype and verifier allowed
3707 + * programs to call them, must be real in-kernel functions
3708 + */
3709 + if (!fn->func) {
3710 + verbose("kernel subsystem misconfigured func %d\n",
3711 + insn->imm);
3712 + return -EFAULT;
3713 + }
3714 + insn->imm = fn->func - __bpf_call_base;
3715 + }
3716 +
3717 + return 0;
3718 +}
3719 +
3720 static void free_states(struct bpf_verifier_env *env)
3721 {
3722 struct bpf_verifier_state_list *sl, *sln;
3723 @@ -3463,6 +3545,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
3724 /* program is valid, convert *(u32*)(ctx + off) accesses */
3725 ret = convert_ctx_accesses(env);
3726
3727 + if (ret == 0)
3728 + ret = fixup_bpf_calls(env);
3729 +
3730 if (log_level && log_len >= log_size - 1) {
3731 BUG_ON(log_len >= log_size);
3732 /* verifier log exceeded user supplied buffer */
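
Tail calls cannot be masked inside a helper, so the verifier patches two instructions in front of each one: an architectural bounds check, then an AND that clamps the index even on the speculative path. In C the rewritten access is roughly the following sketch (illustrative names; assumes the array was sized to index_mask + 1 as in the arraymap change above):

    #include <stdint.h>
    #include <stddef.h>

    static void *array_lookup(char *base, uint32_t elem_size,
                              uint32_t max_entries, uint32_t index_mask,
                              uint32_t index)
    {
        if (index >= max_entries)       /* architectural bounds check */
            return NULL;
        /* The AND is a no-op for valid indices but keeps a speculatively
         * executed load inside the power-of-two sized allocation. */
        return base + (uint64_t)elem_size * (index & index_mask);
    }
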
3733 diff --git a/mm/zswap.c b/mm/zswap.c
3734 index dbef27822a98..ded051e3433d 100644
3735 --- a/mm/zswap.c
3736 +++ b/mm/zswap.c
3737 @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
3738 pool = zswap_pool_find_get(type, compressor);
3739 if (pool) {
3740 zswap_pool_debug("using existing", pool);
3741 + WARN_ON(pool == zswap_pool_current());
3742 list_del_rcu(&pool->list);
3743 - } else {
3744 - spin_unlock(&zswap_pools_lock);
3745 - pool = zswap_pool_create(type, compressor);
3746 - spin_lock(&zswap_pools_lock);
3747 }
3748
3749 + spin_unlock(&zswap_pools_lock);
3750 +
3751 + if (!pool)
3752 + pool = zswap_pool_create(type, compressor);
3753 +
3754 if (pool)
3755 ret = param_set_charp(s, kp);
3756 else
3757 ret = -EINVAL;
3758
3759 + spin_lock(&zswap_pools_lock);
3760 +
3761 if (!ret) {
3762 put_pool = zswap_pool_current();
3763 list_add_rcu(&pool->list, &zswap_pools);
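
The zswap reordering exists because zswap_pool_create() can sleep, which is not allowed under the zswap_pools spinlock; the pool is therefore created with the lock dropped and only linked into the list after retaking it. A rough userspace analogue of that shape, with a pthread spinlock and calloc() as the blocking step (types and names illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct pool { struct pool *next; };

    static pthread_spinlock_t pools_lock; /* pthread_spin_init()ed elsewhere */
    static struct pool *pools;

    /* Called with pools_lock held; returns with it held again. */
    static int install_pool(struct pool *existing)
    {
        struct pool *pool = existing;
        int ret;

        pthread_spin_unlock(&pools_lock);

        if (!pool)
            pool = calloc(1, sizeof(*pool)); /* blocking work, lock dropped */
        ret = pool ? 0 : -1;

        pthread_spin_lock(&pools_lock);
        if (ret == 0) {
            pool->next = pools;              /* publish only under the lock */
            pools = pool;
        }
        return ret;
    }
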
3764 diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
3765 index 4a47074d1d7f..c8ea3cf9db85 100644
3766 --- a/net/8021q/vlan.c
3767 +++ b/net/8021q/vlan.c
3768 @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
3769 vlan_gvrp_uninit_applicant(real_dev);
3770 }
3771
3772 - /* Take it out of our own structures, but be sure to interlock with
3773 - * HW accelerating devices or SW vlan input packet processing if
3774 - * VLAN is not 0 (leave it there for 802.1p).
3775 - */
3776 - if (vlan_id)
3777 - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
3778 + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
3779
3780 /* Get rid of the vlan's reference to real_dev */
3781 dev_put(real_dev);
3782 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
3783 index ffd09c1675d4..2bbca23a9d05 100644
3784 --- a/net/bluetooth/l2cap_core.c
3785 +++ b/net/bluetooth/l2cap_core.c
3786 @@ -3353,9 +3353,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
3787 break;
3788
3789 case L2CAP_CONF_EFS:
3790 - remote_efs = 1;
3791 - if (olen == sizeof(efs))
3792 + if (olen == sizeof(efs)) {
3793 + remote_efs = 1;
3794 memcpy(&efs, (void *) val, olen);
3795 + }
3796 break;
3797
3798 case L2CAP_CONF_EWS:
3799 @@ -3574,16 +3575,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
3800 break;
3801
3802 case L2CAP_CONF_EFS:
3803 - if (olen == sizeof(efs))
3804 + if (olen == sizeof(efs)) {
3805 memcpy(&efs, (void *)val, olen);
3806
3807 - if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3808 - efs.stype != L2CAP_SERV_NOTRAFIC &&
3809 - efs.stype != chan->local_stype)
3810 - return -ECONNREFUSED;
3811 + if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3812 + efs.stype != L2CAP_SERV_NOTRAFIC &&
3813 + efs.stype != chan->local_stype)
3814 + return -ECONNREFUSED;
3815
3816 - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3817 - (unsigned long) &efs, endptr - ptr);
3818 + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3819 + (unsigned long) &efs, endptr - ptr);
3820 + }
3821 break;
3822
3823 case L2CAP_CONF_FCS:
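
Both L2CAP hunks make the copy conditional on the advertised option length matching the structure being filled, so a malformed L2CAP_CONF_EFS option can no longer populate efs from a short payload, and the follow-up checks run only on data that was really copied. The pattern in isolation (the layout below is an illustrative stand-in, not the real l2cap_conf_efs):

    #include <stdint.h>
    #include <string.h>

    struct efs_opt {            /* illustrative stand-in */
        uint8_t  id;
        uint8_t  stype;
        uint16_t msdu;
        uint32_t sdu_itime;
    };

    /* Returns 1 and fills *out only when olen is exactly right. */
    static int parse_efs(const void *val, size_t olen, struct efs_opt *out)
    {
        if (olen != sizeof(*out))
            return 0;           /* ignore the malformed option entirely */
        memcpy(out, val, sizeof(*out));
        return 1;
    }
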
3824 diff --git a/net/core/ethtool.c b/net/core/ethtool.c
3825 index e9989b835a66..7913771ec474 100644
3826 --- a/net/core/ethtool.c
3827 +++ b/net/core/ethtool.c
3828 @@ -742,15 +742,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
3829 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
3830 }
3831
3832 -static void
3833 -warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
3834 -{
3835 - char name[sizeof(current->comm)];
3836 -
3837 - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
3838 - get_task_comm(name, current), details);
3839 -}
3840 -
3841 /* Query device for its ethtool_cmd settings.
3842 *
3843 * Backward compatibility note: for compatibility with legacy ethtool,
3844 @@ -777,10 +768,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
3845 &link_ksettings);
3846 if (err < 0)
3847 return err;
3848 - if (!convert_link_ksettings_to_legacy_settings(&cmd,
3849 - &link_ksettings))
3850 - warn_incomplete_ethtool_legacy_settings_conversion(
3851 - "link modes are only partially reported");
3852 + convert_link_ksettings_to_legacy_settings(&cmd,
3853 + &link_ksettings);
3854
3855 /* send a sensible cmd tag back to user */
3856 cmd.cmd = ETHTOOL_GSET;
3857 diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
3858 index 6b10573cc9fa..d1d9faf3046b 100644
3859 --- a/net/core/sock_diag.c
3860 +++ b/net/core/sock_diag.c
3861 @@ -295,7 +295,7 @@ static int sock_diag_bind(struct net *net, int group)
3862 case SKNLGRP_INET6_UDP_DESTROY:
3863 if (!sock_diag_handlers[AF_INET6])
3864 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3865 - NETLINK_SOCK_DIAG, AF_INET);
3866 + NETLINK_SOCK_DIAG, AF_INET6);
3867 break;
3868 }
3869 return 0;
3870 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
3871 index 506efba33a89..388584b8ff31 100644
3872 --- a/net/ipv6/ip6_output.c
3873 +++ b/net/ipv6/ip6_output.c
3874 @@ -1800,9 +1800,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
3875 cork.base.opt = NULL;
3876 v6_cork.opt = NULL;
3877 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
3878 - if (err)
3879 + if (err) {
3880 + ip6_cork_release(&cork, &v6_cork);
3881 return ERR_PTR(err);
3882 -
3883 + }
3884 if (ipc6->dontfrag < 0)
3885 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
3886
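
ip6_setup_cork() can fail after it has already attached state to the cork, so returning without ip6_cork_release() leaked that state; the fix releases it on the error path too. The generic shape of such a fix, sketched with a stand-in resource:

    #include <stdlib.h>

    struct cork { void *opt; };

    static void cork_release(struct cork *c)
    {
        free(c->opt);
        c->opt = NULL;
    }

    /* Stand-in for ip6_setup_cork(): may fail *after* allocating. */
    static int setup_cork(struct cork *c, int simulate_late_failure)
    {
        c->opt = malloc(32);
        if (!c->opt)
            return -1;
        return simulate_late_failure ? -1 : 0;
    }

    static int make_skb(struct cork *c)
    {
        int err = setup_cork(c, 1);

        if (err) {
            cork_release(c);    /* the fix: undo partial setup on failure */
            return err;
        }
        /* ... build the skb; the cork is released later on success ... */
        cork_release(c);
        return 0;
    }
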
3887 diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
3888 index 11d22d642488..131e6aa954bc 100644
3889 --- a/net/ipv6/ip6_tunnel.c
3890 +++ b/net/ipv6/ip6_tunnel.c
3891 @@ -1080,10 +1080,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
3892 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
3893 neigh_release(neigh);
3894 }
3895 - } else if (!(t->parms.flags &
3896 - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
3897 - /* enable the cache only only if the routing decision does
3898 - * not depend on the current inner header value
3899 + } else if (t->parms.proto != 0 && !(t->parms.flags &
3900 + (IP6_TNL_F_USE_ORIG_TCLASS |
3901 + IP6_TNL_F_USE_ORIG_FWMARK))) {
3902 + /* enable the cache only if neither the outer protocol nor the
3903 + * routing decision depends on the current inner header value
3904 */
3905 use_cache = true;
3906 }
3907 diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
3908 index efa3f48f1ec5..73e8f347802e 100644
3909 --- a/net/mac80211/wep.c
3910 +++ b/net/mac80211/wep.c
3911 @@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
3912 return RX_DROP_UNUSABLE;
3913 ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
3914 /* remove ICV */
3915 - if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
3916 + if (!(status->flag & RX_FLAG_ICV_STRIPPED) &&
3917 + pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
3918 return RX_DROP_UNUSABLE;
3919 }
3920
3921 diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
3922 index 5c71d60f3a64..caa5986cb2e4 100644
3923 --- a/net/mac80211/wpa.c
3924 +++ b/net/mac80211/wpa.c
3925 @@ -295,7 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
3926 return RX_DROP_UNUSABLE;
3927
3928 /* Trim ICV */
3929 - skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
3930 + if (!(status->flag & RX_FLAG_ICV_STRIPPED))
3931 + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
3932
3933 /* Remove IV */
3934 memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen);
3935 diff --git a/net/rds/rdma.c b/net/rds/rdma.c
3936 index de8496e60735..f6027f41cd34 100644
3937 --- a/net/rds/rdma.c
3938 +++ b/net/rds/rdma.c
3939 @@ -524,6 +524,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
3940
3941 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
3942
3943 + if (args->nr_local == 0)
3944 + return -EINVAL;
3945 +
3946 /* figure out the number of pages in the vector */
3947 for (i = 0; i < args->nr_local; i++) {
3948 if (copy_from_user(&vec, &local_vec[i],
3949 @@ -873,6 +876,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
3950 err:
3951 if (page)
3952 put_page(page);
3953 + rm->atomic.op_active = 0;
3954 kfree(rm->atomic.op_notifier);
3955
3956 return ret;
3957 diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
3958 index e0aa30f83c6c..9617b42aaf20 100644
3959 --- a/net/sched/act_gact.c
3960 +++ b/net/sched/act_gact.c
3961 @@ -161,7 +161,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
3962 if (action == TC_ACT_SHOT)
3963 this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
3964
3965 - tm->lastuse = lastuse;
3966 + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
3967 }
3968
3969 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
3970 diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
3971 index 6b07fba5770b..fc3650b06192 100644
3972 --- a/net/sched/act_mirred.c
3973 +++ b/net/sched/act_mirred.c
3974 @@ -211,7 +211,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
3975 struct tcf_t *tm = &m->tcf_tm;
3976
3977 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
3978 - tm->lastuse = lastuse;
3979 + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
3980 }
3981
3982 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
3983 diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
3984 index bd8349759095..845eb9b800f3 100644
3985 --- a/scripts/mod/modpost.c
3986 +++ b/scripts/mod/modpost.c
3987 @@ -838,6 +838,7 @@ static const char *const section_white_list[] =
3988 ".cmem*", /* EZchip */
3989 ".fmt_slot*", /* EZchip */
3990 ".gnu.lto*",
3991 + ".discard.*",
3992 NULL
3993 };
3994
3995 diff --git a/scripts/module-common.lds b/scripts/module-common.lds
3996 index 53234e85192a..9b6e246a45d0 100644
3997 --- a/scripts/module-common.lds
3998 +++ b/scripts/module-common.lds
3999 @@ -4,7 +4,10 @@
4000 * combine them automatically.
4001 */
4002 SECTIONS {
4003 - /DISCARD/ : { *(.discard) }
4004 + /DISCARD/ : {
4005 + *(.discard)
4006 + *(.discard.*)
4007 + }
4008
4009 __ksymtab 0 : { *(SORT(___ksymtab+*)) }
4010 __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) }
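
The frame.h, modpost, and module-common.lds hunks implement one mechanism: STACK_FRAME_NON_STANDARD() drops a marker pointer into a .discard.* section that objtool can read from the object file, that modpost now whitelists, and that the module linker script throws away so it never reaches the final image. A compilable sketch of the annotation trick (the macro mirrors the patched one; weird_func is illustrative):

    #define __used        __attribute__((__used__))
    #define __section(S)  __attribute__((__section__(#S)))

    #define STACK_FRAME_NON_STANDARD(func)                              \
        static void __used                                              \
        __section(.discard.func_stack_frame_non_standard)               \
        *__func_stack_frame_non_standard_##func = (void *)(func)

    static void weird_func(void)
    {
        /* pretend this function uses a non-standard stack frame */
    }
    STACK_FRAME_NON_STANDARD(weird_func);

    int main(void)
    {
        weird_func();
        return 0;
    }
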
4011 diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
4012 index ebc9fdfe64df..3321348fd86b 100644
4013 --- a/sound/core/oss/pcm_oss.c
4014 +++ b/sound/core/oss/pcm_oss.c
4015 @@ -466,7 +466,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
4016 v = snd_pcm_hw_param_last(pcm, params, var, dir);
4017 else
4018 v = snd_pcm_hw_param_first(pcm, params, var, dir);
4019 - snd_BUG_ON(v < 0);
4020 return v;
4021 }
4022
4023 @@ -1370,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4024
4025 if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4026 return tmp;
4027 - mutex_lock(&runtime->oss.params_lock);
4028 while (bytes > 0) {
4029 + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4030 + tmp = -ERESTARTSYS;
4031 + break;
4032 + }
4033 if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4034 tmp = bytes;
4035 if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
4036 @@ -1415,14 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4037 xfer += tmp;
4038 if ((substream->f_flags & O_NONBLOCK) != 0 &&
4039 tmp != runtime->oss.period_bytes)
4040 - break;
4041 + tmp = -EAGAIN;
4042 }
4043 - }
4044 - mutex_unlock(&runtime->oss.params_lock);
4045 - return xfer;
4046 -
4047 err:
4048 - mutex_unlock(&runtime->oss.params_lock);
4049 + mutex_unlock(&runtime->oss.params_lock);
4050 + if (tmp < 0)
4051 + break;
4052 + if (signal_pending(current)) {
4053 + tmp = -ERESTARTSYS;
4054 + break;
4055 + }
4056 + tmp = 0;
4057 + }
4058 return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4059 }
4060
4061 @@ -1470,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4062
4063 if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4064 return tmp;
4065 - mutex_lock(&runtime->oss.params_lock);
4066 while (bytes > 0) {
4067 + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4068 + tmp = -ERESTARTSYS;
4069 + break;
4070 + }
4071 if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4072 if (runtime->oss.buffer_used == 0) {
4073 tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
4074 @@ -1502,12 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4075 bytes -= tmp;
4076 xfer += tmp;
4077 }
4078 - }
4079 - mutex_unlock(&runtime->oss.params_lock);
4080 - return xfer;
4081 -
4082 err:
4083 - mutex_unlock(&runtime->oss.params_lock);
4084 + mutex_unlock(&runtime->oss.params_lock);
4085 + if (tmp < 0)
4086 + break;
4087 + if (signal_pending(current)) {
4088 + tmp = -ERESTARTSYS;
4089 + break;
4090 + }
4091 + tmp = 0;
4092 + }
4093 return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4094 }
4095
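
The reworked OSS read/write loops take params_lock once per period instead of across the whole transfer, use mutex_lock_interruptible() so a blocked caller can be interrupted, and re-check for pending signals after every chunk. The control flow, reduced to a compilable skeleton (kernel primitives are stubbed out; -512 mirrors the kernel's -ERESTARTSYS):

    #include <stddef.h>

    static int  lock_interruptible_stub(void) { return 0; }
    static void unlock_stub(void)             { }
    static int  signal_pending_stub(void)     { return 0; }
    static long copy_chunk(size_t n)          { return (long)n; }

    static long write_loop(size_t bytes, size_t period)
    {
        long xfer = 0, tmp = 0;

        while (bytes > 0) {
            if (lock_interruptible_stub()) {
                tmp = -512;                /* interrupted while waiting */
                break;
            }
            tmp = copy_chunk(bytes < period ? bytes : period);
            unlock_stub();                 /* lock dropped every period */
            if (tmp < 0)
                break;
            bytes -= (size_t)tmp;
            xfer  += tmp;
            if (signal_pending_stub()) {   /* honor signals between chunks */
                tmp = -512;
                break;
            }
            tmp = 0;
        }
        return xfer > 0 ? xfer : tmp;
    }
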
4096 diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
4097 index 727ac44d39f4..a84a1d3d23e5 100644
4098 --- a/sound/core/oss/pcm_plugin.c
4099 +++ b/sound/core/oss/pcm_plugin.c
4100 @@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
4101 snd_pcm_sframes_t frames = size;
4102
4103 plugin = snd_pcm_plug_first(plug);
4104 - while (plugin && frames > 0) {
4105 + while (plugin) {
4106 + if (frames <= 0)
4107 + return frames;
4108 if ((next = plugin->next) != NULL) {
4109 snd_pcm_sframes_t frames1 = frames;
4110 - if (plugin->dst_frames)
4111 + if (plugin->dst_frames) {
4112 frames1 = plugin->dst_frames(plugin, frames);
4113 + if (frames1 <= 0)
4114 + return frames1;
4115 + }
4116 if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
4117 return err;
4118 }
4119 if (err != frames1) {
4120 frames = err;
4121 - if (plugin->src_frames)
4122 + if (plugin->src_frames) {
4123 frames = plugin->src_frames(plugin, frames1);
4124 + if (frames <= 0)
4125 + return frames;
4126 + }
4127 }
4128 } else
4129 dst_channels = NULL;
4130 diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
4131 index c80d80e312e3..e685e779a4b8 100644
4132 --- a/sound/core/pcm_lib.c
4133 +++ b/sound/core/pcm_lib.c
4134 @@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
4135 return changed;
4136 if (params->rmask) {
4137 int err = snd_pcm_hw_refine(pcm, params);
4138 - if (snd_BUG_ON(err < 0))
4139 + if (err < 0)
4140 return err;
4141 }
4142 return snd_pcm_hw_param_value(params, var, dir);
4143 @@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
4144 return changed;
4145 if (params->rmask) {
4146 int err = snd_pcm_hw_refine(pcm, params);
4147 - if (snd_BUG_ON(err < 0))
4148 + if (err < 0)
4149 return err;
4150 }
4151 return snd_pcm_hw_param_value(params, var, dir);
4152 diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
4153 index 54f348a4fb78..cbd20cb8ca11 100644
4154 --- a/sound/drivers/aloop.c
4155 +++ b/sound/drivers/aloop.c
4156 @@ -39,6 +39,7 @@
4157 #include <sound/core.h>
4158 #include <sound/control.h>
4159 #include <sound/pcm.h>
4160 +#include <sound/pcm_params.h>
4161 #include <sound/info.h>
4162 #include <sound/initval.h>
4163
4164 @@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
4165 return 0;
4166 }
4167
4168 -static void params_change_substream(struct loopback_pcm *dpcm,
4169 - struct snd_pcm_runtime *runtime)
4170 -{
4171 - struct snd_pcm_runtime *dst_runtime;
4172 -
4173 - if (dpcm == NULL || dpcm->substream == NULL)
4174 - return;
4175 - dst_runtime = dpcm->substream->runtime;
4176 - if (dst_runtime == NULL)
4177 - return;
4178 - dst_runtime->hw = dpcm->cable->hw;
4179 -}
4180 -
4181 static void params_change(struct snd_pcm_substream *substream)
4182 {
4183 struct snd_pcm_runtime *runtime = substream->runtime;
4184 @@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
4185 cable->hw.rate_max = runtime->rate;
4186 cable->hw.channels_min = runtime->channels;
4187 cable->hw.channels_max = runtime->channels;
4188 - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
4189 - runtime);
4190 - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
4191 - runtime);
4192 }
4193
4194 static int loopback_prepare(struct snd_pcm_substream *substream)
4195 @@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
4196 static int rule_format(struct snd_pcm_hw_params *params,
4197 struct snd_pcm_hw_rule *rule)
4198 {
4199 + struct loopback_pcm *dpcm = rule->private;
4200 + struct loopback_cable *cable = dpcm->cable;
4201 + struct snd_mask m;
4202
4203 - struct snd_pcm_hardware *hw = rule->private;
4204 - struct snd_mask *maskp = hw_param_mask(params, rule->var);
4205 -
4206 - maskp->bits[0] &= (u_int32_t)hw->formats;
4207 - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
4208 - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
4209 - if (! maskp->bits[0] && ! maskp->bits[1])
4210 - return -EINVAL;
4211 - return 0;
4212 + snd_mask_none(&m);
4213 + mutex_lock(&dpcm->loopback->cable_lock);
4214 + m.bits[0] = (u_int32_t)cable->hw.formats;
4215 + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
4216 + mutex_unlock(&dpcm->loopback->cable_lock);
4217 + return snd_mask_refine(hw_param_mask(params, rule->var), &m);
4218 }
4219
4220 static int rule_rate(struct snd_pcm_hw_params *params,
4221 struct snd_pcm_hw_rule *rule)
4222 {
4223 - struct snd_pcm_hardware *hw = rule->private;
4224 + struct loopback_pcm *dpcm = rule->private;
4225 + struct loopback_cable *cable = dpcm->cable;
4226 struct snd_interval t;
4227
4228 - t.min = hw->rate_min;
4229 - t.max = hw->rate_max;
4230 + mutex_lock(&dpcm->loopback->cable_lock);
4231 + t.min = cable->hw.rate_min;
4232 + t.max = cable->hw.rate_max;
4233 + mutex_unlock(&dpcm->loopback->cable_lock);
4234 t.openmin = t.openmax = 0;
4235 t.integer = 0;
4236 return snd_interval_refine(hw_param_interval(params, rule->var), &t);
4237 @@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
4238 static int rule_channels(struct snd_pcm_hw_params *params,
4239 struct snd_pcm_hw_rule *rule)
4240 {
4241 - struct snd_pcm_hardware *hw = rule->private;
4242 + struct loopback_pcm *dpcm = rule->private;
4243 + struct loopback_cable *cable = dpcm->cable;
4244 struct snd_interval t;
4245
4246 - t.min = hw->channels_min;
4247 - t.max = hw->channels_max;
4248 + mutex_lock(&dpcm->loopback->cable_lock);
4249 + t.min = cable->hw.channels_min;
4250 + t.max = cable->hw.channels_max;
4251 + mutex_unlock(&dpcm->loopback->cable_lock);
4252 t.openmin = t.openmax = 0;
4253 t.integer = 0;
4254 return snd_interval_refine(hw_param_interval(params, rule->var), &t);
4255 }
4256
4257 +static void free_cable(struct snd_pcm_substream *substream)
4258 +{
4259 + struct loopback *loopback = substream->private_data;
4260 + int dev = get_cable_index(substream);
4261 + struct loopback_cable *cable;
4262 +
4263 + cable = loopback->cables[substream->number][dev];
4264 + if (!cable)
4265 + return;
4266 + if (cable->streams[!substream->stream]) {
4267 + /* other stream is still alive */
4268 + cable->streams[substream->stream] = NULL;
4269 + } else {
4270 + /* free the cable */
4271 + loopback->cables[substream->number][dev] = NULL;
4272 + kfree(cable);
4273 + }
4274 +}
4275 +
4276 static int loopback_open(struct snd_pcm_substream *substream)
4277 {
4278 struct snd_pcm_runtime *runtime = substream->runtime;
4279 struct loopback *loopback = substream->private_data;
4280 struct loopback_pcm *dpcm;
4281 - struct loopback_cable *cable;
4282 + struct loopback_cable *cable = NULL;
4283 int err = 0;
4284 int dev = get_cable_index(substream);
4285
4286 @@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
4287 if (!cable) {
4288 cable = kzalloc(sizeof(*cable), GFP_KERNEL);
4289 if (!cable) {
4290 - kfree(dpcm);
4291 err = -ENOMEM;
4292 goto unlock;
4293 }
4294 @@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
4295 /* are cached -> they do not reflect the actual state */
4296 err = snd_pcm_hw_rule_add(runtime, 0,
4297 SNDRV_PCM_HW_PARAM_FORMAT,
4298 - rule_format, &runtime->hw,
4299 + rule_format, dpcm,
4300 SNDRV_PCM_HW_PARAM_FORMAT, -1);
4301 if (err < 0)
4302 goto unlock;
4303 err = snd_pcm_hw_rule_add(runtime, 0,
4304 SNDRV_PCM_HW_PARAM_RATE,
4305 - rule_rate, &runtime->hw,
4306 + rule_rate, dpcm,
4307 SNDRV_PCM_HW_PARAM_RATE, -1);
4308 if (err < 0)
4309 goto unlock;
4310 err = snd_pcm_hw_rule_add(runtime, 0,
4311 SNDRV_PCM_HW_PARAM_CHANNELS,
4312 - rule_channels, &runtime->hw,
4313 + rule_channels, dpcm,
4314 SNDRV_PCM_HW_PARAM_CHANNELS, -1);
4315 if (err < 0)
4316 goto unlock;
4317 @@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
4318 else
4319 runtime->hw = cable->hw;
4320 unlock:
4321 + if (err < 0) {
4322 + free_cable(substream);
4323 + kfree(dpcm);
4324 + }
4325 mutex_unlock(&loopback->cable_lock);
4326 return err;
4327 }
4328 @@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
4329 {
4330 struct loopback *loopback = substream->private_data;
4331 struct loopback_pcm *dpcm = substream->runtime->private_data;
4332 - struct loopback_cable *cable;
4333 - int dev = get_cable_index(substream);
4334
4335 loopback_timer_stop(dpcm);
4336 mutex_lock(&loopback->cable_lock);
4337 - cable = loopback->cables[substream->number][dev];
4338 - if (cable->streams[!substream->stream]) {
4339 - /* other stream is still alive */
4340 - cable->streams[substream->stream] = NULL;
4341 - } else {
4342 - /* free the cable */
4343 - loopback->cables[substream->number][dev] = NULL;
4344 - kfree(cable);
4345 - }
4346 + free_cable(substream);
4347 mutex_unlock(&loopback->cable_lock);
4348 return 0;
4349 }
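
The rewritten aloop rules stop trusting a cached snd_pcm_hardware; each callback snapshots the cable's current limits under cable_lock and intersects them with the parameter being refined. Stripped of the ALSA types, an interval refine is just an intersection, roughly as below (simplified: the real snd_interval_refine also handles open endpoints and integer-only intervals):

    struct interval { unsigned int min, max; };

    /* Intersect *i with the constraint *t; returns <0 when empty. */
    static int interval_refine(struct interval *i, const struct interval *t)
    {
        int changed = 0;

        if (i->min < t->min) { i->min = t->min; changed = 1; }
        if (i->max > t->max) { i->max = t->max; changed = 1; }
        return (i->min > i->max) ? -1 : changed;
    }
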
4350 diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
4351 index b8dadb050d2b..a688a857a7ae 100644
4352 --- a/tools/objtool/builtin-check.c
4353 +++ b/tools/objtool/builtin-check.c
4354 @@ -51,7 +51,7 @@ struct instruction {
4355 unsigned int len, state;
4356 unsigned char type;
4357 unsigned long immediate;
4358 - bool alt_group, visited;
4359 + bool alt_group, visited, ignore_alts;
4360 struct symbol *call_dest;
4361 struct instruction *jump_dest;
4362 struct list_head alts;
4363 @@ -352,6 +352,40 @@ static void add_ignores(struct objtool_file *file)
4364 }
4365 }
4366
4367 +/*
4368 + * FIXME: For now, just ignore any alternatives which add retpolines. This is
4369 + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
4370 + * But it at least allows objtool to understand the control flow *around* the
4371 + * retpoline.
4372 + */
4373 +static int add_nospec_ignores(struct objtool_file *file)
4374 +{
4375 + struct section *sec;
4376 + struct rela *rela;
4377 + struct instruction *insn;
4378 +
4379 + sec = find_section_by_name(file->elf, ".rela.discard.nospec");
4380 + if (!sec)
4381 + return 0;
4382 +
4383 + list_for_each_entry(rela, &sec->rela_list, list) {
4384 + if (rela->sym->type != STT_SECTION) {
4385 + WARN("unexpected relocation symbol type in %s", sec->name);
4386 + return -1;
4387 + }
4388 +
4389 + insn = find_insn(file, rela->sym->sec, rela->addend);
4390 + if (!insn) {
4391 + WARN("bad .discard.nospec entry");
4392 + return -1;
4393 + }
4394 +
4395 + insn->ignore_alts = true;
4396 + }
4397 +
4398 + return 0;
4399 +}
4400 +
4401 /*
4402 * Find the destination instructions for all jumps.
4403 */
4404 @@ -382,6 +416,13 @@ static int add_jump_destinations(struct objtool_file *file)
4405 } else if (rela->sym->sec->idx) {
4406 dest_sec = rela->sym->sec;
4407 dest_off = rela->sym->sym.st_value + rela->addend + 4;
4408 + } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
4409 + /*
4410 + * Retpoline jumps are really dynamic jumps in
4411 + * disguise, so convert them accordingly.
4412 + */
4413 + insn->type = INSN_JUMP_DYNAMIC;
4414 + continue;
4415 } else {
4416 /* sibling call */
4417 insn->jump_dest = 0;
4418 @@ -428,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file)
4419 dest_off = insn->offset + insn->len + insn->immediate;
4420 insn->call_dest = find_symbol_by_offset(insn->sec,
4421 dest_off);
4422 + /*
4423 + * FIXME: Thanks to retpolines, it's now considered
4424 + * normal for a function to call within itself. So
4425 + * disable this warning for now.
4426 + */
4427 +#if 0
4428 if (!insn->call_dest) {
4429 WARN_FUNC("can't find call dest symbol at offset 0x%lx",
4430 insn->sec, insn->offset, dest_off);
4431 return -1;
4432 }
4433 +#endif
4434 } else if (rela->sym->type == STT_SECTION) {
4435 insn->call_dest = find_symbol_by_offset(rela->sym->sec,
4436 rela->addend+4);
4437 @@ -594,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file)
4438 return ret;
4439
4440 list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
4441 - alt = malloc(sizeof(*alt));
4442 - if (!alt) {
4443 - WARN("malloc failed");
4444 - ret = -1;
4445 - goto out;
4446 - }
4447
4448 orig_insn = find_insn(file, special_alt->orig_sec,
4449 special_alt->orig_off);
4450 @@ -610,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file)
4451 goto out;
4452 }
4453
4454 + /* Ignore retpoline alternatives. */
4455 + if (orig_insn->ignore_alts)
4456 + continue;
4457 +
4458 new_insn = NULL;
4459 if (!special_alt->group || special_alt->new_len) {
4460 new_insn = find_insn(file, special_alt->new_sec,
4461 @@ -635,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file)
4462 goto out;
4463 }
4464
4465 + alt = malloc(sizeof(*alt));
4466 + if (!alt) {
4467 + WARN("malloc failed");
4468 + ret = -1;
4469 + goto out;
4470 + }
4471 +
4472 alt->insn = new_insn;
4473 list_add_tail(&alt->list, &orig_insn->alts);
4474
4475 @@ -854,6 +907,10 @@ static int decode_sections(struct objtool_file *file)
4476
4477 add_ignores(file);
4478
4479 + ret = add_nospec_ignores(file);
4480 + if (ret)
4481 + return ret;
4482 +
4483 ret = add_jump_destinations(file);
4484 if (ret)
4485 return ret;
4486 @@ -1173,6 +1230,14 @@ static int validate_uncallable_instructions(struct objtool_file *file)
4487
4488 for_each_insn(file, insn) {
4489 if (!insn->visited && insn->type == INSN_RETURN) {
4490 +
4491 + /*
4492 + * Don't warn about call instructions in unvisited
4493 + * retpoline alternatives.
4494 + */
4495 + if (!strcmp(insn->sec->name, ".altinstr_replacement"))
4496 + continue;
4497 +
4498 WARN_FUNC("return instruction outside of a callable function",
4499 insn->sec, insn->offset);
4500 warnings++;
4501 @@ -1229,7 +1294,7 @@ int cmd_check(int argc, const char **argv)
4502
4503 INIT_LIST_HEAD(&file.insn_list);
4504 hash_init(file.insn_hash);
4505 - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
4506 + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
4507 file.rodata = find_section_by_name(file.elf, ".rodata");
4508 file.ignore_unreachables = false;
4509 file.c_file = find_section_by_name(file.elf, ".comment");
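
Two of the objtool changes teach the checker what a retpoline looks like: a direct jump whose target symbol contains _indirect_thunk_ is reclassified as a dynamic jump, and any site listed in .rela.discard.nospec has its alternatives ignored so objtool can still follow the control flow around the retpoline. The reclassification is essentially a name match, as in this sketch (enum values illustrative):

    #include <string.h>

    enum insn_type { INSN_JUMP_UNCONDITIONAL, INSN_JUMP_DYNAMIC };

    /* Retpoline jumps are indirect jumps in disguise. */
    static enum insn_type classify_jump(const char *target_sym)
    {
        if (target_sym && strstr(target_sym, "_indirect_thunk_"))
            return INSN_JUMP_DYNAMIC;
        return INSN_JUMP_UNCONDITIONAL;
    }
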
4510 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
4511 index 6300c1a41ff6..4af37bfe4aea 100644
4512 --- a/tools/testing/selftests/x86/Makefile
4513 +++ b/tools/testing/selftests/x86/Makefile
4514 @@ -6,7 +6,7 @@ include ../lib.mk
4515
4516 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
4517 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
4518 - protection_keys
4519 + protection_keys test_vsyscall
4520 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
4521 test_FCMOV test_FCOMI test_FISTTP \
4522 vdso_restorer
4523 diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
4524 new file mode 100644
4525 index 000000000000..6e0bd52ad53d
4526 --- /dev/null
4527 +++ b/tools/testing/selftests/x86/test_vsyscall.c
4528 @@ -0,0 +1,500 @@
4529 +/* SPDX-License-Identifier: GPL-2.0 */
4530 +
4531 +#define _GNU_SOURCE
4532 +
4533 +#include <stdio.h>
4534 +#include <sys/time.h>
4535 +#include <time.h>
4536 +#include <stdlib.h>
4537 +#include <sys/syscall.h>
4538 +#include <unistd.h>
4539 +#include <dlfcn.h>
4540 +#include <string.h>
4541 +#include <inttypes.h>
4542 +#include <signal.h>
4543 +#include <sys/ucontext.h>
4544 +#include <errno.h>
4545 +#include <err.h>
4546 +#include <sched.h>
4547 +#include <stdbool.h>
4548 +#include <setjmp.h>
4549 +
4550 +#ifdef __x86_64__
4551 +# define VSYS(x) (x)
4552 +#else
4553 +# define VSYS(x) 0
4554 +#endif
4555 +
4556 +#ifndef SYS_getcpu
4557 +# ifdef __x86_64__
4558 +# define SYS_getcpu 309
4559 +# else
4560 +# define SYS_getcpu 318
4561 +# endif
4562 +#endif
4563 +
4564 +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
4565 + int flags)
4566 +{
4567 + struct sigaction sa;
4568 + memset(&sa, 0, sizeof(sa));
4569 + sa.sa_sigaction = handler;
4570 + sa.sa_flags = SA_SIGINFO | flags;
4571 + sigemptyset(&sa.sa_mask);
4572 + if (sigaction(sig, &sa, 0))
4573 + err(1, "sigaction");
4574 +}
4575 +
4576 +/* vsyscalls and vDSO */
4577 +bool should_read_vsyscall = false;
4578 +
4579 +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
4580 +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
4581 +gtod_t vdso_gtod;
4582 +
4583 +typedef int (*vgettime_t)(clockid_t, struct timespec *);
4584 +vgettime_t vdso_gettime;
4585 +
4586 +typedef long (*time_func_t)(time_t *t);
4587 +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
4588 +time_func_t vdso_time;
4589 +
4590 +typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
4591 +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
4592 +getcpu_t vdso_getcpu;
4593 +
4594 +static void init_vdso(void)
4595 +{
4596 + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
4597 + if (!vdso)
4598 + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
4599 + if (!vdso) {
4600 + printf("[WARN]\tfailed to find vDSO\n");
4601 + return;
4602 + }
4603 +
4604 + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
4605 + if (!vdso_gtod)
4606 + printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
4607 +
4608 + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
4609 + if (!vdso_gettime)
4610 + printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
4611 +
4612 + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
4613 + if (!vdso_time)
4614 + printf("[WARN]\tfailed to find time in vDSO\n");
4615 +
4616 + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
4617 + if (!vdso_getcpu) {
4618 + /* getcpu() was never wired up in the 32-bit vDSO. */
4619 + printf("[%s]\tfailed to find getcpu in vDSO\n",
4620 + sizeof(long) == 8 ? "WARN" : "NOTE");
4621 + }
4622 +}
4623 +
4624 +static int init_vsys(void)
4625 +{
4626 +#ifdef __x86_64__
4627 + int nerrs = 0;
4628 + FILE *maps;
4629 + char line[128];
4630 + bool found = false;
4631 +
4632 + maps = fopen("/proc/self/maps", "r");
4633 + if (!maps) {
4634 + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
4635 + should_read_vsyscall = true;
4636 + return 0;
4637 + }
4638 +
4639 + while (fgets(line, sizeof(line), maps)) {
4640 + char r, x;
4641 + void *start, *end;
4642 + char name[128];
4643 + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
4644 + &start, &end, &r, &x, name) != 5)
4645 + continue;
4646 +
4647 + if (strcmp(name, "[vsyscall]"))
4648 + continue;
4649 +
4650 + printf("\tvsyscall map: %s", line);
4651 +
4652 + if (start != (void *)0xffffffffff600000 ||
4653 + end != (void *)0xffffffffff601000) {
4654 + printf("[FAIL]\taddress range is nonsense\n");
4655 + nerrs++;
4656 + }
4657 +
4658 + printf("\tvsyscall permissions are %c-%c\n", r, x);
4659 + should_read_vsyscall = (r == 'r');
4660 + if (x != 'x') {
4661 + vgtod = NULL;
4662 + vtime = NULL;
4663 + vgetcpu = NULL;
4664 + }
4665 +
4666 + found = true;
4667 + break;
4668 + }
4669 +
4670 + fclose(maps);
4671 +
4672 + if (!found) {
4673 + printf("\tno vsyscall map in /proc/self/maps\n");
4674 + should_read_vsyscall = false;
4675 + vgtod = NULL;
4676 + vtime = NULL;
4677 + vgetcpu = NULL;
4678 + }
4679 +
4680 + return nerrs;
4681 +#else
4682 + return 0;
4683 +#endif
4684 +}
4685 +
4686 +/* syscalls */
4687 +static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
4688 +{
4689 + return syscall(SYS_gettimeofday, tv, tz);
4690 +}
4691 +
4692 +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
4693 +{
4694 + return syscall(SYS_clock_gettime, id, ts);
4695 +}
4696 +
4697 +static inline long sys_time(time_t *t)
4698 +{
4699 + return syscall(SYS_time, t);
4700 +}
4701 +
4702 +static inline long sys_getcpu(unsigned *cpu, unsigned *node,
4703 + void *cache)
4704 +{
4705 + return syscall(SYS_getcpu, cpu, node, cache);
4706 +}
4707 +
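+/*
+ * Touching a page that may be unmapped or execute-only will fault; the
+ * SIGSEGV handler below siglongjmp()s back into the test so a fault is
+ * reported as a result instead of killing the process.
+ */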
4708 +static jmp_buf jmpbuf;
4709 +
4710 +static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
4711 +{
4712 + siglongjmp(jmpbuf, 1);
4713 +}
4714 +
4715 +static double tv_diff(const struct timeval *a, const struct timeval *b)
4716 +{
4717 + return (double)(a->tv_sec - b->tv_sec) +
4718 + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
4719 +}
4720 +
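+/*
+ * Consistency check: a vDSO/vsyscall gettimeofday() result sampled
+ * between two syscall results must not precede the first or follow the
+ * second, i.e. both offsets computed below must be non-negative.
+ */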
4721 +static int check_gtod(const struct timeval *tv_sys1,
4722 + const struct timeval *tv_sys2,
4723 + const struct timezone *tz_sys,
4724 + const char *which,
4725 + const struct timeval *tv_other,
4726 + const struct timezone *tz_other)
4727 +{
4728 + int nerrs = 0;
4729 + double d1, d2;
4730 +
4731 + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
4732 + printf("[FAIL] %s tz mismatch\n", which);
4733 + nerrs++;
4734 + }
4735 +
4736 + d1 = tv_diff(tv_other, tv_sys1);
4737 + d2 = tv_diff(tv_sys2, tv_other);
4738 + printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
4739 +
4740 + if (d1 < 0 || d2 < 0) {
4741 + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
4742 + nerrs++;
4743 + } else {
4744 + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
4745 + }
4746 +
4747 + return nerrs;
4748 +}
4749 +
4750 +static int test_gtod(void)
4751 +{
4752 + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
4753 + struct timezone tz_sys, tz_vdso, tz_vsys;
4754 + long ret_vdso = -1;
4755 + long ret_vsys = -1;
4756 + int nerrs = 0;
4757 +
4758 + printf("[RUN]\ttest gettimeofday()\n");
4759 +
4760 + if (sys_gtod(&tv_sys1, &tz_sys) != 0)
4761 + err(1, "syscall gettimeofday");
4762 + if (vdso_gtod)
4763 + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
4764 + if (vgtod)
4765 + ret_vsys = vgtod(&tv_vsys, &tz_vsys);
4766 + if (sys_gtod(&tv_sys2, &tz_sys) != 0)
4767 + err(1, "syscall gettimeofday");
4768 +
4769 + if (vdso_gtod) {
4770 + if (ret_vdso == 0) {
4771 + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
4772 + } else {
4773 + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
4774 + nerrs++;
4775 + }
4776 + }
4777 +
4778 + if (vgtod) {
4779 + if (ret_vsys == 0) {
4780 + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
4781 + } else {
4782 + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
4783 + nerrs++;
4784 + }
4785 + }
4786 +
4787 + return nerrs;
4788 +}
4789 +
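+/*
+ * time() both returns the time and stores it through its argument.  For
+ * each implementation the two must agree, and the value must fall inside
+ * the bracketing pair of syscall results.
+ */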
4790 +static int test_time(void)
+{
4791 + int nerrs = 0;
4792 +
4793 + printf("[RUN]\ttest time()\n");
4794 + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
4795 + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
4796 + t_sys1 = sys_time(&t2_sys1);
4797 + if (vdso_time)
4798 + t_vdso = vdso_time(&t2_vdso);
4799 + if (vtime)
4800 + t_vsys = vtime(&t2_vsys);
4801 + t_sys2 = sys_time(&t2_sys2);
4802 + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
4803 + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
4804 + nerrs++;
4805 + return nerrs;
4806 + }
4807 +
4808 + if (vdso_time) {
4809 + if (t_vdso < 0 || t_vdso != t2_vdso) {
4810 + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
4811 + nerrs++;
4812 + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
4813 + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
4814 + nerrs++;
4815 + } else {
4816 + printf("[OK]\tvDSO time() is okay\n");
4817 + }
4818 + }
4819 +
4820 + if (vtime) {
4821 + if (t_vsys < 0 || t_vsys != t2_vsys) {
4822 + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
4823 + nerrs++;
4824 + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
4825 + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
4826 + nerrs++;
4827 + } else {
4828 + printf("[OK]\tvsyscall time() is okay\n");
4829 + }
4830 + }
4831 +
4832 + return nerrs;
4833 +}
4834 +
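+/*
+ * getcpu() results are only predictable when we know which CPU we are
+ * running on, so the test first pins itself with sched_setaffinity().
+ * There is no a priori expected node; the first implementation to
+ * succeed defines it and the others must match.
+ */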
4835 +static int test_getcpu(int cpu)
4836 +{
4837 + int nerrs = 0;
4838 + long ret_sys, ret_vdso = -1, ret_vsys = -1;
4839 +
4840 + printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
4841 +
4842 + cpu_set_t cpuset;
4843 + CPU_ZERO(&cpuset);
4844 + CPU_SET(cpu, &cpuset);
4845 + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
4846 + printf("[SKIP]\tfailed to force CPU %d\n", cpu);
4847 + return nerrs;
4848 + }
4849 +
4850 + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
4851 + unsigned node = 0;
4852 + bool have_node = false;
4853 + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
4854 + if (vdso_getcpu)
4855 + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
4856 + if (vgetcpu)
4857 + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
4858 +
4859 + if (ret_sys == 0) {
4860 + if (cpu_sys != cpu) {
4861 + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
4862 + nerrs++;
4863 + }
4864 +
4865 + have_node = true;
4866 + node = node_sys;
4867 + }
4868 +
4869 + if (vdso_getcpu) {
4870 + if (ret_vdso) {
4871 + printf("[FAIL]\tvDSO getcpu() failed\n");
4872 + nerrs++;
4873 + } else {
4874 + if (!have_node) {
4875 + have_node = true;
4876 + node = node_vdso;
4877 + }
4878 +
4879 + if (cpu_vdso != cpu) {
4880 + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
4881 + nerrs++;
4882 + } else {
4883 + printf("[OK]\tvDSO reported correct CPU\n");
4884 + }
4885 +
4886 + if (node_vdso != node) {
4887 + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
4888 + nerrs++;
4889 + } else {
4890 + printf("[OK]\tvDSO reported correct node\n");
4891 + }
4892 + }
4893 + }
4894 +
4895 + if (vgetcpu) {
4896 + if (ret_vsys) {
4897 + printf("[FAIL]\tvsyscall getcpu() failed\n");
4898 + nerrs++;
4899 + } else {
4900 + if (!have_node) {
4901 + have_node = true;
4902 + node = node_vsys;
4903 + }
4904 +
4905 + if (cpu_vsys != cpu) {
4906 + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
4907 + nerrs++;
4908 + } else {
4909 + printf("[OK]\tvsyscall reported correct CPU\n");
4910 + }
4911 +
4912 + if (node_vsys != node) {
4913 + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
4914 + nerrs++;
4915 + } else {
4916 + printf("[OK]\tvsyscall reported correct node\n");
4917 + }
4918 + }
4919 + }
4920 +
4921 + return nerrs;
4922 +}
4923 +
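+/*
+ * Read probe: dereference the vsyscall page and compare the outcome with
+ * what /proc/self/maps promised.  A fault is caught by the SIGSEGV
+ * handler installed in main(), which siglongjmp()s back here.
+ */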
4924 +static int test_vsys_r(void)
4925 +{
4926 +#ifdef __x86_64__
4927 + printf("[RUN]\tChecking read access to the vsyscall page\n");
4928 + bool can_read;
4929 + if (sigsetjmp(jmpbuf, 1) == 0) {
4930 + *(volatile int *)0xffffffffff600000;
4931 + can_read = true;
4932 + } else {
4933 + can_read = false;
4934 + }
4935 +
4936 + if (can_read && !should_read_vsyscall) {
4937 + printf("[FAIL]\tWe have read access, but we shouldn't\n");
4938 + return 1;
4939 + } else if (!can_read && should_read_vsyscall) {
4940 + printf("[FAIL]\tWe don't have read access, but we should\n");
4941 + return 1;
4942 + } else {
4943 + printf("[OK]\tgot expected result\n");
4944 + }
4945 +#endif
4946 +
4947 + return 0;
4948 +}
4949 +
4950 +
4951 +#ifdef __x86_64__
4952 +#define X86_EFLAGS_TF (1UL << 8)
4953 +static volatile sig_atomic_t num_vsyscall_traps;
4954 +
4955 +static unsigned long get_eflags(void)
4956 +{
4957 + unsigned long eflags;
4958 + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
4959 + return eflags;
4960 +}
4961 +
4962 +static void set_eflags(unsigned long eflags)
4963 +{
4964 + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
4965 +}
4966 +
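+/*
+ * With the trap flag (TF, bit 8 of EFLAGS) set, the CPU delivers a debug
+ * trap after every instruction.  The handler counts only traps whose RIP
+ * falls inside the 4 KiB vsyscall page -- that is what the xor-then-mask
+ * test below checks -- so the count approximates how many instructions
+ * executed there.
+ */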
4967 +static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
4968 +{
4969 + ucontext_t *ctx = (ucontext_t *)ctx_void;
4970 + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
4971 +
4972 + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
4973 + num_vsyscall_traps++;
4974 +}
4975 +
4976 +static int test_native_vsyscall(void)
4977 +{
4978 + time_t tmp;
4979 + bool is_native;
4980 +
4981 + if (!vtime)
4982 + return 0;
4983 +
4984 + printf("[RUN]\tchecking for native vsyscall\n");
4985 + sethandler(SIGTRAP, sigtrap, 0);
4986 + set_eflags(get_eflags() | X86_EFLAGS_TF);
4987 + vtime(&tmp);
4988 + set_eflags(get_eflags() & ~X86_EFLAGS_TF);
4989 +
4990 + /*
4991 + * If vsyscalls are emulated, we expect a single trap in the
4992 + * vsyscall page -- the call instruction will trap with RIP
4993 + * pointing to the entry point before emulation takes over.
4994 + * In native mode, we expect two traps, since whatever code
4995 + * the vsyscall page contains will be more than just a ret
4996 + * instruction.
4997 + */
4998 + is_native = (num_vsyscall_traps > 1);
4999 +
5000 + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
5001 + (is_native ? "native" : "emulated"),
5002 + (int)num_vsyscall_traps);
5003 +
5004 + return 0;
5005 +}
5006 +#endif
5007 +
5008 +int main(int argc, char **argv)
5009 +{
5010 + int nerrs = 0;
5011 +
5012 + init_vdso();
5013 + nerrs += init_vsys();
5014 +
5015 + nerrs += test_gtod();
5016 + nerrs += test_time();
5017 + nerrs += test_getcpu(0);
5018 + nerrs += test_getcpu(1);
5019 +
5020 + sethandler(SIGSEGV, sigsegv, 0);
5021 + nerrs += test_vsys_r();
5022 +
5023 +#ifdef __x86_64__
5024 + nerrs += test_native_vsyscall();
5025 +#endif
5026 +
5027 + return nerrs ? 1 : 0;
5028 +}