Magellan Linux

Annotation of /trunk/kernel-alx-legacy/patches-4.9/0176-4.9.77-all-fixes.patch

Parent Directory | Revision Log


Revision 3608
Fri Aug 14 07:34:29 2020 UTC by niro
File size: 163168 byte(s)
-added kernel-alx-legacy pkg
1 niro 3608 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2     index 498741737055..dfd56ec7a850 100644
3     --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4     +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5     @@ -350,3 +350,19 @@ Contact: Linux ARM Kernel Mailing list <linux-arm-kernel@lists.infradead.org>
6     Description: AArch64 CPU registers
7     'identification' directory exposes the CPU ID registers for
8     identifying model and revision of the CPU.
9     +
10     +What: /sys/devices/system/cpu/vulnerabilities
11     + /sys/devices/system/cpu/vulnerabilities/meltdown
12     + /sys/devices/system/cpu/vulnerabilities/spectre_v1
13     + /sys/devices/system/cpu/vulnerabilities/spectre_v2
14     +Date: January 2018
15     +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
16     +Description: Information about CPU vulnerabilities
17     +
18     + The files are named after the code names of CPU
19     + vulnerabilities. The output of those files reflects the
20     + state of the CPUs in the system. Possible output values:
21     +
22     + "Not affected" CPU is not affected by the vulnerability
23     + "Vulnerable" CPU is affected and no mitigation in effect
24     + "Mitigation: $M" CPU is affected and mitigation $M is in effect
25     diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
26     index 5d2676d043de..4c2667aa4634 100644
27     --- a/Documentation/kernel-parameters.txt
28     +++ b/Documentation/kernel-parameters.txt
29     @@ -2691,6 +2691,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
30     nosmt [KNL,S390] Disable symmetric multithreading (SMT).
31     Equivalent to smt=1.
32    
33     + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
34     + (indirect branch prediction) vulnerability. System may
35     + allow data leaks with this option, which is equivalent
36     + to spectre_v2=off.
37     +
38     noxsave [BUGS=X86] Disables x86 extended register state save
39     and restore using xsave. The kernel will fallback to
40     enabling legacy floating-point and sse state.
41     @@ -2763,8 +2768,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
42    
43     nojitter [IA-64] Disables jitter checking for ITC timers.
44    
45     - nopti [X86-64] Disable KAISER isolation of kernel from user.
46     -
47     no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
48    
49     no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
50     @@ -3327,11 +3330,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
51     pt. [PARIDE]
52     See Documentation/blockdev/paride.txt.
53    
54     - pti= [X86_64]
55     - Control KAISER user/kernel address space isolation:
56     - on - enable
57     - off - disable
58     - auto - default setting
59     + pti= [X86_64] Control Page Table Isolation of user and
60     + kernel address spaces. Disabling this feature
61     + removes hardening, but improves performance of
62     + system calls and interrupts.
63     +
64     + on - unconditionally enable
65     + off - unconditionally disable
66     + auto - kernel detects whether your CPU model is
67     + vulnerable to issues that PTI mitigates
68     +
69     + Not specifying this option is equivalent to pti=auto.
70     +
71     + nopti [X86_64]
72     + Equivalent to pti=off
73    
74     pty.legacy_count=
75     [KNL] Number of legacy pty's. Overwrites compiled-in
76     @@ -3937,6 +3949,29 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
77     sonypi.*= [HW] Sony Programmable I/O Control Device driver
78     See Documentation/laptops/sonypi.txt
79    
80     + spectre_v2= [X86] Control mitigation of Spectre variant 2
81     + (indirect branch speculation) vulnerability.
82     +
83     + on - unconditionally enable
84     + off - unconditionally disable
85     + auto - kernel detects whether your CPU model is
86     + vulnerable
87     +
88     + Selecting 'on' will, and 'auto' may, choose a
89     + mitigation method at run time according to the
90     + CPU, the available microcode, the setting of the
91     + CONFIG_RETPOLINE configuration option, and the
92     + compiler with which the kernel was built.
93     +
94     + Specific mitigations can also be selected manually:
95     +
96     + retpoline - replace indirect branches
97     + retpoline,generic - google's original retpoline
98     + retpoline,amd - AMD-specific minimal thunk
99     +
100     + Not specifying this option is equivalent to
101     + spectre_v2=auto.
102     +
103     spia_io_base= [HW,MTD]
104     spia_fio_base=
105     spia_pedr=
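
Taken together, the parameters documented above can be combined on a single kernel command line; a boot loader entry might pass, for example (kernel path and root device are illustrative):

    linux /boot/vmlinuz-4.9.77 root=/dev/sda1 ro pti=on spectre_v2=retpoline

Omitting both options is equivalent to pti=auto spectre_v2=auto.
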
106     diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
107     new file mode 100644
108     index 000000000000..d11eff61fc9a
109     --- /dev/null
110     +++ b/Documentation/x86/pti.txt
111     @@ -0,0 +1,186 @@
112     +Overview
113     +========
114     +
115     +Page Table Isolation (pti, previously known as KAISER[1]) is a
116     +countermeasure against attacks on the shared user/kernel address
117     +space such as the "Meltdown" approach[2].
118     +
119     +To mitigate this class of attacks, we create an independent set of
120     +page tables for use only when running userspace applications. When
121     +the kernel is entered via syscalls, interrupts or exceptions, the
122     +page tables are switched to the full "kernel" copy. When the system
123     +switches back to user mode, the user copy is used again.
124     +
125     +The userspace page tables contain only a minimal amount of kernel
126     +data: only what is needed to enter/exit the kernel such as the
127     +entry/exit functions themselves and the interrupt descriptor table
128     +(IDT). There are a few strictly unnecessary things that get mapped
129     +such as the first C function when entering an interrupt (see
130     +comments in pti.c).
131     +
132     +This approach helps to ensure that side-channel attacks leveraging
133     +the paging structures do not function when PTI is enabled. It can be
134     +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
135     +Once enabled at compile-time, it can be disabled at boot with the
136     +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
137     +
138     +Page Table Management
139     +=====================
140     +
141     +When PTI is enabled, the kernel manages two sets of page tables.
142     +The first set is very similar to the single set which is present in
143     +kernels without PTI. This includes a complete mapping of userspace
144     +that the kernel can use for things like copy_to_user().
145     +
146     +Although _complete_, the user portion of the kernel page tables is
147     +crippled by setting the NX bit in the top level. This ensures
148     +that any missed kernel->user CR3 switch will immediately crash
149     +userspace upon executing its first instruction.
150     +
151     +The userspace page tables map only the kernel data needed to enter
152     +and exit the kernel. This data is entirely contained in the 'struct
153     +cpu_entry_area' structure which is placed in the fixmap which gives
154     +each CPU's copy of the area a compile-time-fixed virtual address.
155     +
156     +For new userspace mappings, the kernel makes the entries in its
157     +page tables like normal. The only difference is when the kernel
158     +makes entries in the top (PGD) level. In addition to setting the
159     +entry in the main kernel PGD, a copy of the entry is made in the
160     +userspace page tables' PGD.
161     +
162     +This sharing at the PGD level also inherently shares all the lower
163     +layers of the page tables. This leaves a single, shared set of
164     +userspace page tables to manage. One PTE to lock, one set of
165     +accessed bits, dirty bits, etc...
166     +
167     +Overhead
168     +========
169     +
170     +Protection against side-channel attacks is important. But,
171     +this protection comes at a cost:
172     +
173     +1. Increased Memory Use
174     + a. Each process now needs an order-1 PGD instead of order-0.
175     + (Consumes an additional 4k per process).
176     + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
177     + aligned so that it can be mapped by setting a single PMD
178     + entry. This consumes nearly 2MB of RAM once the kernel
179     + is decompressed, but no space in the kernel image itself.
180     +
181     +2. Runtime Cost
182     + a. CR3 manipulation to switch between the page table copies
183     + must be done at interrupt, syscall, and exception entry
184     + and exit (it can be skipped when the kernel is interrupted,
185     + though.) Moves to CR3 are on the order of a hundred
186     + cycles, and are required at every entry and exit.
187     + b. A "trampoline" must be used for SYSCALL entry. This
188     + trampoline depends on a smaller set of resources than the
189     + non-PTI SYSCALL entry code, so requires mapping fewer
190     + things into the userspace page tables. The downside is
191     + that stacks must be switched at entry time.
192     + c. Global pages are disabled for all kernel structures not
193     + mapped into both kernel and userspace page tables. This
194     + feature of the MMU allows different processes to share TLB
195     + entries mapping the kernel. Losing the feature means more
196     + TLB misses after a context switch. The actual loss of
197     + performance is very small, however, never exceeding 1%.
198     + d. Process Context IDentifiers (PCID) is a CPU feature that
199     + allows us to skip flushing the entire TLB when switching page
200     + tables by setting a special bit in CR3 when the page tables
201     + are changed. This makes switching the page tables (at context
202     + switch, or kernel entry/exit) cheaper. But, on systems with
203     + PCID support, the context switch code must flush both the user
204     + and kernel entries out of the TLB. The user PCID TLB flush is
205     + deferred until the exit to userspace, minimizing the cost.
206     + See intel.com/sdm for the gory PCID/INVPCID details.
207     + e. The userspace page tables must be populated for each new
208     + process. Even without PTI, the shared kernel mappings
209     + are created by copying top-level (PGD) entries into each
210     + new process. But, with PTI, there are now *two* kernel
211     + mappings: one in the kernel page tables that maps everything
212     + and one for the entry/exit structures. At fork(), we need to
213     + copy both.
214     + f. In addition to the fork()-time copying, there must also
215     + be an update to the userspace PGD any time a set_pgd() is done
216     + on a PGD used to map userspace. This ensures that the kernel
217     + and userspace copies always map the same userspace
218     + memory.
219     + g. On systems without PCID support, each CR3 write flushes
220     + the entire TLB. That means that each syscall, interrupt
221     + or exception flushes the TLB.
222     + h. INVPCID is a TLB-flushing instruction which allows flushing
223     + of TLB entries for non-current PCIDs. Some systems support
224     + PCIDs, but do not support INVPCID. On these systems, addresses
225     + can only be flushed from the TLB for the current PCID. When
226     + flushing a kernel address, we need to flush all PCIDs, so a
227     + single kernel address flush will require a TLB-flushing CR3
228     + write upon the next use of every PCID.
229     +
230     +Possible Future Work
231     +====================
232     +1. We can be more careful about not actually writing to CR3
233     + unless its value is actually changed.
234     +2. Allow PTI to be enabled/disabled at runtime in addition to the
235     + boot-time switching.
236     +
237     +Testing
238     +========
239     +
240     +To test stability of PTI, the following test procedure is recommended,
241     +ideally doing all of these in parallel:
242     +
243     +1. Set CONFIG_DEBUG_ENTRY=y
244     +2. Run several copies of all of the tools/testing/selftests/x86/ tests
245     + (excluding MPX and protection_keys) in a loop on multiple CPUs for
246     + several minutes. These tests frequently uncover corner cases in the
247     + kernel entry code. In general, old kernels might cause these tests
248     + themselves to crash, but they should never crash the kernel.
249     +3. Run the 'perf' tool in a mode (top or record) that generates many
250     + frequent performance monitoring non-maskable interrupts (see "NMI"
251     + in /proc/interrupts). This exercises the NMI entry/exit code which
252     + is known to trigger bugs in code paths that did not expect to be
253     + interrupted, including nested NMIs. Using "-c" boosts the rate of
254     + NMIs, and using two -c with separate counters encourages nested NMIs
255     + and less deterministic behavior.
256     +
257     + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
258     +
259     +4. Launch a KVM virtual machine.
260     +5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
261     + This has been a lightly-tested code path and needs extra scrutiny.
262     +
263     +Debugging
264     +=========
265     +
266     +Bugs in PTI cause a few different signatures of crashes
267     +that are worth noting here.
268     +
269     + * Failures of the selftests/x86 code. Usually a bug in one of the
270     + more obscure corners of entry_64.S
271     + * Crashes in early boot, especially around CPU bringup. Bugs
272     + in the trampoline code or mappings cause these.
273     + * Crashes at the first interrupt. Caused by bugs in entry_64.S,
274     + like screwing up a page table switch. Also caused by
275     + incorrectly mapping the IRQ handler entry code.
276     + * Crashes at the first NMI. The NMI code is separate from main
277     + interrupt handlers and can have bugs that do not affect
278     + normal interrupts. Also caused by incorrectly mapping NMI
279     + code. NMIs that interrupt the entry code must be very
280     + careful and can be the cause of crashes that show up when
281     + running perf.
282     + * Kernel crashes at the first exit to userspace. entry_64.S
283     + bugs, or failing to map some of the exit code.
284     + * Crashes at first interrupt that interrupts userspace. The paths
285     + in entry_64.S that return to userspace are sometimes separate
286     + from the ones that return to the kernel.
287     + * Double faults: overflowing the kernel stack because of page
288     + faults upon page faults. Caused by touching non-pti-mapped
289     + data in the entry code, or forgetting to switch to kernel
290     + CR3 before calling into C functions which are not pti-mapped.
291     + * Userspace segfaults early in boot, sometimes manifesting
292     + as mount(8) failing to mount the rootfs. These have
293     + tended to be TLB invalidation issues. Usually invalidating
294     + the wrong PCID, or otherwise missing an invalidation.
295     +
296     +1. https://gruss.cc/files/kaiser.pdf
297     +2. https://meltdownattack.com/meltdown.pdf
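
As a minimal C sketch of the PGD mirroring described under "Page Table Management" above (an illustration of the idea only, not the kernel's implementation; the names and the entry count are invented):

    #include <stdint.h>

    #define PAGE_SIZE        4096UL
    #define USER_PGD_ENTRIES 256    /* invented: PGD slots that map userspace */

    typedef uint64_t pgd_t;

    /* The order-1 allocation holds two 4k tables: kernel half, then user half. */
    static pgd_t *user_half(pgd_t *kernel_pgd)
    {
            return (pgd_t *)((uintptr_t)kernel_pgd + PAGE_SIZE);
    }

    /* Installing a userspace mapping writes the entry into both PGDs, so all
     * lower page-table levels end up shared between the two trees. */
    static void set_pgd_entry(pgd_t *kernel_pgd, unsigned int index, pgd_t value)
    {
            kernel_pgd[index] = value;
            if (index < USER_PGD_ENTRIES)
                    user_half(kernel_pgd)[index] = value;
    }
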
298     diff --git a/Makefile b/Makefile
299     index 2637f0ed0a07..aba553531d6a 100644
300     --- a/Makefile
301     +++ b/Makefile
302     @@ -1,6 +1,6 @@
303     VERSION = 4
304     PATCHLEVEL = 9
305     -SUBLEVEL = 76
306     +SUBLEVEL = 77
307     EXTRAVERSION =
308     NAME = Roaring Lionus
309    
310     diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
311     index b6e715fd3c90..dac7ceb1a677 100644
312     --- a/arch/arm/kvm/mmio.c
313     +++ b/arch/arm/kvm/mmio.c
314     @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
315     }
316    
317     trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
318     - data);
319     + &data);
320     data = vcpu_data_host_to_guest(vcpu, data, len);
321     vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
322     }
323     @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
324     data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
325     len);
326    
327     - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
328     + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
329     kvm_mmio_write_buf(data_buf, len, data);
330    
331     ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
332     data_buf);
333     } else {
334     trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
335     - fault_ipa, 0);
336     + fault_ipa, NULL);
337    
338     ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
339     data_buf);
340     diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
341     index c558bce989cd..6e716a5f1173 100644
342     --- a/arch/mips/kernel/process.c
343     +++ b/arch/mips/kernel/process.c
344     @@ -683,6 +683,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
345     struct task_struct *t;
346     int max_users;
347    
348     + /* If nothing to change, return right away, successfully. */
349     + if (value == mips_get_process_fp_mode(task))
350     + return 0;
351     +
352     + /* Only accept a mode change if 64-bit FP enabled for o32. */
353     + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
354     + return -EOPNOTSUPP;
355     +
356     + /* And only for o32 tasks. */
357     + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
358     + return -EOPNOTSUPP;
359     +
360     /* Check the value is valid */
361     if (value & ~known_bits)
362     return -EOPNOTSUPP;
363     diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
364     index 11890e6e4093..0c8ae2cc6380 100644
365     --- a/arch/mips/kernel/ptrace.c
366     +++ b/arch/mips/kernel/ptrace.c
367     @@ -439,63 +439,160 @@ static int gpr64_set(struct task_struct *target,
368    
369     #endif /* CONFIG_64BIT */
370    
371     +/*
372     + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
373     + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
374     + * correspond 1:1 to buffer slots. Only general registers are copied.
375     + */
376     +static int fpr_get_fpa(struct task_struct *target,
377     + unsigned int *pos, unsigned int *count,
378     + void **kbuf, void __user **ubuf)
379     +{
380     + return user_regset_copyout(pos, count, kbuf, ubuf,
381     + &target->thread.fpu,
382     + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
383     +}
384     +
385     +/*
386     + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
387     + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
388     + * general register slots are copied to buffer slots. Only general
389     + * registers are copied.
390     + */
391     +static int fpr_get_msa(struct task_struct *target,
392     + unsigned int *pos, unsigned int *count,
393     + void **kbuf, void __user **ubuf)
394     +{
395     + unsigned int i;
396     + u64 fpr_val;
397     + int err;
398     +
399     + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
400     + for (i = 0; i < NUM_FPU_REGS; i++) {
401     + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
402     + err = user_regset_copyout(pos, count, kbuf, ubuf,
403     + &fpr_val, i * sizeof(elf_fpreg_t),
404     + (i + 1) * sizeof(elf_fpreg_t));
405     + if (err)
406     + return err;
407     + }
408     +
409     + return 0;
410     +}
411     +
412     +/*
413     + * Copy the floating-point context to the supplied NT_PRFPREG buffer.
414     + * Choose the appropriate helper for general registers, and then copy
415     + * the FCSR register separately.
416     + */
417     static int fpr_get(struct task_struct *target,
418     const struct user_regset *regset,
419     unsigned int pos, unsigned int count,
420     void *kbuf, void __user *ubuf)
421     {
422     - unsigned i;
423     + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
424     int err;
425     - u64 fpr_val;
426    
427     - /* XXX fcr31 */
428     + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
429     + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
430     + else
431     + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
432     + if (err)
433     + return err;
434    
435     - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
436     - return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
437     - &target->thread.fpu,
438     - 0, sizeof(elf_fpregset_t));
439     + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
440     + &target->thread.fpu.fcr31,
441     + fcr31_pos, fcr31_pos + sizeof(u32));
442    
443     - for (i = 0; i < NUM_FPU_REGS; i++) {
444     - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
445     - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
446     - &fpr_val, i * sizeof(elf_fpreg_t),
447     - (i + 1) * sizeof(elf_fpreg_t));
448     + return err;
449     +}
450     +
451     +/*
452     + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
453     + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
454     + * context's general register slots. Only general registers are copied.
455     + */
456     +static int fpr_set_fpa(struct task_struct *target,
457     + unsigned int *pos, unsigned int *count,
458     + const void **kbuf, const void __user **ubuf)
459     +{
460     + return user_regset_copyin(pos, count, kbuf, ubuf,
461     + &target->thread.fpu,
462     + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
463     +}
464     +
465     +/*
466     + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
467     + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
468     + * bits only of FP context's general register slots. Only general
469     + * registers are copied.
470     + */
471     +static int fpr_set_msa(struct task_struct *target,
472     + unsigned int *pos, unsigned int *count,
473     + const void **kbuf, const void __user **ubuf)
474     +{
475     + unsigned int i;
476     + u64 fpr_val;
477     + int err;
478     +
479     + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
480     + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
481     + err = user_regset_copyin(pos, count, kbuf, ubuf,
482     + &fpr_val, i * sizeof(elf_fpreg_t),
483     + (i + 1) * sizeof(elf_fpreg_t));
484     if (err)
485     return err;
486     + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
487     }
488    
489     return 0;
490     }
491    
492     +/*
493     + * Copy the supplied NT_PRFPREG buffer to the floating-point context.
494     + * Choose the appropriate helper for general registers, and then copy
495     + * the FCSR register separately.
496     + *
497     + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
498     + * which is supposed to have been guaranteed by the kernel before
499     + * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
500     + * so that we can safely avoid preinitializing temporaries for
501     + * partial register writes.
502     + */
503     static int fpr_set(struct task_struct *target,
504     const struct user_regset *regset,
505     unsigned int pos, unsigned int count,
506     const void *kbuf, const void __user *ubuf)
507     {
508     - unsigned i;
509     + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
510     + u32 fcr31;
511     int err;
512     - u64 fpr_val;
513    
514     - /* XXX fcr31 */
515     + BUG_ON(count % sizeof(elf_fpreg_t));
516     +
517     + if (pos + count > sizeof(elf_fpregset_t))
518     + return -EIO;
519    
520     init_fp_ctx(target);
521    
522     - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
523     - return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
524     - &target->thread.fpu,
525     - 0, sizeof(elf_fpregset_t));
526     + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
527     + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
528     + else
529     + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
530     + if (err)
531     + return err;
532    
533     - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
534     - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
535     + if (count > 0) {
536     err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
537     - &fpr_val, i * sizeof(elf_fpreg_t),
538     - (i + 1) * sizeof(elf_fpreg_t));
539     + &fcr31,
540     + fcr31_pos, fcr31_pos + sizeof(u32));
541     if (err)
542     return err;
543     - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
544     +
545     + ptrace_setfcr31(target, fcr31);
546     }
547    
548     - return 0;
549     + return err;
550     }
551    
552     enum mips_regset {
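
The rewritten fpr_get()/fpr_set() above back the NT_PRFPREG regset exposed through ptrace(2). A hedged userspace sketch of reading that regset (standard PTRACE_GETREGSET usage; the tracee is assumed to be stopped, error handling omitted):

    #include <elf.h>            /* NT_PRFPREG */
    #include <stdint.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* 32 FP registers plus one slot carrying fcr31, matching the layout
     * the regset code above copies out. */
    static uint64_t fpregs[33];

    static long read_fp_regs(pid_t pid)
    {
            struct iovec iov = {
                    .iov_base = fpregs,
                    .iov_len  = sizeof(fpregs),
            };

            /* The kernel fills the buffer via fpr_get() and shrinks
             * iov_len to the number of bytes actually copied. */
            return ptrace(PTRACE_GETREGSET, pid, (void *)(long)NT_PRFPREG, &iov);
    }
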
553     diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
554     index da8156fd3d58..0ca4d12ce95c 100644
555     --- a/arch/x86/Kconfig
556     +++ b/arch/x86/Kconfig
557     @@ -64,6 +64,7 @@ config X86
558     select GENERIC_CLOCKEVENTS_MIN_ADJUST
559     select GENERIC_CMOS_UPDATE
560     select GENERIC_CPU_AUTOPROBE
561     + select GENERIC_CPU_VULNERABILITIES
562     select GENERIC_EARLY_IOREMAP
563     select GENERIC_FIND_FIRST_BIT
564     select GENERIC_IOMAP
565     @@ -407,6 +408,19 @@ config GOLDFISH
566     def_bool y
567     depends on X86_GOLDFISH
568    
569     +config RETPOLINE
570     + bool "Avoid speculative indirect branches in kernel"
571     + default y
572     + ---help---
573     + Compile kernel with the retpoline compiler options to guard against
574     + kernel-to-user data leaks by avoiding speculative indirect
575     + branches. Requires a compiler with -mindirect-branch=thunk-extern
576     + support for full protection. The kernel may run slower.
577     +
578     + Without compiler support, at least indirect branches in assembler
579     + code are eliminated. Since this includes the syscall entry path,
580     + it is not entirely pointless.
581     +
582     if X86_32
583     config X86_EXTENDED_PLATFORM
584     bool "Support for extended (non-PC) x86 platforms"
585     diff --git a/arch/x86/Makefile b/arch/x86/Makefile
586     index 2d449337a360..cd22cb8ebd42 100644
587     --- a/arch/x86/Makefile
588     +++ b/arch/x86/Makefile
589     @@ -182,6 +182,14 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
590     KBUILD_CFLAGS += $(mflags-y)
591     KBUILD_AFLAGS += $(mflags-y)
592    
593     +# Avoid indirect branches in kernel to deal with Spectre
594     +ifdef CONFIG_RETPOLINE
595     + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
596     + ifneq ($(RETPOLINE_CFLAGS),)
597     + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
598     + endif
599     +endif
600     +
601     archscripts: scripts_basic
602     $(Q)$(MAKE) $(build)=arch/x86/tools relocs
603    
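
The cc-option test above silently drops these flags on compilers without retpoline support; an equivalent manual check looks like this (illustrative):

    $ echo 'int main(void) { return 0; }' | \
      gcc -x c - -c -o /dev/null \
          -mindirect-branch=thunk-extern -mindirect-branch-register \
      && echo "compiler supports retpolines"
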
604     diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
605     index 383a6f84a060..fa8801b35e51 100644
606     --- a/arch/x86/crypto/aesni-intel_asm.S
607     +++ b/arch/x86/crypto/aesni-intel_asm.S
608     @@ -32,6 +32,7 @@
609     #include <linux/linkage.h>
610     #include <asm/inst.h>
611     #include <asm/frame.h>
612     +#include <asm/nospec-branch.h>
613    
614     /*
615     * The following macros are used to move an (un)aligned 16 byte value to/from
616     @@ -2734,7 +2735,7 @@ ENTRY(aesni_xts_crypt8)
617     pxor INC, STATE4
618     movdqu IV, 0x30(OUTP)
619    
620     - call *%r11
621     + CALL_NOSPEC %r11
622    
623     movdqu 0x00(OUTP), INC
624     pxor INC, STATE1
625     @@ -2779,7 +2780,7 @@ ENTRY(aesni_xts_crypt8)
626     _aesni_gf128mul_x_ble()
627     movups IV, (IVP)
628    
629     - call *%r11
630     + CALL_NOSPEC %r11
631    
632     movdqu 0x40(OUTP), INC
633     pxor INC, STATE1
634     diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
635     index aa9e8bd163f6..77ff4de2224d 100644
636     --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
637     +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
638     @@ -17,6 +17,7 @@
639    
640     #include <linux/linkage.h>
641     #include <asm/frame.h>
642     +#include <asm/nospec-branch.h>
643    
644     #define CAMELLIA_TABLE_BYTE_LEN 272
645    
646     @@ -1224,7 +1225,7 @@ camellia_xts_crypt_16way:
647     vpxor 14 * 16(%rax), %xmm15, %xmm14;
648     vpxor 15 * 16(%rax), %xmm15, %xmm15;
649    
650     - call *%r9;
651     + CALL_NOSPEC %r9;
652    
653     addq $(16 * 16), %rsp;
654    
655     diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
656     index 16186c18656d..7384342fbb41 100644
657     --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
658     +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
659     @@ -12,6 +12,7 @@
660    
661     #include <linux/linkage.h>
662     #include <asm/frame.h>
663     +#include <asm/nospec-branch.h>
664    
665     #define CAMELLIA_TABLE_BYTE_LEN 272
666    
667     @@ -1337,7 +1338,7 @@ camellia_xts_crypt_32way:
668     vpxor 14 * 32(%rax), %ymm15, %ymm14;
669     vpxor 15 * 32(%rax), %ymm15, %ymm15;
670    
671     - call *%r9;
672     + CALL_NOSPEC %r9;
673    
674     addq $(16 * 32), %rsp;
675    
676     diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
677     index dc05f010ca9b..174fd4146043 100644
678     --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
679     +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
680     @@ -45,6 +45,7 @@
681    
682     #include <asm/inst.h>
683     #include <linux/linkage.h>
684     +#include <asm/nospec-branch.h>
685    
686     ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
687    
688     @@ -172,7 +173,7 @@ continue_block:
689     movzxw (bufp, %rax, 2), len
690     lea crc_array(%rip), bufp
691     lea (bufp, len, 1), bufp
692     - jmp *bufp
693     + JMP_NOSPEC bufp
694    
695     ################################################################
696     ## 2a) PROCESS FULL BLOCKS:
697     diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
698     index edba8606b99a..bdc9aeaf2e45 100644
699     --- a/arch/x86/entry/entry_32.S
700     +++ b/arch/x86/entry/entry_32.S
701     @@ -45,6 +45,7 @@
702     #include <asm/asm.h>
703     #include <asm/smap.h>
704     #include <asm/export.h>
705     +#include <asm/nospec-branch.h>
706    
707     .section .entry.text, "ax"
708    
709     @@ -260,7 +261,7 @@ ENTRY(ret_from_fork)
710    
711     /* kernel thread */
712     1: movl %edi, %eax
713     - call *%ebx
714     + CALL_NOSPEC %ebx
715     /*
716     * A kernel thread is allowed to return here after successfully
717     * calling do_execve(). Exit to userspace to complete the execve()
718     @@ -984,7 +985,8 @@ trace:
719     movl 0x4(%ebp), %edx
720     subl $MCOUNT_INSN_SIZE, %eax
721    
722     - call *ftrace_trace_function
723     + movl ftrace_trace_function, %ecx
724     + CALL_NOSPEC %ecx
725    
726     popl %edx
727     popl %ecx
728     @@ -1020,7 +1022,7 @@ return_to_handler:
729     movl %eax, %ecx
730     popl %edx
731     popl %eax
732     - jmp *%ecx
733     + JMP_NOSPEC %ecx
734     #endif
735    
736     #ifdef CONFIG_TRACING
737     @@ -1062,7 +1064,7 @@ error_code:
738     movl %ecx, %es
739     TRACE_IRQS_OFF
740     movl %esp, %eax # pt_regs pointer
741     - call *%edi
742     + CALL_NOSPEC %edi
743     jmp ret_from_exception
744     END(page_fault)
745    
746     diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
747     index af4e58132d91..b9c901ce6582 100644
748     --- a/arch/x86/entry/entry_64.S
749     +++ b/arch/x86/entry/entry_64.S
750     @@ -37,6 +37,7 @@
751     #include <asm/pgtable_types.h>
752     #include <asm/export.h>
753     #include <asm/kaiser.h>
754     +#include <asm/nospec-branch.h>
755     #include <linux/err.h>
756    
757     /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
758     @@ -208,7 +209,12 @@ entry_SYSCALL_64_fastpath:
759     * It might end up jumping to the slow path. If it jumps, RAX
760     * and all argument registers are clobbered.
761     */
762     +#ifdef CONFIG_RETPOLINE
763     + movq sys_call_table(, %rax, 8), %rax
764     + call __x86_indirect_thunk_rax
765     +#else
766     call *sys_call_table(, %rax, 8)
767     +#endif
768     .Lentry_SYSCALL_64_after_fastpath_call:
769    
770     movq %rax, RAX(%rsp)
771     @@ -380,7 +386,7 @@ ENTRY(stub_ptregs_64)
772     jmp entry_SYSCALL64_slow_path
773    
774     1:
775     - jmp *%rax /* Called from C */
776     + JMP_NOSPEC %rax /* Called from C */
777     END(stub_ptregs_64)
778    
779     .macro ptregs_stub func
780     @@ -457,7 +463,7 @@ ENTRY(ret_from_fork)
781     1:
782     /* kernel thread */
783     movq %r12, %rdi
784     - call *%rbx
785     + CALL_NOSPEC %rbx
786     /*
787     * A kernel thread is allowed to return here after successfully
788     * calling do_execve(). Exit to userspace to complete the execve()
789     diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
790     index d4aea31eec03..deca9b9c7923 100644
791     --- a/arch/x86/include/asm/alternative.h
792     +++ b/arch/x86/include/asm/alternative.h
793     @@ -139,7 +139,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
794     ".popsection\n" \
795     ".pushsection .altinstr_replacement, \"ax\"\n" \
796     ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
797     - ".popsection"
798     + ".popsection\n"
799    
800     #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
801     OLDINSTR_2(oldinstr, 1, 2) \
802     @@ -150,7 +150,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
803     ".pushsection .altinstr_replacement, \"ax\"\n" \
804     ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
805     ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
806     - ".popsection"
807     + ".popsection\n"
808    
809     /*
810     * Alternative instructions for different CPU types or capabilities.
811     diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
812     index 44b8762fa0c7..b15aa4083dfd 100644
813     --- a/arch/x86/include/asm/asm-prototypes.h
814     +++ b/arch/x86/include/asm/asm-prototypes.h
815     @@ -10,7 +10,32 @@
816     #include <asm/pgtable.h>
817     #include <asm/special_insns.h>
818     #include <asm/preempt.h>
819     +#include <asm/asm.h>
820    
821     #ifndef CONFIG_X86_CMPXCHG64
822     extern void cmpxchg8b_emu(void);
823     #endif
824     +
825     +#ifdef CONFIG_RETPOLINE
826     +#ifdef CONFIG_X86_32
827     +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
828     +#else
829     +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
830     +INDIRECT_THUNK(8)
831     +INDIRECT_THUNK(9)
832     +INDIRECT_THUNK(10)
833     +INDIRECT_THUNK(11)
834     +INDIRECT_THUNK(12)
835     +INDIRECT_THUNK(13)
836     +INDIRECT_THUNK(14)
837     +INDIRECT_THUNK(15)
838     +#endif
839     +INDIRECT_THUNK(ax)
840     +INDIRECT_THUNK(bx)
841     +INDIRECT_THUNK(cx)
842     +INDIRECT_THUNK(dx)
843     +INDIRECT_THUNK(si)
844     +INDIRECT_THUNK(di)
845     +INDIRECT_THUNK(bp)
846     +INDIRECT_THUNK(sp)
847     +#endif /* CONFIG_RETPOLINE */
848     diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
849     index 7acb51c49fec..00523524edbf 100644
850     --- a/arch/x86/include/asm/asm.h
851     +++ b/arch/x86/include/asm/asm.h
852     @@ -125,4 +125,15 @@
853     /* For C file, we already have NOKPROBE_SYMBOL macro */
854     #endif
855    
856     +#ifndef __ASSEMBLY__
857     +/*
858     + * This output constraint should be used for any inline asm which has a "call"
859     + * instruction. Otherwise the asm may be inserted before the frame pointer
860     + * gets set up by the containing function. If you forget to do this, objtool
861     + * may print a "call without frame pointer save/setup" warning.
862     + */
863     +register unsigned long current_stack_pointer asm(_ASM_SP);
864     +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
865     +#endif
866     +
867     #endif /* _ASM_X86_ASM_H */
868     diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
869     index 1d2b69fc0ceb..9ea67a04ff4f 100644
870     --- a/arch/x86/include/asm/cpufeature.h
871     +++ b/arch/x86/include/asm/cpufeature.h
872     @@ -135,6 +135,8 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
873     set_bit(bit, (unsigned long *)cpu_caps_set); \
874     } while (0)
875    
876     +#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
877     +
878     #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
879     /*
880     * Static testing of CPU features. Used the same as boot_cpu_has().
881     diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
882     index 454a37adb823..4467568a531b 100644
883     --- a/arch/x86/include/asm/cpufeatures.h
884     +++ b/arch/x86/include/asm/cpufeatures.h
885     @@ -194,6 +194,9 @@
886     #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
887     #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
888    
889     +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
890     +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
891     +
892     #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
893     #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
894     #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
895     @@ -316,5 +319,8 @@
896     #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
897     #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
898     #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
899     +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
900     +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
901     +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
902    
903     #endif /* _ASM_X86_CPUFEATURES_H */
904     diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
905     index b601ddac5719..b11c4c072df8 100644
906     --- a/arch/x86/include/asm/msr-index.h
907     +++ b/arch/x86/include/asm/msr-index.h
908     @@ -330,6 +330,9 @@
909     #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
910     #define FAM10H_MMIO_CONF_BASE_SHIFT 20
911     #define MSR_FAM10H_NODE_ID 0xc001100c
912     +#define MSR_F10H_DECFG 0xc0011029
913     +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
914     +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
915    
916     /* K8 MSRs */
917     #define MSR_K8_TOP_MEM1 0xc001001a
918     diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
919     new file mode 100644
920     index 000000000000..402a11c803c3
921     --- /dev/null
922     +++ b/arch/x86/include/asm/nospec-branch.h
923     @@ -0,0 +1,214 @@
924     +/* SPDX-License-Identifier: GPL-2.0 */
925     +
926     +#ifndef __NOSPEC_BRANCH_H__
927     +#define __NOSPEC_BRANCH_H__
928     +
929     +#include <asm/alternative.h>
930     +#include <asm/alternative-asm.h>
931     +#include <asm/cpufeatures.h>
932     +
933     +/*
934     + * Fill the CPU return stack buffer.
935     + *
936     + * Each entry in the RSB, if used for a speculative 'ret', contains an
937     + * infinite 'pause; jmp' loop to capture speculative execution.
938     + *
939     + * This is required in various cases for retpoline and IBRS-based
940     + * mitigations for the Spectre variant 2 vulnerability. Sometimes to
941     + * eliminate potentially bogus entries from the RSB, and sometimes
942     + * purely to ensure that it doesn't get empty, which on some CPUs would
943     + * allow predictions from other (unwanted!) sources to be used.
944     + *
945     + * We define a CPP macro such that it can be used from both .S files and
946     + * inline assembly. It's possible to do a .macro and then include that
947     + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
948     + */
949     +
950     +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
951     +#define RSB_FILL_LOOPS 16 /* To avoid underflow */
952     +
953     +/*
954     + * Google experimented with loop-unrolling and this turned out to be
955     + * the optimal version — two calls, each with their own speculation
956     + * trap should their return address end up getting used, in a loop.
957     + */
958     +#define __FILL_RETURN_BUFFER(reg, nr, sp) \
959     + mov $(nr/2), reg; \
960     +771: \
961     + call 772f; \
962     +773: /* speculation trap */ \
963     + pause; \
964     + jmp 773b; \
965     +772: \
966     + call 774f; \
967     +775: /* speculation trap */ \
968     + pause; \
969     + jmp 775b; \
970     +774: \
971     + dec reg; \
972     + jnz 771b; \
973     + add $(BITS_PER_LONG/8) * nr, sp;
974     +
975     +#ifdef __ASSEMBLY__
976     +
977     +/*
978     + * This should be used immediately before a retpoline alternative. It tells
979     + * objtool where the retpolines are so that it can make sense of the control
980     + * flow by just reading the original instruction(s) and ignoring the
981     + * alternatives.
982     + */
983     +.macro ANNOTATE_NOSPEC_ALTERNATIVE
984     + .Lannotate_\@:
985     + .pushsection .discard.nospec
986     + .long .Lannotate_\@ - .
987     + .popsection
988     +.endm
989     +
990     +/*
991     + * These are the bare retpoline primitives for indirect jmp and call.
992     + * Do not use these directly; they only exist to make the ALTERNATIVE
993     + * invocation below less ugly.
994     + */
995     +.macro RETPOLINE_JMP reg:req
996     + call .Ldo_rop_\@
997     +.Lspec_trap_\@:
998     + pause
999     + jmp .Lspec_trap_\@
1000     +.Ldo_rop_\@:
1001     + mov \reg, (%_ASM_SP)
1002     + ret
1003     +.endm
1004     +
1005     +/*
1006     + * This is a wrapper around RETPOLINE_JMP so the called function in reg
1007     + * returns to the instruction after the macro.
1008     + */
1009     +.macro RETPOLINE_CALL reg:req
1010     + jmp .Ldo_call_\@
1011     +.Ldo_retpoline_jmp_\@:
1012     + RETPOLINE_JMP \reg
1013     +.Ldo_call_\@:
1014     + call .Ldo_retpoline_jmp_\@
1015     +.endm
1016     +
1017     +/*
1018     + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
1019     + * indirect jmp/call which may be susceptible to the Spectre variant 2
1020     + * attack.
1021     + */
1022     +.macro JMP_NOSPEC reg:req
1023     +#ifdef CONFIG_RETPOLINE
1024     + ANNOTATE_NOSPEC_ALTERNATIVE
1025     + ALTERNATIVE_2 __stringify(jmp *\reg), \
1026     + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
1027     + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
1028     +#else
1029     + jmp *\reg
1030     +#endif
1031     +.endm
1032     +
1033     +.macro CALL_NOSPEC reg:req
1034     +#ifdef CONFIG_RETPOLINE
1035     + ANNOTATE_NOSPEC_ALTERNATIVE
1036     + ALTERNATIVE_2 __stringify(call *\reg), \
1037     + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
1038     + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
1039     +#else
1040     + call *\reg
1041     +#endif
1042     +.endm
1043     +
1044     + /*
1045     + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
1046     + * monstrosity above, manually.
1047     + */
1048     +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
1049     +#ifdef CONFIG_RETPOLINE
1050     + ANNOTATE_NOSPEC_ALTERNATIVE
1051     + ALTERNATIVE "jmp .Lskip_rsb_\@", \
1052     + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
1053     + \ftr
1054     +.Lskip_rsb_\@:
1055     +#endif
1056     +.endm
1057     +
1058     +#else /* __ASSEMBLY__ */
1059     +
1060     +#define ANNOTATE_NOSPEC_ALTERNATIVE \
1061     + "999:\n\t" \
1062     + ".pushsection .discard.nospec\n\t" \
1063     + ".long 999b - .\n\t" \
1064     + ".popsection\n\t"
1065     +
1066     +#if defined(CONFIG_X86_64) && defined(RETPOLINE)
1067     +
1068     +/*
1069     + * Since the inline asm uses the %V modifier which is only in newer GCC,
1070     + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
1071     + */
1072     +# define CALL_NOSPEC \
1073     + ANNOTATE_NOSPEC_ALTERNATIVE \
1074     + ALTERNATIVE( \
1075     + "call *%[thunk_target]\n", \
1076     + "call __x86_indirect_thunk_%V[thunk_target]\n", \
1077     + X86_FEATURE_RETPOLINE)
1078     +# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
1079     +
1080     +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
1081     +/*
1082     + * For i386 we use the original ret-equivalent retpoline, because
1083     + * otherwise we'll run out of registers. We don't care about CET
1084     + * here, anyway.
1085     + */
1086     +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
1087     + " jmp 904f;\n" \
1088     + " .align 16\n" \
1089     + "901: call 903f;\n" \
1090     + "902: pause;\n" \
1091     + " jmp 902b;\n" \
1092     + " .align 16\n" \
1093     + "903: addl $4, %%esp;\n" \
1094     + " pushl %[thunk_target];\n" \
1095     + " ret;\n" \
1096     + " .align 16\n" \
1097     + "904: call 901b;\n", \
1098     + X86_FEATURE_RETPOLINE)
1099     +
1100     +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1101     +#else /* No retpoline for C / inline asm */
1102     +# define CALL_NOSPEC "call *%[thunk_target]\n"
1103     +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1104     +#endif
1105     +
1106     +/* The Spectre V2 mitigation variants */
1107     +enum spectre_v2_mitigation {
1108     + SPECTRE_V2_NONE,
1109     + SPECTRE_V2_RETPOLINE_MINIMAL,
1110     + SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
1111     + SPECTRE_V2_RETPOLINE_GENERIC,
1112     + SPECTRE_V2_RETPOLINE_AMD,
1113     + SPECTRE_V2_IBRS,
1114     +};
1115     +
1116     +/*
1117     + * On VMEXIT we must ensure that no RSB predictions learned in the guest
1118     + * can be followed in the host, by overwriting the RSB completely. Both
1119     + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
1120     + * CPUs with IBRS_ATT *might* it be avoided.
1121     + */
1122     +static inline void vmexit_fill_RSB(void)
1123     +{
1124     +#ifdef CONFIG_RETPOLINE
1125     + unsigned long loops = RSB_CLEAR_LOOPS / 2;
1126     +
1127     + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
1128     + ALTERNATIVE("jmp 910f",
1129     + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
1130     + X86_FEATURE_RETPOLINE)
1131     + "910:"
1132     + : "=&r" (loops), ASM_CALL_CONSTRAINT
1133     + : "r" (loops) : "memory" );
1134     +#endif
1135     +}
1136     +#endif /* __ASSEMBLY__ */
1137     +#endif /* __NOSPEC_BRANCH_H__ */
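
For C callers, usage pairs CALL_NOSPEC with THUNK_TARGET() and the ASM_CALL_CONSTRAINT added to asm.h earlier in this patch; the Xen privcmd_call() conversion below is the in-tree example. A minimal hedged sketch (the helper is invented, and a real caller must additionally clobber whatever registers the callee may touch):

    static inline void indirect_call_nospec(void (*fn)(void))
    {
            asm volatile(CALL_NOSPEC
                         : ASM_CALL_CONSTRAINT
                         : THUNK_TARGET(fn)
                         : "memory");
    }
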
1138     diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
1139     index b6d425999f99..1178a51b77f3 100644
1140     --- a/arch/x86/include/asm/pgalloc.h
1141     +++ b/arch/x86/include/asm/pgalloc.h
1142     @@ -27,6 +27,17 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
1143     */
1144     extern gfp_t __userpte_alloc_gfp;
1145    
1146     +#ifdef CONFIG_PAGE_TABLE_ISOLATION
1147     +/*
1148     + * Instead of one PGD, we acquire two PGDs. Being order-1, it is
1149     + * both 8k in size and 8k-aligned. That lets us just flip bit 12
1150     + * in a pointer to swap between the two 4k halves.
1151     + */
1152     +#define PGD_ALLOCATION_ORDER 1
1153     +#else
1154     +#define PGD_ALLOCATION_ORDER 0
1155     +#endif
1156     +
1157     /*
1158     * Allocate and free page tables.
1159     */
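
The comment above hints at the pointer arithmetic this enables: with an 8k-aligned, order-1 PGD, bit 12 selects the half. A hedged kernel-context sketch (the helper name is invented):

    static inline pgd_t *swap_pgd_half(pgd_t *pgdp)
    {
            /* XOR with PAGE_SIZE (bit 12) toggles between the halves */
            return (pgd_t *)((unsigned long)pgdp ^ PAGE_SIZE);
    }
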
1160     diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
1161     index 8cb52ee3ade6..e40b19ca486e 100644
1162     --- a/arch/x86/include/asm/processor.h
1163     +++ b/arch/x86/include/asm/processor.h
1164     @@ -156,8 +156,8 @@ extern struct cpuinfo_x86 boot_cpu_data;
1165     extern struct cpuinfo_x86 new_cpu_data;
1166    
1167     extern struct tss_struct doublefault_tss;
1168     -extern __u32 cpu_caps_cleared[NCAPINTS];
1169     -extern __u32 cpu_caps_set[NCAPINTS];
1170     +extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
1171     +extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
1172    
1173     #ifdef CONFIG_SMP
1174     DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
1175     diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
1176     index ad6f5eb07a95..bdf9c4c91572 100644
1177     --- a/arch/x86/include/asm/thread_info.h
1178     +++ b/arch/x86/include/asm/thread_info.h
1179     @@ -152,17 +152,6 @@ struct thread_info {
1180     */
1181     #ifndef __ASSEMBLY__
1182    
1183     -static inline unsigned long current_stack_pointer(void)
1184     -{
1185     - unsigned long sp;
1186     -#ifdef CONFIG_X86_64
1187     - asm("mov %%rsp,%0" : "=g" (sp));
1188     -#else
1189     - asm("mov %%esp,%0" : "=g" (sp));
1190     -#endif
1191     - return sp;
1192     -}
1193     -
1194     /*
1195     * Walks up the stack frames to make sure that the specified object is
1196     * entirely contained by a single stack frame.
1197     diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
1198     index 8b678af866f7..ccdc23d89b60 100644
1199     --- a/arch/x86/include/asm/xen/hypercall.h
1200     +++ b/arch/x86/include/asm/xen/hypercall.h
1201     @@ -44,6 +44,7 @@
1202     #include <asm/page.h>
1203     #include <asm/pgtable.h>
1204     #include <asm/smap.h>
1205     +#include <asm/nospec-branch.h>
1206    
1207     #include <xen/interface/xen.h>
1208     #include <xen/interface/sched.h>
1209     @@ -216,9 +217,9 @@ privcmd_call(unsigned call,
1210     __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
1211    
1212     stac();
1213     - asm volatile("call *%[call]"
1214     + asm volatile(CALL_NOSPEC
1215     : __HYPERCALL_5PARAM
1216     - : [call] "a" (&hypercall_page[call])
1217     + : [thunk_target] "a" (&hypercall_page[call])
1218     : __HYPERCALL_CLOBBER5);
1219     clac();
1220    
1221     diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
1222     index 11cc600f4df0..0a1e8a67cc99 100644
1223     --- a/arch/x86/kernel/acpi/boot.c
1224     +++ b/arch/x86/kernel/acpi/boot.c
1225     @@ -335,13 +335,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
1226     #ifdef CONFIG_X86_IO_APIC
1227     #define MP_ISA_BUS 0
1228    
1229     +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1230     + u8 trigger, u32 gsi);
1231     +
1232     static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1233     u32 gsi)
1234     {
1235     - int ioapic;
1236     - int pin;
1237     - struct mpc_intsrc mp_irq;
1238     -
1239     /*
1240     * Check bus_irq boundary.
1241     */
1242     @@ -350,14 +349,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1243     return;
1244     }
1245    
1246     - /*
1247     - * Convert 'gsi' to 'ioapic.pin'.
1248     - */
1249     - ioapic = mp_find_ioapic(gsi);
1250     - if (ioapic < 0)
1251     - return;
1252     - pin = mp_find_ioapic_pin(ioapic, gsi);
1253     -
1254     /*
1255     * TBD: This check is for faulty timer entries, where the override
1256     * erroneously sets the trigger to level, resulting in a HUGE
1257     @@ -366,16 +357,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1258     if ((bus_irq == 0) && (trigger == 3))
1259     trigger = 1;
1260    
1261     - mp_irq.type = MP_INTSRC;
1262     - mp_irq.irqtype = mp_INT;
1263     - mp_irq.irqflag = (trigger << 2) | polarity;
1264     - mp_irq.srcbus = MP_ISA_BUS;
1265     - mp_irq.srcbusirq = bus_irq; /* IRQ */
1266     - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
1267     - mp_irq.dstirq = pin; /* INTIN# */
1268     -
1269     - mp_save_irq(&mp_irq);
1270     -
1271     + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
1272     + return;
1273     /*
1274     * Reset default identity mapping if gsi is also an legacy IRQ,
1275     * otherwise there will be more than one entry with the same GSI
1276     @@ -422,6 +405,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1277     return 0;
1278     }
1279    
1280     +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1281     + u8 trigger, u32 gsi)
1282     +{
1283     + struct mpc_intsrc mp_irq;
1284     + int ioapic, pin;
1285     +
1286     + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
1287     + ioapic = mp_find_ioapic(gsi);
1288     + if (ioapic < 0) {
1289     + pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
1290     + return ioapic;
1291     + }
1292     +
1293     + pin = mp_find_ioapic_pin(ioapic, gsi);
1294     +
1295     + mp_irq.type = MP_INTSRC;
1296     + mp_irq.irqtype = mp_INT;
1297     + mp_irq.irqflag = (trigger << 2) | polarity;
1298     + mp_irq.srcbus = MP_ISA_BUS;
1299     + mp_irq.srcbusirq = bus_irq;
1300     + mp_irq.dstapic = mpc_ioapic_id(ioapic);
1301     + mp_irq.dstirq = pin;
1302     +
1303     + mp_save_irq(&mp_irq);
1304     +
1305     + return 0;
1306     +}
1307     +
1308     static int __init
1309     acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
1310     {
1311     @@ -466,7 +477,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
1312     if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
1313     polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
1314    
1315     - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1316     + if (bus_irq < NR_IRQS_LEGACY)
1317     + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1318     + else
1319     + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
1320     +
1321     acpi_penalize_sci_irq(bus_irq, trigger, polarity);
1322    
1323     /*
1324     diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
1325     index 5cb272a7a5a3..10d5a3d6affc 100644
1326     --- a/arch/x86/kernel/alternative.c
1327     +++ b/arch/x86/kernel/alternative.c
1328     @@ -340,9 +340,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
1329     static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
1330     {
1331     unsigned long flags;
1332     + int i;
1333    
1334     - if (instr[0] != 0x90)
1335     - return;
1336     + for (i = 0; i < a->padlen; i++) {
1337     + if (instr[i] != 0x90)
1338     + return;
1339     + }
1340    
1341     local_irq_save(flags);
1342     add_nops(instr + (a->instrlen - a->padlen), a->padlen);
1343     diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
1344     index 4a8697f7d4ef..33b63670bf09 100644
1345     --- a/arch/x86/kernel/cpu/Makefile
1346     +++ b/arch/x86/kernel/cpu/Makefile
1347     @@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
1348     obj-y += common.o
1349     obj-y += rdrand.o
1350     obj-y += match.o
1351     +obj-y += bugs.o
1352    
1353     obj-$(CONFIG_PROC_FS) += proc.o
1354     obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
1355    
1356     -obj-$(CONFIG_X86_32) += bugs.o
1357     -obj-$(CONFIG_X86_64) += bugs_64.o
1358     -
1359     obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
1360     obj-$(CONFIG_CPU_SUP_AMD) += amd.o
1361     obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
1362     diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
1363     index 2b4cf04239b6..1b89f0c4251e 100644
1364     --- a/arch/x86/kernel/cpu/amd.c
1365     +++ b/arch/x86/kernel/cpu/amd.c
1366     @@ -782,8 +782,32 @@ static void init_amd(struct cpuinfo_x86 *c)
1367     set_cpu_cap(c, X86_FEATURE_K8);
1368    
1369     if (cpu_has(c, X86_FEATURE_XMM2)) {
1370     - /* MFENCE stops RDTSC speculation */
1371     - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1372     + unsigned long long val;
1373     + int ret;
1374     +
1375     + /*
1376     + * A serializing LFENCE has less overhead than MFENCE, so
1377     + * use it for execution serialization. On families which
1378     + * don't have that MSR, LFENCE is already serializing.
1379     + * msr_set_bit() uses the safe accessors, too, even if the MSR
1380     + * is not present.
1381     + */
1382     + msr_set_bit(MSR_F10H_DECFG,
1383     + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
1384     +
1385     + /*
1386     + * Verify that the MSR write was successful (could be running
1387     + * under a hypervisor) and only then assume that LFENCE is
1388     + * serializing.
1389     + */
1390     + ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
1391     + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
1392     + /* A serializing LFENCE stops RDTSC speculation */
1393     + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
1394     + } else {
1395     + /* MFENCE stops RDTSC speculation */
1396     + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1397     + }
1398     }
1399    
1400     /*
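
The init_amd() hunk above is a verify-after-write pattern: msr_set_bit() uses the safe accessors so a missing MSR cannot fault, and rdmsrl_safe() then confirms the LFENCE-serializing bit actually stuck before X86_FEATURE_LFENCE_RDTSC is trusted, since a hypervisor may silently drop the write. A sketch of that decision with the MSR access stubbed out (not runnable outside the kernel; the bit position is an assumption of this sketch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Assumed bit position of the LFENCE-serialize flag in DE_CFG. */
    #define DECFG_LFENCE_SERIALIZE (1ULL << 1)

    /* Stub standing in for rdmsrl_safe(); returns 0 on success. */
    extern int read_msr_safe(uint32_t msr, uint64_t *val);

    static bool lfence_is_serializing(uint32_t decfg_msr)
    {
            uint64_t val;

            /* Trust LFENCE only if the readback shows the bit set. */
            return read_msr_safe(decfg_msr, &val) == 0 &&
                   (val & DECFG_LFENCE_SERIALIZE);
    }
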
1401     diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1402     index 0b6124315441..49d25ddf0e9f 100644
1403     --- a/arch/x86/kernel/cpu/bugs.c
1404     +++ b/arch/x86/kernel/cpu/bugs.c
1405     @@ -9,6 +9,10 @@
1406     */
1407     #include <linux/init.h>
1408     #include <linux/utsname.h>
1409     +#include <linux/cpu.h>
1410     +
1411     +#include <asm/nospec-branch.h>
1412     +#include <asm/cmdline.h>
1413     #include <asm/bugs.h>
1414     #include <asm/processor.h>
1415     #include <asm/processor-flags.h>
1416     @@ -16,23 +20,24 @@
1417     #include <asm/msr.h>
1418     #include <asm/paravirt.h>
1419     #include <asm/alternative.h>
1420     +#include <asm/pgtable.h>
1421     +#include <asm/cacheflush.h>
1422     +
1423     +static void __init spectre_v2_select_mitigation(void);
1424    
1425     void __init check_bugs(void)
1426     {
1427     -#ifdef CONFIG_X86_32
1428     - /*
1429     - * Regardless of whether PCID is enumerated, the SDM says
1430     - * that it can't be enabled in 32-bit mode.
1431     - */
1432     - setup_clear_cpu_cap(X86_FEATURE_PCID);
1433     -#endif
1434     -
1435     identify_boot_cpu();
1436     -#ifndef CONFIG_SMP
1437     - pr_info("CPU: ");
1438     - print_cpu_info(&boot_cpu_data);
1439     -#endif
1440    
1441     + if (!IS_ENABLED(CONFIG_SMP)) {
1442     + pr_info("CPU: ");
1443     + print_cpu_info(&boot_cpu_data);
1444     + }
1445     +
1446     + /* Select the proper spectre mitigation before patching alternatives */
1447     + spectre_v2_select_mitigation();
1448     +
1449     +#ifdef CONFIG_X86_32
1450     /*
1451     * Check whether we are able to run this kernel safely on SMP.
1452     *
1453     @@ -48,4 +53,194 @@ void __init check_bugs(void)
1454     alternative_instructions();
1455    
1456     fpu__init_check_bugs();
1457     +#else /* CONFIG_X86_64 */
1458     + alternative_instructions();
1459     +
1460     + /*
1461     + * Make sure the first 2MB area is not mapped by huge pages
1462     + * There are typically fixed size MTRRs in there and overlapping
1463     + * MTRRs into large pages causes slowdowns.
1464     + *
1465     + * Right now we don't do that with gbpages because there seems
1466     + * very little benefit for that case.
1467     + */
1468     + if (!direct_gbpages)
1469     + set_memory_4k((unsigned long)__va(0), 1);
1470     +#endif
1471     +}
1472     +
1473     +/* The kernel command line selection */
1474     +enum spectre_v2_mitigation_cmd {
1475     + SPECTRE_V2_CMD_NONE,
1476     + SPECTRE_V2_CMD_AUTO,
1477     + SPECTRE_V2_CMD_FORCE,
1478     + SPECTRE_V2_CMD_RETPOLINE,
1479     + SPECTRE_V2_CMD_RETPOLINE_GENERIC,
1480     + SPECTRE_V2_CMD_RETPOLINE_AMD,
1481     +};
1482     +
1483     +static const char *spectre_v2_strings[] = {
1484     + [SPECTRE_V2_NONE] = "Vulnerable",
1485     + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
1486     + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
1487     + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
1488     + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
1489     +};
1490     +
1491     +#undef pr_fmt
1492     +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1493     +
1494     +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1495     +
1496     +static void __init spec2_print_if_insecure(const char *reason)
1497     +{
1498     + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1499     + pr_info("%s\n", reason);
1500     +}
1501     +
1502     +static void __init spec2_print_if_secure(const char *reason)
1503     +{
1504     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1505     + pr_info("%s\n", reason);
1506     +}
1507     +
1508     +static inline bool retp_compiler(void)
1509     +{
1510     + return __is_defined(RETPOLINE);
1511     +}
1512     +
1513     +static inline bool match_option(const char *arg, int arglen, const char *opt)
1514     +{
1515     + int len = strlen(opt);
1516     +
1517     + return len == arglen && !strncmp(arg, opt, len);
1518     +}
1519     +
1520     +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1521     +{
1522     + char arg[20];
1523     + int ret;
1524     +
1525     + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1526     + sizeof(arg));
1527     + if (ret > 0) {
1528     + if (match_option(arg, ret, "off")) {
1529     + goto disable;
1530     + } else if (match_option(arg, ret, "on")) {
1531     + spec2_print_if_secure("force enabled on command line.");
1532     + return SPECTRE_V2_CMD_FORCE;
1533     + } else if (match_option(arg, ret, "retpoline")) {
1534     + spec2_print_if_insecure("retpoline selected on command line.");
1535     + return SPECTRE_V2_CMD_RETPOLINE;
1536     + } else if (match_option(arg, ret, "retpoline,amd")) {
1537     + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1538     + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1539     + return SPECTRE_V2_CMD_AUTO;
1540     + }
1541     + spec2_print_if_insecure("AMD retpoline selected on command line.");
1542     + return SPECTRE_V2_CMD_RETPOLINE_AMD;
1543     + } else if (match_option(arg, ret, "retpoline,generic")) {
1544     + spec2_print_if_insecure("generic retpoline selected on command line.");
1545     + return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
1546     + } else if (match_option(arg, ret, "auto")) {
1547     + return SPECTRE_V2_CMD_AUTO;
1548     + }
1549     + }
1550     +
1551     + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1552     + return SPECTRE_V2_CMD_AUTO;
1553     +disable:
1554     + spec2_print_if_insecure("disabled on command line.");
1555     + return SPECTRE_V2_CMD_NONE;
1556     }
1557     +
1558     +static void __init spectre_v2_select_mitigation(void)
1559     +{
1560     + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
1561     + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
1562     +
1563     + /*
1564     + * If the CPU is not affected and the command line mode is NONE or AUTO
1565     + * then nothing to do.
1566     + */
1567     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
1568     + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
1569     + return;
1570     +
1571     + switch (cmd) {
1572     + case SPECTRE_V2_CMD_NONE:
1573     + return;
1574     +
1575     + case SPECTRE_V2_CMD_FORCE:
1576     + /* FALLTHRU */
1577     + case SPECTRE_V2_CMD_AUTO:
1578     + goto retpoline_auto;
1579     +
1580     + case SPECTRE_V2_CMD_RETPOLINE_AMD:
1581     + if (IS_ENABLED(CONFIG_RETPOLINE))
1582     + goto retpoline_amd;
1583     + break;
1584     + case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
1585     + if (IS_ENABLED(CONFIG_RETPOLINE))
1586     + goto retpoline_generic;
1587     + break;
1588     + case SPECTRE_V2_CMD_RETPOLINE:
1589     + if (IS_ENABLED(CONFIG_RETPOLINE))
1590     + goto retpoline_auto;
1591     + break;
1592     + }
1593     + pr_err("kernel not compiled with retpoline; no mitigation available!\n");
1594     + return;
1595     +
1596     +retpoline_auto:
1597     + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
1598     + retpoline_amd:
1599     + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
1600     + pr_err("LFENCE not serializing. Switching to generic retpoline\n");
1601     + goto retpoline_generic;
1602     + }
1603     + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
1604     + SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
1605     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
1606     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1607     + } else {
1608     + retpoline_generic:
1609     + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
1610     + SPECTRE_V2_RETPOLINE_MINIMAL;
1611     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1612     + }
1613     +
1614     + spectre_v2_enabled = mode;
1615     + pr_info("%s\n", spectre_v2_strings[mode]);
1616     +}
1617     +
1618     +#undef pr_fmt
1619     +
1620     +#ifdef CONFIG_SYSFS
1621     +ssize_t cpu_show_meltdown(struct device *dev,
1622     + struct device_attribute *attr, char *buf)
1623     +{
1624     + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1625     + return sprintf(buf, "Not affected\n");
1626     + if (boot_cpu_has(X86_FEATURE_KAISER))
1627     + return sprintf(buf, "Mitigation: PTI\n");
1628     + return sprintf(buf, "Vulnerable\n");
1629     +}
1630     +
1631     +ssize_t cpu_show_spectre_v1(struct device *dev,
1632     + struct device_attribute *attr, char *buf)
1633     +{
1634     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
1635     + return sprintf(buf, "Not affected\n");
1636     + return sprintf(buf, "Vulnerable\n");
1637     +}
1638     +
1639     +ssize_t cpu_show_spectre_v2(struct device *dev,
1640     + struct device_attribute *attr, char *buf)
1641     +{
1642     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1643     + return sprintf(buf, "Not affected\n");
1644     +
1645     + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
1646     +}
1647     +#endif
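
The three cpu_show_*() handlers above back the sysfs files documented at the top of this patch. A small user-space reader for them (paths taken from the ABI hunk; a missing file simply means an unpatched kernel):

    #include <stdio.h>

    int main(void)
    {
            const char *files[] = {
                    "/sys/devices/system/cpu/vulnerabilities/meltdown",
                    "/sys/devices/system/cpu/vulnerabilities/spectre_v1",
                    "/sys/devices/system/cpu/vulnerabilities/spectre_v2",
            };
            char line[128];

            for (int i = 0; i < 3; i++) {
                    FILE *f = fopen(files[i], "r");

                    if (!f)
                            continue;
                    if (fgets(line, sizeof(line), f))
                            printf("%s: %s", files[i], line);
                    fclose(f);
            }
            return 0;
    }
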
1648     diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
1649     deleted file mode 100644
1650     index a972ac4c7e7d..000000000000
1651     --- a/arch/x86/kernel/cpu/bugs_64.c
1652     +++ /dev/null
1653     @@ -1,33 +0,0 @@
1654     -/*
1655     - * Copyright (C) 1994 Linus Torvalds
1656     - * Copyright (C) 2000 SuSE
1657     - */
1658     -
1659     -#include <linux/kernel.h>
1660     -#include <linux/init.h>
1661     -#include <asm/alternative.h>
1662     -#include <asm/bugs.h>
1663     -#include <asm/processor.h>
1664     -#include <asm/mtrr.h>
1665     -#include <asm/cacheflush.h>
1666     -
1667     -void __init check_bugs(void)
1668     -{
1669     - identify_boot_cpu();
1670     -#if !defined(CONFIG_SMP)
1671     - pr_info("CPU: ");
1672     - print_cpu_info(&boot_cpu_data);
1673     -#endif
1674     - alternative_instructions();
1675     -
1676     - /*
1677     - * Make sure the first 2MB area is not mapped by huge pages
1678     - * There are typically fixed size MTRRs in there and overlapping
1679     - * MTRRs into large pages causes slow downs.
1680     - *
1681     - * Right now we don't do that with gbpages because there seems
1682     - * very little benefit for that case.
1683     - */
1684     - if (!direct_gbpages)
1685     - set_memory_4k((unsigned long)__va(0), 1);
1686     -}
1687     diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1688     index 918e44772b04..7b9ae04ddf5d 100644
1689     --- a/arch/x86/kernel/cpu/common.c
1690     +++ b/arch/x86/kernel/cpu/common.c
1691     @@ -480,8 +480,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
1692     return NULL; /* Not found */
1693     }
1694    
1695     -__u32 cpu_caps_cleared[NCAPINTS];
1696     -__u32 cpu_caps_set[NCAPINTS];
1697     +__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
1698     +__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
1699    
1700     void load_percpu_segment(int cpu)
1701     {
1702     @@ -706,6 +706,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
1703     }
1704     }
1705    
1706     +static void apply_forced_caps(struct cpuinfo_x86 *c)
1707     +{
1708     + int i;
1709     +
1710     + for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
1711     + c->x86_capability[i] &= ~cpu_caps_cleared[i];
1712     + c->x86_capability[i] |= cpu_caps_set[i];
1713     + }
1714     +}
1715     +
1716     void get_cpu_cap(struct cpuinfo_x86 *c)
1717     {
1718     u32 eax, ebx, ecx, edx;
1719     @@ -872,7 +882,22 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1720     }
1721    
1722     setup_force_cpu_cap(X86_FEATURE_ALWAYS);
1723     +
1724     + /* Assume for now that ALL x86 CPUs are insecure */
1725     + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1726     +
1727     + setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1728     + setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1729     +
1730     fpu__init_system(c);
1731     +
1732     +#ifdef CONFIG_X86_32
1733     + /*
1734     + * Regardless of whether PCID is enumerated, the SDM says
1735     + * that it can't be enabled in 32-bit mode.
1736     + */
1737     + setup_clear_cpu_cap(X86_FEATURE_PCID);
1738     +#endif
1739     }
1740    
1741     void __init early_cpu_init(void)
1742     @@ -1086,10 +1111,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
1743     this_cpu->c_identify(c);
1744    
1745     /* Clear/Set all flags overridden by options, after probe */
1746     - for (i = 0; i < NCAPINTS; i++) {
1747     - c->x86_capability[i] &= ~cpu_caps_cleared[i];
1748     - c->x86_capability[i] |= cpu_caps_set[i];
1749     - }
1750     + apply_forced_caps(c);
1751    
1752     #ifdef CONFIG_X86_64
1753     c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
1754     @@ -1151,10 +1173,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
1755     * Clear/Set all flags overridden by options, need do it
1756     * before following smp all cpus cap AND.
1757     */
1758     - for (i = 0; i < NCAPINTS; i++) {
1759     - c->x86_capability[i] &= ~cpu_caps_cleared[i];
1760     - c->x86_capability[i] |= cpu_caps_set[i];
1761     - }
1762     + apply_forced_caps(c);
1763    
1764     /*
1765     * On SMP, boot_cpu_data holds the common feature set between
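
apply_forced_caps(), introduced above, replaces the two open-coded loops and widens them from NCAPINTS to NCAPINTS + NBUGINTS words, so bug bits forced during early identification (such as X86_BUG_CPU_MELTDOWN) survive later recomputation. The core operation is plain mask arithmetic:

    #include <stddef.h>
    #include <stdint.h>

    /* Forced-cleared bits win over probed bits, then forced-set
     * bits are OR'ed back in; one pass over all capability words. */
    static void apply_forced(uint32_t *caps, const uint32_t *cleared,
                             const uint32_t *set, size_t nwords)
    {
            for (size_t i = 0; i < nwords; i++)
                    caps[i] = (caps[i] & ~cleared[i]) | set[i];
    }
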
1766     diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
1767     index 13dbcc0f9d03..ac3e636ad586 100644
1768     --- a/arch/x86/kernel/cpu/microcode/intel.c
1769     +++ b/arch/x86/kernel/cpu/microcode/intel.c
1770     @@ -1051,8 +1051,17 @@ static bool is_blacklisted(unsigned int cpu)
1771     {
1772     struct cpuinfo_x86 *c = &cpu_data(cpu);
1773    
1774     - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
1775     - pr_err_once("late loading on model 79 is disabled.\n");
1776     + /*
1777     + * Late loading on model 79 with microcode revision less than 0x0b000021
1778     + * may result in a system hang. This behavior is documented in item
1779     + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
1780     + */
1781     + if (c->x86 == 6 &&
1782     + c->x86_model == INTEL_FAM6_BROADWELL_X &&
1783     + c->x86_mask == 0x01 &&
1784     + c->microcode < 0x0b000021) {
1785     + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
1786     + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1787     return true;
1788     }
1789    
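
The widened is_blacklisted() test gates the late-load refusal on family 6, model 79 (Broadwell-X), stepping 0x01 and a running microcode revision below 0x0b000021, per erratum BDF90. The same predicate in standalone form (constants copied from the hunk):

    #include <stdbool.h>
    #include <stdint.h>

    #define BROADWELL_X_MODEL  79          /* INTEL_FAM6_BROADWELL_X */
    #define BDX_BAD_STEPPING   0x01
    #define BDF90_FIXED_REV    0x0b000021

    static bool late_load_blacklisted(uint8_t family, uint8_t model,
                                      uint8_t stepping, uint32_t ucode_rev)
    {
            /* Erratum BDF90: late loading below this revision may hang. */
            return family == 6 && model == BROADWELL_X_MODEL &&
                   stepping == BDX_BAD_STEPPING &&
                   ucode_rev < BDF90_FIXED_REV;
    }
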
1790     diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
1791     index 1f38d9a4d9de..2763573ee1d2 100644
1792     --- a/arch/x86/kernel/irq_32.c
1793     +++ b/arch/x86/kernel/irq_32.c
1794     @@ -19,6 +19,7 @@
1795     #include <linux/mm.h>
1796    
1797     #include <asm/apic.h>
1798     +#include <asm/nospec-branch.h>
1799    
1800     #ifdef CONFIG_DEBUG_STACKOVERFLOW
1801    
1802     @@ -54,17 +55,17 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
1803     static void call_on_stack(void *func, void *stack)
1804     {
1805     asm volatile("xchgl %%ebx,%%esp \n"
1806     - "call *%%edi \n"
1807     + CALL_NOSPEC
1808     "movl %%ebx,%%esp \n"
1809     : "=b" (stack)
1810     : "0" (stack),
1811     - "D"(func)
1812     + [thunk_target] "D"(func)
1813     : "memory", "cc", "edx", "ecx", "eax");
1814     }
1815    
1816     static inline void *current_stack(void)
1817     {
1818     - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
1819     + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
1820     }
1821    
1822     static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1823     @@ -88,17 +89,17 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1824    
1825     /* Save the next esp at the bottom of the stack */
1826     prev_esp = (u32 *)irqstk;
1827     - *prev_esp = current_stack_pointer();
1828     + *prev_esp = current_stack_pointer;
1829    
1830     if (unlikely(overflow))
1831     call_on_stack(print_stack_overflow, isp);
1832    
1833     asm volatile("xchgl %%ebx,%%esp \n"
1834     - "call *%%edi \n"
1835     + CALL_NOSPEC
1836     "movl %%ebx,%%esp \n"
1837     : "=a" (arg1), "=b" (isp)
1838     : "0" (desc), "1" (isp),
1839     - "D" (desc->handle_irq)
1840     + [thunk_target] "D" (desc->handle_irq)
1841     : "memory", "cc", "ecx");
1842     return 1;
1843     }
1844     @@ -139,7 +140,7 @@ void do_softirq_own_stack(void)
1845    
1846     /* Push the previous esp onto the stack */
1847     prev_esp = (u32 *)irqstk;
1848     - *prev_esp = current_stack_pointer();
1849     + *prev_esp = current_stack_pointer;
1850    
1851     call_on_stack(__do_softirq, isp);
1852     }
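
The irq_32.c conversion swaps a literal call *%%edi for CALL_NOSPEC with a named [thunk_target] operand; the name is what lets alternatives patch the same template into a retpoline thunk call when X86_FEATURE_RETPOLINE is set. Stripped of the kernel's macros, the unmitigated shape the template degrades to looks roughly like this i386 sketch (calls from inline asm are shown for illustration only; the clobbers list the i386 caller-saved registers):

    /* Indirect call through a named inline-asm operand (i386). */
    static void indirect_call(void (*func)(void))
    {
            asm volatile("call *%[thunk_target]"
                         : : [thunk_target] "D" (func)
                         : "memory", "cc", "eax", "ecx", "edx");
    }
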
1853     diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
1854     index 7b0d3da52fb4..287ec3bc141f 100644
1855     --- a/arch/x86/kernel/mcount_64.S
1856     +++ b/arch/x86/kernel/mcount_64.S
1857     @@ -8,7 +8,7 @@
1858     #include <asm/ptrace.h>
1859     #include <asm/ftrace.h>
1860     #include <asm/export.h>
1861     -
1862     +#include <asm/nospec-branch.h>
1863    
1864     .code64
1865     .section .entry.text, "ax"
1866     @@ -290,8 +290,9 @@ trace:
1867     * ip and parent ip are used and the list function is called when
1868     * function tracing is enabled.
1869     */
1870     - call *ftrace_trace_function
1871    
1872     + movq ftrace_trace_function, %r8
1873     + CALL_NOSPEC %r8
1874     restore_mcount_regs
1875    
1876     jmp fgraph_trace
1877     @@ -334,5 +335,5 @@ GLOBAL(return_to_handler)
1878     movq 8(%rsp), %rdx
1879     movq (%rsp), %rax
1880     addq $24, %rsp
1881     - jmp *%rdi
1882     + JMP_NOSPEC %rdi
1883     #endif
1884     diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
1885     index bd4e3d4d3625..322f433fbc76 100644
1886     --- a/arch/x86/kernel/traps.c
1887     +++ b/arch/x86/kernel/traps.c
1888     @@ -153,7 +153,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
1889     * from double_fault.
1890     */
1891     BUG_ON((unsigned long)(current_top_of_stack() -
1892     - current_stack_pointer()) >= THREAD_SIZE);
1893     + current_stack_pointer) >= THREAD_SIZE);
1894    
1895     preempt_enable_no_resched();
1896     }
1897     diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
1898     index 8148d8ca7930..24af898fb3a6 100644
1899     --- a/arch/x86/kvm/svm.c
1900     +++ b/arch/x86/kvm/svm.c
1901     @@ -44,6 +44,7 @@
1902     #include <asm/debugreg.h>
1903     #include <asm/kvm_para.h>
1904     #include <asm/irq_remapping.h>
1905     +#include <asm/nospec-branch.h>
1906    
1907     #include <asm/virtext.h>
1908     #include "trace.h"
1909     @@ -4868,6 +4869,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1910     "mov %%r13, %c[r13](%[svm]) \n\t"
1911     "mov %%r14, %c[r14](%[svm]) \n\t"
1912     "mov %%r15, %c[r15](%[svm]) \n\t"
1913     +#endif
1914     + /*
1915     + * Clear host registers marked as clobbered to prevent
1916     + * speculative use.
1917     + */
1918     + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
1919     + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
1920     + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
1921     + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
1922     + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
1923     +#ifdef CONFIG_X86_64
1924     + "xor %%r8, %%r8 \n\t"
1925     + "xor %%r9, %%r9 \n\t"
1926     + "xor %%r10, %%r10 \n\t"
1927     + "xor %%r11, %%r11 \n\t"
1928     + "xor %%r12, %%r12 \n\t"
1929     + "xor %%r13, %%r13 \n\t"
1930     + "xor %%r14, %%r14 \n\t"
1931     + "xor %%r15, %%r15 \n\t"
1932     #endif
1933     "pop %%" _ASM_BP
1934     :
1935     @@ -4898,6 +4918,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
1936     #endif
1937     );
1938    
1939     + /* Eliminate branch target predictions from guest mode */
1940     + vmexit_fill_RSB();
1941     +
1942     #ifdef CONFIG_X86_64
1943     wrmsrl(MSR_GS_BASE, svm->host.gs_base);
1944     #else
1945     diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
1946     index 263e56059fd5..3ca6d15994e4 100644
1947     --- a/arch/x86/kvm/vmx.c
1948     +++ b/arch/x86/kvm/vmx.c
1949     @@ -48,6 +48,7 @@
1950     #include <asm/kexec.h>
1951     #include <asm/apic.h>
1952     #include <asm/irq_remapping.h>
1953     +#include <asm/nospec-branch.h>
1954    
1955     #include "trace.h"
1956     #include "pmu.h"
1957     @@ -857,8 +858,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
1958     {
1959     BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
1960    
1961     - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
1962     - vmcs_field_to_offset_table[field] == 0)
1963     + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
1964     + return -ENOENT;
1965     +
1966     + /*
1967     + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
1968     + * generic mechanism.
1969     + */
1970     + asm("lfence");
1971     +
1972     + if (vmcs_field_to_offset_table[field] == 0)
1973     return -ENOENT;
1974    
1975     return vmcs_field_to_offset_table[field];
1976     @@ -8948,6 +8957,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1977     /* Save guest registers, load host registers, keep flags */
1978     "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
1979     "pop %0 \n\t"
1980     + "setbe %c[fail](%0)\n\t"
1981     "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
1982     "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
1983     __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
1984     @@ -8964,12 +8974,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
1985     "mov %%r13, %c[r13](%0) \n\t"
1986     "mov %%r14, %c[r14](%0) \n\t"
1987     "mov %%r15, %c[r15](%0) \n\t"
1988     + "xor %%r8d, %%r8d \n\t"
1989     + "xor %%r9d, %%r9d \n\t"
1990     + "xor %%r10d, %%r10d \n\t"
1991     + "xor %%r11d, %%r11d \n\t"
1992     + "xor %%r12d, %%r12d \n\t"
1993     + "xor %%r13d, %%r13d \n\t"
1994     + "xor %%r14d, %%r14d \n\t"
1995     + "xor %%r15d, %%r15d \n\t"
1996     #endif
1997     "mov %%cr2, %%" _ASM_AX " \n\t"
1998     "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
1999    
2000     + "xor %%eax, %%eax \n\t"
2001     + "xor %%ebx, %%ebx \n\t"
2002     + "xor %%esi, %%esi \n\t"
2003     + "xor %%edi, %%edi \n\t"
2004     "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
2005     - "setbe %c[fail](%0) \n\t"
2006     ".pushsection .rodata \n\t"
2007     ".global vmx_return \n\t"
2008     "vmx_return: " _ASM_PTR " 2b \n\t"
2009     @@ -9006,6 +9027,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2010     #endif
2011     );
2012    
2013     + /* Eliminate branch target predictions from guest mode */
2014     + vmexit_fill_RSB();
2015     +
2016     /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
2017     if (debugctlmsr)
2018     update_debugctlmsr(debugctlmsr);
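
vmcs_field_to_offset() above splits the bounds check from the dependent table load and places an LFENCE between them, so a mispredicted field >= ARRAY_SIZE() branch cannot speculatively index past the table (the CVE-2017-5753 pattern the FIXME names). Reduced to a standalone shape (the error value here is arbitrary):

    #include <stddef.h>

    /* Bounds check, speculation barrier, then the dependent load. */
    static int table_lookup(const short *table, size_t size, size_t idx)
    {
            if (idx >= size)
                    return -1;

            asm volatile("lfence" ::: "memory"); /* stop speculative indexing */

            return table[idx];
    }
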
2019     diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
2020     index 73304b1a03cc..d3f80cccb9aa 100644
2021     --- a/arch/x86/kvm/x86.c
2022     +++ b/arch/x86/kvm/x86.c
2023     @@ -4264,7 +4264,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2024     addr, n, v))
2025     && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
2026     break;
2027     - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
2028     + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
2029     handled += n;
2030     addr += n;
2031     len -= n;
2032     @@ -4517,7 +4517,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
2033     {
2034     if (vcpu->mmio_read_completed) {
2035     trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
2036     - vcpu->mmio_fragments[0].gpa, *(u64 *)val);
2037     + vcpu->mmio_fragments[0].gpa, val);
2038     vcpu->mmio_read_completed = 0;
2039     return 1;
2040     }
2041     @@ -4539,14 +4539,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
2042    
2043     static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
2044     {
2045     - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
2046     + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
2047     return vcpu_mmio_write(vcpu, gpa, bytes, val);
2048     }
2049    
2050     static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
2051     void *val, int bytes)
2052     {
2053     - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
2054     + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
2055     return X86EMUL_IO_NEEDED;
2056     }
2057    
2058     diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
2059     index 34a74131a12c..6bf1898ddf49 100644
2060     --- a/arch/x86/lib/Makefile
2061     +++ b/arch/x86/lib/Makefile
2062     @@ -25,6 +25,7 @@ lib-y += memcpy_$(BITS).o
2063     lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
2064     lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
2065     lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
2066     +lib-$(CONFIG_RETPOLINE) += retpoline.o
2067    
2068     obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
2069    
2070     diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
2071     index 4d34bb548b41..46e71a74e612 100644
2072     --- a/arch/x86/lib/checksum_32.S
2073     +++ b/arch/x86/lib/checksum_32.S
2074     @@ -29,7 +29,8 @@
2075     #include <asm/errno.h>
2076     #include <asm/asm.h>
2077     #include <asm/export.h>
2078     -
2079     +#include <asm/nospec-branch.h>
2080     +
2081     /*
2082     * computes a partial checksum, e.g. for TCP/UDP fragments
2083     */
2084     @@ -156,7 +157,7 @@ ENTRY(csum_partial)
2085     negl %ebx
2086     lea 45f(%ebx,%ebx,2), %ebx
2087     testl %esi, %esi
2088     - jmp *%ebx
2089     + JMP_NOSPEC %ebx
2090    
2091     # Handle 2-byte-aligned regions
2092     20: addw (%esi), %ax
2093     @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
2094     andl $-32,%edx
2095     lea 3f(%ebx,%ebx), %ebx
2096     testl %esi, %esi
2097     - jmp *%ebx
2098     + JMP_NOSPEC %ebx
2099     1: addl $64,%esi
2100     addl $64,%edi
2101     SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
2102     diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
2103     new file mode 100644
2104     index 000000000000..cb45c6cb465f
2105     --- /dev/null
2106     +++ b/arch/x86/lib/retpoline.S
2107     @@ -0,0 +1,48 @@
2108     +/* SPDX-License-Identifier: GPL-2.0 */
2109     +
2110     +#include <linux/stringify.h>
2111     +#include <linux/linkage.h>
2112     +#include <asm/dwarf2.h>
2113     +#include <asm/cpufeatures.h>
2114     +#include <asm/alternative-asm.h>
2115     +#include <asm/export.h>
2116     +#include <asm/nospec-branch.h>
2117     +
2118     +.macro THUNK reg
2119     + .section .text.__x86.indirect_thunk.\reg
2120     +
2121     +ENTRY(__x86_indirect_thunk_\reg)
2122     + CFI_STARTPROC
2123     + JMP_NOSPEC %\reg
2124     + CFI_ENDPROC
2125     +ENDPROC(__x86_indirect_thunk_\reg)
2126     +.endm
2127     +
2128     +/*
2129     + * Despite being an assembler file we can't just use .irp here
2130     + * because __KSYM_DEPS__ only uses the C preprocessor and would
2131     + * only see one instance of "__x86_indirect_thunk_\reg" rather
2132     + * than one per register with the correct names. So we do it
2133     + * the simple and nasty way...
2134     + */
2135     +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
2136     +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
2137     +
2138     +GENERATE_THUNK(_ASM_AX)
2139     +GENERATE_THUNK(_ASM_BX)
2140     +GENERATE_THUNK(_ASM_CX)
2141     +GENERATE_THUNK(_ASM_DX)
2142     +GENERATE_THUNK(_ASM_SI)
2143     +GENERATE_THUNK(_ASM_DI)
2144     +GENERATE_THUNK(_ASM_BP)
2145     +GENERATE_THUNK(_ASM_SP)
2146     +#ifdef CONFIG_64BIT
2147     +GENERATE_THUNK(r8)
2148     +GENERATE_THUNK(r9)
2149     +GENERATE_THUNK(r10)
2150     +GENERATE_THUNK(r11)
2151     +GENERATE_THUNK(r12)
2152     +GENERATE_THUNK(r13)
2153     +GENERATE_THUNK(r14)
2154     +GENERATE_THUNK(r15)
2155     +#endif
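
retpoline.S above only emits JMP_NOSPEC per register; the retpoline sequence itself is hidden behind that macro. For reference, the classic generic retpoline body, written as a freestanding toy thunk for %rax rather than the kernel's macro expansion: the architectural path overwrites the return address with the target, while any speculative return is parked in the pause/lfence loop.

    /* Toy x86-64 retpoline thunk, top-level asm in a C file. */
    asm(".text\n"
        ".globl toy_indirect_thunk_rax\n"
        "toy_indirect_thunk_rax:\n"
        "   call 1f\n"            /* pushes the capture loop's address */
        "2: pause\n"              /* speculative returns spin here */
        "   lfence\n"
        "   jmp 2b\n"
        "1: mov %rax, (%rsp)\n"   /* retarget the return to *%rax */
        "   ret\n");
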
2156     diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
2157     index 8f8e5e03d083..a8ade08a9bf5 100644
2158     --- a/arch/x86/mm/kaiser.c
2159     +++ b/arch/x86/mm/kaiser.c
2160     @@ -197,6 +197,8 @@ static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
2161     * requires that not to be #defined to 0): so mask it off here.
2162     */
2163     flags &= ~_PAGE_GLOBAL;
2164     + if (!(__supported_pte_mask & _PAGE_NX))
2165     + flags &= ~_PAGE_NX;
2166    
2167     for (; address < end_addr; address += PAGE_SIZE) {
2168     target_address = get_pa_from_mapping(address);
2169     diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
2170     index 5aaec8effc5f..209b9465e97a 100644
2171     --- a/arch/x86/mm/pgtable.c
2172     +++ b/arch/x86/mm/pgtable.c
2173     @@ -345,13 +345,6 @@ static inline void _pgd_free(pgd_t *pgd)
2174     }
2175     #else
2176    
2177     -/*
2178     - * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
2179     - * both 8k in size and 8k-aligned. That lets us just flip bit 12
2180     - * in a pointer to swap between the two 4k halves.
2181     - */
2182     -#define PGD_ALLOCATION_ORDER kaiser_enabled
2183     -
2184     static inline pgd_t *_pgd_alloc(void)
2185     {
2186     return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
2187     diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
2188     index 41205de487e7..578973ade71b 100644
2189     --- a/arch/x86/mm/tlb.c
2190     +++ b/arch/x86/mm/tlb.c
2191     @@ -110,7 +110,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
2192     * mapped in the new pgd, we'll double-fault. Forcibly
2193     * map it.
2194     */
2195     - unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
2196     + unsigned int stack_pgd_index = pgd_index(current_stack_pointer);
2197    
2198     pgd_t *pgd = next->pgd + stack_pgd_index;
2199    
2200     diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
2201     index 2f25a363068c..dcb2d9d185a2 100644
2202     --- a/arch/x86/platform/efi/efi_64.c
2203     +++ b/arch/x86/platform/efi/efi_64.c
2204     @@ -142,7 +142,7 @@ int __init efi_alloc_page_tables(void)
2205     return 0;
2206    
2207     gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
2208     - efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
2209     + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
2210     if (!efi_pgd)
2211     return -ENOMEM;
2212    
2213     diff --git a/crypto/algapi.c b/crypto/algapi.c
2214     index 1fad2a6b3bbb..5c098ffa7d3d 100644
2215     --- a/crypto/algapi.c
2216     +++ b/crypto/algapi.c
2217     @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
2218    
2219     spawn->alg = NULL;
2220     spawns = &inst->alg.cra_users;
2221     +
2222     + /*
2223     + * We may encounter an unregistered instance here, since
2224     + * an instance's spawns are set up prior to the instance
2225     + * being registered. An unregistered instance will have
2226     + * NULL ->cra_users.next, since ->cra_users isn't
2227     + * properly initialized until registration. But an
2228     + * unregistered instance cannot have any users, so treat
2229     + * it the same as ->cra_users being empty.
2230     + */
2231     + if (spawns->next == NULL)
2232     + break;
2233     }
2234     } while ((spawns = crypto_more_spawns(alg, &stack, &top,
2235     &secondary_spawns)));
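
The crypto_remove_spawns() guard above treats a NULL ->next on ->cra_users as an empty list, because an unregistered instance never had that list head initialized. The same check in isolation, with a bare doubly-linked list head:

    #include <stdbool.h>
    #include <stddef.h>

    struct list_node {
            struct list_node *next, *prev;
    };

    /* A zeroed, never-initialized head has next == NULL; treat it
     * as empty instead of walking into a NULL dereference. */
    static bool list_walkable(const struct list_node *head)
    {
            return head->next != NULL && head->next != head;
    }
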
2236     diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
2237     index d02e7c0f5bfd..0651010bba21 100644
2238     --- a/drivers/base/Kconfig
2239     +++ b/drivers/base/Kconfig
2240     @@ -235,6 +235,9 @@ config GENERIC_CPU_DEVICES
2241     config GENERIC_CPU_AUTOPROBE
2242     bool
2243    
2244     +config GENERIC_CPU_VULNERABILITIES
2245     + bool
2246     +
2247     config SOC_BUS
2248     bool
2249    
2250     diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
2251     index 4c28e1a09786..56b6c8508a89 100644
2252     --- a/drivers/base/cpu.c
2253     +++ b/drivers/base/cpu.c
2254     @@ -499,10 +499,58 @@ static void __init cpu_dev_register_generic(void)
2255     #endif
2256     }
2257    
2258     +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
2259     +
2260     +ssize_t __weak cpu_show_meltdown(struct device *dev,
2261     + struct device_attribute *attr, char *buf)
2262     +{
2263     + return sprintf(buf, "Not affected\n");
2264     +}
2265     +
2266     +ssize_t __weak cpu_show_spectre_v1(struct device *dev,
2267     + struct device_attribute *attr, char *buf)
2268     +{
2269     + return sprintf(buf, "Not affected\n");
2270     +}
2271     +
2272     +ssize_t __weak cpu_show_spectre_v2(struct device *dev,
2273     + struct device_attribute *attr, char *buf)
2274     +{
2275     + return sprintf(buf, "Not affected\n");
2276     +}
2277     +
2278     +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
2279     +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
2280     +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
2281     +
2282     +static struct attribute *cpu_root_vulnerabilities_attrs[] = {
2283     + &dev_attr_meltdown.attr,
2284     + &dev_attr_spectre_v1.attr,
2285     + &dev_attr_spectre_v2.attr,
2286     + NULL
2287     +};
2288     +
2289     +static const struct attribute_group cpu_root_vulnerabilities_group = {
2290     + .name = "vulnerabilities",
2291     + .attrs = cpu_root_vulnerabilities_attrs,
2292     +};
2293     +
2294     +static void __init cpu_register_vulnerabilities(void)
2295     +{
2296     + if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
2297     + &cpu_root_vulnerabilities_group))
2298     + pr_err("Unable to register CPU vulnerabilities\n");
2299     +}
2300     +
2301     +#else
2302     +static inline void cpu_register_vulnerabilities(void) { }
2303     +#endif
2304     +
2305     void __init cpu_dev_init(void)
2306     {
2307     if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
2308     panic("Failed to register CPU subsystem");
2309    
2310     cpu_dev_register_generic();
2311     + cpu_register_vulnerabilities();
2312     }
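
The generic cpu.c code above supplies __weak "Not affected" defaults that an architecture overrides with strong definitions (x86's live in the bugs.c hunk earlier in this patch). The linker-level pattern in isolation, split across two translation units:

    /* default.c: weak fallback used when no arch override exists */
    __attribute__((weak)) const char *vuln_status(void)
    {
            return "Not affected\n";
    }

    /* arch.c: a strong definition replaces the weak one at link time */
    const char *vuln_status(void)
    {
            return "Mitigation: PTI\n";
    }
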
2313     diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
2314     index 24f4b544d270..e32badd26c8a 100644
2315     --- a/drivers/block/rbd.c
2316     +++ b/drivers/block/rbd.c
2317     @@ -4511,7 +4511,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
2318     segment_size = rbd_obj_bytes(&rbd_dev->header);
2319     blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
2320     q->limits.max_sectors = queue_max_hw_sectors(q);
2321     - blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
2322     + blk_queue_max_segments(q, USHRT_MAX);
2323     blk_queue_max_segment_size(q, segment_size);
2324     blk_queue_io_min(q, segment_size);
2325     blk_queue_io_opt(q, segment_size);
2326     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2327     index fefb9d995d2c..81f5a552e32f 100644
2328     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2329     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2330     @@ -2729,6 +2729,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
2331     }
2332    
2333     view_type = vmw_view_cmd_to_type(header->id);
2334     + if (view_type == vmw_view_max)
2335     + return -EINVAL;
2336     cmd = container_of(header, typeof(*cmd), header);
2337     ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
2338     user_surface_converter,
2339     diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
2340     index e0a8216ecf2b..13c32eb40738 100644
2341     --- a/drivers/hv/hv.c
2342     +++ b/drivers/hv/hv.c
2343     @@ -31,6 +31,7 @@
2344     #include <linux/clockchips.h>
2345     #include <asm/hyperv.h>
2346     #include <asm/mshyperv.h>
2347     +#include <asm/nospec-branch.h>
2348     #include "hyperv_vmbus.h"
2349    
2350     /* The one and only */
2351     @@ -103,9 +104,10 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
2352     return (u64)ULLONG_MAX;
2353    
2354     __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
2355     - __asm__ __volatile__("call *%3" : "=a" (hv_status) :
2356     + __asm__ __volatile__(CALL_NOSPEC :
2357     + "=a" (hv_status) :
2358     "c" (control), "d" (input_address),
2359     - "m" (hypercall_page));
2360     + THUNK_TARGET(hypercall_page));
2361    
2362     return hv_status;
2363    
2364     @@ -123,11 +125,12 @@ u64 hv_do_hypercall(u64 control, void *input, void *output)
2365     if (!hypercall_page)
2366     return (u64)ULLONG_MAX;
2367    
2368     - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
2369     + __asm__ __volatile__ (CALL_NOSPEC : "=d"(hv_status_hi),
2370     "=a"(hv_status_lo) : "d" (control_hi),
2371     "a" (control_lo), "b" (input_address_hi),
2372     "c" (input_address_lo), "D"(output_address_hi),
2373     - "S"(output_address_lo), "m" (hypercall_page));
2374     + "S"(output_address_lo),
2375     + THUNK_TARGET(hypercall_page));
2376    
2377     return hv_status_lo | ((u64)hv_status_hi << 32);
2378     #endif /* !x86_64 */
2379     diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
2380     index b9748970df4a..29ab814693fc 100644
2381     --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
2382     +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
2383     @@ -992,8 +992,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
2384     return -ENOMEM;
2385    
2386     attr->qp_state = IB_QPS_INIT;
2387     - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
2388     - IB_ACCESS_REMOTE_WRITE;
2389     + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
2390     attr->port_num = ch->sport->port;
2391     attr->pkey_index = 0;
2392    
2393     diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
2394     index 7643f72adb1c..3ec647e8b9c6 100644
2395     --- a/drivers/md/dm-bufio.c
2396     +++ b/drivers/md/dm-bufio.c
2397     @@ -1554,7 +1554,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
2398     int l;
2399     struct dm_buffer *b, *tmp;
2400     unsigned long freed = 0;
2401     - unsigned long count = nr_to_scan;
2402     + unsigned long count = c->n_buffers[LIST_CLEAN] +
2403     + c->n_buffers[LIST_DIRTY];
2404     unsigned long retain_target = get_retain_buffers(c);
2405    
2406     for (l = 0; l < LIST_SIZE; l++) {
2407     @@ -1591,6 +1592,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
2408     {
2409     struct dm_bufio_client *c;
2410     unsigned long count;
2411     + unsigned long retain_target;
2412    
2413     c = container_of(shrink, struct dm_bufio_client, shrinker);
2414     if (sc->gfp_mask & __GFP_FS)
2415     @@ -1599,8 +1601,9 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
2416     return 0;
2417    
2418     count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
2419     + retain_target = get_retain_buffers(c);
2420     dm_bufio_unlock(c);
2421     - return count;
2422     + return (count < retain_target) ? 0 : (count - retain_target);
2423     }
2424    
2425     /*
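
dm_bufio_shrink_count() above now reports only the buffers beyond the retain target as reclaimable, and __scan() derives its count from the client's own totals instead of trusting nr_to_scan. The count computation on its own:

    /* Buffers at or below the retain target are never reclaimable. */
    static unsigned long freeable_buffers(unsigned long count,
                                          unsigned long retain_target)
    {
            return (count < retain_target) ? 0 : count - retain_target;
    }
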
2426     diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
2427     index eea9aea14b00..5d5012337d9e 100644
2428     --- a/drivers/net/can/usb/gs_usb.c
2429     +++ b/drivers/net/can/usb/gs_usb.c
2430     @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
2431     dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
2432     rc);
2433    
2434     - return rc;
2435     + return (rc > 0) ? 0 : rc;
2436     }
2437    
2438     static void gs_usb_xmit_callback(struct urb *urb)
2439     diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
2440     index f3aaca743ea3..8a48656a376b 100644
2441     --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
2442     +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
2443     @@ -1364,6 +1364,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
2444     * Checks to see if the link status of the hardware has changed. If a
2445     * change in link status has been detected, then we read the PHY registers
2446     * to get the current speed/duplex if link exists.
2447     + *
2448     + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
2449     + * up).
2450     **/
2451     static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2452     {
2453     @@ -1379,7 +1382,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2454     * Change or Rx Sequence Error interrupt.
2455     */
2456     if (!mac->get_link_status)
2457     - return 0;
2458     + return 1;
2459    
2460     /* First we want to see if the MII Status Register reports
2461     * link. If so, then we want to get the current speed/duplex
2462     @@ -1611,10 +1614,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
2463     * different link partner.
2464     */
2465     ret_val = e1000e_config_fc_after_link_up(hw);
2466     - if (ret_val)
2467     + if (ret_val) {
2468     e_dbg("Error configuring flow control\n");
2469     + return ret_val;
2470     + }
2471    
2472     - return ret_val;
2473     + return 1;
2474     }
2475    
2476     static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
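
e1000_check_for_copper_link_ich8lan() is now explicitly tri-state, as its new kernel-doc states: negative -E1000_ERR_* codes, 0 for link down, 1 for link up, with flow-control setup failures propagated instead of swallowed. A caller therefore distinguishes all three cases (fragment, kernel context assumed):

    s32 ret = e1000_check_for_copper_link_ich8lan(hw);

    if (ret < 0) {
            /* MAC/PHY access error, bail out */
    } else if (ret == 0) {
            /* link down, nothing to configure */
    } else {
            /* link up, speed/duplex are current */
    }
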
2477     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2478     index 9e31a3390154..8aa91ddff287 100644
2479     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2480     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
2481     @@ -1328,9 +1328,9 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
2482     static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
2483     bool removing)
2484     {
2485     - if (!removing && !nh->should_offload)
2486     + if (!removing)
2487     nh->should_offload = 1;
2488     - else if (removing && nh->offloaded)
2489     + else
2490     nh->should_offload = 0;
2491     nh->update = 1;
2492     }
2493     diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
2494     index 2140dedab712..b6816ae00b7a 100644
2495     --- a/drivers/net/ethernet/renesas/sh_eth.c
2496     +++ b/drivers/net/ethernet/renesas/sh_eth.c
2497     @@ -3087,18 +3087,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
2498     /* ioremap the TSU registers */
2499     if (mdp->cd->tsu) {
2500     struct resource *rtsu;
2501     +
2502     rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
2503     - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
2504     - if (IS_ERR(mdp->tsu_addr)) {
2505     - ret = PTR_ERR(mdp->tsu_addr);
2506     + if (!rtsu) {
2507     + dev_err(&pdev->dev, "no TSU resource\n");
2508     + ret = -ENODEV;
2509     + goto out_release;
2510     + }
2511     + /* We can only request the TSU region for the first port
2512     + * of the two sharing this TSU for the probe to succeed...
2513     + */
2514     + if (devno % 2 == 0 &&
2515     + !devm_request_mem_region(&pdev->dev, rtsu->start,
2516     + resource_size(rtsu),
2517     + dev_name(&pdev->dev))) {
2518     + dev_err(&pdev->dev, "can't request TSU resource.\n");
2519     + ret = -EBUSY;
2520     + goto out_release;
2521     + }
2522     + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
2523     + resource_size(rtsu));
2524     + if (!mdp->tsu_addr) {
2525     + dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
2526     + ret = -ENOMEM;
2527     goto out_release;
2528     }
2529     mdp->port = devno % 2;
2530     ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
2531     }
2532    
2533     - /* initialize first or needed device */
2534     - if (!devno || pd->needs_init) {
2535     + /* Need to init only the first port of the two sharing a TSU */
2536     + if (devno % 2 == 0) {
2537     if (mdp->cd->chip_reset)
2538     mdp->cd->chip_reset(ndev);
2539    
2540     diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2541     index adf61a7b1b01..98bbb91336e4 100644
2542     --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2543     +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
2544     @@ -280,8 +280,14 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
2545     bool stmmac_eee_init(struct stmmac_priv *priv)
2546     {
2547     unsigned long flags;
2548     + int interface = priv->plat->interface;
2549     bool ret = false;
2550    
2551     + if ((interface != PHY_INTERFACE_MODE_MII) &&
2552     + (interface != PHY_INTERFACE_MODE_GMII) &&
2553     + !phy_interface_mode_is_rgmii(interface))
2554     + goto out;
2555     +
2556     /* Using PCS we cannot dial with the phy registers at this stage
2557     * so we do not support extra feature like EEE.
2558     */
2559     diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
2560     index e221bfcee76b..947bea81d924 100644
2561     --- a/drivers/net/usb/cx82310_eth.c
2562     +++ b/drivers/net/usb/cx82310_eth.c
2563     @@ -293,12 +293,9 @@ static struct sk_buff *cx82310_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
2564     {
2565     int len = skb->len;
2566    
2567     - if (skb_headroom(skb) < 2) {
2568     - struct sk_buff *skb2 = skb_copy_expand(skb, 2, 0, flags);
2569     + if (skb_cow_head(skb, 2)) {
2570     dev_kfree_skb_any(skb);
2571     - skb = skb2;
2572     - if (!skb)
2573     - return NULL;
2574     + return NULL;
2575     }
2576     skb_push(skb, 2);
2577    
2578     diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
2579     index f33460cec79f..9c257ffedb15 100644
2580     --- a/drivers/net/usb/lan78xx.c
2581     +++ b/drivers/net/usb/lan78xx.c
2582     @@ -2419,14 +2419,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
2583     {
2584     u32 tx_cmd_a, tx_cmd_b;
2585    
2586     - if (skb_headroom(skb) < TX_OVERHEAD) {
2587     - struct sk_buff *skb2;
2588     -
2589     - skb2 = skb_copy_expand(skb, TX_OVERHEAD, 0, flags);
2590     + if (skb_cow_head(skb, TX_OVERHEAD)) {
2591     dev_kfree_skb_any(skb);
2592     - skb = skb2;
2593     - if (!skb)
2594     - return NULL;
2595     + return NULL;
2596     }
2597    
2598     if (lan78xx_linearize(skb) < 0)
2599     diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
2600     index 9af9799935db..4cb9b11a545a 100644
2601     --- a/drivers/net/usb/smsc75xx.c
2602     +++ b/drivers/net/usb/smsc75xx.c
2603     @@ -2205,13 +2205,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
2604     {
2605     u32 tx_cmd_a, tx_cmd_b;
2606    
2607     - if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
2608     - struct sk_buff *skb2 =
2609     - skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
2610     + if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
2611     dev_kfree_skb_any(skb);
2612     - skb = skb2;
2613     - if (!skb)
2614     - return NULL;
2615     + return NULL;
2616     }
2617    
2618     tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN) | TX_CMD_A_FCS;
2619     diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
2620     index 4a1e9c489f1f..aadfe1d1c37e 100644
2621     --- a/drivers/net/usb/sr9700.c
2622     +++ b/drivers/net/usb/sr9700.c
2623     @@ -456,14 +456,9 @@ static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
2624    
2625     len = skb->len;
2626    
2627     - if (skb_headroom(skb) < SR_TX_OVERHEAD) {
2628     - struct sk_buff *skb2;
2629     -
2630     - skb2 = skb_copy_expand(skb, SR_TX_OVERHEAD, 0, flags);
2631     + if (skb_cow_head(skb, SR_TX_OVERHEAD)) {
2632     dev_kfree_skb_any(skb);
2633     - skb = skb2;
2634     - if (!skb)
2635     - return NULL;
2636     + return NULL;
2637     }
2638    
2639     __skb_push(skb, SR_TX_OVERHEAD);
2640     diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
2641     index 0b4c1562420f..ba1fe61e6ea6 100644
2642     --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
2643     +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
2644     @@ -548,6 +548,11 @@ static int ath10k_htt_rx_crypto_param_len(struct ath10k *ar,
2645     return IEEE80211_TKIP_IV_LEN;
2646     case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
2647     return IEEE80211_CCMP_HDR_LEN;
2648     + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
2649     + return IEEE80211_CCMP_256_HDR_LEN;
2650     + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
2651     + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
2652     + return IEEE80211_GCMP_HDR_LEN;
2653     case HTT_RX_MPDU_ENCRYPT_WEP128:
2654     case HTT_RX_MPDU_ENCRYPT_WAPI:
2655     break;
2656     @@ -573,6 +578,11 @@ static int ath10k_htt_rx_crypto_tail_len(struct ath10k *ar,
2657     return IEEE80211_TKIP_ICV_LEN;
2658     case HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2:
2659     return IEEE80211_CCMP_MIC_LEN;
2660     + case HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2:
2661     + return IEEE80211_CCMP_256_MIC_LEN;
2662     + case HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2:
2663     + case HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2:
2664     + return IEEE80211_GCMP_MIC_LEN;
2665     case HTT_RX_MPDU_ENCRYPT_WEP128:
2666     case HTT_RX_MPDU_ENCRYPT_WAPI:
2667     break;
2668     @@ -1024,9 +1034,21 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar,
2669     hdr = (void *)msdu->data;
2670    
2671     /* Tail */
2672     - if (status->flag & RX_FLAG_IV_STRIPPED)
2673     + if (status->flag & RX_FLAG_IV_STRIPPED) {
2674     skb_trim(msdu, msdu->len -
2675     ath10k_htt_rx_crypto_tail_len(ar, enctype));
2676     + } else {
2677     + /* MIC */
2678     + if ((status->flag & RX_FLAG_MIC_STRIPPED) &&
2679     + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
2680     + skb_trim(msdu, msdu->len - 8);
2681     +
2682     + /* ICV */
2683     + if (status->flag & RX_FLAG_ICV_STRIPPED &&
2684     + enctype != HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
2685     + skb_trim(msdu, msdu->len -
2686     + ath10k_htt_rx_crypto_tail_len(ar, enctype));
2687     + }
2688    
2689     /* MMIC */
2690     if ((status->flag & RX_FLAG_MMIC_STRIPPED) &&
2691     @@ -1048,7 +1070,8 @@ static void ath10k_htt_rx_h_undecap_raw(struct ath10k *ar,
2692     static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2693     struct sk_buff *msdu,
2694     struct ieee80211_rx_status *status,
2695     - const u8 first_hdr[64])
2696     + const u8 first_hdr[64],
2697     + enum htt_rx_mpdu_encrypt_type enctype)
2698     {
2699     struct ieee80211_hdr *hdr;
2700     struct htt_rx_desc *rxd;
2701     @@ -1056,6 +1079,7 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2702     u8 da[ETH_ALEN];
2703     u8 sa[ETH_ALEN];
2704     int l3_pad_bytes;
2705     + int bytes_aligned = ar->hw_params.decap_align_bytes;
2706    
2707     /* Delivered decapped frame:
2708     * [nwifi 802.11 header] <-- replaced with 802.11 hdr
2709     @@ -1084,6 +1108,14 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar,
2710     /* push original 802.11 header */
2711     hdr = (struct ieee80211_hdr *)first_hdr;
2712     hdr_len = ieee80211_hdrlen(hdr->frame_control);
2713     +
2714     + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2715     + memcpy(skb_push(msdu,
2716     + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2717     + (void *)hdr + round_up(hdr_len, bytes_aligned),
2718     + ath10k_htt_rx_crypto_param_len(ar, enctype));
2719     + }
2720     +
2721     memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2722    
2723     /* original 802.11 header has a different DA and in
2724     @@ -1144,6 +1176,7 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2725     u8 sa[ETH_ALEN];
2726     int l3_pad_bytes;
2727     struct htt_rx_desc *rxd;
2728     + int bytes_aligned = ar->hw_params.decap_align_bytes;
2729    
2730     /* Delivered decapped frame:
2731     * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc
2732     @@ -1172,6 +1205,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2733     /* push original 802.11 header */
2734     hdr = (struct ieee80211_hdr *)first_hdr;
2735     hdr_len = ieee80211_hdrlen(hdr->frame_control);
2736     +
2737     + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2738     + memcpy(skb_push(msdu,
2739     + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2740     + (void *)hdr + round_up(hdr_len, bytes_aligned),
2741     + ath10k_htt_rx_crypto_param_len(ar, enctype));
2742     + }
2743     +
2744     memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2745    
2746     /* original 802.11 header has a different DA and in
2747     @@ -1185,12 +1226,14 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar,
2748     static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar,
2749     struct sk_buff *msdu,
2750     struct ieee80211_rx_status *status,
2751     - const u8 first_hdr[64])
2752     + const u8 first_hdr[64],
2753     + enum htt_rx_mpdu_encrypt_type enctype)
2754     {
2755     struct ieee80211_hdr *hdr;
2756     size_t hdr_len;
2757     int l3_pad_bytes;
2758     struct htt_rx_desc *rxd;
2759     + int bytes_aligned = ar->hw_params.decap_align_bytes;
2760    
2761     /* Delivered decapped frame:
2762     * [amsdu header] <-- replaced with 802.11 hdr
2763     @@ -1206,6 +1249,14 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar,
2764    
2765     hdr = (struct ieee80211_hdr *)first_hdr;
2766     hdr_len = ieee80211_hdrlen(hdr->frame_control);
2767     +
2768     + if (!(status->flag & RX_FLAG_IV_STRIPPED)) {
2769     + memcpy(skb_push(msdu,
2770     + ath10k_htt_rx_crypto_param_len(ar, enctype)),
2771     + (void *)hdr + round_up(hdr_len, bytes_aligned),
2772     + ath10k_htt_rx_crypto_param_len(ar, enctype));
2773     + }
2774     +
2775     memcpy(skb_push(msdu, hdr_len), hdr, hdr_len);
2776     }
2777    
2778     @@ -1240,13 +1291,15 @@ static void ath10k_htt_rx_h_undecap(struct ath10k *ar,
2779     is_decrypted);
2780     break;
2781     case RX_MSDU_DECAP_NATIVE_WIFI:
2782     - ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr);
2783     + ath10k_htt_rx_h_undecap_nwifi(ar, msdu, status, first_hdr,
2784     + enctype);
2785     break;
2786     case RX_MSDU_DECAP_ETHERNET2_DIX:
2787     ath10k_htt_rx_h_undecap_eth(ar, msdu, status, first_hdr, enctype);
2788     break;
2789     case RX_MSDU_DECAP_8023_SNAP_LLC:
2790     - ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr);
2791     + ath10k_htt_rx_h_undecap_snap(ar, msdu, status, first_hdr,
2792     + enctype);
2793     break;
2794     }
2795     }
2796     @@ -1289,7 +1342,8 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu)
2797    
2798     static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2799     struct sk_buff_head *amsdu,
2800     - struct ieee80211_rx_status *status)
2801     + struct ieee80211_rx_status *status,
2802     + bool fill_crypt_header)
2803     {
2804     struct sk_buff *first;
2805     struct sk_buff *last;
2806     @@ -1299,7 +1353,6 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2807     enum htt_rx_mpdu_encrypt_type enctype;
2808     u8 first_hdr[64];
2809     u8 *qos;
2810     - size_t hdr_len;
2811     bool has_fcs_err;
2812     bool has_crypto_err;
2813     bool has_tkip_err;
2814     @@ -1324,15 +1377,17 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2815     * decapped header. It'll be used for undecapping of each MSDU.
2816     */
2817     hdr = (void *)rxd->rx_hdr_status;
2818     - hdr_len = ieee80211_hdrlen(hdr->frame_control);
2819     - memcpy(first_hdr, hdr, hdr_len);
2820     + memcpy(first_hdr, hdr, RX_HTT_HDR_STATUS_LEN);
2821    
2822     /* Each A-MSDU subframe will use the original header as the base and be
2823     * reported as a separate MSDU so strip the A-MSDU bit from QoS Ctl.
2824     */
2825     hdr = (void *)first_hdr;
2826     - qos = ieee80211_get_qos_ctl(hdr);
2827     - qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
2828     +
2829     + if (ieee80211_is_data_qos(hdr->frame_control)) {
2830     + qos = ieee80211_get_qos_ctl(hdr);
2831     + qos[0] &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
2832     + }
2833    
2834     /* Some attention flags are valid only in the last MSDU. */
2835     last = skb_peek_tail(amsdu);
2836     @@ -1379,9 +1434,14 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2837     status->flag |= RX_FLAG_DECRYPTED;
2838    
2839     if (likely(!is_mgmt))
2840     - status->flag |= RX_FLAG_IV_STRIPPED |
2841     - RX_FLAG_MMIC_STRIPPED;
2842     -}
2843     + status->flag |= RX_FLAG_MMIC_STRIPPED;
2844     +
2845     + if (fill_crypt_header)
2846     + status->flag |= RX_FLAG_MIC_STRIPPED |
2847     + RX_FLAG_ICV_STRIPPED;
2848     + else
2849     + status->flag |= RX_FLAG_IV_STRIPPED;
2850     + }
2851    
2852     skb_queue_walk(amsdu, msdu) {
2853     ath10k_htt_rx_h_csum_offload(msdu);
2854     @@ -1397,6 +1457,9 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
2855     if (is_mgmt)
2856     continue;
2857    
2858     + if (fill_crypt_header)
2859     + continue;
2860     +
2861     hdr = (void *)msdu->data;
2862     hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
2863     }
2864     @@ -1407,6 +1470,9 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar,
2865     struct ieee80211_rx_status *status)
2866     {
2867     struct sk_buff *msdu;
2868     + struct sk_buff *first_subframe;
2869     +
2870     + first_subframe = skb_peek(amsdu);
2871    
2872     while ((msdu = __skb_dequeue(amsdu))) {
2873     /* Setup per-MSDU flags */
2874     @@ -1415,6 +1481,13 @@ static void ath10k_htt_rx_h_deliver(struct ath10k *ar,
2875     else
2876     status->flag |= RX_FLAG_AMSDU_MORE;
2877    
2878     + if (msdu == first_subframe) {
2879     + first_subframe = NULL;
2880     + status->flag &= ~RX_FLAG_ALLOW_SAME_PN;
2881     + } else {
2882     + status->flag |= RX_FLAG_ALLOW_SAME_PN;
2883     + }
2884     +
2885     ath10k_process_rx(ar, status, msdu);
2886     }
2887     }
2888     @@ -1557,7 +1630,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
2889     ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
2890     ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
2891     ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
2892     - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status);
2893     + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true);
2894     ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status);
2895    
2896     return num_msdus;
2897     @@ -1892,7 +1965,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
2898     num_msdus += skb_queue_len(&amsdu);
2899     ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
2900     ath10k_htt_rx_h_filter(ar, &amsdu, status);
2901     - ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
2902     + ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false);
2903     ath10k_htt_rx_h_deliver(ar, &amsdu, status);
2904     break;
2905     case -EAGAIN:
2906     diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h
2907     index 034e7a54c5b2..e4878d0044bf 100644
2908     --- a/drivers/net/wireless/ath/ath10k/rx_desc.h
2909     +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h
2910     @@ -239,6 +239,9 @@ enum htt_rx_mpdu_encrypt_type {
2911     HTT_RX_MPDU_ENCRYPT_WAPI = 5,
2912     HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2 = 6,
2913     HTT_RX_MPDU_ENCRYPT_NONE = 7,
2914     + HTT_RX_MPDU_ENCRYPT_AES_CCM256_WPA2 = 8,
2915     + HTT_RX_MPDU_ENCRYPT_AES_GCMP_WPA2 = 9,
2916     + HTT_RX_MPDU_ENCRYPT_AES_GCMP256_WPA2 = 10,
2917     };
2918    
2919     #define RX_MPDU_START_INFO0_PEER_IDX_MASK 0x000007ff
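
The ath10k hunks above stop telling mac80211 the IV was stripped when the hardware left it in place; instead the IV is copied back in front of the payload from its slot behind the alignment-padded 802.11 header, so the stack can run PN replay checks per subframe. A minimal userspace sketch of that buffer surgery, assuming illustrative names throughout (fake_skb and skb_push_sim are stand-ins, not kernel API):

#include <stdio.h>
#include <string.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) / (a) * (a))

struct fake_skb {
	unsigned char buf[64];
	unsigned char *data;	/* current start of frame data */
	size_t len;
};

static unsigned char *skb_push_sim(struct fake_skb *skb, size_t n)
{
	skb->data -= n;		/* grow the frame towards the buffer start */
	skb->len += n;
	return skb->data;
}

int main(void)
{
	unsigned char first_hdr[16];
	size_t hdr_len = 8, align = 4, iv_len = 4;
	struct fake_skb skb;

	memcpy(first_hdr, "HDRHDRHD", hdr_len);	/* the stored 802.11 header */
	memcpy(first_hdr + ALIGN_UP(hdr_len, align), "IV12", iv_len);

	skb.data = skb.buf + 32;		/* decapped frame: payload only */
	memcpy(skb.data, "PAYLOAD", 7);
	skb.len = 7;

	/* as in the patched undecap helpers: push the IV, then the header */
	memcpy(skb_push_sim(&skb, iv_len),
	       first_hdr + ALIGN_UP(hdr_len, align), iv_len);
	memcpy(skb_push_sim(&skb, hdr_len), first_hdr, hdr_len);

	printf("%.*s\n", (int)skb.len, skb.data);	/* HDRHDRHDIV12PAYLOAD */
	return 0;
}

The push order matters: the IV goes in first so the final layout is header, IV, payload, which is what the software crypto path expects when RX_FLAG_IV_STRIPPED is clear.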
2920     diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
2921     index ceeb8c188ef3..00d82e8443bd 100644
2922     --- a/drivers/platform/x86/wmi.c
2923     +++ b/drivers/platform/x86/wmi.c
2924     @@ -848,5 +848,5 @@ static void __exit acpi_wmi_exit(void)
2925     pr_info("Mapper unloaded\n");
2926     }
2927    
2928     -subsys_initcall(acpi_wmi_init);
2929     +subsys_initcall_sync(acpi_wmi_init);
2930     module_exit(acpi_wmi_exit);
2931     diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
2932     index 2b770cb0c488..558a66b459fa 100644
2933     --- a/drivers/staging/android/ashmem.c
2934     +++ b/drivers/staging/android/ashmem.c
2935     @@ -774,10 +774,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2936     break;
2937     case ASHMEM_SET_SIZE:
2938     ret = -EINVAL;
2939     + mutex_lock(&ashmem_mutex);
2940     if (!asma->file) {
2941     ret = 0;
2942     asma->size = (size_t)arg;
2943     }
2944     + mutex_unlock(&ashmem_mutex);
2945     break;
2946     case ASHMEM_GET_SIZE:
2947     ret = asma->size;
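
The ashmem hunk widens ashmem_mutex over ASHMEM_SET_SIZE so the "no file backing yet" check and the size assignment execute as one unit against a concurrent mmap. A userspace analogue of the check-then-act window being closed (all names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stddef.h>

static pthread_mutex_t ashmem_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *asma_file;		/* non-NULL once the area is mapped */
static size_t asma_size;

static int set_size(size_t arg)
{
	int ret = -1;			/* -EINVAL in the driver */

	pthread_mutex_lock(&ashmem_mutex);
	if (!asma_file) {		/* check ...                */
		ret = 0;
		asma_size = arg;	/* ... and act, atomically   */
	}
	pthread_mutex_unlock(&ashmem_mutex);
	return ret;
}

int main(void)
{
	printf("%d %zu\n", set_size(4096), asma_size);	/* 0 4096 */
	return 0;
}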
2948     diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
2949     index 72e926d9868f..04d2b6e25503 100644
2950     --- a/drivers/target/iscsi/iscsi_target.c
2951     +++ b/drivers/target/iscsi/iscsi_target.c
2952     @@ -1940,7 +1940,6 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2953     struct iscsi_tmr_req *tmr_req;
2954     struct iscsi_tm *hdr;
2955     int out_of_order_cmdsn = 0, ret;
2956     - bool sess_ref = false;
2957     u8 function, tcm_function = TMR_UNKNOWN;
2958    
2959     hdr = (struct iscsi_tm *) buf;
2960     @@ -1982,18 +1981,17 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2961     buf);
2962     }
2963    
2964     + transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
2965     + conn->sess->se_sess, 0, DMA_NONE,
2966     + TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
2967     +
2968     + target_get_sess_cmd(&cmd->se_cmd, true);
2969     +
2970     /*
2971     * TASK_REASSIGN for ERL=2 / connection stays inside of
2972     * LIO-Target $FABRIC_MOD
2973     */
2974     if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
2975     - transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
2976     - conn->sess->se_sess, 0, DMA_NONE,
2977     - TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
2978     -
2979     - target_get_sess_cmd(&cmd->se_cmd, true);
2980     - sess_ref = true;
2981     -
2982     switch (function) {
2983     case ISCSI_TM_FUNC_ABORT_TASK:
2984     tcm_function = TMR_ABORT_TASK;
2985     @@ -2132,12 +2130,8 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2986     * For connection recovery, this is also the default action for
2987     * TMR TASK_REASSIGN.
2988     */
2989     - if (sess_ref) {
2990     - pr_debug("Handle TMR, using sess_ref=true check\n");
2991     - target_put_sess_cmd(&cmd->se_cmd);
2992     - }
2993     -
2994     iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
2995     + target_put_sess_cmd(&cmd->se_cmd);
2996     return 0;
2997     }
2998     EXPORT_SYMBOL(iscsit_handle_task_mgt_cmd);
2999     diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
3000     index 27dd1e12f246..14bb2db5273c 100644
3001     --- a/drivers/target/target_core_tmr.c
3002     +++ b/drivers/target/target_core_tmr.c
3003     @@ -133,6 +133,15 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
3004     spin_unlock(&se_cmd->t_state_lock);
3005     return false;
3006     }
3007     + if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
3008     + if (se_cmd->scsi_status) {
3009     + pr_debug("Attempted to abort io tag: %llu early failure"
3010     + " status: 0x%02x\n", se_cmd->tag,
3011     + se_cmd->scsi_status);
3012     + spin_unlock(&se_cmd->t_state_lock);
3013     + return false;
3014     + }
3015     + }
3016     if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
3017     pr_debug("Attempted to abort io tag: %llu already shutdown,"
3018     " skipping\n", se_cmd->tag);
3019     diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
3020     index 4c0782cb1e94..6f3eccf986c7 100644
3021     --- a/drivers/target/target_core_transport.c
3022     +++ b/drivers/target/target_core_transport.c
3023     @@ -1939,6 +1939,7 @@ void target_execute_cmd(struct se_cmd *cmd)
3024     }
3025    
3026     cmd->t_state = TRANSPORT_PROCESSING;
3027     + cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
3028     cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
3029     spin_unlock_irq(&cmd->t_state_lock);
3030    
3031     @@ -2592,6 +2593,7 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
3032     ret = -ESHUTDOWN;
3033     goto out;
3034     }
3035     + se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
3036     list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
3037     out:
3038     spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
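
Across the three target hunks above, CMD_T_PRE_EXECUTE is set when a command is registered via target_get_sess_cmd() and cleared once target_execute_cmd() runs; __target_check_io_state() then declines to abort a command that already failed before it ever executed. The flag's lifecycle condensed into a standalone model (the flag value matches the header; everything else is illustrative):

#include <stdio.h>

#define CMD_T_PRE_EXECUTE	(1 << 12)

struct cmd { unsigned int transport_state; int scsi_status; };

static void get_sess_cmd(struct cmd *c)
{
	c->transport_state |= CMD_T_PRE_EXECUTE;
}

static void execute_cmd(struct cmd *c)
{
	c->transport_state &= ~CMD_T_PRE_EXECUTE;
}

static int abort_allowed(const struct cmd *c)
{
	if ((c->transport_state & CMD_T_PRE_EXECUTE) && c->scsi_status)
		return 0;	/* failed before execute: skip the abort */
	return 1;
}

int main(void)
{
	struct cmd c = { 0, 0 };

	get_sess_cmd(&c);
	c.scsi_status = 2;			/* early CHECK CONDITION */
	printf("%d\n", abort_allowed(&c));	/* 0 */
	execute_cmd(&c);
	printf("%d\n", abort_allowed(&c));	/* 1 */
	return 0;
}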
3039     diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
3040     index 82eea55a7b5c..3b7d69ca83be 100644
3041     --- a/drivers/usb/host/xhci-mem.c
3042     +++ b/drivers/usb/host/xhci-mem.c
3043     @@ -1086,7 +1086,8 @@ int xhci_alloc_virt_device(struct xhci_hcd *xhci, int slot_id,
3044    
3045     return 1;
3046     fail:
3047     -
3048     + if (dev->eps[0].ring)
3049     + xhci_ring_free(xhci, dev->eps[0].ring);
3050     if (dev->in_ctx)
3051     xhci_free_container_ctx(xhci, dev->in_ctx);
3052     if (dev->out_ctx)
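
The xhci hunk plugs a leak on the allocation failure path: the endpoint-0 ring taken earlier in the function was never released. The unwind shape it restores, reduced to plain C with illustrative types:

#include <stdlib.h>

struct ring { int dummy; };
struct ctx { int dummy; };

struct vdev {
	struct ring *ep0_ring;
	struct ctx *in_ctx, *out_ctx;
};

static int alloc_virt_device(struct vdev *dev)
{
	dev->in_ctx = malloc(sizeof(*dev->in_ctx));
	dev->out_ctx = malloc(sizeof(*dev->out_ctx));
	dev->ep0_ring = malloc(sizeof(*dev->ep0_ring));
	if (!dev->in_ctx || !dev->out_ctx || !dev->ep0_ring)
		goto fail;
	return 0;
fail:
	free(dev->ep0_ring);	/* the previously missing release */
	free(dev->in_ctx);
	free(dev->out_ctx);
	return -1;
}

int main(void)
{
	struct vdev dev = { 0 };

	if (alloc_virt_device(&dev))
		return 1;
	free(dev.ep0_ring);
	free(dev.in_ctx);
	free(dev.out_ctx);
	return 0;
}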
3053     diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
3054     index 8e7737d7ac0a..03be5d574f23 100644
3055     --- a/drivers/usb/misc/usb3503.c
3056     +++ b/drivers/usb/misc/usb3503.c
3057     @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
3058     if (gpio_is_valid(hub->gpio_reset)) {
3059     err = devm_gpio_request_one(dev, hub->gpio_reset,
3060     GPIOF_OUT_INIT_LOW, "usb3503 reset");
3061     + /* Datasheet defines a hardware reset to be at least 100us */
3062     + usleep_range(100, 10000);
3063     if (err) {
3064     dev_err(dev,
3065     "unable to request GPIO %d as reset pin (%d)\n",
3066     diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
3067     index 1a874a1f3890..80b37d214beb 100644
3068     --- a/drivers/usb/mon/mon_bin.c
3069     +++ b/drivers/usb/mon/mon_bin.c
3070     @@ -1002,7 +1002,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
3071     break;
3072    
3073     case MON_IOCQ_RING_SIZE:
3074     + mutex_lock(&rp->fetch_lock);
3075     ret = rp->b_size;
3076     + mutex_unlock(&rp->fetch_lock);
3077     break;
3078    
3079     case MON_IOCT_RING_SIZE:
3080     @@ -1229,12 +1231,16 @@ static int mon_bin_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3081     unsigned long offset, chunk_idx;
3082     struct page *pageptr;
3083    
3084     + mutex_lock(&rp->fetch_lock);
3085     offset = vmf->pgoff << PAGE_SHIFT;
3086     - if (offset >= rp->b_size)
3087     + if (offset >= rp->b_size) {
3088     + mutex_unlock(&rp->fetch_lock);
3089     return VM_FAULT_SIGBUS;
3090     + }
3091     chunk_idx = offset / CHUNK_SIZE;
3092     pageptr = rp->b_vec[chunk_idx].pg;
3093     get_page(pageptr);
3094     + mutex_unlock(&rp->fetch_lock);
3095     vmf->page = pageptr;
3096     return 0;
3097     }
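
Both mon_bin hunks take rp->fetch_lock so neither the ring-size read nor the fault handler can race with a concurrent ring resize that swaps out b_vec. The fault path is a classic time-of-check/time-of-use window: the offset check and the page lookup must sit under the same lock as the resize. A standalone model (names illustrative):

#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>

static pthread_mutex_t fetch_lock = PTHREAD_MUTEX_INITIALIZER;
static size_t b_size = 8;
static int *b_vec;

static int fault(size_t offset, int *out)
{
	int ret = -1;			/* VM_FAULT_SIGBUS in the driver */

	pthread_mutex_lock(&fetch_lock);
	if (offset < b_size) {		/* check ...                */
		*out = b_vec[offset];	/* ... and use, atomically  */
		ret = 0;
	}
	pthread_mutex_unlock(&fetch_lock);
	return ret;
}

int main(void)
{
	int v;

	b_vec = calloc(b_size, sizeof(*b_vec));
	printf("%d\n", fault(3, &v));	/* 0: inside the ring */
	printf("%d\n", fault(99, &v));	/* -1: SIGBUS path    */
	free(b_vec);
	return 0;
}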
3098     diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
3099     index 11ee55e080e5..3178d8afb3e6 100644
3100     --- a/drivers/usb/serial/cp210x.c
3101     +++ b/drivers/usb/serial/cp210x.c
3102     @@ -121,6 +121,7 @@ static const struct usb_device_id id_table[] = {
3103     { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
3104     { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
3105     { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
3106     + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
3107     { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
3108     { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
3109     { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
3110     @@ -171,6 +172,7 @@ static const struct usb_device_id id_table[] = {
3111     { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
3112     { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
3113     { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
3114     + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
3115     { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
3116     { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
3117     { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
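
The two new cp210x entries need no code change: the driver binds to any VID/PID pair listed in id_table. A reduced model of that lookup (struct usb_id and id_match are illustrative; the kernel uses struct usb_device_id and the driver core performs the walk):

#include <stdio.h>

struct usb_id { unsigned short vid, pid; };

static const struct usb_id id_table[] = {
	{ 0x10C4, 0x85A7 },	/* LifeScan OneTouch Verio IQ (new entry) */
	{ 0x18EF, 0xE030 },	/* ELV ALC 8xxx Battery Charger (new entry) */
	{ 0, 0 }		/* terminator */
};

static int id_match(unsigned short vid, unsigned short pid)
{
	const struct usb_id *id;

	for (id = id_table; id->vid; id++)
		if (id->vid == vid && id->pid == pid)
			return 1;
	return 0;
}

int main(void)
{
	printf("%d\n", id_match(0x10C4, 0x85A7));	/* 1: driver binds */
	printf("%d\n", id_match(0x10C4, 0xFFFF));	/* 0: no match     */
	return 0;
}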
3118     diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
3119     index 9f356f7cf7d5..719ec68ae309 100644
3120     --- a/drivers/usb/storage/unusual_uas.h
3121     +++ b/drivers/usb/storage/unusual_uas.h
3122     @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
3123     USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3124     US_FL_NO_ATA_1X),
3125    
3126     +/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
3127     +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
3128     + "Norelsys",
3129     + "NS1068X",
3130     + USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3131     + US_FL_IGNORE_UAS),
3132     +
3133     /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
3134     UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
3135     "JMicron",
3136     diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
3137     index e24b24fa0f16..2a5d3180777d 100644
3138     --- a/drivers/usb/usbip/usbip_common.c
3139     +++ b/drivers/usb/usbip/usbip_common.c
3140     @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3141     dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
3142     udev->devnum, udev->devpath, usb_speed_string(udev->speed));
3143    
3144     - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
3145     + pr_debug("tt hub ttport %d\n", udev->ttport);
3146    
3147     dev_dbg(dev, " ");
3148     for (i = 0; i < 16; i++)
3149     @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3150     }
3151     pr_debug("\n");
3152    
3153     - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
3154     -
3155     - dev_dbg(dev,
3156     - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
3157     - &udev->descriptor, udev->config,
3158     - udev->actconfig, udev->rawdescriptors);
3159     + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
3160     + udev->bus->bus_name);
3161    
3162     dev_dbg(dev, "have_langid %d, string_langid %d\n",
3163     udev->have_langid, udev->string_langid);
3164     @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb)
3165    
3166     dev = &urb->dev->dev;
3167    
3168     - dev_dbg(dev, " urb :%p\n", urb);
3169     - dev_dbg(dev, " dev :%p\n", urb->dev);
3170     -
3171     usbip_dump_usb_device(urb->dev);
3172    
3173     dev_dbg(dev, " pipe :%08x ", urb->pipe);
3174     @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb)
3175    
3176     dev_dbg(dev, " status :%d\n", urb->status);
3177     dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
3178     - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
3179     dev_dbg(dev, " transfer_buffer_length:%d\n",
3180     urb->transfer_buffer_length);
3181     dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
3182     - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
3183    
3184     if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
3185     usbip_dump_usb_ctrlrequest(
3186     @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb)
3187     dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
3188     dev_dbg(dev, " interval :%d\n", urb->interval);
3189     dev_dbg(dev, " error_count :%d\n", urb->error_count);
3190     - dev_dbg(dev, " context :%p\n", urb->context);
3191     - dev_dbg(dev, " complete :%p\n", urb->complete);
3192     }
3193     EXPORT_SYMBOL_GPL(usbip_dump_urb);
3194    
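The usbip hunks above delete every raw %p from the debug output: a kernel pointer printed to the log hands an attacker exactly the randomized addresses KASLR is meant to hide. The userspace analogue of the leak being removed:

#include <stdio.h>

static int secret_state;

int main(void)
{
	/* the kind of line the patch deletes: one printed address is
	 * enough to derandomize the object's surroundings under ASLR */
	printf("state at %p\n", (void *)&secret_state);
	return 0;
}
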
3195     diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
3196     index e429b59f6f8a..d020e72b3122 100644
3197     --- a/drivers/usb/usbip/vudc_rx.c
3198     +++ b/drivers/usb/usbip/vudc_rx.c
3199     @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
3200     urb_p->new = 1;
3201     urb_p->seqnum = pdu->base.seqnum;
3202    
3203     + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
3204     + /* validate packet size and number of packets */
3205     + unsigned int maxp, packets, bytes;
3206     +
3207     + maxp = usb_endpoint_maxp(urb_p->ep->desc);
3208     + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
3209     + bytes = pdu->u.cmd_submit.transfer_buffer_length;
3210     + packets = DIV_ROUND_UP(bytes, maxp);
3211     +
3212     + if (pdu->u.cmd_submit.number_of_packets < 0 ||
3213     + pdu->u.cmd_submit.number_of_packets > packets) {
3214     + dev_err(&udc->gadget.dev,
3215     + "CMD_SUBMIT: isoc invalid num packets %d\n",
3216     + pdu->u.cmd_submit.number_of_packets);
3217     + ret = -EMSGSIZE;
3218     + goto free_urbp;
3219     + }
3220     + }
3221     +
3222     ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
3223     if (ret) {
3224     usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
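
The bound enforced by the vudc hunk: an isochronous request may not claim more packets than its transfer length can fill at the endpoint's max packet size (maxp times the high-bandwidth multiplier). The same arithmetic standalone, with DIV_ROUND_UP defined as in the kernel:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static int isoc_packets_valid(int number_of_packets,
			      unsigned int transfer_len,
			      unsigned int maxp, unsigned int mult)
{
	unsigned int packets = DIV_ROUND_UP(transfer_len, maxp * mult);

	return number_of_packets >= 0 &&
	       (unsigned int)number_of_packets <= packets;
}

int main(void)
{
	printf("%d\n", isoc_packets_valid(4, 4096, 1024, 1));	/* 1: ok       */
	printf("%d\n", isoc_packets_valid(64, 4096, 1024, 1));	/* 0: rejected */
	return 0;
}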
3225     diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
3226     index 234661782fa0..3ab4c86486a7 100644
3227     --- a/drivers/usb/usbip/vudc_tx.c
3228     +++ b/drivers/usb/usbip/vudc_tx.c
3229     @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3230     memset(&pdu_header, 0, sizeof(pdu_header));
3231     memset(&msg, 0, sizeof(msg));
3232    
3233     + if (urb->actual_length > 0 && !urb->transfer_buffer) {
3234     + dev_err(&udc->gadget.dev,
3235     + "urb: actual_length %d transfer_buffer null\n",
3236     + urb->actual_length);
3237     + return -1;
3238     + }
3239     +
3240     if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
3241     iovnum = 2 + urb->number_of_packets;
3242     else
3243     @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3244    
3245     /* 1. setup usbip_header */
3246     setup_ret_submit_pdu(&pdu_header, urb_p);
3247     - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
3248     - pdu_header.base.seqnum, urb);
3249     + usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
3250     + pdu_header.base.seqnum);
3251     usbip_header_correct_endian(&pdu_header, 1);
3252    
3253     iov[iovnum].iov_base = &pdu_header;
3254     diff --git a/include/linux/bpf.h b/include/linux/bpf.h
3255     index 97498be2ca2e..75ffd3b2149e 100644
3256     --- a/include/linux/bpf.h
3257     +++ b/include/linux/bpf.h
3258     @@ -43,6 +43,7 @@ struct bpf_map {
3259     u32 max_entries;
3260     u32 map_flags;
3261     u32 pages;
3262     + bool unpriv_array;
3263     struct user_struct *user;
3264     const struct bpf_map_ops *ops;
3265     struct work_struct work;
3266     @@ -189,6 +190,7 @@ struct bpf_prog_aux {
3267     struct bpf_array {
3268     struct bpf_map map;
3269     u32 elem_size;
3270     + u32 index_mask;
3271     /* 'ownership' of prog_array is claimed by the first program that
3272     * is going to use this map or by the first program which FD is stored
3273     * in the map to make sure that all callers and callees have the same
3274     diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
3275     index 4c4e9358c146..070fc49e39e2 100644
3276     --- a/include/linux/bpf_verifier.h
3277     +++ b/include/linux/bpf_verifier.h
3278     @@ -67,7 +67,10 @@ struct bpf_verifier_state_list {
3279     };
3280    
3281     struct bpf_insn_aux_data {
3282     - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
3283     + union {
3284     + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
3285     + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
3286     + };
3287     bool seen; /* this insn was processed by the verifier */
3288     };
3289    
3290     diff --git a/include/linux/cpu.h b/include/linux/cpu.h
3291     index e571128ad99a..2f475ad89a0d 100644
3292     --- a/include/linux/cpu.h
3293     +++ b/include/linux/cpu.h
3294     @@ -44,6 +44,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
3295     extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
3296     extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
3297    
3298     +extern ssize_t cpu_show_meltdown(struct device *dev,
3299     + struct device_attribute *attr, char *buf);
3300     +extern ssize_t cpu_show_spectre_v1(struct device *dev,
3301     + struct device_attribute *attr, char *buf);
3302     +extern ssize_t cpu_show_spectre_v2(struct device *dev,
3303     + struct device_attribute *attr, char *buf);
3304     +
3305     extern __printf(4, 5)
3306     struct device *cpu_device_create(struct device *parent, void *drvdata,
3307     const struct attribute_group **groups,
3308     diff --git a/include/linux/frame.h b/include/linux/frame.h
3309     index e6baaba3f1ae..d772c61c31da 100644
3310     --- a/include/linux/frame.h
3311     +++ b/include/linux/frame.h
3312     @@ -11,7 +11,7 @@
3313     * For more information, see tools/objtool/Documentation/stack-validation.txt.
3314     */
3315     #define STACK_FRAME_NON_STANDARD(func) \
3316     - static void __used __section(__func_stack_frame_non_standard) \
3317     + static void __used __section(.discard.func_stack_frame_non_standard) \
3318     *__func_stack_frame_non_standard_##func = func
3319    
3320     #else /* !CONFIG_STACK_VALIDATION */
3321     diff --git a/include/linux/phy.h b/include/linux/phy.h
3322     index a04d69ab7c34..867110c9d707 100644
3323     --- a/include/linux/phy.h
3324     +++ b/include/linux/phy.h
3325     @@ -683,6 +683,17 @@ static inline bool phy_is_internal(struct phy_device *phydev)
3326     return phydev->is_internal;
3327     }
3328    
3329     +/**
3330     + * phy_interface_mode_is_rgmii - Convenience function for testing if a
3331     + * PHY interface mode is RGMII (all variants)
3332     + * @mode: the phy_interface_t enum
3333     + */
3334     +static inline bool phy_interface_mode_is_rgmii(phy_interface_t mode)
3335     +{
3336     + return mode >= PHY_INTERFACE_MODE_RGMII &&
3337     + mode <= PHY_INTERFACE_MODE_RGMII_TXID;
3338     +};
3339     +
3340     /**
3341     * phy_interface_is_rgmii - Convenience function for testing if a PHY interface
3342     * is RGMII (all variants)
3343     diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
3344     index f2e27e078362..01b3778ba6da 100644
3345     --- a/include/linux/sh_eth.h
3346     +++ b/include/linux/sh_eth.h
3347     @@ -16,7 +16,6 @@ struct sh_eth_plat_data {
3348     unsigned char mac_addr[ETH_ALEN];
3349     unsigned no_ether_link:1;
3350     unsigned ether_link_active_low:1;
3351     - unsigned needs_init:1;
3352     };
3353    
3354     #endif
3355     diff --git a/include/net/mac80211.h b/include/net/mac80211.h
3356     index 2c7d876e2a1a..8fd61bc50383 100644
3357     --- a/include/net/mac80211.h
3358     +++ b/include/net/mac80211.h
3359     @@ -1007,7 +1007,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
3360     * @RX_FLAG_DECRYPTED: This frame was decrypted in hardware.
3361     * @RX_FLAG_MMIC_STRIPPED: the Michael MIC is stripped off this frame,
3362     * verification has been done by the hardware.
3363     - * @RX_FLAG_IV_STRIPPED: The IV/ICV are stripped from this frame.
3364     + * @RX_FLAG_IV_STRIPPED: The IV and ICV are stripped from this frame.
3365     * If this flag is set, the stack cannot do any replay detection
3366     * hence the driver or hardware will have to do that.
3367     * @RX_FLAG_PN_VALIDATED: Currently only valid for CCMP/GCMP frames, this
3368     @@ -1078,6 +1078,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
3369     * @RX_FLAG_ALLOW_SAME_PN: Allow the same PN as same packet before.
3370     * This is used for AMSDU subframes which can have the same PN as
3371     * the first subframe.
3372     + * @RX_FLAG_ICV_STRIPPED: The ICV is stripped from this frame. CRC checking must
3373     + * be done in the hardware.
3374     */
3375     enum mac80211_rx_flags {
3376     RX_FLAG_MMIC_ERROR = BIT(0),
3377     @@ -1113,6 +1115,7 @@ enum mac80211_rx_flags {
3378     RX_FLAG_RADIOTAP_VENDOR_DATA = BIT(31),
3379     RX_FLAG_MIC_STRIPPED = BIT_ULL(32),
3380     RX_FLAG_ALLOW_SAME_PN = BIT_ULL(33),
3381     + RX_FLAG_ICV_STRIPPED = BIT_ULL(34),
3382     };
3383    
3384     #define RX_FLAG_STBC_SHIFT 26
3385     diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
3386     index eb3b23b6ec54..30f99ce4c6ce 100644
3387     --- a/include/target/target_core_base.h
3388     +++ b/include/target/target_core_base.h
3389     @@ -493,6 +493,7 @@ struct se_cmd {
3390     #define CMD_T_BUSY (1 << 9)
3391     #define CMD_T_TAS (1 << 10)
3392     #define CMD_T_FABRIC_STOP (1 << 11)
3393     +#define CMD_T_PRE_EXECUTE (1 << 12)
3394     spinlock_t t_state_lock;
3395     struct kref cmd_kref;
3396     struct completion t_transport_stop_comp;
3397     diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
3398     index 8ade3eb6c640..90fce4d6956a 100644
3399     --- a/include/trace/events/kvm.h
3400     +++ b/include/trace/events/kvm.h
3401     @@ -208,7 +208,7 @@ TRACE_EVENT(kvm_ack_irq,
3402     { KVM_TRACE_MMIO_WRITE, "write" }
3403    
3404     TRACE_EVENT(kvm_mmio,
3405     - TP_PROTO(int type, int len, u64 gpa, u64 val),
3406     + TP_PROTO(int type, int len, u64 gpa, void *val),
3407     TP_ARGS(type, len, gpa, val),
3408    
3409     TP_STRUCT__entry(
3410     @@ -222,7 +222,10 @@ TRACE_EVENT(kvm_mmio,
3411     __entry->type = type;
3412     __entry->len = len;
3413     __entry->gpa = gpa;
3414     - __entry->val = val;
3415     + __entry->val = 0;
3416     + if (val)
3417     + memcpy(&__entry->val, val,
3418     + min_t(u32, sizeof(__entry->val), len));
3419     ),
3420    
3421     TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
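
The kvm_mmio change switches the tracepoint from taking a u64 by value to copying at most min(sizeof(u64), len) bytes from the caller's buffer, so a short MMIO access never reads past the data it was given. The bounded-copy idiom in isolation:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t capture_val(const void *val, unsigned int len)
{
	uint64_t v = 0;

	if (val)
		memcpy(&v, val, len < sizeof(v) ? len : sizeof(v));
	return v;
}

int main(void)
{
	unsigned char mmio[2] = { 0x34, 0x12 };

	/* only two bytes are read; whatever follows 'mmio' stays private
	 * (prints 0x1234 on a little-endian machine) */
	printf("0x%llx\n", (unsigned long long)capture_val(mmio, 2));
	return 0;
}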
3422     diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
3423     index f3721e150d94..9a1e6ed7babc 100644
3424     --- a/kernel/bpf/arraymap.c
3425     +++ b/kernel/bpf/arraymap.c
3426     @@ -46,9 +46,10 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
3427     static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3428     {
3429     bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
3430     + u32 elem_size, index_mask, max_entries;
3431     + bool unpriv = !capable(CAP_SYS_ADMIN);
3432     struct bpf_array *array;
3433     - u64 array_size;
3434     - u32 elem_size;
3435     + u64 array_size, mask64;
3436    
3437     /* check sanity of attributes */
3438     if (attr->max_entries == 0 || attr->key_size != 4 ||
3439     @@ -63,11 +64,32 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3440    
3441     elem_size = round_up(attr->value_size, 8);
3442    
3443     + max_entries = attr->max_entries;
3444     +
3445     + /* On 32 bit archs roundup_pow_of_two() with max_entries that has
3446     + * upper most bit set in u32 space is undefined behavior due to
3447     + * resulting 1U << 32, so do it manually here in u64 space.
3448     + */
3449     + mask64 = fls_long(max_entries - 1);
3450     + mask64 = 1ULL << mask64;
3451     + mask64 -= 1;
3452     +
3453     + index_mask = mask64;
3454     + if (unpriv) {
3455     + /* round up array size to nearest power of 2,
3456     + * since cpu will speculate within index_mask limits
3457     + */
3458     + max_entries = index_mask + 1;
3459     + /* Check for overflows. */
3460     + if (max_entries < attr->max_entries)
3461     + return ERR_PTR(-E2BIG);
3462     + }
3463     +
3464     array_size = sizeof(*array);
3465     if (percpu)
3466     - array_size += (u64) attr->max_entries * sizeof(void *);
3467     + array_size += (u64) max_entries * sizeof(void *);
3468     else
3469     - array_size += (u64) attr->max_entries * elem_size;
3470     + array_size += (u64) max_entries * elem_size;
3471    
3472     /* make sure there is no u32 overflow later in round_up() */
3473     if (array_size >= U32_MAX - PAGE_SIZE)
3474     @@ -77,6 +99,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
3475     array = bpf_map_area_alloc(array_size);
3476     if (!array)
3477     return ERR_PTR(-ENOMEM);
3478     + array->index_mask = index_mask;
3479     + array->map.unpriv_array = unpriv;
3480    
3481     /* copy mandatory map attributes */
3482     array->map.map_type = attr->map_type;
3483     @@ -110,7 +134,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
3484     if (unlikely(index >= array->map.max_entries))
3485     return NULL;
3486    
3487     - return array->value + array->elem_size * index;
3488     + return array->value + array->elem_size * (index & array->index_mask);
3489     }
3490    
3491     /* Called from eBPF program */
3492     @@ -122,7 +146,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
3493     if (unlikely(index >= array->map.max_entries))
3494     return NULL;
3495    
3496     - return this_cpu_ptr(array->pptrs[index]);
3497     + return this_cpu_ptr(array->pptrs[index & array->index_mask]);
3498     }
3499    
3500     int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
3501     @@ -142,7 +166,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
3502     */
3503     size = round_up(map->value_size, 8);
3504     rcu_read_lock();
3505     - pptr = array->pptrs[index];
3506     + pptr = array->pptrs[index & array->index_mask];
3507     for_each_possible_cpu(cpu) {
3508     bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
3509     off += size;
3510     @@ -190,10 +214,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
3511     return -EEXIST;
3512    
3513     if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
3514     - memcpy(this_cpu_ptr(array->pptrs[index]),
3515     + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
3516     value, map->value_size);
3517     else
3518     - memcpy(array->value + array->elem_size * index,
3519     + memcpy(array->value +
3520     + array->elem_size * (index & array->index_mask),
3521     value, map->value_size);
3522     return 0;
3523     }
3524     @@ -227,7 +252,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
3525     */
3526     size = round_up(map->value_size, 8);
3527     rcu_read_lock();
3528     - pptr = array->pptrs[index];
3529     + pptr = array->pptrs[index & array->index_mask];
3530     for_each_possible_cpu(cpu) {
3531     bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
3532     off += size;
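
This arraymap change is the core of the Spectre-v1 hardening: for unprivileged maps the array is rounded up to a power of two and every lookup index is ANDed with index_mask, so even a mispredicted bounds check cannot form an out-of-bounds address for the CPU to speculate on. The mask derivation and its effect, standalone (fls_long reimplemented portably; values illustrative):

#include <stdio.h>
#include <stdint.h>

static unsigned int fls_long(unsigned long x)	/* highest set bit, 1-based */
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	uint32_t max_entries = 1000;
	uint64_t mask64;
	uint32_t index_mask, index;

	/* as in array_map_alloc(): compute in u64 so max_entries near
	 * 2^32 never produces the undefined 1U << 32 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;
	index_mask = (uint32_t)mask64;		/* 0x3ff for 1000 entries */

	/* unprivileged maps are also rounded up to index_mask + 1 slots,
	 * so a masked index always lands inside the allocation */
	index = 1005;				/* attacker-chosen, OOB */
	if (index >= max_entries)
		printf("architecturally rejected\n");
	printf("speculative index clamps to %u (mask 0x%x)\n",
	       index & index_mask, index_mask);
	return 0;
}

The verifier hunk further below applies the same idea to bpf_tail_call(): it patches a bounds check plus an AND with index_mask in front of every tail call into an unprivileged prog_array.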
3533     diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
3534     index 72ea91df71c9..91a2d3752007 100644
3535     --- a/kernel/bpf/syscall.c
3536     +++ b/kernel/bpf/syscall.c
3537     @@ -565,57 +565,6 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl)
3538     list_add(&tl->list_node, &bpf_prog_types);
3539     }
3540    
3541     -/* fixup insn->imm field of bpf_call instructions:
3542     - * if (insn->imm == BPF_FUNC_map_lookup_elem)
3543     - * insn->imm = bpf_map_lookup_elem - __bpf_call_base;
3544     - * else if (insn->imm == BPF_FUNC_map_update_elem)
3545     - * insn->imm = bpf_map_update_elem - __bpf_call_base;
3546     - * else ...
3547     - *
3548     - * this function is called after eBPF program passed verification
3549     - */
3550     -static void fixup_bpf_calls(struct bpf_prog *prog)
3551     -{
3552     - const struct bpf_func_proto *fn;
3553     - int i;
3554     -
3555     - for (i = 0; i < prog->len; i++) {
3556     - struct bpf_insn *insn = &prog->insnsi[i];
3557     -
3558     - if (insn->code == (BPF_JMP | BPF_CALL)) {
3559     - /* we reach here when program has bpf_call instructions
3560     - * and it passed bpf_check(), means that
3561     - * ops->get_func_proto must have been supplied, check it
3562     - */
3563     - BUG_ON(!prog->aux->ops->get_func_proto);
3564     -
3565     - if (insn->imm == BPF_FUNC_get_route_realm)
3566     - prog->dst_needed = 1;
3567     - if (insn->imm == BPF_FUNC_get_prandom_u32)
3568     - bpf_user_rnd_init_once();
3569     - if (insn->imm == BPF_FUNC_tail_call) {
3570     - /* mark bpf_tail_call as different opcode
3571     - * to avoid conditional branch in
3572     - * interpeter for every normal call
3573     - * and to prevent accidental JITing by
3574     - * JIT compiler that doesn't support
3575     - * bpf_tail_call yet
3576     - */
3577     - insn->imm = 0;
3578     - insn->code |= BPF_X;
3579     - continue;
3580     - }
3581     -
3582     - fn = prog->aux->ops->get_func_proto(insn->imm);
3583     - /* all functions that have prototype and verifier allowed
3584     - * programs to call them, must be real in-kernel functions
3585     - */
3586     - BUG_ON(!fn->func);
3587     - insn->imm = fn->func - __bpf_call_base;
3588     - }
3589     - }
3590     -}
3591     -
3592     /* drop refcnt on maps used by eBPF program and free auxilary data */
3593     static void free_used_maps(struct bpf_prog_aux *aux)
3594     {
3595     @@ -808,9 +757,6 @@ static int bpf_prog_load(union bpf_attr *attr)
3596     if (err < 0)
3597     goto free_used_maps;
3598    
3599     - /* fixup BPF_CALL->imm field */
3600     - fixup_bpf_calls(prog);
3601     -
3602     /* eBPF program is ready to be JITed */
3603     prog = bpf_prog_select_runtime(prog, &err);
3604     if (err < 0)
3605     diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
3606     index d7eeebfafe8d..19c44cf59bb2 100644
3607     --- a/kernel/bpf/verifier.c
3608     +++ b/kernel/bpf/verifier.c
3609     @@ -1187,7 +1187,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
3610     }
3611     }
3612    
3613     -static int check_call(struct bpf_verifier_env *env, int func_id)
3614     +static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
3615     {
3616     struct bpf_verifier_state *state = &env->cur_state;
3617     const struct bpf_func_proto *fn = NULL;
3618     @@ -1238,6 +1238,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
3619     err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
3620     if (err)
3621     return err;
3622     + if (func_id == BPF_FUNC_tail_call) {
3623     + if (meta.map_ptr == NULL) {
3624     + verbose("verifier bug\n");
3625     + return -EINVAL;
3626     + }
3627     + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
3628     + }
3629     err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
3630     if (err)
3631     return err;
3632     @@ -3019,7 +3026,7 @@ static int do_check(struct bpf_verifier_env *env)
3633     return -EINVAL;
3634     }
3635    
3636     - err = check_call(env, insn->imm);
3637     + err = check_call(env, insn->imm, insn_idx);
3638     if (err)
3639     return err;
3640    
3641     @@ -3362,6 +3369,81 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
3642     return 0;
3643     }
3644    
3645     +/* fixup insn->imm field of bpf_call instructions
3646     + *
3647     + * this function is called after eBPF program passed verification
3648     + */
3649     +static int fixup_bpf_calls(struct bpf_verifier_env *env)
3650     +{
3651     + struct bpf_prog *prog = env->prog;
3652     + struct bpf_insn *insn = prog->insnsi;
3653     + const struct bpf_func_proto *fn;
3654     + const int insn_cnt = prog->len;
3655     + struct bpf_insn insn_buf[16];
3656     + struct bpf_prog *new_prog;
3657     + struct bpf_map *map_ptr;
3658     + int i, cnt, delta = 0;
3659     +
3660     +
3661     + for (i = 0; i < insn_cnt; i++, insn++) {
3662     + if (insn->code != (BPF_JMP | BPF_CALL))
3663     + continue;
3664     +
3665     + if (insn->imm == BPF_FUNC_get_route_realm)
3666     + prog->dst_needed = 1;
3667     + if (insn->imm == BPF_FUNC_get_prandom_u32)
3668     + bpf_user_rnd_init_once();
3669     + if (insn->imm == BPF_FUNC_tail_call) {
3670     + /* mark bpf_tail_call as different opcode to avoid
3671     + * conditional branch in the interpreter for every normal
3672     + * call and to prevent accidental JITing by JIT compiler
3673     + * that doesn't support bpf_tail_call yet
3674     + */
3675     + insn->imm = 0;
3676     + insn->code |= BPF_X;
3677     +
3678     + /* instead of changing every JIT dealing with tail_call
3679     + * emit two extra insns:
3680     + * if (index >= max_entries) goto out;
3681     + * index &= array->index_mask;
3682     + * to avoid out-of-bounds cpu speculation
3683     + */
3684     + map_ptr = env->insn_aux_data[i + delta].map_ptr;
3685     + if (!map_ptr->unpriv_array)
3686     + continue;
3687     + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
3688     + map_ptr->max_entries, 2);
3689     + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
3690     + container_of(map_ptr,
3691     + struct bpf_array,
3692     + map)->index_mask);
3693     + insn_buf[2] = *insn;
3694     + cnt = 3;
3695     + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
3696     + if (!new_prog)
3697     + return -ENOMEM;
3698     +
3699     + delta += cnt - 1;
3700     + env->prog = prog = new_prog;
3701     + insn = new_prog->insnsi + i + delta;
3702     + continue;
3703     + }
3704     +
3705     + fn = prog->aux->ops->get_func_proto(insn->imm);
3706     + /* all functions that have prototype and verifier allowed
3707     + * programs to call them, must be real in-kernel functions
3708     + */
3709     + if (!fn->func) {
3710     + verbose("kernel subsystem misconfigured func %d\n",
3711     + insn->imm);
3712     + return -EFAULT;
3713     + }
3714     + insn->imm = fn->func - __bpf_call_base;
3715     + }
3716     +
3717     + return 0;
3718     +}
3719     +
3720     static void free_states(struct bpf_verifier_env *env)
3721     {
3722     struct bpf_verifier_state_list *sl, *sln;
3723     @@ -3463,6 +3545,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
3724     /* program is valid, convert *(u32*)(ctx + off) accesses */
3725     ret = convert_ctx_accesses(env);
3726    
3727     + if (ret == 0)
3728     + ret = fixup_bpf_calls(env);
3729     +
3730     if (log_level && log_len >= log_size - 1) {
3731     BUG_ON(log_len >= log_size);
3732     /* verifier log exceeded user supplied buffer */
3733     diff --git a/mm/zswap.c b/mm/zswap.c
3734     index dbef27822a98..ded051e3433d 100644
3735     --- a/mm/zswap.c
3736     +++ b/mm/zswap.c
3737     @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp,
3738     pool = zswap_pool_find_get(type, compressor);
3739     if (pool) {
3740     zswap_pool_debug("using existing", pool);
3741     + WARN_ON(pool == zswap_pool_current());
3742     list_del_rcu(&pool->list);
3743     - } else {
3744     - spin_unlock(&zswap_pools_lock);
3745     - pool = zswap_pool_create(type, compressor);
3746     - spin_lock(&zswap_pools_lock);
3747     }
3748    
3749     + spin_unlock(&zswap_pools_lock);
3750     +
3751     + if (!pool)
3752     + pool = zswap_pool_create(type, compressor);
3753     +
3754     if (pool)
3755     ret = param_set_charp(s, kp);
3756     else
3757     ret = -EINVAL;
3758    
3759     + spin_lock(&zswap_pools_lock);
3760     +
3761     if (!ret) {
3762     put_pool = zswap_pool_current();
3763     list_add_rcu(&pool->list, &zswap_pools);
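
The zswap hunk exists because zswap_pool_create() can sleep, so it must not run under the zswap_pools spinlock: the lock is dropped for the creation step and retaken before the list is touched again. The pattern in miniature (pthread spinlock and malloc stand in for the kernel primitives; names illustrative):

#include <pthread.h>
#include <stdlib.h>

static pthread_spinlock_t pools_lock;
static void *current_pool;

static void *param_set(void)
{
	void *pool;

	pthread_spin_lock(&pools_lock);
	pool = current_pool;		/* fast path: reuse an existing pool */
	pthread_spin_unlock(&pools_lock);

	if (!pool)
		pool = malloc(64);	/* "zswap_pool_create()": may block */

	pthread_spin_lock(&pools_lock);
	if (pool)
		current_pool = pool;	/* publish under the lock */
	pthread_spin_unlock(&pools_lock);
	return pool;
}

int main(void)
{
	pthread_spin_init(&pools_lock, PTHREAD_PROCESS_PRIVATE);
	return param_set() ? 0 : 1;
}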
3764     diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
3765     index 4a47074d1d7f..c8ea3cf9db85 100644
3766     --- a/net/8021q/vlan.c
3767     +++ b/net/8021q/vlan.c
3768     @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
3769     vlan_gvrp_uninit_applicant(real_dev);
3770     }
3771    
3772     - /* Take it out of our own structures, but be sure to interlock with
3773     - * HW accelerating devices or SW vlan input packet processing if
3774     - * VLAN is not 0 (leave it there for 802.1p).
3775     - */
3776     - if (vlan_id)
3777     - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
3778     + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
3779    
3780     /* Get rid of the vlan's reference to real_dev */
3781     dev_put(real_dev);
3782     diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
3783     index ffd09c1675d4..2bbca23a9d05 100644
3784     --- a/net/bluetooth/l2cap_core.c
3785     +++ b/net/bluetooth/l2cap_core.c
3786     @@ -3353,9 +3353,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
3787     break;
3788    
3789     case L2CAP_CONF_EFS:
3790     - remote_efs = 1;
3791     - if (olen == sizeof(efs))
3792     + if (olen == sizeof(efs)) {
3793     + remote_efs = 1;
3794     memcpy(&efs, (void *) val, olen);
3795     + }
3796     break;
3797    
3798     case L2CAP_CONF_EWS:
3799     @@ -3574,16 +3575,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
3800     break;
3801    
3802     case L2CAP_CONF_EFS:
3803     - if (olen == sizeof(efs))
3804     + if (olen == sizeof(efs)) {
3805     memcpy(&efs, (void *)val, olen);
3806    
3807     - if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3808     - efs.stype != L2CAP_SERV_NOTRAFIC &&
3809     - efs.stype != chan->local_stype)
3810     - return -ECONNREFUSED;
3811     + if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
3812     + efs.stype != L2CAP_SERV_NOTRAFIC &&
3813     + efs.stype != chan->local_stype)
3814     + return -ECONNREFUSED;
3815    
3816     - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3817     - (unsigned long) &efs, endptr - ptr);
3818     + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
3819     + (unsigned long) &efs, endptr - ptr);
3820     + }
3821     break;
3822    
3823     case L2CAP_CONF_FCS:
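
Both l2cap hunks enforce the same rule: a configuration option is copied and acted on only when its advertised length matches the structure it fills, instead of setting state first and validating later. The check in isolation (struct efs here is a stand-in for the real EFS option layout):

#include <stdio.h>
#include <string.h>

struct efs { unsigned char id, stype; unsigned short msdu; };	/* illustrative */

static int parse_opt(const unsigned char *val, size_t olen)
{
	struct efs efs;

	if (olen != sizeof(efs))
		return 0;		/* ignore the malformed option */
	memcpy(&efs, val, olen);	/* safe: length verified first */
	return 1;
}

int main(void)
{
	unsigned char bogus[2] = { 0, 0 };

	printf("%d\n", parse_opt(bogus, sizeof(bogus)));	/* 0: rejected */
	return 0;
}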
3824     diff --git a/net/core/ethtool.c b/net/core/ethtool.c
3825     index e9989b835a66..7913771ec474 100644
3826     --- a/net/core/ethtool.c
3827     +++ b/net/core/ethtool.c
3828     @@ -742,15 +742,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
3829     return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
3830     }
3831    
3832     -static void
3833     -warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
3834     -{
3835     - char name[sizeof(current->comm)];
3836     -
3837     - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
3838     - get_task_comm(name, current), details);
3839     -}
3840     -
3841     /* Query device for its ethtool_cmd settings.
3842     *
3843     * Backward compatibility note: for compatibility with legacy ethtool,
3844     @@ -777,10 +768,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
3845     &link_ksettings);
3846     if (err < 0)
3847     return err;
3848     - if (!convert_link_ksettings_to_legacy_settings(&cmd,
3849     - &link_ksettings))
3850     - warn_incomplete_ethtool_legacy_settings_conversion(
3851     - "link modes are only partially reported");
3852     + convert_link_ksettings_to_legacy_settings(&cmd,
3853     + &link_ksettings);
3854    
3855     /* send a sensible cmd tag back to user */
3856     cmd.cmd = ETHTOOL_GSET;
3857     diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
3858     index 6b10573cc9fa..d1d9faf3046b 100644
3859     --- a/net/core/sock_diag.c
3860     +++ b/net/core/sock_diag.c
3861     @@ -295,7 +295,7 @@ static int sock_diag_bind(struct net *net, int group)
3862     case SKNLGRP_INET6_UDP_DESTROY:
3863     if (!sock_diag_handlers[AF_INET6])
3864     request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3865     - NETLINK_SOCK_DIAG, AF_INET);
3866     + NETLINK_SOCK_DIAG, AF_INET6);
3867     break;
3868     }
3869     return 0;
3870     diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
3871     index 506efba33a89..388584b8ff31 100644
3872     --- a/net/ipv6/ip6_output.c
3873     +++ b/net/ipv6/ip6_output.c
3874     @@ -1800,9 +1800,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
3875     cork.base.opt = NULL;
3876     v6_cork.opt = NULL;
3877     err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
3878     - if (err)
3879     + if (err) {
3880     + ip6_cork_release(&cork, &v6_cork);
3881     return ERR_PTR(err);
3882     -
3883     + }
3884     if (ipc6->dontfrag < 0)
3885     ipc6->dontfrag = inet6_sk(sk)->dontfrag;
3886    
3887     diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
3888     index 11d22d642488..131e6aa954bc 100644
3889     --- a/net/ipv6/ip6_tunnel.c
3890     +++ b/net/ipv6/ip6_tunnel.c
3891     @@ -1080,10 +1080,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
3892     memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
3893     neigh_release(neigh);
3894     }
3895     - } else if (!(t->parms.flags &
3896     - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
3897     - /* enable the cache only only if the routing decision does
3898     - * not depend on the current inner header value
3899     + } else if (t->parms.proto != 0 && !(t->parms.flags &
3900     + (IP6_TNL_F_USE_ORIG_TCLASS |
3901     + IP6_TNL_F_USE_ORIG_FWMARK))) {
3902     + /* enable the cache only if neither the outer protocol nor the
3903     + * routing decision depends on the current inner header value
3904     */
3905     use_cache = true;
3906     }
3907     diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
3908     index efa3f48f1ec5..73e8f347802e 100644
3909     --- a/net/mac80211/wep.c
3910     +++ b/net/mac80211/wep.c
3911     @@ -293,7 +293,8 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx)
3912     return RX_DROP_UNUSABLE;
3913     ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key);
3914     /* remove ICV */
3915     - if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
3916     + if (!(status->flag & RX_FLAG_ICV_STRIPPED) &&
3917     + pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN))
3918     return RX_DROP_UNUSABLE;
3919     }
3920    
3921     diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
3922     index 5c71d60f3a64..caa5986cb2e4 100644
3923     --- a/net/mac80211/wpa.c
3924     +++ b/net/mac80211/wpa.c
3925     @@ -295,7 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
3926     return RX_DROP_UNUSABLE;
3927    
3928     /* Trim ICV */
3929     - skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
3930     + if (!(status->flag & RX_FLAG_ICV_STRIPPED))
3931     + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN);
3932    
3933     /* Remove IV */
3934     memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen);
3935     diff --git a/net/rds/rdma.c b/net/rds/rdma.c
3936     index de8496e60735..f6027f41cd34 100644
3937     --- a/net/rds/rdma.c
3938     +++ b/net/rds/rdma.c
3939     @@ -524,6 +524,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
3940    
3941     local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
3942    
3943     + if (args->nr_local == 0)
3944     + return -EINVAL;
3945     +
3946     /* figure out the number of pages in the vector */
3947     for (i = 0; i < args->nr_local; i++) {
3948     if (copy_from_user(&vec, &local_vec[i],
3949     @@ -873,6 +876,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
3950     err:
3951     if (page)
3952     put_page(page);
3953     + rm->atomic.op_active = 0;
3954     kfree(rm->atomic.op_notifier);
3955    
3956     return ret;
3957     diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
3958     index e0aa30f83c6c..9617b42aaf20 100644
3959     --- a/net/sched/act_gact.c
3960     +++ b/net/sched/act_gact.c
3961     @@ -161,7 +161,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
3962     if (action == TC_ACT_SHOT)
3963     this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
3964    
3965     - tm->lastuse = lastuse;
3966     + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
3967     }
3968    
3969     static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
3970     diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
3971     index 6b07fba5770b..fc3650b06192 100644
3972     --- a/net/sched/act_mirred.c
3973     +++ b/net/sched/act_mirred.c
3974     @@ -211,7 +211,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
3975     struct tcf_t *tm = &m->tcf_tm;
3976    
3977     _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
3978     - tm->lastuse = lastuse;
3979     + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
3980     }
3981    
3982     static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
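
Both act_gact and act_mirred adopt the same monotonic-update rule: a stats refresh coming from hardware offload must never move lastuse backwards past what software already recorded. Standalone:

#include <stdio.h>
#include <stdint.h>

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

int main(void)
{
	uint64_t lastuse = 1000;	/* software already saw t=1000 */
	uint64_t hw_report = 900;	/* stale offload report        */

	lastuse = max_u64(lastuse, hw_report);
	printf("%llu\n", (unsigned long long)lastuse);	/* stays 1000 */
	return 0;
}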
3983     diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
3984     index bd8349759095..845eb9b800f3 100644
3985     --- a/scripts/mod/modpost.c
3986     +++ b/scripts/mod/modpost.c
3987     @@ -838,6 +838,7 @@ static const char *const section_white_list[] =
3988     ".cmem*", /* EZchip */
3989     ".fmt_slot*", /* EZchip */
3990     ".gnu.lto*",
3991     + ".discard.*",
3992     NULL
3993     };
3994    
3995     diff --git a/scripts/module-common.lds b/scripts/module-common.lds
3996     index 53234e85192a..9b6e246a45d0 100644
3997     --- a/scripts/module-common.lds
3998     +++ b/scripts/module-common.lds
3999     @@ -4,7 +4,10 @@
4000     * combine them automatically.
4001     */
4002     SECTIONS {
4003     - /DISCARD/ : { *(.discard) }
4004     + /DISCARD/ : {
4005     + *(.discard)
4006     + *(.discard.*)
4007     + }
4008    
4009     __ksymtab 0 : { *(SORT(___ksymtab+*)) }
4010     __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) }
4011     diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
4012     index ebc9fdfe64df..3321348fd86b 100644
4013     --- a/sound/core/oss/pcm_oss.c
4014     +++ b/sound/core/oss/pcm_oss.c
4015     @@ -466,7 +466,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
4016     v = snd_pcm_hw_param_last(pcm, params, var, dir);
4017     else
4018     v = snd_pcm_hw_param_first(pcm, params, var, dir);
4019     - snd_BUG_ON(v < 0);
4020     return v;
4021     }
4022    
4023     @@ -1370,8 +1369,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4024    
4025     if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4026     return tmp;
4027     - mutex_lock(&runtime->oss.params_lock);
4028     while (bytes > 0) {
4029     + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4030     + tmp = -ERESTARTSYS;
4031     + break;
4032     + }
4033     if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4034     tmp = bytes;
4035     if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
4036     @@ -1415,14 +1417,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4037     xfer += tmp;
4038     if ((substream->f_flags & O_NONBLOCK) != 0 &&
4039     tmp != runtime->oss.period_bytes)
4040     - break;
4041     + tmp = -EAGAIN;
4042     }
4043     - }
4044     - mutex_unlock(&runtime->oss.params_lock);
4045     - return xfer;
4046     -
4047     err:
4048     - mutex_unlock(&runtime->oss.params_lock);
4049     + mutex_unlock(&runtime->oss.params_lock);
4050     + if (tmp < 0)
4051     + break;
4052     + if (signal_pending(current)) {
4053     + tmp = -ERESTARTSYS;
4054     + break;
4055     + }
4056     + tmp = 0;
4057     + }
4058     return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4059     }
4060    
4061     @@ -1470,8 +1476,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4062    
4063     if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4064     return tmp;
4065     - mutex_lock(&runtime->oss.params_lock);
4066     while (bytes > 0) {
4067     + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4068     + tmp = -ERESTARTSYS;
4069     + break;
4070     + }
4071     if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4072     if (runtime->oss.buffer_used == 0) {
4073     tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
4074     @@ -1502,12 +1511,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4075     bytes -= tmp;
4076     xfer += tmp;
4077     }
4078     - }
4079     - mutex_unlock(&runtime->oss.params_lock);
4080     - return xfer;
4081     -
4082     err:
4083     - mutex_unlock(&runtime->oss.params_lock);
4084     + mutex_unlock(&runtime->oss.params_lock);
4085     + if (tmp < 0)
4086     + break;
4087     + if (signal_pending(current)) {
4088     + tmp = -ERESTARTSYS;
4089     + break;
4090     + }
4091     + tmp = 0;
4092     + }
4093     return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4094     }
4095    
4096     diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
4097     index 727ac44d39f4..a84a1d3d23e5 100644
4098     --- a/sound/core/oss/pcm_plugin.c
4099     +++ b/sound/core/oss/pcm_plugin.c
4100     @@ -591,18 +591,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
4101     snd_pcm_sframes_t frames = size;
4102    
4103     plugin = snd_pcm_plug_first(plug);
4104     - while (plugin && frames > 0) {
4105     + while (plugin) {
4106     + if (frames <= 0)
4107     + return frames;
4108     if ((next = plugin->next) != NULL) {
4109     snd_pcm_sframes_t frames1 = frames;
4110     - if (plugin->dst_frames)
4111     + if (plugin->dst_frames) {
4112     frames1 = plugin->dst_frames(plugin, frames);
4113     + if (frames1 <= 0)
4114     + return frames1;
4115     + }
4116     if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
4117     return err;
4118     }
4119     if (err != frames1) {
4120     frames = err;
4121     - if (plugin->src_frames)
4122     + if (plugin->src_frames) {
4123     frames = plugin->src_frames(plugin, frames1);
4124     + if (frames <= 0)
4125     + return frames;
4126     + }
4127     }
4128     } else
4129     dst_channels = NULL;
4130     diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
4131     index c80d80e312e3..e685e779a4b8 100644
4132     --- a/sound/core/pcm_lib.c
4133     +++ b/sound/core/pcm_lib.c
4134     @@ -1664,7 +1664,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
4135     return changed;
4136     if (params->rmask) {
4137     int err = snd_pcm_hw_refine(pcm, params);
4138     - if (snd_BUG_ON(err < 0))
4139     + if (err < 0)
4140     return err;
4141     }
4142     return snd_pcm_hw_param_value(params, var, dir);
4143     @@ -1711,7 +1711,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
4144     return changed;
4145     if (params->rmask) {
4146     int err = snd_pcm_hw_refine(pcm, params);
4147     - if (snd_BUG_ON(err < 0))
4148     + if (err < 0)
4149     return err;
4150     }
4151     return snd_pcm_hw_param_value(params, var, dir);
4152     diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
4153     index 54f348a4fb78..cbd20cb8ca11 100644
4154     --- a/sound/drivers/aloop.c
4155     +++ b/sound/drivers/aloop.c
4156     @@ -39,6 +39,7 @@
4157     #include <sound/core.h>
4158     #include <sound/control.h>
4159     #include <sound/pcm.h>
4160     +#include <sound/pcm_params.h>
4161     #include <sound/info.h>
4162     #include <sound/initval.h>
4163    
4164     @@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
4165     return 0;
4166     }
4167    
4168     -static void params_change_substream(struct loopback_pcm *dpcm,
4169     - struct snd_pcm_runtime *runtime)
4170     -{
4171     - struct snd_pcm_runtime *dst_runtime;
4172     -
4173     - if (dpcm == NULL || dpcm->substream == NULL)
4174     - return;
4175     - dst_runtime = dpcm->substream->runtime;
4176     - if (dst_runtime == NULL)
4177     - return;
4178     - dst_runtime->hw = dpcm->cable->hw;
4179     -}
4180     -
4181     static void params_change(struct snd_pcm_substream *substream)
4182     {
4183     struct snd_pcm_runtime *runtime = substream->runtime;
4184     @@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
4185     cable->hw.rate_max = runtime->rate;
4186     cable->hw.channels_min = runtime->channels;
4187     cable->hw.channels_max = runtime->channels;
4188     - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
4189     - runtime);
4190     - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
4191     - runtime);
4192     }
4193    
4194     static int loopback_prepare(struct snd_pcm_substream *substream)
4195     @@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
4196     static int rule_format(struct snd_pcm_hw_params *params,
4197     struct snd_pcm_hw_rule *rule)
4198     {
4199     + struct loopback_pcm *dpcm = rule->private;
4200     + struct loopback_cable *cable = dpcm->cable;
4201     + struct snd_mask m;
4202    
4203     - struct snd_pcm_hardware *hw = rule->private;
4204     - struct snd_mask *maskp = hw_param_mask(params, rule->var);
4205     -
4206     - maskp->bits[0] &= (u_int32_t)hw->formats;
4207     - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
4208     - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
4209     - if (! maskp->bits[0] && ! maskp->bits[1])
4210     - return -EINVAL;
4211     - return 0;
4212     + snd_mask_none(&m);
4213     + mutex_lock(&dpcm->loopback->cable_lock);
4214     + m.bits[0] = (u_int32_t)cable->hw.formats;
4215     + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
4216     + mutex_unlock(&dpcm->loopback->cable_lock);
4217     + return snd_mask_refine(hw_param_mask(params, rule->var), &m);
4218     }
4219    
4220     static int rule_rate(struct snd_pcm_hw_params *params,
4221     struct snd_pcm_hw_rule *rule)
4222     {
4223     - struct snd_pcm_hardware *hw = rule->private;
4224     + struct loopback_pcm *dpcm = rule->private;
4225     + struct loopback_cable *cable = dpcm->cable;
4226     struct snd_interval t;
4227    
4228     - t.min = hw->rate_min;
4229     - t.max = hw->rate_max;
4230     + mutex_lock(&dpcm->loopback->cable_lock);
4231     + t.min = cable->hw.rate_min;
4232     + t.max = cable->hw.rate_max;
4233     + mutex_unlock(&dpcm->loopback->cable_lock);
4234     t.openmin = t.openmax = 0;
4235     t.integer = 0;
4236     return snd_interval_refine(hw_param_interval(params, rule->var), &t);
4237     @@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
4238     static int rule_channels(struct snd_pcm_hw_params *params,
4239     struct snd_pcm_hw_rule *rule)
4240     {
4241     - struct snd_pcm_hardware *hw = rule->private;
4242     + struct loopback_pcm *dpcm = rule->private;
4243     + struct loopback_cable *cable = dpcm->cable;
4244     struct snd_interval t;
4245    
4246     - t.min = hw->channels_min;
4247     - t.max = hw->channels_max;
4248     + mutex_lock(&dpcm->loopback->cable_lock);
4249     + t.min = cable->hw.channels_min;
4250     + t.max = cable->hw.channels_max;
4251     + mutex_unlock(&dpcm->loopback->cable_lock);
4252     t.openmin = t.openmax = 0;
4253     t.integer = 0;
4254     return snd_interval_refine(hw_param_interval(params, rule->var), &t);
4255     }
4256    
4257     +static void free_cable(struct snd_pcm_substream *substream)
4258     +{
4259     + struct loopback *loopback = substream->private_data;
4260     + int dev = get_cable_index(substream);
4261     + struct loopback_cable *cable;
4262     +
4263     + cable = loopback->cables[substream->number][dev];
4264     + if (!cable)
4265     + return;
4266     + if (cable->streams[!substream->stream]) {
4267     + /* other stream is still alive */
4268     + cable->streams[substream->stream] = NULL;
4269     + } else {
4270     + /* free the cable */
4271     + loopback->cables[substream->number][dev] = NULL;
4272     + kfree(cable);
4273     + }
4274     +}
4275     +
4276     static int loopback_open(struct snd_pcm_substream *substream)
4277     {
4278     struct snd_pcm_runtime *runtime = substream->runtime;
4279     struct loopback *loopback = substream->private_data;
4280     struct loopback_pcm *dpcm;
4281     - struct loopback_cable *cable;
4282     + struct loopback_cable *cable = NULL;
4283     int err = 0;
4284     int dev = get_cable_index(substream);
4285    
4286     @@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
4287     if (!cable) {
4288     cable = kzalloc(sizeof(*cable), GFP_KERNEL);
4289     if (!cable) {
4290     - kfree(dpcm);
4291     err = -ENOMEM;
4292     goto unlock;
4293     }
4294     @@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
4295     /* are cached -> they do not reflect the actual state */
4296     err = snd_pcm_hw_rule_add(runtime, 0,
4297     SNDRV_PCM_HW_PARAM_FORMAT,
4298     - rule_format, &runtime->hw,
4299     + rule_format, dpcm,
4300     SNDRV_PCM_HW_PARAM_FORMAT, -1);
4301     if (err < 0)
4302     goto unlock;
4303     err = snd_pcm_hw_rule_add(runtime, 0,
4304     SNDRV_PCM_HW_PARAM_RATE,
4305     - rule_rate, &runtime->hw,
4306     + rule_rate, dpcm,
4307     SNDRV_PCM_HW_PARAM_RATE, -1);
4308     if (err < 0)
4309     goto unlock;
4310     err = snd_pcm_hw_rule_add(runtime, 0,
4311     SNDRV_PCM_HW_PARAM_CHANNELS,
4312     - rule_channels, &runtime->hw,
4313     + rule_channels, dpcm,
4314     SNDRV_PCM_HW_PARAM_CHANNELS, -1);
4315     if (err < 0)
4316     goto unlock;
4317     @@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
4318     else
4319     runtime->hw = cable->hw;
4320     unlock:
4321     + if (err < 0) {
4322     + free_cable(substream);
4323     + kfree(dpcm);
4324     + }
4325     mutex_unlock(&loopback->cable_lock);
4326     return err;
4327     }
4328     @@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
4329     {
4330     struct loopback *loopback = substream->private_data;
4331     struct loopback_pcm *dpcm = substream->runtime->private_data;
4332     - struct loopback_cable *cable;
4333     - int dev = get_cable_index(substream);
4334    
4335     loopback_timer_stop(dpcm);
4336     mutex_lock(&loopback->cable_lock);
4337     - cable = loopback->cables[substream->number][dev];
4338     - if (cable->streams[!substream->stream]) {
4339     - /* other stream is still alive */
4340     - cable->streams[substream->stream] = NULL;
4341     - } else {
4342     - /* free the cable */
4343     - loopback->cables[substream->number][dev] = NULL;
4344     - kfree(cable);
4345     - }
4346     + free_cable(substream);
4347     mutex_unlock(&loopback->cable_lock);
4348     return 0;
4349     }
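
Two things happen in the aloop.c diff: the hw-constraint rules now receive the loopback_pcm itself as private data and read the live cable->hw limits under cable_lock (the surviving comment notes the old runtime->hw values "are cached -> they do not reflect the actual state"), and the open/close teardown is consolidated into free_cable(), so a failed open releases both the half-constructed cable and the dpcm instead of leaking them. The locking shape of the rule callbacks, modeled in plain C (the types below are stand-ins for the ALSA ones):

    #include <errno.h>
    #include <pthread.h>

    struct cable_hw { unsigned int rate_min, rate_max; };
    struct interval { unsigned int min, max; };

    static pthread_mutex_t cable_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct cable_hw cable = { 8000, 192000 };

    int refine_rate(struct interval *t)
    {
        struct interval snap;

        pthread_mutex_lock(&cable_lock);     /* snapshot the shared limits */
        snap.min = cable.rate_min;
        snap.max = cable.rate_max;
        pthread_mutex_unlock(&cable_lock);   /* refine outside the lock */

        if (snap.min > t->min)
            t->min = snap.min;
        if (snap.max < t->max)
            t->max = snap.max;
        return t->min <= t->max ? 0 : -EINVAL;
    }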
4350     diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
4351     index b8dadb050d2b..a688a857a7ae 100644
4352     --- a/tools/objtool/builtin-check.c
4353     +++ b/tools/objtool/builtin-check.c
4354     @@ -51,7 +51,7 @@ struct instruction {
4355     unsigned int len, state;
4356     unsigned char type;
4357     unsigned long immediate;
4358     - bool alt_group, visited;
4359     + bool alt_group, visited, ignore_alts;
4360     struct symbol *call_dest;
4361     struct instruction *jump_dest;
4362     struct list_head alts;
4363     @@ -352,6 +352,40 @@ static void add_ignores(struct objtool_file *file)
4364     }
4365     }
4366    
4367     +/*
4368     + * FIXME: For now, just ignore any alternatives which add retpolines. This is
4369     + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
4370     + * But it at least allows objtool to understand the control flow *around* the
4371     + * retpoline.
4372     + */
4373     +static int add_nospec_ignores(struct objtool_file *file)
4374     +{
4375     + struct section *sec;
4376     + struct rela *rela;
4377     + struct instruction *insn;
4378     +
4379     + sec = find_section_by_name(file->elf, ".rela.discard.nospec");
4380     + if (!sec)
4381     + return 0;
4382     +
4383     + list_for_each_entry(rela, &sec->rela_list, list) {
4384     + if (rela->sym->type != STT_SECTION) {
4385     + WARN("unexpected relocation symbol type in %s", sec->name);
4386     + return -1;
4387     + }
4388     +
4389     + insn = find_insn(file, rela->sym->sec, rela->addend);
4390     + if (!insn) {
4391     + WARN("bad .discard.nospec entry");
4392     + return -1;
4393     + }
4394     +
4395     + insn->ignore_alts = true;
4396     + }
4397     +
4398     + return 0;
4399     +}
4400     +
4401     /*
4402     * Find the destination instructions for all jumps.
4403     */
4404     @@ -382,6 +416,13 @@ static int add_jump_destinations(struct objtool_file *file)
4405     } else if (rela->sym->sec->idx) {
4406     dest_sec = rela->sym->sec;
4407     dest_off = rela->sym->sym.st_value + rela->addend + 4;
4408     + } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
4409     + /*
4410     + * Retpoline jumps are really dynamic jumps in
4411     + * disguise, so convert them accordingly.
4412     + */
4413     + insn->type = INSN_JUMP_DYNAMIC;
4414     + continue;
4415     } else {
4416     /* sibling call */
4417     insn->jump_dest = 0;
4418     @@ -428,11 +469,18 @@ static int add_call_destinations(struct objtool_file *file)
4419     dest_off = insn->offset + insn->len + insn->immediate;
4420     insn->call_dest = find_symbol_by_offset(insn->sec,
4421     dest_off);
4422     + /*
4423     + * FIXME: Thanks to retpolines, it's now considered
4424     + * normal for a function to call within itself. So
4425     + * disable this warning for now.
4426     + */
4427     +#if 0
4428     if (!insn->call_dest) {
4429     WARN_FUNC("can't find call dest symbol at offset 0x%lx",
4430     insn->sec, insn->offset, dest_off);
4431     return -1;
4432     }
4433     +#endif
4434     } else if (rela->sym->type == STT_SECTION) {
4435     insn->call_dest = find_symbol_by_offset(rela->sym->sec,
4436     rela->addend+4);
4437     @@ -594,12 +642,6 @@ static int add_special_section_alts(struct objtool_file *file)
4438     return ret;
4439    
4440     list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
4441     - alt = malloc(sizeof(*alt));
4442     - if (!alt) {
4443     - WARN("malloc failed");
4444     - ret = -1;
4445     - goto out;
4446     - }
4447    
4448     orig_insn = find_insn(file, special_alt->orig_sec,
4449     special_alt->orig_off);
4450     @@ -610,6 +652,10 @@ static int add_special_section_alts(struct objtool_file *file)
4451     goto out;
4452     }
4453    
4454     + /* Ignore retpoline alternatives. */
4455     + if (orig_insn->ignore_alts)
4456     + continue;
4457     +
4458     new_insn = NULL;
4459     if (!special_alt->group || special_alt->new_len) {
4460     new_insn = find_insn(file, special_alt->new_sec,
4461     @@ -635,6 +681,13 @@ static int add_special_section_alts(struct objtool_file *file)
4462     goto out;
4463     }
4464    
4465     + alt = malloc(sizeof(*alt));
4466     + if (!alt) {
4467     + WARN("malloc failed");
4468     + ret = -1;
4469     + goto out;
4470     + }
4471     +
4472     alt->insn = new_insn;
4473     list_add_tail(&alt->list, &orig_insn->alts);
4474    
4475     @@ -854,6 +907,10 @@ static int decode_sections(struct objtool_file *file)
4476    
4477     add_ignores(file);
4478    
4479     + ret = add_nospec_ignores(file);
4480     + if (ret)
4481     + return ret;
4482     +
4483     ret = add_jump_destinations(file);
4484     if (ret)
4485     return ret;
4486     @@ -1173,6 +1230,14 @@ static int validate_uncallable_instructions(struct objtool_file *file)
4487    
4488     for_each_insn(file, insn) {
4489     if (!insn->visited && insn->type == INSN_RETURN) {
4490     +
4491     + /*
4492     + * Don't warn about call instructions in unvisited
4493     + * retpoline alternatives.
4494     + */
4495     + if (!strcmp(insn->sec->name, ".altinstr_replacement"))
4496     + continue;
4497     +
4498     WARN_FUNC("return instruction outside of a callable function",
4499     insn->sec, insn->offset);
4500     warnings++;
4501     @@ -1229,7 +1294,7 @@ int cmd_check(int argc, const char **argv)
4502    
4503     INIT_LIST_HEAD(&file.insn_list);
4504     hash_init(file.insn_hash);
4505     - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
4506     + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
4507     file.rodata = find_section_by_name(file.elf, ".rodata");
4508     file.ignore_unreachables = false;
4509     file.c_file = find_section_by_name(file.elf, ".comment");
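
For context on the objtool half of the retpoline story: compiled code marks each retpoline alternative by emitting a relocation into a .discard.nospec section, and add_nospec_ignores() above walks .rela.discard.nospec, resolves each STT_SECTION relocation back to an instruction, and sets ignore_alts on it. The producer side, as the matching x86 patches of this same series define it in nospec-branch.h (reproduced from memory, so treat the exact spelling as a sketch):

    /* Plants a section-relative pointer to the following instruction into
     * .discard.nospec; objtool finds the instruction again via the
     * (rela->sym->sec, rela->addend) pair checked above. */
    #define ANNOTATE_NOSPEC_ALTERNATIVE          \
        "999:\n\t"                               \
        ".pushsection .discard.nospec\n\t"       \
        ".long 999b - .\n\t"                     \
        ".popsection\n\t"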
4510     diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
4511     index 6300c1a41ff6..4af37bfe4aea 100644
4512     --- a/tools/testing/selftests/x86/Makefile
4513     +++ b/tools/testing/selftests/x86/Makefile
4514     @@ -6,7 +6,7 @@ include ../lib.mk
4515    
4516     TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
4517     check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
4518     - protection_keys
4519     + protection_keys test_vsyscall
4520     TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
4521     test_FCMOV test_FCOMI test_FISTTP \
4522     vdso_restorer
4523     diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
4524     new file mode 100644
4525     index 000000000000..6e0bd52ad53d
4526     --- /dev/null
4527     +++ b/tools/testing/selftests/x86/test_vsyscall.c
4528     @@ -0,0 +1,500 @@
4529     +/* SPDX-License-Identifier: GPL-2.0 */
4530     +
4531     +#define _GNU_SOURCE
4532     +
4533     +#include <stdio.h>
4534     +#include <sys/time.h>
4535     +#include <time.h>
4536     +#include <stdlib.h>
4537     +#include <sys/syscall.h>
4538     +#include <unistd.h>
4539     +#include <dlfcn.h>
4540     +#include <string.h>
4541     +#include <inttypes.h>
4542     +#include <signal.h>
4543     +#include <sys/ucontext.h>
4544     +#include <errno.h>
4545     +#include <err.h>
4546     +#include <sched.h>
4547     +#include <stdbool.h>
4548     +#include <setjmp.h>
4549     +
4550     +#ifdef __x86_64__
4551     +# define VSYS(x) (x)
4552     +#else
4553     +# define VSYS(x) 0
4554     +#endif
4555     +
4556     +#ifndef SYS_getcpu
4557     +# ifdef __x86_64__
4558     +# define SYS_getcpu 309
4559     +# else
4560     +# define SYS_getcpu 318
4561     +# endif
4562     +#endif
4563     +
4564     +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
4565     + int flags)
4566     +{
4567     + struct sigaction sa;
4568     + memset(&sa, 0, sizeof(sa));
4569     + sa.sa_sigaction = handler;
4570     + sa.sa_flags = SA_SIGINFO | flags;
4571     + sigemptyset(&sa.sa_mask);
4572     + if (sigaction(sig, &sa, 0))
4573     + err(1, "sigaction");
4574     +}
4575     +
4576     +/* vsyscalls and vDSO */
4577     +bool should_read_vsyscall = false;
4578     +
4579     +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
4580     +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
4581     +gtod_t vdso_gtod;
4582     +
4583     +typedef int (*vgettime_t)(clockid_t, struct timespec *);
4584     +vgettime_t vdso_gettime;
4585     +
4586     +typedef long (*time_func_t)(time_t *t);
4587     +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
4588     +time_func_t vdso_time;
4589     +
4590     +typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
4591     +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
4592     +getcpu_t vdso_getcpu;
4593     +
4594     +static void init_vdso(void)
4595     +{
4596     + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
4597     + if (!vdso)
4598     + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
4599     + if (!vdso) {
4600     + printf("[WARN]\tfailed to find vDSO\n");
4601     + return;
4602     + }
4603     +
4604     + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
4605     + if (!vdso_gtod)
4606     + printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
4607     +
4608     + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
4609     + if (!vdso_gettime)
4610     + printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
4611     +
4612     + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
4613     + if (!vdso_time)
4614     + printf("[WARN]\tfailed to find time in vDSO\n");
4615     +
4616     + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
4617     + if (!vdso_getcpu) {
4618     + /* getcpu() was never wired up in the 32-bit vDSO. */
4619     + printf("[%s]\tfailed to find getcpu in vDSO\n",
4620     + sizeof(long) == 8 ? "WARN" : "NOTE");
4621     + }
4622     +}
4623     +
4624     +static int init_vsys(void)
4625     +{
4626     +#ifdef __x86_64__
4627     + int nerrs = 0;
4628     + FILE *maps;
4629     + char line[128];
4630     + bool found = false;
4631     +
4632     + maps = fopen("/proc/self/maps", "r");
4633     + if (!maps) {
4634     + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
4635     + should_read_vsyscall = true;
4636     + return 0;
4637     + }
4638     +
4639     + while (fgets(line, sizeof(line), maps)) {
4640     + char r, x;
4641     + void *start, *end;
4642     + char name[128];
4643     + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
4644     + &start, &end, &r, &x, name) != 5)
4645     + continue;
4646     +
4647     + if (strcmp(name, "[vsyscall]"))
4648     + continue;
4649     +
4650     + printf("\tvsyscall map: %s", line);
4651     +
4652     + if (start != (void *)0xffffffffff600000 ||
4653     + end != (void *)0xffffffffff601000) {
4654     + printf("[FAIL]\taddress range is nonsense\n");
4655     + nerrs++;
4656     + }
4657     +
4658     + printf("\tvsyscall permissions are %c-%c\n", r, x);
4659     + should_read_vsyscall = (r == 'r');
4660     + if (x != 'x') {
4661     + vgtod = NULL;
4662     + vtime = NULL;
4663     + vgetcpu = NULL;
4664     + }
4665     +
4666     + found = true;
4667     + break;
4668     + }
4669     +
4670     + fclose(maps);
4671     +
4672     + if (!found) {
4673     + printf("\tno vsyscall map in /proc/self/maps\n");
4674     + should_read_vsyscall = false;
4675     + vgtod = NULL;
4676     + vtime = NULL;
4677     + vgetcpu = NULL;
4678     + }
4679     +
4680     + return nerrs;
4681     +#else
4682     + return 0;
4683     +#endif
4684     +}
4685     +
4686     +/* syscalls */
4687     +static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
4688     +{
4689     + return syscall(SYS_gettimeofday, tv, tz);
4690     +}
4691     +
4692     +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
4693     +{
4694     + return syscall(SYS_clock_gettime, id, ts);
4695     +}
4696     +
4697     +static inline long sys_time(time_t *t)
4698     +{
4699     + return syscall(SYS_time, t);
4700     +}
4701     +
4702     +static inline long sys_getcpu(unsigned * cpu, unsigned * node,
4703     + void* cache)
4704     +{
4705     + return syscall(SYS_getcpu, cpu, node, cache);
4706     +}
4707     +
4708     +static jmp_buf jmpbuf;
4709     +
4710     +static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
4711     +{
4712     + siglongjmp(jmpbuf, 1);
4713     +}
4714     +
4715     +static double tv_diff(const struct timeval *a, const struct timeval *b)
4716     +{
4717     + return (double)(a->tv_sec - b->tv_sec) +
4718     + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
4719     +}
4720     +
4721     +static int check_gtod(const struct timeval *tv_sys1,
4722     + const struct timeval *tv_sys2,
4723     + const struct timezone *tz_sys,
4724     + const char *which,
4725     + const struct timeval *tv_other,
4726     + const struct timezone *tz_other)
4727     +{
4728     + int nerrs = 0;
4729     + double d1, d2;
4730     +
4731     + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
4732     + printf("[FAIL] %s tz mismatch\n", which);
4733     + nerrs++;
4734     + }
4735     +
4736     + d1 = tv_diff(tv_other, tv_sys1);
4737     + d2 = tv_diff(tv_sys2, tv_other);
4738     + printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
4739     +
4740     + if (d1 < 0 || d2 < 0) {
4741     + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
4742     + nerrs++;
4743     + } else {
4744     + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
4745     + }
4746     +
4747     + return nerrs;
4748     +}
4749     +
4750     +static int test_gtod(void)
4751     +{
4752     + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
4753     + struct timezone tz_sys, tz_vdso, tz_vsys;
4754     + long ret_vdso = -1;
4755     + long ret_vsys = -1;
4756     + int nerrs = 0;
4757     +
4758     + printf("[RUN]\ttest gettimeofday()\n");
4759     +
4760     + if (sys_gtod(&tv_sys1, &tz_sys) != 0)
4761     + err(1, "syscall gettimeofday");
4762     + if (vdso_gtod)
4763     + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
4764     + if (vgtod)
4765     + ret_vsys = vgtod(&tv_vsys, &tz_vsys);
4766     + if (sys_gtod(&tv_sys2, &tz_sys) != 0)
4767     + err(1, "syscall gettimeofday");
4768     +
4769     + if (vdso_gtod) {
4770     + if (ret_vdso == 0) {
4771     + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
4772     + } else {
4773     + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
4774     + nerrs++;
4775     + }
4776     + }
4777     +
4778     + if (vgtod) {
4779     + if (ret_vsys == 0) {
4780     + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
4781     + } else {
4782     + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
4783     + nerrs++;
4784     + }
4785     + }
4786     +
4787     + return nerrs;
4788     +}
4789     +
4790     +static int test_time(void) {
4791     + int nerrs = 0;
4792     +
4793     + printf("[RUN]\ttest time()\n");
4794     + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
4795     + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
4796     + t_sys1 = sys_time(&t2_sys1);
4797     + if (vdso_time)
4798     + t_vdso = vdso_time(&t2_vdso);
4799     + if (vtime)
4800     + t_vsys = vtime(&t2_vsys);
4801     + t_sys2 = sys_time(&t2_sys2);
4802     + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
4803     + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
4804     + nerrs++;
4805     + return nerrs;
4806     + }
4807     +
4808     + if (vdso_time) {
4809     + if (t_vdso < 0 || t_vdso != t2_vdso) {
4810     + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
4811     + nerrs++;
4812     + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
4813     + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
4814     + nerrs++;
4815     + } else {
4816     + printf("[OK]\tvDSO time() is okay\n");
4817     + }
4818     + }
4819     +
4820     + if (vtime) {
4821     + if (t_vsys < 0 || t_vsys != t2_vsys) {
4822     + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
4823     + nerrs++;
4824     + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
4825     + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
4826     + nerrs++;
4827     + } else {
4828     + printf("[OK]\tvsyscall time() is okay\n");
4829     + }
4830     + }
4831     +
4832     + return nerrs;
4833     +}
4834     +
4835     +static int test_getcpu(int cpu)
4836     +{
4837     + int nerrs = 0;
4838     + long ret_sys, ret_vdso = -1, ret_vsys = -1;
4839     +
4840     + printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
4841     +
4842     + cpu_set_t cpuset;
4843     + CPU_ZERO(&cpuset);
4844     + CPU_SET(cpu, &cpuset);
4845     + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
4846     + printf("[SKIP]\tfailed to force CPU %d\n", cpu);
4847     + return nerrs;
4848     + }
4849     +
4850     + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
4851     + unsigned node = 0;
4852     + bool have_node = false;
4853     + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
4854     + if (vdso_getcpu)
4855     + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
4856     + if (vgetcpu)
4857     + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
4858     +
4859     + if (ret_sys == 0) {
4860     + if (cpu_sys != cpu) {
4861     + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
4862     + nerrs++;
4863     + }
4864     +
4865     + have_node = true;
4866     + node = node_sys;
4867     + }
4868     +
4869     + if (vdso_getcpu) {
4870     + if (ret_vdso) {
4871     + printf("[FAIL]\tvDSO getcpu() failed\n");
4872     + nerrs++;
4873     + } else {
4874     + if (!have_node) {
4875     + have_node = true;
4876     + node = node_vdso;
4877     + }
4878     +
4879     + if (cpu_vdso != cpu) {
4880     + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
4881     + nerrs++;
4882     + } else {
4883     + printf("[OK]\tvDSO reported correct CPU\n");
4884     + }
4885     +
4886     + if (node_vdso != node) {
4887     + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
4888     + nerrs++;
4889     + } else {
4890     + printf("[OK]\tvDSO reported correct node\n");
4891     + }
4892     + }
4893     + }
4894     +
4895     + if (vgetcpu) {
4896     + if (ret_vsys) {
4897     + printf("[FAIL]\tvsyscall getcpu() failed\n");
4898     + nerrs++;
4899     + } else {
4900     + if (!have_node) {
4901     + have_node = true;
4902     + node = node_vsys;
4903     + }
4904     +
4905     + if (cpu_vsys != cpu) {
4906     + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
4907     + nerrs++;
4908     + } else {
4909     + printf("[OK]\tvsyscall reported correct CPU\n");
4910     + }
4911     +
4912     + if (node_vsys != node) {
4913     + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
4914     + nerrs++;
4915     + } else {
4916     + printf("[OK]\tvsyscall reported correct node\n");
4917     + }
4918     + }
4919     + }
4920     +
4921     + return nerrs;
4922     +}
4923     +
4924     +static int test_vsys_r(void)
4925     +{
4926     +#ifdef __x86_64__
4927     + printf("[RUN]\tChecking read access to the vsyscall page\n");
4928     + bool can_read;
4929     + if (sigsetjmp(jmpbuf, 1) == 0) {
4930     + *(volatile int *)0xffffffffff600000;
4931     + can_read = true;
4932     + } else {
4933     + can_read = false;
4934     + }
4935     +
4936     + if (can_read && !should_read_vsyscall) {
4937     + printf("[FAIL]\tWe have read access, but we shouldn't\n");
4938     + return 1;
4939     + } else if (!can_read && should_read_vsyscall) {
4940     + printf("[FAIL]\tWe don't have read access, but we should\n");
4941     + return 1;
4942     + } else {
4943     + printf("[OK]\tgot expected result\n");
4944     + }
4945     +#endif
4946     +
4947     + return 0;
4948     +}
4949     +
4950     +
4951     +#ifdef __x86_64__
4952     +#define X86_EFLAGS_TF (1UL << 8)
4953     +static volatile sig_atomic_t num_vsyscall_traps;
4954     +
4955     +static unsigned long get_eflags(void)
4956     +{
4957     + unsigned long eflags;
4958     + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
4959     + return eflags;
4960     +}
4961     +
4962     +static void set_eflags(unsigned long eflags)
4963     +{
4964     + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
4965     +}
4966     +
4967     +static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
4968     +{
4969     + ucontext_t *ctx = (ucontext_t *)ctx_void;
4970     + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
4971     +
4972     + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
4973     + num_vsyscall_traps++;
4974     +}
4975     +
4976     +static int test_native_vsyscall(void)
4977     +{
4978     + time_t tmp;
4979     + bool is_native;
4980     +
4981     + if (!vtime)
4982     + return 0;
4983     +
4984     + printf("[RUN]\tchecking for native vsyscall\n");
4985     + sethandler(SIGTRAP, sigtrap, 0);
4986     + set_eflags(get_eflags() | X86_EFLAGS_TF);
4987     + vtime(&tmp);
4988     + set_eflags(get_eflags() & ~X86_EFLAGS_TF);
4989     +
4990     + /*
4991     + * If vsyscalls are emulated, we expect a single trap in the
4992     + * vsyscall page -- the call instruction will trap with RIP
4993     + * pointing to the entry point before emulation takes over.
4994     + * In native mode, we expect two traps, since whatever code
4995     + * the vsyscall page contains will be more than just a ret
4996     + * instruction.
4997     + */
4998     + is_native = (num_vsyscall_traps > 1);
4999     +
5000     + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
5001     + (is_native ? "native" : "emulated"),
5002     + (int)num_vsyscall_traps);
5003     +
5004     + return 0;
5005     +}
5006     +#endif
5007     +
5008     +int main(int argc, char **argv)
5009     +{
5010     + int nerrs = 0;
5011     +
5012     + init_vdso();
5013     + nerrs += init_vsys();
5014     +
5015     + nerrs += test_gtod();
5016     + nerrs += test_time();
5017     + nerrs += test_getcpu(0);
5018     + nerrs += test_getcpu(1);
5019     +
5020     + sethandler(SIGSEGV, sigsegv, 0);
5021     + nerrs += test_vsys_r();
5022     +
5023     +#ifdef __x86_64__
5024     + nerrs += test_native_vsyscall();
5025     +#endif
5026     +
5027     + return nerrs ? 1 : 0;
5028     +}
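
The Makefile hunk earlier adds test_vsyscall to TARGETS_C_BOTHBITS, so the selftest builds as both a 32-bit and a 64-bit binary under tools/testing/selftests/x86/. Its read probe (test_vsys_r() above) works by longjmping out of a SIGSEGV handler; the same pattern, reduced to a standalone program that assumes an x86-64 Linux host:

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>

    static sigjmp_buf env;

    static void on_segv(int sig)
    {
        (void)sig;
        siglongjmp(env, 1);          /* unwind out of the faulting access */
    }

    static int can_read(const volatile void *p)
    {
        struct sigaction sa, old;
        int ok;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_segv;
        sigaction(SIGSEGV, &sa, &old);

        if (sigsetjmp(env, 1) == 0) {
            (void)*(const volatile char *)p;   /* may fault */
            ok = 1;
        } else {
            ok = 0;                  /* arrived here via SIGSEGV */
        }
        sigaction(SIGSEGV, &old, NULL);
        return ok;
    }

    int main(void)
    {
        printf("vsyscall page readable: %d\n",
               can_read((const volatile void *)0xffffffffff600000UL));
        return 0;
    }

As in the selftest, sigsetjmp(env, 1) saves the signal mask, so siglongjmp() restores it and SIGSEGV is unblocked again once the probe returns.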