Magellan Linux

Annotation of /trunk/kernel-magellan/patches-4.14/0113-4.14.14-all-fixes.patch



Revision 3074
Wed Jan 17 13:27:15 2018 UTC by niro
File size: 193113 byte(s)
-linux-4.14.14
1 niro 3074 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2     index f3d5817c4ef0..258902db14bf 100644
3     --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4     +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5     @@ -373,3 +373,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
6     Description: information about CPUs heterogeneity.
7    
8     cpu_capacity: capacity of cpu#.
9     +
10     +What: /sys/devices/system/cpu/vulnerabilities
11     + /sys/devices/system/cpu/vulnerabilities/meltdown
12     + /sys/devices/system/cpu/vulnerabilities/spectre_v1
13     + /sys/devices/system/cpu/vulnerabilities/spectre_v2
14     +Date: January 2018
15     +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
16     +Description: Information about CPU vulnerabilities
17     +
18     + The files are named after the code names of CPU
19     + vulnerabilities. The output of those files reflects the
20     + state of the CPUs in the system. Possible output values:
21     +
22     + "Not affected" CPU is not affected by the vulnerability
23     + "Vulnerable" CPU is affected and no mitigation in effect
24     + "Mitigation: $M" CPU is affected and mitigation $M is in effect
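The new sysfs entries are plain text, so the mitigation state can be checked from userspace with an ordinary read. A minimal C sketch (file names taken from the ABI entry above; error handling trimmed, and older kernels simply lack the files):

    #include <stdio.h>

    int main(void)
    {
        static const char *files[] = {
            "/sys/devices/system/cpu/vulnerabilities/meltdown",
            "/sys/devices/system/cpu/vulnerabilities/spectre_v1",
            "/sys/devices/system/cpu/vulnerabilities/spectre_v2",
        };
        char line[256];
        unsigned int i;

        for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
            FILE *f = fopen(files[i], "r");

            if (!f)
                continue;   /* kernel predates this ABI */
            if (fgets(line, sizeof(line), f))
                printf("%s: %s", files[i], line);
            fclose(f);
        }
        return 0;
    }

Each file prints one of the values documented above, e.g. "Mitigation: PTI" for meltdown on an affected CPU running with page table isolation enabled.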
25     diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
26     index 520fdec15bbb..8122b5f98ea1 100644
27     --- a/Documentation/admin-guide/kernel-parameters.txt
28     +++ b/Documentation/admin-guide/kernel-parameters.txt
29     @@ -2599,6 +2599,11 @@
30     nosmt [KNL,S390] Disable symmetric multithreading (SMT).
31     Equivalent to smt=1.
32    
33     + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
34     + (indirect branch prediction) vulnerability. System may
35     + allow data leaks with this option, which is equivalent
36     + to spectre_v2=off.
37     +
38     noxsave [BUGS=X86] Disables x86 extended register state save
39     and restore using xsave. The kernel will fallback to
40     enabling legacy floating-point and sse state.
41     @@ -2685,8 +2690,6 @@
42     steal time is computed, but won't influence scheduler
43     behaviour
44    
45     - nopti [X86-64] Disable kernel page table isolation
46     -
47     nolapic [X86-32,APIC] Do not enable or use the local APIC.
48    
49     nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
50     @@ -3255,11 +3258,20 @@
51     pt. [PARIDE]
52     See Documentation/blockdev/paride.txt.
53    
54     - pti= [X86_64]
55     - Control user/kernel address space isolation:
56     - on - enable
57     - off - disable
58     - auto - default setting
59     + pti= [X86_64] Control Page Table Isolation of user and
60     + kernel address spaces. Disabling this feature
61     + removes hardening, but improves performance of
62     + system calls and interrupts.
63     +
64     + on - unconditionally enable
65     + off - unconditionally disable
66     + auto - kernel detects whether your CPU model is
67     + vulnerable to issues that PTI mitigates
68     +
69     + Not specifying this option is equivalent to pti=auto.
70     +
71     + nopti [X86_64]
72     + Equivalent to pti=off
73    
74     pty.legacy_count=
75     [KNL] Number of legacy pty's. Overwrites compiled-in
76     @@ -3901,6 +3913,29 @@
77     sonypi.*= [HW] Sony Programmable I/O Control Device driver
78     See Documentation/laptops/sonypi.txt
79    
80     + spectre_v2= [X86] Control mitigation of Spectre variant 2
81     + (indirect branch speculation) vulnerability.
82     +
83     + on - unconditionally enable
84     + off - unconditionally disable
85     + auto - kernel detects whether your CPU model is
86     + vulnerable
87     +
88     + Selecting 'on' will, and 'auto' may, choose a
89     + mitigation method at run time according to the
90     + CPU, the available microcode, the setting of the
91     + CONFIG_RETPOLINE configuration option, and the
92     + compiler with which the kernel was built.
93     +
94     + Specific mitigations can also be selected manually:
95     +
96     + retpoline - replace indirect branches
97     + retpoline,generic - Google's original retpoline
98     + retpoline,amd - AMD-specific minimal thunk
99     +
100     + Not specifying this option is equivalent to
101     + spectre_v2=auto.
102     +
103     spia_io_base= [HW,MTD]
104     spia_fio_base=
105     spia_pedr=
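Taken together, the entries above give full boot-time control over both mitigations. As an illustrative command line (not part of this patch), a machine could keep PTI auto-detection while forcing the generic retpoline:

    pti=auto spectre_v2=retpoline,generic

while booting with "nopti nospectre_v2" disables both, trading the hardening for faster kernel entries and indirect branches.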
106     diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
107     new file mode 100644
108     index 000000000000..d11eff61fc9a
109     --- /dev/null
110     +++ b/Documentation/x86/pti.txt
111     @@ -0,0 +1,186 @@
112     +Overview
113     +========
114     +
115     +Page Table Isolation (pti, previously known as KAISER[1]) is a
116     +countermeasure against attacks on the shared user/kernel address
117     +space such as the "Meltdown" approach[2].
118     +
119     +To mitigate this class of attacks, we create an independent set of
120     +page tables for use only when running userspace applications. When
121     +the kernel is entered via syscalls, interrupts or exceptions, the
122     +page tables are switched to the full "kernel" copy. When the system
123     +switches back to user mode, the user copy is used again.
124     +
125     +The userspace page tables contain only a minimal amount of kernel
126     +data: only what is needed to enter/exit the kernel such as the
127     +entry/exit functions themselves and the interrupt descriptor table
128     +(IDT). There are a few strictly unnecessary things that get mapped
129     +such as the first C function when entering an interrupt (see
130     +comments in pti.c).
131     +
132     +This approach helps to ensure that side-channel attacks leveraging
133     +the paging structures do not function when PTI is enabled. It can be
134     +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
135     +Once enabled at compile-time, it can be disabled at boot with the
136     +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
137     +
138     +Page Table Management
139     +=====================
140     +
141     +When PTI is enabled, the kernel manages two sets of page tables.
142     +The first set is very similar to the single set which is present in
143     +kernels without PTI. This includes a complete mapping of userspace
144     +that the kernel can use for things like copy_to_user().
145     +
146     +Although _complete_, the user portion of the kernel page tables is
147     +crippled by setting the NX bit in the top level. This ensures
148     +that any missed kernel->user CR3 switch will immediately crash
149     +userspace upon executing its first instruction.
150     +
151     +The userspace page tables map only the kernel data needed to enter
152     +and exit the kernel. This data is entirely contained in the 'struct
153     +cpu_entry_area' structure, which is placed in the fixmap, giving
154     +each CPU's copy of the area a compile-time-fixed virtual address.
155     +
156     +For new userspace mappings, the kernel makes the entries in its
157     +page tables like normal. The only difference is when the kernel
158     +makes entries in the top (PGD) level. In addition to setting the
159     +entry in the main kernel PGD, a copy of the entry is made in the
160     +userspace page tables' PGD.
161     +
162     +This sharing at the PGD level also inherently shares all the lower
163     +layers of the page tables. This leaves a single, shared set of
164     +userspace page tables to manage. One PTE to lock, one set of
165     +accessed bits, dirty bits, etc...
166     +
167     +Overhead
168     +========
169     +
170     +Protection against side-channel attacks is important. But,
171     +this protection comes at a cost:
172     +
173     +1. Increased Memory Use
174     + a. Each process now needs an order-1 PGD instead of order-0.
175     + (Consumes an additional 4k per process).
176     + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
177     + aligned so that it can be mapped by setting a single PMD
178     + entry. This consumes nearly 2MB of RAM once the kernel
179     + is decompressed, but no space in the kernel image itself.
180     +
181     +2. Runtime Cost
182     + a. CR3 manipulation to switch between the page table copies
183     + must be done at interrupt, syscall, and exception entry
184     + and exit (it can be skipped when the kernel is interrupted,
185     + though.) Moves to CR3 are on the order of a hundred
186     + cycles, and are required at every entry and exit.
187     + b. A "trampoline" must be used for SYSCALL entry. This
188     + trampoline depends on a smaller set of resources than the
189     + non-PTI SYSCALL entry code, so requires mapping fewer
190     + things into the userspace page tables. The downside is
191     + that stacks must be switched at entry time.
192     + c. Global pages are disabled for all kernel structures not
193     + mapped into both kernel and userspace page tables. This
194     + feature of the MMU allows different processes to share TLB
195     + entries mapping the kernel. Losing the feature means more
196     + TLB misses after a context switch. The actual loss of
197     + performance is very small, however, never exceeding 1%.
198     + d. Process Context IDentifiers (PCID) is a CPU feature that
199     + allows us to skip flushing the entire TLB when switching page
200     + tables by setting a special bit in CR3 when the page tables
201     + are changed. This makes switching the page tables (at context
202     + switch, or kernel entry/exit) cheaper. But, on systems with
203     + PCID support, the context switch code must flush both the user
204     + and kernel entries out of the TLB. The user PCID TLB flush is
205     + deferred until the exit to userspace, minimizing the cost.
206     + See intel.com/sdm for the gory PCID/INVPCID details.
207     + e. The userspace page tables must be populated for each new
208     + process. Even without PTI, the shared kernel mappings
209     + are created by copying top-level (PGD) entries into each
210     + new process. But, with PTI, there are now *two* kernel
211     + mappings: one in the kernel page tables that maps everything
212     + and one for the entry/exit structures. At fork(), we need to
213     + copy both.
214     + f. In addition to the fork()-time copying, there must also
215     + be an update to the userspace PGD any time a set_pgd() is done
216     + on a PGD used to map userspace. This ensures that the kernel
217     + and userspace copies always map the same userspace
218     + memory.
219     + g. On systems without PCID support, each CR3 write flushes
220     + the entire TLB. That means that each syscall, interrupt
221     + or exception flushes the TLB.
222     + h. INVPCID is a TLB-flushing instruction which allows flushing
223     + of TLB entries for non-current PCIDs. Some systems support
224     + PCIDs, but do not support INVPCID. On these systems, addresses
225     + can only be flushed from the TLB for the current PCID. When
226     + flushing a kernel address, we need to flush all PCIDs, so a
227     + single kernel address flush will require a TLB-flushing CR3
228     + write upon the next use of every PCID.
229     +
230     +Possible Future Work
231     +====================
232     +1. We can be more careful about not writing to CR3
233     + unless its value has actually changed.
234     +2. Allow PTI to be enabled/disabled at runtime in addition to the
235     + boot-time switching.
236     +
237     +Testing
238     +========
239     +
240     +To test stability of PTI, the following test procedure is recommended,
241     +ideally doing all of these in parallel:
242     +
243     +1. Set CONFIG_DEBUG_ENTRY=y
244     +2. Run several copies of all of the tools/testing/selftests/x86/ tests
245     + (excluding MPX and protection_keys) in a loop on multiple CPUs for
246     + several minutes. These tests frequently uncover corner cases in the
247     + kernel entry code. In general, old kernels might cause these tests
248     + themselves to crash, but they should never crash the kernel.
249     +3. Run the 'perf' tool in a mode (top or record) that generates many
250     + frequent performance monitoring non-maskable interrupts (see "NMI"
251     + in /proc/interrupts). This exercises the NMI entry/exit code which
252     + is known to trigger bugs in code paths that did not expect to be
253     + interrupted, including nested NMIs. Using "-c" boosts the rate of
254     + NMIs, and using two -c with separate counters encourages nested NMIs
255     + and less deterministic behavior.
256     +
257     + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
258     +
259     +4. Launch a KVM virtual machine.
260     +5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
261     + This has been a lightly-tested code path and needs extra scrutiny.
262     +
263     +Debugging
264     +=========
265     +
266     +Bugs in PTI cause a few different signatures of crashes
267     +that are worth noting here.
268     +
269     + * Failures of the selftests/x86 code. Usually a bug in one of the
270     + more obscure corners of entry_64.S
271     + * Crashes in early boot, especially around CPU bringup. Bugs
272     + in the trampoline code or mappings cause these.
273     + * Crashes at the first interrupt. Caused by bugs in entry_64.S,
274     + like screwing up a page table switch. Also caused by
275     + incorrectly mapping the IRQ handler entry code.
276     + * Crashes at the first NMI. The NMI code is separate from main
277     + interrupt handlers and can have bugs that do not affect
278     + normal interrupts. Also caused by incorrectly mapping NMI
279     + code. NMIs that interrupt the entry code must be very
280     + careful and can be the cause of crashes that show up when
281     + running perf.
282     + * Kernel crashes at the first exit to userspace. entry_64.S
283     + bugs, or failing to map some of the exit code.
284     + * Crashes at first interrupt that interrupts userspace. The paths
285     + in entry_64.S that return to userspace are sometimes separate
286     + from the ones that return to the kernel.
287     + * Double faults: overflowing the kernel stack because of page
288     + faults upon page faults. Caused by touching non-pti-mapped
289     + data in the entry code, or forgetting to switch to kernel
290     + CR3 before calling into C functions which are not pti-mapped.
291     + * Userspace segfaults early in boot, sometimes manifesting
292     + as mount(8) failing to mount the rootfs. These have
293     + tended to be TLB invalidation issues. Usually invalidating
294     + the wrong PCID, or otherwise missing an invalidation.
295     +
296     +1. https://gruss.cc/files/kaiser.pdf
297     +2. https://meltdownattack.com/meltdown.pdf
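Two passages in the document above invite a more concrete form. First, the PGD mirroring described under "Page Table Management": with the order-1 (8k) PGD, the kernel half occupies the first 4k page and the user half the second, so the user copy of any slot sits at a fixed PAGE_SIZE offset. A simplified sketch with hypothetical helper names (the real kernel also filters which entries may be propagated and applies the NX poisoning mentioned above):

    /* Hypothetical, simplified sketch of the PGD mirroring. */
    static pgd_t *user_half(pgd_t *kernel_pgdp)
    {
        /* The 8k PGD is 8k-aligned: the user copy of a slot is
         * the same index, one 4k page higher. */
        return (pgd_t *)((unsigned long)kernel_pgdp + PAGE_SIZE);
    }

    static void set_pgd_both(pgd_t *kernel_pgdp, pgd_t pgd)
    {
        *kernel_pgdp = pgd;            /* main kernel page tables */
        *user_half(kernel_pgdp) = pgd; /* userspace page tables   */
    }

Second, the CR3 cost from the "Overhead" section as rough arithmetic (an illustrative estimate, not a measurement): at roughly 100 cycles per CR3 write and two writes per kernel entry/exit, a workload making 100,000 syscalls per second spends about 2 * 10^7 cycles per second on the switches, i.e. around 0.7% of one 3 GHz core, before any added TLB misses are counted.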
298     diff --git a/Makefile b/Makefile
299     index a67c5179052a..4951305eb867 100644
300     --- a/Makefile
301     +++ b/Makefile
302     @@ -1,7 +1,7 @@
303     # SPDX-License-Identifier: GPL-2.0
304     VERSION = 4
305     PATCHLEVEL = 14
306     -SUBLEVEL = 13
307     +SUBLEVEL = 14
308     EXTRAVERSION =
309     NAME = Petit Gorille
310    
311     diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
312     index c5ff6bfe2825..2f2d176396aa 100644
313     --- a/arch/mips/kernel/process.c
314     +++ b/arch/mips/kernel/process.c
315     @@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
316     struct task_struct *t;
317     int max_users;
318    
319     + /* If nothing to change, return right away, successfully. */
320     + if (value == mips_get_process_fp_mode(task))
321     + return 0;
322     +
323     + /* Only accept a mode change if 64-bit FP enabled for o32. */
324     + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
325     + return -EOPNOTSUPP;
326     +
327     + /* And only for o32 tasks. */
328     + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
329     + return -EOPNOTSUPP;
330     +
331     /* Check the value is valid */
332     if (value & ~known_bits)
333     return -EOPNOTSUPP;
334     diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
335     index 5a09c2901a76..c552c20237d4 100644
336     --- a/arch/mips/kernel/ptrace.c
337     +++ b/arch/mips/kernel/ptrace.c
338     @@ -410,63 +410,160 @@ static int gpr64_set(struct task_struct *target,
339    
340     #endif /* CONFIG_64BIT */
341    
342     +/*
343     + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
344     + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
345     + * correspond 1:1 to buffer slots. Only general registers are copied.
346     + */
347     +static int fpr_get_fpa(struct task_struct *target,
348     + unsigned int *pos, unsigned int *count,
349     + void **kbuf, void __user **ubuf)
350     +{
351     + return user_regset_copyout(pos, count, kbuf, ubuf,
352     + &target->thread.fpu,
353     + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
354     +}
355     +
356     +/*
357     + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
358     + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
359     + * general register slots are copied to buffer slots. Only general
360     + * registers are copied.
361     + */
362     +static int fpr_get_msa(struct task_struct *target,
363     + unsigned int *pos, unsigned int *count,
364     + void **kbuf, void __user **ubuf)
365     +{
366     + unsigned int i;
367     + u64 fpr_val;
368     + int err;
369     +
370     + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
371     + for (i = 0; i < NUM_FPU_REGS; i++) {
372     + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
373     + err = user_regset_copyout(pos, count, kbuf, ubuf,
374     + &fpr_val, i * sizeof(elf_fpreg_t),
375     + (i + 1) * sizeof(elf_fpreg_t));
376     + if (err)
377     + return err;
378     + }
379     +
380     + return 0;
381     +}
382     +
383     +/*
384     + * Copy the floating-point context to the supplied NT_PRFPREG buffer.
385     + * Choose the appropriate helper for general registers, and then copy
386     + * the FCSR register separately.
387     + */
388     static int fpr_get(struct task_struct *target,
389     const struct user_regset *regset,
390     unsigned int pos, unsigned int count,
391     void *kbuf, void __user *ubuf)
392     {
393     - unsigned i;
394     + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
395     int err;
396     - u64 fpr_val;
397    
398     - /* XXX fcr31 */
399     + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
400     + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
401     + else
402     + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
403     + if (err)
404     + return err;
405    
406     - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
407     - return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
408     - &target->thread.fpu,
409     - 0, sizeof(elf_fpregset_t));
410     + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
411     + &target->thread.fpu.fcr31,
412     + fcr31_pos, fcr31_pos + sizeof(u32));
413    
414     - for (i = 0; i < NUM_FPU_REGS; i++) {
415     - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
416     - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
417     - &fpr_val, i * sizeof(elf_fpreg_t),
418     - (i + 1) * sizeof(elf_fpreg_t));
419     + return err;
420     +}
421     +
422     +/*
423     + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
424     + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
425     + * context's general register slots. Only general registers are copied.
426     + */
427     +static int fpr_set_fpa(struct task_struct *target,
428     + unsigned int *pos, unsigned int *count,
429     + const void **kbuf, const void __user **ubuf)
430     +{
431     + return user_regset_copyin(pos, count, kbuf, ubuf,
432     + &target->thread.fpu,
433     + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
434     +}
435     +
436     +/*
437     + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
438     + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
439     + * bits only of FP context's general register slots. Only general
440     + * registers are copied.
441     + */
442     +static int fpr_set_msa(struct task_struct *target,
443     + unsigned int *pos, unsigned int *count,
444     + const void **kbuf, const void __user **ubuf)
445     +{
446     + unsigned int i;
447     + u64 fpr_val;
448     + int err;
449     +
450     + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
451     + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
452     + err = user_regset_copyin(pos, count, kbuf, ubuf,
453     + &fpr_val, i * sizeof(elf_fpreg_t),
454     + (i + 1) * sizeof(elf_fpreg_t));
455     if (err)
456     return err;
457     + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
458     }
459    
460     return 0;
461     }
462    
463     +/*
464     + * Copy the supplied NT_PRFPREG buffer to the floating-point context.
465     + * Choose the appropriate helper for general registers, and then copy
466     + * the FCSR register separately.
467     + *
468     + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
469     + * which is supposed to have been guaranteed by the kernel before
470     + * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
471     + * so that we can safely avoid preinitializing temporaries for
472     + * partial register writes.
473     + */
474     static int fpr_set(struct task_struct *target,
475     const struct user_regset *regset,
476     unsigned int pos, unsigned int count,
477     const void *kbuf, const void __user *ubuf)
478     {
479     - unsigned i;
480     + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
481     + u32 fcr31;
482     int err;
483     - u64 fpr_val;
484    
485     - /* XXX fcr31 */
486     + BUG_ON(count % sizeof(elf_fpreg_t));
487     +
488     + if (pos + count > sizeof(elf_fpregset_t))
489     + return -EIO;
490    
491     init_fp_ctx(target);
492    
493     - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
494     - return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
495     - &target->thread.fpu,
496     - 0, sizeof(elf_fpregset_t));
497     + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
498     + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
499     + else
500     + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
501     + if (err)
502     + return err;
503    
504     - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
505     - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
506     + if (count > 0) {
507     err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
508     - &fpr_val, i * sizeof(elf_fpreg_t),
509     - (i + 1) * sizeof(elf_fpreg_t));
510     + &fcr31,
511     + fcr31_pos, fcr31_pos + sizeof(u32));
512     if (err)
513     return err;
514     - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
515     +
516     + ptrace_setfcr31(target, fcr31);
517     }
518    
519     - return 0;
520     + return err;
521     }
522    
523     enum mips_regset {
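The layout the reworked fpr_get()/fpr_set() implement (NUM_FPU_REGS 64-bit general register slots followed by the 32-bit fcr31) is what a debugger sees through the NT_PRFPREG regset. A hedged userspace sketch of reading it (NUM_FPU_REGS is 32 on MIPS; the trailing pad keeps the buffer a multiple of the 8-byte unit the regset code transfers, matching the `count % sizeof(elf_fpreg_t)' assumption noted above):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <linux/elf.h>      /* NT_PRFPREG */

    struct mips_fpregs {        /* mirrors the regset layout above */
        uint64_t fpr[32];       /* lower 64 bits of each FP register */
        uint32_t fcr31;         /* FP control/status register */
        uint32_t pad;           /* keep the size a multiple of 8 */
    };

    static long read_fpregs(pid_t pid, struct mips_fpregs *regs)
    {
        struct iovec iov = {
            .iov_base = regs,
            .iov_len  = sizeof(*regs),
        };

        memset(regs, 0, sizeof(*regs));
        return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRFPREG, &iov);
    }

Writing goes the other way: modify the struct and pass the same iovec to PTRACE_SETREGSET, which lands in fpr_set() above.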
524     diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
525     index 29ebe2fd5867..a93d719edc90 100644
526     --- a/arch/powerpc/kvm/book3s_64_mmu.c
527     +++ b/arch/powerpc/kvm/book3s_64_mmu.c
528     @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
529     gpte->may_read = true;
530     gpte->may_write = true;
531     gpte->page_size = MMU_PAGE_4K;
532     + gpte->wimg = HPTE_R_M;
533    
534     return 0;
535     }
536     diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
537     index 59247af5fd45..2645d484e945 100644
538     --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
539     +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
540     @@ -65,11 +65,17 @@ struct kvm_resize_hpt {
541     u32 order;
542    
543     /* These fields protected by kvm->lock */
544     +
545     + /* Possible values and their usage:
546     + * <0 an error occurred during allocation,
547     + * -EBUSY allocation is in progress,
548     + * 0 allocation made successfully.
549     + */
550     int error;
551     - bool prepare_done;
552    
553     - /* Private to the work thread, until prepare_done is true,
554     - * then protected by kvm->resize_hpt_sem */
555     + /* Private to the work thread, until error != -EBUSY,
556     + * then protected by kvm->lock.
557     + */
558     struct kvm_hpt_info hpt;
559     };
560    
561     @@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
562     * Reset all the reverse-mapping chains for all memslots
563     */
564     kvmppc_rmap_reset(kvm);
565     - /* Ensure that each vcpu will flush its TLB on next entry. */
566     - cpumask_setall(&kvm->arch.need_tlb_flush);
567     err = 0;
568     goto out;
569     }
570     @@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
571     kvmppc_set_hpt(kvm, &info);
572    
573     out:
574     + if (err == 0)
575     + /* Ensure that each vcpu will flush its TLB on next entry. */
576     + cpumask_setall(&kvm->arch.need_tlb_flush);
577     +
578     mutex_unlock(&kvm->lock);
579     return err;
580     }
581     @@ -1424,16 +1432,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
582    
583     static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
584     {
585     - BUG_ON(kvm->arch.resize_hpt != resize);
586     + if (WARN_ON(!mutex_is_locked(&kvm->lock)))
587     + return;
588    
589     if (!resize)
590     return;
591    
592     - if (resize->hpt.virt)
593     - kvmppc_free_hpt(&resize->hpt);
594     + if (resize->error != -EBUSY) {
595     + if (resize->hpt.virt)
596     + kvmppc_free_hpt(&resize->hpt);
597     + kfree(resize);
598     + }
599    
600     - kvm->arch.resize_hpt = NULL;
601     - kfree(resize);
602     + if (kvm->arch.resize_hpt == resize)
603     + kvm->arch.resize_hpt = NULL;
604     }
605    
606     static void resize_hpt_prepare_work(struct work_struct *work)
607     @@ -1442,17 +1454,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
608     struct kvm_resize_hpt,
609     work);
610     struct kvm *kvm = resize->kvm;
611     - int err;
612     + int err = 0;
613    
614     - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
615     - resize->order);
616     -
617     - err = resize_hpt_allocate(resize);
618     + if (WARN_ON(resize->error != -EBUSY))
619     + return;
620    
621     mutex_lock(&kvm->lock);
622    
623     + /* Request is still current? */
624     + if (kvm->arch.resize_hpt == resize) {
625     + /* The allocation here may be large and may sleep for a
626     + * while: do not hold kvm->lock across it.
627     + */
628     + mutex_unlock(&kvm->lock);
629     +
630     + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
631     + resize->order);
632     +
633     + err = resize_hpt_allocate(resize);
634     +
635     + /* We have a strict assumption about -EBUSY
636     + * when preparing for HPT resize.
637     + */
638     + if (WARN_ON(err == -EBUSY))
639     + err = -EINPROGRESS;
640     +
641     + mutex_lock(&kvm->lock);
642     + /* It is possible that kvm->arch.resize_hpt != resize
643     + * after we grab kvm->lock again.
644     + */
645     + }
646     +
647     resize->error = err;
648     - resize->prepare_done = true;
649     +
650     + if (kvm->arch.resize_hpt != resize)
651     + resize_hpt_release(kvm, resize);
652    
653     mutex_unlock(&kvm->lock);
654     }
655     @@ -1477,14 +1513,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
656    
657     if (resize) {
658     if (resize->order == shift) {
659     - /* Suitable resize in progress */
660     - if (resize->prepare_done) {
661     - ret = resize->error;
662     - if (ret != 0)
663     - resize_hpt_release(kvm, resize);
664     - } else {
665     + /* Suitable resize in progress? */
666     + ret = resize->error;
667     + if (ret == -EBUSY)
668     ret = 100; /* estimated time in ms */
669     - }
670     + else if (ret)
671     + resize_hpt_release(kvm, resize);
672    
673     goto out;
674     }
675     @@ -1504,6 +1538,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
676     ret = -ENOMEM;
677     goto out;
678     }
679     +
680     + resize->error = -EBUSY;
681     resize->order = shift;
682     resize->kvm = kvm;
683     INIT_WORK(&resize->work, resize_hpt_prepare_work);
684     @@ -1558,16 +1594,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
685     if (!resize || (resize->order != shift))
686     goto out;
687    
688     - ret = -EBUSY;
689     - if (!resize->prepare_done)
690     - goto out;
691     -
692     ret = resize->error;
693     - if (ret != 0)
694     + if (ret)
695     goto out;
696    
697     ret = resize_hpt_rehash(resize);
698     - if (ret != 0)
699     + if (ret)
700     goto out;
701    
702     resize_hpt_pivot(resize);
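With `error' now carrying the whole state (-EBUSY while the worker runs, 0 on success, another negative value on failure), the prepare ioctl has simple polling semantics: a positive return is an estimated wait in milliseconds and zero means the new HPT is ready to commit. A hedged userspace sketch of that loop (KVM_PPC_RESIZE_HPT_PREPARE and struct kvm_ppc_resize_hpt are the documented KVM API; obtaining vm_fd and shift is assumed done by the caller):

    #include <errno.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Returns 0 once the new HPT is allocated (commit may follow),
     * or -1 with errno set if preparation failed. */
    static int wait_hpt_prepare(int vm_fd, unsigned int shift)
    {
        struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
        int ret;

        for (;;) {
            ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
            if (ret <= 0)
                return ret;
            usleep((useconds_t)ret * 1000);  /* estimated ms left */
        }
    }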
703     diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
704     index 69a09444d46e..e2ef16198456 100644
705     --- a/arch/powerpc/kvm/book3s_pr.c
706     +++ b/arch/powerpc/kvm/book3s_pr.c
707     @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
708     #define MSR_USER32 MSR_USER
709     #define MSR_USER64 MSR_USER
710     #define HW_PAGE_SIZE PAGE_SIZE
711     +#define HPTE_R_M _PAGE_COHERENT
712     #endif
713    
714     static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
715     @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
716     pte.eaddr = eaddr;
717     pte.vpage = eaddr >> 12;
718     pte.page_size = MMU_PAGE_64K;
719     + pte.wimg = HPTE_R_M;
720     }
721    
722     switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
723     diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
724     index 592c974d4558..17de6acc0eab 100644
725     --- a/arch/x86/Kconfig
726     +++ b/arch/x86/Kconfig
727     @@ -89,6 +89,7 @@ config X86
728     select GENERIC_CLOCKEVENTS_MIN_ADJUST
729     select GENERIC_CMOS_UPDATE
730     select GENERIC_CPU_AUTOPROBE
731     + select GENERIC_CPU_VULNERABILITIES
732     select GENERIC_EARLY_IOREMAP
733     select GENERIC_FIND_FIRST_BIT
734     select GENERIC_IOMAP
735     @@ -428,6 +429,19 @@ config GOLDFISH
736     def_bool y
737     depends on X86_GOLDFISH
738    
739     +config RETPOLINE
740     + bool "Avoid speculative indirect branches in kernel"
741     + default y
742     + help
743     + Compile kernel with the retpoline compiler options to guard against
744     + kernel-to-user data leaks by avoiding speculative indirect
745     + branches. Requires a compiler with -mindirect-branch=thunk-extern
746     + support for full protection. The kernel may run slower.
747     +
748     + Without compiler support, at least indirect branches in assembler
749     + code are eliminated. Since this includes the syscall entry path,
750     + it is not entirely pointless.
751     +
752     config INTEL_RDT
753     bool "Intel Resource Director Technology support"
754     default n
755     diff --git a/arch/x86/Makefile b/arch/x86/Makefile
756     index a20eacd9c7e9..504b1a4535ac 100644
757     --- a/arch/x86/Makefile
758     +++ b/arch/x86/Makefile
759     @@ -235,6 +235,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
760     #
761     KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
762    
763     +# Avoid indirect branches in kernel to deal with Spectre
764     +ifdef CONFIG_RETPOLINE
765     + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
766     + ifneq ($(RETPOLINE_CFLAGS),)
767     + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
768     + endif
769     +endif
770     +
771     archscripts: scripts_basic
772     $(Q)$(MAKE) $(build)=arch/x86/tools relocs
773    
774     diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
775     index 16627fec80b2..3d09e3aca18d 100644
776     --- a/arch/x86/crypto/aesni-intel_asm.S
777     +++ b/arch/x86/crypto/aesni-intel_asm.S
778     @@ -32,6 +32,7 @@
779     #include <linux/linkage.h>
780     #include <asm/inst.h>
781     #include <asm/frame.h>
782     +#include <asm/nospec-branch.h>
783    
784     /*
785     * The following macros are used to move an (un)aligned 16 byte value to/from
786     @@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
787     pxor INC, STATE4
788     movdqu IV, 0x30(OUTP)
789    
790     - call *%r11
791     + CALL_NOSPEC %r11
792    
793     movdqu 0x00(OUTP), INC
794     pxor INC, STATE1
795     @@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
796     _aesni_gf128mul_x_ble()
797     movups IV, (IVP)
798    
799     - call *%r11
800     + CALL_NOSPEC %r11
801    
802     movdqu 0x40(OUTP), INC
803     pxor INC, STATE1
804     diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
805     index f7c495e2863c..a14af6eb09cb 100644
806     --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
807     +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
808     @@ -17,6 +17,7 @@
809    
810     #include <linux/linkage.h>
811     #include <asm/frame.h>
812     +#include <asm/nospec-branch.h>
813    
814     #define CAMELLIA_TABLE_BYTE_LEN 272
815    
816     @@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
817     vpxor 14 * 16(%rax), %xmm15, %xmm14;
818     vpxor 15 * 16(%rax), %xmm15, %xmm15;
819    
820     - call *%r9;
821     + CALL_NOSPEC %r9;
822    
823     addq $(16 * 16), %rsp;
824    
825     diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
826     index eee5b3982cfd..b66bbfa62f50 100644
827     --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
828     +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
829     @@ -12,6 +12,7 @@
830    
831     #include <linux/linkage.h>
832     #include <asm/frame.h>
833     +#include <asm/nospec-branch.h>
834    
835     #define CAMELLIA_TABLE_BYTE_LEN 272
836    
837     @@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
838     vpxor 14 * 32(%rax), %ymm15, %ymm14;
839     vpxor 15 * 32(%rax), %ymm15, %ymm15;
840    
841     - call *%r9;
842     + CALL_NOSPEC %r9;
843    
844     addq $(16 * 32), %rsp;
845    
846     diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
847     index 7a7de27c6f41..d9b734d0c8cc 100644
848     --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
849     +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
850     @@ -45,6 +45,7 @@
851    
852     #include <asm/inst.h>
853     #include <linux/linkage.h>
854     +#include <asm/nospec-branch.h>
855    
856     ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
857    
858     @@ -172,7 +173,7 @@ continue_block:
859     movzxw (bufp, %rax, 2), len
860     lea crc_array(%rip), bufp
861     lea (bufp, len, 1), bufp
862     - jmp *bufp
863     + JMP_NOSPEC bufp
864    
865     ################################################################
866     ## 2a) PROCESS FULL BLOCKS:
867     diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
868     index 45a63e00a6af..3f48f695d5e6 100644
869     --- a/arch/x86/entry/calling.h
870     +++ b/arch/x86/entry/calling.h
871     @@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
872     * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
873     * halves:
874     */
875     -#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
876     -#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
877     +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
878     +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
879     +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
880     +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
881     +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
882    
883     .macro SET_NOFLUSH_BIT reg:req
884     bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
885     @@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
886     .macro ADJUST_KERNEL_CR3 reg:req
887     ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
888     /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
889     - andq $(~PTI_SWITCH_MASK), \reg
890     + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
891     .endm
892    
893     .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
894     @@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
895     /* Flush needed, clear the bit */
896     btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
897     movq \scratch_reg2, \scratch_reg
898     - jmp .Lwrcr3_\@
899     + jmp .Lwrcr3_pcid_\@
900    
901     .Lnoflush_\@:
902     movq \scratch_reg2, \scratch_reg
903     SET_NOFLUSH_BIT \scratch_reg
904    
905     +.Lwrcr3_pcid_\@:
906     + /* Flip the ASID to the user version */
907     + orq $(PTI_USER_PCID_MASK), \scratch_reg
908     +
909     .Lwrcr3_\@:
910     - /* Flip the PGD and ASID to the user version */
911     - orq $(PTI_SWITCH_MASK), \scratch_reg
912     + /* Flip the PGD to the user version */
913     + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
914     mov \scratch_reg, %cr3
915     .Lend_\@:
916     .endm
917     @@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
918     movq %cr3, \scratch_reg
919     movq \scratch_reg, \save_reg
920     /*
921     - * Is the "switch mask" all zero? That means that both of
922     - * these are zero:
923     - *
924     - * 1. The user/kernel PCID bit, and
925     - * 2. The user/kernel "bit" that points CR3 to the
926     - * bottom half of the 8k PGD
927     - *
928     - * That indicates a kernel CR3 value, not a user CR3.
929     + * Test the user pagetable bit. If set, then the user page tables
930     + * are active. If clear CR3 already has the kernel page table
931     + * active.
932     */
933     - testq $(PTI_SWITCH_MASK), \scratch_reg
934     - jz .Ldone_\@
935     + bt $PTI_USER_PGTABLE_BIT, \scratch_reg
936     + jnc .Ldone_\@
937    
938     ADJUST_KERNEL_CR3 \scratch_reg
939     movq \scratch_reg, %cr3
940     @@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
941     * KERNEL pages can always resume with NOFLUSH as we do
942     * explicit flushes.
943     */
944     - bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
945     + bt $PTI_USER_PGTABLE_BIT, \save_reg
946     jnc .Lnoflush_\@
947    
948     /*
949     diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
950     index ace8f321a5a1..a1f28a54f23a 100644
951     --- a/arch/x86/entry/entry_32.S
952     +++ b/arch/x86/entry/entry_32.S
953     @@ -44,6 +44,7 @@
954     #include <asm/asm.h>
955     #include <asm/smap.h>
956     #include <asm/frame.h>
957     +#include <asm/nospec-branch.h>
958    
959     .section .entry.text, "ax"
960    
961     @@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
962    
963     /* kernel thread */
964     1: movl %edi, %eax
965     - call *%ebx
966     + CALL_NOSPEC %ebx
967     /*
968     * A kernel thread is allowed to return here after successfully
969     * calling do_execve(). Exit to userspace to complete the execve()
970     @@ -919,7 +920,7 @@ common_exception:
971     movl %ecx, %es
972     TRACE_IRQS_OFF
973     movl %esp, %eax # pt_regs pointer
974     - call *%edi
975     + CALL_NOSPEC %edi
976     jmp ret_from_exception
977     END(common_exception)
978    
979     diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
980     index dd696b966e58..f5fda5f26e34 100644
981     --- a/arch/x86/entry/entry_64.S
982     +++ b/arch/x86/entry/entry_64.S
983     @@ -37,6 +37,7 @@
984     #include <asm/pgtable_types.h>
985     #include <asm/export.h>
986     #include <asm/frame.h>
987     +#include <asm/nospec-branch.h>
988     #include <linux/err.h>
989    
990     #include "calling.h"
991     @@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
992     */
993     pushq %rdi
994     movq $entry_SYSCALL_64_stage2, %rdi
995     - jmp *%rdi
996     + JMP_NOSPEC %rdi
997     END(entry_SYSCALL_64_trampoline)
998    
999     .popsection
1000     @@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath:
1001     * It might end up jumping to the slow path. If it jumps, RAX
1002     * and all argument registers are clobbered.
1003     */
1004     +#ifdef CONFIG_RETPOLINE
1005     + movq sys_call_table(, %rax, 8), %rax
1006     + call __x86_indirect_thunk_rax
1007     +#else
1008     call *sys_call_table(, %rax, 8)
1009     +#endif
1010     .Lentry_SYSCALL_64_after_fastpath_call:
1011    
1012     movq %rax, RAX(%rsp)
1013     @@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64)
1014     jmp entry_SYSCALL64_slow_path
1015    
1016     1:
1017     - jmp *%rax /* Called from C */
1018     + JMP_NOSPEC %rax /* Called from C */
1019     END(stub_ptregs_64)
1020    
1021     .macro ptregs_stub func
1022     @@ -517,7 +523,7 @@ ENTRY(ret_from_fork)
1023     1:
1024     /* kernel thread */
1025     movq %r12, %rdi
1026     - call *%rbx
1027     + CALL_NOSPEC %rbx
1028     /*
1029     * A kernel thread is allowed to return here after successfully
1030     * calling do_execve(). Exit to userspace to complete the execve()
1031     diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
1032     index 141e07b06216..24ffa1e88cf9 100644
1033     --- a/arch/x86/events/intel/bts.c
1034     +++ b/arch/x86/events/intel/bts.c
1035     @@ -582,6 +582,24 @@ static __init int bts_init(void)
1036     if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
1037     return -ENODEV;
1038    
1039     + if (boot_cpu_has(X86_FEATURE_PTI)) {
1040     + /*
1041     + * BTS hardware writes through a virtual memory map; we must
1042     + * either use the kernel physical map or the user mapping of
1043     + * the AUX buffer.
1044     + *
1045     + * However, since this driver supports per-CPU and per-task inherit,
1046     + * we cannot use the user mapping, since it will not be available
1047     + * if we're not running the owning process.
1048     + *
1049     + * With PTI we can't use the kernel map either, because it's not
1050     + * there when we run userspace.
1051     + *
1052     + * For now, disable this driver when using PTI.
1053     + */
1054     + return -ENODEV;
1055     + }
1056     +
1057     bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
1058     PERF_PMU_CAP_EXCLUSIVE;
1059     bts_pmu.task_ctx_nr = perf_sw_context;
1060     diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
1061     index ff700d81e91e..0927cdc4f946 100644
1062     --- a/arch/x86/include/asm/asm-prototypes.h
1063     +++ b/arch/x86/include/asm/asm-prototypes.h
1064     @@ -11,7 +11,32 @@
1065     #include <asm/pgtable.h>
1066     #include <asm/special_insns.h>
1067     #include <asm/preempt.h>
1068     +#include <asm/asm.h>
1069    
1070     #ifndef CONFIG_X86_CMPXCHG64
1071     extern void cmpxchg8b_emu(void);
1072     #endif
1073     +
1074     +#ifdef CONFIG_RETPOLINE
1075     +#ifdef CONFIG_X86_32
1076     +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
1077     +#else
1078     +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
1079     +INDIRECT_THUNK(8)
1080     +INDIRECT_THUNK(9)
1081     +INDIRECT_THUNK(10)
1082     +INDIRECT_THUNK(11)
1083     +INDIRECT_THUNK(12)
1084     +INDIRECT_THUNK(13)
1085     +INDIRECT_THUNK(14)
1086     +INDIRECT_THUNK(15)
1087     +#endif
1088     +INDIRECT_THUNK(ax)
1089     +INDIRECT_THUNK(bx)
1090     +INDIRECT_THUNK(cx)
1091     +INDIRECT_THUNK(dx)
1092     +INDIRECT_THUNK(si)
1093     +INDIRECT_THUNK(di)
1094     +INDIRECT_THUNK(bp)
1095     +INDIRECT_THUNK(sp)
1096     +#endif /* CONFIG_RETPOLINE */
1097     diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
1098     index 21ac898df2d8..f275447862f4 100644
1099     --- a/arch/x86/include/asm/cpufeatures.h
1100     +++ b/arch/x86/include/asm/cpufeatures.h
1101     @@ -203,6 +203,8 @@
1102     #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1103     #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1104     #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
1105     +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
1106     +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
1107     #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1108     #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1109     #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
1110     @@ -342,5 +344,7 @@
1111     #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1112     #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1113     #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
1114     +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
1115     +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
1116    
1117     #endif /* _ASM_X86_CPUFEATURES_H */
1118     diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
1119     index 581bb54dd464..5119e4b555cc 100644
1120     --- a/arch/x86/include/asm/mshyperv.h
1121     +++ b/arch/x86/include/asm/mshyperv.h
1122     @@ -7,6 +7,7 @@
1123     #include <linux/nmi.h>
1124     #include <asm/io.h>
1125     #include <asm/hyperv.h>
1126     +#include <asm/nospec-branch.h>
1127    
1128     /*
1129     * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
1130     @@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
1131     return U64_MAX;
1132    
1133     __asm__ __volatile__("mov %4, %%r8\n"
1134     - "call *%5"
1135     + CALL_NOSPEC
1136     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
1137     "+c" (control), "+d" (input_address)
1138     - : "r" (output_address), "m" (hv_hypercall_pg)
1139     + : "r" (output_address),
1140     + THUNK_TARGET(hv_hypercall_pg)
1141     : "cc", "memory", "r8", "r9", "r10", "r11");
1142     #else
1143     u32 input_address_hi = upper_32_bits(input_address);
1144     @@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
1145     if (!hv_hypercall_pg)
1146     return U64_MAX;
1147    
1148     - __asm__ __volatile__("call *%7"
1149     + __asm__ __volatile__(CALL_NOSPEC
1150     : "=A" (hv_status),
1151     "+c" (input_address_lo), ASM_CALL_CONSTRAINT
1152     : "A" (control),
1153     "b" (input_address_hi),
1154     "D"(output_address_hi), "S"(output_address_lo),
1155     - "m" (hv_hypercall_pg)
1156     + THUNK_TARGET(hv_hypercall_pg)
1157     : "cc", "memory");
1158     #endif /* !x86_64 */
1159     return hv_status;
1160     @@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
1161    
1162     #ifdef CONFIG_X86_64
1163     {
1164     - __asm__ __volatile__("call *%4"
1165     + __asm__ __volatile__(CALL_NOSPEC
1166     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
1167     "+c" (control), "+d" (input1)
1168     - : "m" (hv_hypercall_pg)
1169     + : THUNK_TARGET(hv_hypercall_pg)
1170     : "cc", "r8", "r9", "r10", "r11");
1171     }
1172     #else
1173     @@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
1174     u32 input1_hi = upper_32_bits(input1);
1175     u32 input1_lo = lower_32_bits(input1);
1176    
1177     - __asm__ __volatile__ ("call *%5"
1178     + __asm__ __volatile__ (CALL_NOSPEC
1179     : "=A"(hv_status),
1180     "+c"(input1_lo),
1181     ASM_CALL_CONSTRAINT
1182     : "A" (control),
1183     "b" (input1_hi),
1184     - "m" (hv_hypercall_pg)
1185     + THUNK_TARGET(hv_hypercall_pg)
1186     : "cc", "edi", "esi");
1187     }
1188     #endif
1189     diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
1190     index ab022618a50a..fa11fb1fa570 100644
1191     --- a/arch/x86/include/asm/msr-index.h
1192     +++ b/arch/x86/include/asm/msr-index.h
1193     @@ -352,6 +352,9 @@
1194     #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
1195     #define FAM10H_MMIO_CONF_BASE_SHIFT 20
1196     #define MSR_FAM10H_NODE_ID 0xc001100c
1197     +#define MSR_F10H_DECFG 0xc0011029
1198     +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
1199     +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
1200    
1201     /* K8 MSRs */
1202     #define MSR_K8_TOP_MEM1 0xc001001a
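These DECFG definitions support the retpoline,amd variant introduced later in this patch: its "lfence; jmp" and "lfence; call" sequences are only a sufficient speculation barrier if LFENCE is dispatch-serializing, which AMD guarantees once this MSR bit is set. A sketch using the kernel's existing msr_set_bit() helper (the wrapper function is illustrative, not code from this patch):

    #include <asm/msr.h>
    #include <asm/msr-index.h>

    static void lfence_serialize_setup(void)
    {
        /* Make LFENCE dispatch-serializing so it can be used as a
         * speculation barrier by the AMD retpoline variant. */
        msr_set_bit(MSR_F10H_DECFG,
                    MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
    }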
1203     diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
1204     new file mode 100644
1205     index 000000000000..402a11c803c3
1206     --- /dev/null
1207     +++ b/arch/x86/include/asm/nospec-branch.h
1208     @@ -0,0 +1,214 @@
1209     +/* SPDX-License-Identifier: GPL-2.0 */
1210     +
1211     +#ifndef __NOSPEC_BRANCH_H__
1212     +#define __NOSPEC_BRANCH_H__
1213     +
1214     +#include <asm/alternative.h>
1215     +#include <asm/alternative-asm.h>
1216     +#include <asm/cpufeatures.h>
1217     +
1218     +/*
1219     + * Fill the CPU return stack buffer.
1220     + *
1221     + * Each entry in the RSB, if used for a speculative 'ret', contains an
1222     + * infinite 'pause; jmp' loop to capture speculative execution.
1223     + *
1224     + * This is required in various cases for retpoline and IBRS-based
1225     + * mitigations for the Spectre variant 2 vulnerability. Sometimes to
1226     + * eliminate potentially bogus entries from the RSB, and sometimes
1227     + * purely to ensure that it doesn't get empty, which on some CPUs would
1228     + * allow predictions from other (unwanted!) sources to be used.
1229     + *
1230     + * We define a CPP macro such that it can be used from both .S files and
1231     + * inline assembly. It's possible to do a .macro and then include that
1232     + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
1233     + */
1234     +
1235     +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
1236     +#define RSB_FILL_LOOPS 16 /* To avoid underflow */
1237     +
1238     +/*
1239     + * Google experimented with loop-unrolling and this turned out to be
1240     + * the optimal version — two calls, each with their own speculation
1241     + * trap should their return address end up getting used, in a loop.
1242     + */
1243     +#define __FILL_RETURN_BUFFER(reg, nr, sp) \
1244     + mov $(nr/2), reg; \
1245     +771: \
1246     + call 772f; \
1247     +773: /* speculation trap */ \
1248     + pause; \
1249     + jmp 773b; \
1250     +772: \
1251     + call 774f; \
1252     +775: /* speculation trap */ \
1253     + pause; \
1254     + jmp 775b; \
1255     +774: \
1256     + dec reg; \
1257     + jnz 771b; \
1258     + add $(BITS_PER_LONG/8) * nr, sp;
1259     +
1260     +#ifdef __ASSEMBLY__
1261     +
1262     +/*
1263     + * This should be used immediately before a retpoline alternative. It tells
1264     + * objtool where the retpolines are so that it can make sense of the control
1265     + * flow by just reading the original instruction(s) and ignoring the
1266     + * alternatives.
1267     + */
1268     +.macro ANNOTATE_NOSPEC_ALTERNATIVE
1269     + .Lannotate_\@:
1270     + .pushsection .discard.nospec
1271     + .long .Lannotate_\@ - .
1272     + .popsection
1273     +.endm
1274     +
1275     +/*
1276     + * These are the bare retpoline primitives for indirect jmp and call.
1277     + * Do not use these directly; they only exist to make the ALTERNATIVE
1278     + * invocation below less ugly.
1279     + */
1280     +.macro RETPOLINE_JMP reg:req
1281     + call .Ldo_rop_\@
1282     +.Lspec_trap_\@:
1283     + pause
1284     + jmp .Lspec_trap_\@
1285     +.Ldo_rop_\@:
1286     + mov \reg, (%_ASM_SP)
1287     + ret
1288     +.endm
1289     +
1290     +/*
1291     + * This is a wrapper around RETPOLINE_JMP so the called function in reg
1292     + * returns to the instruction after the macro.
1293     + */
1294     +.macro RETPOLINE_CALL reg:req
1295     + jmp .Ldo_call_\@
1296     +.Ldo_retpoline_jmp_\@:
1297     + RETPOLINE_JMP \reg
1298     +.Ldo_call_\@:
1299     + call .Ldo_retpoline_jmp_\@
1300     +.endm
1301     +
1302     +/*
1303     + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
1304     + * indirect jmp/call which may be susceptible to the Spectre variant 2
1305     + * attack.
1306     + */
1307     +.macro JMP_NOSPEC reg:req
1308     +#ifdef CONFIG_RETPOLINE
1309     + ANNOTATE_NOSPEC_ALTERNATIVE
1310     + ALTERNATIVE_2 __stringify(jmp *\reg), \
1311     + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
1312     + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
1313     +#else
1314     + jmp *\reg
1315     +#endif
1316     +.endm
1317     +
1318     +.macro CALL_NOSPEC reg:req
1319     +#ifdef CONFIG_RETPOLINE
1320     + ANNOTATE_NOSPEC_ALTERNATIVE
1321     + ALTERNATIVE_2 __stringify(call *\reg), \
1322     + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
1323     + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
1324     +#else
1325     + call *\reg
1326     +#endif
1327     +.endm
1328     +
1329     + /*
1330     + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
1331     + * monstrosity above, manually.
1332     + */
1333     +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
1334     +#ifdef CONFIG_RETPOLINE
1335     + ANNOTATE_NOSPEC_ALTERNATIVE
1336     + ALTERNATIVE "jmp .Lskip_rsb_\@", \
1337     + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
1338     + \ftr
1339     +.Lskip_rsb_\@:
1340     +#endif
1341     +.endm
1342     +
1343     +#else /* __ASSEMBLY__ */
1344     +
1345     +#define ANNOTATE_NOSPEC_ALTERNATIVE \
1346     + "999:\n\t" \
1347     + ".pushsection .discard.nospec\n\t" \
1348     + ".long 999b - .\n\t" \
1349     + ".popsection\n\t"
1350     +
1351     +#if defined(CONFIG_X86_64) && defined(RETPOLINE)
1352     +
1353     +/*
1354     + * Since the inline asm uses the %V modifier which is only in newer GCC,
1355     + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
1356     + */
1357     +# define CALL_NOSPEC \
1358     + ANNOTATE_NOSPEC_ALTERNATIVE \
1359     + ALTERNATIVE( \
1360     + "call *%[thunk_target]\n", \
1361     + "call __x86_indirect_thunk_%V[thunk_target]\n", \
1362     + X86_FEATURE_RETPOLINE)
1363     +# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
1364     +
1365     +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
1366     +/*
1367     + * For i386 we use the original ret-equivalent retpoline, because
1368     + * otherwise we'll run out of registers. We don't care about CET
1369     + * here, anyway.
1370     + */
1371     +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
1372     + " jmp 904f;\n" \
1373     + " .align 16\n" \
1374     + "901: call 903f;\n" \
1375     + "902: pause;\n" \
1376     + " jmp 902b;\n" \
1377     + " .align 16\n" \
1378     + "903: addl $4, %%esp;\n" \
1379     + " pushl %[thunk_target];\n" \
1380     + " ret;\n" \
1381     + " .align 16\n" \
1382     + "904: call 901b;\n", \
1383     + X86_FEATURE_RETPOLINE)
1384     +
1385     +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1386     +#else /* No retpoline for C / inline asm */
1387     +# define CALL_NOSPEC "call *%[thunk_target]\n"
1388     +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1389     +#endif
1390     +
1391     +/* The Spectre V2 mitigation variants */
1392     +enum spectre_v2_mitigation {
1393     + SPECTRE_V2_NONE,
1394     + SPECTRE_V2_RETPOLINE_MINIMAL,
1395     + SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
1396     + SPECTRE_V2_RETPOLINE_GENERIC,
1397     + SPECTRE_V2_RETPOLINE_AMD,
1398     + SPECTRE_V2_IBRS,
1399     +};
1400     +
1401     +/*
1402     + * On VMEXIT we must ensure that no RSB predictions learned in the guest
1403     + * can be followed in the host, by overwriting the RSB completely. Both
1404     + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
1405     + * CPUs with IBRS_ATT *might* it be avoided.
1406     + */
1407     +static inline void vmexit_fill_RSB(void)
1408     +{
1409     +#ifdef CONFIG_RETPOLINE
1410     + unsigned long loops = RSB_CLEAR_LOOPS / 2;
1411     +
1412     + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
1413     + ALTERNATIVE("jmp 910f",
1414     + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
1415     + X86_FEATURE_RETPOLINE)
1416     + "910:"
1417     + : "=&r" (loops), ASM_CALL_CONSTRAINT
1418     + : "r" (loops) : "memory" );
1419     +#endif
1420     +}
1421     +#endif /* __ASSEMBLY__ */
1422     +#endif /* __NOSPEC_BRANCH_H__ */
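The C-side CALL_NOSPEC above is a drop-in replacement for a literal "call *%[x]" inside inline asm, with THUNK_TARGET() supplying the operand under the fixed name [thunk_target]. A minimal sketch of the conversion pattern follows; the wrapper and its function-pointer argument are hypothetical, while the real conversions appear in the Xen hypercall and irq_32.c hunks later in this patch:

	#include <asm/nospec-branch.h>

	static void indirect_call(void (*func)(void))
	{
		/* was: asm volatile("call *%[call]" : : [call] "r" (func)); */
		asm volatile(CALL_NOSPEC
			     : /* no outputs */
			     : THUNK_TARGET(func)
			     : "memory");
	}

With CONFIG_RETPOLINE and X86_FEATURE_RETPOLINE set, the ALTERNATIVE rewrites this into a call through __x86_indirect_thunk_&lt;reg&gt;; otherwise the plain indirect call is kept.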
1423     diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
1424     index 6a60fea90b9d..625a52a5594f 100644
1425     --- a/arch/x86/include/asm/processor-flags.h
1426     +++ b/arch/x86/include/asm/processor-flags.h
1427     @@ -40,7 +40,7 @@
1428     #define CR3_NOFLUSH BIT_ULL(63)
1429    
1430     #ifdef CONFIG_PAGE_TABLE_ISOLATION
1431     -# define X86_CR3_PTI_SWITCH_BIT 11
1432     +# define X86_CR3_PTI_PCID_USER_BIT 11
1433     #endif
1434    
1435     #else
1436     diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
1437     index f9b48ce152eb..3effd3c994af 100644
1438     --- a/arch/x86/include/asm/tlbflush.h
1439     +++ b/arch/x86/include/asm/tlbflush.h
1440     @@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
1441     * Make sure that the dynamic ASID space does not conflict with the
1442     * bit we are using to switch between user and kernel ASIDs.
1443     */
1444     - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
1445     + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
1446    
1447     /*
1448     * The ASID being passed in here should have respected the
1449     * MAX_ASID_AVAILABLE and thus never have the switch bit set.
1450     */
1451     - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
1452     + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
1453     #endif
1454     /*
1455     * The dynamically-assigned ASIDs that get passed in are small
1456     @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
1457     {
1458     u16 ret = kern_pcid(asid);
1459     #ifdef CONFIG_PAGE_TABLE_ISOLATION
1460     - ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
1461     + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
1462     #endif
1463     return ret;
1464     }
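For concreteness, the user PCID is always the kernel PCID for the same dynamic ASID with bit 11 set on top. A worked example, assuming kern_pcid() maps dynamic ASID n to hardware PCID n + 1 (an assumption; the return statement of kern_pcid() is not part of this hunk):

	kern_pcid(0) == 0x001                  /* kernel half of ASID 0 */
	user_pcid(0) == 0x001 | (1 << 11)      /* == 0x801, user half   */

so the BUILD_BUG_ON above is exactly the condition that the dynamic ASIDs can never collide with bit 11.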
1465     diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
1466     index 7cb282e9e587..bfd882617613 100644
1467     --- a/arch/x86/include/asm/xen/hypercall.h
1468     +++ b/arch/x86/include/asm/xen/hypercall.h
1469     @@ -44,6 +44,7 @@
1470     #include <asm/page.h>
1471     #include <asm/pgtable.h>
1472     #include <asm/smap.h>
1473     +#include <asm/nospec-branch.h>
1474    
1475     #include <xen/interface/xen.h>
1476     #include <xen/interface/sched.h>
1477     @@ -217,9 +218,9 @@ privcmd_call(unsigned call,
1478     __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
1479    
1480     stac();
1481     - asm volatile("call *%[call]"
1482     + asm volatile(CALL_NOSPEC
1483     : __HYPERCALL_5PARAM
1484     - : [call] "a" (&hypercall_page[call])
1485     + : [thunk_target] "a" (&hypercall_page[call])
1486     : __HYPERCALL_CLOBBER5);
1487     clac();
1488    
1489     diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
1490     index 079535e53e2a..9c2a002d9297 100644
1491     --- a/arch/x86/kernel/acpi/boot.c
1492     +++ b/arch/x86/kernel/acpi/boot.c
1493     @@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
1494     #ifdef CONFIG_X86_IO_APIC
1495     #define MP_ISA_BUS 0
1496    
1497     +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1498     + u8 trigger, u32 gsi);
1499     +
1500     static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1501     u32 gsi)
1502     {
1503     - int ioapic;
1504     - int pin;
1505     - struct mpc_intsrc mp_irq;
1506     -
1507     /*
1508     * Check bus_irq boundary.
1509     */
1510     @@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1511     return;
1512     }
1513    
1514     - /*
1515     - * Convert 'gsi' to 'ioapic.pin'.
1516     - */
1517     - ioapic = mp_find_ioapic(gsi);
1518     - if (ioapic < 0)
1519     - return;
1520     - pin = mp_find_ioapic_pin(ioapic, gsi);
1521     -
1522     /*
1523     * TBD: This check is for faulty timer entries, where the override
1524     * erroneously sets the trigger to level, resulting in a HUGE
1525     @@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1526     if ((bus_irq == 0) && (trigger == 3))
1527     trigger = 1;
1528    
1529     - mp_irq.type = MP_INTSRC;
1530     - mp_irq.irqtype = mp_INT;
1531     - mp_irq.irqflag = (trigger << 2) | polarity;
1532     - mp_irq.srcbus = MP_ISA_BUS;
1533     - mp_irq.srcbusirq = bus_irq; /* IRQ */
1534     - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
1535     - mp_irq.dstirq = pin; /* INTIN# */
1536     -
1537     - mp_save_irq(&mp_irq);
1538     -
1539     + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
1540     + return;
1541     /*
1542     * Reset default identity mapping if gsi is also a legacy IRQ,
1543     * otherwise there will be more than one entry with the same GSI
1544     @@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1545     return 0;
1546     }
1547    
1548     +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1549     + u8 trigger, u32 gsi)
1550     +{
1551     + struct mpc_intsrc mp_irq;
1552     + int ioapic, pin;
1553     +
1554     + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
1555     + ioapic = mp_find_ioapic(gsi);
1556     + if (ioapic < 0) {
1557     + pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
1558     + return ioapic;
1559     + }
1560     +
1561     + pin = mp_find_ioapic_pin(ioapic, gsi);
1562     +
1563     + mp_irq.type = MP_INTSRC;
1564     + mp_irq.irqtype = mp_INT;
1565     + mp_irq.irqflag = (trigger << 2) | polarity;
1566     + mp_irq.srcbus = MP_ISA_BUS;
1567     + mp_irq.srcbusirq = bus_irq;
1568     + mp_irq.dstapic = mpc_ioapic_id(ioapic);
1569     + mp_irq.dstirq = pin;
1570     +
1571     + mp_save_irq(&mp_irq);
1572     +
1573     + return 0;
1574     +}
1575     +
1576     static int __init
1577     acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
1578     {
1579     @@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
1580     if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
1581     polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
1582    
1583     - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1584     + if (bus_irq < NR_IRQS_LEGACY)
1585     + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1586     + else
1587     + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
1588     +
1589     acpi_penalize_sci_irq(bus_irq, trigger, polarity);
1590    
1591     /*
1592     diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
1593     index 3344d3382e91..e0b97e4d1db5 100644
1594     --- a/arch/x86/kernel/alternative.c
1595     +++ b/arch/x86/kernel/alternative.c
1596     @@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
1597     static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
1598     {
1599     unsigned long flags;
1600     + int i;
1601    
1602     - if (instr[0] != 0x90)
1603     - return;
1604     + for (i = 0; i < a->padlen; i++) {
1605     + if (instr[i] != 0x90)
1606     + return;
1607     + }
1608    
1609     local_irq_save(flags);
1610     add_nops(instr + (a->instrlen - a->padlen), a->padlen);
1611     diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
1612     index bcb75dc97d44..ea831c858195 100644
1613     --- a/arch/x86/kernel/cpu/amd.c
1614     +++ b/arch/x86/kernel/cpu/amd.c
1615     @@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
1616     set_cpu_cap(c, X86_FEATURE_K8);
1617    
1618     if (cpu_has(c, X86_FEATURE_XMM2)) {
1619     - /* MFENCE stops RDTSC speculation */
1620     - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1621     + unsigned long long val;
1622     + int ret;
1623     +
1624     + /*
1625     + * A serializing LFENCE has less overhead than MFENCE, so
1626     + * use it for execution serialization. On families which
1627     + * don't have that MSR, LFENCE is already serializing.
1628     + * msr_set_bit() uses the safe accessors, too, even if the MSR
1629     + * is not present.
1630     + */
1631     + msr_set_bit(MSR_F10H_DECFG,
1632     + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
1633     +
1634     + /*
1635     + * Verify that the MSR write was successful (could be running
1636     + * under a hypervisor) and only then assume that LFENCE is
1637     + * serializing.
1638     + */
1639     + ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
1640     + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
1641     + /* A serializing LFENCE stops RDTSC speculation */
1642     + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
1643     + } else {
1644     + /* MFENCE stops RDTSC speculation */
1645     + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1646     + }
1647     }
1648    
1649     /*
1650     diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1651     index ba0b2424c9b0..e4dc26185aa7 100644
1652     --- a/arch/x86/kernel/cpu/bugs.c
1653     +++ b/arch/x86/kernel/cpu/bugs.c
1654     @@ -10,6 +10,10 @@
1655     */
1656     #include <linux/init.h>
1657     #include <linux/utsname.h>
1658     +#include <linux/cpu.h>
1659     +
1660     +#include <asm/nospec-branch.h>
1661     +#include <asm/cmdline.h>
1662     #include <asm/bugs.h>
1663     #include <asm/processor.h>
1664     #include <asm/processor-flags.h>
1665     @@ -20,6 +24,8 @@
1666     #include <asm/pgtable.h>
1667     #include <asm/set_memory.h>
1668    
1669     +static void __init spectre_v2_select_mitigation(void);
1670     +
1671     void __init check_bugs(void)
1672     {
1673     identify_boot_cpu();
1674     @@ -29,6 +35,9 @@ void __init check_bugs(void)
1675     print_cpu_info(&boot_cpu_data);
1676     }
1677    
1678     + /* Select the proper spectre mitigation before patching alternatives */
1679     + spectre_v2_select_mitigation();
1680     +
1681     #ifdef CONFIG_X86_32
1682     /*
1683     * Check whether we are able to run this kernel safely on SMP.
1684     @@ -60,3 +69,179 @@ void __init check_bugs(void)
1685     set_memory_4k((unsigned long)__va(0), 1);
1686     #endif
1687     }
1688     +
1689     +/* The kernel command line selection */
1690     +enum spectre_v2_mitigation_cmd {
1691     + SPECTRE_V2_CMD_NONE,
1692     + SPECTRE_V2_CMD_AUTO,
1693     + SPECTRE_V2_CMD_FORCE,
1694     + SPECTRE_V2_CMD_RETPOLINE,
1695     + SPECTRE_V2_CMD_RETPOLINE_GENERIC,
1696     + SPECTRE_V2_CMD_RETPOLINE_AMD,
1697     +};
1698     +
1699     +static const char *spectre_v2_strings[] = {
1700     + [SPECTRE_V2_NONE] = "Vulnerable",
1701     + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
1702     + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
1703     + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
1704     + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
1705     +};
1706     +
1707     +#undef pr_fmt
1708     +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1709     +
1710     +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1711     +
1712     +static void __init spec2_print_if_insecure(const char *reason)
1713     +{
1714     + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1715     + pr_info("%s\n", reason);
1716     +}
1717     +
1718     +static void __init spec2_print_if_secure(const char *reason)
1719     +{
1720     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1721     + pr_info("%s\n", reason);
1722     +}
1723     +
1724     +static inline bool retp_compiler(void)
1725     +{
1726     + return __is_defined(RETPOLINE);
1727     +}
1728     +
1729     +static inline bool match_option(const char *arg, int arglen, const char *opt)
1730     +{
1731     + int len = strlen(opt);
1732     +
1733     + return len == arglen && !strncmp(arg, opt, len);
1734     +}
1735     +
1736     +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1737     +{
1738     + char arg[20];
1739     + int ret;
1740     +
1741     + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1742     + sizeof(arg));
1743     + if (ret > 0) {
1744     + if (match_option(arg, ret, "off")) {
1745     + goto disable;
1746     + } else if (match_option(arg, ret, "on")) {
1747     + spec2_print_if_secure("force enabled on command line.");
1748     + return SPECTRE_V2_CMD_FORCE;
1749     + } else if (match_option(arg, ret, "retpoline")) {
1750     + spec2_print_if_insecure("retpoline selected on command line.");
1751     + return SPECTRE_V2_CMD_RETPOLINE;
1752     + } else if (match_option(arg, ret, "retpoline,amd")) {
1753     + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1754     + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1755     + return SPECTRE_V2_CMD_AUTO;
1756     + }
1757     + spec2_print_if_insecure("AMD retpoline selected on command line.");
1758     + return SPECTRE_V2_CMD_RETPOLINE_AMD;
1759     + } else if (match_option(arg, ret, "retpoline,generic")) {
1760     + spec2_print_if_insecure("generic retpoline selected on command line.");
1761     + return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
1762     + } else if (match_option(arg, ret, "auto")) {
1763     + return SPECTRE_V2_CMD_AUTO;
1764     + }
1765     + }
1766     +
1767     + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1768     + return SPECTRE_V2_CMD_AUTO;
1769     +disable:
1770     + spec2_print_if_insecure("disabled on command line.");
1771     + return SPECTRE_V2_CMD_NONE;
1772     +}
1773     +
1774     +static void __init spectre_v2_select_mitigation(void)
1775     +{
1776     + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
1777     + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
1778     +
1779     + /*
1780     + * If the CPU is not affected and the command line mode is NONE or AUTO
1781     + * then nothing to do.
1782     + */
1783     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
1784     + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
1785     + return;
1786     +
1787     + switch (cmd) {
1788     + case SPECTRE_V2_CMD_NONE:
1789     + return;
1790     +
1791     + case SPECTRE_V2_CMD_FORCE:
1792     + /* FALLTHRU */
1793     + case SPECTRE_V2_CMD_AUTO:
1794     + goto retpoline_auto;
1795     +
1796     + case SPECTRE_V2_CMD_RETPOLINE_AMD:
1797     + if (IS_ENABLED(CONFIG_RETPOLINE))
1798     + goto retpoline_amd;
1799     + break;
1800     + case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
1801     + if (IS_ENABLED(CONFIG_RETPOLINE))
1802     + goto retpoline_generic;
1803     + break;
1804     + case SPECTRE_V2_CMD_RETPOLINE:
1805     + if (IS_ENABLED(CONFIG_RETPOLINE))
1806     + goto retpoline_auto;
1807     + break;
1808     + }
1809     + pr_err("kernel not compiled with retpoline; no mitigation available!");
1810     + return;
1811     +
1812     +retpoline_auto:
1813     + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
1814     + retpoline_amd:
1815     + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
1816     + pr_err("LFENCE not serializing. Switching to generic retpoline\n");
1817     + goto retpoline_generic;
1818     + }
1819     + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
1820     + SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
1821     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
1822     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1823     + } else {
1824     + retpoline_generic:
1825     + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
1826     + SPECTRE_V2_RETPOLINE_MINIMAL;
1827     + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1828     + }
1829     +
1830     + spectre_v2_enabled = mode;
1831     + pr_info("%s\n", spectre_v2_strings[mode]);
1832     +}
1833     +
1834     +#undef pr_fmt
1835     +
1836     +#ifdef CONFIG_SYSFS
1837     +ssize_t cpu_show_meltdown(struct device *dev,
1838     + struct device_attribute *attr, char *buf)
1839     +{
1840     + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1841     + return sprintf(buf, "Not affected\n");
1842     + if (boot_cpu_has(X86_FEATURE_PTI))
1843     + return sprintf(buf, "Mitigation: PTI\n");
1844     + return sprintf(buf, "Vulnerable\n");
1845     +}
1846     +
1847     +ssize_t cpu_show_spectre_v1(struct device *dev,
1848     + struct device_attribute *attr, char *buf)
1849     +{
1850     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
1851     + return sprintf(buf, "Not affected\n");
1852     + return sprintf(buf, "Vulnerable\n");
1853     +}
1854     +
1855     +ssize_t cpu_show_spectre_v2(struct device *dev,
1856     + struct device_attribute *attr, char *buf)
1857     +{
1858     + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1859     + return sprintf(buf, "Not affected\n");
1860     +
1861     + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
1862     +}
1863     +#endif
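End to end: on an affected CPU, a kernel built with CONFIG_RETPOLINE by a retpoline-capable compiler and booted without any spectre_v2= option takes the AUTO path, forces X86_FEATURE_RETPOLINE, and reports the result both in the boot log (with the pr_fmt prefix above) and via sysfs. Illustrative output assembled from the strings in this hunk, not captured from a real boot:

	Spectre V2 mitigation: Mitigation: Full generic retpoline

	$ cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
	Mitigation: Full generic retpoline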
1864     diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1865     index 2d3bd2215e5b..372ba3fb400f 100644
1866     --- a/arch/x86/kernel/cpu/common.c
1867     +++ b/arch/x86/kernel/cpu/common.c
1868     @@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1869     if (c->x86_vendor != X86_VENDOR_AMD)
1870     setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1871    
1872     + setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1873     + setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1874     +
1875     fpu__init_system(c);
1876    
1877     #ifdef CONFIG_X86_32
1878     diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
1879     index 8ccdca6d3f9e..d9e460fc7a3b 100644
1880     --- a/arch/x86/kernel/cpu/microcode/intel.c
1881     +++ b/arch/x86/kernel/cpu/microcode/intel.c
1882     @@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
1883     {
1884     struct cpuinfo_x86 *c = &cpu_data(cpu);
1885    
1886     - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
1887     - pr_err_once("late loading on model 79 is disabled.\n");
1888     + /*
1889     + * Late loading on model 79 with microcode revision less than 0x0b000021
1890     + * may result in a system hang. This behavior is documented in item
1891     + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
1892     + */
1893     + if (c->x86 == 6 &&
1894     + c->x86_model == INTEL_FAM6_BROADWELL_X &&
1895     + c->x86_mask == 0x01 &&
1896     + c->microcode < 0x0b000021) {
1897     + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
1898     + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1899     return true;
1900     }
1901    
1902     diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
1903     index b6c6468e10bc..4c8440de3355 100644
1904     --- a/arch/x86/kernel/ftrace_32.S
1905     +++ b/arch/x86/kernel/ftrace_32.S
1906     @@ -8,6 +8,7 @@
1907     #include <asm/segment.h>
1908     #include <asm/export.h>
1909     #include <asm/ftrace.h>
1910     +#include <asm/nospec-branch.h>
1911    
1912     #ifdef CC_USING_FENTRY
1913     # define function_hook __fentry__
1914     @@ -197,7 +198,8 @@ ftrace_stub:
1915     movl 0x4(%ebp), %edx
1916     subl $MCOUNT_INSN_SIZE, %eax
1917    
1918     - call *ftrace_trace_function
1919     + movl ftrace_trace_function, %ecx
1920     + CALL_NOSPEC %ecx
1921    
1922     popl %edx
1923     popl %ecx
1924     @@ -241,5 +243,5 @@ return_to_handler:
1925     movl %eax, %ecx
1926     popl %edx
1927     popl %eax
1928     - jmp *%ecx
1929     + JMP_NOSPEC %ecx
1930     #endif
1931     diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
1932     index c832291d948a..7cb8ba08beb9 100644
1933     --- a/arch/x86/kernel/ftrace_64.S
1934     +++ b/arch/x86/kernel/ftrace_64.S
1935     @@ -7,7 +7,7 @@
1936     #include <asm/ptrace.h>
1937     #include <asm/ftrace.h>
1938     #include <asm/export.h>
1939     -
1940     +#include <asm/nospec-branch.h>
1941    
1942     .code64
1943     .section .entry.text, "ax"
1944     @@ -286,8 +286,8 @@ trace:
1945     * ip and parent ip are used and the list function is called when
1946     * function tracing is enabled.
1947     */
1948     - call *ftrace_trace_function
1949     -
1950     + movq ftrace_trace_function, %r8
1951     + CALL_NOSPEC %r8
1952     restore_mcount_regs
1953    
1954     jmp fgraph_trace
1955     @@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
1956     movq 8(%rsp), %rdx
1957     movq (%rsp), %rax
1958     addq $24, %rsp
1959     - jmp *%rdi
1960     + JMP_NOSPEC %rdi
1961     #endif
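Both ftrace conversions follow the same two-step pattern: "call *ftrace_trace_function" is an indirect call through a memory operand, but the retpoline thunks are per-register, so the function pointer is first loaded into a scratch register (%ecx on 32-bit, %r8 on 64-bit) and then dispatched with CALL_NOSPEC.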
1962     diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
1963     index a83b3346a0e1..c1bdbd3d3232 100644
1964     --- a/arch/x86/kernel/irq_32.c
1965     +++ b/arch/x86/kernel/irq_32.c
1966     @@ -20,6 +20,7 @@
1967     #include <linux/mm.h>
1968    
1969     #include <asm/apic.h>
1970     +#include <asm/nospec-branch.h>
1971    
1972     #ifdef CONFIG_DEBUG_STACKOVERFLOW
1973    
1974     @@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
1975     static void call_on_stack(void *func, void *stack)
1976     {
1977     asm volatile("xchgl %%ebx,%%esp \n"
1978     - "call *%%edi \n"
1979     + CALL_NOSPEC
1980     "movl %%ebx,%%esp \n"
1981     : "=b" (stack)
1982     : "0" (stack),
1983     - "D"(func)
1984     + [thunk_target] "D"(func)
1985     : "memory", "cc", "edx", "ecx", "eax");
1986     }
1987    
1988     @@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1989     call_on_stack(print_stack_overflow, isp);
1990    
1991     asm volatile("xchgl %%ebx,%%esp \n"
1992     - "call *%%edi \n"
1993     + CALL_NOSPEC
1994     "movl %%ebx,%%esp \n"
1995     : "=a" (arg1), "=b" (isp)
1996     : "0" (desc), "1" (isp),
1997     - "D" (desc->handle_irq)
1998     + [thunk_target] "D" (desc->handle_irq)
1999     : "memory", "cc", "ecx");
2000     return 1;
2001     }
2002     diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
2003     index a4eb27918ceb..a2486f444073 100644
2004     --- a/arch/x86/kernel/tboot.c
2005     +++ b/arch/x86/kernel/tboot.c
2006     @@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
2007     return -1;
2008     set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
2009     pte_unmap(pte);
2010     +
2011     + /*
2012     + * PTI poisons low addresses in the kernel page tables in the
2013     + * name of making them unusable for userspace. To execute
2014     + * code at such a low address, the poison must be cleared.
2015     + *
2016     + * Note: 'pgd' actually gets set in p4d_alloc() _or_
2017     + * pud_alloc() depending on 4/5-level paging.
2018     + */
2019     + pgd->pgd &= ~_PAGE_NX;
2020     +
2021     return 0;
2022     }
2023    
2024     diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
2025     index 17fb6c6d939a..6a8284f72328 100644
2026     --- a/arch/x86/kvm/svm.c
2027     +++ b/arch/x86/kvm/svm.c
2028     @@ -45,6 +45,7 @@
2029     #include <asm/debugreg.h>
2030     #include <asm/kvm_para.h>
2031     #include <asm/irq_remapping.h>
2032     +#include <asm/nospec-branch.h>
2033    
2034     #include <asm/virtext.h>
2035     #include "trace.h"
2036     @@ -4964,6 +4965,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2037     "mov %%r13, %c[r13](%[svm]) \n\t"
2038     "mov %%r14, %c[r14](%[svm]) \n\t"
2039     "mov %%r15, %c[r15](%[svm]) \n\t"
2040     +#endif
2041     + /*
2042     + * Clear host registers marked as clobbered to prevent
2043     + * speculative use.
2044     + */
2045     + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
2046     + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
2047     + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
2048     + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
2049     + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
2050     +#ifdef CONFIG_X86_64
2051     + "xor %%r8, %%r8 \n\t"
2052     + "xor %%r9, %%r9 \n\t"
2053     + "xor %%r10, %%r10 \n\t"
2054     + "xor %%r11, %%r11 \n\t"
2055     + "xor %%r12, %%r12 \n\t"
2056     + "xor %%r13, %%r13 \n\t"
2057     + "xor %%r14, %%r14 \n\t"
2058     + "xor %%r15, %%r15 \n\t"
2059     #endif
2060     "pop %%" _ASM_BP
2061     :
2062     @@ -4994,6 +5014,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2063     #endif
2064     );
2065    
2066     + /* Eliminate branch target predictions from guest mode */
2067     + vmexit_fill_RSB();
2068     +
2069     #ifdef CONFIG_X86_64
2070     wrmsrl(MSR_GS_BASE, svm->host.gs_base);
2071     #else
2072     diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
2073     index 47d9432756f3..ef16cf0f7cfd 100644
2074     --- a/arch/x86/kvm/vmx.c
2075     +++ b/arch/x86/kvm/vmx.c
2076     @@ -50,6 +50,7 @@
2077     #include <asm/apic.h>
2078     #include <asm/irq_remapping.h>
2079     #include <asm/mmu_context.h>
2080     +#include <asm/nospec-branch.h>
2081    
2082     #include "trace.h"
2083     #include "pmu.h"
2084     @@ -888,8 +889,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
2085     {
2086     BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
2087    
2088     - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
2089     - vmcs_field_to_offset_table[field] == 0)
2090     + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
2091     + return -ENOENT;
2092     +
2093     + /*
2094     + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
2095     + * generic mechanism.
2096     + */
2097     + asm("lfence");
2098     +
2099     + if (vmcs_field_to_offset_table[field] == 0)
2100     return -ENOENT;
2101    
2102     return vmcs_field_to_offset_table[field];
2103     @@ -9405,6 +9414,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2104     /* Save guest registers, load host registers, keep flags */
2105     "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
2106     "pop %0 \n\t"
2107     + "setbe %c[fail](%0)\n\t"
2108     "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
2109     "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
2110     __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
2111     @@ -9421,12 +9431,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2112     "mov %%r13, %c[r13](%0) \n\t"
2113     "mov %%r14, %c[r14](%0) \n\t"
2114     "mov %%r15, %c[r15](%0) \n\t"
2115     + "xor %%r8d, %%r8d \n\t"
2116     + "xor %%r9d, %%r9d \n\t"
2117     + "xor %%r10d, %%r10d \n\t"
2118     + "xor %%r11d, %%r11d \n\t"
2119     + "xor %%r12d, %%r12d \n\t"
2120     + "xor %%r13d, %%r13d \n\t"
2121     + "xor %%r14d, %%r14d \n\t"
2122     + "xor %%r15d, %%r15d \n\t"
2123     #endif
2124     "mov %%cr2, %%" _ASM_AX " \n\t"
2125     "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
2126    
2127     + "xor %%eax, %%eax \n\t"
2128     + "xor %%ebx, %%ebx \n\t"
2129     + "xor %%esi, %%esi \n\t"
2130     + "xor %%edi, %%edi \n\t"
2131     "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
2132     - "setbe %c[fail](%0) \n\t"
2133     ".pushsection .rodata \n\t"
2134     ".global vmx_return \n\t"
2135     "vmx_return: " _ASM_PTR " 2b \n\t"
2136     @@ -9463,6 +9484,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2137     #endif
2138     );
2139    
2140     + /* Eliminate branch target predictions from guest mode */
2141     + vmexit_fill_RSB();
2142     +
2143     /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
2144     if (debugctlmsr)
2145     update_debugctlmsr(debugctlmsr);
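The bare lfence added to vmcs_field_to_offset() is the ad-hoc Spectre variant 1 (bounds check bypass) pattern: serialize between the bounds check and the table load so the CPU cannot speculatively index the array with an attacker-controlled out-of-range value. As a standalone sketch of the same pattern (the table and index names here are hypothetical):

	static const short offset_table[NR_FIELDS];

	static int offset_lookup(unsigned long field)
	{
		if (field >= ARRAY_SIZE(offset_table))
			return -ENOENT;
		/* No speculative offset_table[field] load past the check. */
		asm volatile("lfence" ::: "memory");
		return offset_table[field];
	}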
2146     diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
2147     index 075619a92ce7..575c8953cc9a 100644
2148     --- a/arch/x86/kvm/x86.c
2149     +++ b/arch/x86/kvm/x86.c
2150     @@ -4362,7 +4362,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2151     addr, n, v))
2152     && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
2153     break;
2154     - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
2155     + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
2156     handled += n;
2157     addr += n;
2158     len -= n;
2159     @@ -4621,7 +4621,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
2160     {
2161     if (vcpu->mmio_read_completed) {
2162     trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
2163     - vcpu->mmio_fragments[0].gpa, *(u64 *)val);
2164     + vcpu->mmio_fragments[0].gpa, val);
2165     vcpu->mmio_read_completed = 0;
2166     return 1;
2167     }
2168     @@ -4643,14 +4643,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
2169    
2170     static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
2171     {
2172     - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
2173     + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
2174     return vcpu_mmio_write(vcpu, gpa, bytes, val);
2175     }
2176    
2177     static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
2178     void *val, int bytes)
2179     {
2180     - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
2181     + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
2182     return X86EMUL_IO_NEEDED;
2183     }
2184    
2185     diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
2186     index 457f681ef379..d435c89875c1 100644
2187     --- a/arch/x86/lib/Makefile
2188     +++ b/arch/x86/lib/Makefile
2189     @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
2190     lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
2191     lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
2192     lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
2193     +lib-$(CONFIG_RETPOLINE) += retpoline.o
2194    
2195     obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
2196    
2197     diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
2198     index 4d34bb548b41..46e71a74e612 100644
2199     --- a/arch/x86/lib/checksum_32.S
2200     +++ b/arch/x86/lib/checksum_32.S
2201     @@ -29,7 +29,8 @@
2202     #include <asm/errno.h>
2203     #include <asm/asm.h>
2204     #include <asm/export.h>
2205     -
2206     +#include <asm/nospec-branch.h>
2207     +
2208     /*
2209     * computes a partial checksum, e.g. for TCP/UDP fragments
2210     */
2211     @@ -156,7 +157,7 @@ ENTRY(csum_partial)
2212     negl %ebx
2213     lea 45f(%ebx,%ebx,2), %ebx
2214     testl %esi, %esi
2215     - jmp *%ebx
2216     + JMP_NOSPEC %ebx
2217    
2218     # Handle 2-byte-aligned regions
2219     20: addw (%esi), %ax
2220     @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
2221     andl $-32,%edx
2222     lea 3f(%ebx,%ebx), %ebx
2223     testl %esi, %esi
2224     - jmp *%ebx
2225     + JMP_NOSPEC %ebx
2226     1: addl $64,%esi
2227     addl $64,%edi
2228     SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
2229     diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
2230     new file mode 100644
2231     index 000000000000..cb45c6cb465f
2232     --- /dev/null
2233     +++ b/arch/x86/lib/retpoline.S
2234     @@ -0,0 +1,48 @@
2235     +/* SPDX-License-Identifier: GPL-2.0 */
2236     +
2237     +#include <linux/stringify.h>
2238     +#include <linux/linkage.h>
2239     +#include <asm/dwarf2.h>
2240     +#include <asm/cpufeatures.h>
2241     +#include <asm/alternative-asm.h>
2242     +#include <asm/export.h>
2243     +#include <asm/nospec-branch.h>
2244     +
2245     +.macro THUNK reg
2246     + .section .text.__x86.indirect_thunk.\reg
2247     +
2248     +ENTRY(__x86_indirect_thunk_\reg)
2249     + CFI_STARTPROC
2250     + JMP_NOSPEC %\reg
2251     + CFI_ENDPROC
2252     +ENDPROC(__x86_indirect_thunk_\reg)
2253     +.endm
2254     +
2255     +/*
2256     + * Despite being an assembler file we can't just use .irp here
2257     + * because __KSYM_DEPS__ only uses the C preprocessor and would
2258     + * only see one instance of "__x86_indirect_thunk_\reg" rather
2259     + * than one per register with the correct names. So we do it
2260     + * the simple and nasty way...
2261     + */
2262     +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
2263     +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
2264     +
2265     +GENERATE_THUNK(_ASM_AX)
2266     +GENERATE_THUNK(_ASM_BX)
2267     +GENERATE_THUNK(_ASM_CX)
2268     +GENERATE_THUNK(_ASM_DX)
2269     +GENERATE_THUNK(_ASM_SI)
2270     +GENERATE_THUNK(_ASM_DI)
2271     +GENERATE_THUNK(_ASM_BP)
2272     +GENERATE_THUNK(_ASM_SP)
2273     +#ifdef CONFIG_64BIT
2274     +GENERATE_THUNK(r8)
2275     +GENERATE_THUNK(r9)
2276     +GENERATE_THUNK(r10)
2277     +GENERATE_THUNK(r11)
2278     +GENERATE_THUNK(r12)
2279     +GENERATE_THUNK(r13)
2280     +GENERATE_THUNK(r14)
2281     +GENERATE_THUNK(r15)
2282     +#endif
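These thunks are the out-of-line call targets that a retpoline-enabled compiler emits for every indirect branch in C code. Illustratively, assuming GCC with -mindirect-branch=thunk-extern (the mode CONFIG_RETPOLINE builds use; the actual register depends on allocation):

	/* without retpoline */
	call *%rax

	/* with retpoline */
	call __x86_indirect_thunk_rax

which is why one thunk per general-purpose register is generated and exported above.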
2283     diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
2284     index 43d4a4a29037..ce38f165489b 100644
2285     --- a/arch/x86/mm/pti.c
2286     +++ b/arch/x86/mm/pti.c
2287     @@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
2288     *
2289     * Returns a pointer to a P4D on success, or NULL on failure.
2290     */
2291     -static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2292     +static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2293     {
2294     pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
2295     gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
2296     @@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2297     if (!new_p4d_page)
2298     return NULL;
2299    
2300     - if (pgd_none(*pgd)) {
2301     - set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
2302     - new_p4d_page = 0;
2303     - }
2304     - if (new_p4d_page)
2305     - free_page(new_p4d_page);
2306     + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
2307     }
2308     BUILD_BUG_ON(pgd_large(*pgd) != 0);
2309    
2310     @@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2311     *
2312     * Returns a pointer to a PMD on success, or NULL on failure.
2313     */
2314     -static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2315     +static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2316     {
2317     gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
2318     p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
2319     @@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2320     if (!new_pud_page)
2321     return NULL;
2322    
2323     - if (p4d_none(*p4d)) {
2324     - set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
2325     - new_pud_page = 0;
2326     - }
2327     - if (new_pud_page)
2328     - free_page(new_pud_page);
2329     + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
2330     }
2331    
2332     pud = pud_offset(p4d, address);
2333     @@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2334     if (!new_pmd_page)
2335     return NULL;
2336    
2337     - if (pud_none(*pud)) {
2338     - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
2339     - new_pmd_page = 0;
2340     - }
2341     - if (new_pmd_page)
2342     - free_page(new_pmd_page);
2343     + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
2344     }
2345    
2346     return pmd_offset(pud, address);
2347     @@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
2348     if (!new_pte_page)
2349     return NULL;
2350    
2351     - if (pmd_none(*pmd)) {
2352     - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
2353     - new_pte_page = 0;
2354     - }
2355     - if (new_pte_page)
2356     - free_page(new_pte_page);
2357     + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
2358     }
2359    
2360     pte = pte_offset_kernel(pmd, address);
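The hunks in this file share one simplification: the page-table walkers are now __init and run only during single-threaded early boot, so the removed "did the entry get populated while we were allocating?" re-check can never fire, and the freshly allocated page can be installed unconditionally.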
2361     diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
2362     index 39c4b35ac7a4..61975b6bcb1a 100644
2363     --- a/arch/x86/platform/efi/efi_64.c
2364     +++ b/arch/x86/platform/efi/efi_64.c
2365     @@ -134,7 +134,9 @@ pgd_t * __init efi_call_phys_prolog(void)
2366     pud[j] = *pud_offset(p4d_k, vaddr);
2367     }
2368     }
2369     + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
2370     }
2371     +
2372     out:
2373     __flush_tlb_all();
2374    
2375     diff --git a/crypto/algapi.c b/crypto/algapi.c
2376     index aa699ff6c876..50eb828db767 100644
2377     --- a/crypto/algapi.c
2378     +++ b/crypto/algapi.c
2379     @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
2380    
2381     spawn->alg = NULL;
2382     spawns = &inst->alg.cra_users;
2383     +
2384     + /*
2385     + * We may encounter an unregistered instance here, since
2386     + * an instance's spawns are set up prior to the instance
2387     + * being registered. An unregistered instance will have
2388     + * NULL ->cra_users.next, since ->cra_users isn't
2389     + * properly initialized until registration. But an
2390     + * unregistered instance cannot have any users, so treat
2391     + * it the same as ->cra_users being empty.
2392     + */
2393     + if (spawns->next == NULL)
2394     + break;
2395     }
2396     } while ((spawns = crypto_more_spawns(alg, &stack, &top,
2397     &secondary_spawns)));
2398     diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
2399     index bdc87907d6a1..2415ad9f6dd4 100644
2400     --- a/drivers/base/Kconfig
2401     +++ b/drivers/base/Kconfig
2402     @@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
2403     config GENERIC_CPU_AUTOPROBE
2404     bool
2405    
2406     +config GENERIC_CPU_VULNERABILITIES
2407     + bool
2408     +
2409     config SOC_BUS
2410     bool
2411     select GLOB
2412     diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
2413     index 321cd7b4d817..825964efda1d 100644
2414     --- a/drivers/base/cpu.c
2415     +++ b/drivers/base/cpu.c
2416     @@ -501,10 +501,58 @@ static void __init cpu_dev_register_generic(void)
2417     #endif
2418     }
2419    
2420     +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
2421     +
2422     +ssize_t __weak cpu_show_meltdown(struct device *dev,
2423     + struct device_attribute *attr, char *buf)
2424     +{
2425     + return sprintf(buf, "Not affected\n");
2426     +}
2427     +
2428     +ssize_t __weak cpu_show_spectre_v1(struct device *dev,
2429     + struct device_attribute *attr, char *buf)
2430     +{
2431     + return sprintf(buf, "Not affected\n");
2432     +}
2433     +
2434     +ssize_t __weak cpu_show_spectre_v2(struct device *dev,
2435     + struct device_attribute *attr, char *buf)
2436     +{
2437     + return sprintf(buf, "Not affected\n");
2438     +}
2439     +
2440     +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
2441     +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
2442     +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
2443     +
2444     +static struct attribute *cpu_root_vulnerabilities_attrs[] = {
2445     + &dev_attr_meltdown.attr,
2446     + &dev_attr_spectre_v1.attr,
2447     + &dev_attr_spectre_v2.attr,
2448     + NULL
2449     +};
2450     +
2451     +static const struct attribute_group cpu_root_vulnerabilities_group = {
2452     + .name = "vulnerabilities",
2453     + .attrs = cpu_root_vulnerabilities_attrs,
2454     +};
2455     +
2456     +static void __init cpu_register_vulnerabilities(void)
2457     +{
2458     + if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
2459     + &cpu_root_vulnerabilities_group))
2460     + pr_err("Unable to register CPU vulnerabilities\n");
2461     +}
2462     +
2463     +#else
2464     +static inline void cpu_register_vulnerabilities(void) { }
2465     +#endif
2466     +
2467     void __init cpu_dev_init(void)
2468     {
2469     if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
2470     panic("Failed to register CPU subsystem");
2471    
2472     cpu_dev_register_generic();
2473     + cpu_register_vulnerabilities();
2474     }
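The three __weak stubs give every architecture a safe default: unless the arch provides its own cpu_show_meltdown()/cpu_show_spectre_v1()/cpu_show_spectre_v2() (as arch/x86/kernel/cpu/bugs.c now does, earlier in this patch), the files report "Not affected". The strong definitions override the weak ones at link time.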
2475     diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
2476     index adc877dfef5c..609227211295 100644
2477     --- a/drivers/block/rbd.c
2478     +++ b/drivers/block/rbd.c
2479     @@ -3074,13 +3074,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
2480     mutex_unlock(&rbd_dev->watch_mutex);
2481     }
2482    
2483     +static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
2484     +{
2485     + struct rbd_client_id cid = rbd_get_cid(rbd_dev);
2486     +
2487     + strcpy(rbd_dev->lock_cookie, cookie);
2488     + rbd_set_owner_cid(rbd_dev, &cid);
2489     + queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
2490     +}
2491     +
2492     /*
2493     * lock_rwsem must be held for write
2494     */
2495     static int rbd_lock(struct rbd_device *rbd_dev)
2496     {
2497     struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
2498     - struct rbd_client_id cid = rbd_get_cid(rbd_dev);
2499     char cookie[32];
2500     int ret;
2501    
2502     @@ -3095,9 +3103,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
2503     return ret;
2504    
2505     rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
2506     - strcpy(rbd_dev->lock_cookie, cookie);
2507     - rbd_set_owner_cid(rbd_dev, &cid);
2508     - queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
2509     + __rbd_lock(rbd_dev, cookie);
2510     return 0;
2511     }
2512    
2513     @@ -3883,7 +3889,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
2514     queue_delayed_work(rbd_dev->task_wq,
2515     &rbd_dev->lock_dwork, 0);
2516     } else {
2517     - strcpy(rbd_dev->lock_cookie, cookie);
2518     + __rbd_lock(rbd_dev, cookie);
2519     }
2520     }
2521    
2522     @@ -4415,7 +4421,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
2523     segment_size = rbd_obj_bytes(&rbd_dev->header);
2524     blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
2525     q->limits.max_sectors = queue_max_hw_sectors(q);
2526     - blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
2527     + blk_queue_max_segments(q, USHRT_MAX);
2528     blk_queue_max_segment_size(q, segment_size);
2529     blk_queue_io_min(q, segment_size);
2530     blk_queue_io_opt(q, segment_size);
2531     diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
2532     index a385838e2919..dadacbe558ab 100644
2533     --- a/drivers/gpu/drm/i915/gvt/gtt.c
2534     +++ b/drivers/gpu/drm/i915/gvt/gtt.c
2535     @@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
2536     return ret;
2537     } else {
2538     if (!test_bit(index, spt->post_shadow_bitmap)) {
2539     + int type = spt->shadow_page.type;
2540     +
2541     ppgtt_get_shadow_entry(spt, &se, index);
2542     ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
2543     if (ret)
2544     return ret;
2545     + ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
2546     + ppgtt_set_shadow_entry(spt, &se, index);
2547     }
2548     -
2549     ppgtt_set_post_shadow(spt, index);
2550     }
2551    
2552     diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
2553     index 82498f8232eb..5c5cb2ceee49 100644
2554     --- a/drivers/gpu/drm/i915/i915_drv.c
2555     +++ b/drivers/gpu/drm/i915/i915_drv.c
2556     @@ -1693,6 +1693,7 @@ static int i915_drm_resume(struct drm_device *dev)
2557     intel_guc_resume(dev_priv);
2558    
2559     intel_modeset_init_hw(dev);
2560     + intel_init_clock_gating(dev_priv);
2561    
2562     spin_lock_irq(&dev_priv->irq_lock);
2563     if (dev_priv->display.hpd_irq_setup)
2564     diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
2565     index ce2ed16f2a30..920c8914cec1 100644
2566     --- a/drivers/gpu/drm/i915/i915_reg.h
2567     +++ b/drivers/gpu/drm/i915/i915_reg.h
2568     @@ -6987,6 +6987,8 @@ enum {
2569     #define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
2570     #define DISABLE_PIXEL_MASK_CAMMING (1<<14)
2571    
2572     +#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
2573     +
2574     #define GEN7_L3SQCREG1 _MMIO(0xB010)
2575     #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000
2576    
2577     diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
2578     index 1c73d5542681..095a2240af4f 100644
2579     --- a/drivers/gpu/drm/i915/intel_display.c
2580     +++ b/drivers/gpu/drm/i915/intel_display.c
2581     @@ -3800,6 +3800,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
2582    
2583     intel_pps_unlock_regs_wa(dev_priv);
2584     intel_modeset_init_hw(dev);
2585     + intel_init_clock_gating(dev_priv);
2586    
2587     spin_lock_irq(&dev_priv->irq_lock);
2588     if (dev_priv->display.hpd_irq_setup)
2589     @@ -14406,8 +14407,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
2590    
2591     intel_update_cdclk(dev_priv);
2592     dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw;
2593     -
2594     - intel_init_clock_gating(dev_priv);
2595     }
2596    
2597     /*
2598     @@ -15124,6 +15123,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
2599     struct intel_encoder *encoder;
2600     int i;
2601    
2602     + if (IS_HASWELL(dev_priv)) {
2603     + /*
2604     + * WaRsPkgCStateDisplayPMReq:hsw
2605     + * System hang if this isn't done before disabling all planes!
2606     + */
2607     + I915_WRITE(CHICKEN_PAR1_1,
2608     + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
2609     + }
2610     +
2611     intel_modeset_readout_hw_state(dev);
2612    
2613     /* HW state is read out, now we need to sanitize this mess. */
2614     @@ -15220,6 +15228,8 @@ void intel_modeset_gem_init(struct drm_device *dev)
2615    
2616     intel_init_gt_powersave(dev_priv);
2617    
2618     + intel_init_clock_gating(dev_priv);
2619     +
2620     intel_setup_overlay(dev_priv);
2621     }
2622    
2623     diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
2624     index 3c2d9cf22ed5..b6a7e492c1a3 100644
2625     --- a/drivers/gpu/drm/i915/intel_engine_cs.c
2626     +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
2627     @@ -1125,6 +1125,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
2628     if (ret)
2629     return ret;
2630    
2631     + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2632     + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2633     + if (ret)
2634     + return ret;
2635     +
2636     /* WaToEnableHwFixForPushConstHWBug:glk */
2637     WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
2638     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
2639     diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
2640     index cb950752c346..014e5c08571a 100644
2641     --- a/drivers/gpu/drm/i915/intel_pm.c
2642     +++ b/drivers/gpu/drm/i915/intel_pm.c
2643     @@ -5669,12 +5669,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
2644     mutex_unlock(&dev_priv->wm.wm_mutex);
2645     }
2646    
2647     +/*
2648     + * FIXME should probably kill this and improve
2649     + * the real watermark readout/sanitation instead
2650     + */
2651     +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
2652     +{
2653     + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
2654     + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
2655     + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
2656     +
2657     + /*
2658     + * Don't touch WM1S_LP_EN here.
2659     + * Doing so could cause underruns.
2660     + */
2661     +}
2662     +
2663     void ilk_wm_get_hw_state(struct drm_device *dev)
2664     {
2665     struct drm_i915_private *dev_priv = to_i915(dev);
2666     struct ilk_wm_values *hw = &dev_priv->wm.hw;
2667     struct drm_crtc *crtc;
2668    
2669     + ilk_init_lp_watermarks(dev_priv);
2670     +
2671     for_each_crtc(dev, crtc)
2672     ilk_pipe_wm_get_hw_state(crtc);
2673    
2674     @@ -7959,18 +7977,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
2675     }
2676     }
2677    
2678     -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
2679     -{
2680     - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
2681     - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
2682     - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
2683     -
2684     - /*
2685     - * Don't touch WM1S_LP_EN here.
2686     - * Doing so could cause underruns.
2687     - */
2688     -}
2689     -
2690     static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv)
2691     {
2692     uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
2693     @@ -8004,8 +8010,6 @@ static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv)
2694     (I915_READ(DISP_ARB_CTL) |
2695     DISP_FBC_WM_DIS));
2696    
2697     - ilk_init_lp_watermarks(dev_priv);
2698     -
2699     /*
2700     * Based on the document from hardware guys the following bits
2701     * should be set unconditionally in order to enable FBC.
2702     @@ -8118,8 +8122,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
2703     I915_WRITE(GEN6_GT_MODE,
2704     _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
2705    
2706     - ilk_init_lp_watermarks(dev_priv);
2707     -
2708     I915_WRITE(CACHE_MODE_0,
2709     _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
2710    
2711     @@ -8293,8 +8295,6 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv)
2712     {
2713     enum pipe pipe;
2714    
2715     - ilk_init_lp_watermarks(dev_priv);
2716     -
2717     /* WaSwitchSolVfFArbitrationPriority:bdw */
2718     I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
2719    
2720     @@ -8349,8 +8349,6 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv)
2721    
2722     static void haswell_init_clock_gating(struct drm_i915_private *dev_priv)
2723     {
2724     - ilk_init_lp_watermarks(dev_priv);
2725     -
2726     /* L3 caching of data atomics doesn't work -- disable it. */
2727     I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
2728     I915_WRITE(HSW_ROW_CHICKEN3,
2729     @@ -8394,10 +8392,6 @@ static void haswell_init_clock_gating(struct drm_i915_private *dev_priv)
2730     /* WaSwitchSolVfFArbitrationPriority:hsw */
2731     I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
2732    
2733     - /* WaRsPkgCStateDisplayPMReq:hsw */
2734     - I915_WRITE(CHICKEN_PAR1_1,
2735     - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
2736     -
2737     lpt_init_clock_gating(dev_priv);
2738     }
2739    
2740     @@ -8405,8 +8399,6 @@ static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv)
2741     {
2742     uint32_t snpcr;
2743    
2744     - ilk_init_lp_watermarks(dev_priv);
2745     -
2746     I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
2747    
2748     /* WaDisableEarlyCull:ivb */
2749     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2750     index 21c62a34e558..87e8af5776a3 100644
2751     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2752     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2753     @@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
2754     }
2755    
2756     view_type = vmw_view_cmd_to_type(header->id);
2757     + if (view_type == vmw_view_max)
2758     + return -EINVAL;
2759     cmd = container_of(header, typeof(*cmd), header);
2760     ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
2761     user_surface_converter,
2762     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2763     index b850562fbdd6..62c2f4be8012 100644
2764     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2765     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2766     @@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
2767     vps->pinned = 0;
2768    
2769     /* Mapping is managed by prepare_fb/cleanup_fb */
2770     - memset(&vps->guest_map, 0, sizeof(vps->guest_map));
2771     memset(&vps->host_map, 0, sizeof(vps->host_map));
2772     vps->cpp = 0;
2773    
2774     @@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
2775    
2776    
2777     /* Should have been freed by cleanup_fb */
2778     - if (vps->guest_map.virtual) {
2779     - DRM_ERROR("Guest mapping not freed\n");
2780     - ttm_bo_kunmap(&vps->guest_map);
2781     - }
2782     -
2783     if (vps->host_map.virtual) {
2784     DRM_ERROR("Host mapping not freed\n");
2785     ttm_bo_kunmap(&vps->host_map);
2786     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2787     index ff9c8389ff21..cd9da2dd79af 100644
2788     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2789     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2790     @@ -175,7 +175,7 @@ struct vmw_plane_state {
2791     int pinned;
2792    
2793     /* For CPU Blit */
2794     - struct ttm_bo_kmap_obj host_map, guest_map;
2795     + struct ttm_bo_kmap_obj host_map;
2796     unsigned int cpp;
2797     };
2798    
2799     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2800     index ca3afae2db1f..4dee05b15552 100644
2801     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2802     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2803     @@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
2804     bool defined;
2805    
2806     /* For CPU Blit */
2807     - struct ttm_bo_kmap_obj host_map, guest_map;
2808     + struct ttm_bo_kmap_obj host_map;
2809     unsigned int cpp;
2810     };
2811    
2812     @@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2813     s32 src_pitch, dst_pitch;
2814     u8 *src, *dst;
2815     bool not_used;
2816     -
2817     + struct ttm_bo_kmap_obj guest_map;
2818     + int ret;
2819    
2820     if (!dirty->num_hits)
2821     return;
2822     @@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2823     if (width == 0 || height == 0)
2824     return;
2825    
2826     + ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
2827     + &guest_map);
2828     + if (ret) {
2829     + DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
2830     + ret);
2831     + goto out_cleanup;
2832     + }
2833    
2834     /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
2835     src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
2836     @@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2837     src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
2838    
2839     dst_pitch = ddirty->pitch;
2840     - dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
2841     + dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
2842     dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
2843    
2844    
2845     @@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2846     vmw_fifo_commit(dev_priv, sizeof(*cmd));
2847     }
2848    
2849     + ttm_bo_kunmap(&guest_map);
2850     out_cleanup:
2851     ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
2852     ddirty->right = ddirty->bottom = S32_MIN;
2853     @@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
2854     {
2855     struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
2856    
2857     - if (vps->guest_map.virtual)
2858     - ttm_bo_kunmap(&vps->guest_map);
2859     -
2860     if (vps->host_map.virtual)
2861     ttm_bo_kunmap(&vps->host_map);
2862    
2863     @@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
2864     */
2865     if (vps->content_fb_type == SEPARATE_DMA &&
2866     !(dev_priv->capabilities & SVGA_CAP_3D)) {
2867     -
2868     - struct vmw_framebuffer_dmabuf *new_vfbd;
2869     -
2870     - new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
2871     -
2872     - ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
2873     - NULL);
2874     - if (ret)
2875     - goto out_srf_unpin;
2876     -
2877     - ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
2878     - new_vfbd->buffer->base.num_pages,
2879     - &vps->guest_map);
2880     -
2881     - ttm_bo_unreserve(&new_vfbd->buffer->base);
2882     -
2883     - if (ret) {
2884     - DRM_ERROR("Failed to map content buffer to CPU\n");
2885     - goto out_srf_unpin;
2886     - }
2887     -
2888     ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
2889     vps->surf->res.backup->base.num_pages,
2890     &vps->host_map);
2891     if (ret) {
2892     DRM_ERROR("Failed to map display buffer to CPU\n");
2893     - ttm_bo_kunmap(&vps->guest_map);
2894     goto out_srf_unpin;
2895     }
2896    
2897     @@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
2898     stdu->display_srf = vps->surf;
2899     stdu->content_fb_type = vps->content_fb_type;
2900     stdu->cpp = vps->cpp;
2901     - memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
2902     memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
2903    
2904     if (!stdu->defined)
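
The vmwgfx hunks above stop caching a long-lived kmap of the guest dma-buf in the plane and STDU state and instead map it only for the duration of each CPU blit. A minimal sketch of that scoped pattern, not the driver's code; scoped_cpu_blit() and the plain memcpy are stand-ins for the real per-line copy in vmw_stdu_dmabuf_cpu_commit():

/* Sketch only: map, blit, unmap within one commit. */
static int scoped_cpu_blit(struct ttm_buffer_object *bo, void *dst, size_t len)
{
	struct ttm_bo_kmap_obj map;	/* stack-local: cannot leak across states */
	bool is_iomem;
	void *src;
	int ret;

	ret = ttm_bo_kmap(bo, 0, bo->num_pages, &map);
	if (ret)
		return ret;

	src = ttm_kmap_obj_virtual(&map, &is_iomem);
	memcpy(dst, src, len);		/* the blit itself */

	ttm_bo_kunmap(&map);		/* dropped before the commit returns */
	return 0;
}
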
2905     diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
2906     index 514c1000ded1..73feeeeb4283 100644
2907     --- a/drivers/infiniband/hw/cxgb4/cq.c
2908     +++ b/drivers/infiniband/hw/cxgb4/cq.c
2909     @@ -410,7 +410,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
2910    
2911     static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
2912     {
2913     - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
2914     + if (DRAIN_CQE(cqe)) {
2915     WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
2916     return 0;
2917     }
2918     @@ -509,7 +509,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
2919     /*
2920     * Special cqe for drain WR completions...
2921     */
2922     - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
2923     + if (DRAIN_CQE(hw_cqe)) {
2924     *cookie = CQE_DRAIN_COOKIE(hw_cqe);
2925     *cqe = *hw_cqe;
2926     goto skip_cqe;
2927     @@ -766,9 +766,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
2928     c4iw_invalidate_mr(qhp->rhp,
2929     CQE_WRID_FR_STAG(&cqe));
2930     break;
2931     - case C4IW_DRAIN_OPCODE:
2932     - wc->opcode = IB_WC_SEND;
2933     - break;
2934     default:
2935     pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
2936     CQE_OPCODE(&cqe), CQE_QPID(&cqe));
2937     diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
2938     index 8f963df0bffc..9d25298d96fa 100644
2939     --- a/drivers/infiniband/hw/cxgb4/ev.c
2940     +++ b/drivers/infiniband/hw/cxgb4/ev.c
2941     @@ -109,9 +109,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
2942     if (qhp->ibqp.event_handler)
2943     (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
2944    
2945     - spin_lock_irqsave(&chp->comp_handler_lock, flag);
2946     - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
2947     - spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
2948     + if (t4_clear_cq_armed(&chp->cq)) {
2949     + spin_lock_irqsave(&chp->comp_handler_lock, flag);
2950     + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
2951     + spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
2952     + }
2953     }
2954    
2955     void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
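
post_qp_event() now raises the completion upcall only when the CQ was actually armed. For reference, t4_clear_cq_armed() is roughly an atomic test-and-clear of the armed flag (a sketch, not a verbatim copy of t4.h), so each arming yields at most one upcall even when several event paths race:

static inline int t4_clear_cq_armed(struct t4_cq *cq)
{
	/* returns nonzero only for the one caller that saw the bit set */
	return test_and_clear_bit(CQ_ARMED, &cq->flags);
}
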
2956     diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2957     index 819a30635d53..20c481115a99 100644
2958     --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2959     +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2960     @@ -631,8 +631,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
2961     return IB_QPS_ERR;
2962     }
2963    
2964     -#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
2965     -
2966     static inline u32 c4iw_ib_to_tpt_access(int a)
2967     {
2968     return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
2969     diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
2970     index e69453665a17..f311ea73c806 100644
2971     --- a/drivers/infiniband/hw/cxgb4/qp.c
2972     +++ b/drivers/infiniband/hw/cxgb4/qp.c
2973     @@ -794,21 +794,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
2974     return 0;
2975     }
2976    
2977     -static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
2978     +static int ib_to_fw_opcode(int ib_opcode)
2979     +{
2980     + int opcode;
2981     +
2982     + switch (ib_opcode) {
2983     + case IB_WR_SEND_WITH_INV:
2984     + opcode = FW_RI_SEND_WITH_INV;
2985     + break;
2986     + case IB_WR_SEND:
2987     + opcode = FW_RI_SEND;
2988     + break;
2989     + case IB_WR_RDMA_WRITE:
2990     + opcode = FW_RI_RDMA_WRITE;
2991     + break;
2992     + case IB_WR_RDMA_READ:
2993     + case IB_WR_RDMA_READ_WITH_INV:
2994     + opcode = FW_RI_READ_REQ;
2995     + break;
2996     + case IB_WR_REG_MR:
2997     + opcode = FW_RI_FAST_REGISTER;
2998     + break;
2999     + case IB_WR_LOCAL_INV:
3000     + opcode = FW_RI_LOCAL_INV;
3001     + break;
3002     + default:
3003     + opcode = -EINVAL;
3004     + }
3005     + return opcode;
3006     +}
3007     +
3008     +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
3009     {
3010     struct t4_cqe cqe = {};
3011     struct c4iw_cq *schp;
3012     unsigned long flag;
3013     struct t4_cq *cq;
3014     + int opcode;
3015    
3016     schp = to_c4iw_cq(qhp->ibqp.send_cq);
3017     cq = &schp->cq;
3018    
3019     + opcode = ib_to_fw_opcode(wr->opcode);
3020     + if (opcode < 0)
3021     + return opcode;
3022     +
3023     cqe.u.drain_cookie = wr->wr_id;
3024     cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
3025     - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
3026     + CQE_OPCODE_V(opcode) |
3027     CQE_TYPE_V(1) |
3028     CQE_SWCQE_V(1) |
3029     + CQE_DRAIN_V(1) |
3030     CQE_QPID_V(qhp->wq.sq.qid));
3031    
3032     spin_lock_irqsave(&schp->lock, flag);
3033     @@ -817,10 +853,29 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
3034     t4_swcq_produce(cq);
3035     spin_unlock_irqrestore(&schp->lock, flag);
3036    
3037     - spin_lock_irqsave(&schp->comp_handler_lock, flag);
3038     - (*schp->ibcq.comp_handler)(&schp->ibcq,
3039     - schp->ibcq.cq_context);
3040     - spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
3041     + if (t4_clear_cq_armed(&schp->cq)) {
3042     + spin_lock_irqsave(&schp->comp_handler_lock, flag);
3043     + (*schp->ibcq.comp_handler)(&schp->ibcq,
3044     + schp->ibcq.cq_context);
3045     + spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
3046     + }
3047     + return 0;
3048     +}
3049     +
3050     +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
3051     + struct ib_send_wr **bad_wr)
3052     +{
3053     + int ret = 0;
3054     +
3055     + while (wr) {
3056     + ret = complete_sq_drain_wr(qhp, wr);
3057     + if (ret) {
3058     + *bad_wr = wr;
3059     + break;
3060     + }
3061     + wr = wr->next;
3062     + }
3063     + return ret;
3064     }
3065    
3066     static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3067     @@ -835,9 +890,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3068    
3069     cqe.u.drain_cookie = wr->wr_id;
3070     cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
3071     - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
3072     + CQE_OPCODE_V(FW_RI_SEND) |
3073     CQE_TYPE_V(0) |
3074     CQE_SWCQE_V(1) |
3075     + CQE_DRAIN_V(1) |
3076     CQE_QPID_V(qhp->wq.sq.qid));
3077    
3078     spin_lock_irqsave(&rchp->lock, flag);
3079     @@ -846,10 +902,20 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3080     t4_swcq_produce(cq);
3081     spin_unlock_irqrestore(&rchp->lock, flag);
3082    
3083     - spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3084     - (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3085     - rchp->ibcq.cq_context);
3086     - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3087     + if (t4_clear_cq_armed(&rchp->cq)) {
3088     + spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3089     + (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3090     + rchp->ibcq.cq_context);
3091     + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3092     + }
3093     +}
3094     +
3095     +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3096     +{
3097     + while (wr) {
3098     + complete_rq_drain_wr(qhp, wr);
3099     + wr = wr->next;
3100     + }
3101     }
3102    
3103     int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3104     @@ -875,7 +941,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3105     */
3106     if (qhp->wq.flushed) {
3107     spin_unlock_irqrestore(&qhp->lock, flag);
3108     - complete_sq_drain_wr(qhp, wr);
3109     + err = complete_sq_drain_wrs(qhp, wr, bad_wr);
3110     return err;
3111     }
3112     num_wrs = t4_sq_avail(&qhp->wq);
3113     @@ -1024,7 +1090,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3114     */
3115     if (qhp->wq.flushed) {
3116     spin_unlock_irqrestore(&qhp->lock, flag);
3117     - complete_rq_drain_wr(qhp, wr);
3118     + complete_rq_drain_wrs(qhp, wr);
3119     return err;
3120     }
3121     num_wrs = t4_rq_avail(&qhp->wq);
3122     @@ -1267,48 +1333,51 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
3123    
3124     pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
3125    
3126     - /* locking hierarchy: cq lock first, then qp lock. */
3127     + /* locking hierarchy: cqs lock first, then qp lock. */
3128     spin_lock_irqsave(&rchp->lock, flag);
3129     + if (schp != rchp)
3130     + spin_lock(&schp->lock);
3131     spin_lock(&qhp->lock);
3132    
3133     if (qhp->wq.flushed) {
3134     spin_unlock(&qhp->lock);
3135     + if (schp != rchp)
3136     + spin_unlock(&schp->lock);
3137     spin_unlock_irqrestore(&rchp->lock, flag);
3138     return;
3139     }
3140     qhp->wq.flushed = 1;
3141     + t4_set_wq_in_error(&qhp->wq);
3142    
3143     c4iw_flush_hw_cq(rchp);
3144     c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
3145     rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
3146     - spin_unlock(&qhp->lock);
3147     - spin_unlock_irqrestore(&rchp->lock, flag);
3148    
3149     - /* locking hierarchy: cq lock first, then qp lock. */
3150     - spin_lock_irqsave(&schp->lock, flag);
3151     - spin_lock(&qhp->lock);
3152     if (schp != rchp)
3153     c4iw_flush_hw_cq(schp);
3154     sq_flushed = c4iw_flush_sq(qhp);
3155     +
3156     spin_unlock(&qhp->lock);
3157     - spin_unlock_irqrestore(&schp->lock, flag);
3158     + if (schp != rchp)
3159     + spin_unlock(&schp->lock);
3160     + spin_unlock_irqrestore(&rchp->lock, flag);
3161    
3162     if (schp == rchp) {
3163     - if (t4_clear_cq_armed(&rchp->cq) &&
3164     - (rq_flushed || sq_flushed)) {
3165     + if ((rq_flushed || sq_flushed) &&
3166     + t4_clear_cq_armed(&rchp->cq)) {
3167     spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3168     (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3169     rchp->ibcq.cq_context);
3170     spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3171     }
3172     } else {
3173     - if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
3174     + if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
3175     spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3176     (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3177     rchp->ibcq.cq_context);
3178     spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3179     }
3180     - if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
3181     + if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
3182     spin_lock_irqsave(&schp->comp_handler_lock, flag);
3183     (*schp->ibcq.comp_handler)(&schp->ibcq,
3184     schp->ibcq.cq_context);
3185     @@ -1325,8 +1394,8 @@ static void flush_qp(struct c4iw_qp *qhp)
3186     rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
3187     schp = to_c4iw_cq(qhp->ibqp.send_cq);
3188    
3189     - t4_set_wq_in_error(&qhp->wq);
3190     if (qhp->ibqp.uobject) {
3191     + t4_set_wq_in_error(&qhp->wq);
3192     t4_set_cq_in_error(&rchp->cq);
3193     spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3194     (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
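
The __flush_qp() rework above collapses two lock/unlock rounds into one pass that holds both CQ locks (skipping the second when send and receive share a CQ) and then the QP lock, so the SQ and RQ are flushed under a single consistent snapshot. The skeleton of that ordering, with the flush calls elided:

spin_lock_irqsave(&rchp->lock, flag);	/* recv CQ lock first, always */
if (schp != rchp)
	spin_lock(&schp->lock);		/* then send CQ lock, if distinct */
spin_lock(&qhp->lock);			/* QP lock innermost */

/* ... c4iw_flush_hw_cq(), c4iw_flush_rq(), c4iw_flush_sq() ... */

spin_unlock(&qhp->lock);
if (schp != rchp)
	spin_unlock(&schp->lock);
spin_unlock_irqrestore(&rchp->lock, flag);
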
3195     diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
3196     index bcb80ca67d3d..80b390e861dc 100644
3197     --- a/drivers/infiniband/hw/cxgb4/t4.h
3198     +++ b/drivers/infiniband/hw/cxgb4/t4.h
3199     @@ -197,6 +197,11 @@ struct t4_cqe {
3200     #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
3201     #define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
3202    
3203     +#define CQE_DRAIN_S 10
3204     +#define CQE_DRAIN_M 0x1
3205     +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
3206     +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
3207     +
3208     #define CQE_STATUS_S 5
3209     #define CQE_STATUS_M 0x1F
3210     #define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
3211     @@ -213,6 +218,7 @@ struct t4_cqe {
3212     #define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
3213    
3214     #define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
3215     +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
3216     #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
3217     #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
3218     #define SQ_TYPE(x) (CQE_TYPE((x)))
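
The new CQE_DRAIN_* definitions follow the driver's usual _S/_M/_G/_V convention (shift, mask, get, set); DRAIN_CQE() additionally applies be32_to_cpu() to the big-endian header first. A standalone check of the bit arithmetic, with the byte swap omitted:

#include <assert.h>
#include <stdint.h>

#define CQE_DRAIN_S 10
#define CQE_DRAIN_M 0x1
#define CQE_DRAIN_G(x) (((x) >> CQE_DRAIN_S) & CQE_DRAIN_M)
#define CQE_DRAIN_V(x) ((x) << CQE_DRAIN_S)

int main(void)
{
	uint32_t header = CQE_DRAIN_V(1);	/* set the drain bit */

	assert(CQE_DRAIN_G(header) == 1);	/* read it back */
	assert(CQE_DRAIN_G(0) == 0);		/* clear header: not a drain CQE */
	return 0;
}
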
3219     diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
3220     index 95178b4e3565..ee578fa713c2 100644
3221     --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
3222     +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
3223     @@ -1000,8 +1000,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
3224     return -ENOMEM;
3225    
3226     attr->qp_state = IB_QPS_INIT;
3227     - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
3228     - IB_ACCESS_REMOTE_WRITE;
3229     + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
3230     attr->port_num = ch->sport->port;
3231     attr->pkey_index = 0;
3232    
3233     @@ -1992,7 +1991,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
3234     goto destroy_ib;
3235     }
3236    
3237     - guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
3238     + guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
3239     snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
3240     be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
3241     be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
3242     diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
3243     index 8e3adcb46851..6d416fdc25cb 100644
3244     --- a/drivers/md/dm-bufio.c
3245     +++ b/drivers/md/dm-bufio.c
3246     @@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
3247     int l;
3248     struct dm_buffer *b, *tmp;
3249     unsigned long freed = 0;
3250     - unsigned long count = nr_to_scan;
3251     + unsigned long count = c->n_buffers[LIST_CLEAN] +
3252     + c->n_buffers[LIST_DIRTY];
3253     unsigned long retain_target = get_retain_buffers(c);
3254    
3255     for (l = 0; l < LIST_SIZE; l++) {
3256     @@ -1647,8 +1648,11 @@ static unsigned long
3257     dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
3258     {
3259     struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
3260     + unsigned long count = ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) +
3261     + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
3262     + unsigned long retain_target = get_retain_buffers(c);
3263    
3264     - return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
3265     + return (count < retain_target) ? 0 : (count - retain_target);
3266     }
3267    
3268     /*
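
Both dm-bufio hunks teach the shrinker the same rule: report, and scan against, the total buffer count minus the retain target rather than the raw count, so the retained buffers are never promised to the VM. The arithmetic, checked standalone:

#include <assert.h>

static unsigned long freeable(unsigned long clean, unsigned long dirty,
			      unsigned long retain_target)
{
	unsigned long count = clean + dirty;

	return (count < retain_target) ? 0 : (count - retain_target);
}

int main(void)
{
	assert(freeable(100, 50, 200) == 0);	/* below target: report nothing */
	assert(freeable(300, 100, 150) == 250);	/* only the excess is freeable */
	return 0;
}
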
3269     diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
3270     index fcf7235d5742..157e1d9e7725 100644
3271     --- a/drivers/mmc/host/renesas_sdhi_core.c
3272     +++ b/drivers/mmc/host/renesas_sdhi_core.c
3273     @@ -24,6 +24,7 @@
3274     #include <linux/kernel.h>
3275     #include <linux/clk.h>
3276     #include <linux/slab.h>
3277     +#include <linux/module.h>
3278     #include <linux/of_device.h>
3279     #include <linux/platform_device.h>
3280     #include <linux/mmc/host.h>
3281     @@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
3282     return 0;
3283     }
3284     EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
3285     +
3286     +MODULE_LICENSE("GPL v2");
3287     diff --git a/drivers/mux/core.c b/drivers/mux/core.c
3288     index 2260063b0ea8..6e5cf9d9cd99 100644
3289     --- a/drivers/mux/core.c
3290     +++ b/drivers/mux/core.c
3291     @@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
3292     return dev->of_node == data;
3293     }
3294    
3295     +/* Note this function returns a reference to the mux_chip dev. */
3296     static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
3297     {
3298     struct device *dev;
3299     @@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
3300     (!args.args_count && (mux_chip->controllers > 1))) {
3301     dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
3302     np, args.np);
3303     + put_device(&mux_chip->dev);
3304     return ERR_PTR(-EINVAL);
3305     }
3306    
3307     @@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
3308     if (controller >= mux_chip->controllers) {
3309     dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
3310     np, controller, args.np);
3311     + put_device(&mux_chip->dev);
3312     return ERR_PTR(-EINVAL);
3313     }
3314    
3315     - get_device(&mux_chip->dev);
3316     return &mux_chip->mux[controller];
3317     }
3318     EXPORT_SYMBOL_GPL(mux_control_get);
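
mux_control_get() previously took its device reference only on the success path, after the error returns; the fix has of_find_mux_chip_by_node() hand back an already-referenced device and makes every early return drop it. The discipline in sketch form, with the hypothetical bad_cells standing in for the two validation checks in the hunks:

chip = of_find_mux_chip_by_node(np);	/* returns with a dev reference held */
if (bad_cells) {
	put_device(&chip->dev);		/* every error exit drops the ref */
	return ERR_PTR(-EINVAL);
}
return &chip->mux[controller];		/* success: caller inherits the ref */
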
3319     diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
3320     index 68ac3e88a8ce..8bf80ad9dc44 100644
3321     --- a/drivers/net/can/usb/gs_usb.c
3322     +++ b/drivers/net/can/usb/gs_usb.c
3323     @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
3324     dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
3325     rc);
3326    
3327     - return rc;
3328     + return (rc > 0) ? 0 : rc;
3329     }
3330    
3331     static void gs_usb_xmit_callback(struct urb *urb)
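
usb_control_msg() returns the number of bytes transferred on success, so gs_usb_set_bittiming was handing a positive byte count to callers that expect 0 or a negative errno; hence the normalization above. Standalone:

#include <assert.h>

static int normalize(int rc)
{
	return (rc > 0) ? 0 : rc;
}

int main(void)
{
	assert(normalize(20) == 0);	/* 20 bytes written: success */
	assert(normalize(0) == 0);
	assert(normalize(-71) == -71);	/* -EPROTO passes through unchanged */
	return 0;
}
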
3332     diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
3333     index 8404e8852a0f..b4c4a2c76437 100644
3334     --- a/drivers/net/can/vxcan.c
3335     +++ b/drivers/net/can/vxcan.c
3336     @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
3337     tbp = peer_tb;
3338     }
3339    
3340     - if (tbp[IFLA_IFNAME]) {
3341     + if (ifmp && tbp[IFLA_IFNAME]) {
3342     nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
3343     name_assign_type = NET_NAME_USER;
3344     } else {
3345     diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
3346     index faf7cdc97ebf..311539c6625f 100644
3347     --- a/drivers/net/ethernet/freescale/fec_main.c
3348     +++ b/drivers/net/ethernet/freescale/fec_main.c
3349     @@ -3458,6 +3458,10 @@ fec_probe(struct platform_device *pdev)
3350     goto failed_regulator;
3351     }
3352     } else {
3353     + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
3354     + ret = -EPROBE_DEFER;
3355     + goto failed_regulator;
3356     + }
3357     fep->reg_phy = NULL;
3358     }
3359    
3360     @@ -3539,8 +3543,9 @@ fec_probe(struct platform_device *pdev)
3361     failed_clk:
3362     if (of_phy_is_fixed_link(np))
3363     of_phy_deregister_fixed_link(np);
3364     -failed_phy:
3365     of_node_put(phy_node);
3366     +failed_phy:
3367     + dev_id--;
3368     failed_ioremap:
3369     free_netdev(ndev);
3370    
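
The probe previously treated every regulator lookup failure as "no regulator" and carried on; the hunk above lets -EPROBE_DEFER propagate so the probe is retried once the supply's provider appears, and the relabeled unwind also undoes the dev_id allocation. Roughly the shape of fec_probe()'s regulator handling, as a sketch:

fep->reg_phy = devm_regulator_get(&pdev->dev, "phy");
if (IS_ERR(fep->reg_phy)) {
	if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
		ret = -EPROBE_DEFER;	/* provider not bound yet: retry later */
		goto failed_regulator;
	}
	fep->reg_phy = NULL;		/* genuinely absent: run without it */
}
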
3371     diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
3372     index d6d4ed7acf03..31277d3bb7dc 100644
3373     --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
3374     +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
3375     @@ -1367,6 +1367,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
3376     * Checks to see if the link status of the hardware has changed. If a
3377     * change in link status has been detected, then we read the PHY registers
3378     * to get the current speed/duplex if link exists.
3379     + *
3380     + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
3381     + * up).
3382     **/
3383     static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3384     {
3385     @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3386     * Change or Rx Sequence Error interrupt.
3387     */
3388     if (!mac->get_link_status)
3389     - return 0;
3390     + return 1;
3391    
3392     /* First we want to see if the MII Status Register reports
3393     * link. If so, then we want to get the current speed/duplex
3394     @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3395     * different link partner.
3396     */
3397     ret_val = e1000e_config_fc_after_link_up(hw);
3398     - if (ret_val)
3399     + if (ret_val) {
3400     e_dbg("Error configuring flow control\n");
3401     + return ret_val;
3402     + }
3403    
3404     - return ret_val;
3405     + return 1;
3406     }
3407    
3408     static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
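
With this change e1000_check_for_copper_link_ich8lan() follows the usual check_for_link convention: negative for error, 0 for link down, 1 for link up (including the early "nothing pending" exit). A hypothetical caller, with handle_error() and configure_link() as placeholders, makes the tri-state explicit:

ret = e1000_check_for_copper_link_ich8lan(hw);
if (ret < 0)
	handle_error(ret);	/* -E1000_ERR_* */
else if (ret == 0)
	;			/* link down: nothing to do */
else
	configure_link(hw);	/* 1: link came up */
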
3409     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3410     index 3ead7439821c..99bd6e88ebc7 100644
3411     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3412     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3413     @@ -4235,7 +4235,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
3414     return -EINVAL;
3415     if (!info->linking)
3416     break;
3417     - if (netdev_has_any_upper_dev(upper_dev))
3418     + if (netdev_has_any_upper_dev(upper_dev) &&
3419     + (!netif_is_bridge_master(upper_dev) ||
3420     + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
3421     + upper_dev)))
3422     return -EINVAL;
3423     if (netif_is_lag_master(upper_dev) &&
3424     !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
3425     @@ -4347,6 +4350,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
3426     u16 vid)
3427     {
3428     struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
3429     + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
3430     struct netdev_notifier_changeupper_info *info = ptr;
3431     struct net_device *upper_dev;
3432     int err = 0;
3433     @@ -4358,7 +4362,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
3434     return -EINVAL;
3435     if (!info->linking)
3436     break;
3437     - if (netdev_has_any_upper_dev(upper_dev))
3438     + if (netdev_has_any_upper_dev(upper_dev) &&
3439     + (!netif_is_bridge_master(upper_dev) ||
3440     + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
3441     + upper_dev)))
3442     return -EINVAL;
3443     break;
3444     case NETDEV_CHANGEUPPER:
3445     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3446     index 84ce83acdc19..88892d47acae 100644
3447     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3448     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3449     @@ -326,6 +326,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
3450     void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
3451     struct net_device *brport_dev,
3452     struct net_device *br_dev);
3453     +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
3454     + const struct net_device *br_dev);
3455    
3456     /* spectrum.c */
3457     int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
3458     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3459     index 5189022a1c8c..c23cc51bb5a5 100644
3460     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3461     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3462     @@ -2536,7 +2536,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3463     {
3464     if (!removing)
3465     nh->should_offload = 1;
3466     - else if (nh->offloaded)
3467     + else
3468     nh->should_offload = 0;
3469     nh->update = 1;
3470     }
3471     diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3472     index d39ffbfcc436..f5863e5bec81 100644
3473     --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3474     +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3475     @@ -134,6 +134,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
3476     return NULL;
3477     }
3478    
3479     +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
3480     + const struct net_device *br_dev)
3481     +{
3482     + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
3483     +}
3484     +
3485     static struct mlxsw_sp_bridge_device *
3486     mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
3487     struct net_device *br_dev)
3488     diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
3489     index d2e88a30f57b..db31963c5d9d 100644
3490     --- a/drivers/net/ethernet/renesas/sh_eth.c
3491     +++ b/drivers/net/ethernet/renesas/sh_eth.c
3492     @@ -3212,18 +3212,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
3493     /* ioremap the TSU registers */
3494     if (mdp->cd->tsu) {
3495     struct resource *rtsu;
3496     +
3497     rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
3498     - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
3499     - if (IS_ERR(mdp->tsu_addr)) {
3500     - ret = PTR_ERR(mdp->tsu_addr);
3501     + if (!rtsu) {
3502     + dev_err(&pdev->dev, "no TSU resource\n");
3503     + ret = -ENODEV;
3504     + goto out_release;
3505     + }
3506     + /* We can only request the TSU region for the first port
3507     + * of the two sharing this TSU for the probe to succeed...
3508     + */
3509     + if (devno % 2 == 0 &&
3510     + !devm_request_mem_region(&pdev->dev, rtsu->start,
3511     + resource_size(rtsu),
3512     + dev_name(&pdev->dev))) {
3513     + dev_err(&pdev->dev, "can't request TSU resource.\n");
3514     + ret = -EBUSY;
3515     + goto out_release;
3516     + }
3517     + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
3518     + resource_size(rtsu));
3519     + if (!mdp->tsu_addr) {
3520     + dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
3521     + ret = -ENOMEM;
3522     goto out_release;
3523     }
3524     mdp->port = devno % 2;
3525     ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
3526     }
3527    
3528     - /* initialize first or needed device */
3529     - if (!devno || pd->needs_init) {
3530     + /* Need to init only the first port of the two sharing a TSU */
3531     + if (devno % 2 == 0) {
3532     if (mdp->cd->chip_reset)
3533     mdp->cd->chip_reset(ndev);
3534    
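
devm_ioremap_resource() both requests and maps the region, which cannot work when two ports share one TSU block: the second probe would always fail with -EBUSY. The rework splits the two steps so only the even-numbered port of a pair claims the region while both ports map it; a condensed sketch:

if (devno % 2 == 0 &&
    !devm_request_mem_region(&pdev->dev, rtsu->start,
			     resource_size(rtsu), dev_name(&pdev->dev)))
	return -EBUSY;		/* first port of the pair owns the TSU */

mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start, resource_size(rtsu));
if (!mdp->tsu_addr)
	return -ENOMEM;		/* both ports map the shared block */
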
3535     diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3536     index 28c4d6fa096c..0ad12c81a9e4 100644
3537     --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3538     +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3539     @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
3540     bool stmmac_eee_init(struct stmmac_priv *priv)
3541     {
3542     struct net_device *ndev = priv->dev;
3543     + int interface = priv->plat->interface;
3544     unsigned long flags;
3545     bool ret = false;
3546    
3547     + if ((interface != PHY_INTERFACE_MODE_MII) &&
3548     + (interface != PHY_INTERFACE_MODE_GMII) &&
3549     + !phy_interface_mode_is_rgmii(interface))
3550     + goto out;
3551     +
3552     /* Using PCS we cannot deal with the phy registers at this stage
3553     * so we do not support extra features like EEE.
3554     */
3555     diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
3556     index 4b377b978a0b..cb85307f125b 100644
3557     --- a/drivers/net/phy/phylink.c
3558     +++ b/drivers/net/phy/phylink.c
3559     @@ -1428,9 +1428,8 @@ static void phylink_sfp_link_down(void *upstream)
3560     WARN_ON(!lockdep_rtnl_is_held());
3561    
3562     set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
3563     + queue_work(system_power_efficient_wq, &pl->resolve);
3564     flush_work(&pl->resolve);
3565     -
3566     - netif_carrier_off(pl->netdev);
3567     }
3568    
3569     static void phylink_sfp_link_up(void *upstream)
3570     diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
3571     index 5cb5384697ea..7ae815bee52d 100644
3572     --- a/drivers/net/phy/sfp-bus.c
3573     +++ b/drivers/net/phy/sfp-bus.c
3574     @@ -359,7 +359,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream);
3575     void sfp_unregister_upstream(struct sfp_bus *bus)
3576     {
3577     rtnl_lock();
3578     - sfp_unregister_bus(bus);
3579     + if (bus->sfp)
3580     + sfp_unregister_bus(bus);
3581     bus->upstream = NULL;
3582     bus->netdev = NULL;
3583     rtnl_unlock();
3584     @@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket);
3585     void sfp_unregister_socket(struct sfp_bus *bus)
3586     {
3587     rtnl_lock();
3588     - sfp_unregister_bus(bus);
3589     + if (bus->netdev)
3590     + sfp_unregister_bus(bus);
3591     bus->sfp_dev = NULL;
3592     bus->sfp = NULL;
3593     bus->socket_ops = NULL;
3594     diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3595     index 4fb7647995c3..9875ab5ce18c 100644
3596     --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3597     +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3598     @@ -666,11 +666,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
3599     return index & (q->n_window - 1);
3600     }
3601    
3602     -static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
3603     +static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
3604     struct iwl_txq *txq, int idx)
3605     {
3606     - return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
3607     - idx);
3608     + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3609     +
3610     + if (trans->cfg->use_tfh)
3611     + idx = iwl_pcie_get_cmd_index(txq, idx);
3612     +
3613     + return txq->tfds + trans_pcie->tfd_size * idx;
3614     }
3615    
3616     static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
3617     diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3618     index d74613fcb756..6f45c8148b27 100644
3619     --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3620     +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3621     @@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
3622    
3623     static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
3624     {
3625     - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3626     -
3627     /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
3628     * idx is bounded by n_window
3629     */
3630     @@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
3631     lockdep_assert_held(&txq->lock);
3632    
3633     iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
3634     - iwl_pcie_get_tfd(trans_pcie, txq, idx));
3635     + iwl_pcie_get_tfd(trans, txq, idx));
3636    
3637     /* free SKB */
3638     if (txq->entries) {
3639     @@ -367,11 +365,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
3640     struct sk_buff *skb,
3641     struct iwl_cmd_meta *out_meta)
3642     {
3643     - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3644     struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
3645     int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
3646     - struct iwl_tfh_tfd *tfd =
3647     - iwl_pcie_get_tfd(trans_pcie, txq, idx);
3648     + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
3649     dma_addr_t tb_phys;
3650     bool amsdu;
3651     int i, len, tb1_len, tb2_len, hdr_len;
3652     @@ -568,8 +564,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
3653     u8 group_id = iwl_cmd_groupid(cmd->id);
3654     const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
3655     u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
3656     - struct iwl_tfh_tfd *tfd =
3657     - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
3658     + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
3659    
3660     memset(tfd, 0, sizeof(*tfd));
3661    
3662     diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3663     index c645d10d3707..4704137a26e0 100644
3664     --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3665     +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3666     @@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
3667     {
3668     struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3669     int i, num_tbs;
3670     - void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
3671     + void *tfd = iwl_pcie_get_tfd(trans, txq, index);
3672    
3673     /* Sanity check on number of chunks */
3674     num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
3675     @@ -1999,7 +1999,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
3676     }
3677    
3678     trace_iwlwifi_dev_tx(trans->dev, skb,
3679     - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
3680     + iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
3681     trans_pcie->tfd_size,
3682     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
3683     hdr_len);
3684     @@ -2073,7 +2073,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
3685     IEEE80211_CCMP_HDR_LEN : 0;
3686    
3687     trace_iwlwifi_dev_tx(trans->dev, skb,
3688     - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
3689     + iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
3690     trans_pcie->tfd_size,
3691     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
3692    
3693     @@ -2406,7 +2406,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
3694     memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
3695     IWL_FIRST_TB_SIZE);
3696    
3697     - tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
3698     + tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
3699     /* Set up entry for this TFD in Tx byte-count array */
3700     iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
3701     iwl_pcie_tfd_get_num_tbs(trans, tfd));
3702     diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
3703     index 0765b1797d4c..7f8fa42a1084 100644
3704     --- a/drivers/platform/x86/wmi.c
3705     +++ b/drivers/platform/x86/wmi.c
3706     @@ -1268,5 +1268,5 @@ static void __exit acpi_wmi_exit(void)
3707     bus_unregister(&wmi_bus_type);
3708     }
3709    
3710     -subsys_initcall(acpi_wmi_init);
3711     +subsys_initcall_sync(acpi_wmi_init);
3712     module_exit(acpi_wmi_exit);
3713     diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
3714     index 0f695df14c9d..372ce9913e6d 100644
3715     --- a/drivers/staging/android/ashmem.c
3716     +++ b/drivers/staging/android/ashmem.c
3717     @@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3718     break;
3719     case ASHMEM_SET_SIZE:
3720     ret = -EINVAL;
3721     + mutex_lock(&ashmem_mutex);
3722     if (!asma->file) {
3723     ret = 0;
3724     asma->size = (size_t)arg;
3725     }
3726     + mutex_unlock(&ashmem_mutex);
3727     break;
3728     case ASHMEM_GET_SIZE:
3729     ret = asma->size;
3730     diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
3731     index def1b05ffca0..284bd1a7b570 100644
3732     --- a/drivers/usb/gadget/udc/core.c
3733     +++ b/drivers/usb/gadget/udc/core.c
3734     @@ -1158,11 +1158,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3735    
3736     udc = kzalloc(sizeof(*udc), GFP_KERNEL);
3737     if (!udc)
3738     - goto err1;
3739     -
3740     - ret = device_add(&gadget->dev);
3741     - if (ret)
3742     - goto err2;
3743     + goto err_put_gadget;
3744    
3745     device_initialize(&udc->dev);
3746     udc->dev.release = usb_udc_release;
3747     @@ -1171,7 +1167,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3748     udc->dev.parent = parent;
3749     ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
3750     if (ret)
3751     - goto err3;
3752     + goto err_put_udc;
3753     +
3754     + ret = device_add(&gadget->dev);
3755     + if (ret)
3756     + goto err_put_udc;
3757    
3758     udc->gadget = gadget;
3759     gadget->udc = udc;
3760     @@ -1181,7 +1181,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3761    
3762     ret = device_add(&udc->dev);
3763     if (ret)
3764     - goto err4;
3765     + goto err_unlist_udc;
3766    
3767     usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
3768     udc->vbus = true;
3769     @@ -1189,27 +1189,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3770     /* pick up one of pending gadget drivers */
3771     ret = check_pending_gadget_drivers(udc);
3772     if (ret)
3773     - goto err5;
3774     + goto err_del_udc;
3775    
3776     mutex_unlock(&udc_lock);
3777    
3778     return 0;
3779    
3780     -err5:
3781     + err_del_udc:
3782     device_del(&udc->dev);
3783    
3784     -err4:
3785     + err_unlist_udc:
3786     list_del(&udc->list);
3787     mutex_unlock(&udc_lock);
3788    
3789     -err3:
3790     - put_device(&udc->dev);
3791     device_del(&gadget->dev);
3792    
3793     -err2:
3794     - kfree(udc);
3795     + err_put_udc:
3796     + put_device(&udc->dev);
3797    
3798     -err1:
3799     + err_put_gadget:
3800     put_device(&gadget->dev);
3801     return ret;
3802     }
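
The relabeled unwind above follows one rule: once device_initialize() has run, the udc must be disposed of with put_device(), which ends in usb_udc_release() and frees it, never kfree(); and each label undoes exactly the steps completed before its goto. In sketch form:

udc = kzalloc(sizeof(*udc), GFP_KERNEL);
if (!udc)
	goto err_put_gadget;		/* only the gadget ref exists so far */

device_initialize(&udc->dev);		/* from here on, put_device() only */
/* ... dev_set_name(), device_add(&gadget->dev), device_add(&udc->dev) ... */

 err_put_udc:
	put_device(&udc->dev);		/* frees udc through its release hook */
 err_put_gadget:
	put_device(&gadget->dev);
	return ret;
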
3803     diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
3804     index 8e7737d7ac0a..03be5d574f23 100644
3805     --- a/drivers/usb/misc/usb3503.c
3806     +++ b/drivers/usb/misc/usb3503.c
3807     @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
3808     if (gpio_is_valid(hub->gpio_reset)) {
3809     err = devm_gpio_request_one(dev, hub->gpio_reset,
3810     GPIOF_OUT_INIT_LOW, "usb3503 reset");
3811     + /* Datasheet defines a hardware reset to be at least 100us */
3812     + usleep_range(100, 10000);
3813     if (err) {
3814     dev_err(dev,
3815     "unable to request GPIO %d as reset pin (%d)\n",
3816     diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
3817     index f6ae753ab99b..f932f40302df 100644
3818     --- a/drivers/usb/mon/mon_bin.c
3819     +++ b/drivers/usb/mon/mon_bin.c
3820     @@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
3821     break;
3822    
3823     case MON_IOCQ_RING_SIZE:
3824     + mutex_lock(&rp->fetch_lock);
3825     ret = rp->b_size;
3826     + mutex_unlock(&rp->fetch_lock);
3827     break;
3828    
3829     case MON_IOCT_RING_SIZE:
3830     @@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
3831     unsigned long offset, chunk_idx;
3832     struct page *pageptr;
3833    
3834     + mutex_lock(&rp->fetch_lock);
3835     offset = vmf->pgoff << PAGE_SHIFT;
3836     - if (offset >= rp->b_size)
3837     + if (offset >= rp->b_size) {
3838     + mutex_unlock(&rp->fetch_lock);
3839     return VM_FAULT_SIGBUS;
3840     + }
3841     chunk_idx = offset / CHUNK_SIZE;
3842     pageptr = rp->b_vec[chunk_idx].pg;
3843     get_page(pageptr);
3844     + mutex_unlock(&rp->fetch_lock);
3845     vmf->page = pageptr;
3846     return 0;
3847     }
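
Both mon_bin hunks close the same race: rp->b_size and the rp->b_vec chunk table are resized together under fetch_lock, so a reader must hold that lock from the size check through its last use of the table. Condensed:

mutex_lock(&rp->fetch_lock);
if (offset < rp->b_size) {		/* size checked... */
	pageptr = rp->b_vec[offset / CHUNK_SIZE].pg;
	get_page(pageptr);		/* ...and the page pinned before unlock */
}
mutex_unlock(&rp->fetch_lock);
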
3848     diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
3849     index 412f812522ee..aed182d24d23 100644
3850     --- a/drivers/usb/serial/cp210x.c
3851     +++ b/drivers/usb/serial/cp210x.c
3852     @@ -127,6 +127,7 @@ static const struct usb_device_id id_table[] = {
3853     { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
3854     { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
3855     { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
3856     + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
3857     { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
3858     { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
3859     { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
3860     @@ -177,6 +178,7 @@ static const struct usb_device_id id_table[] = {
3861     { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
3862     { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
3863     { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
3864     + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
3865     { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
3866     { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
3867     { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
3868     diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
3869     index 9f356f7cf7d5..719ec68ae309 100644
3870     --- a/drivers/usb/storage/unusual_uas.h
3871     +++ b/drivers/usb/storage/unusual_uas.h
3872     @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
3873     USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3874     US_FL_NO_ATA_1X),
3875    
3876     +/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
3877     +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
3878     + "Norelsys",
3879     + "NS1068X",
3880     + USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3881     + US_FL_IGNORE_UAS),
3882     +
3883     /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
3884     UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
3885     "JMicron",
3886     diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
3887     index 17b599b923f3..7f0d22131121 100644
3888     --- a/drivers/usb/usbip/usbip_common.c
3889     +++ b/drivers/usb/usbip/usbip_common.c
3890     @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3891     dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
3892     udev->devnum, udev->devpath, usb_speed_string(udev->speed));
3893    
3894     - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
3895     + pr_debug("tt hub ttport %d\n", udev->ttport);
3896    
3897     dev_dbg(dev, " ");
3898     for (i = 0; i < 16; i++)
3899     @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3900     }
3901     pr_debug("\n");
3902    
3903     - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
3904     -
3905     - dev_dbg(dev,
3906     - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
3907     - &udev->descriptor, udev->config,
3908     - udev->actconfig, udev->rawdescriptors);
3909     + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
3910     + udev->bus->bus_name);
3911    
3912     dev_dbg(dev, "have_langid %d, string_langid %d\n",
3913     udev->have_langid, udev->string_langid);
3914     @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb)
3915    
3916     dev = &urb->dev->dev;
3917    
3918     - dev_dbg(dev, " urb :%p\n", urb);
3919     - dev_dbg(dev, " dev :%p\n", urb->dev);
3920     -
3921     usbip_dump_usb_device(urb->dev);
3922    
3923     dev_dbg(dev, " pipe :%08x ", urb->pipe);
3924     @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb)
3925    
3926     dev_dbg(dev, " status :%d\n", urb->status);
3927     dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
3928     - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
3929     dev_dbg(dev, " transfer_buffer_length:%d\n",
3930     urb->transfer_buffer_length);
3931     dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
3932     - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
3933    
3934     if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
3935     usbip_dump_usb_ctrlrequest(
3936     @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb)
3937     dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
3938     dev_dbg(dev, " interval :%d\n", urb->interval);
3939     dev_dbg(dev, " error_count :%d\n", urb->error_count);
3940     - dev_dbg(dev, " context :%p\n", urb->context);
3941     - dev_dbg(dev, " complete :%p\n", urb->complete);
3942     }
3943     EXPORT_SYMBOL_GPL(usbip_dump_urb);
3944    
3945     diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
3946     index e429b59f6f8a..d020e72b3122 100644
3947     --- a/drivers/usb/usbip/vudc_rx.c
3948     +++ b/drivers/usb/usbip/vudc_rx.c
3949     @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
3950     urb_p->new = 1;
3951     urb_p->seqnum = pdu->base.seqnum;
3952    
3953     + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
3954     + /* validate packet size and number of packets */
3955     + unsigned int maxp, packets, bytes;
3956     +
3957     + maxp = usb_endpoint_maxp(urb_p->ep->desc);
3958     + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
3959     + bytes = pdu->u.cmd_submit.transfer_buffer_length;
3960     + packets = DIV_ROUND_UP(bytes, maxp);
3961     +
3962     + if (pdu->u.cmd_submit.number_of_packets < 0 ||
3963     + pdu->u.cmd_submit.number_of_packets > packets) {
3964     + dev_err(&udc->gadget.dev,
3965     + "CMD_SUBMIT: isoc invalid num packets %d\n",
3966     + pdu->u.cmd_submit.number_of_packets);
3967     + ret = -EMSGSIZE;
3968     + goto free_urbp;
3969     + }
3970     + }
3971     +
3972     ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
3973     if (ret) {
3974     usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
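
The new check bounds a peer-supplied number_of_packets by the most packets that could fit in the advertised buffer: ceil(transfer_buffer_length / (maxp * mult)). The arithmetic, checked standalone:

#include <assert.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int maxp = 1024 * 3;	/* wMaxPacketSize times the mult factor */
	unsigned int bytes = 10000;	/* transfer_buffer_length from the PDU */
	unsigned int limit = DIV_ROUND_UP(bytes, maxp);

	assert(limit == 4);		/* at most 4 packets fit in 10000 bytes */
	assert(3 <= (int)limit);	/* 3 packets: accepted */
	assert(!(5 <= (int)limit));	/* 5 packets: rejected with -EMSGSIZE */
	return 0;
}
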
3975     diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
3976     index 234661782fa0..3ab4c86486a7 100644
3977     --- a/drivers/usb/usbip/vudc_tx.c
3978     +++ b/drivers/usb/usbip/vudc_tx.c
3979     @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3980     memset(&pdu_header, 0, sizeof(pdu_header));
3981     memset(&msg, 0, sizeof(msg));
3982    
3983     + if (urb->actual_length > 0 && !urb->transfer_buffer) {
3984     + dev_err(&udc->gadget.dev,
3985     + "urb: actual_length %d transfer_buffer null\n",
3986     + urb->actual_length);
3987     + return -1;
3988     + }
3989     +
3990     if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
3991     iovnum = 2 + urb->number_of_packets;
3992     else
3993     @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3994    
3995     /* 1. setup usbip_header */
3996     setup_ret_submit_pdu(&pdu_header, urb_p);
3997     - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
3998     - pdu_header.base.seqnum, urb);
3999     + usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
4000     + pdu_header.base.seqnum);
4001     usbip_header_correct_endian(&pdu_header, 1);
4002    
4003     iov[iovnum].iov_base = &pdu_header;
4004     diff --git a/include/linux/bpf.h b/include/linux/bpf.h
4005     index f1af7d63d678..0bcf803f20de 100644
4006     --- a/include/linux/bpf.h
4007     +++ b/include/linux/bpf.h
4008     @@ -51,6 +51,7 @@ struct bpf_map {
4009     u32 pages;
4010     u32 id;
4011     int numa_node;
4012     + bool unpriv_array;
4013     struct user_struct *user;
4014     const struct bpf_map_ops *ops;
4015     struct work_struct work;
4016     @@ -195,6 +196,7 @@ struct bpf_prog_aux {
4017     struct bpf_array {
4018     struct bpf_map map;
4019     u32 elem_size;
4020     + u32 index_mask;
4021     /* 'ownership' of prog_array is claimed by the first program that
4022     * is going to use this map or by the first program which FD is stored
4023     * in the map to make sure that all callers and callees have the same
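
index_mask and unpriv_array support the hardening applied in kernel/bpf/arraymap.c further below: for unprivileged maps the backing store is rounded up to a power of two and every index is ANDed with size minus one, so even a mispredicted bounds check cannot steer a speculative load outside the array. A standalone model of the mask computation; the kernel derives it with fls_long() in u64 space, and this loop computes the same value:

#include <assert.h>
#include <stdint.h>

static uint32_t index_mask_for(uint32_t max_entries)
{
	uint64_t bits = 0;

	/* smallest power of two >= max_entries, minus one */
	while ((1ULL << bits) < max_entries)
		bits++;
	return (uint32_t)((1ULL << bits) - 1);
}

int main(void)
{
	assert(index_mask_for(1) == 0x0);
	assert(index_mask_for(16) == 0xf);
	assert(index_mask_for(17) == 0x1f);	/* rounded up to 32 slots */
	/* any index, masked, stays inside the power-of-two backing store */
	assert((1000u & index_mask_for(17)) <= 0x1f);
	return 0;
}
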
4024     diff --git a/include/linux/cpu.h b/include/linux/cpu.h
4025     index 938ea8ae0ba4..c816e6f2730c 100644
4026     --- a/include/linux/cpu.h
4027     +++ b/include/linux/cpu.h
4028     @@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
4029     extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
4030     extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
4031    
4032     +extern ssize_t cpu_show_meltdown(struct device *dev,
4033     + struct device_attribute *attr, char *buf);
4034     +extern ssize_t cpu_show_spectre_v1(struct device *dev,
4035     + struct device_attribute *attr, char *buf);
4036     +extern ssize_t cpu_show_spectre_v2(struct device *dev,
4037     + struct device_attribute *attr, char *buf);
4038     +
4039     extern __printf(4, 5)
4040     struct device *cpu_device_create(struct device *parent, void *drvdata,
4041     const struct attribute_group **groups,
4042     diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
4043     index 06097ef30449..b511f6d24b42 100644
4044     --- a/include/linux/crash_core.h
4045     +++ b/include/linux/crash_core.h
4046     @@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
4047     vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
4048     #define VMCOREINFO_SYMBOL(name) \
4049     vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
4050     +#define VMCOREINFO_SYMBOL_ARRAY(name) \
4051     + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
4052     #define VMCOREINFO_SIZE(name) \
4053     vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
4054     (unsigned long)sizeof(name))
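
VMCOREINFO_SYMBOL records (unsigned long)&name, which for an array is the address of the whole array object and for a pointer variable is the address of the variable itself; the new ARRAY variant records plain name, the decayed first-element address. A standalone check of the C distinction involved:

#include <assert.h>
#include <stdint.h>

static char an_array[64];
static char *a_pointer = an_array + 8;

int main(void)
{
	/* array: name decays to &name[0], same address as &name */
	assert((uintptr_t)an_array == (uintptr_t)&an_array);
	/* pointer: name (the stored address) and &name (the variable) differ */
	assert((uintptr_t)a_pointer != (uintptr_t)&a_pointer);
	return 0;
}
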
4055     diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
4056     index ff3642d267f7..94081e9a5010 100644
4057     --- a/include/linux/sh_eth.h
4058     +++ b/include/linux/sh_eth.h
4059     @@ -17,7 +17,6 @@ struct sh_eth_plat_data {
4060     unsigned char mac_addr[ETH_ALEN];
4061     unsigned no_ether_link:1;
4062     unsigned ether_link_active_low:1;
4063     - unsigned needs_init:1;
4064     };
4065    
4066     #endif
4067     diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
4068     index 0477945de1a3..8e1e1dc490fd 100644
4069     --- a/include/net/sctp/structs.h
4070     +++ b/include/net/sctp/structs.h
4071     @@ -955,7 +955,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
4072     void sctp_transport_burst_reset(struct sctp_transport *);
4073     unsigned long sctp_transport_timeout(struct sctp_transport *);
4074     void sctp_transport_reset(struct sctp_transport *t);
4075     -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
4076     +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
4077     void sctp_transport_immediate_rtx(struct sctp_transport *);
4078     void sctp_transport_dst_release(struct sctp_transport *t);
4079     void sctp_transport_dst_confirm(struct sctp_transport *t);
4080     diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
4081     index e4b0b8e09932..2c735a3e6613 100644
4082     --- a/include/trace/events/kvm.h
4083     +++ b/include/trace/events/kvm.h
4084     @@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
4085     { KVM_TRACE_MMIO_WRITE, "write" }
4086    
4087     TRACE_EVENT(kvm_mmio,
4088     - TP_PROTO(int type, int len, u64 gpa, u64 val),
4089     + TP_PROTO(int type, int len, u64 gpa, void *val),
4090     TP_ARGS(type, len, gpa, val),
4091    
4092     TP_STRUCT__entry(
4093     @@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
4094     __entry->type = type;
4095     __entry->len = len;
4096     __entry->gpa = gpa;
4097     - __entry->val = val;
4098     + __entry->val = 0;
4099     + if (val)
4100     + memcpy(&__entry->val, val,
4101     + min_t(u32, sizeof(__entry->val), len));
4102     ),
4103    
4104     TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
4105     diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
4106     index e2636737b69b..a4ae1ca44a57 100644
4107     --- a/kernel/bpf/arraymap.c
4108     +++ b/kernel/bpf/arraymap.c
4109     @@ -50,9 +50,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4110     {
4111     bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
4112     int numa_node = bpf_map_attr_numa_node(attr);
4113     + u32 elem_size, index_mask, max_entries;
4114     + bool unpriv = !capable(CAP_SYS_ADMIN);
4115     struct bpf_array *array;
4116     - u64 array_size;
4117     - u32 elem_size;
4118     + u64 array_size, mask64;
4119    
4120     /* check sanity of attributes */
4121     if (attr->max_entries == 0 || attr->key_size != 4 ||
4122     @@ -68,11 +69,32 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4123    
4124     elem_size = round_up(attr->value_size, 8);
4125    
4126     + max_entries = attr->max_entries;
4127     +
4128     + /* On 32 bit archs roundup_pow_of_two() with max_entries that has
4129     + * uppermost bit set in u32 space is undefined behavior due to
4130     + * resulting 1U << 32, so do it manually here in u64 space.
4131     + */
4132     + mask64 = fls_long(max_entries - 1);
4133     + mask64 = 1ULL << mask64;
4134     + mask64 -= 1;
4135     +
4136     + index_mask = mask64;
4137     + if (unpriv) {
4138     + /* round up array size to nearest power of 2,
4139     + * since cpu will speculate within index_mask limits
4140     + */
4141     + max_entries = index_mask + 1;
4142     + /* Check for overflows. */
4143     + if (max_entries < attr->max_entries)
4144     + return ERR_PTR(-E2BIG);
4145     + }
4146     +
4147     array_size = sizeof(*array);
4148     if (percpu)
4149     - array_size += (u64) attr->max_entries * sizeof(void *);
4150     + array_size += (u64) max_entries * sizeof(void *);
4151     else
4152     - array_size += (u64) attr->max_entries * elem_size;
4153     + array_size += (u64) max_entries * elem_size;
4154    
4155     /* make sure there is no u32 overflow later in round_up() */
4156     if (array_size >= U32_MAX - PAGE_SIZE)
4157     @@ -82,6 +104,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4158     array = bpf_map_area_alloc(array_size, numa_node);
4159     if (!array)
4160     return ERR_PTR(-ENOMEM);
4161     + array->index_mask = index_mask;
4162     + array->map.unpriv_array = unpriv;
4163    
4164     /* copy mandatory map attributes */
4165     array->map.map_type = attr->map_type;
4166     @@ -117,12 +141,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
4167     if (unlikely(index >= array->map.max_entries))
4168     return NULL;
4169    
4170     - return array->value + array->elem_size * index;
4171     + return array->value + array->elem_size * (index & array->index_mask);
4172     }
4173    
4174     /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
4175     static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
4176     {
4177     + struct bpf_array *array = container_of(map, struct bpf_array, map);
4178     struct bpf_insn *insn = insn_buf;
4179     u32 elem_size = round_up(map->value_size, 8);
4180     const int ret = BPF_REG_0;
4181     @@ -131,7 +156,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
4182    
4183     *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
4184     *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
4185     - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
4186     + if (map->unpriv_array) {
4187     + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
4188     + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
4189     + } else {
4190     + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
4191     + }
4192    
4193     if (is_power_of_2(elem_size)) {
4194     *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
4195     @@ -153,7 +183,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
4196     if (unlikely(index >= array->map.max_entries))
4197     return NULL;
4198    
4199     - return this_cpu_ptr(array->pptrs[index]);
4200     + return this_cpu_ptr(array->pptrs[index & array->index_mask]);
4201     }
4202    
4203     int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
4204     @@ -173,7 +203,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
4205     */
4206     size = round_up(map->value_size, 8);
4207     rcu_read_lock();
4208     - pptr = array->pptrs[index];
4209     + pptr = array->pptrs[index & array->index_mask];
4210     for_each_possible_cpu(cpu) {
4211     bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
4212     off += size;
4213     @@ -221,10 +251,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
4214     return -EEXIST;
4215    
4216     if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
4217     - memcpy(this_cpu_ptr(array->pptrs[index]),
4218     + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
4219     value, map->value_size);
4220     else
4221     - memcpy(array->value + array->elem_size * index,
4222     + memcpy(array->value +
4223     + array->elem_size * (index & array->index_mask),
4224     value, map->value_size);
4225     return 0;
4226     }
4227     @@ -258,7 +289,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
4228     */
4229     size = round_up(map->value_size, 8);
4230     rcu_read_lock();
4231     - pptr = array->pptrs[index];
4232     + pptr = array->pptrs[index & array->index_mask];
4233     for_each_possible_cpu(cpu) {
4234     bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
4235     off += size;
4236     @@ -609,6 +640,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
4237     static u32 array_of_map_gen_lookup(struct bpf_map *map,
4238     struct bpf_insn *insn_buf)
4239     {
4240     + struct bpf_array *array = container_of(map, struct bpf_array, map);
4241     u32 elem_size = round_up(map->value_size, 8);
4242     struct bpf_insn *insn = insn_buf;
4243     const int ret = BPF_REG_0;
4244     @@ -617,7 +649,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
4245    
4246     *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
4247     *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
4248     - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
4249     + if (map->unpriv_array) {
4250     + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
4251     + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
4252     + } else {
4253     + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
4254     + }
4255     if (is_power_of_2(elem_size))
4256     *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
4257     else
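
The arraymap changes compute a power-of-two-minus-one index mask in u64 space, because roundup_pow_of_two() on a u32 whose top bit is set would evaluate 1U << 32, which is undefined behavior. A minimal userspace rendering of that computation follows; fls64 stands in for the kernel's fls_long() on 64-bit, and __builtin_clzll is a GCC/Clang builtin assumed available.

    #include <stdint.h>
    #include <stdio.h>

    static unsigned int fls64(uint64_t x)   /* highest set bit, 1-based; 0 for 0 */
    {
            return x ? 64 - __builtin_clzll(x) : 0;
    }

    static uint32_t compute_index_mask(uint32_t max_entries)
    {
            /* caller guarantees max_entries >= 1, as the patch does;
             * done in u64 space so 1 << 32 is well defined */
            uint64_t mask64 = (1ULL << fls64((uint64_t)max_entries - 1)) - 1;

            return (uint32_t)mask64;
    }

    int main(void)
    {
            /* 5 entries -> mask 7; the rounded-up size 8 bounds any
             * speculative access of (index & mask) */
            printf("mask(5)          = 0x%x\n", compute_index_mask(5));
            printf("mask(0x80000001) = 0x%x\n", compute_index_mask(0x80000001u));
            return 0;
    }
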
4258     diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
4259     index c5ff809e86d0..75a5c3312f46 100644
4260     --- a/kernel/bpf/verifier.c
4261     +++ b/kernel/bpf/verifier.c
4262     @@ -1701,6 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
4263     err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
4264     if (err)
4265     return err;
4266     + if (func_id == BPF_FUNC_tail_call) {
4267     + if (meta.map_ptr == NULL) {
4268     + verbose("verifier bug\n");
4269     + return -EINVAL;
4270     + }
4271     + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
4272     + }
4273     err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
4274     if (err)
4275     return err;
4276     @@ -2486,6 +2493,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
4277     return -EINVAL;
4278     }
4279    
4280     + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
4281     + verbose("BPF_ARSH not supported for 32 bit ALU\n");
4282     + return -EINVAL;
4283     + }
4284     +
4285     if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4286     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4287     int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4288     @@ -4315,6 +4327,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
4289     */
4290     insn->imm = 0;
4291     insn->code = BPF_JMP | BPF_TAIL_CALL;
4292     +
4293     + /* instead of changing every JIT dealing with tail_call
4294     + * emit two extra insns:
4295     + * if (index >= max_entries) goto out;
4296     + * index &= array->index_mask;
4297     + * to avoid out-of-bounds cpu speculation
4298     + */
4299     + map_ptr = env->insn_aux_data[i + delta].map_ptr;
4300     + if (map_ptr == BPF_MAP_PTR_POISON) {
4301     + verbose("tail_call abusing map_ptr\n");
4302     + return -EINVAL;
4303     + }
4304     + if (!map_ptr->unpriv_array)
4305     + continue;
4306     + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
4307     + map_ptr->max_entries, 2);
4308     + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
4309     + container_of(map_ptr,
4310     + struct bpf_array,
4311     + map)->index_mask);
4312     + insn_buf[2] = *insn;
4313     + cnt = 3;
4314     + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4315     + if (!new_prog)
4316     + return -ENOMEM;
4317     +
4318     + delta += cnt - 1;
4319     + env->prog = prog = new_prog;
4320     + insn = new_prog->insnsi + i + delta;
4321     continue;
4322     }
4323    
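
The verifier fixup reaches the array's index_mask through container_of(map_ptr, struct bpf_array, map), walking from the embedded bpf_map back to its enclosing bpf_array. Here is a self-contained demo of that idiom; the _demo types are invented stand-ins, and the macro matches the kernel's definition in spirit.

    #include <stddef.h>
    #include <stdio.h>

    /* recover the outer structure from a pointer to one of its members */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct bpf_map_demo { unsigned int max_entries; };
    struct bpf_array_demo {
            struct bpf_map_demo map;        /* embedded, like struct bpf_array */
            unsigned int index_mask;
    };

    int main(void)
    {
            struct bpf_array_demo arr = { .map = { .max_entries = 5 },
                                          .index_mask = 7 };
            struct bpf_map_demo *map = &arr.map;    /* only the inner view */

            struct bpf_array_demo *back =
                    container_of(map, struct bpf_array_demo, map);
            printf("index_mask = %u\n", back->index_mask);  /* prints 7 */
            return 0;
    }
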
4324     diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
4325     index 44857278eb8a..030e4286f14c 100644
4326     --- a/kernel/cgroup/cgroup.c
4327     +++ b/kernel/cgroup/cgroup.c
4328     @@ -4059,26 +4059,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
4329    
4330     static void css_task_iter_advance(struct css_task_iter *it)
4331     {
4332     - struct list_head *l = it->task_pos;
4333     + struct list_head *next;
4334    
4335     lockdep_assert_held(&css_set_lock);
4336     - WARN_ON_ONCE(!l);
4337     -
4338     repeat:
4339     /*
4340     * Advance iterator to find next entry. cset->tasks is consumed
4341     * first and then ->mg_tasks. After ->mg_tasks, we move onto the
4342     * next cset.
4343     */
4344     - l = l->next;
4345     + next = it->task_pos->next;
4346    
4347     - if (l == it->tasks_head)
4348     - l = it->mg_tasks_head->next;
4349     + if (next == it->tasks_head)
4350     + next = it->mg_tasks_head->next;
4351    
4352     - if (l == it->mg_tasks_head)
4353     + if (next == it->mg_tasks_head)
4354     css_task_iter_advance_css_set(it);
4355     else
4356     - it->task_pos = l;
4357     + it->task_pos = next;
4358    
4359     /* if PROCS, skip over tasks which aren't group leaders */
4360     if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
4361     diff --git a/kernel/crash_core.c b/kernel/crash_core.c
4362     index 6db80fc0810b..2d90996dbe77 100644
4363     --- a/kernel/crash_core.c
4364     +++ b/kernel/crash_core.c
4365     @@ -409,7 +409,7 @@ static int __init crash_save_vmcoreinfo_init(void)
4366     VMCOREINFO_SYMBOL(contig_page_data);
4367     #endif
4368     #ifdef CONFIG_SPARSEMEM
4369     - VMCOREINFO_SYMBOL(mem_section);
4370     + VMCOREINFO_SYMBOL_ARRAY(mem_section);
4371     VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
4372     VMCOREINFO_STRUCT_SIZE(mem_section);
4373     VMCOREINFO_OFFSET(mem_section, section_mem_map);
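
VMCOREINFO_SYMBOL records &name, which is fine for a true array (name and &name are the same address) but wrong once mem_section can be a dynamically allocated pointer variable, as with SPARSEMEM_EXTREME on this kernel: &name then points at the pointer itself, not at the table a dump tool wants. VMCOREINFO_SYMBOL_ARRAY records plain name instead. A small program showing the difference (table and table_ptr are invented for the demo):

    #include <stdio.h>

    static long table[4];
    static long *table_ptr = table;

    int main(void)
    {
            printf("array:   name=%p &name=%p (same address)\n",
                   (void *)table, (void *)&table);
            printf("pointer: name=%p &name=%p (different)\n",
                   (void *)table_ptr, (void *)&table_ptr);
            return 0;
    }
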
4374     diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
4375     index dd7908743dab..9bcbacba82a8 100644
4376     --- a/kernel/sched/membarrier.c
4377     +++ b/kernel/sched/membarrier.c
4378     @@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
4379     rcu_read_unlock();
4380     }
4381     if (!fallback) {
4382     + preempt_disable();
4383     smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
4384     + preempt_enable();
4385     free_cpumask_var(tmpmask);
4386     }
4387     cpus_read_unlock();
4388     diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
4389     index 4a72ee4e2ae9..cf2e70003a53 100644
4390     --- a/net/8021q/vlan.c
4391     +++ b/net/8021q/vlan.c
4392     @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
4393     vlan_gvrp_uninit_applicant(real_dev);
4394     }
4395    
4396     - /* Take it out of our own structures, but be sure to interlock with
4397     - * HW accelerating devices or SW vlan input packet processing if
4398     - * VLAN is not 0 (leave it there for 802.1p).
4399     - */
4400     - if (vlan_id)
4401     - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
4402     + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
4403    
4404     /* Get rid of the vlan's reference to real_dev */
4405     dev_put(real_dev);
4406     diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
4407     index 43ba91c440bc..fc6615d59165 100644
4408     --- a/net/bluetooth/l2cap_core.c
4409     +++ b/net/bluetooth/l2cap_core.c
4410     @@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
4411     break;
4412    
4413     case L2CAP_CONF_EFS:
4414     - remote_efs = 1;
4415     - if (olen == sizeof(efs))
4416     + if (olen == sizeof(efs)) {
4417     + remote_efs = 1;
4418     memcpy(&efs, (void *) val, olen);
4419     + }
4420     break;
4421    
4422     case L2CAP_CONF_EWS:
4423     @@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
4424     break;
4425    
4426     case L2CAP_CONF_EFS:
4427     - if (olen == sizeof(efs))
4428     + if (olen == sizeof(efs)) {
4429     memcpy(&efs, (void *)val, olen);
4430    
4431     - if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
4432     - efs.stype != L2CAP_SERV_NOTRAFIC &&
4433     - efs.stype != chan->local_stype)
4434     - return -ECONNREFUSED;
4435     + if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
4436     + efs.stype != L2CAP_SERV_NOTRAFIC &&
4437     + efs.stype != chan->local_stype)
4438     + return -ECONNREFUSED;
4439    
4440     - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
4441     - (unsigned long) &efs, endptr - ptr);
4442     + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
4443     + (unsigned long) &efs, endptr - ptr);
4444     + }
4445     break;
4446    
4447     case L2CAP_CONF_FCS:
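
Both l2cap hunks tighten the same invariant: an option is copied, and only then treated as present, when its advertised length olen exactly matches the destination structure, so a malformed short option can no longer flip state or trigger an over-read. The pattern reduced to standalone C; efs_demo and parse_opt are invented names, not the kernel's.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct efs_demo { uint8_t id, stype; uint16_t msdu; }; /* stand-in struct */

    static int parse_opt(const uint8_t *val, size_t olen,
                         struct efs_demo *efs, int *remote_efs)
    {
            if (olen != sizeof(*efs))
                    return -1;      /* wrong length: ignore, trust nothing */
            *remote_efs = 1;        /* mark present only after the check */
            memcpy(efs, val, olen);
            return 0;
    }

    int main(void)
    {
            uint8_t short_opt[2] = { 1, 2 };        /* truncated option */
            struct efs_demo efs = { 0 };
            int remote_efs = 0;

            parse_opt(short_opt, sizeof(short_opt), &efs, &remote_efs);
            printf("remote_efs = %d\n", remote_efs); /* stays 0 */
            return 0;
    }
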
4448     diff --git a/net/core/ethtool.c b/net/core/ethtool.c
4449     index 9a9a3d77e327..d374a904f1b1 100644
4450     --- a/net/core/ethtool.c
4451     +++ b/net/core/ethtool.c
4452     @@ -754,15 +754,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
4453     return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
4454     }
4455    
4456     -static void
4457     -warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
4458     -{
4459     - char name[sizeof(current->comm)];
4460     -
4461     - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
4462     - get_task_comm(name, current), details);
4463     -}
4464     -
4465     /* Query device for its ethtool_cmd settings.
4466     *
4467     * Backward compatibility note: for compatibility with legacy ethtool,
4468     @@ -789,10 +780,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
4469     &link_ksettings);
4470     if (err < 0)
4471     return err;
4472     - if (!convert_link_ksettings_to_legacy_settings(&cmd,
4473     - &link_ksettings))
4474     - warn_incomplete_ethtool_legacy_settings_conversion(
4475     - "link modes are only partially reported");
4476     + convert_link_ksettings_to_legacy_settings(&cmd,
4477     + &link_ksettings);
4478    
4479     /* send a sensible cmd tag back to user */
4480     cmd.cmd = ETHTOOL_GSET;
4481     diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
4482     index 217f4e3b82f6..146b50e30659 100644
4483     --- a/net/core/sock_diag.c
4484     +++ b/net/core/sock_diag.c
4485     @@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
4486     case SKNLGRP_INET6_UDP_DESTROY:
4487     if (!sock_diag_handlers[AF_INET6])
4488     request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
4489     - NETLINK_SOCK_DIAG, AF_INET);
4490     + NETLINK_SOCK_DIAG, AF_INET6);
4491     break;
4492     }
4493     return 0;
4494     diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
4495     index 95516138e861..d6189c2a35e4 100644
4496     --- a/net/ipv6/exthdrs.c
4497     +++ b/net/ipv6/exthdrs.c
4498     @@ -884,6 +884,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
4499     sr_phdr->segments[0] = **addr_p;
4500     *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
4501    
4502     + if (sr_ihdr->hdrlen > hops * 2) {
4503     + int tlvs_offset, tlvs_length;
4504     +
4505     + tlvs_offset = (1 + hops * 2) << 3;
4506     + tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
4507     + memcpy((char *)sr_phdr + tlvs_offset,
4508     + (char *)sr_ihdr + tlvs_offset, tlvs_length);
4509     + }
4510     +
4511     #ifdef CONFIG_IPV6_SEG6_HMAC
4512     if (sr_has_hmac(sr_phdr)) {
4513     struct net *net = NULL;
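
The exthdrs hunk copies any TLVs sitting past the segment list when building the outgoing SRH. The arithmetic relies on hdrlen counting 8-octet units beyond the first 8 bytes, with each 16-byte IPv6 segment costing 2 units; whatever is left over is TLV data. Worked out in a standalone snippet, with hypothetical hdrlen/hops values:

    #include <stdio.h>

    int main(void)
    {
            unsigned hdrlen = 6, hops = 2;  /* hypothetical SRH: 2 segments + TLVs */

            if (hdrlen > hops * 2) {
                    int tlvs_offset = (1 + hops * 2) << 3;      /* bytes */
                    int tlvs_length = (hdrlen - hops * 2) << 3; /* bytes */

                    printf("TLVs at byte %d, %d bytes long\n",
                           tlvs_offset, tlvs_length);           /* 40, 16 */
            }
            return 0;
    }
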
4514     diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
4515     index f7dd51c42314..688ba5f7516b 100644
4516     --- a/net/ipv6/ip6_output.c
4517     +++ b/net/ipv6/ip6_output.c
4518     @@ -1735,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
4519     cork.base.opt = NULL;
4520     v6_cork.opt = NULL;
4521     err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
4522     - if (err)
4523     + if (err) {
4524     + ip6_cork_release(&cork, &v6_cork);
4525     return ERR_PTR(err);
4526     -
4527     + }
4528     if (ipc6->dontfrag < 0)
4529     ipc6->dontfrag = inet6_sk(sk)->dontfrag;
4530    
4531     diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
4532     index ef958d50746b..3f46121ad139 100644
4533     --- a/net/ipv6/ip6_tunnel.c
4534     +++ b/net/ipv6/ip6_tunnel.c
4535     @@ -1081,10 +1081,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
4536     memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
4537     neigh_release(neigh);
4538     }
4539     - } else if (!(t->parms.flags &
4540     - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
4541     - /* enable the cache only only if the routing decision does
4542     - * not depend on the current inner header value
4543     + } else if (t->parms.proto != 0 && !(t->parms.flags &
4544     + (IP6_TNL_F_USE_ORIG_TCLASS |
4545     + IP6_TNL_F_USE_ORIG_FWMARK))) {
4546     + /* enable the cache only if neither the outer protocol nor the
4547     + * routing decision depends on the current inner header value
4548     */
4549     use_cache = true;
4550     }
4551     diff --git a/net/rds/rdma.c b/net/rds/rdma.c
4552     index bc2f1e0977d6..634cfcb7bba6 100644
4553     --- a/net/rds/rdma.c
4554     +++ b/net/rds/rdma.c
4555     @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
4556    
4557     local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
4558    
4559     + if (args->nr_local == 0)
4560     + return -EINVAL;
4561     +
4562     /* figure out the number of pages in the vector */
4563     for (i = 0; i < args->nr_local; i++) {
4564     if (copy_from_user(&vec, &local_vec[i],
4565     @@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
4566     err:
4567     if (page)
4568     put_page(page);
4569     + rm->atomic.op_active = 0;
4570     kfree(rm->atomic.op_notifier);
4571    
4572     return ret;
4573     diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
4574     index e29a48ef7fc3..a0ac42b3ed06 100644
4575     --- a/net/sched/act_gact.c
4576     +++ b/net/sched/act_gact.c
4577     @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
4578     if (action == TC_ACT_SHOT)
4579     this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
4580    
4581     - tm->lastuse = lastuse;
4582     + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
4583     }
4584    
4585     static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
4586     diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
4587     index 416627c66f08..6ce8de373f83 100644
4588     --- a/net/sched/act_mirred.c
4589     +++ b/net/sched/act_mirred.c
4590     @@ -238,7 +238,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
4591     struct tcf_t *tm = &m->tcf_tm;
4592    
4593     _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
4594     - tm->lastuse = lastuse;
4595     + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
4596     }
4597    
4598     static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
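
Both act_gact and act_mirred now fold hardware-reported lastuse through max_t(), so a stale or reordered stats report can no longer move the timestamp backwards. Reduced to a runnable sketch; lastuse_update is an invented helper:

    #include <stdio.h>

    static unsigned long long lastuse_update(unsigned long long cur,
                                             unsigned long long reported)
    {
            return reported > cur ? reported : cur;     /* keep it monotonic */
    }

    int main(void)
    {
            unsigned long long lastuse = 1000;

            lastuse = lastuse_update(lastuse, 900);  /* stale report: stays 1000 */
            lastuse = lastuse_update(lastuse, 1100); /* fresh report: moves on */
            printf("lastuse = %llu\n", lastuse);
            return 0;
    }
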
4599     diff --git a/net/sctp/input.c b/net/sctp/input.c
4600     index 621b5ca3fd1c..141c9c466ec1 100644
4601     --- a/net/sctp/input.c
4602     +++ b/net/sctp/input.c
4603     @@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
4604     return;
4605     }
4606    
4607     - if (t->param_flags & SPP_PMTUD_ENABLE) {
4608     - /* Update transports view of the MTU */
4609     - sctp_transport_update_pmtu(t, pmtu);
4610     -
4611     - /* Update association pmtu. */
4612     - sctp_assoc_sync_pmtu(asoc);
4613     - }
4614     + if (!(t->param_flags & SPP_PMTUD_ENABLE))
4615     + /* We can't allow retransmitting in such a case, as the
4616     + * retransmission would be sized just as before, and thus we
4617     + * would get another icmp, and retransmit again.
4618     + */
4619     + return;
4620    
4621     - /* Retransmit with the new pmtu setting.
4622     - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
4623     - * Needed will never be sent, but if a message was sent before
4624     - * PMTU discovery was disabled that was larger than the PMTU, it
4625     - * would not be fragmented, so it must be re-transmitted fragmented.
4626     + /* Update transport's view of the MTU. Return if no update was needed.
4627     + * If an update wasn't needed/possible, it also doesn't make sense to
4628     + * try to retransmit now.
4629     */
4630     + if (!sctp_transport_update_pmtu(t, pmtu))
4631     + return;
4632     +
4633     + /* Update association pmtu. */
4634     + sctp_assoc_sync_pmtu(asoc);
4635     +
4636     + /* Retransmit with the new pmtu setting. */
4637     sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
4638     }
4639    
4640     diff --git a/net/sctp/transport.c b/net/sctp/transport.c
4641     index 2d9bd3776bc8..7ef77fd7b52a 100644
4642     --- a/net/sctp/transport.c
4643     +++ b/net/sctp/transport.c
4644     @@ -251,28 +251,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
4645     transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
4646     }
4647    
4648     -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
4649     +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
4650     {
4651     struct dst_entry *dst = sctp_transport_dst_check(t);
4652     + bool change = true;
4653    
4654     if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
4655     - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
4656     - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
4657     - /* Use default minimum segment size and disable
4658     - * pmtu discovery on this transport.
4659     - */
4660     - t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
4661     - } else {
4662     - t->pathmtu = pmtu;
4663     + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
4664     + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
4665     + /* Use default minimum segment instead */
4666     + pmtu = SCTP_DEFAULT_MINSEGMENT;
4667     }
4668     + pmtu = SCTP_TRUNC4(pmtu);
4669    
4670     if (dst) {
4671     dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
4672     dst = sctp_transport_dst_check(t);
4673     }
4674    
4675     - if (!dst)
4676     + if (!dst) {
4677     t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
4678     + dst = t->dst;
4679     + }
4680     +
4681     + if (dst) {
4682     + /* Re-fetch, as underlying layers may have a higher minimum size */
4683     + pmtu = SCTP_TRUNC4(dst_mtu(dst));
4684     + change = t->pathmtu != pmtu;
4685     + }
4686     + t->pathmtu = pmtu;
4687     +
4688     + return change;
4689     }
4690    
4691     /* Caches the dst entry and source address for a transport's destination
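
sctp_transport_update_pmtu() now returns whether the path MTU actually changed, and the ICMP handler retransmits only in that case. The sketch below captures the new contract: clamp to the minimum segment, truncate to a 4-byte multiple, report the change. The constant and macro mirror the kernel's 512 and "& ~3" definitions, on the assumption they are unchanged in 4.14.

    #include <stdio.h>

    #define SCTP_DEFAULT_MINSEGMENT 512     /* kernel's default minimum */
    #define SCTP_TRUNC4(len) ((len) & ~3u)  /* round down to 4-byte multiple */

    static int update_pmtu(unsigned int *pathmtu, unsigned int pmtu)
    {
            if (pmtu < SCTP_DEFAULT_MINSEGMENT)
                    pmtu = SCTP_DEFAULT_MINSEGMENT; /* too low: clamp */
            pmtu = SCTP_TRUNC4(pmtu);
            if (*pathmtu == pmtu)
                    return 0;       /* nothing changed: skip the retransmit */
            *pathmtu = pmtu;
            return 1;               /* changed: worth retransmitting */
    }

    int main(void)
    {
            unsigned int pathmtu = 1500;
            int changed;

            changed = update_pmtu(&pathmtu, 1403);
            printf("changed=%d pmtu=%u\n", changed, pathmtu);   /* 1, 1400 */
            changed = update_pmtu(&pathmtu, 100);
            printf("changed=%d pmtu=%u\n", changed, pathmtu);   /* 1, 512 */
            return 0;
    }
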
4692     diff --git a/security/Kconfig b/security/Kconfig
4693     index 6614b9312b45..b5c2b5d0c6c0 100644
4694     --- a/security/Kconfig
4695     +++ b/security/Kconfig
4696     @@ -63,7 +63,7 @@ config PAGE_TABLE_ISOLATION
4697     ensuring that the majority of kernel addresses are not mapped
4698     into userspace.
4699    
4700     - See Documentation/x86/pagetable-isolation.txt for more details.
4701     + See Documentation/x86/pti.txt for more details.
4702    
4703     config SECURITY_INFINIBAND
4704     bool "Infiniband Security Hooks"
4705     diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
4706     index 2b27bb79aec4..d7b7e7115160 100644
4707     --- a/security/apparmor/include/perms.h
4708     +++ b/security/apparmor/include/perms.h
4709     @@ -133,6 +133,9 @@ extern struct aa_perms allperms;
4710     #define xcheck_labels_profiles(L1, L2, FN, args...) \
4711     xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
4712    
4713     +#define xcheck_labels(L1, L2, P, FN1, FN2) \
4714     + xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
4715     +
4716    
4717     void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
4718     void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
4719     diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
4720     index 7ca0032e7ba9..b40678f3c1d5 100644
4721     --- a/security/apparmor/ipc.c
4722     +++ b/security/apparmor/ipc.c
4723     @@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
4724     FLAGS_NONE, GFP_ATOMIC);
4725     }
4726    
4727     +/* assumes check for PROFILE_MEDIATES is already done */
4728     /* TODO: conditionals */
4729     static int profile_ptrace_perm(struct aa_profile *profile,
4730     - struct aa_profile *peer, u32 request,
4731     - struct common_audit_data *sa)
4732     + struct aa_label *peer, u32 request,
4733     + struct common_audit_data *sa)
4734     {
4735     struct aa_perms perms = { };
4736    
4737     - /* need because of peer in cross check */
4738     - if (profile_unconfined(profile) ||
4739     - !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
4740     - return 0;
4741     -
4742     - aad(sa)->peer = &peer->label;
4743     - aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
4744     + aad(sa)->peer = peer;
4745     + aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
4746     &perms);
4747     aa_apply_modes_to_perms(profile, &perms);
4748     return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
4749     }
4750    
4751     -static int cross_ptrace_perm(struct aa_profile *tracer,
4752     - struct aa_profile *tracee, u32 request,
4753     - struct common_audit_data *sa)
4754     +static int profile_tracee_perm(struct aa_profile *tracee,
4755     + struct aa_label *tracer, u32 request,
4756     + struct common_audit_data *sa)
4757     {
4758     + if (profile_unconfined(tracee) || unconfined(tracer) ||
4759     + !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
4760     + return 0;
4761     +
4762     + return profile_ptrace_perm(tracee, tracer, request, sa);
4763     +}
4764     +
4765     +static int profile_tracer_perm(struct aa_profile *tracer,
4766     + struct aa_label *tracee, u32 request,
4767     + struct common_audit_data *sa)
4768     +{
4769     + if (profile_unconfined(tracer))
4770     + return 0;
4771     +
4772     if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
4773     - return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
4774     - profile_ptrace_perm(tracee, tracer,
4775     - request << PTRACE_PERM_SHIFT,
4776     - sa));
4777     - /* policy uses the old style capability check for ptrace */
4778     - if (profile_unconfined(tracer) || tracer == tracee)
4779     + return profile_ptrace_perm(tracer, tracee, request, sa);
4780     +
4781     + /* profile uses the old style capability check for ptrace */
4782     + if (&tracer->label == tracee)
4783     return 0;
4784    
4785     aad(sa)->label = &tracer->label;
4786     - aad(sa)->peer = &tracee->label;
4787     + aad(sa)->peer = tracee;
4788     aad(sa)->request = 0;
4789     aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
4790    
4791     @@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
4792     int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
4793     u32 request)
4794     {
4795     + struct aa_profile *profile;
4796     + u32 xrequest = request << PTRACE_PERM_SHIFT;
4797     DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
4798    
4799     - return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
4800     - request, &sa);
4801     + return xcheck_labels(tracer, tracee, profile,
4802     + profile_tracer_perm(profile, tracee, request, &sa),
4803     + profile_tracee_perm(profile, tracer, xrequest, &sa));
4804     }
4805    
4806    
4807     diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
4808     index e49f448ee04f..c2db7e905f7d 100644
4809     --- a/sound/core/oss/pcm_oss.c
4810     +++ b/sound/core/oss/pcm_oss.c
4811     @@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
4812     v = snd_pcm_hw_param_last(pcm, params, var, dir);
4813     else
4814     v = snd_pcm_hw_param_first(pcm, params, var, dir);
4815     - snd_BUG_ON(v < 0);
4816     return v;
4817     }
4818    
4819     @@ -1335,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4820    
4821     if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4822     return tmp;
4823     - mutex_lock(&runtime->oss.params_lock);
4824     while (bytes > 0) {
4825     + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4826     + tmp = -ERESTARTSYS;
4827     + break;
4828     + }
4829     if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4830     tmp = bytes;
4831     if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
4832     @@ -1380,14 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4833     xfer += tmp;
4834     if ((substream->f_flags & O_NONBLOCK) != 0 &&
4835     tmp != runtime->oss.period_bytes)
4836     - break;
4837     + tmp = -EAGAIN;
4838     }
4839     - }
4840     - mutex_unlock(&runtime->oss.params_lock);
4841     - return xfer;
4842     -
4843     err:
4844     - mutex_unlock(&runtime->oss.params_lock);
4845     + mutex_unlock(&runtime->oss.params_lock);
4846     + if (tmp < 0)
4847     + break;
4848     + if (signal_pending(current)) {
4849     + tmp = -ERESTARTSYS;
4850     + break;
4851     + }
4852     + tmp = 0;
4853     + }
4854     return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4855     }
4856    
4857     @@ -1435,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4858    
4859     if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4860     return tmp;
4861     - mutex_lock(&runtime->oss.params_lock);
4862     while (bytes > 0) {
4863     + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4864     + tmp = -ERESTARTSYS;
4865     + break;
4866     + }
4867     if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4868     if (runtime->oss.buffer_used == 0) {
4869     tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
4870     @@ -1467,12 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4871     bytes -= tmp;
4872     xfer += tmp;
4873     }
4874     - }
4875     - mutex_unlock(&runtime->oss.params_lock);
4876     - return xfer;
4877     -
4878     err:
4879     - mutex_unlock(&runtime->oss.params_lock);
4880     + mutex_unlock(&runtime->oss.params_lock);
4881     + if (tmp < 0)
4882     + break;
4883     + if (signal_pending(current)) {
4884     + tmp = -ERESTARTSYS;
4885     + break;
4886     + }
4887     + tmp = 0;
4888     + }
4889     return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4890     }
4891    
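
The pcm_oss read/write loops above now take the params mutex per chunk, use the interruptible lock variant, and bail out when a signal is pending, instead of holding the lock across the whole copy. A loose userspace analogue of that loop shape, with a pthread mutex plus a SIGINT flag standing in for mutex_lock_interruptible() and signal_pending(); compile with -lpthread:

    #include <signal.h>
    #include <stdio.h>
    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static volatile sig_atomic_t interrupted;

    static void on_int(int sig) { (void)sig; interrupted = 1; }

    int main(void)
    {
            long xfer = 0, bytes = 5;

            signal(SIGINT, on_int);
            while (bytes > 0) {
                    if (interrupted)        /* ~ signal_pending(current) */
                            break;
                    pthread_mutex_lock(&lock);  /* per-chunk, not whole call */
                    xfer++;                     /* "transfer" one chunk */
                    bytes--;
                    pthread_mutex_unlock(&lock);
            }
            printf("transferred %ld chunks\n", xfer);
            return 0;
    }
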
4892     diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
4893     index cadc93792868..85a56af104bd 100644
4894     --- a/sound/core/oss/pcm_plugin.c
4895     +++ b/sound/core/oss/pcm_plugin.c
4896     @@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
4897     snd_pcm_sframes_t frames = size;
4898    
4899     plugin = snd_pcm_plug_first(plug);
4900     - while (plugin && frames > 0) {
4901     + while (plugin) {
4902     + if (frames <= 0)
4903     + return frames;
4904     if ((next = plugin->next) != NULL) {
4905     snd_pcm_sframes_t frames1 = frames;
4906     - if (plugin->dst_frames)
4907     + if (plugin->dst_frames) {
4908     frames1 = plugin->dst_frames(plugin, frames);
4909     + if (frames1 <= 0)
4910     + return frames1;
4911     + }
4912     if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
4913     return err;
4914     }
4915     if (err != frames1) {
4916     frames = err;
4917     - if (plugin->src_frames)
4918     + if (plugin->src_frames) {
4919     frames = plugin->src_frames(plugin, frames1);
4920     + if (frames <= 0)
4921     + return frames;
4922     + }
4923     }
4924     } else
4925     dst_channels = NULL;
4926     diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
4927     index 10e7ef7a8804..db7894bb028c 100644
4928     --- a/sound/core/pcm_lib.c
4929     +++ b/sound/core/pcm_lib.c
4930     @@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
4931     return changed;
4932     if (params->rmask) {
4933     int err = snd_pcm_hw_refine(pcm, params);
4934     - if (snd_BUG_ON(err < 0))
4935     + if (err < 0)
4936     return err;
4937     }
4938     return snd_pcm_hw_param_value(params, var, dir);
4939     @@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
4940     return changed;
4941     if (params->rmask) {
4942     int err = snd_pcm_hw_refine(pcm, params);
4943     - if (snd_BUG_ON(err < 0))
4944     + if (err < 0)
4945     return err;
4946     }
4947     return snd_pcm_hw_param_value(params, var, dir);
4948     diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
4949     index 2fec2feac387..499f75b18e09 100644
4950     --- a/sound/core/pcm_native.c
4951     +++ b/sound/core/pcm_native.c
4952     @@ -2582,7 +2582,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream,
4953     return ret < 0 ? ret : frames;
4954     }
4955    
4956     -/* decrease the appl_ptr; returns the processed frames or a negative error */
4957     +/* decrease the appl_ptr; returns the processed frames or zero for error */
4958     static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
4959     snd_pcm_uframes_t frames,
4960     snd_pcm_sframes_t avail)
4961     @@ -2599,7 +2599,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
4962     if (appl_ptr < 0)
4963     appl_ptr += runtime->boundary;
4964     ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
4965     - return ret < 0 ? ret : frames;
4966     + /* NOTE: we return zero for errors because PulseAudio gets depressed
4967     + * upon receiving an error from rewind ioctl and stops processing
4968     + * any longer. Returning zero means that no rewind is done, so
4969     + * it's not absolutely wrong to answer like that.
4970     + */
4971     + return ret < 0 ? 0 : frames;
4972     }
4973    
4974     static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
4975     diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
4976     index 135adb17703c..386ee829c655 100644
4977     --- a/sound/drivers/aloop.c
4978     +++ b/sound/drivers/aloop.c
4979     @@ -39,6 +39,7 @@
4980     #include <sound/core.h>
4981     #include <sound/control.h>
4982     #include <sound/pcm.h>
4983     +#include <sound/pcm_params.h>
4984     #include <sound/info.h>
4985     #include <sound/initval.h>
4986    
4987     @@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
4988     return 0;
4989     }
4990    
4991     -static void params_change_substream(struct loopback_pcm *dpcm,
4992     - struct snd_pcm_runtime *runtime)
4993     -{
4994     - struct snd_pcm_runtime *dst_runtime;
4995     -
4996     - if (dpcm == NULL || dpcm->substream == NULL)
4997     - return;
4998     - dst_runtime = dpcm->substream->runtime;
4999     - if (dst_runtime == NULL)
5000     - return;
5001     - dst_runtime->hw = dpcm->cable->hw;
5002     -}
5003     -
5004     static void params_change(struct snd_pcm_substream *substream)
5005     {
5006     struct snd_pcm_runtime *runtime = substream->runtime;
5007     @@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
5008     cable->hw.rate_max = runtime->rate;
5009     cable->hw.channels_min = runtime->channels;
5010     cable->hw.channels_max = runtime->channels;
5011     - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
5012     - runtime);
5013     - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
5014     - runtime);
5015     }
5016    
5017     static int loopback_prepare(struct snd_pcm_substream *substream)
5018     @@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
5019     static int rule_format(struct snd_pcm_hw_params *params,
5020     struct snd_pcm_hw_rule *rule)
5021     {
5022     + struct loopback_pcm *dpcm = rule->private;
5023     + struct loopback_cable *cable = dpcm->cable;
5024     + struct snd_mask m;
5025    
5026     - struct snd_pcm_hardware *hw = rule->private;
5027     - struct snd_mask *maskp = hw_param_mask(params, rule->var);
5028     -
5029     - maskp->bits[0] &= (u_int32_t)hw->formats;
5030     - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
5031     - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
5032     - if (! maskp->bits[0] && ! maskp->bits[1])
5033     - return -EINVAL;
5034     - return 0;
5035     + snd_mask_none(&m);
5036     + mutex_lock(&dpcm->loopback->cable_lock);
5037     + m.bits[0] = (u_int32_t)cable->hw.formats;
5038     + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
5039     + mutex_unlock(&dpcm->loopback->cable_lock);
5040     + return snd_mask_refine(hw_param_mask(params, rule->var), &m);
5041     }
5042    
5043     static int rule_rate(struct snd_pcm_hw_params *params,
5044     struct snd_pcm_hw_rule *rule)
5045     {
5046     - struct snd_pcm_hardware *hw = rule->private;
5047     + struct loopback_pcm *dpcm = rule->private;
5048     + struct loopback_cable *cable = dpcm->cable;
5049     struct snd_interval t;
5050    
5051     - t.min = hw->rate_min;
5052     - t.max = hw->rate_max;
5053     + mutex_lock(&dpcm->loopback->cable_lock);
5054     + t.min = cable->hw.rate_min;
5055     + t.max = cable->hw.rate_max;
5056     + mutex_unlock(&dpcm->loopback->cable_lock);
5057     t.openmin = t.openmax = 0;
5058     t.integer = 0;
5059     return snd_interval_refine(hw_param_interval(params, rule->var), &t);
5060     @@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
5061     static int rule_channels(struct snd_pcm_hw_params *params,
5062     struct snd_pcm_hw_rule *rule)
5063     {
5064     - struct snd_pcm_hardware *hw = rule->private;
5065     + struct loopback_pcm *dpcm = rule->private;
5066     + struct loopback_cable *cable = dpcm->cable;
5067     struct snd_interval t;
5068    
5069     - t.min = hw->channels_min;
5070     - t.max = hw->channels_max;
5071     + mutex_lock(&dpcm->loopback->cable_lock);
5072     + t.min = cable->hw.channels_min;
5073     + t.max = cable->hw.channels_max;
5074     + mutex_unlock(&dpcm->loopback->cable_lock);
5075     t.openmin = t.openmax = 0;
5076     t.integer = 0;
5077     return snd_interval_refine(hw_param_interval(params, rule->var), &t);
5078     }
5079    
5080     +static void free_cable(struct snd_pcm_substream *substream)
5081     +{
5082     + struct loopback *loopback = substream->private_data;
5083     + int dev = get_cable_index(substream);
5084     + struct loopback_cable *cable;
5085     +
5086     + cable = loopback->cables[substream->number][dev];
5087     + if (!cable)
5088     + return;
5089     + if (cable->streams[!substream->stream]) {
5090     + /* other stream is still alive */
5091     + cable->streams[substream->stream] = NULL;
5092     + } else {
5093     + /* free the cable */
5094     + loopback->cables[substream->number][dev] = NULL;
5095     + kfree(cable);
5096     + }
5097     +}
5098     +
5099     static int loopback_open(struct snd_pcm_substream *substream)
5100     {
5101     struct snd_pcm_runtime *runtime = substream->runtime;
5102     struct loopback *loopback = substream->private_data;
5103     struct loopback_pcm *dpcm;
5104     - struct loopback_cable *cable;
5105     + struct loopback_cable *cable = NULL;
5106     int err = 0;
5107     int dev = get_cable_index(substream);
5108    
5109     @@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
5110     if (!cable) {
5111     cable = kzalloc(sizeof(*cable), GFP_KERNEL);
5112     if (!cable) {
5113     - kfree(dpcm);
5114     err = -ENOMEM;
5115     goto unlock;
5116     }
5117     @@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
5118     /* are cached -> they do not reflect the actual state */
5119     err = snd_pcm_hw_rule_add(runtime, 0,
5120     SNDRV_PCM_HW_PARAM_FORMAT,
5121     - rule_format, &runtime->hw,
5122     + rule_format, dpcm,
5123     SNDRV_PCM_HW_PARAM_FORMAT, -1);
5124     if (err < 0)
5125     goto unlock;
5126     err = snd_pcm_hw_rule_add(runtime, 0,
5127     SNDRV_PCM_HW_PARAM_RATE,
5128     - rule_rate, &runtime->hw,
5129     + rule_rate, dpcm,
5130     SNDRV_PCM_HW_PARAM_RATE, -1);
5131     if (err < 0)
5132     goto unlock;
5133     err = snd_pcm_hw_rule_add(runtime, 0,
5134     SNDRV_PCM_HW_PARAM_CHANNELS,
5135     - rule_channels, &runtime->hw,
5136     + rule_channels, dpcm,
5137     SNDRV_PCM_HW_PARAM_CHANNELS, -1);
5138     if (err < 0)
5139     goto unlock;
5140     @@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
5141     else
5142     runtime->hw = cable->hw;
5143     unlock:
5144     + if (err < 0) {
5145     + free_cable(substream);
5146     + kfree(dpcm);
5147     + }
5148     mutex_unlock(&loopback->cable_lock);
5149     return err;
5150     }
5151     @@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
5152     {
5153     struct loopback *loopback = substream->private_data;
5154     struct loopback_pcm *dpcm = substream->runtime->private_data;
5155     - struct loopback_cable *cable;
5156     - int dev = get_cable_index(substream);
5157    
5158     loopback_timer_stop(dpcm);
5159     mutex_lock(&loopback->cable_lock);
5160     - cable = loopback->cables[substream->number][dev];
5161     - if (cable->streams[!substream->stream]) {
5162     - /* other stream is still alive */
5163     - cable->streams[substream->stream] = NULL;
5164     - } else {
5165     - /* free the cable */
5166     - loopback->cables[substream->number][dev] = NULL;
5167     - kfree(cable);
5168     - }
5169     + free_cable(substream);
5170     mutex_unlock(&loopback->cable_lock);
5171     return 0;
5172     }
5173     diff --git a/tools/objtool/check.c b/tools/objtool/check.c
5174     index 9b341584eb1b..f40d46e24bcc 100644
5175     --- a/tools/objtool/check.c
5176     +++ b/tools/objtool/check.c
5177     @@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file)
5178     }
5179     }
5180    
5181     +/*
5182     + * FIXME: For now, just ignore any alternatives which add retpolines. This is
5183     + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
5184     + * But it at least allows objtool to understand the control flow *around* the
5185     + * retpoline.
5186     + */
5187     +static int add_nospec_ignores(struct objtool_file *file)
5188     +{
5189     + struct section *sec;
5190     + struct rela *rela;
5191     + struct instruction *insn;
5192     +
5193     + sec = find_section_by_name(file->elf, ".rela.discard.nospec");
5194     + if (!sec)
5195     + return 0;
5196     +
5197     + list_for_each_entry(rela, &sec->rela_list, list) {
5198     + if (rela->sym->type != STT_SECTION) {
5199     + WARN("unexpected relocation symbol type in %s", sec->name);
5200     + return -1;
5201     + }
5202     +
5203     + insn = find_insn(file, rela->sym->sec, rela->addend);
5204     + if (!insn) {
5205     + WARN("bad .discard.nospec entry");
5206     + return -1;
5207     + }
5208     +
5209     + insn->ignore_alts = true;
5210     + }
5211     +
5212     + return 0;
5213     +}
5214     +
5215     /*
5216     * Find the destination instructions for all jumps.
5217     */
5218     @@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
5219     } else if (rela->sym->sec->idx) {
5220     dest_sec = rela->sym->sec;
5221     dest_off = rela->sym->sym.st_value + rela->addend + 4;
5222     + } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
5223     + /*
5224     + * Retpoline jumps are really dynamic jumps in
5225     + * disguise, so convert them accordingly.
5226     + */
5227     + insn->type = INSN_JUMP_DYNAMIC;
5228     + continue;
5229     } else {
5230     /* sibling call */
5231     insn->jump_dest = 0;
5232     @@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
5233     dest_off = insn->offset + insn->len + insn->immediate;
5234     insn->call_dest = find_symbol_by_offset(insn->sec,
5235     dest_off);
5236     + /*
5237     + * FIXME: Thanks to retpolines, it's now considered
5238     + * normal for a function to call within itself. So
5239     + * disable this warning for now.
5240     + */
5241     +#if 0
5242     if (!insn->call_dest) {
5243     WARN_FUNC("can't find call dest symbol at offset 0x%lx",
5244     insn->sec, insn->offset, dest_off);
5245     return -1;
5246     }
5247     +#endif
5248     } else if (rela->sym->type == STT_SECTION) {
5249     insn->call_dest = find_symbol_by_offset(rela->sym->sec,
5250     rela->addend+4);
5251     @@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
5252     return ret;
5253    
5254     list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
5255     - alt = malloc(sizeof(*alt));
5256     - if (!alt) {
5257     - WARN("malloc failed");
5258     - ret = -1;
5259     - goto out;
5260     - }
5261    
5262     orig_insn = find_insn(file, special_alt->orig_sec,
5263     special_alt->orig_off);
5264     @@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
5265     goto out;
5266     }
5267    
5268     + /* Ignore retpoline alternatives. */
5269     + if (orig_insn->ignore_alts)
5270     + continue;
5271     +
5272     new_insn = NULL;
5273     if (!special_alt->group || special_alt->new_len) {
5274     new_insn = find_insn(file, special_alt->new_sec,
5275     @@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
5276     goto out;
5277     }
5278    
5279     + alt = malloc(sizeof(*alt));
5280     + if (!alt) {
5281     + WARN("malloc failed");
5282     + ret = -1;
5283     + goto out;
5284     + }
5285     +
5286     alt->insn = new_insn;
5287     list_add_tail(&alt->list, &orig_insn->alts);
5288    
5289     @@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
5290    
5291     add_ignores(file);
5292    
5293     + ret = add_nospec_ignores(file);
5294     + if (ret)
5295     + return ret;
5296     +
5297     ret = add_jump_destinations(file);
5298     if (ret)
5299     return ret;
5300     diff --git a/tools/objtool/check.h b/tools/objtool/check.h
5301     index 47d9ea70a83d..dbadb304a410 100644
5302     --- a/tools/objtool/check.h
5303     +++ b/tools/objtool/check.h
5304     @@ -44,7 +44,7 @@ struct instruction {
5305     unsigned int len;
5306     unsigned char type;
5307     unsigned long immediate;
5308     - bool alt_group, visited, dead_end, ignore, hint, save, restore;
5309     + bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
5310     struct symbol *call_dest;
5311     struct instruction *jump_dest;
5312     struct list_head alts;
5313     diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
5314     index 7a2d221c4702..1241487de93f 100644
5315     --- a/tools/testing/selftests/bpf/test_verifier.c
5316     +++ b/tools/testing/selftests/bpf/test_verifier.c
5317     @@ -272,6 +272,46 @@ static struct bpf_test tests[] = {
5318     .errstr = "invalid bpf_ld_imm64 insn",
5319     .result = REJECT,
5320     },
5321     + {
5322     + "arsh32 on imm",
5323     + .insns = {
5324     + BPF_MOV64_IMM(BPF_REG_0, 1),
5325     + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
5326     + BPF_EXIT_INSN(),
5327     + },
5328     + .result = REJECT,
5329     + .errstr = "BPF_ARSH not supported for 32 bit ALU",
5330     + },
5331     + {
5332     + "arsh32 on reg",
5333     + .insns = {
5334     + BPF_MOV64_IMM(BPF_REG_0, 1),
5335     + BPF_MOV64_IMM(BPF_REG_1, 5),
5336     + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
5337     + BPF_EXIT_INSN(),
5338     + },
5339     + .result = REJECT,
5340     + .errstr = "BPF_ARSH not supported for 32 bit ALU",
5341     + },
5342     + {
5343     + "arsh64 on imm",
5344     + .insns = {
5345     + BPF_MOV64_IMM(BPF_REG_0, 1),
5346     + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
5347     + BPF_EXIT_INSN(),
5348     + },
5349     + .result = ACCEPT,
5350     + },
5351     + {
5352     + "arsh64 on reg",
5353     + .insns = {
5354     + BPF_MOV64_IMM(BPF_REG_0, 1),
5355     + BPF_MOV64_IMM(BPF_REG_1, 5),
5356     + BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
5357     + BPF_EXIT_INSN(),
5358     + },
5359     + .result = ACCEPT,
5360     + },
5361     {
5362     "no bpf_exit",
5363     .insns = {
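
The new arsh selftests pin down that BPF_ARSH, the sign-preserving right shift, is accepted only as a 64-bit ALU op on this kernel. For contrast, here is the difference between arithmetic and logical right shift in plain C; note that shifting a negative signed value right is implementation-defined in C, though arithmetic on the usual compilers.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t v = -8;

            /* arithmetic shift keeps the sign bit: -8 >> 1 == -4 */
            printf("arithmetic: %lld\n", (long long)(v >> 1));
            /* logical shift zero-fills: same bits, huge positive value */
            printf("logical:    %llu\n",
                   (unsigned long long)((uint64_t)v >> 1));
            return 0;
    }
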
5364     diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
5365     index 7b1adeee4b0f..91fbfa8fdc15 100644
5366     --- a/tools/testing/selftests/x86/Makefile
5367     +++ b/tools/testing/selftests/x86/Makefile
5368     @@ -7,7 +7,7 @@ include ../lib.mk
5369    
5370     TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
5371     check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
5372     - protection_keys test_vdso
5373     + protection_keys test_vdso test_vsyscall
5374     TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
5375     test_FCMOV test_FCOMI test_FISTTP \
5376     vdso_restorer
5377     diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
5378     new file mode 100644
5379     index 000000000000..6e0bd52ad53d
5380     --- /dev/null
5381     +++ b/tools/testing/selftests/x86/test_vsyscall.c
5382     @@ -0,0 +1,500 @@
5383     +/* SPDX-License-Identifier: GPL-2.0 */
5384     +
5385     +#define _GNU_SOURCE
5386     +
5387     +#include <stdio.h>
5388     +#include <sys/time.h>
5389     +#include <time.h>
5390     +#include <stdlib.h>
5391     +#include <sys/syscall.h>
5392     +#include <unistd.h>
5393     +#include <dlfcn.h>
5394     +#include <string.h>
5395     +#include <inttypes.h>
5396     +#include <signal.h>
5397     +#include <sys/ucontext.h>
5398     +#include <errno.h>
5399     +#include <err.h>
5400     +#include <sched.h>
5401     +#include <stdbool.h>
5402     +#include <setjmp.h>
5403     +
5404     +#ifdef __x86_64__
5405     +# define VSYS(x) (x)
5406     +#else
5407     +# define VSYS(x) 0
5408     +#endif
5409     +
5410     +#ifndef SYS_getcpu
5411     +# ifdef __x86_64__
5412     +# define SYS_getcpu 309
5413     +# else
5414     +# define SYS_getcpu 318
5415     +# endif
5416     +#endif
5417     +
5418     +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
5419     + int flags)
5420     +{
5421     + struct sigaction sa;
5422     + memset(&sa, 0, sizeof(sa));
5423     + sa.sa_sigaction = handler;
5424     + sa.sa_flags = SA_SIGINFO | flags;
5425     + sigemptyset(&sa.sa_mask);
5426     + if (sigaction(sig, &sa, 0))
5427     + err(1, "sigaction");
5428     +}
5429     +
5430     +/* vsyscalls and vDSO */
5431     +bool should_read_vsyscall = false;
5432     +
5433     +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
5434     +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
5435     +gtod_t vdso_gtod;
5436     +
5437     +typedef int (*vgettime_t)(clockid_t, struct timespec *);
5438     +vgettime_t vdso_gettime;
5439     +
5440     +typedef long (*time_func_t)(time_t *t);
5441     +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
5442     +time_func_t vdso_time;
5443     +
5444     +typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
5445     +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
5446     +getcpu_t vdso_getcpu;
5447     +
5448     +static void init_vdso(void)
5449     +{
5450     + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
5451     + if (!vdso)
5452     + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
5453     + if (!vdso) {
5454     + printf("[WARN]\tfailed to find vDSO\n");
5455     + return;
5456     + }
5457     +
5458     + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
5459     + if (!vdso_gtod)
5460     + printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
5461     +
5462     + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
5463     + if (!vdso_gettime)
5464     + printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
5465     +
5466     + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
5467     + if (!vdso_time)
5468     + printf("[WARN]\tfailed to find time in vDSO\n");
5469     +
5470     + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
5471     + if (!vdso_getcpu) {
5472     + /* getcpu() was never wired up in the 32-bit vDSO. */
5473     + printf("[%s]\tfailed to find getcpu in vDSO\n",
5474     + sizeof(long) == 8 ? "WARN" : "NOTE");
5475     + }
5476     +}
5477     +
5478     +static int init_vsys(void)
5479     +{
5480     +#ifdef __x86_64__
5481     + int nerrs = 0;
5482     + FILE *maps;
5483     + char line[128];
5484     + bool found = false;
5485     +
5486     + maps = fopen("/proc/self/maps", "r");
5487     + if (!maps) {
5488     + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
5489     + should_read_vsyscall = true;
5490     + return 0;
5491     + }
5492     +
5493     + while (fgets(line, sizeof(line), maps)) {
5494     + char r, x;
5495     + void *start, *end;
5496     + char name[128];
5497     + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
5498     + &start, &end, &r, &x, name) != 5)
5499     + continue;
5500     +
5501     + if (strcmp(name, "[vsyscall]"))
5502     + continue;
5503     +
5504     + printf("\tvsyscall map: %s", line);
5505     +
5506     + if (start != (void *)0xffffffffff600000 ||
5507     + end != (void *)0xffffffffff601000) {
5508     + printf("[FAIL]\taddress range is nonsense\n");
5509     + nerrs++;
5510     + }
5511     +
5512     + printf("\tvsyscall permissions are %c-%c\n", r, x);
5513     + should_read_vsyscall = (r == 'r');
5514     + if (x != 'x') {
5515     + vgtod = NULL;
5516     + vtime = NULL;
5517     + vgetcpu = NULL;
5518     + }
5519     +
5520     + found = true;
5521     + break;
5522     + }
5523     +
5524     + fclose(maps);
5525     +
5526     + if (!found) {
5527     + printf("\tno vsyscall map in /proc/self/maps\n");
5528     + should_read_vsyscall = false;
5529     + vgtod = NULL;
5530     + vtime = NULL;
5531     + vgetcpu = NULL;
5532     + }
5533     +
5534     + return nerrs;
5535     +#else
5536     + return 0;
5537     +#endif
5538     +}
5539     +
5540     +/* syscalls */
5541     +static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
5542     +{
5543     + return syscall(SYS_gettimeofday, tv, tz);
5544     +}
5545     +
5546     +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
5547     +{
5548     + return syscall(SYS_clock_gettime, id, ts);
5549     +}
5550     +
5551     +static inline long sys_time(time_t *t)
5552     +{
5553     + return syscall(SYS_time, t);
5554     +}
5555     +
5556     +static inline long sys_getcpu(unsigned * cpu, unsigned * node,
5557     + void* cache)
5558     +{
5559     + return syscall(SYS_getcpu, cpu, node, cache);
5560     +}
5561     +
5562     +static jmp_buf jmpbuf;
5563     +
5564     +static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
5565     +{
5566     + siglongjmp(jmpbuf, 1);
5567     +}
5568     +
5569     +static double tv_diff(const struct timeval *a, const struct timeval *b)
5570     +{
5571     + return (double)(a->tv_sec - b->tv_sec) +
5572     + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
5573     +}
5574     +
5575     +static int check_gtod(const struct timeval *tv_sys1,
5576     + const struct timeval *tv_sys2,
5577     + const struct timezone *tz_sys,
5578     + const char *which,
5579     + const struct timeval *tv_other,
5580     + const struct timezone *tz_other)
5581     +{
5582     + int nerrs = 0;
5583     + double d1, d2;
5584     +
5585     + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
5586     + printf("[FAIL] %s tz mismatch\n", which);
5587     + nerrs++;
5588     + }
5589     +
5590     + d1 = tv_diff(tv_other, tv_sys1);
5591     + d2 = tv_diff(tv_sys2, tv_other);
5592     + printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
5593     +
5594     + if (d1 < 0 || d2 < 0) {
5595     + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
5596     + nerrs++;
5597     + } else {
5598     + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
5599     + }
5600     +
5601     + return nerrs;
5602     +}
5603     +
5604     +static int test_gtod(void)
5605     +{
5606     + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
5607     + struct timezone tz_sys, tz_vdso, tz_vsys;
5608     + long ret_vdso = -1;
5609     + long ret_vsys = -1;
5610     + int nerrs = 0;
5611     +
5612     + printf("[RUN]\ttest gettimeofday()\n");
5613     +
5614     + if (sys_gtod(&tv_sys1, &tz_sys) != 0)
5615     + err(1, "syscall gettimeofday");
5616     + if (vdso_gtod)
5617     + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
5618     + if (vgtod)
5619     + ret_vsys = vgtod(&tv_vsys, &tz_vsys);
5620     + if (sys_gtod(&tv_sys2, &tz_sys) != 0)
5621     + err(1, "syscall gettimeofday");
5622     +
5623     + if (vdso_gtod) {
5624     + if (ret_vdso == 0) {
5625     + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
5626     + } else {
5627     + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
5628     + nerrs++;
5629     + }
5630     + }
5631     +
5632     + if (vgtod) {
5633     + if (ret_vsys == 0) {
5634     + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
5635     + } else {
5636     + printf("[FAIL]\tvsyscall gettimeofday() failed: %ld\n", ret_vsys);
5637     + nerrs++;
5638     + }
5639     + }
5640     +
5641     + return nerrs;
5642     +}
5643     +
5644     +static int test_time(void) {
5645     + int nerrs = 0;
5646     +
5647     + printf("[RUN]\ttest time()\n");
5648     + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
5649     + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
5650     + t_sys1 = sys_time(&t2_sys1);
5651     + if (vdso_time)
5652     + t_vdso = vdso_time(&t2_vdso);
5653     + if (vtime)
5654     + t_vsys = vtime(&t2_vsys);
5655     + t_sys2 = sys_time(&t2_sys2);
5656     + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
5657     + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
5658     + nerrs++;
5659     + return nerrs;
5660     + }
5661     +
5662     + if (vdso_time) {
5663     + if (t_vdso < 0 || t_vdso != t2_vdso) {
5664     + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
5665     + nerrs++;
5666     + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
5667     + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
5668     + nerrs++;
5669     + } else {
5670     + printf("[OK]\tvDSO time() is okay\n");
5671     + }
5672     + }
5673     +
5674     + if (vtime) {
5675     + if (t_vsys < 0 || t_vsys != t2_vsys) {
5676     + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
5677     + nerrs++;
5678     + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
5679     + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
5680     + nerrs++;
5681     + } else {
5682     + printf("[OK]\tvsyscall time() is okay\n");
5683     + }
5684     + }
5685     +
5686     + return nerrs;
5687     +}
5688     +
5689     +static int test_getcpu(int cpu)
5690     +{
5691     + int nerrs = 0;
5692     + long ret_sys, ret_vdso = -1, ret_vsys = -1;
5693     +
5694     + printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
5695     +
5696     + cpu_set_t cpuset;
5697     + CPU_ZERO(&cpuset);
5698     + CPU_SET(cpu, &cpuset);
5699     + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
5700     + printf("[SKIP]\tfailed to force CPU %d\n", cpu);
5701     + return nerrs;
5702     + }
5703     +
5704     + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
5705     + unsigned node = 0;
5706     + bool have_node = false;
5707     + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
5708     + if (vdso_getcpu)
5709     + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
5710     + if (vgetcpu)
5711     + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
5712     +
5713     + if (ret_sys == 0) {
5714     + if (cpu_sys != cpu) {
5715     + printf("[FAIL]\tsyscall reported CPU %u but should be %d\n", cpu_sys, cpu);
5716     + nerrs++;
5717     + }
5718     +
5719     + have_node = true;
5720     + node = node_sys;
5721     + }
5722     +
5723     + if (vdso_getcpu) {
5724     + if (ret_vdso) {
5725     + printf("[FAIL]\tvDSO getcpu() failed\n");
5726     + nerrs++;
5727     + } else {
5728     + if (!have_node) {
5729     + have_node = true;
5730     + node = node_vdso;
5731     + }
5732     +
5733     + if (cpu_vdso != cpu) {
5734     + printf("[FAIL]\tvDSO reported CPU %u but should be %d\n", cpu_vdso, cpu);
5735     + nerrs++;
5736     + } else {
5737     + printf("[OK]\tvDSO reported correct CPU\n");
5738     + }
5739     +
5740     + if (node_vdso != node) {
5741     + printf("[FAIL]\tvDSO reported node %u but should be %u\n", node_vdso, node);
5742     + nerrs++;
5743     + } else {
5744     + printf("[OK]\tvDSO reported correct node\n");
5745     + }
5746     + }
5747     + }
5748     +
5749     + if (vgetcpu) {
5750     + if (ret_vsys) {
5751     + printf("[FAIL]\tvsyscall getcpu() failed\n");
5752     + nerrs++;
5753     + } else {
5754     + if (!have_node) {
5755     + have_node = true;
5756     + node = node_vsys;
5757     + }
5758     +
5759     + if (cpu_vsys != cpu) {
5760     + printf("[FAIL]\tvsyscall reported CPU %u but should be %d\n", cpu_vsys, cpu);
5761     + nerrs++;
5762     + } else {
5763     + printf("[OK]\tvsyscall reported correct CPU\n");
5764     + }
5765     +
5766     + if (node_vsys != node) {
5767     + printf("[FAIL]\tvsyscall reported node %u but should be %u\n", node_vsys, node);
5768     + nerrs++;
5769     + } else {
5770     + printf("[OK]\tvsyscall reported correct node\n");
5771     + }
5772     + }
5773     + }
5774     +
5775     + return nerrs;
5776     +}
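
test_getcpu() only has a known-correct answer because the thread is pinned to a single CPU with sched_setaffinity() before sampling; every getcpu() flavor must then report that CPU. A runnable sketch of the pinning step, mirroring the code above:

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		cpu_set_t set;
		unsigned cpu = 0, node = 0;

		CPU_ZERO(&set);
		CPU_SET(0, &set);	/* pin this thread to CPU 0 */
		if (sched_setaffinity(0, sizeof(set), &set) != 0)
			return 1;

		/* getcpu() must now report CPU 0 */
		syscall(SYS_getcpu, &cpu, &node, NULL);
		printf("cpu=%u node=%u\n", cpu, node);
		return 0;
	}
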
5777     +
5778     +static int test_vsys_r(void)
5779     +{
5780     +#ifdef __x86_64__
5781     + printf("[RUN]\tChecking read access to the vsyscall page\n");
5782     + bool can_read;
5783     + if (sigsetjmp(jmpbuf, 1) == 0) {
5784     + *(volatile int *)0xffffffffff600000;
5785     + can_read = true;
5786     + } else {
5787     + can_read = false;
5788     + }
5789     +
5790     + if (can_read && !should_read_vsyscall) {
5791     + printf("[FAIL]\tWe have read access, but we shouldn't\n");
5792     + return 1;
5793     + } else if (!can_read && should_read_vsyscall) {
5794     + printf("[FAIL]\tWe don't have read access, but we should\n");
5795     + return 1;
5796     + } else {
5797     + printf("[OK]\tgot expected result\n");
5798     + }
5799     +#endif
5800     +
5801     + return 0;
5802     +}
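
test_vsys_r() probes the fixed address 0xffffffffff600000 by letting the SIGSEGV handler installed in main() siglongjmp() out of the faulting load; whether the read should succeed was decided earlier from /proc/self/maps. A standalone sketch of the probe pattern (on_segv() and addr_is_readable() are hypothetical names):

	#include <setjmp.h>
	#include <signal.h>
	#include <stdbool.h>
	#include <string.h>

	static sigjmp_buf env;

	static void on_segv(int sig)
	{
		siglongjmp(env, 1);
	}

	static bool addr_is_readable(const volatile int *p)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_handler = on_segv;
		sigaction(SIGSEGV, &sa, NULL);

		if (sigsetjmp(env, 1) == 0) {
			(void)*p;	/* faults if the page is unreadable */
			return true;
		}
		return false;
	}
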
5803     +
5804     +
5805     +#ifdef __x86_64__
5806     +#define X86_EFLAGS_TF (1UL << 8)
5807     +static volatile sig_atomic_t num_vsyscall_traps;
5808     +
5809     +static unsigned long get_eflags(void)
5810     +{
5811     + unsigned long eflags;
5812     + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
5813     + return eflags;
5814     +}
5815     +
5816     +static void set_eflags(unsigned long eflags)
5817     +{
5818     + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
5819     +}
5820     +
5821     +static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
5822     +{
5823     + ucontext_t *ctx = (ucontext_t *)ctx_void;
5824     + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
5825     +
5826     + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
5827     + num_vsyscall_traps++;
5828     +}
5829     +
5830     +static int test_native_vsyscall(void)
5831     +{
5832     + time_t tmp;
5833     + bool is_native;
5834     +
5835     + if (!vtime)
5836     + return 0;
5837     +
5838     + printf("[RUN]\tchecking for native vsyscall\n");
5839     + sethandler(SIGTRAP, sigtrap, 0);
5840     + set_eflags(get_eflags() | X86_EFLAGS_TF);
5841     + vtime(&tmp);
5842     + set_eflags(get_eflags() & ~X86_EFLAGS_TF);
5843     +
5844     + /*
5845     + * If vsyscalls are emulated, we expect a single trap in the
5846     + * vsyscall page -- the call instruction will trap with RIP
5847     + * pointing to the entry point before emulation takes over.
5848     + * In native mode, we expect two traps, since whatever code
5849     + * the vsyscall page contains will be more than just a ret
5850     + * instruction.
5851     + */
5852     + is_native = (num_vsyscall_traps > 1);
5853     +
5854     + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
5855     + (is_native ? "native" : "emulated"),
5856     + (int)num_vsyscall_traps);
5857     +
5858     + return 0;
5859     +}
5860     +#endif
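
test_native_vsyscall() works by setting the x86 trap flag (TF, bit 8 of EFLAGS), which makes the CPU deliver SIGTRAP after every instruction; sigtrap() counts only traps whose RIP lands inside the 4 KiB vsyscall page, which is what the ((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0 mask tests. One in-page trap means the call was intercepted and emulated; more than one means real instructions executed there natively. A minimal sketch of the TF toggling, mirroring get_eflags()/set_eflags() above:

	#define TF_BIT (1UL << 8)	/* x86 trap flag */

	static unsigned long read_eflags(void)
	{
		unsigned long f;
		asm volatile ("pushfq\n\tpopq %0" : "=rm" (f));
		return f;
	}

	static void write_eflags(unsigned long f)
	{
		asm volatile ("pushq %0\n\tpopfq" : : "rm" (f) : "flags");
	}

	/*
	 * usage: write_eflags(read_eflags() | TF_BIT);
	 *        ...single-stepped region, SIGTRAP after each insn...
	 *        write_eflags(read_eflags() & ~TF_BIT);
	 */
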
5861     +
5862     +int main(int argc, char **argv)
5863     +{
5864     + int nerrs = 0;
5865     +
5866     + init_vdso();
5867     + nerrs += init_vsys();
5868     +
5869     + nerrs += test_gtod();
5870     + nerrs += test_time();
5871     + nerrs += test_getcpu(0);
5872     + nerrs += test_getcpu(1);
5873     +
5874     + sethandler(SIGSEGV, sigsegv, 0);
5875     + nerrs += test_vsys_r();
5876     +
5877     +#ifdef __x86_64__
5878     + nerrs += test_native_vsyscall();
5879     +#endif
5880     +
5881     + return nerrs ? 1 : 0;
5882     +}
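
Usage note: assuming the usual kselftest workflow, the new test builds alongside the other x86 selftests with "make -C tools/testing/selftests/x86" and should produce 32-bit and 64-bit binaries (test_vsyscall_32 / test_vsyscall_64); a nonzero exit status means at least one check failed, matching the "return nerrs ? 1 : 0" above.
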
5883     diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
5884     index b6e715fd3c90..dac7ceb1a677 100644
5885     --- a/virt/kvm/arm/mmio.c
5886     +++ b/virt/kvm/arm/mmio.c
5887     @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
5888     }
5889    
5890     trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
5891     - data);
5892     + &data);
5893     data = vcpu_data_host_to_guest(vcpu, data, len);
5894     vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
5895     }
5896     @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
5897     data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
5898     len);
5899    
5900     - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
5901     + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
5902     kvm_mmio_write_buf(data_buf, len, data);
5903    
5904     ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
5905     data_buf);
5906     } else {
5907     trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
5908     - fault_ipa, 0);
5909     + fault_ipa, NULL);
5910    
5911     ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
5912     data_buf);
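
The virt/kvm/arm/mmio.c hunk follows a change to the trace_kvm_mmio tracepoint, which, as the switch from data to &data and from 0 to NULL shows, now takes a pointer to the MMIO data (or NULL when a read has not been satisfied yet) rather than the data value itself. Illustrative kernel-context fragments of the call-site pattern after the change (not standalone code, taken from the hunk above):

	/* completed read: pass the address of the data buffer */
	trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data);

	/* unsatisfied read: no data yet, so pass NULL rather than 0 */
	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, fault_ipa, NULL);
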