Magellan Linux

Contents of /trunk/kernel-magellan/patches-4.14/0113-4.14.14-all-fixes.patch



Revision 3074
Wed Jan 17 13:27:15 2018 UTC by niro
File size: 193113 bytes
-linux-4.14.14
1 diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
2 index f3d5817c4ef0..258902db14bf 100644
3 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu
4 +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
5 @@ -373,3 +373,19 @@ Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
6 Description: information about CPUs heterogeneity.
7
8 cpu_capacity: capacity of cpu#.
9 +
10 +What: /sys/devices/system/cpu/vulnerabilities
11 + /sys/devices/system/cpu/vulnerabilities/meltdown
12 + /sys/devices/system/cpu/vulnerabilities/spectre_v1
13 + /sys/devices/system/cpu/vulnerabilities/spectre_v2
14 +Date: January 2018
15 +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
16 +Description: Information about CPU vulnerabilities
17 +
18 + The files are named after the code names of CPU
19 + vulnerabilities. The output of those files reflects the
20 + state of the CPUs in the system. Possible output values:
21 +
22 + "Not affected" CPU is not affected by the vulnerability
23 + "Vulnerable" CPU is affected and no mitigation in effect
24 + "Mitigation: $M" CPU is affected and mitigation $M is in effect
25 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
26 index 520fdec15bbb..8122b5f98ea1 100644
27 --- a/Documentation/admin-guide/kernel-parameters.txt
28 +++ b/Documentation/admin-guide/kernel-parameters.txt
29 @@ -2599,6 +2599,11 @@
30 nosmt [KNL,S390] Disable symmetric multithreading (SMT).
31 Equivalent to smt=1.
32
33 + nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
34 + (indirect branch prediction) vulnerability. System may
35 + allow data leaks with this option, which is equivalent
36 + to spectre_v2=off.
37 +
38 noxsave [BUGS=X86] Disables x86 extended register state save
39 and restore using xsave. The kernel will fallback to
40 enabling legacy floating-point and sse state.
41 @@ -2685,8 +2690,6 @@
42 steal time is computed, but won't influence scheduler
43 behaviour
44
45 - nopti [X86-64] Disable kernel page table isolation
46 -
47 nolapic [X86-32,APIC] Do not enable or use the local APIC.
48
49 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
50 @@ -3255,11 +3258,20 @@
51 pt. [PARIDE]
52 See Documentation/blockdev/paride.txt.
53
54 - pti= [X86_64]
55 - Control user/kernel address space isolation:
56 - on - enable
57 - off - disable
58 - auto - default setting
59 + pti= [X86_64] Control Page Table Isolation of user and
60 + kernel address spaces. Disabling this feature
61 + removes hardening, but improves performance of
62 + system calls and interrupts.
63 +
64 + on - unconditionally enable
65 + off - unconditionally disable
66 + auto - kernel detects whether your CPU model is
67 + vulnerable to issues that PTI mitigates
68 +
69 + Not specifying this option is equivalent to pti=auto.
70 +
71 + nopti [X86_64]
72 + Equivalent to pti=off
73
74 pty.legacy_count=
75 [KNL] Number of legacy pty's. Overwrites compiled-in
76 @@ -3901,6 +3913,29 @@
77 sonypi.*= [HW] Sony Programmable I/O Control Device driver
78 See Documentation/laptops/sonypi.txt
79
80 + spectre_v2= [X86] Control mitigation of Spectre variant 2
81 + (indirect branch speculation) vulnerability.
82 +
83 + on - unconditionally enable
84 + off - unconditionally disable
85 + auto - kernel detects whether your CPU model is
86 + vulnerable
87 +
88 + Selecting 'on' will, and 'auto' may, choose a
89 + mitigation method at run time according to the
90 + CPU, the available microcode, the setting of the
91 + CONFIG_RETPOLINE configuration option, and the
92 + compiler with which the kernel was built.
93 +
94 + Specific mitigations can also be selected manually:
95 +
96 + retpoline - replace indirect branches
97 + retpoline,generic - Google's original retpoline
98 + retpoline,amd - AMD-specific minimal thunk
99 +
100 + Not specifying this option is equivalent to
101 + spectre_v2=auto.
102 +
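
A hypothetical, simplified userspace sketch of probing these option strings in /proc/cmdline (the authoritative parsing is done by the kernel itself at boot); note that 'retpoline,generic' and 'retpoline,amd' must be matched before plain 'retpoline':

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		static const char *vals[] = { "off", "on", "retpoline,generic",
					      "retpoline,amd", "retpoline", "auto" };
		char cmdline[4096] = "";
		char needle[64];
		unsigned int i;
		FILE *f = fopen("/proc/cmdline", "r");

		if (!f || !fgets(cmdline, sizeof(cmdline), f))
			return 1;
		fclose(f);

		if (strstr(cmdline, "nospectre_v2")) {
			puts("spectre_v2=off (via nospectre_v2)");
			return 0;
		}
		for (i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
			snprintf(needle, sizeof(needle), "spectre_v2=%s", vals[i]);
			if (strstr(cmdline, needle)) {
				printf("spectre_v2=%s\n", vals[i]);
				return 0;
			}
		}
		puts("spectre_v2=auto (default, not specified)");
		return 0;
	}
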
103 spia_io_base= [HW,MTD]
104 spia_fio_base=
105 spia_pedr=
106 diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
107 new file mode 100644
108 index 000000000000..d11eff61fc9a
109 --- /dev/null
110 +++ b/Documentation/x86/pti.txt
111 @@ -0,0 +1,186 @@
112 +Overview
113 +========
114 +
115 +Page Table Isolation (pti, previously known as KAISER[1]) is a
116 +countermeasure against attacks on the shared user/kernel address
117 +space such as the "Meltdown" approach[2].
118 +
119 +To mitigate this class of attacks, we create an independent set of
120 +page tables for use only when running userspace applications. When
121 +the kernel is entered via syscalls, interrupts or exceptions, the
122 +page tables are switched to the full "kernel" copy. When the system
123 +switches back to user mode, the user copy is used again.
124 +
125 +The userspace page tables contain only a minimal amount of kernel
126 +data: only what is needed to enter/exit the kernel such as the
127 +entry/exit functions themselves and the interrupt descriptor table
128 +(IDT). There are a few strictly unnecessary things that get mapped
129 +such as the first C function when entering an interrupt (see
130 +comments in pti.c).
131 +
132 +This approach helps to ensure that side-channel attacks leveraging
133 +the paging structures do not function when PTI is enabled. It can be
134 +enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
135 +Once enabled at compile-time, it can be disabled at boot with the
136 +'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
137 +
138 +Page Table Management
139 +=====================
140 +
141 +When PTI is enabled, the kernel manages two sets of page tables.
142 +The first set is very similar to the single set which is present in
143 +kernels without PTI. This includes a complete mapping of userspace
144 +that the kernel can use for things like copy_to_user().
145 +
146 +Although _complete_, the user portion of the kernel page tables is
147 +crippled by setting the NX bit in the top level. This ensures
148 +that any missed kernel->user CR3 switch will immediately crash
149 +userspace upon executing its first instruction.
150 +
151 +The userspace page tables map only the kernel data needed to enter
152 +and exit the kernel. This data is entirely contained in the 'struct
153 + cpu_entry_area' structure, which is placed in the fixmap, giving
154 +each CPU's copy of the area a compile-time-fixed virtual address.
155 +
156 +For new userspace mappings, the kernel makes the entries in its
157 +page tables like normal. The only difference is when the kernel
158 +makes entries in the top (PGD) level. In addition to setting the
159 +entry in the main kernel PGD, a copy of the entry is made in the
160 +userspace page tables' PGD.
161 +
162 +This sharing at the PGD level also inherently shares all the lower
163 +layers of the page tables. This leaves a single, shared set of
164 +userspace page tables to manage. One PTE to lock, one set of
165 +accessed bits, dirty bits, etc...
166 +
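
A kernel-context sketch of the PGD-level mirroring just described (illustrative names, assuming kernel types and the order-1, 8k PGD described under Overhead below):

	/* With PTI the PGD allocation is order-1: the kernel half is the
	 * low 4k page and the user half the high one, so the 8k alignment
	 * guarantees that bit PAGE_SHIFT (12) selects between them. */
	static inline pgd_t *user_half_pgdp(pgd_t *kernel_pgdp)
	{
		return (pgd_t *)((unsigned long)kernel_pgdp + PAGE_SIZE);
	}

	static void mirror_user_pgd_entry(pgd_t *kernel_pgdp, pgd_t pgd)
	{
		*kernel_pgdp = pgd;			/* main kernel PGD */
		*user_half_pgdp(kernel_pgdp) = pgd;	/* userspace PGD copy */
	}
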
167 +Overhead
168 +========
169 +
170 +Protection against side-channel attacks is important. But,
171 +this protection comes at a cost:
172 +
173 +1. Increased Memory Use
174 + a. Each process now needs an order-1 PGD instead of order-0.
175 + (Consumes an additional 4k per process).
176 + b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
177 + aligned so that it can be mapped by setting a single PMD
178 + entry. This consumes nearly 2MB of RAM once the kernel
179 + is decompressed, but no space in the kernel image itself.
180 +
181 +2. Runtime Cost
182 + a. CR3 manipulation to switch between the page table copies
183 + must be done at interrupt, syscall, and exception entry
184 + and exit (it can be skipped when the kernel is interrupted,
185 + though.) Moves to CR3 are on the order of a hundred
186 + cycles, and are required at every entry and exit.
187 + b. A "trampoline" must be used for SYSCALL entry. This
188 + trampoline depends on a smaller set of resources than the
189 + non-PTI SYSCALL entry code, so requires mapping fewer
190 + things into the userspace page tables. The downside is
191 + that stacks must be switched at entry time.
192 + c. Global pages are disabled for all kernel structures not
193 + mapped into both kernel and userspace page tables. This
194 + feature of the MMU allows different processes to share TLB
195 + entries mapping the kernel. Losing the feature means more
196 + TLB misses after a context switch. The actual loss of
197 + performance is very small, however, never exceeding 1%.
198 + d. Process Context IDentifiers (PCID) is a CPU feature that
199 + allows us to skip flushing the entire TLB when switching page
200 + tables by setting a special bit in CR3 when the page tables
201 + are changed. This makes switching the page tables (at context
202 + switch, or kernel entry/exit) cheaper. But, on systems with
203 + PCID support, the context switch code must flush both the user
204 + and kernel entries out of the TLB. The user PCID TLB flush is
205 + deferred until the exit to userspace, minimizing the cost.
206 + See intel.com/sdm for the gory PCID/INVPCID details (a sketch of the resulting CR3 layout follows this list).
207 + e. The userspace page tables must be populated for each new
208 + process. Even without PTI, the shared kernel mappings
209 + are created by copying top-level (PGD) entries into each
210 + new process. But, with PTI, there are now *two* kernel
211 + mappings: one in the kernel page tables that maps everything
212 + and one for the entry/exit structures. At fork(), we need to
213 + copy both.
214 + f. In addition to the fork()-time copying, there must also
215 + be an update to the userspace PGD any time a set_pgd() is done
216 + on a PGD used to map userspace. This ensures that the kernel
217 + and userspace copies always map the same userspace
218 + memory.
219 + g. On systems without PCID support, each CR3 write flushes
220 + the entire TLB. That means that each syscall, interrupt
221 + or exception flushes the TLB.
222 + h. INVPCID is a TLB-flushing instruction which allows flushing
223 + of TLB entries for non-current PCIDs. Some systems support
224 + PCIDs, but do not support INVPCID. On these systems, addresses
225 + can only be flushed from the TLB for the current PCID. When
226 + flushing a kernel address, we need to flush all PCIDs, so a
227 + single kernel address flush will require a TLB-flushing CR3
228 + write upon the next use of every PCID.
229 +
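
A self-contained sketch of the user CR3 composition implied above, using the bit assignments this patch introduces in arch/x86/entry/calling.h and processor-flags.h:

	/* Bit 12 selects the user half of the 8k PGD, bit 11 selects the
	 * user PCID, and bit 63 (CR3_NOFLUSH) asks the CPU not to flush
	 * the TLB on the write when PCID is in use. */
	#define PTI_USER_PGTABLE_BIT	12	/* PAGE_SHIFT */
	#define PTI_USER_PCID_BIT	11	/* X86_CR3_PTI_PCID_USER_BIT */
	#define CR3_NOFLUSH		(1ULL << 63)

	static unsigned long long user_cr3(unsigned long long kernel_cr3,
					   int noflush)
	{
		unsigned long long cr3 = kernel_cr3;

		cr3 |= 1ULL << PTI_USER_PGTABLE_BIT;	/* user page tables */
		cr3 |= 1ULL << PTI_USER_PCID_BIT;	/* user ASID */
		if (noflush)
			cr3 |= CR3_NOFLUSH;		/* skip the implicit flush */
		return cr3;
	}
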
230 +Possible Future Work
231 +====================
232 +1. We can be more careful about not writing to CR3
233 + unless its value has actually changed.
234 +2. Allow PTI to be enabled/disabled at runtime in addition to the
235 + boot-time switching.
236 +
237 +Testing
238 +========
239 +
240 +To test stability of PTI, the following test procedure is recommended,
241 +ideally doing all of these in parallel:
242 +
243 +1. Set CONFIG_DEBUG_ENTRY=y
244 +2. Run several copies of all of the tools/testing/selftests/x86/ tests
245 + (excluding MPX and protection_keys) in a loop on multiple CPUs for
246 + several minutes. These tests frequently uncover corner cases in the
247 + kernel entry code. In general, old kernels might cause these tests
248 + themselves to crash, but they should never crash the kernel.
249 +3. Run the 'perf' tool in a mode (top or record) that generates many
250 + frequent performance monitoring non-maskable interrupts (see "NMI"
251 + in /proc/interrupts). This exercises the NMI entry/exit code which
252 + is known to trigger bugs in code paths that did not expect to be
253 + interrupted, including nested NMIs. Using "-c" boosts the rate of
254 + NMIs, and using two -c with separate counters encourages nested NMIs
255 + and less deterministic behavior.
256 +
257 + while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
258 +
259 +4. Launch a KVM virtual machine.
260 +5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
261 + This has been a lightly-tested code path and needs extra scrutiny.
262 +
263 +Debugging
264 +=========
265 +
266 +Bugs in PTI cause a few different signatures of crashes
267 +that are worth noting here.
268 +
269 + * Failures of the selftests/x86 code. Usually a bug in one of the
270 + more obscure corners of entry_64.S
271 + * Crashes in early boot, especially around CPU bringup. Bugs
272 + in the trampoline code or mappings cause these.
273 + * Crashes at the first interrupt. Caused by bugs in entry_64.S,
274 + like screwing up a page table switch. Also caused by
275 + incorrectly mapping the IRQ handler entry code.
276 + * Crashes at the first NMI. The NMI code is separate from main
277 + interrupt handlers and can have bugs that do not affect
278 + normal interrupts. Also caused by incorrectly mapping NMI
279 + code. NMIs that interrupt the entry code must be very
280 + careful and can be the cause of crashes that show up when
281 + running perf.
282 + * Kernel crashes at the first exit to userspace. entry_64.S
283 + bugs, or failing to map some of the exit code.
284 + * Crashes at first interrupt that interrupts userspace. The paths
285 + in entry_64.S that return to userspace are sometimes separate
286 + from the ones that return to the kernel.
287 + * Double faults: overflowing the kernel stack because of page
288 + faults upon page faults. Caused by touching non-pti-mapped
289 + data in the entry code, or forgetting to switch to kernel
290 + CR3 before calling into C functions which are not pti-mapped.
291 + * Userspace segfaults early in boot, sometimes manifesting
292 + as mount(8) failing to mount the rootfs. These have
293 + tended to be TLB invalidation issues. Usually invalidating
294 + the wrong PCID, or otherwise missing an invalidation.
295 +
296 +1. https://gruss.cc/files/kaiser.pdf
297 +2. https://meltdownattack.com/meltdown.pdf
298 diff --git a/Makefile b/Makefile
299 index a67c5179052a..4951305eb867 100644
300 --- a/Makefile
301 +++ b/Makefile
302 @@ -1,7 +1,7 @@
303 # SPDX-License-Identifier: GPL-2.0
304 VERSION = 4
305 PATCHLEVEL = 14
306 -SUBLEVEL = 13
307 +SUBLEVEL = 14
308 EXTRAVERSION =
309 NAME = Petit Gorille
310
311 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
312 index c5ff6bfe2825..2f2d176396aa 100644
313 --- a/arch/mips/kernel/process.c
314 +++ b/arch/mips/kernel/process.c
315 @@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
316 struct task_struct *t;
317 int max_users;
318
319 + /* If nothing to change, return right away, successfully. */
320 + if (value == mips_get_process_fp_mode(task))
321 + return 0;
322 +
323 + /* Only accept a mode change if 64-bit FP enabled for o32. */
324 + if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
325 + return -EOPNOTSUPP;
326 +
327 + /* And only for o32 tasks. */
328 + if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
329 + return -EOPNOTSUPP;
330 +
331 /* Check the value is valid */
332 if (value & ~known_bits)
333 return -EOPNOTSUPP;
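
For context, this function backs the PR_SET_FP_MODE prctl; a hedged userspace sketch of exercising the new checks (constants from the prctl uapi, with fallback defines):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_SET_FP_MODE
	#define PR_SET_FP_MODE	45
	#define PR_FP_MODE_FR	(1 << 0)
	#endif

	int main(void)
	{
		/* Request 64-bit FP register mode (FR=1). With this patch
		 * the kernel refuses with EOPNOTSUPP unless the o32/FP64
		 * conditions above hold, and returns 0 right away if the
		 * requested mode equals the current one. */
		if (prctl(PR_SET_FP_MODE, PR_FP_MODE_FR, 0, 0, 0))
			perror("PR_SET_FP_MODE");
		return 0;
	}
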
334 diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
335 index 5a09c2901a76..c552c20237d4 100644
336 --- a/arch/mips/kernel/ptrace.c
337 +++ b/arch/mips/kernel/ptrace.c
338 @@ -410,63 +410,160 @@ static int gpr64_set(struct task_struct *target,
339
340 #endif /* CONFIG_64BIT */
341
342 +/*
343 + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
344 + * !CONFIG_CPU_HAS_MSA variant. FP context's general register slots
345 + * correspond 1:1 to buffer slots. Only general registers are copied.
346 + */
347 +static int fpr_get_fpa(struct task_struct *target,
348 + unsigned int *pos, unsigned int *count,
349 + void **kbuf, void __user **ubuf)
350 +{
351 + return user_regset_copyout(pos, count, kbuf, ubuf,
352 + &target->thread.fpu,
353 + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
354 +}
355 +
356 +/*
357 + * Copy the floating-point context to the supplied NT_PRFPREG buffer,
358 + * CONFIG_CPU_HAS_MSA variant. Only lower 64 bits of FP context's
359 + * general register slots are copied to buffer slots. Only general
360 + * registers are copied.
361 + */
362 +static int fpr_get_msa(struct task_struct *target,
363 + unsigned int *pos, unsigned int *count,
364 + void **kbuf, void __user **ubuf)
365 +{
366 + unsigned int i;
367 + u64 fpr_val;
368 + int err;
369 +
370 + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
371 + for (i = 0; i < NUM_FPU_REGS; i++) {
372 + fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
373 + err = user_regset_copyout(pos, count, kbuf, ubuf,
374 + &fpr_val, i * sizeof(elf_fpreg_t),
375 + (i + 1) * sizeof(elf_fpreg_t));
376 + if (err)
377 + return err;
378 + }
379 +
380 + return 0;
381 +}
382 +
383 +/*
384 + * Copy the floating-point context to the supplied NT_PRFPREG buffer.
385 + * Choose the appropriate helper for general registers, and then copy
386 + * the FCSR register separately.
387 + */
388 static int fpr_get(struct task_struct *target,
389 const struct user_regset *regset,
390 unsigned int pos, unsigned int count,
391 void *kbuf, void __user *ubuf)
392 {
393 - unsigned i;
394 + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
395 int err;
396 - u64 fpr_val;
397
398 - /* XXX fcr31 */
399 + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
400 + err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
401 + else
402 + err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
403 + if (err)
404 + return err;
405
406 - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
407 - return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
408 - &target->thread.fpu,
409 - 0, sizeof(elf_fpregset_t));
410 + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
411 + &target->thread.fpu.fcr31,
412 + fcr31_pos, fcr31_pos + sizeof(u32));
413
414 - for (i = 0; i < NUM_FPU_REGS; i++) {
415 - fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
416 - err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
417 - &fpr_val, i * sizeof(elf_fpreg_t),
418 - (i + 1) * sizeof(elf_fpreg_t));
419 + return err;
420 +}
421 +
422 +/*
423 + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
424 + * !CONFIG_CPU_HAS_MSA variant. Buffer slots correspond 1:1 to FP
425 + * context's general register slots. Only general registers are copied.
426 + */
427 +static int fpr_set_fpa(struct task_struct *target,
428 + unsigned int *pos, unsigned int *count,
429 + const void **kbuf, const void __user **ubuf)
430 +{
431 + return user_regset_copyin(pos, count, kbuf, ubuf,
432 + &target->thread.fpu,
433 + 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
434 +}
435 +
436 +/*
437 + * Copy the supplied NT_PRFPREG buffer to the floating-point context,
438 + * CONFIG_CPU_HAS_MSA variant. Buffer slots are copied to lower 64
439 + * bits only of FP context's general register slots. Only general
440 + * registers are copied.
441 + */
442 +static int fpr_set_msa(struct task_struct *target,
443 + unsigned int *pos, unsigned int *count,
444 + const void **kbuf, const void __user **ubuf)
445 +{
446 + unsigned int i;
447 + u64 fpr_val;
448 + int err;
449 +
450 + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
451 + for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
452 + err = user_regset_copyin(pos, count, kbuf, ubuf,
453 + &fpr_val, i * sizeof(elf_fpreg_t),
454 + (i + 1) * sizeof(elf_fpreg_t));
455 if (err)
456 return err;
457 + set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
458 }
459
460 return 0;
461 }
462
463 +/*
464 + * Copy the supplied NT_PRFPREG buffer to the floating-point context.
465 + * Choose the appropriate helper for general registers, and then copy
466 + * the FCSR register separately.
467 + *
468 + * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
469 + * which is supposed to have been guaranteed by the kernel before
470 + * calling us, e.g. in `ptrace_regset'. We enforce that requirement,
471 + * so that we can safely avoid preinitializing temporaries for
472 + * partial register writes.
473 + */
474 static int fpr_set(struct task_struct *target,
475 const struct user_regset *regset,
476 unsigned int pos, unsigned int count,
477 const void *kbuf, const void __user *ubuf)
478 {
479 - unsigned i;
480 + const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
481 + u32 fcr31;
482 int err;
483 - u64 fpr_val;
484
485 - /* XXX fcr31 */
486 + BUG_ON(count % sizeof(elf_fpreg_t));
487 +
488 + if (pos + count > sizeof(elf_fpregset_t))
489 + return -EIO;
490
491 init_fp_ctx(target);
492
493 - if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
494 - return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
495 - &target->thread.fpu,
496 - 0, sizeof(elf_fpregset_t));
497 + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
498 + err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
499 + else
500 + err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
501 + if (err)
502 + return err;
503
504 - BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
505 - for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
506 + if (count > 0) {
507 err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
508 - &fpr_val, i * sizeof(elf_fpreg_t),
509 - (i + 1) * sizeof(elf_fpreg_t));
510 + &fcr31,
511 + fcr31_pos, fcr31_pos + sizeof(u32));
512 if (err)
513 return err;
514 - set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
515 +
516 + ptrace_setfcr31(target, fcr31);
517 }
518
519 - return 0;
520 + return err;
521 }
522
523 enum mips_regset {
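
A hedged userspace sketch of driving the rewritten fpr_set() through the NT_PRFPREG regset; the buffer layout is inferred from fcr31_pos above, assuming NUM_FPU_REGS is 32:

	#include <elf.h>
	#include <stdint.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	struct mips_prfpreg {
		uint64_t fpr[32];	/* lower 64 bits of each FP register */
		uint32_t fcr31;		/* FP control/status, at fcr31_pos */
		uint32_t pad;		/* up to sizeof(elf_fpregset_t) */
	};

	static long write_fp_context(pid_t pid, struct mips_prfpreg *fp)
	{
		struct iovec iov = { .iov_base = fp, .iov_len = sizeof(*fp) };

		/* fpr_set() enforces that the length is a multiple of 8
		 * (see the BUG_ON); the full structure satisfies that. */
		return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
	}
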
524 diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
525 index 29ebe2fd5867..a93d719edc90 100644
526 --- a/arch/powerpc/kvm/book3s_64_mmu.c
527 +++ b/arch/powerpc/kvm/book3s_64_mmu.c
528 @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
529 gpte->may_read = true;
530 gpte->may_write = true;
531 gpte->page_size = MMU_PAGE_4K;
532 + gpte->wimg = HPTE_R_M;
533
534 return 0;
535 }
536 diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
537 index 59247af5fd45..2645d484e945 100644
538 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
539 +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
540 @@ -65,11 +65,17 @@ struct kvm_resize_hpt {
541 u32 order;
542
543 /* These fields protected by kvm->lock */
544 +
545 + /* Possible values and their usage:
546 + * <0 an error occurred during allocation,
547 + * -EBUSY allocation is in progress,
548 + * 0 allocation made successfully.
549 + */
550 int error;
551 - bool prepare_done;
552
553 - /* Private to the work thread, until prepare_done is true,
554 - * then protected by kvm->resize_hpt_sem */
555 + /* Private to the work thread, until error != -EBUSY,
556 + * then protected by kvm->lock.
557 + */
558 struct kvm_hpt_info hpt;
559 };
560
561 @@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
562 * Reset all the reverse-mapping chains for all memslots
563 */
564 kvmppc_rmap_reset(kvm);
565 - /* Ensure that each vcpu will flush its TLB on next entry. */
566 - cpumask_setall(&kvm->arch.need_tlb_flush);
567 err = 0;
568 goto out;
569 }
570 @@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
571 kvmppc_set_hpt(kvm, &info);
572
573 out:
574 + if (err == 0)
575 + /* Ensure that each vcpu will flush its TLB on next entry. */
576 + cpumask_setall(&kvm->arch.need_tlb_flush);
577 +
578 mutex_unlock(&kvm->lock);
579 return err;
580 }
581 @@ -1424,16 +1432,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
582
583 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
584 {
585 - BUG_ON(kvm->arch.resize_hpt != resize);
586 + if (WARN_ON(!mutex_is_locked(&kvm->lock)))
587 + return;
588
589 if (!resize)
590 return;
591
592 - if (resize->hpt.virt)
593 - kvmppc_free_hpt(&resize->hpt);
594 + if (resize->error != -EBUSY) {
595 + if (resize->hpt.virt)
596 + kvmppc_free_hpt(&resize->hpt);
597 + kfree(resize);
598 + }
599
600 - kvm->arch.resize_hpt = NULL;
601 - kfree(resize);
602 + if (kvm->arch.resize_hpt == resize)
603 + kvm->arch.resize_hpt = NULL;
604 }
605
606 static void resize_hpt_prepare_work(struct work_struct *work)
607 @@ -1442,17 +1454,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
608 struct kvm_resize_hpt,
609 work);
610 struct kvm *kvm = resize->kvm;
611 - int err;
612 + int err = 0;
613
614 - resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
615 - resize->order);
616 -
617 - err = resize_hpt_allocate(resize);
618 + if (WARN_ON(resize->error != -EBUSY))
619 + return;
620
621 mutex_lock(&kvm->lock);
622
623 + /* Request is still current? */
624 + if (kvm->arch.resize_hpt == resize) {
625 + /* We may request large allocations here:
626 + * avoid sleeping for a long time with kvm->lock held.
627 + */
628 + mutex_unlock(&kvm->lock);
629 +
630 + resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
631 + resize->order);
632 +
633 + err = resize_hpt_allocate(resize);
634 +
635 + /* We have a strict assumption about -EBUSY
636 + * when preparing for HPT resize.
637 + */
638 + if (WARN_ON(err == -EBUSY))
639 + err = -EINPROGRESS;
640 +
641 + mutex_lock(&kvm->lock);
642 + /* It is possible that kvm->arch.resize_hpt != resize
643 + * after we grab kvm->lock again.
644 + */
645 + }
646 +
647 resize->error = err;
648 - resize->prepare_done = true;
649 +
650 + if (kvm->arch.resize_hpt != resize)
651 + resize_hpt_release(kvm, resize);
652
653 mutex_unlock(&kvm->lock);
654 }
655 @@ -1477,14 +1513,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
656
657 if (resize) {
658 if (resize->order == shift) {
659 - /* Suitable resize in progress */
660 - if (resize->prepare_done) {
661 - ret = resize->error;
662 - if (ret != 0)
663 - resize_hpt_release(kvm, resize);
664 - } else {
665 + /* Suitable resize in progress? */
666 + ret = resize->error;
667 + if (ret == -EBUSY)
668 ret = 100; /* estimated time in ms */
669 - }
670 + else if (ret)
671 + resize_hpt_release(kvm, resize);
672
673 goto out;
674 }
675 @@ -1504,6 +1538,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
676 ret = -ENOMEM;
677 goto out;
678 }
679 +
680 + resize->error = -EBUSY;
681 resize->order = shift;
682 resize->kvm = kvm;
683 INIT_WORK(&resize->work, resize_hpt_prepare_work);
684 @@ -1558,16 +1594,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
685 if (!resize || (resize->order != shift))
686 goto out;
687
688 - ret = -EBUSY;
689 - if (!resize->prepare_done)
690 - goto out;
691 -
692 ret = resize->error;
693 - if (ret != 0)
694 + if (ret)
695 goto out;
696
697 ret = resize_hpt_rehash(resize);
698 - if (ret != 0)
699 + if (ret)
700 goto out;
701
702 resize_hpt_pivot(resize);
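
A hedged VMM-side sketch of the prepare/commit protocol these hunks rework, assuming the standard KVM_PPC_RESIZE_HPT_PREPARE/COMMIT ioctls this code backs: a positive return is the estimated wait in ms (the -EBUSY state above), 0 means the new HPT is ready, and a negative value is an error:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	static int resize_guest_hpt(int vm_fd, unsigned int shift)
	{
		struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = shift };
		int ret;

		do {
			ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
			if (ret > 0)
				usleep(ret * 1000);	/* estimate, in ms */
		} while (ret > 0);

		if (ret < 0)
			return ret;
		return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
	}
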
703 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
704 index 69a09444d46e..e2ef16198456 100644
705 --- a/arch/powerpc/kvm/book3s_pr.c
706 +++ b/arch/powerpc/kvm/book3s_pr.c
707 @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
708 #define MSR_USER32 MSR_USER
709 #define MSR_USER64 MSR_USER
710 #define HW_PAGE_SIZE PAGE_SIZE
711 +#define HPTE_R_M _PAGE_COHERENT
712 #endif
713
714 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
715 @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
716 pte.eaddr = eaddr;
717 pte.vpage = eaddr >> 12;
718 pte.page_size = MMU_PAGE_64K;
719 + pte.wimg = HPTE_R_M;
720 }
721
722 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
723 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
724 index 592c974d4558..17de6acc0eab 100644
725 --- a/arch/x86/Kconfig
726 +++ b/arch/x86/Kconfig
727 @@ -89,6 +89,7 @@ config X86
728 select GENERIC_CLOCKEVENTS_MIN_ADJUST
729 select GENERIC_CMOS_UPDATE
730 select GENERIC_CPU_AUTOPROBE
731 + select GENERIC_CPU_VULNERABILITIES
732 select GENERIC_EARLY_IOREMAP
733 select GENERIC_FIND_FIRST_BIT
734 select GENERIC_IOMAP
735 @@ -428,6 +429,19 @@ config GOLDFISH
736 def_bool y
737 depends on X86_GOLDFISH
738
739 +config RETPOLINE
740 + bool "Avoid speculative indirect branches in kernel"
741 + default y
742 + help
743 + Compile kernel with the retpoline compiler options to guard against
744 + kernel-to-user data leaks by avoiding speculative indirect
745 + branches. Requires a compiler with -mindirect-branch=thunk-extern
746 + support for full protection. The kernel may run slower.
747 +
748 + Without compiler support, at least indirect branches in assembler
749 + code are eliminated. Since this includes the syscall entry path,
750 + it is not entirely pointless.
751 +
752 config INTEL_RDT
753 bool "Intel Resource Director Technology support"
754 default n
755 diff --git a/arch/x86/Makefile b/arch/x86/Makefile
756 index a20eacd9c7e9..504b1a4535ac 100644
757 --- a/arch/x86/Makefile
758 +++ b/arch/x86/Makefile
759 @@ -235,6 +235,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
760 #
761 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
762
763 +# Avoid indirect branches in kernel to deal with Spectre
764 +ifdef CONFIG_RETPOLINE
765 + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
766 + ifneq ($(RETPOLINE_CFLAGS),)
767 + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
768 + endif
769 +endif
770 +
771 archscripts: scripts_basic
772 $(Q)$(MAKE) $(build)=arch/x86/tools relocs
773
774 diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
775 index 16627fec80b2..3d09e3aca18d 100644
776 --- a/arch/x86/crypto/aesni-intel_asm.S
777 +++ b/arch/x86/crypto/aesni-intel_asm.S
778 @@ -32,6 +32,7 @@
779 #include <linux/linkage.h>
780 #include <asm/inst.h>
781 #include <asm/frame.h>
782 +#include <asm/nospec-branch.h>
783
784 /*
785 * The following macros are used to move an (un)aligned 16 byte value to/from
786 @@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
787 pxor INC, STATE4
788 movdqu IV, 0x30(OUTP)
789
790 - call *%r11
791 + CALL_NOSPEC %r11
792
793 movdqu 0x00(OUTP), INC
794 pxor INC, STATE1
795 @@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
796 _aesni_gf128mul_x_ble()
797 movups IV, (IVP)
798
799 - call *%r11
800 + CALL_NOSPEC %r11
801
802 movdqu 0x40(OUTP), INC
803 pxor INC, STATE1
804 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
805 index f7c495e2863c..a14af6eb09cb 100644
806 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
807 +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
808 @@ -17,6 +17,7 @@
809
810 #include <linux/linkage.h>
811 #include <asm/frame.h>
812 +#include <asm/nospec-branch.h>
813
814 #define CAMELLIA_TABLE_BYTE_LEN 272
815
816 @@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
817 vpxor 14 * 16(%rax), %xmm15, %xmm14;
818 vpxor 15 * 16(%rax), %xmm15, %xmm15;
819
820 - call *%r9;
821 + CALL_NOSPEC %r9;
822
823 addq $(16 * 16), %rsp;
824
825 diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
826 index eee5b3982cfd..b66bbfa62f50 100644
827 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
828 +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
829 @@ -12,6 +12,7 @@
830
831 #include <linux/linkage.h>
832 #include <asm/frame.h>
833 +#include <asm/nospec-branch.h>
834
835 #define CAMELLIA_TABLE_BYTE_LEN 272
836
837 @@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
838 vpxor 14 * 32(%rax), %ymm15, %ymm14;
839 vpxor 15 * 32(%rax), %ymm15, %ymm15;
840
841 - call *%r9;
842 + CALL_NOSPEC %r9;
843
844 addq $(16 * 32), %rsp;
845
846 diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
847 index 7a7de27c6f41..d9b734d0c8cc 100644
848 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
849 +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
850 @@ -45,6 +45,7 @@
851
852 #include <asm/inst.h>
853 #include <linux/linkage.h>
854 +#include <asm/nospec-branch.h>
855
856 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
857
858 @@ -172,7 +173,7 @@ continue_block:
859 movzxw (bufp, %rax, 2), len
860 lea crc_array(%rip), bufp
861 lea (bufp, len, 1), bufp
862 - jmp *bufp
863 + JMP_NOSPEC bufp
864
865 ################################################################
866 ## 2a) PROCESS FULL BLOCKS:
867 diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
868 index 45a63e00a6af..3f48f695d5e6 100644
869 --- a/arch/x86/entry/calling.h
870 +++ b/arch/x86/entry/calling.h
871 @@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
872 * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
873 * halves:
874 */
875 -#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
876 -#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
877 +#define PTI_USER_PGTABLE_BIT PAGE_SHIFT
878 +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT)
879 +#define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT
880 +#define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT)
881 +#define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
882
883 .macro SET_NOFLUSH_BIT reg:req
884 bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
885 @@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
886 .macro ADJUST_KERNEL_CR3 reg:req
887 ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
888 /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
889 - andq $(~PTI_SWITCH_MASK), \reg
890 + andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
891 .endm
892
893 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
894 @@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
895 /* Flush needed, clear the bit */
896 btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
897 movq \scratch_reg2, \scratch_reg
898 - jmp .Lwrcr3_\@
899 + jmp .Lwrcr3_pcid_\@
900
901 .Lnoflush_\@:
902 movq \scratch_reg2, \scratch_reg
903 SET_NOFLUSH_BIT \scratch_reg
904
905 +.Lwrcr3_pcid_\@:
906 + /* Flip the ASID to the user version */
907 + orq $(PTI_USER_PCID_MASK), \scratch_reg
908 +
909 .Lwrcr3_\@:
910 - /* Flip the PGD and ASID to the user version */
911 - orq $(PTI_SWITCH_MASK), \scratch_reg
912 + /* Flip the PGD to the user version */
913 + orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
914 mov \scratch_reg, %cr3
915 .Lend_\@:
916 .endm
917 @@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
918 movq %cr3, \scratch_reg
919 movq \scratch_reg, \save_reg
920 /*
921 - * Is the "switch mask" all zero? That means that both of
922 - * these are zero:
923 - *
924 - * 1. The user/kernel PCID bit, and
925 - * 2. The user/kernel "bit" that points CR3 to the
926 - * bottom half of the 8k PGD
927 - *
928 - * That indicates a kernel CR3 value, not a user CR3.
929 + * Test the user pagetable bit. If set, then the user page tables
930 + * are active. If clear CR3 already has the kernel page table
931 + * active.
932 */
933 - testq $(PTI_SWITCH_MASK), \scratch_reg
934 - jz .Ldone_\@
935 + bt $PTI_USER_PGTABLE_BIT, \scratch_reg
936 + jnc .Ldone_\@
937
938 ADJUST_KERNEL_CR3 \scratch_reg
939 movq \scratch_reg, %cr3
940 @@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
941 * KERNEL pages can always resume with NOFLUSH as we do
942 * explicit flushes.
943 */
944 - bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
945 + bt $PTI_USER_PGTABLE_BIT, \save_reg
946 jnc .Lnoflush_\@
947
948 /*
949 diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
950 index ace8f321a5a1..a1f28a54f23a 100644
951 --- a/arch/x86/entry/entry_32.S
952 +++ b/arch/x86/entry/entry_32.S
953 @@ -44,6 +44,7 @@
954 #include <asm/asm.h>
955 #include <asm/smap.h>
956 #include <asm/frame.h>
957 +#include <asm/nospec-branch.h>
958
959 .section .entry.text, "ax"
960
961 @@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
962
963 /* kernel thread */
964 1: movl %edi, %eax
965 - call *%ebx
966 + CALL_NOSPEC %ebx
967 /*
968 * A kernel thread is allowed to return here after successfully
969 * calling do_execve(). Exit to userspace to complete the execve()
970 @@ -919,7 +920,7 @@ common_exception:
971 movl %ecx, %es
972 TRACE_IRQS_OFF
973 movl %esp, %eax # pt_regs pointer
974 - call *%edi
975 + CALL_NOSPEC %edi
976 jmp ret_from_exception
977 END(common_exception)
978
979 diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
980 index dd696b966e58..f5fda5f26e34 100644
981 --- a/arch/x86/entry/entry_64.S
982 +++ b/arch/x86/entry/entry_64.S
983 @@ -37,6 +37,7 @@
984 #include <asm/pgtable_types.h>
985 #include <asm/export.h>
986 #include <asm/frame.h>
987 +#include <asm/nospec-branch.h>
988 #include <linux/err.h>
989
990 #include "calling.h"
991 @@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
992 */
993 pushq %rdi
994 movq $entry_SYSCALL_64_stage2, %rdi
995 - jmp *%rdi
996 + JMP_NOSPEC %rdi
997 END(entry_SYSCALL_64_trampoline)
998
999 .popsection
1000 @@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath:
1001 * It might end up jumping to the slow path. If it jumps, RAX
1002 * and all argument registers are clobbered.
1003 */
1004 +#ifdef CONFIG_RETPOLINE
1005 + movq sys_call_table(, %rax, 8), %rax
1006 + call __x86_indirect_thunk_rax
1007 +#else
1008 call *sys_call_table(, %rax, 8)
1009 +#endif
1010 .Lentry_SYSCALL_64_after_fastpath_call:
1011
1012 movq %rax, RAX(%rsp)
1013 @@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64)
1014 jmp entry_SYSCALL64_slow_path
1015
1016 1:
1017 - jmp *%rax /* Called from C */
1018 + JMP_NOSPEC %rax /* Called from C */
1019 END(stub_ptregs_64)
1020
1021 .macro ptregs_stub func
1022 @@ -517,7 +523,7 @@ ENTRY(ret_from_fork)
1023 1:
1024 /* kernel thread */
1025 movq %r12, %rdi
1026 - call *%rbx
1027 + CALL_NOSPEC %rbx
1028 /*
1029 * A kernel thread is allowed to return here after successfully
1030 * calling do_execve(). Exit to userspace to complete the execve()
1031 diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
1032 index 141e07b06216..24ffa1e88cf9 100644
1033 --- a/arch/x86/events/intel/bts.c
1034 +++ b/arch/x86/events/intel/bts.c
1035 @@ -582,6 +582,24 @@ static __init int bts_init(void)
1036 if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
1037 return -ENODEV;
1038
1039 + if (boot_cpu_has(X86_FEATURE_PTI)) {
1040 + /*
1041 + * BTS hardware writes through a virtual memory map, so we must
1042 + * either use the kernel physical map or the user mapping of
1043 + * the AUX buffer.
1044 + *
1045 + * However, since this driver supports per-CPU and per-task inherit,
1046 + * we cannot use the user mapping, as it will not be available
1047 + * if we're not running the owning process.
1048 + *
1049 + * With PTI we can't use the kernel map either, because it's not
1050 + * there when we run userspace.
1051 + *
1052 + * For now, disable this driver when using PTI.
1053 + */
1054 + return -ENODEV;
1055 + }
1056 +
1057 bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
1058 PERF_PMU_CAP_EXCLUSIVE;
1059 bts_pmu.task_ctx_nr = perf_sw_context;
1060 diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
1061 index ff700d81e91e..0927cdc4f946 100644
1062 --- a/arch/x86/include/asm/asm-prototypes.h
1063 +++ b/arch/x86/include/asm/asm-prototypes.h
1064 @@ -11,7 +11,32 @@
1065 #include <asm/pgtable.h>
1066 #include <asm/special_insns.h>
1067 #include <asm/preempt.h>
1068 +#include <asm/asm.h>
1069
1070 #ifndef CONFIG_X86_CMPXCHG64
1071 extern void cmpxchg8b_emu(void);
1072 #endif
1073 +
1074 +#ifdef CONFIG_RETPOLINE
1075 +#ifdef CONFIG_X86_32
1076 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
1077 +#else
1078 +#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
1079 +INDIRECT_THUNK(8)
1080 +INDIRECT_THUNK(9)
1081 +INDIRECT_THUNK(10)
1082 +INDIRECT_THUNK(11)
1083 +INDIRECT_THUNK(12)
1084 +INDIRECT_THUNK(13)
1085 +INDIRECT_THUNK(14)
1086 +INDIRECT_THUNK(15)
1087 +#endif
1088 +INDIRECT_THUNK(ax)
1089 +INDIRECT_THUNK(bx)
1090 +INDIRECT_THUNK(cx)
1091 +INDIRECT_THUNK(dx)
1092 +INDIRECT_THUNK(si)
1093 +INDIRECT_THUNK(di)
1094 +INDIRECT_THUNK(bp)
1095 +INDIRECT_THUNK(sp)
1096 +#endif /* CONFIG_RETPOLINE */
1097 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
1098 index 21ac898df2d8..f275447862f4 100644
1099 --- a/arch/x86/include/asm/cpufeatures.h
1100 +++ b/arch/x86/include/asm/cpufeatures.h
1101 @@ -203,6 +203,8 @@
1102 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
1103 #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
1104 #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
1105 +#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
1106 +#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
1107 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
1108 #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
1109 #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
1110 @@ -342,5 +344,7 @@
1111 #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
1112 #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
1113 #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
1114 +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
1115 +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
1116
1117 #endif /* _ASM_X86_CPUFEATURES_H */
1118 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
1119 index 581bb54dd464..5119e4b555cc 100644
1120 --- a/arch/x86/include/asm/mshyperv.h
1121 +++ b/arch/x86/include/asm/mshyperv.h
1122 @@ -7,6 +7,7 @@
1123 #include <linux/nmi.h>
1124 #include <asm/io.h>
1125 #include <asm/hyperv.h>
1126 +#include <asm/nospec-branch.h>
1127
1128 /*
1129 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
1130 @@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
1131 return U64_MAX;
1132
1133 __asm__ __volatile__("mov %4, %%r8\n"
1134 - "call *%5"
1135 + CALL_NOSPEC
1136 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
1137 "+c" (control), "+d" (input_address)
1138 - : "r" (output_address), "m" (hv_hypercall_pg)
1139 + : "r" (output_address),
1140 + THUNK_TARGET(hv_hypercall_pg)
1141 : "cc", "memory", "r8", "r9", "r10", "r11");
1142 #else
1143 u32 input_address_hi = upper_32_bits(input_address);
1144 @@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
1145 if (!hv_hypercall_pg)
1146 return U64_MAX;
1147
1148 - __asm__ __volatile__("call *%7"
1149 + __asm__ __volatile__(CALL_NOSPEC
1150 : "=A" (hv_status),
1151 "+c" (input_address_lo), ASM_CALL_CONSTRAINT
1152 : "A" (control),
1153 "b" (input_address_hi),
1154 "D"(output_address_hi), "S"(output_address_lo),
1155 - "m" (hv_hypercall_pg)
1156 + THUNK_TARGET(hv_hypercall_pg)
1157 : "cc", "memory");
1158 #endif /* !x86_64 */
1159 return hv_status;
1160 @@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
1161
1162 #ifdef CONFIG_X86_64
1163 {
1164 - __asm__ __volatile__("call *%4"
1165 + __asm__ __volatile__(CALL_NOSPEC
1166 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
1167 "+c" (control), "+d" (input1)
1168 - : "m" (hv_hypercall_pg)
1169 + : THUNK_TARGET(hv_hypercall_pg)
1170 : "cc", "r8", "r9", "r10", "r11");
1171 }
1172 #else
1173 @@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
1174 u32 input1_hi = upper_32_bits(input1);
1175 u32 input1_lo = lower_32_bits(input1);
1176
1177 - __asm__ __volatile__ ("call *%5"
1178 + __asm__ __volatile__ (CALL_NOSPEC
1179 : "=A"(hv_status),
1180 "+c"(input1_lo),
1181 ASM_CALL_CONSTRAINT
1182 : "A" (control),
1183 "b" (input1_hi),
1184 - "m" (hv_hypercall_pg)
1185 + THUNK_TARGET(hv_hypercall_pg)
1186 : "cc", "edi", "esi");
1187 }
1188 #endif
1189 diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
1190 index ab022618a50a..fa11fb1fa570 100644
1191 --- a/arch/x86/include/asm/msr-index.h
1192 +++ b/arch/x86/include/asm/msr-index.h
1193 @@ -352,6 +352,9 @@
1194 #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
1195 #define FAM10H_MMIO_CONF_BASE_SHIFT 20
1196 #define MSR_FAM10H_NODE_ID 0xc001100c
1197 +#define MSR_F10H_DECFG 0xc0011029
1198 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
1199 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
1200
1201 /* K8 MSRs */
1202 #define MSR_K8_TOP_MEM1 0xc001001a
1203 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
1204 new file mode 100644
1205 index 000000000000..402a11c803c3
1206 --- /dev/null
1207 +++ b/arch/x86/include/asm/nospec-branch.h
1208 @@ -0,0 +1,214 @@
1209 +/* SPDX-License-Identifier: GPL-2.0 */
1210 +
1211 +#ifndef __NOSPEC_BRANCH_H__
1212 +#define __NOSPEC_BRANCH_H__
1213 +
1214 +#include <asm/alternative.h>
1215 +#include <asm/alternative-asm.h>
1216 +#include <asm/cpufeatures.h>
1217 +
1218 +/*
1219 + * Fill the CPU return stack buffer.
1220 + *
1221 + * Each entry in the RSB, if used for a speculative 'ret', contains an
1222 + * infinite 'pause; jmp' loop to capture speculative execution.
1223 + *
1224 + * This is required in various cases for retpoline and IBRS-based
1225 + * mitigations for the Spectre variant 2 vulnerability. Sometimes to
1226 + * eliminate potentially bogus entries from the RSB, and sometimes
1227 + * purely to ensure that it doesn't get empty, which on some CPUs would
1228 + * allow predictions from other (unwanted!) sources to be used.
1229 + *
1230 + * We define a CPP macro such that it can be used from both .S files and
1231 + * inline assembly. It's possible to do a .macro and then include that
1232 + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
1233 + */
1234 +
1235 +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
1236 +#define RSB_FILL_LOOPS 16 /* To avoid underflow */
1237 +
1238 +/*
1239 + * Google experimented with loop-unrolling and this turned out to be
1240 + * the optimal version — two calls, each with their own speculation
1241 + * trap should their return address end up getting used, in a loop.
1242 + */
1243 +#define __FILL_RETURN_BUFFER(reg, nr, sp) \
1244 + mov $(nr/2), reg; \
1245 +771: \
1246 + call 772f; \
1247 +773: /* speculation trap */ \
1248 + pause; \
1249 + jmp 773b; \
1250 +772: \
1251 + call 774f; \
1252 +775: /* speculation trap */ \
1253 + pause; \
1254 + jmp 775b; \
1255 +774: \
1256 + dec reg; \
1257 + jnz 771b; \
1258 + add $(BITS_PER_LONG/8) * nr, sp;
1259 +
1260 +#ifdef __ASSEMBLY__
1261 +
1262 +/*
1263 + * This should be used immediately before a retpoline alternative. It tells
1264 + * objtool where the retpolines are so that it can make sense of the control
1265 + * flow by just reading the original instruction(s) and ignoring the
1266 + * alternatives.
1267 + */
1268 +.macro ANNOTATE_NOSPEC_ALTERNATIVE
1269 + .Lannotate_\@:
1270 + .pushsection .discard.nospec
1271 + .long .Lannotate_\@ - .
1272 + .popsection
1273 +.endm
1274 +
1275 +/*
1276 + * These are the bare retpoline primitives for indirect jmp and call.
1277 + * Do not use these directly; they only exist to make the ALTERNATIVE
1278 + * invocation below less ugly.
1279 + */
1280 +.macro RETPOLINE_JMP reg:req
1281 + call .Ldo_rop_\@
1282 +.Lspec_trap_\@:
1283 + pause
1284 + jmp .Lspec_trap_\@
1285 +.Ldo_rop_\@:
1286 + mov \reg, (%_ASM_SP)
1287 + ret
1288 +.endm
1289 +
1290 +/*
1291 + * This is a wrapper around RETPOLINE_JMP so the called function in reg
1292 + * returns to the instruction after the macro.
1293 + */
1294 +.macro RETPOLINE_CALL reg:req
1295 + jmp .Ldo_call_\@
1296 +.Ldo_retpoline_jmp_\@:
1297 + RETPOLINE_JMP \reg
1298 +.Ldo_call_\@:
1299 + call .Ldo_retpoline_jmp_\@
1300 +.endm
1301 +
1302 +/*
1303 + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
1304 + * indirect jmp/call which may be susceptible to the Spectre variant 2
1305 + * attack.
1306 + */
1307 +.macro JMP_NOSPEC reg:req
1308 +#ifdef CONFIG_RETPOLINE
1309 + ANNOTATE_NOSPEC_ALTERNATIVE
1310 + ALTERNATIVE_2 __stringify(jmp *\reg), \
1311 + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
1312 + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
1313 +#else
1314 + jmp *\reg
1315 +#endif
1316 +.endm
1317 +
1318 +.macro CALL_NOSPEC reg:req
1319 +#ifdef CONFIG_RETPOLINE
1320 + ANNOTATE_NOSPEC_ALTERNATIVE
1321 + ALTERNATIVE_2 __stringify(call *\reg), \
1322 + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
1323 + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
1324 +#else
1325 + call *\reg
1326 +#endif
1327 +.endm
1328 +
1329 + /*
1330 + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
1331 + * monstrosity above, manually.
1332 + */
1333 +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
1334 +#ifdef CONFIG_RETPOLINE
1335 + ANNOTATE_NOSPEC_ALTERNATIVE
1336 + ALTERNATIVE "jmp .Lskip_rsb_\@", \
1337 + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
1338 + \ftr
1339 +.Lskip_rsb_\@:
1340 +#endif
1341 +.endm
1342 +
1343 +#else /* __ASSEMBLY__ */
1344 +
1345 +#define ANNOTATE_NOSPEC_ALTERNATIVE \
1346 + "999:\n\t" \
1347 + ".pushsection .discard.nospec\n\t" \
1348 + ".long 999b - .\n\t" \
1349 + ".popsection\n\t"
1350 +
1351 +#if defined(CONFIG_X86_64) && defined(RETPOLINE)
1352 +
1353 +/*
1354 + * Since the inline asm uses the %V modifier which is only in newer GCC,
1355 + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
1356 + */
1357 +# define CALL_NOSPEC \
1358 + ANNOTATE_NOSPEC_ALTERNATIVE \
1359 + ALTERNATIVE( \
1360 + "call *%[thunk_target]\n", \
1361 + "call __x86_indirect_thunk_%V[thunk_target]\n", \
1362 + X86_FEATURE_RETPOLINE)
1363 +# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
1364 +
1365 +#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
1366 +/*
1367 + * For i386 we use the original ret-equivalent retpoline, because
1368 + * otherwise we'll run out of registers. We don't care about CET
1369 + * here, anyway.
1370 + */
1371 +# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
1372 + " jmp 904f;\n" \
1373 + " .align 16\n" \
1374 + "901: call 903f;\n" \
1375 + "902: pause;\n" \
1376 + " jmp 902b;\n" \
1377 + " .align 16\n" \
1378 + "903: addl $4, %%esp;\n" \
1379 + " pushl %[thunk_target];\n" \
1380 + " ret;\n" \
1381 + " .align 16\n" \
1382 + "904: call 901b;\n", \
1383 + X86_FEATURE_RETPOLINE)
1384 +
1385 +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1386 +#else /* No retpoline for C / inline asm */
1387 +# define CALL_NOSPEC "call *%[thunk_target]\n"
1388 +# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
1389 +#endif
1390 +
1391 +/* The Spectre V2 mitigation variants */
1392 +enum spectre_v2_mitigation {
1393 + SPECTRE_V2_NONE,
1394 + SPECTRE_V2_RETPOLINE_MINIMAL,
1395 + SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
1396 + SPECTRE_V2_RETPOLINE_GENERIC,
1397 + SPECTRE_V2_RETPOLINE_AMD,
1398 + SPECTRE_V2_IBRS,
1399 +};
1400 +
1401 +/*
1402 + * On VMEXIT we must ensure that no RSB predictions learned in the guest
1403 + * can be followed in the host, by overwriting the RSB completely. Both
1404 + * retpoline and IBRS mitigations for Spectre v2 need this; only on future
1405 + * CPUs with IBRS_ATT *might* it be avoided.
1406 + */
1407 +static inline void vmexit_fill_RSB(void)
1408 +{
1409 +#ifdef CONFIG_RETPOLINE
1410 + unsigned long loops = RSB_CLEAR_LOOPS / 2;
1411 +
1412 + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
1413 + ALTERNATIVE("jmp 910f",
1414 + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
1415 + X86_FEATURE_RETPOLINE)
1416 + "910:"
1417 + : "=&r" (loops), ASM_CALL_CONSTRAINT
1418 + : "r" (loops) : "memory" );
1419 +#endif
1420 +}
1421 +#endif /* __ASSEMBLY__ */
1422 +#endif /* __NOSPEC_BRANCH_H__ */
1423 diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
1424 index 6a60fea90b9d..625a52a5594f 100644
1425 --- a/arch/x86/include/asm/processor-flags.h
1426 +++ b/arch/x86/include/asm/processor-flags.h
1427 @@ -40,7 +40,7 @@
1428 #define CR3_NOFLUSH BIT_ULL(63)
1429
1430 #ifdef CONFIG_PAGE_TABLE_ISOLATION
1431 -# define X86_CR3_PTI_SWITCH_BIT 11
1432 +# define X86_CR3_PTI_PCID_USER_BIT 11
1433 #endif
1434
1435 #else
1436 diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
1437 index f9b48ce152eb..3effd3c994af 100644
1438 --- a/arch/x86/include/asm/tlbflush.h
1439 +++ b/arch/x86/include/asm/tlbflush.h
1440 @@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
1441 * Make sure that the dynamic ASID space does not conflict with the
1442 * bit we are using to switch between user and kernel ASIDs.
1443 */
1444 - BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
1445 + BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
1446
1447 /*
1448 * The ASID being passed in here should have respected the
1449 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
1450 */
1451 - VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
1452 + VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
1453 #endif
1454 /*
1455 * The dynamically-assigned ASIDs that get passed in are small
1456 @@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
1457 {
1458 u16 ret = kern_pcid(asid);
1459 #ifdef CONFIG_PAGE_TABLE_ISOLATION
1460 - ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
1461 + ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
1462 #endif
1463 return ret;
1464 }
1465 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
1466 index 7cb282e9e587..bfd882617613 100644
1467 --- a/arch/x86/include/asm/xen/hypercall.h
1468 +++ b/arch/x86/include/asm/xen/hypercall.h
1469 @@ -44,6 +44,7 @@
1470 #include <asm/page.h>
1471 #include <asm/pgtable.h>
1472 #include <asm/smap.h>
1473 +#include <asm/nospec-branch.h>
1474
1475 #include <xen/interface/xen.h>
1476 #include <xen/interface/sched.h>
1477 @@ -217,9 +218,9 @@ privcmd_call(unsigned call,
1478 __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
1479
1480 stac();
1481 - asm volatile("call *%[call]"
1482 + asm volatile(CALL_NOSPEC
1483 : __HYPERCALL_5PARAM
1484 - : [call] "a" (&hypercall_page[call])
1485 + : [thunk_target] "a" (&hypercall_page[call])
1486 : __HYPERCALL_CLOBBER5);
1487 clac();
1488
1489 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
1490 index 079535e53e2a..9c2a002d9297 100644
1491 --- a/arch/x86/kernel/acpi/boot.c
1492 +++ b/arch/x86/kernel/acpi/boot.c
1493 @@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
1494 #ifdef CONFIG_X86_IO_APIC
1495 #define MP_ISA_BUS 0
1496
1497 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1498 + u8 trigger, u32 gsi);
1499 +
1500 static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1501 u32 gsi)
1502 {
1503 - int ioapic;
1504 - int pin;
1505 - struct mpc_intsrc mp_irq;
1506 -
1507 /*
1508 * Check bus_irq boundary.
1509 */
1510 @@ -357,14 +356,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1511 return;
1512 }
1513
1514 - /*
1515 - * Convert 'gsi' to 'ioapic.pin'.
1516 - */
1517 - ioapic = mp_find_ioapic(gsi);
1518 - if (ioapic < 0)
1519 - return;
1520 - pin = mp_find_ioapic_pin(ioapic, gsi);
1521 -
1522 /*
1523 * TBD: This check is for faulty timer entries, where the override
1524 * erroneously sets the trigger to level, resulting in a HUGE
1525 @@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
1526 if ((bus_irq == 0) && (trigger == 3))
1527 trigger = 1;
1528
1529 - mp_irq.type = MP_INTSRC;
1530 - mp_irq.irqtype = mp_INT;
1531 - mp_irq.irqflag = (trigger << 2) | polarity;
1532 - mp_irq.srcbus = MP_ISA_BUS;
1533 - mp_irq.srcbusirq = bus_irq; /* IRQ */
1534 - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */
1535 - mp_irq.dstirq = pin; /* INTIN# */
1536 -
1537 - mp_save_irq(&mp_irq);
1538 -
1539 + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
1540 + return;
1541 /*
1542 * Reset default identity mapping if gsi is also a legacy IRQ,
1543 * otherwise there will be more than one entry with the same GSI
1544 @@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1545 return 0;
1546 }
1547
1548 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
1549 + u8 trigger, u32 gsi)
1550 +{
1551 + struct mpc_intsrc mp_irq;
1552 + int ioapic, pin;
1553 +
1554 + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
1555 + ioapic = mp_find_ioapic(gsi);
1556 + if (ioapic < 0) {
1557 + pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
1558 + return ioapic;
1559 + }
1560 +
1561 + pin = mp_find_ioapic_pin(ioapic, gsi);
1562 +
1563 + mp_irq.type = MP_INTSRC;
1564 + mp_irq.irqtype = mp_INT;
1565 + mp_irq.irqflag = (trigger << 2) | polarity;
1566 + mp_irq.srcbus = MP_ISA_BUS;
1567 + mp_irq.srcbusirq = bus_irq;
1568 + mp_irq.dstapic = mpc_ioapic_id(ioapic);
1569 + mp_irq.dstirq = pin;
1570 +
1571 + mp_save_irq(&mp_irq);
1572 +
1573 + return 0;
1574 +}
1575 +
1576 static int __init
1577 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
1578 {
1579 @@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
1580 if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
1581 polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
1582
1583 - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1584 + if (bus_irq < NR_IRQS_LEGACY)
1585 + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
1586 + else
1587 + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
1588 +
1589 acpi_penalize_sci_irq(bus_irq, trigger, polarity);
1590
1591 /*
1592 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
1593 index 3344d3382e91..e0b97e4d1db5 100644
1594 --- a/arch/x86/kernel/alternative.c
1595 +++ b/arch/x86/kernel/alternative.c
1596 @@ -344,9 +344,12 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
1597 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
1598 {
1599 unsigned long flags;
1600 + int i;
1601
1602 - if (instr[0] != 0x90)
1603 - return;
1604 + for (i = 0; i < a->padlen; i++) {
1605 + if (instr[i] != 0x90)
1606 + return;
1607 + }
1608
1609 local_irq_save(flags);
1610 add_nops(instr + (a->instrlen - a->padlen), a->padlen);
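
The optimize_nops() change above widens the check: every one of the a->padlen bytes must already be a 0x90 NOP before the padding is rewritten, where previously only the first byte was examined. A stand-alone sketch of the stricter test, with illustrative names:

    #include <stdbool.h>
    #include <stddef.h>

    /* True only if every padding byte is a single-byte NOP (0x90),
     * mirroring the loop the patch introduces. */
    static bool pad_is_all_nops(const unsigned char *instr, size_t padlen)
    {
        for (size_t i = 0; i < padlen; i++)
            if (instr[i] != 0x90)
                return false;
        return true;
    }
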
1611 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
1612 index bcb75dc97d44..ea831c858195 100644
1613 --- a/arch/x86/kernel/cpu/amd.c
1614 +++ b/arch/x86/kernel/cpu/amd.c
1615 @@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
1616 set_cpu_cap(c, X86_FEATURE_K8);
1617
1618 if (cpu_has(c, X86_FEATURE_XMM2)) {
1619 - /* MFENCE stops RDTSC speculation */
1620 - set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1621 + unsigned long long val;
1622 + int ret;
1623 +
1624 + /*
1625 + * A serializing LFENCE has less overhead than MFENCE, so
1626 + * use it for execution serialization. On families which
1627 + * don't have that MSR, LFENCE is already serializing.
1628 + * msr_set_bit() uses the safe accessors, too, even if the MSR
1629 + * is not present.
1630 + */
1631 + msr_set_bit(MSR_F10H_DECFG,
1632 + MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
1633 +
1634 + /*
1635 + * Verify that the MSR write was successful (could be running
1636 + * under a hypervisor) and only then assume that LFENCE is
1637 + * serializing.
1638 + */
1639 + ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
1640 + if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
1641 + /* A serializing LFENCE stops RDTSC speculation */
1642 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
1643 + } else {
1644 + /* MFENCE stops RDTSC speculation */
1645 + set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
1646 + }
1647 }
1648
1649 /*
1650 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
1651 index ba0b2424c9b0..e4dc26185aa7 100644
1652 --- a/arch/x86/kernel/cpu/bugs.c
1653 +++ b/arch/x86/kernel/cpu/bugs.c
1654 @@ -10,6 +10,10 @@
1655 */
1656 #include <linux/init.h>
1657 #include <linux/utsname.h>
1658 +#include <linux/cpu.h>
1659 +
1660 +#include <asm/nospec-branch.h>
1661 +#include <asm/cmdline.h>
1662 #include <asm/bugs.h>
1663 #include <asm/processor.h>
1664 #include <asm/processor-flags.h>
1665 @@ -20,6 +24,8 @@
1666 #include <asm/pgtable.h>
1667 #include <asm/set_memory.h>
1668
1669 +static void __init spectre_v2_select_mitigation(void);
1670 +
1671 void __init check_bugs(void)
1672 {
1673 identify_boot_cpu();
1674 @@ -29,6 +35,9 @@ void __init check_bugs(void)
1675 print_cpu_info(&boot_cpu_data);
1676 }
1677
1678 + /* Select the proper spectre mitigation before patching alternatives */
1679 + spectre_v2_select_mitigation();
1680 +
1681 #ifdef CONFIG_X86_32
1682 /*
1683 * Check whether we are able to run this kernel safely on SMP.
1684 @@ -60,3 +69,179 @@ void __init check_bugs(void)
1685 set_memory_4k((unsigned long)__va(0), 1);
1686 #endif
1687 }
1688 +
1689 +/* The kernel command line selection */
1690 +enum spectre_v2_mitigation_cmd {
1691 + SPECTRE_V2_CMD_NONE,
1692 + SPECTRE_V2_CMD_AUTO,
1693 + SPECTRE_V2_CMD_FORCE,
1694 + SPECTRE_V2_CMD_RETPOLINE,
1695 + SPECTRE_V2_CMD_RETPOLINE_GENERIC,
1696 + SPECTRE_V2_CMD_RETPOLINE_AMD,
1697 +};
1698 +
1699 +static const char *spectre_v2_strings[] = {
1700 + [SPECTRE_V2_NONE] = "Vulnerable",
1701 + [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline",
1702 + [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
1703 + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
1704 + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
1705 +};
1706 +
1707 +#undef pr_fmt
1708 +#define pr_fmt(fmt) "Spectre V2 mitigation: " fmt
1709 +
1710 +static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
1711 +
1712 +static void __init spec2_print_if_insecure(const char *reason)
1713 +{
1714 + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1715 + pr_info("%s\n", reason);
1716 +}
1717 +
1718 +static void __init spec2_print_if_secure(const char *reason)
1719 +{
1720 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1721 + pr_info("%s\n", reason);
1722 +}
1723 +
1724 +static inline bool retp_compiler(void)
1725 +{
1726 + return __is_defined(RETPOLINE);
1727 +}
1728 +
1729 +static inline bool match_option(const char *arg, int arglen, const char *opt)
1730 +{
1731 + int len = strlen(opt);
1732 +
1733 + return len == arglen && !strncmp(arg, opt, len);
1734 +}
1735 +
1736 +static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
1737 +{
1738 + char arg[20];
1739 + int ret;
1740 +
1741 + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
1742 + sizeof(arg));
1743 + if (ret > 0) {
1744 + if (match_option(arg, ret, "off")) {
1745 + goto disable;
1746 + } else if (match_option(arg, ret, "on")) {
1747 + spec2_print_if_secure("force enabled on command line.");
1748 + return SPECTRE_V2_CMD_FORCE;
1749 + } else if (match_option(arg, ret, "retpoline")) {
1750 + spec2_print_if_insecure("retpoline selected on command line.");
1751 + return SPECTRE_V2_CMD_RETPOLINE;
1752 + } else if (match_option(arg, ret, "retpoline,amd")) {
1753 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
1754 + pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
1755 + return SPECTRE_V2_CMD_AUTO;
1756 + }
1757 + spec2_print_if_insecure("AMD retpoline selected on command line.");
1758 + return SPECTRE_V2_CMD_RETPOLINE_AMD;
1759 + } else if (match_option(arg, ret, "retpoline,generic")) {
1760 + spec2_print_if_insecure("generic retpoline selected on command line.");
1761 + return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
1762 + } else if (match_option(arg, ret, "auto")) {
1763 + return SPECTRE_V2_CMD_AUTO;
1764 + }
1765 + }
1766 +
1767 + if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
1768 + return SPECTRE_V2_CMD_AUTO;
1769 +disable:
1770 + spec2_print_if_insecure("disabled on command line.");
1771 + return SPECTRE_V2_CMD_NONE;
1772 +}
1773 +
1774 +static void __init spectre_v2_select_mitigation(void)
1775 +{
1776 + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
1777 + enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
1778 +
1779 + /*
1780 + * If the CPU is not affected and the command line mode is NONE or AUTO
1781 + * then nothing to do.
1782 + */
1783 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
1784 + (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
1785 + return;
1786 +
1787 + switch (cmd) {
1788 + case SPECTRE_V2_CMD_NONE:
1789 + return;
1790 +
1791 + case SPECTRE_V2_CMD_FORCE:
1792 + /* FALLTHRU */
1793 + case SPECTRE_V2_CMD_AUTO:
1794 + goto retpoline_auto;
1795 +
1796 + case SPECTRE_V2_CMD_RETPOLINE_AMD:
1797 + if (IS_ENABLED(CONFIG_RETPOLINE))
1798 + goto retpoline_amd;
1799 + break;
1800 + case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
1801 + if (IS_ENABLED(CONFIG_RETPOLINE))
1802 + goto retpoline_generic;
1803 + break;
1804 + case SPECTRE_V2_CMD_RETPOLINE:
1805 + if (IS_ENABLED(CONFIG_RETPOLINE))
1806 + goto retpoline_auto;
1807 + break;
1808 + }
1809 + pr_err("kernel not compiled with retpoline; no mitigation available!\n");
1810 + return;
1811 +
1812 +retpoline_auto:
1813 + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
1814 + retpoline_amd:
1815 + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
1816 + pr_err("LFENCE not serializing. Switching to generic retpoline\n");
1817 + goto retpoline_generic;
1818 + }
1819 + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
1820 + SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
1821 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
1822 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1823 + } else {
1824 + retpoline_generic:
1825 + mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
1826 + SPECTRE_V2_RETPOLINE_MINIMAL;
1827 + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
1828 + }
1829 +
1830 + spectre_v2_enabled = mode;
1831 + pr_info("%s\n", spectre_v2_strings[mode]);
1832 +}
1833 +
1834 +#undef pr_fmt
1835 +
1836 +#ifdef CONFIG_SYSFS
1837 +ssize_t cpu_show_meltdown(struct device *dev,
1838 + struct device_attribute *attr, char *buf)
1839 +{
1840 + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1841 + return sprintf(buf, "Not affected\n");
1842 + if (boot_cpu_has(X86_FEATURE_PTI))
1843 + return sprintf(buf, "Mitigation: PTI\n");
1844 + return sprintf(buf, "Vulnerable\n");
1845 +}
1846 +
1847 +ssize_t cpu_show_spectre_v1(struct device *dev,
1848 + struct device_attribute *attr, char *buf)
1849 +{
1850 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
1851 + return sprintf(buf, "Not affected\n");
1852 + return sprintf(buf, "Vulnerable\n");
1853 +}
1854 +
1855 +ssize_t cpu_show_spectre_v2(struct device *dev,
1856 + struct device_attribute *attr, char *buf)
1857 +{
1858 + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
1859 + return sprintf(buf, "Not affected\n");
1860 +
1861 + return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
1862 +}
1863 +#endif
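
The cpu_show_* handlers above feed the sysfs vulnerability files that drivers/base/cpu.c registers later in this patch. A hedged user-space example of consuming them; the paths follow the attribute names, and error handling is deliberately minimal:

    #include <stdio.h>

    static void show(const char *name)
    {
        char path[128], line[128];
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/devices/system/cpu/vulnerabilities/%s", name);
        f = fopen(path, "r");
        if (!f)
            return;  /* kernel too old, or the file is not exposed */
        if (fgets(line, sizeof(line), f))
            printf("%-10s %s", name, line);
        fclose(f);
    }

    int main(void)
    {
        show("meltdown");
        show("spectre_v1");
        show("spectre_v2");
        return 0;
    }
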
1864 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
1865 index 2d3bd2215e5b..372ba3fb400f 100644
1866 --- a/arch/x86/kernel/cpu/common.c
1867 +++ b/arch/x86/kernel/cpu/common.c
1868 @@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
1869 if (c->x86_vendor != X86_VENDOR_AMD)
1870 setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
1871
1872 + setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
1873 + setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
1874 +
1875 fpu__init_system(c);
1876
1877 #ifdef CONFIG_X86_32
1878 diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
1879 index 8ccdca6d3f9e..d9e460fc7a3b 100644
1880 --- a/arch/x86/kernel/cpu/microcode/intel.c
1881 +++ b/arch/x86/kernel/cpu/microcode/intel.c
1882 @@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
1883 {
1884 struct cpuinfo_x86 *c = &cpu_data(cpu);
1885
1886 - if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
1887 - pr_err_once("late loading on model 79 is disabled.\n");
1888 + /*
1889 + * Late loading on model 79 with microcode revision less than 0x0b000021
1890 + * may result in a system hang. This behavior is documented in item
1891 + * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
1892 + */
1893 + if (c->x86 == 6 &&
1894 + c->x86_model == INTEL_FAM6_BROADWELL_X &&
1895 + c->x86_mask == 0x01 &&
1896 + c->microcode < 0x0b000021) {
1897 + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
1898 + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
1899 return true;
1900 }
1901
1902 diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
1903 index b6c6468e10bc..4c8440de3355 100644
1904 --- a/arch/x86/kernel/ftrace_32.S
1905 +++ b/arch/x86/kernel/ftrace_32.S
1906 @@ -8,6 +8,7 @@
1907 #include <asm/segment.h>
1908 #include <asm/export.h>
1909 #include <asm/ftrace.h>
1910 +#include <asm/nospec-branch.h>
1911
1912 #ifdef CC_USING_FENTRY
1913 # define function_hook __fentry__
1914 @@ -197,7 +198,8 @@ ftrace_stub:
1915 movl 0x4(%ebp), %edx
1916 subl $MCOUNT_INSN_SIZE, %eax
1917
1918 - call *ftrace_trace_function
1919 + movl ftrace_trace_function, %ecx
1920 + CALL_NOSPEC %ecx
1921
1922 popl %edx
1923 popl %ecx
1924 @@ -241,5 +243,5 @@ return_to_handler:
1925 movl %eax, %ecx
1926 popl %edx
1927 popl %eax
1928 - jmp *%ecx
1929 + JMP_NOSPEC %ecx
1930 #endif
1931 diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
1932 index c832291d948a..7cb8ba08beb9 100644
1933 --- a/arch/x86/kernel/ftrace_64.S
1934 +++ b/arch/x86/kernel/ftrace_64.S
1935 @@ -7,7 +7,7 @@
1936 #include <asm/ptrace.h>
1937 #include <asm/ftrace.h>
1938 #include <asm/export.h>
1939 -
1940 +#include <asm/nospec-branch.h>
1941
1942 .code64
1943 .section .entry.text, "ax"
1944 @@ -286,8 +286,8 @@ trace:
1945 * ip and parent ip are used and the list function is called when
1946 * function tracing is enabled.
1947 */
1948 - call *ftrace_trace_function
1949 -
1950 + movq ftrace_trace_function, %r8
1951 + CALL_NOSPEC %r8
1952 restore_mcount_regs
1953
1954 jmp fgraph_trace
1955 @@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
1956 movq 8(%rsp), %rdx
1957 movq (%rsp), %rax
1958 addq $24, %rsp
1959 - jmp *%rdi
1960 + JMP_NOSPEC %rdi
1961 #endif
1962 diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
1963 index a83b3346a0e1..c1bdbd3d3232 100644
1964 --- a/arch/x86/kernel/irq_32.c
1965 +++ b/arch/x86/kernel/irq_32.c
1966 @@ -20,6 +20,7 @@
1967 #include <linux/mm.h>
1968
1969 #include <asm/apic.h>
1970 +#include <asm/nospec-branch.h>
1971
1972 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1973
1974 @@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
1975 static void call_on_stack(void *func, void *stack)
1976 {
1977 asm volatile("xchgl %%ebx,%%esp \n"
1978 - "call *%%edi \n"
1979 + CALL_NOSPEC
1980 "movl %%ebx,%%esp \n"
1981 : "=b" (stack)
1982 : "0" (stack),
1983 - "D"(func)
1984 + [thunk_target] "D"(func)
1985 : "memory", "cc", "edx", "ecx", "eax");
1986 }
1987
1988 @@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
1989 call_on_stack(print_stack_overflow, isp);
1990
1991 asm volatile("xchgl %%ebx,%%esp \n"
1992 - "call *%%edi \n"
1993 + CALL_NOSPEC
1994 "movl %%ebx,%%esp \n"
1995 : "=a" (arg1), "=b" (isp)
1996 : "0" (desc), "1" (isp),
1997 - "D" (desc->handle_irq)
1998 + [thunk_target] "D" (desc->handle_irq)
1999 : "memory", "cc", "ecx");
2000 return 1;
2001 }
2002 diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
2003 index a4eb27918ceb..a2486f444073 100644
2004 --- a/arch/x86/kernel/tboot.c
2005 +++ b/arch/x86/kernel/tboot.c
2006 @@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
2007 return -1;
2008 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
2009 pte_unmap(pte);
2010 +
2011 + /*
2012 + * PTI poisons low addresses in the kernel page tables in the
2013 + * name of making them unusable for userspace. To execute
2014 + * code at such a low address, the poison must be cleared.
2015 + *
2016 + * Note: 'pgd' actually gets set in p4d_alloc() _or_
2017 + * pud_alloc() depending on 4/5-level paging.
2018 + */
2019 + pgd->pgd &= ~_PAGE_NX;
2020 +
2021 return 0;
2022 }
2023
2024 diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
2025 index 17fb6c6d939a..6a8284f72328 100644
2026 --- a/arch/x86/kvm/svm.c
2027 +++ b/arch/x86/kvm/svm.c
2028 @@ -45,6 +45,7 @@
2029 #include <asm/debugreg.h>
2030 #include <asm/kvm_para.h>
2031 #include <asm/irq_remapping.h>
2032 +#include <asm/nospec-branch.h>
2033
2034 #include <asm/virtext.h>
2035 #include "trace.h"
2036 @@ -4964,6 +4965,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2037 "mov %%r13, %c[r13](%[svm]) \n\t"
2038 "mov %%r14, %c[r14](%[svm]) \n\t"
2039 "mov %%r15, %c[r15](%[svm]) \n\t"
2040 +#endif
2041 + /*
2042 + * Clear host registers marked as clobbered to prevent
2043 + * speculative use.
2044 + */
2045 + "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
2046 + "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
2047 + "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
2048 + "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
2049 + "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
2050 +#ifdef CONFIG_X86_64
2051 + "xor %%r8, %%r8 \n\t"
2052 + "xor %%r9, %%r9 \n\t"
2053 + "xor %%r10, %%r10 \n\t"
2054 + "xor %%r11, %%r11 \n\t"
2055 + "xor %%r12, %%r12 \n\t"
2056 + "xor %%r13, %%r13 \n\t"
2057 + "xor %%r14, %%r14 \n\t"
2058 + "xor %%r15, %%r15 \n\t"
2059 #endif
2060 "pop %%" _ASM_BP
2061 :
2062 @@ -4994,6 +5014,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2063 #endif
2064 );
2065
2066 + /* Eliminate branch target predictions from guest mode */
2067 + vmexit_fill_RSB();
2068 +
2069 #ifdef CONFIG_X86_64
2070 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
2071 #else
2072 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
2073 index 47d9432756f3..ef16cf0f7cfd 100644
2074 --- a/arch/x86/kvm/vmx.c
2075 +++ b/arch/x86/kvm/vmx.c
2076 @@ -50,6 +50,7 @@
2077 #include <asm/apic.h>
2078 #include <asm/irq_remapping.h>
2079 #include <asm/mmu_context.h>
2080 +#include <asm/nospec-branch.h>
2081
2082 #include "trace.h"
2083 #include "pmu.h"
2084 @@ -888,8 +889,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
2085 {
2086 BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
2087
2088 - if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
2089 - vmcs_field_to_offset_table[field] == 0)
2090 + if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
2091 + return -ENOENT;
2092 +
2093 + /*
2094 + * FIXME: Mitigation for CVE-2017-5753. To be replaced with a
2095 + * generic mechanism.
2096 + */
2097 + asm("lfence");
2098 +
2099 + if (vmcs_field_to_offset_table[field] == 0)
2100 return -ENOENT;
2101
2102 return vmcs_field_to_offset_table[field];
2103 @@ -9405,6 +9414,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2104 /* Save guest registers, load host registers, keep flags */
2105 "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
2106 "pop %0 \n\t"
2107 + "setbe %c[fail](%0)\n\t"
2108 "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
2109 "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
2110 __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
2111 @@ -9421,12 +9431,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2112 "mov %%r13, %c[r13](%0) \n\t"
2113 "mov %%r14, %c[r14](%0) \n\t"
2114 "mov %%r15, %c[r15](%0) \n\t"
2115 + "xor %%r8d, %%r8d \n\t"
2116 + "xor %%r9d, %%r9d \n\t"
2117 + "xor %%r10d, %%r10d \n\t"
2118 + "xor %%r11d, %%r11d \n\t"
2119 + "xor %%r12d, %%r12d \n\t"
2120 + "xor %%r13d, %%r13d \n\t"
2121 + "xor %%r14d, %%r14d \n\t"
2122 + "xor %%r15d, %%r15d \n\t"
2123 #endif
2124 "mov %%cr2, %%" _ASM_AX " \n\t"
2125 "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
2126
2127 + "xor %%eax, %%eax \n\t"
2128 + "xor %%ebx, %%ebx \n\t"
2129 + "xor %%esi, %%esi \n\t"
2130 + "xor %%edi, %%edi \n\t"
2131 "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t"
2132 - "setbe %c[fail](%0) \n\t"
2133 ".pushsection .rodata \n\t"
2134 ".global vmx_return \n\t"
2135 "vmx_return: " _ASM_PTR " 2b \n\t"
2136 @@ -9463,6 +9484,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
2137 #endif
2138 );
2139
2140 + /* Eliminate branch target predictions from guest mode */
2141 + vmexit_fill_RSB();
2142 +
2143 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
2144 if (debugctlmsr)
2145 update_debugctlmsr(debugctlmsr);
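
The lfence added to vmcs_field_to_offset() is an instance of the general Spectre-v1 pattern: bounds-check, serialize, then index, so speculation cannot run ahead with an out-of-range index. A minimal x86-only sketch of the pattern; the table and names are illustrative:

    #include <stddef.h>

    #define NR_ENTRIES 64
    static int table[NR_ENTRIES];

    int load_entry(size_t idx)
    {
        if (idx >= NR_ENTRIES)
            return -1;
        /* Keep speculative execution from dereferencing table[idx]
         * before the bounds check above has retired. */
        __asm__ __volatile__("lfence" ::: "memory");
        return table[idx];
    }
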
2146 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
2147 index 075619a92ce7..575c8953cc9a 100644
2148 --- a/arch/x86/kvm/x86.c
2149 +++ b/arch/x86/kvm/x86.c
2150 @@ -4362,7 +4362,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2151 addr, n, v))
2152 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
2153 break;
2154 - trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
2155 + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
2156 handled += n;
2157 addr += n;
2158 len -= n;
2159 @@ -4621,7 +4621,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
2160 {
2161 if (vcpu->mmio_read_completed) {
2162 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
2163 - vcpu->mmio_fragments[0].gpa, *(u64 *)val);
2164 + vcpu->mmio_fragments[0].gpa, val);
2165 vcpu->mmio_read_completed = 0;
2166 return 1;
2167 }
2168 @@ -4643,14 +4643,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
2169
2170 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
2171 {
2172 - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
2173 + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
2174 return vcpu_mmio_write(vcpu, gpa, bytes, val);
2175 }
2176
2177 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
2178 void *val, int bytes)
2179 {
2180 - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
2181 + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
2182 return X86EMUL_IO_NEEDED;
2183 }
2184
2185 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
2186 index 457f681ef379..d435c89875c1 100644
2187 --- a/arch/x86/lib/Makefile
2188 +++ b/arch/x86/lib/Makefile
2189 @@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
2190 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
2191 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
2192 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
2193 +lib-$(CONFIG_RETPOLINE) += retpoline.o
2194
2195 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
2196
2197 diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
2198 index 4d34bb548b41..46e71a74e612 100644
2199 --- a/arch/x86/lib/checksum_32.S
2200 +++ b/arch/x86/lib/checksum_32.S
2201 @@ -29,7 +29,8 @@
2202 #include <asm/errno.h>
2203 #include <asm/asm.h>
2204 #include <asm/export.h>
2205 -
2206 +#include <asm/nospec-branch.h>
2207 +
2208 /*
2209 * computes a partial checksum, e.g. for TCP/UDP fragments
2210 */
2211 @@ -156,7 +157,7 @@ ENTRY(csum_partial)
2212 negl %ebx
2213 lea 45f(%ebx,%ebx,2), %ebx
2214 testl %esi, %esi
2215 - jmp *%ebx
2216 + JMP_NOSPEC %ebx
2217
2218 # Handle 2-byte-aligned regions
2219 20: addw (%esi), %ax
2220 @@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
2221 andl $-32,%edx
2222 lea 3f(%ebx,%ebx), %ebx
2223 testl %esi, %esi
2224 - jmp *%ebx
2225 + JMP_NOSPEC %ebx
2226 1: addl $64,%esi
2227 addl $64,%edi
2228 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
2229 diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
2230 new file mode 100644
2231 index 000000000000..cb45c6cb465f
2232 --- /dev/null
2233 +++ b/arch/x86/lib/retpoline.S
2234 @@ -0,0 +1,48 @@
2235 +/* SPDX-License-Identifier: GPL-2.0 */
2236 +
2237 +#include <linux/stringify.h>
2238 +#include <linux/linkage.h>
2239 +#include <asm/dwarf2.h>
2240 +#include <asm/cpufeatures.h>
2241 +#include <asm/alternative-asm.h>
2242 +#include <asm/export.h>
2243 +#include <asm/nospec-branch.h>
2244 +
2245 +.macro THUNK reg
2246 + .section .text.__x86.indirect_thunk.\reg
2247 +
2248 +ENTRY(__x86_indirect_thunk_\reg)
2249 + CFI_STARTPROC
2250 + JMP_NOSPEC %\reg
2251 + CFI_ENDPROC
2252 +ENDPROC(__x86_indirect_thunk_\reg)
2253 +.endm
2254 +
2255 +/*
2256 + * Despite being an assembler file we can't just use .irp here
2257 + * because __KSYM_DEPS__ only uses the C preprocessor and would
2258 + * only see one instance of "__x86_indirect_thunk_\reg" rather
2259 + * than one per register with the correct names. So we do it
2260 + * the simple and nasty way...
2261 + */
2262 +#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
2263 +#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
2264 +
2265 +GENERATE_THUNK(_ASM_AX)
2266 +GENERATE_THUNK(_ASM_BX)
2267 +GENERATE_THUNK(_ASM_CX)
2268 +GENERATE_THUNK(_ASM_DX)
2269 +GENERATE_THUNK(_ASM_SI)
2270 +GENERATE_THUNK(_ASM_DI)
2271 +GENERATE_THUNK(_ASM_BP)
2272 +GENERATE_THUNK(_ASM_SP)
2273 +#ifdef CONFIG_64BIT
2274 +GENERATE_THUNK(r8)
2275 +GENERATE_THUNK(r9)
2276 +GENERATE_THUNK(r10)
2277 +GENERATE_THUNK(r11)
2278 +GENERATE_THUNK(r12)
2279 +GENERATE_THUNK(r13)
2280 +GENERATE_THUNK(r14)
2281 +GENERATE_THUNK(r15)
2282 +#endif
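
retpoline.S only provides the per-register thunks; the call sites come from the compiler. With CONFIG_RETPOLINE the kernel is assumed to be built with -mindirect-branch=thunk-extern, which lowers an ordinary indirect call to a direct call into the matching __x86_indirect_thunk_<reg>. A sketch of what that means at the C level:

    typedef int (*handler_t)(int);

    int dispatch(handler_t fn, int arg)
    {
        /* Built normally this is an indirect "call *%rax" (or similar).
         * Built with -mindirect-branch=thunk-extern it becomes a direct
         * "call __x86_indirect_thunk_rax", i.e. one of the thunks
         * generated above, which avoids the indirect branch predictor. */
        return fn(arg);
    }
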
2283 diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
2284 index 43d4a4a29037..ce38f165489b 100644
2285 --- a/arch/x86/mm/pti.c
2286 +++ b/arch/x86/mm/pti.c
2287 @@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
2288 *
2289 * Returns a pointer to a P4D on success, or NULL on failure.
2290 */
2291 -static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2292 +static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2293 {
2294 pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
2295 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
2296 @@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2297 if (!new_p4d_page)
2298 return NULL;
2299
2300 - if (pgd_none(*pgd)) {
2301 - set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
2302 - new_p4d_page = 0;
2303 - }
2304 - if (new_p4d_page)
2305 - free_page(new_p4d_page);
2306 + set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
2307 }
2308 BUILD_BUG_ON(pgd_large(*pgd) != 0);
2309
2310 @@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
2311 *
2312 * Returns a pointer to a PMD on success, or NULL on failure.
2313 */
2314 -static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2315 +static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2316 {
2317 gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
2318 p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
2319 @@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2320 if (!new_pud_page)
2321 return NULL;
2322
2323 - if (p4d_none(*p4d)) {
2324 - set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
2325 - new_pud_page = 0;
2326 - }
2327 - if (new_pud_page)
2328 - free_page(new_pud_page);
2329 + set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
2330 }
2331
2332 pud = pud_offset(p4d, address);
2333 @@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
2334 if (!new_pmd_page)
2335 return NULL;
2336
2337 - if (pud_none(*pud)) {
2338 - set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
2339 - new_pmd_page = 0;
2340 - }
2341 - if (new_pmd_page)
2342 - free_page(new_pmd_page);
2343 + set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
2344 }
2345
2346 return pmd_offset(pud, address);
2347 @@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
2348 if (!new_pte_page)
2349 return NULL;
2350
2351 - if (pmd_none(*pmd)) {
2352 - set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
2353 - new_pte_page = 0;
2354 - }
2355 - if (new_pte_page)
2356 - free_page(new_pte_page);
2357 + set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
2358 }
2359
2360 pte = pte_offset_kernel(pmd, address);
2361 diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
2362 index 39c4b35ac7a4..61975b6bcb1a 100644
2363 --- a/arch/x86/platform/efi/efi_64.c
2364 +++ b/arch/x86/platform/efi/efi_64.c
2365 @@ -134,7 +134,9 @@ pgd_t * __init efi_call_phys_prolog(void)
2366 pud[j] = *pud_offset(p4d_k, vaddr);
2367 }
2368 }
2369 + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
2370 }
2371 +
2372 out:
2373 __flush_tlb_all();
2374
2375 diff --git a/crypto/algapi.c b/crypto/algapi.c
2376 index aa699ff6c876..50eb828db767 100644
2377 --- a/crypto/algapi.c
2378 +++ b/crypto/algapi.c
2379 @@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
2380
2381 spawn->alg = NULL;
2382 spawns = &inst->alg.cra_users;
2383 +
2384 + /*
2385 + * We may encounter an unregistered instance here, since
2386 + * an instance's spawns are set up prior to the instance
2387 + * being registered. An unregistered instance will have
2388 + * NULL ->cra_users.next, since ->cra_users isn't
2389 + * properly initialized until registration. But an
2390 + * unregistered instance cannot have any users, so treat
2391 + * it the same as ->cra_users being empty.
2392 + */
2393 + if (spawns->next == NULL)
2394 + break;
2395 }
2396 } while ((spawns = crypto_more_spawns(alg, &stack, &top,
2397 &secondary_spawns)));
2398 diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
2399 index bdc87907d6a1..2415ad9f6dd4 100644
2400 --- a/drivers/base/Kconfig
2401 +++ b/drivers/base/Kconfig
2402 @@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
2403 config GENERIC_CPU_AUTOPROBE
2404 bool
2405
2406 +config GENERIC_CPU_VULNERABILITIES
2407 + bool
2408 +
2409 config SOC_BUS
2410 bool
2411 select GLOB
2412 diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
2413 index 321cd7b4d817..825964efda1d 100644
2414 --- a/drivers/base/cpu.c
2415 +++ b/drivers/base/cpu.c
2416 @@ -501,10 +501,58 @@ static void __init cpu_dev_register_generic(void)
2417 #endif
2418 }
2419
2420 +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
2421 +
2422 +ssize_t __weak cpu_show_meltdown(struct device *dev,
2423 + struct device_attribute *attr, char *buf)
2424 +{
2425 + return sprintf(buf, "Not affected\n");
2426 +}
2427 +
2428 +ssize_t __weak cpu_show_spectre_v1(struct device *dev,
2429 + struct device_attribute *attr, char *buf)
2430 +{
2431 + return sprintf(buf, "Not affected\n");
2432 +}
2433 +
2434 +ssize_t __weak cpu_show_spectre_v2(struct device *dev,
2435 + struct device_attribute *attr, char *buf)
2436 +{
2437 + return sprintf(buf, "Not affected\n");
2438 +}
2439 +
2440 +static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
2441 +static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
2442 +static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
2443 +
2444 +static struct attribute *cpu_root_vulnerabilities_attrs[] = {
2445 + &dev_attr_meltdown.attr,
2446 + &dev_attr_spectre_v1.attr,
2447 + &dev_attr_spectre_v2.attr,
2448 + NULL
2449 +};
2450 +
2451 +static const struct attribute_group cpu_root_vulnerabilities_group = {
2452 + .name = "vulnerabilities",
2453 + .attrs = cpu_root_vulnerabilities_attrs,
2454 +};
2455 +
2456 +static void __init cpu_register_vulnerabilities(void)
2457 +{
2458 + if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
2459 + &cpu_root_vulnerabilities_group))
2460 + pr_err("Unable to register CPU vulnerabilities\n");
2461 +}
2462 +
2463 +#else
2464 +static inline void cpu_register_vulnerabilities(void) { }
2465 +#endif
2466 +
2467 void __init cpu_dev_init(void)
2468 {
2469 if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
2470 panic("Failed to register CPU subsystem");
2471
2472 cpu_dev_register_generic();
2473 + cpu_register_vulnerabilities();
2474 }
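
The cpu_show_* stubs above rely on weak linkage: the driver core supplies "Not affected" defaults, and an architecture that knows its real state (x86's bugs.c earlier in this patch) provides strong definitions that win at link time. A minimal user-space demonstration of the same mechanism, assuming GCC or Clang on ELF:

    #include <stdio.h>

    /* Weak default: used only when no strong definition exists. */
    __attribute__((weak)) const char *vuln_status(void)
    {
        return "Not affected";
    }

    /* Linking in another object that defines a plain (strong)
     * vuln_status() silently replaces the weak one above. */
    int main(void)
    {
        puts(vuln_status());
        return 0;
    }
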
2475 diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
2476 index adc877dfef5c..609227211295 100644
2477 --- a/drivers/block/rbd.c
2478 +++ b/drivers/block/rbd.c
2479 @@ -3074,13 +3074,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
2480 mutex_unlock(&rbd_dev->watch_mutex);
2481 }
2482
2483 +static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
2484 +{
2485 + struct rbd_client_id cid = rbd_get_cid(rbd_dev);
2486 +
2487 + strcpy(rbd_dev->lock_cookie, cookie);
2488 + rbd_set_owner_cid(rbd_dev, &cid);
2489 + queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
2490 +}
2491 +
2492 /*
2493 * lock_rwsem must be held for write
2494 */
2495 static int rbd_lock(struct rbd_device *rbd_dev)
2496 {
2497 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
2498 - struct rbd_client_id cid = rbd_get_cid(rbd_dev);
2499 char cookie[32];
2500 int ret;
2501
2502 @@ -3095,9 +3103,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
2503 return ret;
2504
2505 rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
2506 - strcpy(rbd_dev->lock_cookie, cookie);
2507 - rbd_set_owner_cid(rbd_dev, &cid);
2508 - queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
2509 + __rbd_lock(rbd_dev, cookie);
2510 return 0;
2511 }
2512
2513 @@ -3883,7 +3889,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
2514 queue_delayed_work(rbd_dev->task_wq,
2515 &rbd_dev->lock_dwork, 0);
2516 } else {
2517 - strcpy(rbd_dev->lock_cookie, cookie);
2518 + __rbd_lock(rbd_dev, cookie);
2519 }
2520 }
2521
2522 @@ -4415,7 +4421,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
2523 segment_size = rbd_obj_bytes(&rbd_dev->header);
2524 blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
2525 q->limits.max_sectors = queue_max_hw_sectors(q);
2526 - blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
2527 + blk_queue_max_segments(q, USHRT_MAX);
2528 blk_queue_max_segment_size(q, segment_size);
2529 blk_queue_io_min(q, segment_size);
2530 blk_queue_io_opt(q, segment_size);
2531 diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
2532 index a385838e2919..dadacbe558ab 100644
2533 --- a/drivers/gpu/drm/i915/gvt/gtt.c
2534 +++ b/drivers/gpu/drm/i915/gvt/gtt.c
2535 @@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
2536 return ret;
2537 } else {
2538 if (!test_bit(index, spt->post_shadow_bitmap)) {
2539 + int type = spt->shadow_page.type;
2540 +
2541 ppgtt_get_shadow_entry(spt, &se, index);
2542 ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
2543 if (ret)
2544 return ret;
2545 + ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
2546 + ppgtt_set_shadow_entry(spt, &se, index);
2547 }
2548 -
2549 ppgtt_set_post_shadow(spt, index);
2550 }
2551
2552 diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
2553 index 82498f8232eb..5c5cb2ceee49 100644
2554 --- a/drivers/gpu/drm/i915/i915_drv.c
2555 +++ b/drivers/gpu/drm/i915/i915_drv.c
2556 @@ -1693,6 +1693,7 @@ static int i915_drm_resume(struct drm_device *dev)
2557 intel_guc_resume(dev_priv);
2558
2559 intel_modeset_init_hw(dev);
2560 + intel_init_clock_gating(dev_priv);
2561
2562 spin_lock_irq(&dev_priv->irq_lock);
2563 if (dev_priv->display.hpd_irq_setup)
2564 diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
2565 index ce2ed16f2a30..920c8914cec1 100644
2566 --- a/drivers/gpu/drm/i915/i915_reg.h
2567 +++ b/drivers/gpu/drm/i915/i915_reg.h
2568 @@ -6987,6 +6987,8 @@ enum {
2569 #define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
2570 #define DISABLE_PIXEL_MASK_CAMMING (1<<14)
2571
2572 +#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
2573 +
2574 #define GEN7_L3SQCREG1 _MMIO(0xB010)
2575 #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000
2576
2577 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
2578 index 1c73d5542681..095a2240af4f 100644
2579 --- a/drivers/gpu/drm/i915/intel_display.c
2580 +++ b/drivers/gpu/drm/i915/intel_display.c
2581 @@ -3800,6 +3800,7 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
2582
2583 intel_pps_unlock_regs_wa(dev_priv);
2584 intel_modeset_init_hw(dev);
2585 + intel_init_clock_gating(dev_priv);
2586
2587 spin_lock_irq(&dev_priv->irq_lock);
2588 if (dev_priv->display.hpd_irq_setup)
2589 @@ -14406,8 +14407,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
2590
2591 intel_update_cdclk(dev_priv);
2592 dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw;
2593 -
2594 - intel_init_clock_gating(dev_priv);
2595 }
2596
2597 /*
2598 @@ -15124,6 +15123,15 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
2599 struct intel_encoder *encoder;
2600 int i;
2601
2602 + if (IS_HASWELL(dev_priv)) {
2603 + /*
2604 + * WaRsPkgCStateDisplayPMReq:hsw
2605 + * System hang if this isn't done before disabling all planes!
2606 + */
2607 + I915_WRITE(CHICKEN_PAR1_1,
2608 + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
2609 + }
2610 +
2611 intel_modeset_readout_hw_state(dev);
2612
2613 /* HW state is read out, now we need to sanitize this mess. */
2614 @@ -15220,6 +15228,8 @@ void intel_modeset_gem_init(struct drm_device *dev)
2615
2616 intel_init_gt_powersave(dev_priv);
2617
2618 + intel_init_clock_gating(dev_priv);
2619 +
2620 intel_setup_overlay(dev_priv);
2621 }
2622
2623 diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
2624 index 3c2d9cf22ed5..b6a7e492c1a3 100644
2625 --- a/drivers/gpu/drm/i915/intel_engine_cs.c
2626 +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
2627 @@ -1125,6 +1125,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
2628 if (ret)
2629 return ret;
2630
2631 + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
2632 + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
2633 + if (ret)
2634 + return ret;
2635 +
2636 /* WaToEnableHwFixForPushConstHWBug:glk */
2637 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
2638 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
2639 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
2640 index cb950752c346..014e5c08571a 100644
2641 --- a/drivers/gpu/drm/i915/intel_pm.c
2642 +++ b/drivers/gpu/drm/i915/intel_pm.c
2643 @@ -5669,12 +5669,30 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
2644 mutex_unlock(&dev_priv->wm.wm_mutex);
2645 }
2646
2647 +/*
2648 + * FIXME should probably kill this and improve
2649 + * the real watermark readout/sanitation instead
2650 + */
2651 +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
2652 +{
2653 + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
2654 + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
2655 + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
2656 +
2657 + /*
2658 + * Don't touch WM1S_LP_EN here.
2659 + * Doing so could cause underruns.
2660 + */
2661 +}
2662 +
2663 void ilk_wm_get_hw_state(struct drm_device *dev)
2664 {
2665 struct drm_i915_private *dev_priv = to_i915(dev);
2666 struct ilk_wm_values *hw = &dev_priv->wm.hw;
2667 struct drm_crtc *crtc;
2668
2669 + ilk_init_lp_watermarks(dev_priv);
2670 +
2671 for_each_crtc(dev, crtc)
2672 ilk_pipe_wm_get_hw_state(crtc);
2673
2674 @@ -7959,18 +7977,6 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
2675 }
2676 }
2677
2678 -static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
2679 -{
2680 - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
2681 - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
2682 - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
2683 -
2684 - /*
2685 - * Don't touch WM1S_LP_EN here.
2686 - * Doing so could cause underruns.
2687 - */
2688 -}
2689 -
2690 static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv)
2691 {
2692 uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
2693 @@ -8004,8 +8010,6 @@ static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv)
2694 (I915_READ(DISP_ARB_CTL) |
2695 DISP_FBC_WM_DIS));
2696
2697 - ilk_init_lp_watermarks(dev_priv);
2698 -
2699 /*
2700 * Based on the document from hardware guys the following bits
2701 * should be set unconditionally in order to enable FBC.
2702 @@ -8118,8 +8122,6 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
2703 I915_WRITE(GEN6_GT_MODE,
2704 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
2705
2706 - ilk_init_lp_watermarks(dev_priv);
2707 -
2708 I915_WRITE(CACHE_MODE_0,
2709 _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
2710
2711 @@ -8293,8 +8295,6 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv)
2712 {
2713 enum pipe pipe;
2714
2715 - ilk_init_lp_watermarks(dev_priv);
2716 -
2717 /* WaSwitchSolVfFArbitrationPriority:bdw */
2718 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
2719
2720 @@ -8349,8 +8349,6 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv)
2721
2722 static void haswell_init_clock_gating(struct drm_i915_private *dev_priv)
2723 {
2724 - ilk_init_lp_watermarks(dev_priv);
2725 -
2726 /* L3 caching of data atomics doesn't work -- disable it. */
2727 I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
2728 I915_WRITE(HSW_ROW_CHICKEN3,
2729 @@ -8394,10 +8392,6 @@ static void haswell_init_clock_gating(struct drm_i915_private *dev_priv)
2730 /* WaSwitchSolVfFArbitrationPriority:hsw */
2731 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
2732
2733 - /* WaRsPkgCStateDisplayPMReq:hsw */
2734 - I915_WRITE(CHICKEN_PAR1_1,
2735 - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
2736 -
2737 lpt_init_clock_gating(dev_priv);
2738 }
2739
2740 @@ -8405,8 +8399,6 @@ static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv)
2741 {
2742 uint32_t snpcr;
2743
2744 - ilk_init_lp_watermarks(dev_priv);
2745 -
2746 I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
2747
2748 /* WaDisableEarlyCull:ivb */
2749 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2750 index 21c62a34e558..87e8af5776a3 100644
2751 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2752 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
2753 @@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
2754 }
2755
2756 view_type = vmw_view_cmd_to_type(header->id);
2757 + if (view_type == vmw_view_max)
2758 + return -EINVAL;
2759 cmd = container_of(header, typeof(*cmd), header);
2760 ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
2761 user_surface_converter,
2762 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2763 index b850562fbdd6..62c2f4be8012 100644
2764 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2765 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
2766 @@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
2767 vps->pinned = 0;
2768
2769 /* Mapping is managed by prepare_fb/cleanup_fb */
2770 - memset(&vps->guest_map, 0, sizeof(vps->guest_map));
2771 memset(&vps->host_map, 0, sizeof(vps->host_map));
2772 vps->cpp = 0;
2773
2774 @@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
2775
2776
2777 /* Should have been freed by cleanup_fb */
2778 - if (vps->guest_map.virtual) {
2779 - DRM_ERROR("Guest mapping not freed\n");
2780 - ttm_bo_kunmap(&vps->guest_map);
2781 - }
2782 -
2783 if (vps->host_map.virtual) {
2784 DRM_ERROR("Host mapping not freed\n");
2785 ttm_bo_kunmap(&vps->host_map);
2786 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2787 index ff9c8389ff21..cd9da2dd79af 100644
2788 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2789 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
2790 @@ -175,7 +175,7 @@ struct vmw_plane_state {
2791 int pinned;
2792
2793 /* For CPU Blit */
2794 - struct ttm_bo_kmap_obj host_map, guest_map;
2795 + struct ttm_bo_kmap_obj host_map;
2796 unsigned int cpp;
2797 };
2798
2799 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2800 index ca3afae2db1f..4dee05b15552 100644
2801 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2802 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
2803 @@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
2804 bool defined;
2805
2806 /* For CPU Blit */
2807 - struct ttm_bo_kmap_obj host_map, guest_map;
2808 + struct ttm_bo_kmap_obj host_map;
2809 unsigned int cpp;
2810 };
2811
2812 @@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2813 s32 src_pitch, dst_pitch;
2814 u8 *src, *dst;
2815 bool not_used;
2816 -
2817 + struct ttm_bo_kmap_obj guest_map;
2818 + int ret;
2819
2820 if (!dirty->num_hits)
2821 return;
2822 @@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2823 if (width == 0 || height == 0)
2824 return;
2825
2826 + ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
2827 + &guest_map);
2828 + if (ret) {
2829 + DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
2830 + ret);
2831 + goto out_cleanup;
2832 + }
2833
2834 /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
2835 src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
2836 @@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2837 src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
2838
2839 dst_pitch = ddirty->pitch;
2840 - dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
2841 + dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
2842 dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
2843
2844
2845 @@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
2846 vmw_fifo_commit(dev_priv, sizeof(*cmd));
2847 }
2848
2849 + ttm_bo_kunmap(&guest_map);
2850 out_cleanup:
2851 ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
2852 ddirty->right = ddirty->bottom = S32_MIN;
2853 @@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
2854 {
2855 struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
2856
2857 - if (vps->guest_map.virtual)
2858 - ttm_bo_kunmap(&vps->guest_map);
2859 -
2860 if (vps->host_map.virtual)
2861 ttm_bo_kunmap(&vps->host_map);
2862
2863 @@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
2864 */
2865 if (vps->content_fb_type == SEPARATE_DMA &&
2866 !(dev_priv->capabilities & SVGA_CAP_3D)) {
2867 -
2868 - struct vmw_framebuffer_dmabuf *new_vfbd;
2869 -
2870 - new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
2871 -
2872 - ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
2873 - NULL);
2874 - if (ret)
2875 - goto out_srf_unpin;
2876 -
2877 - ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
2878 - new_vfbd->buffer->base.num_pages,
2879 - &vps->guest_map);
2880 -
2881 - ttm_bo_unreserve(&new_vfbd->buffer->base);
2882 -
2883 - if (ret) {
2884 - DRM_ERROR("Failed to map content buffer to CPU\n");
2885 - goto out_srf_unpin;
2886 - }
2887 -
2888 ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
2889 vps->surf->res.backup->base.num_pages,
2890 &vps->host_map);
2891 if (ret) {
2892 DRM_ERROR("Failed to map display buffer to CPU\n");
2893 - ttm_bo_kunmap(&vps->guest_map);
2894 goto out_srf_unpin;
2895 }
2896
2897 @@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
2898 stdu->display_srf = vps->surf;
2899 stdu->content_fb_type = vps->content_fb_type;
2900 stdu->cpp = vps->cpp;
2901 - memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
2902 memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
2903
2904 if (!stdu->defined)
2905 diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
2906 index 514c1000ded1..73feeeeb4283 100644
2907 --- a/drivers/infiniband/hw/cxgb4/cq.c
2908 +++ b/drivers/infiniband/hw/cxgb4/cq.c
2909 @@ -410,7 +410,7 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
2910
2911 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
2912 {
2913 - if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
2914 + if (DRAIN_CQE(cqe)) {
2915 WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
2916 return 0;
2917 }
2918 @@ -509,7 +509,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
2919 /*
2920 * Special cqe for drain WR completions...
2921 */
2922 - if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
2923 + if (DRAIN_CQE(hw_cqe)) {
2924 *cookie = CQE_DRAIN_COOKIE(hw_cqe);
2925 *cqe = *hw_cqe;
2926 goto skip_cqe;
2927 @@ -766,9 +766,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
2928 c4iw_invalidate_mr(qhp->rhp,
2929 CQE_WRID_FR_STAG(&cqe));
2930 break;
2931 - case C4IW_DRAIN_OPCODE:
2932 - wc->opcode = IB_WC_SEND;
2933 - break;
2934 default:
2935 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
2936 CQE_OPCODE(&cqe), CQE_QPID(&cqe));
2937 diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
2938 index 8f963df0bffc..9d25298d96fa 100644
2939 --- a/drivers/infiniband/hw/cxgb4/ev.c
2940 +++ b/drivers/infiniband/hw/cxgb4/ev.c
2941 @@ -109,9 +109,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
2942 if (qhp->ibqp.event_handler)
2943 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
2944
2945 - spin_lock_irqsave(&chp->comp_handler_lock, flag);
2946 - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
2947 - spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
2948 + if (t4_clear_cq_armed(&chp->cq)) {
2949 + spin_lock_irqsave(&chp->comp_handler_lock, flag);
2950 + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
2951 + spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
2952 + }
2953 }
2954
2955 void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
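
Here and in the qp.c hunks below, the completion upcall now runs only when t4_clear_cq_armed() reports the CQ was actually armed: an atomic test-and-clear gates the handler so racing flushers cannot invoke it twice for one arming. A sketch of the gating idea using C11 atomics rather than the driver's helpers:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool cq_armed;

    /* Returns true exactly once per arming, however many callers race. */
    static bool clear_cq_armed(void)
    {
        return atomic_exchange(&cq_armed, false);
    }
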
2956 diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2957 index 819a30635d53..20c481115a99 100644
2958 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2959 +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
2960 @@ -631,8 +631,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
2961 return IB_QPS_ERR;
2962 }
2963
2964 -#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
2965 -
2966 static inline u32 c4iw_ib_to_tpt_access(int a)
2967 {
2968 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
2969 diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
2970 index e69453665a17..f311ea73c806 100644
2971 --- a/drivers/infiniband/hw/cxgb4/qp.c
2972 +++ b/drivers/infiniband/hw/cxgb4/qp.c
2973 @@ -794,21 +794,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
2974 return 0;
2975 }
2976
2977 -static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
2978 +static int ib_to_fw_opcode(int ib_opcode)
2979 +{
2980 + int opcode;
2981 +
2982 + switch (ib_opcode) {
2983 + case IB_WR_SEND_WITH_INV:
2984 + opcode = FW_RI_SEND_WITH_INV;
2985 + break;
2986 + case IB_WR_SEND:
2987 + opcode = FW_RI_SEND;
2988 + break;
2989 + case IB_WR_RDMA_WRITE:
2990 + opcode = FW_RI_RDMA_WRITE;
2991 + break;
2992 + case IB_WR_RDMA_READ:
2993 + case IB_WR_RDMA_READ_WITH_INV:
2994 + opcode = FW_RI_READ_REQ;
2995 + break;
2996 + case IB_WR_REG_MR:
2997 + opcode = FW_RI_FAST_REGISTER;
2998 + break;
2999 + case IB_WR_LOCAL_INV:
3000 + opcode = FW_RI_LOCAL_INV;
3001 + break;
3002 + default:
3003 + opcode = -EINVAL;
3004 + }
3005 + return opcode;
3006 +}
3007 +
3008 +static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
3009 {
3010 struct t4_cqe cqe = {};
3011 struct c4iw_cq *schp;
3012 unsigned long flag;
3013 struct t4_cq *cq;
3014 + int opcode;
3015
3016 schp = to_c4iw_cq(qhp->ibqp.send_cq);
3017 cq = &schp->cq;
3018
3019 + opcode = ib_to_fw_opcode(wr->opcode);
3020 + if (opcode < 0)
3021 + return opcode;
3022 +
3023 cqe.u.drain_cookie = wr->wr_id;
3024 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
3025 - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
3026 + CQE_OPCODE_V(opcode) |
3027 CQE_TYPE_V(1) |
3028 CQE_SWCQE_V(1) |
3029 + CQE_DRAIN_V(1) |
3030 CQE_QPID_V(qhp->wq.sq.qid));
3031
3032 spin_lock_irqsave(&schp->lock, flag);
3033 @@ -817,10 +853,29 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
3034 t4_swcq_produce(cq);
3035 spin_unlock_irqrestore(&schp->lock, flag);
3036
3037 - spin_lock_irqsave(&schp->comp_handler_lock, flag);
3038 - (*schp->ibcq.comp_handler)(&schp->ibcq,
3039 - schp->ibcq.cq_context);
3040 - spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
3041 + if (t4_clear_cq_armed(&schp->cq)) {
3042 + spin_lock_irqsave(&schp->comp_handler_lock, flag);
3043 + (*schp->ibcq.comp_handler)(&schp->ibcq,
3044 + schp->ibcq.cq_context);
3045 + spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
3046 + }
3047 + return 0;
3048 +}
3049 +
3050 +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
3051 + struct ib_send_wr **bad_wr)
3052 +{
3053 + int ret = 0;
3054 +
3055 + while (wr) {
3056 + ret = complete_sq_drain_wr(qhp, wr);
3057 + if (ret) {
3058 + *bad_wr = wr;
3059 + break;
3060 + }
3061 + wr = wr->next;
3062 + }
3063 + return ret;
3064 }
3065
3066 static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3067 @@ -835,9 +890,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3068
3069 cqe.u.drain_cookie = wr->wr_id;
3070 cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
3071 - CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
3072 + CQE_OPCODE_V(FW_RI_SEND) |
3073 CQE_TYPE_V(0) |
3074 CQE_SWCQE_V(1) |
3075 + CQE_DRAIN_V(1) |
3076 CQE_QPID_V(qhp->wq.sq.qid));
3077
3078 spin_lock_irqsave(&rchp->lock, flag);
3079 @@ -846,10 +902,20 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3080 t4_swcq_produce(cq);
3081 spin_unlock_irqrestore(&rchp->lock, flag);
3082
3083 - spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3084 - (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3085 - rchp->ibcq.cq_context);
3086 - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3087 + if (t4_clear_cq_armed(&rchp->cq)) {
3088 + spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3089 + (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3090 + rchp->ibcq.cq_context);
3091 + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3092 + }
3093 +}
3094 +
3095 +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
3096 +{
3097 + while (wr) {
3098 + complete_rq_drain_wr(qhp, wr);
3099 + wr = wr->next;
3100 + }
3101 }
3102
3103 int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3104 @@ -875,7 +941,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3105 */
3106 if (qhp->wq.flushed) {
3107 spin_unlock_irqrestore(&qhp->lock, flag);
3108 - complete_sq_drain_wr(qhp, wr);
3109 + err = complete_sq_drain_wrs(qhp, wr, bad_wr);
3110 return err;
3111 }
3112 num_wrs = t4_sq_avail(&qhp->wq);
3113 @@ -1024,7 +1090,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3114 */
3115 if (qhp->wq.flushed) {
3116 spin_unlock_irqrestore(&qhp->lock, flag);
3117 - complete_rq_drain_wr(qhp, wr);
3118 + complete_rq_drain_wrs(qhp, wr);
3119 return err;
3120 }
3121 num_wrs = t4_rq_avail(&qhp->wq);
3122 @@ -1267,48 +1333,51 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
3123
3124 pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
3125
3126 - /* locking hierarchy: cq lock first, then qp lock. */
3127 + /* locking hierarchy: cqs lock first, then qp lock. */
3128 spin_lock_irqsave(&rchp->lock, flag);
3129 + if (schp != rchp)
3130 + spin_lock(&schp->lock);
3131 spin_lock(&qhp->lock);
3132
3133 if (qhp->wq.flushed) {
3134 spin_unlock(&qhp->lock);
3135 + if (schp != rchp)
3136 + spin_unlock(&schp->lock);
3137 spin_unlock_irqrestore(&rchp->lock, flag);
3138 return;
3139 }
3140 qhp->wq.flushed = 1;
3141 + t4_set_wq_in_error(&qhp->wq);
3142
3143 c4iw_flush_hw_cq(rchp);
3144 c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
3145 rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
3146 - spin_unlock(&qhp->lock);
3147 - spin_unlock_irqrestore(&rchp->lock, flag);
3148
3149 - /* locking hierarchy: cq lock first, then qp lock. */
3150 - spin_lock_irqsave(&schp->lock, flag);
3151 - spin_lock(&qhp->lock);
3152 if (schp != rchp)
3153 c4iw_flush_hw_cq(schp);
3154 sq_flushed = c4iw_flush_sq(qhp);
3155 +
3156 spin_unlock(&qhp->lock);
3157 - spin_unlock_irqrestore(&schp->lock, flag);
3158 + if (schp != rchp)
3159 + spin_unlock(&schp->lock);
3160 + spin_unlock_irqrestore(&rchp->lock, flag);
3161
3162 if (schp == rchp) {
3163 - if (t4_clear_cq_armed(&rchp->cq) &&
3164 - (rq_flushed || sq_flushed)) {
3165 + if ((rq_flushed || sq_flushed) &&
3166 + t4_clear_cq_armed(&rchp->cq)) {
3167 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3168 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3169 rchp->ibcq.cq_context);
3170 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3171 }
3172 } else {
3173 - if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
3174 + if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
3175 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3176 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
3177 rchp->ibcq.cq_context);
3178 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
3179 }
3180 - if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
3181 + if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
3182 spin_lock_irqsave(&schp->comp_handler_lock, flag);
3183 (*schp->ibcq.comp_handler)(&schp->ibcq,
3184 schp->ibcq.cq_context);
3185 @@ -1325,8 +1394,8 @@ static void flush_qp(struct c4iw_qp *qhp)
3186 rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
3187 schp = to_c4iw_cq(qhp->ibqp.send_cq);
3188
3189 - t4_set_wq_in_error(&qhp->wq);
3190 if (qhp->ibqp.uobject) {
3191 + t4_set_wq_in_error(&qhp->wq);
3192 t4_set_cq_in_error(&rchp->cq);
3193 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
3194 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
3195 diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
3196 index bcb80ca67d3d..80b390e861dc 100644
3197 --- a/drivers/infiniband/hw/cxgb4/t4.h
3198 +++ b/drivers/infiniband/hw/cxgb4/t4.h
3199 @@ -197,6 +197,11 @@ struct t4_cqe {
3200 #define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
3201 #define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
3202
3203 +#define CQE_DRAIN_S 10
3204 +#define CQE_DRAIN_M 0x1
3205 +#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
3206 +#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
3207 +
3208 #define CQE_STATUS_S 5
3209 #define CQE_STATUS_M 0x1F
3210 #define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
3211 @@ -213,6 +218,7 @@ struct t4_cqe {
3212 #define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
3213
3214 #define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
3215 +#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
3216 #define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
3217 #define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
3218 #define SQ_TYPE(x) (CQE_TYPE((x)))
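
The qp.c and t4.h hunks above belong to one fix: drain work requests now complete through a software CQE flagged with a new DRAIN bit (bit 10 of the CQE header), and __flush_qp() takes both CQ locks up front instead of dropping and re-taking them between the RQ and SQ flush. A minimal userspace sketch of the accessor pattern the new CQE_DRAIN_* macros follow, with a fabricated header value and ntohl() standing in for be32_to_cpu():

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    #define CQE_DRAIN_S 10
    #define CQE_DRAIN_M 0x1
    #define CQE_DRAIN_G(x) (((x) >> CQE_DRAIN_S) & CQE_DRAIN_M)

    int main(void)
    {
            /* fabricated big-endian CQE header with only the DRAIN bit set */
            uint32_t header = htonl(1u << CQE_DRAIN_S);

            printf("drain=%u\n", CQE_DRAIN_G(ntohl(header))); /* drain=1 */
            return 0;
    }
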
3219 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
3220 index 95178b4e3565..ee578fa713c2 100644
3221 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
3222 +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
3223 @@ -1000,8 +1000,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
3224 return -ENOMEM;
3225
3226 attr->qp_state = IB_QPS_INIT;
3227 - attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
3228 - IB_ACCESS_REMOTE_WRITE;
3229 + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
3230 attr->port_num = ch->sport->port;
3231 attr->pkey_index = 0;
3232
3233 @@ -1992,7 +1991,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
3234 goto destroy_ib;
3235 }
3236
3237 - guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
3238 + guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
3239 snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
3240 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
3241 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
3242 diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
3243 index 8e3adcb46851..6d416fdc25cb 100644
3244 --- a/drivers/md/dm-bufio.c
3245 +++ b/drivers/md/dm-bufio.c
3246 @@ -1611,7 +1611,8 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
3247 int l;
3248 struct dm_buffer *b, *tmp;
3249 unsigned long freed = 0;
3250 - unsigned long count = nr_to_scan;
3251 + unsigned long count = c->n_buffers[LIST_CLEAN] +
3252 + c->n_buffers[LIST_DIRTY];
3253 unsigned long retain_target = get_retain_buffers(c);
3254
3255 for (l = 0; l < LIST_SIZE; l++) {
3256 @@ -1647,8 +1648,11 @@ static unsigned long
3257 dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
3258 {
3259 struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
3260 + unsigned long count = ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) +
3261 + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
3262 + unsigned long retain_target = get_retain_buffers(c);
3263
3264 - return ACCESS_ONCE(c->n_buffers[LIST_CLEAN]) + ACCESS_ONCE(c->n_buffers[LIST_DIRTY]);
3265 + return (count < retain_target) ? 0 : (count - retain_target);
3266 }
3267
3268 /*
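
Both dm-bufio hunks above adjust the shrinker to respect the retain target: dm_bufio_shrink_count() now reports only the buffers above that target as reclaimable, and __scan() walks the client's full buffer count instead of trusting nr_to_scan. A standalone sketch of the count arithmetic, with fabricated numbers:

    #include <stdio.h>

    /* dm-bufio keeps 'retain_target' buffers cached; only the excess is
     * reported to the shrinker, as in the dm_bufio_shrink_count() hunk. */
    static unsigned long shrink_count(unsigned long clean, unsigned long dirty,
                                      unsigned long retain_target)
    {
            unsigned long count = clean + dirty;

            return (count < retain_target) ? 0 : (count - retain_target);
    }

    int main(void)
    {
            /* all numbers fabricated for illustration */
            printf("%lu\n", shrink_count(100, 50, 120)); /* 30 reclaimable */
            printf("%lu\n", shrink_count(40, 20, 120));  /* 0: below target */
            return 0;
    }
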
3269 diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
3270 index fcf7235d5742..157e1d9e7725 100644
3271 --- a/drivers/mmc/host/renesas_sdhi_core.c
3272 +++ b/drivers/mmc/host/renesas_sdhi_core.c
3273 @@ -24,6 +24,7 @@
3274 #include <linux/kernel.h>
3275 #include <linux/clk.h>
3276 #include <linux/slab.h>
3277 +#include <linux/module.h>
3278 #include <linux/of_device.h>
3279 #include <linux/platform_device.h>
3280 #include <linux/mmc/host.h>
3281 @@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
3282 return 0;
3283 }
3284 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
3285 +
3286 +MODULE_LICENSE("GPL v2");
3287 diff --git a/drivers/mux/core.c b/drivers/mux/core.c
3288 index 2260063b0ea8..6e5cf9d9cd99 100644
3289 --- a/drivers/mux/core.c
3290 +++ b/drivers/mux/core.c
3291 @@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
3292 return dev->of_node == data;
3293 }
3294
3295 +/* Note this function returns a reference to the mux_chip dev. */
3296 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
3297 {
3298 struct device *dev;
3299 @@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
3300 (!args.args_count && (mux_chip->controllers > 1))) {
3301 dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
3302 np, args.np);
3303 + put_device(&mux_chip->dev);
3304 return ERR_PTR(-EINVAL);
3305 }
3306
3307 @@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
3308 if (controller >= mux_chip->controllers) {
3309 dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
3310 np, controller, args.np);
3311 + put_device(&mux_chip->dev);
3312 return ERR_PTR(-EINVAL);
3313 }
3314
3315 - get_device(&mux_chip->dev);
3316 return &mux_chip->mux[controller];
3317 }
3318 EXPORT_SYMBOL_GPL(mux_control_get);
3319 diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
3320 index 68ac3e88a8ce..8bf80ad9dc44 100644
3321 --- a/drivers/net/can/usb/gs_usb.c
3322 +++ b/drivers/net/can/usb/gs_usb.c
3323 @@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
3324 dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
3325 rc);
3326
3327 - return rc;
3328 + return (rc > 0) ? 0 : rc;
3329 }
3330
3331 static void gs_usb_xmit_callback(struct urb *urb)
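
The gs_usb hunk above fixes a sign confusion: usb_control_msg() returns the number of bytes transferred on success, so returning it unchanged made callers treat a successful transfer as an error. A tiny sketch of the normalization:

    #include <stdio.h>

    /* usb_control_msg() returns bytes transferred on success and -errno on
     * failure; the hunk folds that into the usual 0-or-negative convention. */
    static int normalize_usb_rc(int rc)
    {
            return (rc > 0) ? 0 : rc;
    }

    int main(void)
    {
            printf("%d %d\n", normalize_usb_rc(8), normalize_usb_rc(-5)); /* 0 -5 */
            return 0;
    }
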
3332 diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
3333 index 8404e8852a0f..b4c4a2c76437 100644
3334 --- a/drivers/net/can/vxcan.c
3335 +++ b/drivers/net/can/vxcan.c
3336 @@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
3337 tbp = peer_tb;
3338 }
3339
3340 - if (tbp[IFLA_IFNAME]) {
3341 + if (ifmp && tbp[IFLA_IFNAME]) {
3342 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
3343 name_assign_type = NET_NAME_USER;
3344 } else {
3345 diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
3346 index faf7cdc97ebf..311539c6625f 100644
3347 --- a/drivers/net/ethernet/freescale/fec_main.c
3348 +++ b/drivers/net/ethernet/freescale/fec_main.c
3349 @@ -3458,6 +3458,10 @@ fec_probe(struct platform_device *pdev)
3350 goto failed_regulator;
3351 }
3352 } else {
3353 + if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
3354 + ret = -EPROBE_DEFER;
3355 + goto failed_regulator;
3356 + }
3357 fep->reg_phy = NULL;
3358 }
3359
3360 @@ -3539,8 +3543,9 @@ fec_probe(struct platform_device *pdev)
3361 failed_clk:
3362 if (of_phy_is_fixed_link(np))
3363 of_phy_deregister_fixed_link(np);
3364 -failed_phy:
3365 of_node_put(phy_node);
3366 +failed_phy:
3367 + dev_id--;
3368 failed_ioremap:
3369 free_netdev(ndev);
3370
3371 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
3372 index d6d4ed7acf03..31277d3bb7dc 100644
3373 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
3374 +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
3375 @@ -1367,6 +1367,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
3376 * Checks to see of the link status of the hardware has changed. If a
3377 * change in link status has been detected, then we read the PHY registers
3378 * to get the current speed/duplex if link exists.
3379 + *
3380 + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
3381 + * up).
3382 **/
3383 static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3384 {
3385 @@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3386 * Change or Rx Sequence Error interrupt.
3387 */
3388 if (!mac->get_link_status)
3389 - return 0;
3390 + return 1;
3391
3392 /* First we want to see if the MII Status Register reports
3393 * link. If so, then we want to get the current speed/duplex
3394 @@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
3395 * different link partner.
3396 */
3397 ret_val = e1000e_config_fc_after_link_up(hw);
3398 - if (ret_val)
3399 + if (ret_val) {
3400 e_dbg("Error configuring flow control\n");
3401 + return ret_val;
3402 + }
3403
3404 - return ret_val;
3405 + return 1;
3406 }
3407
3408 static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
3409 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3410 index 3ead7439821c..99bd6e88ebc7 100644
3411 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3412 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
3413 @@ -4235,7 +4235,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
3414 return -EINVAL;
3415 if (!info->linking)
3416 break;
3417 - if (netdev_has_any_upper_dev(upper_dev))
3418 + if (netdev_has_any_upper_dev(upper_dev) &&
3419 + (!netif_is_bridge_master(upper_dev) ||
3420 + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
3421 + upper_dev)))
3422 return -EINVAL;
3423 if (netif_is_lag_master(upper_dev) &&
3424 !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
3425 @@ -4347,6 +4350,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
3426 u16 vid)
3427 {
3428 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
3429 + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
3430 struct netdev_notifier_changeupper_info *info = ptr;
3431 struct net_device *upper_dev;
3432 int err = 0;
3433 @@ -4358,7 +4362,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
3434 return -EINVAL;
3435 if (!info->linking)
3436 break;
3437 - if (netdev_has_any_upper_dev(upper_dev))
3438 + if (netdev_has_any_upper_dev(upper_dev) &&
3439 + (!netif_is_bridge_master(upper_dev) ||
3440 + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
3441 + upper_dev)))
3442 return -EINVAL;
3443 break;
3444 case NETDEV_CHANGEUPPER:
3445 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3446 index 84ce83acdc19..88892d47acae 100644
3447 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3448 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
3449 @@ -326,6 +326,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
3450 void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
3451 struct net_device *brport_dev,
3452 struct net_device *br_dev);
3453 +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
3454 + const struct net_device *br_dev);
3455
3456 /* spectrum.c */
3457 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
3458 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3459 index 5189022a1c8c..c23cc51bb5a5 100644
3460 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3461 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3462 @@ -2536,7 +2536,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3463 {
3464 if (!removing)
3465 nh->should_offload = 1;
3466 - else if (nh->offloaded)
3467 + else
3468 nh->should_offload = 0;
3469 nh->update = 1;
3470 }
3471 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3472 index d39ffbfcc436..f5863e5bec81 100644
3473 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3474 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
3475 @@ -134,6 +134,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
3476 return NULL;
3477 }
3478
3479 +bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
3480 + const struct net_device *br_dev)
3481 +{
3482 + return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
3483 +}
3484 +
3485 static struct mlxsw_sp_bridge_device *
3486 mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
3487 struct net_device *br_dev)
3488 diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
3489 index d2e88a30f57b..db31963c5d9d 100644
3490 --- a/drivers/net/ethernet/renesas/sh_eth.c
3491 +++ b/drivers/net/ethernet/renesas/sh_eth.c
3492 @@ -3212,18 +3212,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
3493 /* ioremap the TSU registers */
3494 if (mdp->cd->tsu) {
3495 struct resource *rtsu;
3496 +
3497 rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
3498 - mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
3499 - if (IS_ERR(mdp->tsu_addr)) {
3500 - ret = PTR_ERR(mdp->tsu_addr);
3501 + if (!rtsu) {
3502 + dev_err(&pdev->dev, "no TSU resource\n");
3503 + ret = -ENODEV;
3504 + goto out_release;
3505 + }
3506 + /* We can only request the TSU region for the first port
3507 + * of the two sharing this TSU for the probe to succeed...
3508 + */
3509 + if (devno % 2 == 0 &&
3510 + !devm_request_mem_region(&pdev->dev, rtsu->start,
3511 + resource_size(rtsu),
3512 + dev_name(&pdev->dev))) {
3513 + dev_err(&pdev->dev, "can't request TSU resource.\n");
3514 + ret = -EBUSY;
3515 + goto out_release;
3516 + }
3517 + mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
3518 + resource_size(rtsu));
3519 + if (!mdp->tsu_addr) {
3520 + dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
3521 + ret = -ENOMEM;
3522 goto out_release;
3523 }
3524 mdp->port = devno % 2;
3525 ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
3526 }
3527
3528 - /* initialize first or needed device */
3529 - if (!devno || pd->needs_init) {
3530 + /* Need to init only the first port of the two sharing a TSU */
3531 + if (devno % 2 == 0) {
3532 if (mdp->cd->chip_reset)
3533 mdp->cd->chip_reset(ndev);
3534
3535 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3536 index 28c4d6fa096c..0ad12c81a9e4 100644
3537 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3538 +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
3539 @@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
3540 bool stmmac_eee_init(struct stmmac_priv *priv)
3541 {
3542 struct net_device *ndev = priv->dev;
3543 + int interface = priv->plat->interface;
3544 unsigned long flags;
3545 bool ret = false;
3546
3547 + if ((interface != PHY_INTERFACE_MODE_MII) &&
3548 + (interface != PHY_INTERFACE_MODE_GMII) &&
3549 + !phy_interface_mode_is_rgmii(interface))
3550 + goto out;
3551 +
3552 /* Using PCS we cannot dial with the phy registers at this stage
3553 * so we do not support extra feature like EEE.
3554 */
3555 diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
3556 index 4b377b978a0b..cb85307f125b 100644
3557 --- a/drivers/net/phy/phylink.c
3558 +++ b/drivers/net/phy/phylink.c
3559 @@ -1428,9 +1428,8 @@ static void phylink_sfp_link_down(void *upstream)
3560 WARN_ON(!lockdep_rtnl_is_held());
3561
3562 set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
3563 + queue_work(system_power_efficient_wq, &pl->resolve);
3564 flush_work(&pl->resolve);
3565 -
3566 - netif_carrier_off(pl->netdev);
3567 }
3568
3569 static void phylink_sfp_link_up(void *upstream)
3570 diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
3571 index 5cb5384697ea..7ae815bee52d 100644
3572 --- a/drivers/net/phy/sfp-bus.c
3573 +++ b/drivers/net/phy/sfp-bus.c
3574 @@ -359,7 +359,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream);
3575 void sfp_unregister_upstream(struct sfp_bus *bus)
3576 {
3577 rtnl_lock();
3578 - sfp_unregister_bus(bus);
3579 + if (bus->sfp)
3580 + sfp_unregister_bus(bus);
3581 bus->upstream = NULL;
3582 bus->netdev = NULL;
3583 rtnl_unlock();
3584 @@ -464,7 +465,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket);
3585 void sfp_unregister_socket(struct sfp_bus *bus)
3586 {
3587 rtnl_lock();
3588 - sfp_unregister_bus(bus);
3589 + if (bus->netdev)
3590 + sfp_unregister_bus(bus);
3591 bus->sfp_dev = NULL;
3592 bus->sfp = NULL;
3593 bus->socket_ops = NULL;
3594 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3595 index 4fb7647995c3..9875ab5ce18c 100644
3596 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3597 +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
3598 @@ -666,11 +666,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
3599 return index & (q->n_window - 1);
3600 }
3601
3602 -static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
3603 +static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
3604 struct iwl_txq *txq, int idx)
3605 {
3606 - return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
3607 - idx);
3608 + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3609 +
3610 + if (trans->cfg->use_tfh)
3611 + idx = iwl_pcie_get_cmd_index(txq, idx);
3612 +
3613 + return txq->tfds + trans_pcie->tfd_size * idx;
3614 }
3615
3616 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
3617 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3618 index d74613fcb756..6f45c8148b27 100644
3619 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3620 +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
3621 @@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
3622
3623 static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
3624 {
3625 - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3626 -
3627 /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
3628 * idx is bounded by n_window
3629 */
3630 @@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
3631 lockdep_assert_held(&txq->lock);
3632
3633 iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
3634 - iwl_pcie_get_tfd(trans_pcie, txq, idx));
3635 + iwl_pcie_get_tfd(trans, txq, idx));
3636
3637 /* free SKB */
3638 if (txq->entries) {
3639 @@ -367,11 +365,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
3640 struct sk_buff *skb,
3641 struct iwl_cmd_meta *out_meta)
3642 {
3643 - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3644 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
3645 int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
3646 - struct iwl_tfh_tfd *tfd =
3647 - iwl_pcie_get_tfd(trans_pcie, txq, idx);
3648 + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
3649 dma_addr_t tb_phys;
3650 bool amsdu;
3651 int i, len, tb1_len, tb2_len, hdr_len;
3652 @@ -568,8 +564,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
3653 u8 group_id = iwl_cmd_groupid(cmd->id);
3654 const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
3655 u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
3656 - struct iwl_tfh_tfd *tfd =
3657 - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
3658 + struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
3659
3660 memset(tfd, 0, sizeof(*tfd));
3661
3662 diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3663 index c645d10d3707..4704137a26e0 100644
3664 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3665 +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
3666 @@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
3667 {
3668 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
3669 int i, num_tbs;
3670 - void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
3671 + void *tfd = iwl_pcie_get_tfd(trans, txq, index);
3672
3673 /* Sanity check on number of chunks */
3674 num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
3675 @@ -1999,7 +1999,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
3676 }
3677
3678 trace_iwlwifi_dev_tx(trans->dev, skb,
3679 - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
3680 + iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
3681 trans_pcie->tfd_size,
3682 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
3683 hdr_len);
3684 @@ -2073,7 +2073,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
3685 IEEE80211_CCMP_HDR_LEN : 0;
3686
3687 trace_iwlwifi_dev_tx(trans->dev, skb,
3688 - iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
3689 + iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
3690 trans_pcie->tfd_size,
3691 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
3692
3693 @@ -2406,7 +2406,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
3694 memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
3695 IWL_FIRST_TB_SIZE);
3696
3697 - tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
3698 + tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
3699 /* Set up entry for this TFD in Tx byte-count array */
3700 iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
3701 iwl_pcie_tfd_get_num_tbs(trans, tfd));
3702 diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
3703 index 0765b1797d4c..7f8fa42a1084 100644
3704 --- a/drivers/platform/x86/wmi.c
3705 +++ b/drivers/platform/x86/wmi.c
3706 @@ -1268,5 +1268,5 @@ static void __exit acpi_wmi_exit(void)
3707 bus_unregister(&wmi_bus_type);
3708 }
3709
3710 -subsys_initcall(acpi_wmi_init);
3711 +subsys_initcall_sync(acpi_wmi_init);
3712 module_exit(acpi_wmi_exit);
3713 diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
3714 index 0f695df14c9d..372ce9913e6d 100644
3715 --- a/drivers/staging/android/ashmem.c
3716 +++ b/drivers/staging/android/ashmem.c
3717 @@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3718 break;
3719 case ASHMEM_SET_SIZE:
3720 ret = -EINVAL;
3721 + mutex_lock(&ashmem_mutex);
3722 if (!asma->file) {
3723 ret = 0;
3724 asma->size = (size_t)arg;
3725 }
3726 + mutex_unlock(&ashmem_mutex);
3727 break;
3728 case ASHMEM_GET_SIZE:
3729 ret = asma->size;
3730 diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
3731 index def1b05ffca0..284bd1a7b570 100644
3732 --- a/drivers/usb/gadget/udc/core.c
3733 +++ b/drivers/usb/gadget/udc/core.c
3734 @@ -1158,11 +1158,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3735
3736 udc = kzalloc(sizeof(*udc), GFP_KERNEL);
3737 if (!udc)
3738 - goto err1;
3739 -
3740 - ret = device_add(&gadget->dev);
3741 - if (ret)
3742 - goto err2;
3743 + goto err_put_gadget;
3744
3745 device_initialize(&udc->dev);
3746 udc->dev.release = usb_udc_release;
3747 @@ -1171,7 +1167,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3748 udc->dev.parent = parent;
3749 ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
3750 if (ret)
3751 - goto err3;
3752 + goto err_put_udc;
3753 +
3754 + ret = device_add(&gadget->dev);
3755 + if (ret)
3756 + goto err_put_udc;
3757
3758 udc->gadget = gadget;
3759 gadget->udc = udc;
3760 @@ -1181,7 +1181,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3761
3762 ret = device_add(&udc->dev);
3763 if (ret)
3764 - goto err4;
3765 + goto err_unlist_udc;
3766
3767 usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
3768 udc->vbus = true;
3769 @@ -1189,27 +1189,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
3770 /* pick up one of pending gadget drivers */
3771 ret = check_pending_gadget_drivers(udc);
3772 if (ret)
3773 - goto err5;
3774 + goto err_del_udc;
3775
3776 mutex_unlock(&udc_lock);
3777
3778 return 0;
3779
3780 -err5:
3781 + err_del_udc:
3782 device_del(&udc->dev);
3783
3784 -err4:
3785 + err_unlist_udc:
3786 list_del(&udc->list);
3787 mutex_unlock(&udc_lock);
3788
3789 -err3:
3790 - put_device(&udc->dev);
3791 device_del(&gadget->dev);
3792
3793 -err2:
3794 - kfree(udc);
3795 + err_put_udc:
3796 + put_device(&udc->dev);
3797
3798 -err1:
3799 + err_put_gadget:
3800 put_device(&gadget->dev);
3801 return ret;
3802 }
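
The udc/core.c hunk above reorders registration so device_add(&gadget->dev) runs only after the udc device is initialized and named, and renames the error labels after what they undo. The rule the new labels encode: once device_initialize() has run, the embedding structure must be released with put_device() so the release callback frees it; a bare kfree() would bypass that. A kernel-style sketch of the pattern, assuming a made-up struct foo (not a drop-in for the function above):

    #include <linux/device.h>
    #include <linux/slab.h>

    struct foo {
            struct device dev;
    };

    static void foo_release(struct device *dev)
    {
            kfree(container_of(dev, struct foo, dev));
    }

    static struct foo *foo_create(void)
    {
            struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

            if (!f)
                    return NULL;
            device_initialize(&f->dev);
            f->dev.release = foo_release;
            if (dev_set_name(&f->dev, "foo")) {
                    /* after device_initialize(), never kfree() directly */
                    put_device(&f->dev);
                    return NULL;
            }
            return f;
    }
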
3803 diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
3804 index 8e7737d7ac0a..03be5d574f23 100644
3805 --- a/drivers/usb/misc/usb3503.c
3806 +++ b/drivers/usb/misc/usb3503.c
3807 @@ -292,6 +292,8 @@ static int usb3503_probe(struct usb3503 *hub)
3808 if (gpio_is_valid(hub->gpio_reset)) {
3809 err = devm_gpio_request_one(dev, hub->gpio_reset,
3810 GPIOF_OUT_INIT_LOW, "usb3503 reset");
3811 + /* Datasheet defines a hardware reset to be at least 100us */
3812 + usleep_range(100, 10000);
3813 if (err) {
3814 dev_err(dev,
3815 "unable to request GPIO %d as reset pin (%d)\n",
3816 diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
3817 index f6ae753ab99b..f932f40302df 100644
3818 --- a/drivers/usb/mon/mon_bin.c
3819 +++ b/drivers/usb/mon/mon_bin.c
3820 @@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
3821 break;
3822
3823 case MON_IOCQ_RING_SIZE:
3824 + mutex_lock(&rp->fetch_lock);
3825 ret = rp->b_size;
3826 + mutex_unlock(&rp->fetch_lock);
3827 break;
3828
3829 case MON_IOCT_RING_SIZE:
3830 @@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
3831 unsigned long offset, chunk_idx;
3832 struct page *pageptr;
3833
3834 + mutex_lock(&rp->fetch_lock);
3835 offset = vmf->pgoff << PAGE_SHIFT;
3836 - if (offset >= rp->b_size)
3837 + if (offset >= rp->b_size) {
3838 + mutex_unlock(&rp->fetch_lock);
3839 return VM_FAULT_SIGBUS;
3840 + }
3841 chunk_idx = offset / CHUNK_SIZE;
3842 pageptr = rp->b_vec[chunk_idx].pg;
3843 get_page(pageptr);
3844 + mutex_unlock(&rp->fetch_lock);
3845 vmf->page = pageptr;
3846 return 0;
3847 }
3848 diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
3849 index 412f812522ee..aed182d24d23 100644
3850 --- a/drivers/usb/serial/cp210x.c
3851 +++ b/drivers/usb/serial/cp210x.c
3852 @@ -127,6 +127,7 @@ static const struct usb_device_id id_table[] = {
3853 { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
3854 { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
3855 { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
3856 + { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
3857 { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
3858 { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
3859 { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
3860 @@ -177,6 +178,7 @@ static const struct usb_device_id id_table[] = {
3861 { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
3862 { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
3863 { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
3864 + { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
3865 { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
3866 { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
3867 { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
3868 diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
3869 index 9f356f7cf7d5..719ec68ae309 100644
3870 --- a/drivers/usb/storage/unusual_uas.h
3871 +++ b/drivers/usb/storage/unusual_uas.h
3872 @@ -156,6 +156,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
3873 USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3874 US_FL_NO_ATA_1X),
3875
3876 +/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
3877 +UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
3878 + "Norelsys",
3879 + "NS1068X",
3880 + USB_SC_DEVICE, USB_PR_DEVICE, NULL,
3881 + US_FL_IGNORE_UAS),
3882 +
3883 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
3884 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
3885 "JMicron",
3886 diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
3887 index 17b599b923f3..7f0d22131121 100644
3888 --- a/drivers/usb/usbip/usbip_common.c
3889 +++ b/drivers/usb/usbip/usbip_common.c
3890 @@ -105,7 +105,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3891 dev_dbg(dev, " devnum(%d) devpath(%s) usb speed(%s)",
3892 udev->devnum, udev->devpath, usb_speed_string(udev->speed));
3893
3894 - pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
3895 + pr_debug("tt hub ttport %d\n", udev->ttport);
3896
3897 dev_dbg(dev, " ");
3898 for (i = 0; i < 16; i++)
3899 @@ -138,12 +138,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
3900 }
3901 pr_debug("\n");
3902
3903 - dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
3904 -
3905 - dev_dbg(dev,
3906 - "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
3907 - &udev->descriptor, udev->config,
3908 - udev->actconfig, udev->rawdescriptors);
3909 + dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
3910 + udev->bus->bus_name);
3911
3912 dev_dbg(dev, "have_langid %d, string_langid %d\n",
3913 udev->have_langid, udev->string_langid);
3914 @@ -251,9 +247,6 @@ void usbip_dump_urb(struct urb *urb)
3915
3916 dev = &urb->dev->dev;
3917
3918 - dev_dbg(dev, " urb :%p\n", urb);
3919 - dev_dbg(dev, " dev :%p\n", urb->dev);
3920 -
3921 usbip_dump_usb_device(urb->dev);
3922
3923 dev_dbg(dev, " pipe :%08x ", urb->pipe);
3924 @@ -262,11 +255,9 @@ void usbip_dump_urb(struct urb *urb)
3925
3926 dev_dbg(dev, " status :%d\n", urb->status);
3927 dev_dbg(dev, " transfer_flags :%08X\n", urb->transfer_flags);
3928 - dev_dbg(dev, " transfer_buffer :%p\n", urb->transfer_buffer);
3929 dev_dbg(dev, " transfer_buffer_length:%d\n",
3930 urb->transfer_buffer_length);
3931 dev_dbg(dev, " actual_length :%d\n", urb->actual_length);
3932 - dev_dbg(dev, " setup_packet :%p\n", urb->setup_packet);
3933
3934 if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
3935 usbip_dump_usb_ctrlrequest(
3936 @@ -276,8 +267,6 @@ void usbip_dump_urb(struct urb *urb)
3937 dev_dbg(dev, " number_of_packets :%d\n", urb->number_of_packets);
3938 dev_dbg(dev, " interval :%d\n", urb->interval);
3939 dev_dbg(dev, " error_count :%d\n", urb->error_count);
3940 - dev_dbg(dev, " context :%p\n", urb->context);
3941 - dev_dbg(dev, " complete :%p\n", urb->complete);
3942 }
3943 EXPORT_SYMBOL_GPL(usbip_dump_urb);
3944
3945 diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
3946 index e429b59f6f8a..d020e72b3122 100644
3947 --- a/drivers/usb/usbip/vudc_rx.c
3948 +++ b/drivers/usb/usbip/vudc_rx.c
3949 @@ -132,6 +132,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
3950 urb_p->new = 1;
3951 urb_p->seqnum = pdu->base.seqnum;
3952
3953 + if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
3954 + /* validate packet size and number of packets */
3955 + unsigned int maxp, packets, bytes;
3956 +
3957 + maxp = usb_endpoint_maxp(urb_p->ep->desc);
3958 + maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
3959 + bytes = pdu->u.cmd_submit.transfer_buffer_length;
3960 + packets = DIV_ROUND_UP(bytes, maxp);
3961 +
3962 + if (pdu->u.cmd_submit.number_of_packets < 0 ||
3963 + pdu->u.cmd_submit.number_of_packets > packets) {
3964 + dev_err(&udc->gadget.dev,
3965 + "CMD_SUBMIT: isoc invalid num packets %d\n",
3966 + pdu->u.cmd_submit.number_of_packets);
3967 + ret = -EMSGSIZE;
3968 + goto free_urbp;
3969 + }
3970 + }
3971 +
3972 ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
3973 if (ret) {
3974 usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
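
The vudc_rx.c hunk above bounds number_of_packets for isochronous URBs before the buffer is allocated: a transfer of N bytes with maxp-byte packets can describe at most DIV_ROUND_UP(N, maxp) packets, so anything beyond that (or negative) is a malformed PDU. A standalone sketch of the bound, ignoring the high-bandwidth maxp_mult factor the hunk also applies:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    /* Upper bound used by the v_recv_cmd_submit() hunk. */
    static int isoc_packets_valid(int number_of_packets,
                                  unsigned int bytes, unsigned int maxp)
    {
            unsigned int packets = DIV_ROUND_UP(bytes, maxp);

            return number_of_packets >= 0 &&
                   (unsigned int)number_of_packets <= packets;
    }

    int main(void)
    {
            /* fabricated values: 3072 bytes, 1024-byte packets */
            printf("%d\n", isoc_packets_valid(3, 3072, 1024));   /* 1: ok */
            printf("%d\n", isoc_packets_valid(100, 3072, 1024)); /* 0: rejected */
            return 0;
    }
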
3975 diff --git a/drivers/usb/usbip/vudc_tx.c b/drivers/usb/usbip/vudc_tx.c
3976 index 234661782fa0..3ab4c86486a7 100644
3977 --- a/drivers/usb/usbip/vudc_tx.c
3978 +++ b/drivers/usb/usbip/vudc_tx.c
3979 @@ -97,6 +97,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3980 memset(&pdu_header, 0, sizeof(pdu_header));
3981 memset(&msg, 0, sizeof(msg));
3982
3983 + if (urb->actual_length > 0 && !urb->transfer_buffer) {
3984 + dev_err(&udc->gadget.dev,
3985 + "urb: actual_length %d transfer_buffer null\n",
3986 + urb->actual_length);
3987 + return -1;
3988 + }
3989 +
3990 if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
3991 iovnum = 2 + urb->number_of_packets;
3992 else
3993 @@ -112,8 +119,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
3994
3995 /* 1. setup usbip_header */
3996 setup_ret_submit_pdu(&pdu_header, urb_p);
3997 - usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
3998 - pdu_header.base.seqnum, urb);
3999 + usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
4000 + pdu_header.base.seqnum);
4001 usbip_header_correct_endian(&pdu_header, 1);
4002
4003 iov[iovnum].iov_base = &pdu_header;
4004 diff --git a/include/linux/bpf.h b/include/linux/bpf.h
4005 index f1af7d63d678..0bcf803f20de 100644
4006 --- a/include/linux/bpf.h
4007 +++ b/include/linux/bpf.h
4008 @@ -51,6 +51,7 @@ struct bpf_map {
4009 u32 pages;
4010 u32 id;
4011 int numa_node;
4012 + bool unpriv_array;
4013 struct user_struct *user;
4014 const struct bpf_map_ops *ops;
4015 struct work_struct work;
4016 @@ -195,6 +196,7 @@ struct bpf_prog_aux {
4017 struct bpf_array {
4018 struct bpf_map map;
4019 u32 elem_size;
4020 + u32 index_mask;
4021 /* 'ownership' of prog_array is claimed by the first program that
4022 * is going to use this map or by the first program which FD is stored
4023 * in the map to make sure that all callers and callees have the same
4024 diff --git a/include/linux/cpu.h b/include/linux/cpu.h
4025 index 938ea8ae0ba4..c816e6f2730c 100644
4026 --- a/include/linux/cpu.h
4027 +++ b/include/linux/cpu.h
4028 @@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
4029 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
4030 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
4031
4032 +extern ssize_t cpu_show_meltdown(struct device *dev,
4033 + struct device_attribute *attr, char *buf);
4034 +extern ssize_t cpu_show_spectre_v1(struct device *dev,
4035 + struct device_attribute *attr, char *buf);
4036 +extern ssize_t cpu_show_spectre_v2(struct device *dev,
4037 + struct device_attribute *attr, char *buf);
4038 +
4039 extern __printf(4, 5)
4040 struct device *cpu_device_create(struct device *parent, void *drvdata,
4041 const struct attribute_group **groups,
4042 diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
4043 index 06097ef30449..b511f6d24b42 100644
4044 --- a/include/linux/crash_core.h
4045 +++ b/include/linux/crash_core.h
4046 @@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
4047 vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
4048 #define VMCOREINFO_SYMBOL(name) \
4049 vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
4050 +#define VMCOREINFO_SYMBOL_ARRAY(name) \
4051 + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
4052 #define VMCOREINFO_SIZE(name) \
4053 vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
4054 (unsigned long)sizeof(name))
4055 diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h
4056 index ff3642d267f7..94081e9a5010 100644
4057 --- a/include/linux/sh_eth.h
4058 +++ b/include/linux/sh_eth.h
4059 @@ -17,7 +17,6 @@ struct sh_eth_plat_data {
4060 unsigned char mac_addr[ETH_ALEN];
4061 unsigned no_ether_link:1;
4062 unsigned ether_link_active_low:1;
4063 - unsigned needs_init:1;
4064 };
4065
4066 #endif
4067 diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
4068 index 0477945de1a3..8e1e1dc490fd 100644
4069 --- a/include/net/sctp/structs.h
4070 +++ b/include/net/sctp/structs.h
4071 @@ -955,7 +955,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
4072 void sctp_transport_burst_reset(struct sctp_transport *);
4073 unsigned long sctp_transport_timeout(struct sctp_transport *);
4074 void sctp_transport_reset(struct sctp_transport *t);
4075 -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
4076 +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
4077 void sctp_transport_immediate_rtx(struct sctp_transport *);
4078 void sctp_transport_dst_release(struct sctp_transport *t);
4079 void sctp_transport_dst_confirm(struct sctp_transport *t);
4080 diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
4081 index e4b0b8e09932..2c735a3e6613 100644
4082 --- a/include/trace/events/kvm.h
4083 +++ b/include/trace/events/kvm.h
4084 @@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
4085 { KVM_TRACE_MMIO_WRITE, "write" }
4086
4087 TRACE_EVENT(kvm_mmio,
4088 - TP_PROTO(int type, int len, u64 gpa, u64 val),
4089 + TP_PROTO(int type, int len, u64 gpa, void *val),
4090 TP_ARGS(type, len, gpa, val),
4091
4092 TP_STRUCT__entry(
4093 @@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
4094 __entry->type = type;
4095 __entry->len = len;
4096 __entry->gpa = gpa;
4097 - __entry->val = val;
4098 + __entry->val = 0;
4099 + if (val)
4100 + memcpy(&__entry->val, val,
4101 + min_t(u32, sizeof(__entry->val), len));
4102 ),
4103
4104 TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
4105 diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
4106 index e2636737b69b..a4ae1ca44a57 100644
4107 --- a/kernel/bpf/arraymap.c
4108 +++ b/kernel/bpf/arraymap.c
4109 @@ -50,9 +50,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4110 {
4111 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
4112 int numa_node = bpf_map_attr_numa_node(attr);
4113 + u32 elem_size, index_mask, max_entries;
4114 + bool unpriv = !capable(CAP_SYS_ADMIN);
4115 struct bpf_array *array;
4116 - u64 array_size;
4117 - u32 elem_size;
4118 + u64 array_size, mask64;
4119
4120 /* check sanity of attributes */
4121 if (attr->max_entries == 0 || attr->key_size != 4 ||
4122 @@ -68,11 +69,32 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4123
4124 elem_size = round_up(attr->value_size, 8);
4125
4126 + max_entries = attr->max_entries;
4127 +
4128 + /* On 32 bit archs roundup_pow_of_two() with max_entries that has
4129 + * upper most bit set in u32 space is undefined behavior due to
4130 + * resulting 1U << 32, so do it manually here in u64 space.
4131 + */
4132 + mask64 = fls_long(max_entries - 1);
4133 + mask64 = 1ULL << mask64;
4134 + mask64 -= 1;
4135 +
4136 + index_mask = mask64;
4137 + if (unpriv) {
4138 + /* round up array size to nearest power of 2,
4139 + * since cpu will speculate within index_mask limits
4140 + */
4141 + max_entries = index_mask + 1;
4142 + /* Check for overflows. */
4143 + if (max_entries < attr->max_entries)
4144 + return ERR_PTR(-E2BIG);
4145 + }
4146 +
4147 array_size = sizeof(*array);
4148 if (percpu)
4149 - array_size += (u64) attr->max_entries * sizeof(void *);
4150 + array_size += (u64) max_entries * sizeof(void *);
4151 else
4152 - array_size += (u64) attr->max_entries * elem_size;
4153 + array_size += (u64) max_entries * elem_size;
4154
4155 /* make sure there is no u32 overflow later in round_up() */
4156 if (array_size >= U32_MAX - PAGE_SIZE)
4157 @@ -82,6 +104,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4158 array = bpf_map_area_alloc(array_size, numa_node);
4159 if (!array)
4160 return ERR_PTR(-ENOMEM);
4161 + array->index_mask = index_mask;
4162 + array->map.unpriv_array = unpriv;
4163
4164 /* copy mandatory map attributes */
4165 array->map.map_type = attr->map_type;
4166 @@ -117,12 +141,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
4167 if (unlikely(index >= array->map.max_entries))
4168 return NULL;
4169
4170 - return array->value + array->elem_size * index;
4171 + return array->value + array->elem_size * (index & array->index_mask);
4172 }
4173
4174 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
4175 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
4176 {
4177 + struct bpf_array *array = container_of(map, struct bpf_array, map);
4178 struct bpf_insn *insn = insn_buf;
4179 u32 elem_size = round_up(map->value_size, 8);
4180 const int ret = BPF_REG_0;
4181 @@ -131,7 +156,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
4182
4183 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
4184 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
4185 - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
4186 + if (map->unpriv_array) {
4187 + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
4188 + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
4189 + } else {
4190 + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
4191 + }
4192
4193 if (is_power_of_2(elem_size)) {
4194 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
4195 @@ -153,7 +183,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
4196 if (unlikely(index >= array->map.max_entries))
4197 return NULL;
4198
4199 - return this_cpu_ptr(array->pptrs[index]);
4200 + return this_cpu_ptr(array->pptrs[index & array->index_mask]);
4201 }
4202
4203 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
4204 @@ -173,7 +203,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
4205 */
4206 size = round_up(map->value_size, 8);
4207 rcu_read_lock();
4208 - pptr = array->pptrs[index];
4209 + pptr = array->pptrs[index & array->index_mask];
4210 for_each_possible_cpu(cpu) {
4211 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
4212 off += size;
4213 @@ -221,10 +251,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
4214 return -EEXIST;
4215
4216 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
4217 - memcpy(this_cpu_ptr(array->pptrs[index]),
4218 + memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
4219 value, map->value_size);
4220 else
4221 - memcpy(array->value + array->elem_size * index,
4222 + memcpy(array->value +
4223 + array->elem_size * (index & array->index_mask),
4224 value, map->value_size);
4225 return 0;
4226 }
4227 @@ -258,7 +289,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
4228 */
4229 size = round_up(map->value_size, 8);
4230 rcu_read_lock();
4231 - pptr = array->pptrs[index];
4232 + pptr = array->pptrs[index & array->index_mask];
4233 for_each_possible_cpu(cpu) {
4234 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
4235 off += size;
4236 @@ -609,6 +640,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
4237 static u32 array_of_map_gen_lookup(struct bpf_map *map,
4238 struct bpf_insn *insn_buf)
4239 {
4240 + struct bpf_array *array = container_of(map, struct bpf_array, map);
4241 u32 elem_size = round_up(map->value_size, 8);
4242 struct bpf_insn *insn = insn_buf;
4243 const int ret = BPF_REG_0;
4244 @@ -617,7 +649,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
4245
4246 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
4247 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
4248 - *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
4249 + if (map->unpriv_array) {
4250 + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
4251 + *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
4252 + } else {
4253 + *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
4254 + }
4255 if (is_power_of_2(elem_size))
4256 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
4257 else
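
The arraymap.c hunks above are the Spectre variant 1 mitigation for BPF array maps: for unprivileged maps the array is rounded up to a power of two and every index is ANDed with index_mask after the bounds check, so a mispredicted branch can still only speculate inside the allocated region. A standalone sketch of the mask computation, done in u64 exactly because 1U << 32 would be undefined for a max_entries with the top bit set (as the hunk's comment notes); __builtin_clzll stands in for fls_long():

    #include <stdint.h>
    #include <stdio.h>

    /* fls_long(x): index of the highest set bit, 0 for x == 0 */
    static unsigned int fls64_(uint64_t x)
    {
            return x ? 64 - __builtin_clzll(x) : 0;
    }

    /* max_entries is validated nonzero earlier in the hunk */
    static uint32_t index_mask(uint32_t max_entries)
    {
            uint64_t mask64 = (1ULL << fls64_((uint64_t)max_entries - 1)) - 1;

            return (uint32_t)mask64;
    }

    int main(void)
    {
            uint32_t mask = index_mask(5); /* rounds up to 8 slots, mask 0x7 */
            uint32_t index = 12;           /* fabricated out-of-range index */

            /* even if the bounds check is speculated past, the masked
             * index stays inside the rounded-up allocation */
            printf("mask=%#x masked=%u\n", mask, index & mask); /* 0x7, 4 */
            return 0;
    }
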
4258 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
4259 index c5ff809e86d0..75a5c3312f46 100644
4260 --- a/kernel/bpf/verifier.c
4261 +++ b/kernel/bpf/verifier.c
4262 @@ -1701,6 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
4263 err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
4264 if (err)
4265 return err;
4266 + if (func_id == BPF_FUNC_tail_call) {
4267 + if (meta.map_ptr == NULL) {
4268 + verbose("verifier bug\n");
4269 + return -EINVAL;
4270 + }
4271 + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
4272 + }
4273 err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
4274 if (err)
4275 return err;
4276 @@ -2486,6 +2493,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
4277 return -EINVAL;
4278 }
4279
4280 + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
4281 + verbose("BPF_ARSH not supported for 32 bit ALU\n");
4282 + return -EINVAL;
4283 + }
4284 +
4285 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
4286 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
4287 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
4288 @@ -4315,6 +4327,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
4289 */
4290 insn->imm = 0;
4291 insn->code = BPF_JMP | BPF_TAIL_CALL;
4292 +
4293 + /* instead of changing every JIT dealing with tail_call
4294 + * emit two extra insns:
4295 + * if (index >= max_entries) goto out;
4296 + * index &= array->index_mask;
4297 + * to avoid out-of-bounds cpu speculation
4298 + */
4299 + map_ptr = env->insn_aux_data[i + delta].map_ptr;
4300 + if (map_ptr == BPF_MAP_PTR_POISON) {
4301 + verbose("tail_call obusing map_ptr\n");
4302 + return -EINVAL;
4303 + }
4304 + if (!map_ptr->unpriv_array)
4305 + continue;
4306 + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
4307 + map_ptr->max_entries, 2);
4308 + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
4309 + container_of(map_ptr,
4310 + struct bpf_array,
4311 + map)->index_mask);
4312 + insn_buf[2] = *insn;
4313 + cnt = 3;
4314 + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
4315 + if (!new_prog)
4316 + return -ENOMEM;
4317 +
4318 + delta += cnt - 1;
4319 + env->prog = prog = new_prog;
4320 + insn = new_prog->insnsi + i + delta;
4321 continue;
4322 }
4323
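
The verifier.c hunks above complete the mitigation: the map pointer of each tail call is recorded in insn_aux_data, 32-bit BPF_ARSH is rejected (eBPF has no 32-bit arithmetic right shift), and fixup_bpf_calls() splices two instructions in front of every tail call on an unprivileged array so the interpreter and every JIT inherit the masking. A C sketch of the spliced sequence (R3 holds the tail-call index):

    #include <stdint.h>

    /* C equivalent of the two instructions patched in before a tail call
     * on an unprivileged array map; returns 0 when the call is skipped. */
    static int tail_call_allowed(uint32_t *index, uint32_t max_entries,
                                 uint32_t index_mask)
    {
            if (*index >= max_entries)  /* BPF_JMP_IMM(BPF_JGE, BPF_REG_3, ...) */
                    return 0;           /* goto out */
            *index &= index_mask;       /* BPF_ALU32_IMM(BPF_AND, BPF_REG_3, ...) */
            return 1;                   /* original tail call runs next */
    }
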
4324 diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
4325 index 44857278eb8a..030e4286f14c 100644
4326 --- a/kernel/cgroup/cgroup.c
4327 +++ b/kernel/cgroup/cgroup.c
4328 @@ -4059,26 +4059,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
4329
4330 static void css_task_iter_advance(struct css_task_iter *it)
4331 {
4332 - struct list_head *l = it->task_pos;
4333 + struct list_head *next;
4334
4335 lockdep_assert_held(&css_set_lock);
4336 - WARN_ON_ONCE(!l);
4337 -
4338 repeat:
4339 /*
4340 * Advance iterator to find next entry. cset->tasks is consumed
4341 * first and then ->mg_tasks. After ->mg_tasks, we move onto the
4342 * next cset.
4343 */
4344 - l = l->next;
4345 + next = it->task_pos->next;
4346
4347 - if (l == it->tasks_head)
4348 - l = it->mg_tasks_head->next;
4349 + if (next == it->tasks_head)
4350 + next = it->mg_tasks_head->next;
4351
4352 - if (l == it->mg_tasks_head)
4353 + if (next == it->mg_tasks_head)
4354 css_task_iter_advance_css_set(it);
4355 else
4356 - it->task_pos = l;
4357 + it->task_pos = next;
4358
4359 /* if PROCS, skip over tasks which aren't group leaders */
4360 if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
4361 diff --git a/kernel/crash_core.c b/kernel/crash_core.c
4362 index 6db80fc0810b..2d90996dbe77 100644
4363 --- a/kernel/crash_core.c
4364 +++ b/kernel/crash_core.c
4365 @@ -409,7 +409,7 @@ static int __init crash_save_vmcoreinfo_init(void)
4366 VMCOREINFO_SYMBOL(contig_page_data);
4367 #endif
4368 #ifdef CONFIG_SPARSEMEM
4369 - VMCOREINFO_SYMBOL(mem_section);
4370 + VMCOREINFO_SYMBOL_ARRAY(mem_section);
4371 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
4372 VMCOREINFO_STRUCT_SIZE(mem_section);
4373 VMCOREINFO_OFFSET(mem_section, section_mem_map);
4374 diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
4375 index dd7908743dab..9bcbacba82a8 100644
4376 --- a/kernel/sched/membarrier.c
4377 +++ b/kernel/sched/membarrier.c
4378 @@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
4379 rcu_read_unlock();
4380 }
4381 if (!fallback) {
4382 + preempt_disable();
4383 smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
4384 + preempt_enable();
4385 free_cpumask_var(tmpmask);
4386 }
4387 cpus_read_unlock();
4388 diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
4389 index 4a72ee4e2ae9..cf2e70003a53 100644
4390 --- a/net/8021q/vlan.c
4391 +++ b/net/8021q/vlan.c
4392 @@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
4393 vlan_gvrp_uninit_applicant(real_dev);
4394 }
4395
4396 - /* Take it out of our own structures, but be sure to interlock with
4397 - * HW accelerating devices or SW vlan input packet processing if
4398 - * VLAN is not 0 (leave it there for 802.1p).
4399 - */
4400 - if (vlan_id)
4401 - vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
4402 + vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
4403
4404 /* Get rid of the vlan's reference to real_dev */
4405 dev_put(real_dev);
4406 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
4407 index 43ba91c440bc..fc6615d59165 100644
4408 --- a/net/bluetooth/l2cap_core.c
4409 +++ b/net/bluetooth/l2cap_core.c
4410 @@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
4411 break;
4412
4413 case L2CAP_CONF_EFS:
4414 - remote_efs = 1;
4415 - if (olen == sizeof(efs))
4416 + if (olen == sizeof(efs)) {
4417 + remote_efs = 1;
4418 memcpy(&efs, (void *) val, olen);
4419 + }
4420 break;
4421
4422 case L2CAP_CONF_EWS:
4423 @@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
4424 break;
4425
4426 case L2CAP_CONF_EFS:
4427 - if (olen == sizeof(efs))
4428 + if (olen == sizeof(efs)) {
4429 memcpy(&efs, (void *)val, olen);
4430
4431 - if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
4432 - efs.stype != L2CAP_SERV_NOTRAFIC &&
4433 - efs.stype != chan->local_stype)
4434 - return -ECONNREFUSED;
4435 + if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
4436 + efs.stype != L2CAP_SERV_NOTRAFIC &&
4437 + efs.stype != chan->local_stype)
4438 + return -ECONNREFUSED;
4439
4440 - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
4441 - (unsigned long) &efs, endptr - ptr);
4442 + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
4443 + (unsigned long) &efs, endptr - ptr);
4444 + }
4445 break;
4446
4447 case L2CAP_CONF_FCS:
4448 diff --git a/net/core/ethtool.c b/net/core/ethtool.c
4449 index 9a9a3d77e327..d374a904f1b1 100644
4450 --- a/net/core/ethtool.c
4451 +++ b/net/core/ethtool.c
4452 @@ -754,15 +754,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
4453 return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
4454 }
4455
4456 -static void
4457 -warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
4458 -{
4459 - char name[sizeof(current->comm)];
4460 -
4461 - pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
4462 - get_task_comm(name, current), details);
4463 -}
4464 -
4465 /* Query device for its ethtool_cmd settings.
4466 *
4467 * Backward compatibility note: for compatibility with legacy ethtool,
4468 @@ -789,10 +780,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
4469 &link_ksettings);
4470 if (err < 0)
4471 return err;
4472 - if (!convert_link_ksettings_to_legacy_settings(&cmd,
4473 - &link_ksettings))
4474 - warn_incomplete_ethtool_legacy_settings_conversion(
4475 - "link modes are only partially reported");
4476 + convert_link_ksettings_to_legacy_settings(&cmd,
4477 + &link_ksettings);
4478
4479 /* send a sensible cmd tag back to user */
4480 cmd.cmd = ETHTOOL_GSET;
4481 diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
4482 index 217f4e3b82f6..146b50e30659 100644
4483 --- a/net/core/sock_diag.c
4484 +++ b/net/core/sock_diag.c
4485 @@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
4486 case SKNLGRP_INET6_UDP_DESTROY:
4487 if (!sock_diag_handlers[AF_INET6])
4488 request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
4489 - NETLINK_SOCK_DIAG, AF_INET);
4490 + NETLINK_SOCK_DIAG, AF_INET6);
4491 break;
4492 }
4493 return 0;
4494 diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
4495 index 95516138e861..d6189c2a35e4 100644
4496 --- a/net/ipv6/exthdrs.c
4497 +++ b/net/ipv6/exthdrs.c
4498 @@ -884,6 +884,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
4499 sr_phdr->segments[0] = **addr_p;
4500 *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
4501
4502 + if (sr_ihdr->hdrlen > hops * 2) {
4503 + int tlvs_offset, tlvs_length;
4504 +
4505 + tlvs_offset = (1 + hops * 2) << 3;
4506 + tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
4507 + memcpy((char *)sr_phdr + tlvs_offset,
4508 + (char *)sr_ihdr + tlvs_offset, tlvs_length);
4509 + }
4510 +
4511 #ifdef CONFIG_IPV6_SEG6_HMAC
4512 if (sr_has_hmac(sr_phdr)) {
4513 struct net *net = NULL;
4514 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
4515 index f7dd51c42314..688ba5f7516b 100644
4516 --- a/net/ipv6/ip6_output.c
4517 +++ b/net/ipv6/ip6_output.c
4518 @@ -1735,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
4519 cork.base.opt = NULL;
4520 v6_cork.opt = NULL;
4521 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
4522 - if (err)
4523 + if (err) {
4524 + ip6_cork_release(&cork, &v6_cork);
4525 return ERR_PTR(err);
4526 -
4527 + }
4528 if (ipc6->dontfrag < 0)
4529 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
4530
4531 diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
4532 index ef958d50746b..3f46121ad139 100644
4533 --- a/net/ipv6/ip6_tunnel.c
4534 +++ b/net/ipv6/ip6_tunnel.c
4535 @@ -1081,10 +1081,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
4536 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
4537 neigh_release(neigh);
4538 }
4539 - } else if (!(t->parms.flags &
4540 - (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
4541 - /* enable the cache only only if the routing decision does
4542 - * not depend on the current inner header value
4543 + } else if (t->parms.proto != 0 && !(t->parms.flags &
4544 + (IP6_TNL_F_USE_ORIG_TCLASS |
4545 + IP6_TNL_F_USE_ORIG_FWMARK))) {
4546 + /* enable the cache only if neither the outer protocol nor the
4547 + * routing decision depends on the current inner header value
4548 */
4549 use_cache = true;
4550 }
4551 diff --git a/net/rds/rdma.c b/net/rds/rdma.c
4552 index bc2f1e0977d6..634cfcb7bba6 100644
4553 --- a/net/rds/rdma.c
4554 +++ b/net/rds/rdma.c
4555 @@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
4556
4557 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
4558
4559 + if (args->nr_local == 0)
4560 + return -EINVAL;
4561 +
4562 /* figure out the number of pages in the vector */
4563 for (i = 0; i < args->nr_local; i++) {
4564 if (copy_from_user(&vec, &local_vec[i],
4565 @@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
4566 err:
4567 if (page)
4568 put_page(page);
4569 + rm->atomic.op_active = 0;
4570 kfree(rm->atomic.op_notifier);
4571
4572 return ret;
4573 diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
4574 index e29a48ef7fc3..a0ac42b3ed06 100644
4575 --- a/net/sched/act_gact.c
4576 +++ b/net/sched/act_gact.c
4577 @@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
4578 if (action == TC_ACT_SHOT)
4579 this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
4580
4581 - tm->lastuse = lastuse;
4582 + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
4583 }
4584
4585 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
4586 diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
4587 index 416627c66f08..6ce8de373f83 100644
4588 --- a/net/sched/act_mirred.c
4589 +++ b/net/sched/act_mirred.c
4590 @@ -238,7 +238,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
4591 struct tcf_t *tm = &m->tcf_tm;
4592
4593 _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
4594 - tm->lastuse = lastuse;
4595 + tm->lastuse = max_t(u64, tm->lastuse, lastuse);
4596 }
4597
4598 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
4599 diff --git a/net/sctp/input.c b/net/sctp/input.c
4600 index 621b5ca3fd1c..141c9c466ec1 100644
4601 --- a/net/sctp/input.c
4602 +++ b/net/sctp/input.c
4603 @@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
4604 return;
4605 }
4606
4607 - if (t->param_flags & SPP_PMTUD_ENABLE) {
4608 - /* Update transports view of the MTU */
4609 - sctp_transport_update_pmtu(t, pmtu);
4610 -
4611 - /* Update association pmtu. */
4612 - sctp_assoc_sync_pmtu(asoc);
4613 - }
4614 + if (!(t->param_flags & SPP_PMTUD_ENABLE))
4615 + /* We can't allow retransmitting in that case, as the
4616 + * retransmission would be sized just as before, and thus we
4617 + * would get another ICMP and retransmit again.
4618 + */
4619 + return;
4620
4621 - /* Retransmit with the new pmtu setting.
4622 - * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
4623 - * Needed will never be sent, but if a message was sent before
4624 - * PMTU discovery was disabled that was larger than the PMTU, it
4625 - * would not be fragmented, so it must be re-transmitted fragmented.
4626 + /* Update the transport's view of the MTU. Return if no update was needed.
4627 + * If an update wasn't needed/possible, it also doesn't make sense to
4628 + * try to retransmit now.
4629 */
4630 + if (!sctp_transport_update_pmtu(t, pmtu))
4631 + return;
4632 +
4633 + /* Update association pmtu. */
4634 + sctp_assoc_sync_pmtu(asoc);
4635 +
4636 + /* Retransmit with the new pmtu setting. */
4637 sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
4638 }
4639
4640 diff --git a/net/sctp/transport.c b/net/sctp/transport.c
4641 index 2d9bd3776bc8..7ef77fd7b52a 100644
4642 --- a/net/sctp/transport.c
4643 +++ b/net/sctp/transport.c
4644 @@ -251,28 +251,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
4645 transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
4646 }
4647
4648 -void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
4649 +bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
4650 {
4651 struct dst_entry *dst = sctp_transport_dst_check(t);
4652 + bool change = true;
4653
4654 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
4655 - pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
4656 - __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
4657 - /* Use default minimum segment size and disable
4658 - * pmtu discovery on this transport.
4659 - */
4660 - t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
4661 - } else {
4662 - t->pathmtu = pmtu;
4663 + pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
4664 + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
4665 + /* Use default minimum segment instead */
4666 + pmtu = SCTP_DEFAULT_MINSEGMENT;
4667 }
4668 + pmtu = SCTP_TRUNC4(pmtu);
4669
4670 if (dst) {
4671 dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
4672 dst = sctp_transport_dst_check(t);
4673 }
4674
4675 - if (!dst)
4676 + if (!dst) {
4677 t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
4678 + dst = t->dst;
4679 + }
4680 +
4681 + if (dst) {
4682 + /* Re-fetch, as lower layers may enforce a higher minimum size */
4683 + pmtu = SCTP_TRUNC4(dst_mtu(dst));
4684 + change = t->pathmtu != pmtu;
4685 + }
4686 + t->pathmtu = pmtu;
4687 +
4688 + return change;
4689 }
4690
4691 /* Caches the dst entry and source address for a transport's destination
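
The two sctp hunks work as one fix: sctp_transport_update_pmtu() now reports whether the stored path MTU actually changed, and sctp_icmp_frag_needed() retransmits only in that case, since resending packets of the same size would just provoke the same ICMP again. A runnable sketch of the new clamp/truncate/compare contract (MIN_SEG plays the role of SCTP_DEFAULT_MINSEGMENT):

    #include <stdbool.h>
    #include <stdio.h>

    #define MIN_SEG 512u           /* stand-in for SCTP_DEFAULT_MINSEGMENT */
    #define TRUNC4(x) ((x) & ~3u)  /* same rounding as SCTP_TRUNC4 */

    static unsigned pathmtu = 1500;

    /* true only if the stored MTU changed, mirroring the new bool return */
    static bool update_pmtu(unsigned pmtu)
    {
            if (pmtu < MIN_SEG)
                    pmtu = MIN_SEG;  /* clamp instead of disabling PMTUD */
            pmtu = TRUNC4(pmtu);
            bool changed = pathmtu != pmtu;

            pathmtu = pmtu;
            return changed;
    }

    int main(void)
    {
            printf("%d\n", update_pmtu(1400)); /* 1: changed, retransmit */
            printf("%d\n", update_pmtu(1401)); /* 0: truncates to 1400 */
            return 0;
    }
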
4692 diff --git a/security/Kconfig b/security/Kconfig
4693 index 6614b9312b45..b5c2b5d0c6c0 100644
4694 --- a/security/Kconfig
4695 +++ b/security/Kconfig
4696 @@ -63,7 +63,7 @@ config PAGE_TABLE_ISOLATION
4697 ensuring that the majority of kernel addresses are not mapped
4698 into userspace.
4699
4700 - See Documentation/x86/pagetable-isolation.txt for more details.
4701 + See Documentation/x86/pti.txt for more details.
4702
4703 config SECURITY_INFINIBAND
4704 bool "Infiniband Security Hooks"
4705 diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
4706 index 2b27bb79aec4..d7b7e7115160 100644
4707 --- a/security/apparmor/include/perms.h
4708 +++ b/security/apparmor/include/perms.h
4709 @@ -133,6 +133,9 @@ extern struct aa_perms allperms;
4710 #define xcheck_labels_profiles(L1, L2, FN, args...) \
4711 xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
4712
4713 +#define xcheck_labels(L1, L2, P, FN1, FN2) \
4714 + xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
4715 +
4716
4717 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
4718 void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
4719 diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
4720 index 7ca0032e7ba9..b40678f3c1d5 100644
4721 --- a/security/apparmor/ipc.c
4722 +++ b/security/apparmor/ipc.c
4723 @@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
4724 FLAGS_NONE, GFP_ATOMIC);
4725 }
4726
4727 +/* assumes check for PROFILE_MEDIATES is already done */
4728 /* TODO: conditionals */
4729 static int profile_ptrace_perm(struct aa_profile *profile,
4730 - struct aa_profile *peer, u32 request,
4731 - struct common_audit_data *sa)
4732 + struct aa_label *peer, u32 request,
4733 + struct common_audit_data *sa)
4734 {
4735 struct aa_perms perms = { };
4736
4737 - /* need because of peer in cross check */
4738 - if (profile_unconfined(profile) ||
4739 - !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
4740 - return 0;
4741 -
4742 - aad(sa)->peer = &peer->label;
4743 - aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
4744 + aad(sa)->peer = peer;
4745 + aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
4746 &perms);
4747 aa_apply_modes_to_perms(profile, &perms);
4748 return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
4749 }
4750
4751 -static int cross_ptrace_perm(struct aa_profile *tracer,
4752 - struct aa_profile *tracee, u32 request,
4753 - struct common_audit_data *sa)
4754 +static int profile_tracee_perm(struct aa_profile *tracee,
4755 + struct aa_label *tracer, u32 request,
4756 + struct common_audit_data *sa)
4757 {
4758 + if (profile_unconfined(tracee) || unconfined(tracer) ||
4759 + !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
4760 + return 0;
4761 +
4762 + return profile_ptrace_perm(tracee, tracer, request, sa);
4763 +}
4764 +
4765 +static int profile_tracer_perm(struct aa_profile *tracer,
4766 + struct aa_label *tracee, u32 request,
4767 + struct common_audit_data *sa)
4768 +{
4769 + if (profile_unconfined(tracer))
4770 + return 0;
4771 +
4772 if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
4773 - return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
4774 - profile_ptrace_perm(tracee, tracer,
4775 - request << PTRACE_PERM_SHIFT,
4776 - sa));
4777 - /* policy uses the old style capability check for ptrace */
4778 - if (profile_unconfined(tracer) || tracer == tracee)
4779 + return profile_ptrace_perm(tracer, tracee, request, sa);
4780 +
4781 + /* profile uses the old style capability check for ptrace */
4782 + if (&tracer->label == tracee)
4783 return 0;
4784
4785 aad(sa)->label = &tracer->label;
4786 - aad(sa)->peer = &tracee->label;
4787 + aad(sa)->peer = tracee;
4788 aad(sa)->request = 0;
4789 aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
4790
4791 @@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
4792 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
4793 u32 request)
4794 {
4795 + struct aa_profile *profile;
4796 + u32 xrequest = request << PTRACE_PERM_SHIFT;
4797 DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
4798
4799 - return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
4800 - request, &sa);
4801 + return xcheck_labels(tracer, tracee, profile,
4802 + profile_tracer_perm(profile, tracee, request, &sa),
4803 + profile_tracee_perm(profile, tracer, xrequest, &sa));
4804 }
4805
4806
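
The apparmor rework splits the old symmetric cross_ptrace_perm() into per-role checks: every profile in the tracer label is tested against the tracee with the plain request, and every profile in the tracee label against the tracer with the request shifted into the "being traced" permission bits, with xcheck() keeping the first error. A toy illustration of that combination (the xcheck definition and the shift value here are stand-ins, not the kernel's):

    #include <stdio.h>

    #define PTRACE_PERM_SHIFT 4  /* illustrative value only */

    /* toy xcheck(): run both checks, keep the first error */
    #define xcheck(a, b) ({ int e1 = (a), e2 = (b); e1 ? e1 : e2; })

    static int perm(const char *who, unsigned request)
    {
            printf("%s checked against request 0x%x\n", who, request);
            return 0;  /* 0 = allowed */
    }

    int main(void)
    {
            unsigned request = 0x1;

            return xcheck(perm("tracer", request),
                          perm("tracee", request << PTRACE_PERM_SHIFT));
    }
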
4807 diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
4808 index e49f448ee04f..c2db7e905f7d 100644
4809 --- a/sound/core/oss/pcm_oss.c
4810 +++ b/sound/core/oss/pcm_oss.c
4811 @@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
4812 v = snd_pcm_hw_param_last(pcm, params, var, dir);
4813 else
4814 v = snd_pcm_hw_param_first(pcm, params, var, dir);
4815 - snd_BUG_ON(v < 0);
4816 return v;
4817 }
4818
4819 @@ -1335,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4820
4821 if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4822 return tmp;
4823 - mutex_lock(&runtime->oss.params_lock);
4824 while (bytes > 0) {
4825 + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4826 + tmp = -ERESTARTSYS;
4827 + break;
4828 + }
4829 if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4830 tmp = bytes;
4831 if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
4832 @@ -1380,14 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
4833 xfer += tmp;
4834 if ((substream->f_flags & O_NONBLOCK) != 0 &&
4835 tmp != runtime->oss.period_bytes)
4836 - break;
4837 + tmp = -EAGAIN;
4838 }
4839 - }
4840 - mutex_unlock(&runtime->oss.params_lock);
4841 - return xfer;
4842 -
4843 err:
4844 - mutex_unlock(&runtime->oss.params_lock);
4845 + mutex_unlock(&runtime->oss.params_lock);
4846 + if (tmp < 0)
4847 + break;
4848 + if (signal_pending(current)) {
4849 + tmp = -ERESTARTSYS;
4850 + break;
4851 + }
4852 + tmp = 0;
4853 + }
4854 return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4855 }
4856
4857 @@ -1435,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4858
4859 if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
4860 return tmp;
4861 - mutex_lock(&runtime->oss.params_lock);
4862 while (bytes > 0) {
4863 + if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
4864 + tmp = -ERESTARTSYS;
4865 + break;
4866 + }
4867 if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
4868 if (runtime->oss.buffer_used == 0) {
4869 tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
4870 @@ -1467,12 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
4871 bytes -= tmp;
4872 xfer += tmp;
4873 }
4874 - }
4875 - mutex_unlock(&runtime->oss.params_lock);
4876 - return xfer;
4877 -
4878 err:
4879 - mutex_unlock(&runtime->oss.params_lock);
4880 + mutex_unlock(&runtime->oss.params_lock);
4881 + if (tmp < 0)
4882 + break;
4883 + if (signal_pending(current)) {
4884 + tmp = -ERESTARTSYS;
4885 + break;
4886 + }
4887 + tmp = 0;
4888 + }
4889 return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
4890 }
4891
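
Both OSS transfer loops above now take params_lock once per chunk with mutex_lock_interruptible() and drop it before looping again, so a pending signal (or a short O_NONBLOCK transfer, now mapped to -EAGAIN) aborts the copy instead of wedging inside one long critical section. A userspace analogue using a POSIX semaphore, whose sem_wait() likewise fails with EINTR on a signal:

    #include <errno.h>
    #include <semaphore.h>
    #include <stdio.h>

    static sem_t params_lock;  /* stand-in for runtime->oss.params_lock */

    static long write_loop(long bytes, long period)
    {
            long xfer = 0;

            while (bytes > 0) {
                    /* interruptible acquire: a signal aborts the loop */
                    if (sem_wait(&params_lock) != 0 && errno == EINTR)
                            return xfer ? xfer : -EINTR;
                    long tmp = bytes < period ? bytes : period;
                    bytes -= tmp;
                    xfer += tmp;
                    sem_post(&params_lock);  /* drop between chunks */
            }
            return xfer;
    }

    int main(void)
    {
            sem_init(&params_lock, 0, 1);
            printf("%ld\n", write_loop(10000, 4096)); /* 10000 */
            return 0;
    }
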
4892 diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
4893 index cadc93792868..85a56af104bd 100644
4894 --- a/sound/core/oss/pcm_plugin.c
4895 +++ b/sound/core/oss/pcm_plugin.c
4896 @@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
4897 snd_pcm_sframes_t frames = size;
4898
4899 plugin = snd_pcm_plug_first(plug);
4900 - while (plugin && frames > 0) {
4901 + while (plugin) {
4902 + if (frames <= 0)
4903 + return frames;
4904 if ((next = plugin->next) != NULL) {
4905 snd_pcm_sframes_t frames1 = frames;
4906 - if (plugin->dst_frames)
4907 + if (plugin->dst_frames) {
4908 frames1 = plugin->dst_frames(plugin, frames);
4909 + if (frames1 <= 0)
4910 + return frames1;
4911 + }
4912 if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
4913 return err;
4914 }
4915 if (err != frames1) {
4916 frames = err;
4917 - if (plugin->src_frames)
4918 + if (plugin->src_frames) {
4919 frames = plugin->src_frames(plugin, frames1);
4920 + if (frames <= 0)
4921 + return frames;
4922 + }
4923 }
4924 } else
4925 dst_channels = NULL;
4926 diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
4927 index 10e7ef7a8804..db7894bb028c 100644
4928 --- a/sound/core/pcm_lib.c
4929 +++ b/sound/core/pcm_lib.c
4930 @@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
4931 return changed;
4932 if (params->rmask) {
4933 int err = snd_pcm_hw_refine(pcm, params);
4934 - if (snd_BUG_ON(err < 0))
4935 + if (err < 0)
4936 return err;
4937 }
4938 return snd_pcm_hw_param_value(params, var, dir);
4939 @@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
4940 return changed;
4941 if (params->rmask) {
4942 int err = snd_pcm_hw_refine(pcm, params);
4943 - if (snd_BUG_ON(err < 0))
4944 + if (err < 0)
4945 return err;
4946 }
4947 return snd_pcm_hw_param_value(params, var, dir);
4948 diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
4949 index 2fec2feac387..499f75b18e09 100644
4950 --- a/sound/core/pcm_native.c
4951 +++ b/sound/core/pcm_native.c
4952 @@ -2582,7 +2582,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream,
4953 return ret < 0 ? ret : frames;
4954 }
4955
4956 -/* decrease the appl_ptr; returns the processed frames or a negative error */
4957 +/* decrease the appl_ptr; returns the processed frames or zero for error */
4958 static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
4959 snd_pcm_uframes_t frames,
4960 snd_pcm_sframes_t avail)
4961 @@ -2599,7 +2599,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
4962 if (appl_ptr < 0)
4963 appl_ptr += runtime->boundary;
4964 ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
4965 - return ret < 0 ? ret : frames;
4966 + /* NOTE: we return zero for errors because PulseAudio gets depressed
4967 + * upon receiving an error from the rewind ioctl and stops
4968 + * processing altogether. Returning zero means that no rewind was
4969 + * done, so it is not strictly wrong to answer that way.
4970 + */
4971 + return ret < 0 ? 0 : frames;
4972 }
4973
4974 static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
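
The rewind change trades accuracy for robustness: 0 already has the legitimate meaning "no frames rewound", so folding internal errors into it keeps clients (notably PulseAudio, per the comment above) running where a negative return would make them abandon the stream. The contract in two lines:

    #include <stdio.h>

    /* report frames rewound; failures become "0 frames", which every
     * caller must be able to handle anyway */
    static long rewind_result(int err, long frames)
    {
            return err < 0 ? 0 : frames;
    }

    int main(void)
    {
            printf("%ld\n", rewind_result(-22, 128)); /* 0: error hidden */
            printf("%ld\n", rewind_result(0, 128));   /* 128: rewind done */
            return 0;
    }
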
4975 diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
4976 index 135adb17703c..386ee829c655 100644
4977 --- a/sound/drivers/aloop.c
4978 +++ b/sound/drivers/aloop.c
4979 @@ -39,6 +39,7 @@
4980 #include <sound/core.h>
4981 #include <sound/control.h>
4982 #include <sound/pcm.h>
4983 +#include <sound/pcm_params.h>
4984 #include <sound/info.h>
4985 #include <sound/initval.h>
4986
4987 @@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
4988 return 0;
4989 }
4990
4991 -static void params_change_substream(struct loopback_pcm *dpcm,
4992 - struct snd_pcm_runtime *runtime)
4993 -{
4994 - struct snd_pcm_runtime *dst_runtime;
4995 -
4996 - if (dpcm == NULL || dpcm->substream == NULL)
4997 - return;
4998 - dst_runtime = dpcm->substream->runtime;
4999 - if (dst_runtime == NULL)
5000 - return;
5001 - dst_runtime->hw = dpcm->cable->hw;
5002 -}
5003 -
5004 static void params_change(struct snd_pcm_substream *substream)
5005 {
5006 struct snd_pcm_runtime *runtime = substream->runtime;
5007 @@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
5008 cable->hw.rate_max = runtime->rate;
5009 cable->hw.channels_min = runtime->channels;
5010 cable->hw.channels_max = runtime->channels;
5011 - params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
5012 - runtime);
5013 - params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
5014 - runtime);
5015 }
5016
5017 static int loopback_prepare(struct snd_pcm_substream *substream)
5018 @@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
5019 static int rule_format(struct snd_pcm_hw_params *params,
5020 struct snd_pcm_hw_rule *rule)
5021 {
5022 + struct loopback_pcm *dpcm = rule->private;
5023 + struct loopback_cable *cable = dpcm->cable;
5024 + struct snd_mask m;
5025
5026 - struct snd_pcm_hardware *hw = rule->private;
5027 - struct snd_mask *maskp = hw_param_mask(params, rule->var);
5028 -
5029 - maskp->bits[0] &= (u_int32_t)hw->formats;
5030 - maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
5031 - memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
5032 - if (! maskp->bits[0] && ! maskp->bits[1])
5033 - return -EINVAL;
5034 - return 0;
5035 + snd_mask_none(&m);
5036 + mutex_lock(&dpcm->loopback->cable_lock);
5037 + m.bits[0] = (u_int32_t)cable->hw.formats;
5038 + m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
5039 + mutex_unlock(&dpcm->loopback->cable_lock);
5040 + return snd_mask_refine(hw_param_mask(params, rule->var), &m);
5041 }
5042
5043 static int rule_rate(struct snd_pcm_hw_params *params,
5044 struct snd_pcm_hw_rule *rule)
5045 {
5046 - struct snd_pcm_hardware *hw = rule->private;
5047 + struct loopback_pcm *dpcm = rule->private;
5048 + struct loopback_cable *cable = dpcm->cable;
5049 struct snd_interval t;
5050
5051 - t.min = hw->rate_min;
5052 - t.max = hw->rate_max;
5053 + mutex_lock(&dpcm->loopback->cable_lock);
5054 + t.min = cable->hw.rate_min;
5055 + t.max = cable->hw.rate_max;
5056 + mutex_unlock(&dpcm->loopback->cable_lock);
5057 t.openmin = t.openmax = 0;
5058 t.integer = 0;
5059 return snd_interval_refine(hw_param_interval(params, rule->var), &t);
5060 @@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
5061 static int rule_channels(struct snd_pcm_hw_params *params,
5062 struct snd_pcm_hw_rule *rule)
5063 {
5064 - struct snd_pcm_hardware *hw = rule->private;
5065 + struct loopback_pcm *dpcm = rule->private;
5066 + struct loopback_cable *cable = dpcm->cable;
5067 struct snd_interval t;
5068
5069 - t.min = hw->channels_min;
5070 - t.max = hw->channels_max;
5071 + mutex_lock(&dpcm->loopback->cable_lock);
5072 + t.min = cable->hw.channels_min;
5073 + t.max = cable->hw.channels_max;
5074 + mutex_unlock(&dpcm->loopback->cable_lock);
5075 t.openmin = t.openmax = 0;
5076 t.integer = 0;
5077 return snd_interval_refine(hw_param_interval(params, rule->var), &t);
5078 }
5079
5080 +static void free_cable(struct snd_pcm_substream *substream)
5081 +{
5082 + struct loopback *loopback = substream->private_data;
5083 + int dev = get_cable_index(substream);
5084 + struct loopback_cable *cable;
5085 +
5086 + cable = loopback->cables[substream->number][dev];
5087 + if (!cable)
5088 + return;
5089 + if (cable->streams[!substream->stream]) {
5090 + /* other stream is still alive */
5091 + cable->streams[substream->stream] = NULL;
5092 + } else {
5093 + /* free the cable */
5094 + loopback->cables[substream->number][dev] = NULL;
5095 + kfree(cable);
5096 + }
5097 +}
5098 +
5099 static int loopback_open(struct snd_pcm_substream *substream)
5100 {
5101 struct snd_pcm_runtime *runtime = substream->runtime;
5102 struct loopback *loopback = substream->private_data;
5103 struct loopback_pcm *dpcm;
5104 - struct loopback_cable *cable;
5105 + struct loopback_cable *cable = NULL;
5106 int err = 0;
5107 int dev = get_cable_index(substream);
5108
5109 @@ -682,7 +691,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
5110 if (!cable) {
5111 cable = kzalloc(sizeof(*cable), GFP_KERNEL);
5112 if (!cable) {
5113 - kfree(dpcm);
5114 err = -ENOMEM;
5115 goto unlock;
5116 }
5117 @@ -700,19 +708,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
5118 /* are cached -> they do not reflect the actual state */
5119 err = snd_pcm_hw_rule_add(runtime, 0,
5120 SNDRV_PCM_HW_PARAM_FORMAT,
5121 - rule_format, &runtime->hw,
5122 + rule_format, dpcm,
5123 SNDRV_PCM_HW_PARAM_FORMAT, -1);
5124 if (err < 0)
5125 goto unlock;
5126 err = snd_pcm_hw_rule_add(runtime, 0,
5127 SNDRV_PCM_HW_PARAM_RATE,
5128 - rule_rate, &runtime->hw,
5129 + rule_rate, dpcm,
5130 SNDRV_PCM_HW_PARAM_RATE, -1);
5131 if (err < 0)
5132 goto unlock;
5133 err = snd_pcm_hw_rule_add(runtime, 0,
5134 SNDRV_PCM_HW_PARAM_CHANNELS,
5135 - rule_channels, &runtime->hw,
5136 + rule_channels, dpcm,
5137 SNDRV_PCM_HW_PARAM_CHANNELS, -1);
5138 if (err < 0)
5139 goto unlock;
5140 @@ -724,6 +732,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
5141 else
5142 runtime->hw = cable->hw;
5143 unlock:
5144 + if (err < 0) {
5145 + free_cable(substream);
5146 + kfree(dpcm);
5147 + }
5148 mutex_unlock(&loopback->cable_lock);
5149 return err;
5150 }
5151 @@ -732,20 +744,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
5152 {
5153 struct loopback *loopback = substream->private_data;
5154 struct loopback_pcm *dpcm = substream->runtime->private_data;
5155 - struct loopback_cable *cable;
5156 - int dev = get_cable_index(substream);
5157
5158 loopback_timer_stop(dpcm);
5159 mutex_lock(&loopback->cable_lock);
5160 - cable = loopback->cables[substream->number][dev];
5161 - if (cable->streams[!substream->stream]) {
5162 - /* other stream is still alive */
5163 - cable->streams[substream->stream] = NULL;
5164 - } else {
5165 - /* free the cable */
5166 - loopback->cables[substream->number][dev] = NULL;
5167 - kfree(cable);
5168 - }
5169 + free_cable(substream);
5170 mutex_unlock(&loopback->cable_lock);
5171 return 0;
5172 }
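
The aloop rework has one theme: the hw_rule callbacks now read the live, cable_lock-protected cable parameters instead of a runtime->hw snapshot taken at open time, so a parameter change on one end of the loopback is seen by the other end's constraints, and open/close share one free_cable() helper so the error path unwinds exactly what was set up. The locking pattern as a small pthreads analogue:

    #include <pthread.h>
    #include <stdio.h>

    struct cable {
            pthread_mutex_t lock;
            unsigned rate_min, rate_max;
    };

    /* constraint callback: read live values under the lock, never a
     * stale copy cached when the stream was opened */
    static void rule_rate(struct cable *c, unsigned *lo, unsigned *hi)
    {
            pthread_mutex_lock(&c->lock);
            *lo = c->rate_min;
            *hi = c->rate_max;
            pthread_mutex_unlock(&c->lock);
    }

    int main(void)
    {
            struct cable c = { .rate_min = 8000, .rate_max = 48000 };
            unsigned lo, hi;

            pthread_mutex_init(&c.lock, NULL);
            rule_rate(&c, &lo, &hi);
            printf("%u..%u\n", lo, hi);
            return 0;
    }
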
5173 diff --git a/tools/objtool/check.c b/tools/objtool/check.c
5174 index 9b341584eb1b..f40d46e24bcc 100644
5175 --- a/tools/objtool/check.c
5176 +++ b/tools/objtool/check.c
5177 @@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file)
5178 }
5179 }
5180
5181 +/*
5182 + * FIXME: For now, just ignore any alternatives which add retpolines. This is
5183 + * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
5184 + * But it at least allows objtool to understand the control flow *around* the
5185 + * retpoline.
5186 + */
5187 +static int add_nospec_ignores(struct objtool_file *file)
5188 +{
5189 + struct section *sec;
5190 + struct rela *rela;
5191 + struct instruction *insn;
5192 +
5193 + sec = find_section_by_name(file->elf, ".rela.discard.nospec");
5194 + if (!sec)
5195 + return 0;
5196 +
5197 + list_for_each_entry(rela, &sec->rela_list, list) {
5198 + if (rela->sym->type != STT_SECTION) {
5199 + WARN("unexpected relocation symbol type in %s", sec->name);
5200 + return -1;
5201 + }
5202 +
5203 + insn = find_insn(file, rela->sym->sec, rela->addend);
5204 + if (!insn) {
5205 + WARN("bad .discard.nospec entry");
5206 + return -1;
5207 + }
5208 +
5209 + insn->ignore_alts = true;
5210 + }
5211 +
5212 + return 0;
5213 +}
5214 +
5215 /*
5216 * Find the destination instructions for all jumps.
5217 */
5218 @@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
5219 } else if (rela->sym->sec->idx) {
5220 dest_sec = rela->sym->sec;
5221 dest_off = rela->sym->sym.st_value + rela->addend + 4;
5222 + } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
5223 + /*
5224 + * Retpoline jumps are really dynamic jumps in
5225 + * disguise, so convert them accordingly.
5226 + */
5227 + insn->type = INSN_JUMP_DYNAMIC;
5228 + continue;
5229 } else {
5230 /* sibling call */
5231 insn->jump_dest = 0;
5232 @@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
5233 dest_off = insn->offset + insn->len + insn->immediate;
5234 insn->call_dest = find_symbol_by_offset(insn->sec,
5235 dest_off);
5236 + /*
5237 + * FIXME: Thanks to retpolines, it's now considered
5238 + * normal for a function to call within itself. So
5239 + * disable this warning for now.
5240 + */
5241 +#if 0
5242 if (!insn->call_dest) {
5243 WARN_FUNC("can't find call dest symbol at offset 0x%lx",
5244 insn->sec, insn->offset, dest_off);
5245 return -1;
5246 }
5247 +#endif
5248 } else if (rela->sym->type == STT_SECTION) {
5249 insn->call_dest = find_symbol_by_offset(rela->sym->sec,
5250 rela->addend+4);
5251 @@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
5252 return ret;
5253
5254 list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
5255 - alt = malloc(sizeof(*alt));
5256 - if (!alt) {
5257 - WARN("malloc failed");
5258 - ret = -1;
5259 - goto out;
5260 - }
5261
5262 orig_insn = find_insn(file, special_alt->orig_sec,
5263 special_alt->orig_off);
5264 @@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
5265 goto out;
5266 }
5267
5268 + /* Ignore retpoline alternatives. */
5269 + if (orig_insn->ignore_alts)
5270 + continue;
5271 +
5272 new_insn = NULL;
5273 if (!special_alt->group || special_alt->new_len) {
5274 new_insn = find_insn(file, special_alt->new_sec,
5275 @@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
5276 goto out;
5277 }
5278
5279 + alt = malloc(sizeof(*alt));
5280 + if (!alt) {
5281 + WARN("malloc failed");
5282 + ret = -1;
5283 + goto out;
5284 + }
5285 +
5286 alt->insn = new_insn;
5287 list_add_tail(&alt->list, &orig_insn->alts);
5288
5289 @@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
5290
5291 add_ignores(file);
5292
5293 + ret = add_nospec_ignores(file);
5294 + if (ret)
5295 + return ret;
5296 +
5297 ret = add_jump_destinations(file);
5298 if (ret)
5299 return ret;
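
The objtool changes teach it just enough about retpolines: instructions listed in .discard.nospec get their alternatives ignored, jumps to *_indirect_thunk_* symbols are reclassified as dynamic jumps, and the intra-function call warning is compiled out. Moving the malloc() below the new ignore_alts test also stops skipped entries from leaking an alt; the pattern in miniature:

    #include <stdlib.h>

    struct alt { struct alt *next; };

    static int should_ignore(int i) { return i % 2; } /* stand-in test */

    /* allocate only after every early-exit check has passed, as in the
     * reordered add_special_section_alts() */
    static int add_alts(int n, struct alt **head)
    {
            for (int i = 0; i < n; i++) {
                    if (should_ignore(i))
                            continue;  /* nothing allocated yet: no leak */
                    struct alt *a = malloc(sizeof(*a));
                    if (!a)
                            return -1;
                    a->next = *head;
                    *head = a;
            }
            return 0;
    }

    int main(void)
    {
            struct alt *head = NULL;
            return add_alts(8, &head);
    }
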
5300 diff --git a/tools/objtool/check.h b/tools/objtool/check.h
5301 index 47d9ea70a83d..dbadb304a410 100644
5302 --- a/tools/objtool/check.h
5303 +++ b/tools/objtool/check.h
5304 @@ -44,7 +44,7 @@ struct instruction {
5305 unsigned int len;
5306 unsigned char type;
5307 unsigned long immediate;
5308 - bool alt_group, visited, dead_end, ignore, hint, save, restore;
5309 + bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
5310 struct symbol *call_dest;
5311 struct instruction *jump_dest;
5312 struct list_head alts;
5313 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
5314 index 7a2d221c4702..1241487de93f 100644
5315 --- a/tools/testing/selftests/bpf/test_verifier.c
5316 +++ b/tools/testing/selftests/bpf/test_verifier.c
5317 @@ -272,6 +272,46 @@ static struct bpf_test tests[] = {
5318 .errstr = "invalid bpf_ld_imm64 insn",
5319 .result = REJECT,
5320 },
5321 + {
5322 + "arsh32 on imm",
5323 + .insns = {
5324 + BPF_MOV64_IMM(BPF_REG_0, 1),
5325 + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
5326 + BPF_EXIT_INSN(),
5327 + },
5328 + .result = REJECT,
5329 + .errstr = "BPF_ARSH not supported for 32 bit ALU",
5330 + },
5331 + {
5332 + "arsh32 on reg",
5333 + .insns = {
5334 + BPF_MOV64_IMM(BPF_REG_0, 1),
5335 + BPF_MOV64_IMM(BPF_REG_1, 5),
5336 + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
5337 + BPF_EXIT_INSN(),
5338 + },
5339 + .result = REJECT,
5340 + .errstr = "BPF_ARSH not supported for 32 bit ALU",
5341 + },
5342 + {
5343 + "arsh64 on imm",
5344 + .insns = {
5345 + BPF_MOV64_IMM(BPF_REG_0, 1),
5346 + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
5347 + BPF_EXIT_INSN(),
5348 + },
5349 + .result = ACCEPT,
5350 + },
5351 + {
5352 + "arsh64 on reg",
5353 + .insns = {
5354 + BPF_MOV64_IMM(BPF_REG_0, 1),
5355 + BPF_MOV64_IMM(BPF_REG_1, 5),
5356 + BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
5357 + BPF_EXIT_INSN(),
5358 + },
5359 + .result = ACCEPT,
5360 + },
5361 {
5362 "no bpf_exit",
5363 .insns = {
5364 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
5365 index 7b1adeee4b0f..91fbfa8fdc15 100644
5366 --- a/tools/testing/selftests/x86/Makefile
5367 +++ b/tools/testing/selftests/x86/Makefile
5368 @@ -7,7 +7,7 @@ include ../lib.mk
5369
5370 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
5371 check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
5372 - protection_keys test_vdso
5373 + protection_keys test_vdso test_vsyscall
5374 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
5375 test_FCMOV test_FCOMI test_FISTTP \
5376 vdso_restorer
5377 diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
5378 new file mode 100644
5379 index 000000000000..6e0bd52ad53d
5380 --- /dev/null
5381 +++ b/tools/testing/selftests/x86/test_vsyscall.c
5382 @@ -0,0 +1,500 @@
5383 +/* SPDX-License-Identifier: GPL-2.0 */
5384 +
5385 +#define _GNU_SOURCE
5386 +
5387 +#include <stdio.h>
5388 +#include <sys/time.h>
5389 +#include <time.h>
5390 +#include <stdlib.h>
5391 +#include <sys/syscall.h>
5392 +#include <unistd.h>
5393 +#include <dlfcn.h>
5394 +#include <string.h>
5395 +#include <inttypes.h>
5396 +#include <signal.h>
5397 +#include <sys/ucontext.h>
5398 +#include <errno.h>
5399 +#include <err.h>
5400 +#include <sched.h>
5401 +#include <stdbool.h>
5402 +#include <setjmp.h>
5403 +
5404 +#ifdef __x86_64__
5405 +# define VSYS(x) (x)
5406 +#else
5407 +# define VSYS(x) 0
5408 +#endif
5409 +
5410 +#ifndef SYS_getcpu
5411 +# ifdef __x86_64__
5412 +# define SYS_getcpu 309
5413 +# else
5414 +# define SYS_getcpu 318
5415 +# endif
5416 +#endif
5417 +
5418 +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
5419 + int flags)
5420 +{
5421 + struct sigaction sa;
5422 + memset(&sa, 0, sizeof(sa));
5423 + sa.sa_sigaction = handler;
5424 + sa.sa_flags = SA_SIGINFO | flags;
5425 + sigemptyset(&sa.sa_mask);
5426 + if (sigaction(sig, &sa, 0))
5427 + err(1, "sigaction");
5428 +}
5429 +
5430 +/* vsyscalls and vDSO */
5431 +bool should_read_vsyscall = false;
5432 +
5433 +typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
5434 +gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
5435 +gtod_t vdso_gtod;
5436 +
5437 +typedef int (*vgettime_t)(clockid_t, struct timespec *);
5438 +vgettime_t vdso_gettime;
5439 +
5440 +typedef long (*time_func_t)(time_t *t);
5441 +time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
5442 +time_func_t vdso_time;
5443 +
5444 +typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
5445 +getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
5446 +getcpu_t vdso_getcpu;
5447 +
5448 +static void init_vdso(void)
5449 +{
5450 + void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
5451 + if (!vdso)
5452 + vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
5453 + if (!vdso) {
5454 + printf("[WARN]\tfailed to find vDSO\n");
5455 + return;
5456 + }
5457 +
5458 + vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
5459 + if (!vdso_gtod)
5460 + printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
5461 +
5462 + vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
5463 + if (!vdso_gettime)
5464 + printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
5465 +
5466 + vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
5467 + if (!vdso_time)
5468 + printf("[WARN]\tfailed to find time in vDSO\n");
5469 +
5470 + vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
5471 + if (!vdso_getcpu) {
5472 + /* getcpu() was never wired up in the 32-bit vDSO. */
5473 + printf("[%s]\tfailed to find getcpu in vDSO\n",
5474 + sizeof(long) == 8 ? "WARN" : "NOTE");
5475 + }
5476 +}
5477 +
5478 +static int init_vsys(void)
5479 +{
5480 +#ifdef __x86_64__
5481 + int nerrs = 0;
5482 + FILE *maps;
5483 + char line[128];
5484 + bool found = false;
5485 +
5486 + maps = fopen("/proc/self/maps", "r");
5487 + if (!maps) {
5488 + printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
5489 + should_read_vsyscall = true;
5490 + return 0;
5491 + }
5492 +
5493 + while (fgets(line, sizeof(line), maps)) {
5494 + char r, x;
5495 + void *start, *end;
5496 + char name[128];
5497 + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
5498 + &start, &end, &r, &x, name) != 5)
5499 + continue;
5500 +
5501 + if (strcmp(name, "[vsyscall]"))
5502 + continue;
5503 +
5504 + printf("\tvsyscall map: %s", line);
5505 +
5506 + if (start != (void *)0xffffffffff600000 ||
5507 + end != (void *)0xffffffffff601000) {
5508 + printf("[FAIL]\taddress range is nonsense\n");
5509 + nerrs++;
5510 + }
5511 +
5512 + printf("\tvsyscall permissions are %c-%c\n", r, x);
5513 + should_read_vsyscall = (r == 'r');
5514 + if (x != 'x') {
5515 + vgtod = NULL;
5516 + vtime = NULL;
5517 + vgetcpu = NULL;
5518 + }
5519 +
5520 + found = true;
5521 + break;
5522 + }
5523 +
5524 + fclose(maps);
5525 +
5526 + if (!found) {
5527 + printf("\tno vsyscall map in /proc/self/maps\n");
5528 + should_read_vsyscall = false;
5529 + vgtod = NULL;
5530 + vtime = NULL;
5531 + vgetcpu = NULL;
5532 + }
5533 +
5534 + return nerrs;
5535 +#else
5536 + return 0;
5537 +#endif
5538 +}
5539 +
5540 +/* syscalls */
5541 +static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
5542 +{
5543 + return syscall(SYS_gettimeofday, tv, tz);
5544 +}
5545 +
5546 +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
5547 +{
5548 + return syscall(SYS_clock_gettime, id, ts);
5549 +}
5550 +
5551 +static inline long sys_time(time_t *t)
5552 +{
5553 + return syscall(SYS_time, t);
5554 +}
5555 +
5556 +static inline long sys_getcpu(unsigned * cpu, unsigned * node,
5557 + void* cache)
5558 +{
5559 + return syscall(SYS_getcpu, cpu, node, cache);
5560 +}
5561 +
5562 +static jmp_buf jmpbuf;
5563 +
5564 +static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
5565 +{
5566 + siglongjmp(jmpbuf, 1);
5567 +}
5568 +
5569 +static double tv_diff(const struct timeval *a, const struct timeval *b)
5570 +{
5571 + return (double)(a->tv_sec - b->tv_sec) +
5572 + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
5573 +}
5574 +
5575 +static int check_gtod(const struct timeval *tv_sys1,
5576 + const struct timeval *tv_sys2,
5577 + const struct timezone *tz_sys,
5578 + const char *which,
5579 + const struct timeval *tv_other,
5580 + const struct timezone *tz_other)
5581 +{
5582 + int nerrs = 0;
5583 + double d1, d2;
5584 +
5585 + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
5586 + printf("[FAIL] %s tz mismatch\n", which);
5587 + nerrs++;
5588 + }
5589 +
5590 + d1 = tv_diff(tv_other, tv_sys1);
5591 + d2 = tv_diff(tv_sys2, tv_other);
5592 + printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
5593 +
5594 + if (d1 < 0 || d2 < 0) {
5595 + printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
5596 + nerrs++;
5597 + } else {
5598 + printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
5599 + }
5600 +
5601 + return nerrs;
5602 +}
5603 +
5604 +static int test_gtod(void)
5605 +{
5606 + struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
5607 + struct timezone tz_sys, tz_vdso, tz_vsys;
5608 + long ret_vdso = -1;
5609 + long ret_vsys = -1;
5610 + int nerrs = 0;
5611 +
5612 + printf("[RUN]\ttest gettimeofday()\n");
5613 +
5614 + if (sys_gtod(&tv_sys1, &tz_sys) != 0)
5615 + err(1, "syscall gettimeofday");
5616 + if (vdso_gtod)
5617 + ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
5618 + if (vgtod)
5619 + ret_vsys = vgtod(&tv_vsys, &tz_vsys);
5620 + if (sys_gtod(&tv_sys2, &tz_sys) != 0)
5621 + err(1, "syscall gettimeofday");
5622 +
5623 + if (vdso_gtod) {
5624 + if (ret_vdso == 0) {
5625 + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
5626 + } else {
5627 + printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
5628 + nerrs++;
5629 + }
5630 + }
5631 +
5632 + if (vgtod) {
5633 + if (ret_vsys == 0) {
5634 + nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
5635 + } else {
5636 + printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
5637 + nerrs++;
5638 + }
5639 + }
5640 +
5641 + return nerrs;
5642 +}
5643 +
5644 +static int test_time(void) {
5645 + int nerrs = 0;
5646 +
5647 + printf("[RUN]\ttest time()\n");
5648 + long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
5649 + long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
5650 + t_sys1 = sys_time(&t2_sys1);
5651 + if (vdso_time)
5652 + t_vdso = vdso_time(&t2_vdso);
5653 + if (vtime)
5654 + t_vsys = vtime(&t2_vsys);
5655 + t_sys2 = sys_time(&t2_sys2);
5656 + if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
5657 + printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
5658 + nerrs++;
5659 + return nerrs;
5660 + }
5661 +
5662 + if (vdso_time) {
5663 + if (t_vdso < 0 || t_vdso != t2_vdso) {
5664 + printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
5665 + nerrs++;
5666 + } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
5667 + printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
5668 + nerrs++;
5669 + } else {
5670 + printf("[OK]\tvDSO time() is okay\n");
5671 + }
5672 + }
5673 +
5674 + if (vtime) {
5675 + if (t_vsys < 0 || t_vsys != t2_vsys) {
5676 + printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
5677 + nerrs++;
5678 + } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
5679 + printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
5680 + nerrs++;
5681 + } else {
5682 + printf("[OK]\tvsyscall time() is okay\n");
5683 + }
5684 + }
5685 +
5686 + return nerrs;
5687 +}
5688 +
5689 +static int test_getcpu(int cpu)
5690 +{
5691 + int nerrs = 0;
5692 + long ret_sys, ret_vdso = -1, ret_vsys = -1;
5693 +
5694 + printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
5695 +
5696 + cpu_set_t cpuset;
5697 + CPU_ZERO(&cpuset);
5698 + CPU_SET(cpu, &cpuset);
5699 + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
5700 + printf("[SKIP]\tfailed to force CPU %d\n", cpu);
5701 + return nerrs;
5702 + }
5703 +
5704 + unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
5705 + unsigned node = 0;
5706 + bool have_node = false;
5707 + ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
5708 + if (vdso_getcpu)
5709 + ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
5710 + if (vgetcpu)
5711 + ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
5712 +
5713 + if (ret_sys == 0) {
5714 + if (cpu_sys != cpu) {
5715 + printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
5716 + nerrs++;
5717 + }
5718 +
5719 + have_node = true;
5720 + node = node_sys;
5721 + }
5722 +
5723 + if (vdso_getcpu) {
5724 + if (ret_vdso) {
5725 + printf("[FAIL]\tvDSO getcpu() failed\n");
5726 + nerrs++;
5727 + } else {
5728 + if (!have_node) {
5729 + have_node = true;
5730 + node = node_vdso;
5731 + }
5732 +
5733 + if (cpu_vdso != cpu) {
5734 + printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
5735 + nerrs++;
5736 + } else {
5737 + printf("[OK]\tvDSO reported correct CPU\n");
5738 + }
5739 +
5740 + if (node_vdso != node) {
5741 + printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
5742 + nerrs++;
5743 + } else {
5744 + printf("[OK]\tvDSO reported correct node\n");
5745 + }
5746 + }
5747 + }
5748 +
5749 + if (vgetcpu) {
5750 + if (ret_vsys) {
5751 + printf("[FAIL]\tvsyscall getcpu() failed\n");
5752 + nerrs++;
5753 + } else {
5754 + if (!have_node) {
5755 + have_node = true;
5756 + node = node_vsys;
5757 + }
5758 +
5759 + if (cpu_vsys != cpu) {
5760 + printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
5761 + nerrs++;
5762 + } else {
5763 + printf("[OK]\tvsyscall reported correct CPU\n");
5764 + }
5765 +
5766 + if (node_vsys != node) {
5767 + printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
5768 + nerrs++;
5769 + } else {
5770 + printf("[OK]\tvsyscall reported correct node\n");
5771 + }
5772 + }
5773 + }
5774 +
5775 + return nerrs;
5776 +}
5777 +
5778 +static int test_vsys_r(void)
5779 +{
5780 +#ifdef __x86_64__
5781 + printf("[RUN]\tChecking read access to the vsyscall page\n");
5782 + bool can_read;
5783 + if (sigsetjmp(jmpbuf, 1) == 0) {
5784 + *(volatile int *)0xffffffffff600000;
5785 + can_read = true;
5786 + } else {
5787 + can_read = false;
5788 + }
5789 +
5790 + if (can_read && !should_read_vsyscall) {
5791 + printf("[FAIL]\tWe have read access, but we shouldn't\n");
5792 + return 1;
5793 + } else if (!can_read && should_read_vsyscall) {
5794 + printf("[FAIL]\tWe don't have read access, but we should\n");
5795 + return 1;
5796 + } else {
5797 + printf("[OK]\tgot expected result\n");
5798 + }
5799 +#endif
5800 +
5801 + return 0;
5802 +}
5803 +
5804 +
5805 +#ifdef __x86_64__
5806 +#define X86_EFLAGS_TF (1UL << 8)
5807 +static volatile sig_atomic_t num_vsyscall_traps;
5808 +
5809 +static unsigned long get_eflags(void)
5810 +{
5811 + unsigned long eflags;
5812 + asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
5813 + return eflags;
5814 +}
5815 +
5816 +static void set_eflags(unsigned long eflags)
5817 +{
5818 + asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
5819 +}
5820 +
5821 +static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
5822 +{
5823 + ucontext_t *ctx = (ucontext_t *)ctx_void;
5824 + unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
5825 +
5826 + if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
5827 + num_vsyscall_traps++;
5828 +}
5829 +
5830 +static int test_native_vsyscall(void)
5831 +{
5832 + time_t tmp;
5833 + bool is_native;
5834 +
5835 + if (!vtime)
5836 + return 0;
5837 +
5838 + printf("[RUN]\tchecking for native vsyscall\n");
5839 + sethandler(SIGTRAP, sigtrap, 0);
5840 + set_eflags(get_eflags() | X86_EFLAGS_TF);
5841 + vtime(&tmp);
5842 + set_eflags(get_eflags() & ~X86_EFLAGS_TF);
5843 +
5844 + /*
5845 + * If vsyscalls are emulated, we expect a single trap in the
5846 + * vsyscall page -- the call instruction will trap with RIP
5847 + * pointing to the entry point before emulation takes over.
5848 + * In native mode, we expect two traps, since whatever code
5849 + * the vsyscall page contains will be more than just a ret
5850 + * instruction.
5851 + */
5852 + is_native = (num_vsyscall_traps > 1);
5853 +
5854 + printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
5855 + (is_native ? "native" : "emulated"),
5856 + (int)num_vsyscall_traps);
5857 +
5858 + return 0;
5859 +}
5860 +#endif
5861 +
5862 +int main(int argc, char **argv)
5863 +{
5864 + int nerrs = 0;
5865 +
5866 + init_vdso();
5867 + nerrs += init_vsys();
5868 +
5869 + nerrs += test_gtod();
5870 + nerrs += test_time();
5871 + nerrs += test_getcpu(0);
5872 + nerrs += test_getcpu(1);
5873 +
5874 + sethandler(SIGSEGV, sigsegv, 0);
5875 + nerrs += test_vsys_r();
5876 +
5877 +#ifdef __x86_64__
5878 + nerrs += test_native_vsyscall();
5879 +#endif
5880 +
5881 + return nerrs ? 1 : 0;
5882 +}
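
With the Makefile hunk above, the new test is built in both widths alongside the other x86 selftests; presumably something like the following builds and runs the 64-bit variant from a kernel tree (binary names follow the selftests' _32/_64 convention):

    make -C tools/testing/selftests/x86 test_vsyscall_64
    ./tools/testing/selftests/x86/test_vsyscall_64
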
5883 diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
5884 index b6e715fd3c90..dac7ceb1a677 100644
5885 --- a/virt/kvm/arm/mmio.c
5886 +++ b/virt/kvm/arm/mmio.c
5887 @@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
5888 }
5889
5890 trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
5891 - data);
5892 + &data);
5893 data = vcpu_data_host_to_guest(vcpu, data, len);
5894 vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
5895 }
5896 @@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
5897 data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
5898 len);
5899
5900 - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
5901 + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
5902 kvm_mmio_write_buf(data_buf, len, data);
5903
5904 ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
5905 data_buf);
5906 } else {
5907 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
5908 - fault_ipa, 0);
5909 + fault_ipa, NULL);
5910
5911 ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
5912 data_buf);