Magellan Linux

Contents of /trunk/kernel-alx/patches-4.9/0314-4.9.215-all-fixes.patch



Revision 3589
Thu Aug 13 10:21:31 2020 UTC by niro
File size: 401597 bytes
linux-215
1 diff --git a/Makefile b/Makefile
2 index 9a6aa41a9ec1..b594484788a8 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 4
7 PATCHLEVEL = 9
8 -SUBLEVEL = 214
9 +SUBLEVEL = 215
10 EXTRAVERSION =
11 NAME = Roaring Lionus
12
13 diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
14 index 74a70f91b01a..56bd9beb6a35 100644
15 --- a/arch/arm/Kconfig
16 +++ b/arch/arm/Kconfig
17 @@ -2020,7 +2020,7 @@ config XIP_PHYS_ADDR
18 config KEXEC
19 bool "Kexec system call (EXPERIMENTAL)"
20 depends on (!SMP || PM_SLEEP_SMP)
21 - depends on !CPU_V7M
22 + depends on MMU
23 select KEXEC_CORE
24 help
25 kexec is a system call that implements the ability to shutdown your
26 diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi
27 index b9bbcce69dfb..6c6d4893e92d 100644
28 --- a/arch/arm/boot/dts/r8a7779.dtsi
29 +++ b/arch/arm/boot/dts/r8a7779.dtsi
30 @@ -67,6 +67,14 @@
31 <0xf0000100 0x100>;
32 };
33
34 + timer@f0000200 {
35 + compatible = "arm,cortex-a9-global-timer";
36 + reg = <0xf0000200 0x100>;
37 + interrupts = <GIC_PPI 11
38 + (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
39 + clocks = <&cpg_clocks R8A7779_CLK_ZS>;
40 + };
41 +
42 timer@f0000600 {
43 compatible = "arm,cortex-a9-twd-timer";
44 reg = <0xf0000600 0x20>;
45 diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
46 index 7e842dcae450..3626655175a2 100644
47 --- a/arch/arm64/include/asm/alternative.h
48 +++ b/arch/arm64/include/asm/alternative.h
49 @@ -29,13 +29,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
50 void __init apply_alternatives_all(void);
51 void apply_alternatives(void *start, size_t length);
52
53 -#define ALTINSTR_ENTRY(feature,cb) \
54 +#define ALTINSTR_ENTRY(feature) \
55 " .word 661b - .\n" /* label */ \
56 - " .if " __stringify(cb) " == 0\n" \
57 " .word 663f - .\n" /* new instruction */ \
58 - " .else\n" \
59 + " .hword " __stringify(feature) "\n" /* feature bit */ \
60 + " .byte 662b-661b\n" /* source len */ \
61 + " .byte 664f-663f\n" /* replacement len */
62 +
63 +#define ALTINSTR_ENTRY_CB(feature, cb) \
64 + " .word 661b - .\n" /* label */ \
65 " .word " __stringify(cb) "- .\n" /* callback */ \
66 - " .endif\n" \
67 " .hword " __stringify(feature) "\n" /* feature bit */ \
68 " .byte 662b-661b\n" /* source len */ \
69 " .byte 664f-663f\n" /* replacement len */
70 @@ -56,15 +59,14 @@ void apply_alternatives(void *start, size_t length);
71 *
72 * Alternatives with callbacks do not generate replacement instructions.
73 */
74 -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
75 +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
76 ".if "__stringify(cfg_enabled)" == 1\n" \
77 "661:\n\t" \
78 oldinstr "\n" \
79 "662:\n" \
80 ".pushsection .altinstructions,\"a\"\n" \
81 - ALTINSTR_ENTRY(feature,cb) \
82 + ALTINSTR_ENTRY(feature) \
83 ".popsection\n" \
84 - " .if " __stringify(cb) " == 0\n" \
85 ".pushsection .altinstr_replacement, \"a\"\n" \
86 "663:\n\t" \
87 newinstr "\n" \
88 @@ -72,17 +74,25 @@ void apply_alternatives(void *start, size_t length);
89 ".popsection\n\t" \
90 ".org . - (664b-663b) + (662b-661b)\n\t" \
91 ".org . - (662b-661b) + (664b-663b)\n" \
92 - ".else\n\t" \
93 + ".endif\n"
94 +
95 +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
96 + ".if "__stringify(cfg_enabled)" == 1\n" \
97 + "661:\n\t" \
98 + oldinstr "\n" \
99 + "662:\n" \
100 + ".pushsection .altinstructions,\"a\"\n" \
101 + ALTINSTR_ENTRY_CB(feature, cb) \
102 + ".popsection\n" \
103 "663:\n\t" \
104 "664:\n\t" \
105 - ".endif\n" \
106 ".endif\n"
107
108 #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
109 - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
110 + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
111
112 #define ALTERNATIVE_CB(oldinstr, cb) \
113 - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
114 + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
115 #else
116
117 #include <asm/assembler.h>
118 diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
119 index 0bde47e4fa69..dcba53803fa5 100644
120 --- a/arch/microblaze/kernel/cpu/cache.c
121 +++ b/arch/microblaze/kernel/cpu/cache.c
122 @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void)
123 #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \
124 do { \
125 int align = ~(cache_line_length - 1); \
126 - end = min(start + cache_size, end); \
127 + if (start < UINT_MAX - cache_size) \
128 + end = min(start + cache_size, end); \
129 start &= align; \
130 } while (0)
131
132 diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c
133 index 25a97cc0ee33..0db4cc3196eb 100644
134 --- a/arch/mips/loongson64/loongson-3/platform.c
135 +++ b/arch/mips/loongson64/loongson-3/platform.c
136 @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void)
137 continue;
138
139 pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
140 + if (!pdev)
141 + return -ENOMEM;
142 +
143 pdev->name = loongson_sysconf.sensors[i].name;
144 pdev->id = loongson_sysconf.sensors[i].id;
145 pdev->dev.platform_data = &loongson_sysconf.sensors[i];
146 diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
147 index 620e08d4eb6e..adac3dee4c57 100644
148 --- a/arch/powerpc/kernel/eeh_driver.c
149 +++ b/arch/powerpc/kernel/eeh_driver.c
150 @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata)
151
152 pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
153 edev->pdev = NULL;
154 -
155 - /*
156 - * We have to set the VF PE number to invalid one, which is
157 - * required to plug the VF successfully.
158 - */
159 - pdn->pe_number = IODA_INVALID_PE;
160 #endif
161 if (rmv_data)
162 list_add(&edev->rmv_list, &rmv_data->edev_list);
163 diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
164 index 592693437070..c8f1b78fbd0e 100644
165 --- a/arch/powerpc/kernel/pci_dn.c
166 +++ b/arch/powerpc/kernel/pci_dn.c
167 @@ -271,9 +271,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
168 continue;
169
170 #ifdef CONFIG_EEH
171 - /* Release EEH device for the VF */
172 + /*
173 + * Release EEH state for this VF. The PCI core
174 + * has already torn down the pci_dev for this VF, but
175 + * we're responsible to removing the eeh_dev since it
176 + * has the same lifetime as the pci_dn that spawned it.
177 + */
178 edev = pdn_to_eeh_dev(pdn);
179 if (edev) {
180 + /*
181 + * We allocate pci_dn's for the totalvfs count,
182 + * but only only the vfs that were activated
183 + * have a configured PE.
184 + */
185 + if (edev->pe)
186 + eeh_rmv_from_parent_pe(edev);
187 +
188 pdn->edev = NULL;
189 kfree(edev);
190 }
191 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
192 index 3ec673b4ca6c..b787a669a1e2 100644
193 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
194 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
195 @@ -1524,6 +1524,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
196
197 /* Reserve PE for each VF */
198 for (vf_index = 0; vf_index < num_vfs; vf_index++) {
199 + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
200 + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
201 + struct pci_dn *vf_pdn;
202 +
203 if (pdn->m64_single_mode)
204 pe_num = pdn->pe_num_map[vf_index];
205 else
206 @@ -1536,13 +1540,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
207 pe->pbus = NULL;
208 pe->parent_dev = pdev;
209 pe->mve_number = -1;
210 - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
211 - pci_iov_virtfn_devfn(pdev, vf_index);
212 + pe->rid = (vf_bus << 8) | vf_devfn;
213
214 pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
215 hose->global_number, pdev->bus->number,
216 - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
217 - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
218 + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
219
220 if (pnv_ioda_configure_pe(phb, pe)) {
221 /* XXX What do we do here ? */
222 @@ -1556,6 +1558,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
223 list_add_tail(&pe->list, &phb->ioda.pe_list);
224 mutex_unlock(&phb->ioda.pe_list_mutex);
225
226 + /* associate this pe to it's pdn */
227 + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
228 + if (vf_pdn->busno == vf_bus &&
229 + vf_pdn->devfn == vf_devfn) {
230 + vf_pdn->pe_number = pe_num;
231 + break;
232 + }
233 + }
234 +
235 pnv_pci_ioda2_setup_dma_pe(phb, pe);
236 }
237 }
238 diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
239 index 00dbf1e895a9..2ed7627e991e 100644
240 --- a/arch/powerpc/platforms/powernv/pci.c
241 +++ b/arch/powerpc/platforms/powernv/pci.c
242 @@ -856,16 +856,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
243 struct pnv_phb *phb = hose->private_data;
244 #ifdef CONFIG_PCI_IOV
245 struct pnv_ioda_pe *pe;
246 - struct pci_dn *pdn;
247
248 /* Fix the VF pdn PE number */
249 if (pdev->is_virtfn) {
250 - pdn = pci_get_pdn(pdev);
251 - WARN_ON(pdn->pe_number != IODA_INVALID_PE);
252 list_for_each_entry(pe, &phb->ioda.pe_list, list) {
253 if (pe->rid == ((pdev->bus->number << 8) |
254 (pdev->devfn & 0xff))) {
255 - pdn->pe_number = pe->pe_number;
256 pe->pdev = pdev;
257 break;
258 }
259 diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
260 index 69b8a41fca84..e094c0cf6936 100644
261 --- a/arch/s390/include/asm/page.h
262 +++ b/arch/s390/include/asm/page.h
263 @@ -35,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end);
264
265 static inline void storage_key_init_range(unsigned long start, unsigned long end)
266 {
267 - if (PAGE_DEFAULT_KEY)
268 + if (PAGE_DEFAULT_KEY != 0)
269 __storage_key_init_range(start, end);
270 }
271
272 diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
273 index 0bb08f341c09..f1330245b584 100644
274 --- a/arch/s390/include/asm/timex.h
275 +++ b/arch/s390/include/asm/timex.h
276 @@ -146,7 +146,7 @@ static inline void get_tod_clock_ext(char *clk)
277
278 static inline unsigned long long get_tod_clock(void)
279 {
280 - unsigned char clk[STORE_CLOCK_EXT_SIZE];
281 + char clk[STORE_CLOCK_EXT_SIZE];
282
283 get_tod_clock_ext(clk);
284 return *((unsigned long long *)&clk[1]);
285 diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
286 index be75e8e49e43..802a4ded9a62 100644
287 --- a/arch/s390/kernel/mcount.S
288 +++ b/arch/s390/kernel/mcount.S
289 @@ -24,6 +24,12 @@ ENTRY(ftrace_stub)
290 #define STACK_PTREGS (STACK_FRAME_OVERHEAD)
291 #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
292 #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
293 +#ifdef __PACK_STACK
294 +/* allocate just enough for r14, r15 and backchain */
295 +#define TRACED_FUNC_FRAME_SIZE 24
296 +#else
297 +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
298 +#endif
299
300 ENTRY(_mcount)
301 BR_EX %r14
302 @@ -37,9 +43,16 @@ ENTRY(ftrace_caller)
303 #ifndef CC_USING_HOTPATCH
304 aghi %r0,MCOUNT_RETURN_FIXUP
305 #endif
306 - aghi %r15,-STACK_FRAME_SIZE
307 + # allocate stack frame for ftrace_caller to contain traced function
308 + aghi %r15,-TRACED_FUNC_FRAME_SIZE
309 stg %r1,__SF_BACKCHAIN(%r15)
310 + stg %r0,(__SF_GPRS+8*8)(%r15)
311 + stg %r15,(__SF_GPRS+9*8)(%r15)
312 + # allocate pt_regs and stack frame for ftrace_trace_function
313 + aghi %r15,-STACK_FRAME_SIZE
314 stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
315 + aghi %r1,-TRACED_FUNC_FRAME_SIZE
316 + stg %r1,__SF_BACKCHAIN(%r15)
317 stg %r0,(STACK_PTREGS_PSW+8)(%r15)
318 stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
319 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
320 diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
321 index 2a0ca8780f0d..e4caddd443da 100644
322 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h
323 +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
324 @@ -79,8 +79,15 @@ enum {
325 GPIO_FN_WDTOVF,
326
327 /* CAN */
328 - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
329 - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2,
330 + GPIO_FN_CTX2, GPIO_FN_CRX2,
331 + GPIO_FN_CTX1, GPIO_FN_CRX1,
332 + GPIO_FN_CTX0, GPIO_FN_CRX0,
333 + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1,
334 + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2,
335 + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20,
336 + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22,
337 + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22,
338 + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20,
339
340 /* DMAC */
341 GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0,
342 diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
343 index 572db686f845..385d6d04564d 100644
344 --- a/arch/sparc/kernel/vmlinux.lds.S
345 +++ b/arch/sparc/kernel/vmlinux.lds.S
346 @@ -151,12 +151,14 @@ SECTIONS
347 }
348 PERCPU_SECTION(SMP_CACHE_BYTES)
349
350 -#ifdef CONFIG_JUMP_LABEL
351 . = ALIGN(PAGE_SIZE);
352 .exit.text : {
353 EXIT_TEXT
354 }
355 -#endif
356 +
357 + .exit.data : {
358 + EXIT_DATA
359 + }
360
361 . = ALIGN(PAGE_SIZE);
362 __init_end = .;
363 diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
364 index 3f9d1a83891a..50c1f77cab15 100644
365 --- a/arch/x86/entry/vdso/vdso32-setup.c
366 +++ b/arch/x86/entry/vdso/vdso32-setup.c
367 @@ -10,6 +10,7 @@
368 #include <linux/smp.h>
369 #include <linux/kernel.h>
370 #include <linux/mm_types.h>
371 +#include <linux/elf.h>
372
373 #include <asm/processor.h>
374 #include <asm/vdso.h>
375 diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
376 index 00b56cc69d37..836b7e4a2005 100644
377 --- a/arch/x86/events/amd/core.c
378 +++ b/arch/x86/events/amd/core.c
379 @@ -239,6 +239,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
380 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
381 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
382 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
383 + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
384 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
385 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
386 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
387 diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
388 index ad31c01f810f..f562ddbeb20c 100644
389 --- a/arch/x86/events/intel/ds.c
390 +++ b/arch/x86/events/intel/ds.c
391 @@ -1326,6 +1326,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
392 old = ((s64)(prev_raw_count << shift) >> shift);
393 local64_add(new - old + count * period, &event->count);
394
395 + local64_set(&hwc->period_left, -new);
396 +
397 perf_event_update_userpage(event);
398
399 return 0;
400 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
401 index ccc4420f051b..fb457ba8ccc6 100644
402 --- a/arch/x86/include/asm/cpufeatures.h
403 +++ b/arch/x86/include/asm/cpufeatures.h
404 @@ -305,6 +305,7 @@
405 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
406 #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
407 #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
408 +#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
409
410 /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
411 #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
412 diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
413 index e728699db774..3a01996db58f 100644
414 --- a/arch/x86/include/asm/vgtod.h
415 +++ b/arch/x86/include/asm/vgtod.h
416 @@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
417 * works on all CPUs. This is volatile so that it orders
418 * correctly wrt barrier() and to keep gcc from cleverly
419 * hoisting it out of the calling function.
420 + *
421 + * If RDPID is available, use it.
422 */
423 - asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
424 + alternative_io ("lsl %[p],%[seg]",
425 + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
426 + X86_FEATURE_RDPID,
427 + [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
428
429 return p;
430 }
431 diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
432 index 2a473cda3977..775d5f028fe8 100644
433 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
434 +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
435 @@ -846,9 +846,12 @@ static const struct sysfs_ops threshold_ops = {
436 .store = store,
437 };
438
439 +static void threshold_block_release(struct kobject *kobj);
440 +
441 static struct kobj_type threshold_ktype = {
442 .sysfs_ops = &threshold_ops,
443 .default_attrs = default_attrs,
444 + .release = threshold_block_release,
445 };
446
447 static const char *get_name(unsigned int bank, struct threshold_block *b)
448 @@ -879,8 +882,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
449 return buf_mcatype;
450 }
451
452 -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
453 - unsigned int block, u32 address)
454 +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
455 + unsigned int bank, unsigned int block,
456 + u32 address)
457 {
458 struct threshold_block *b = NULL;
459 u32 low, high;
460 @@ -924,16 +928,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
461
462 INIT_LIST_HEAD(&b->miscj);
463
464 - if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
465 - list_add(&b->miscj,
466 - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
467 - } else {
468 - per_cpu(threshold_banks, cpu)[bank]->blocks = b;
469 - }
470 + if (tb->blocks)
471 + list_add(&b->miscj, &tb->blocks->miscj);
472 + else
473 + tb->blocks = b;
474
475 - err = kobject_init_and_add(&b->kobj, &threshold_ktype,
476 - per_cpu(threshold_banks, cpu)[bank]->kobj,
477 - get_name(bank, b));
478 + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
479 if (err)
480 goto out_free;
481 recurse:
482 @@ -941,7 +941,7 @@ recurse:
483 if (!address)
484 return 0;
485
486 - err = allocate_threshold_blocks(cpu, bank, block, address);
487 + err = allocate_threshold_blocks(cpu, tb, bank, block, address);
488 if (err)
489 goto out_free;
490
491 @@ -1026,8 +1026,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
492 goto out_free;
493 }
494
495 - per_cpu(threshold_banks, cpu)[bank] = b;
496 -
497 if (is_shared_bank(bank)) {
498 atomic_set(&b->cpus, 1);
499
500 @@ -1038,9 +1036,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 }
502 }
503
504 - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
505 - if (!err)
506 - goto out;
507 + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
508 + if (err)
509 + goto out_free;
510 +
511 + per_cpu(threshold_banks, cpu)[bank] = b;
512 +
513 + return 0;
514
515 out_free:
516 kfree(b);
517 @@ -1074,8 +1076,12 @@ static int threshold_create_device(unsigned int cpu)
518 return err;
519 }
520
521 -static void deallocate_threshold_block(unsigned int cpu,
522 - unsigned int bank)
523 +static void threshold_block_release(struct kobject *kobj)
524 +{
525 + kfree(to_block(kobj));
526 +}
527 +
528 +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
529 {
530 struct threshold_block *pos = NULL;
531 struct threshold_block *tmp = NULL;
532 @@ -1085,13 +1091,11 @@ static void deallocate_threshold_block(unsigned int cpu,
533 return;
534
535 list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
536 - kobject_put(&pos->kobj);
537 list_del(&pos->miscj);
538 - kfree(pos);
539 + kobject_put(&pos->kobj);
540 }
541
542 - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
543 - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
544 + kobject_put(&head->blocks->kobj);
545 }
546
547 static void __threshold_remove_blocks(struct threshold_bank *b)
548 diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
549 index 85195d447a92..f3215346e47f 100644
550 --- a/arch/x86/kernel/sysfb_simplefb.c
551 +++ b/arch/x86/kernel/sysfb_simplefb.c
552 @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si,
553 if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
554 size <<= 16;
555 length = mode->height * mode->stride;
556 - length = PAGE_ALIGN(length);
557 if (length > size) {
558 printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
559 return -EINVAL;
560 }
561 + length = PAGE_ALIGN(length);
562
563 /* setup IORESOURCE_MEM as framebuffer memory */
564 memset(&res, 0, sizeof(res));
565 diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
566 index 242ad06fbe1a..c57dab0884fe 100644
567 --- a/arch/x86/kvm/cpuid.c
568 +++ b/arch/x86/kvm/cpuid.c
569 @@ -279,13 +279,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
570 {
571 switch (func) {
572 case 0:
573 - entry->eax = 1; /* only one leaf currently */
574 + entry->eax = 7;
575 ++*nent;
576 break;
577 case 1:
578 entry->ecx = F(MOVBE);
579 ++*nent;
580 break;
581 + case 7:
582 + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
583 + if (index == 0)
584 + entry->ecx = F(RDPID);
585 + ++*nent;
586 default:
587 break;
588 }
589 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
590 index c456a9dbade8..e9c7090858d6 100644
591 --- a/arch/x86/kvm/emulate.c
592 +++ b/arch/x86/kvm/emulate.c
593 @@ -3531,6 +3531,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
594 return X86EMUL_CONTINUE;
595 }
596
597 +static int em_rdpid(struct x86_emulate_ctxt *ctxt)
598 +{
599 + u64 tsc_aux = 0;
600 +
601 + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
602 + return emulate_gp(ctxt, 0);
603 + ctxt->dst.val = tsc_aux;
604 + return X86EMUL_CONTINUE;
605 +}
606 +
607 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
608 {
609 u64 tsc = 0;
610 @@ -4391,10 +4401,20 @@ static const struct opcode group8[] = {
611 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
612 };
613
614 +/*
615 + * The "memory" destination is actually always a register, since we come
616 + * from the register case of group9.
617 + */
618 +static const struct gprefix pfx_0f_c7_7 = {
619 + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
620 +};
621 +
622 +
623 static const struct group_dual group9 = { {
624 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
625 }, {
626 - N, N, N, N, N, N, N, N,
627 + N, N, N, N, N, N, N,
628 + GP(0, &pfx_0f_c7_7),
629 } };
630
631 static const struct opcode group11[] = {
632 diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
633 index 6c0191615f23..cf8b3c17657a 100644
634 --- a/arch/x86/kvm/irq_comm.c
635 +++ b/arch/x86/kvm/irq_comm.c
636 @@ -436,7 +436,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
637
638 kvm_set_msi_irq(vcpu->kvm, entry, &irq);
639
640 - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
641 + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0,
642 irq.dest_id, irq.dest_mode))
643 __set_bit(irq.vector, ioapic_handled_vectors);
644 }
645 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
646 index caa17f8d4221..3988e26af3b5 100644
647 --- a/arch/x86/kvm/lapic.c
648 +++ b/arch/x86/kvm/lapic.c
649 @@ -532,9 +532,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
650 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
651 {
652 u8 val;
653 - if (pv_eoi_get_user(vcpu, &val) < 0)
654 + if (pv_eoi_get_user(vcpu, &val) < 0) {
655 apic_debug("Can't read EOI MSR value: 0x%llx\n",
656 (unsigned long long)vcpu->arch.pv_eoi.msr_val);
657 + return false;
658 + }
659 return val & 0x1;
660 }
661
662 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
663 index 67cdb08a736f..8bd336651de5 100644
664 --- a/arch/x86/kvm/vmx.c
665 +++ b/arch/x86/kvm/vmx.c
666 @@ -4641,6 +4641,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
667 (ss.selector & SEGMENT_RPL_MASK));
668 }
669
670 +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu,
671 + unsigned int port, int size);
672 +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
673 + struct vmcs12 *vmcs12)
674 +{
675 + unsigned long exit_qualification;
676 + unsigned short port;
677 + int size;
678 +
679 + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
680 + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
681 +
682 + exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
683 +
684 + port = exit_qualification >> 16;
685 + size = (exit_qualification & 7) + 1;
686 +
687 + return nested_vmx_check_io_bitmaps(vcpu, port, size);
688 +}
689 +
690 /*
691 * Check if guest state is valid. Returns true if valid, false if
692 * not.
693 @@ -8026,23 +8046,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
694 static const int kvm_vmx_max_exit_handlers =
695 ARRAY_SIZE(kvm_vmx_exit_handlers);
696
697 -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
698 - struct vmcs12 *vmcs12)
699 +/*
700 + * Return true if an IO instruction with the specified port and size should cause
701 + * a VM-exit into L1.
702 + */
703 +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
704 + int size)
705 {
706 - unsigned long exit_qualification;
707 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
708 gpa_t bitmap, last_bitmap;
709 - unsigned int port;
710 - int size;
711 u8 b;
712
713 - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
714 - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
715 -
716 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
717 -
718 - port = exit_qualification >> 16;
719 - size = (exit_qualification & 7) + 1;
720 -
721 last_bitmap = (gpa_t)-1;
722 b = -1;
723
724 @@ -11335,11 +11349,71 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
725 to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
726 }
727
728 +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
729 + struct x86_instruction_info *info)
730 +{
731 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
732 + unsigned short port;
733 + bool intercept;
734 + int size;
735 +
736 + if (info->intercept == x86_intercept_in ||
737 + info->intercept == x86_intercept_ins) {
738 + port = info->src_val;
739 + size = info->dst_bytes;
740 + } else {
741 + port = info->dst_val;
742 + size = info->src_bytes;
743 + }
744 +
745 + /*
746 + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
747 + * VM-exits depend on the 'unconditional IO exiting' VM-execution
748 + * control.
749 + *
750 + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
751 + */
752 + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
753 + intercept = nested_cpu_has(vmcs12,
754 + CPU_BASED_UNCOND_IO_EXITING);
755 + else
756 + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
757 +
758 + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
759 +}
760 +
761 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
762 struct x86_instruction_info *info,
763 enum x86_intercept_stage stage)
764 {
765 - return X86EMUL_CONTINUE;
766 + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
767 + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
768 +
769 + switch (info->intercept) {
770 + /*
771 + * RDPID causes #UD if disabled through secondary execution controls.
772 + * Because it is marked as EmulateOnUD, we need to intercept it here.
773 + */
774 + case x86_intercept_rdtscp:
775 + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
776 + ctxt->exception.vector = UD_VECTOR;
777 + ctxt->exception.error_code_valid = false;
778 + return X86EMUL_PROPAGATE_FAULT;
779 + }
780 + break;
781 +
782 + case x86_intercept_in:
783 + case x86_intercept_ins:
784 + case x86_intercept_out:
785 + case x86_intercept_outs:
786 + return vmx_check_intercept_io(vcpu, info);
787 +
788 + /* TODO: check more intercepts... */
789 + default:
790 + break;
791 + }
792 +
793 + return X86EMUL_UNHANDLEABLE;
794 }
795
796 #ifdef CONFIG_X86_64
797 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
798 deleted file mode 100644
799 index 3791ce8d269e..000000000000
800 --- a/arch/x86/kvm/vmx/vmx.c
801 +++ /dev/null
802 @@ -1,8033 +0,0 @@
803 -// SPDX-License-Identifier: GPL-2.0-only
804 -/*
805 - * Kernel-based Virtual Machine driver for Linux
806 - *
807 - * This module enables machines with Intel VT-x extensions to run virtual
808 - * machines without emulation or binary translation.
809 - *
810 - * Copyright (C) 2006 Qumranet, Inc.
811 - * Copyright 2010 Red Hat, Inc. and/or its affiliates.
812 - *
813 - * Authors:
814 - * Avi Kivity <avi@qumranet.com>
815 - * Yaniv Kamay <yaniv@qumranet.com>
816 - */
817 -
818 -#include <linux/frame.h>
819 -#include <linux/highmem.h>
820 -#include <linux/hrtimer.h>
821 -#include <linux/kernel.h>
822 -#include <linux/kvm_host.h>
823 -#include <linux/module.h>
824 -#include <linux/moduleparam.h>
825 -#include <linux/mod_devicetable.h>
826 -#include <linux/mm.h>
827 -#include <linux/sched.h>
828 -#include <linux/sched/smt.h>
829 -#include <linux/slab.h>
830 -#include <linux/tboot.h>
831 -#include <linux/trace_events.h>
832 -
833 -#include <asm/apic.h>
834 -#include <asm/asm.h>
835 -#include <asm/cpu.h>
836 -#include <asm/debugreg.h>
837 -#include <asm/desc.h>
838 -#include <asm/fpu/internal.h>
839 -#include <asm/io.h>
840 -#include <asm/irq_remapping.h>
841 -#include <asm/kexec.h>
842 -#include <asm/perf_event.h>
843 -#include <asm/mce.h>
844 -#include <asm/mmu_context.h>
845 -#include <asm/mshyperv.h>
846 -#include <asm/spec-ctrl.h>
847 -#include <asm/virtext.h>
848 -#include <asm/vmx.h>
849 -
850 -#include "capabilities.h"
851 -#include "cpuid.h"
852 -#include "evmcs.h"
853 -#include "irq.h"
854 -#include "kvm_cache_regs.h"
855 -#include "lapic.h"
856 -#include "mmu.h"
857 -#include "nested.h"
858 -#include "ops.h"
859 -#include "pmu.h"
860 -#include "trace.h"
861 -#include "vmcs.h"
862 -#include "vmcs12.h"
863 -#include "vmx.h"
864 -#include "x86.h"
865 -
866 -MODULE_AUTHOR("Qumranet");
867 -MODULE_LICENSE("GPL");
868 -
869 -static const struct x86_cpu_id vmx_cpu_id[] = {
870 - X86_FEATURE_MATCH(X86_FEATURE_VMX),
871 - {}
872 -};
873 -MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
874 -
875 -bool __read_mostly enable_vpid = 1;
876 -module_param_named(vpid, enable_vpid, bool, 0444);
877 -
878 -static bool __read_mostly enable_vnmi = 1;
879 -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
880 -
881 -bool __read_mostly flexpriority_enabled = 1;
882 -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
883 -
884 -bool __read_mostly enable_ept = 1;
885 -module_param_named(ept, enable_ept, bool, S_IRUGO);
886 -
887 -bool __read_mostly enable_unrestricted_guest = 1;
888 -module_param_named(unrestricted_guest,
889 - enable_unrestricted_guest, bool, S_IRUGO);
890 -
891 -bool __read_mostly enable_ept_ad_bits = 1;
892 -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
893 -
894 -static bool __read_mostly emulate_invalid_guest_state = true;
895 -module_param(emulate_invalid_guest_state, bool, S_IRUGO);
896 -
897 -static bool __read_mostly fasteoi = 1;
898 -module_param(fasteoi, bool, S_IRUGO);
899 -
900 -static bool __read_mostly enable_apicv = 1;
901 -module_param(enable_apicv, bool, S_IRUGO);
902 -
903 -/*
904 - * If nested=1, nested virtualization is supported, i.e., guests may use
905 - * VMX and be a hypervisor for its own guests. If nested=0, guests may not
906 - * use VMX instructions.
907 - */
908 -static bool __read_mostly nested = 1;
909 -module_param(nested, bool, S_IRUGO);
910 -
911 -bool __read_mostly enable_pml = 1;
912 -module_param_named(pml, enable_pml, bool, S_IRUGO);
913 -
914 -static bool __read_mostly dump_invalid_vmcs = 0;
915 -module_param(dump_invalid_vmcs, bool, 0644);
916 -
917 -#define MSR_BITMAP_MODE_X2APIC 1
918 -#define MSR_BITMAP_MODE_X2APIC_APICV 2
919 -
920 -#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
921 -
922 -/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
923 -static int __read_mostly cpu_preemption_timer_multi;
924 -static bool __read_mostly enable_preemption_timer = 1;
925 -#ifdef CONFIG_X86_64
926 -module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
927 -#endif
928 -
929 -#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
930 -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
931 -#define KVM_VM_CR0_ALWAYS_ON \
932 - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
933 - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
934 -#define KVM_CR4_GUEST_OWNED_BITS \
935 - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
936 - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
937 -
938 -#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
939 -#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
940 -#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
941 -
942 -#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
943 -
944 -#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
945 - RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
946 - RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
947 - RTIT_STATUS_BYTECNT))
948 -
949 -#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
950 - (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
951 -
952 -/*
953 - * These 2 parameters are used to config the controls for Pause-Loop Exiting:
954 - * ple_gap: upper bound on the amount of time between two successive
955 - * executions of PAUSE in a loop. Also indicate if ple enabled.
956 - * According to test, this time is usually smaller than 128 cycles.
957 - * ple_window: upper bound on the amount of time a guest is allowed to execute
958 - * in a PAUSE loop. Tests indicate that most spinlocks are held for
959 - * less than 2^12 cycles
960 - * Time is measured based on a counter that runs at the same rate as the TSC,
961 - * refer SDM volume 3b section 21.6.13 & 22.1.3.
962 - */
963 -static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
964 -module_param(ple_gap, uint, 0444);
965 -
966 -static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
967 -module_param(ple_window, uint, 0444);
968 -
969 -/* Default doubles per-vcpu window every exit. */
970 -static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
971 -module_param(ple_window_grow, uint, 0444);
972 -
973 -/* Default resets per-vcpu window every exit to ple_window. */
974 -static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
975 -module_param(ple_window_shrink, uint, 0444);
976 -
977 -/* Default is to compute the maximum so we can never overflow. */
978 -static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
979 -module_param(ple_window_max, uint, 0444);
980 -
981 -/* Default is SYSTEM mode, 1 for host-guest mode */
982 -int __read_mostly pt_mode = PT_MODE_SYSTEM;
983 -module_param(pt_mode, int, S_IRUGO);
984 -
985 -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
986 -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
987 -static DEFINE_MUTEX(vmx_l1d_flush_mutex);
988 -
989 -/* Storage for pre module init parameter parsing */
990 -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
991 -
992 -static const struct {
993 - const char *option;
994 - bool for_parse;
995 -} vmentry_l1d_param[] = {
996 - [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
997 - [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
998 - [VMENTER_L1D_FLUSH_COND] = {"cond", true},
999 - [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
1000 - [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
1001 - [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
1002 -};
1003 -
1004 -#define L1D_CACHE_ORDER 4
1005 -static void *vmx_l1d_flush_pages;
1006 -
1007 -static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
1008 -{
1009 - struct page *page;
1010 - unsigned int i;
1011 -
1012 - if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
1013 - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1014 - return 0;
1015 - }
1016 -
1017 - if (!enable_ept) {
1018 - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
1019 - return 0;
1020 - }
1021 -
1022 - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
1023 - u64 msr;
1024 -
1025 - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
1026 - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
1027 - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1028 - return 0;
1029 - }
1030 - }
1031 -
1032 - /* If set to auto use the default l1tf mitigation method */
1033 - if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
1034 - switch (l1tf_mitigation) {
1035 - case L1TF_MITIGATION_OFF:
1036 - l1tf = VMENTER_L1D_FLUSH_NEVER;
1037 - break;
1038 - case L1TF_MITIGATION_FLUSH_NOWARN:
1039 - case L1TF_MITIGATION_FLUSH:
1040 - case L1TF_MITIGATION_FLUSH_NOSMT:
1041 - l1tf = VMENTER_L1D_FLUSH_COND;
1042 - break;
1043 - case L1TF_MITIGATION_FULL:
1044 - case L1TF_MITIGATION_FULL_FORCE:
1045 - l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1046 - break;
1047 - }
1048 - } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
1049 - l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1050 - }
1051 -
1052 - if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
1053 - !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
1054 - /*
1055 - * This allocation for vmx_l1d_flush_pages is not tied to a VM
1056 - * lifetime and so should not be charged to a memcg.
1057 - */
1058 - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
1059 - if (!page)
1060 - return -ENOMEM;
1061 - vmx_l1d_flush_pages = page_address(page);
1062 -
1063 - /*
1064 - * Initialize each page with a different pattern in
1065 - * order to protect against KSM in the nested
1066 - * virtualization case.
1067 - */
1068 - for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
1069 - memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
1070 - PAGE_SIZE);
1071 - }
1072 - }
1073 -
1074 - l1tf_vmx_mitigation = l1tf;
1075 -
1076 - if (l1tf != VMENTER_L1D_FLUSH_NEVER)
1077 - static_branch_enable(&vmx_l1d_should_flush);
1078 - else
1079 - static_branch_disable(&vmx_l1d_should_flush);
1080 -
1081 - if (l1tf == VMENTER_L1D_FLUSH_COND)
1082 - static_branch_enable(&vmx_l1d_flush_cond);
1083 - else
1084 - static_branch_disable(&vmx_l1d_flush_cond);
1085 - return 0;
1086 -}
1087 -
1088 -static int vmentry_l1d_flush_parse(const char *s)
1089 -{
1090 - unsigned int i;
1091 -
1092 - if (s) {
1093 - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
1094 - if (vmentry_l1d_param[i].for_parse &&
1095 - sysfs_streq(s, vmentry_l1d_param[i].option))
1096 - return i;
1097 - }
1098 - }
1099 - return -EINVAL;
1100 -}
1101 -
1102 -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
1103 -{
1104 - int l1tf, ret;
1105 -
1106 - l1tf = vmentry_l1d_flush_parse(s);
1107 - if (l1tf < 0)
1108 - return l1tf;
1109 -
1110 - if (!boot_cpu_has(X86_BUG_L1TF))
1111 - return 0;
1112 -
1113 - /*
1114 - * Has vmx_init() run already? If not then this is the pre init
1115 - * parameter parsing. In that case just store the value and let
1116 - * vmx_init() do the proper setup after enable_ept has been
1117 - * established.
1118 - */
1119 - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
1120 - vmentry_l1d_flush_param = l1tf;
1121 - return 0;
1122 - }
1123 -
1124 - mutex_lock(&vmx_l1d_flush_mutex);
1125 - ret = vmx_setup_l1d_flush(l1tf);
1126 - mutex_unlock(&vmx_l1d_flush_mutex);
1127 - return ret;
1128 -}
1129 -
1130 -static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
1131 -{
1132 - if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
1133 - return sprintf(s, "???\n");
1134 -
1135 - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
1136 -}
1137 -
1138 -static const struct kernel_param_ops vmentry_l1d_flush_ops = {
1139 - .set = vmentry_l1d_flush_set,
1140 - .get = vmentry_l1d_flush_get,
1141 -};
1142 -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
1143 -
1144 -static bool guest_state_valid(struct kvm_vcpu *vcpu);
1145 -static u32 vmx_segment_access_rights(struct kvm_segment *var);
1146 -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
1147 - u32 msr, int type);
1148 -
1149 -void vmx_vmexit(void);
1150 -
1151 -#define vmx_insn_failed(fmt...) \
1152 -do { \
1153 - WARN_ONCE(1, fmt); \
1154 - pr_warn_ratelimited(fmt); \
1155 -} while (0)
1156 -
1157 -asmlinkage void vmread_error(unsigned long field, bool fault)
1158 -{
1159 - if (fault)
1160 - kvm_spurious_fault();
1161 - else
1162 - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
1163 -}
1164 -
1165 -noinline void vmwrite_error(unsigned long field, unsigned long value)
1166 -{
1167 - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
1168 - field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1169 -}
1170 -
1171 -noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
1172 -{
1173 - vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
1174 -}
1175 -
1176 -noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
1177 -{
1178 - vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
1179 -}
1180 -
1181 -noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
1182 -{
1183 - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
1184 - ext, vpid, gva);
1185 -}
1186 -
1187 -noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
1188 -{
1189 - vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
1190 - ext, eptp, gpa);
1191 -}
1192 -
1193 -static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1194 -DEFINE_PER_CPU(struct vmcs *, current_vmcs);
1195 -/*
1196 - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
1197 - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1198 - */
1199 -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
1200 -
1201 -/*
1202 - * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
1203 - * can find which vCPU should be waken up.
1204 - */
1205 -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1206 -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1207 -
1208 -static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1209 -static DEFINE_SPINLOCK(vmx_vpid_lock);
1210 -
1211 -struct vmcs_config vmcs_config;
1212 -struct vmx_capability vmx_capability;
1213 -
1214 -#define VMX_SEGMENT_FIELD(seg) \
1215 - [VCPU_SREG_##seg] = { \
1216 - .selector = GUEST_##seg##_SELECTOR, \
1217 - .base = GUEST_##seg##_BASE, \
1218 - .limit = GUEST_##seg##_LIMIT, \
1219 - .ar_bytes = GUEST_##seg##_AR_BYTES, \
1220 - }
1221 -
1222 -static const struct kvm_vmx_segment_field {
1223 - unsigned selector;
1224 - unsigned base;
1225 - unsigned limit;
1226 - unsigned ar_bytes;
1227 -} kvm_vmx_segment_fields[] = {
1228 - VMX_SEGMENT_FIELD(CS),
1229 - VMX_SEGMENT_FIELD(DS),
1230 - VMX_SEGMENT_FIELD(ES),
1231 - VMX_SEGMENT_FIELD(FS),
1232 - VMX_SEGMENT_FIELD(GS),
1233 - VMX_SEGMENT_FIELD(SS),
1234 - VMX_SEGMENT_FIELD(TR),
1235 - VMX_SEGMENT_FIELD(LDTR),
1236 -};
1237 -
1238 -u64 host_efer;
1239 -static unsigned long host_idt_base;
1240 -
1241 -/*
1242 - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
1243 - * will emulate SYSCALL in legacy mode if the vendor string in guest
1244 - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
1245 - * support this emulation, IA32_STAR must always be included in
1246 - * vmx_msr_index[], even in i386 builds.
1247 - */
1248 -const u32 vmx_msr_index[] = {
1249 -#ifdef CONFIG_X86_64
1250 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
1251 -#endif
1252 - MSR_EFER, MSR_TSC_AUX, MSR_STAR,
1253 - MSR_IA32_TSX_CTRL,
1254 -};
1255 -
1256 -#if IS_ENABLED(CONFIG_HYPERV)
1257 -static bool __read_mostly enlightened_vmcs = true;
1258 -module_param(enlightened_vmcs, bool, 0444);
1259 -
1260 -/* check_ept_pointer() should be under protection of ept_pointer_lock. */
1261 -static void check_ept_pointer_match(struct kvm *kvm)
1262 -{
1263 - struct kvm_vcpu *vcpu;
1264 - u64 tmp_eptp = INVALID_PAGE;
1265 - int i;
1266 -
1267 - kvm_for_each_vcpu(i, vcpu, kvm) {
1268 - if (!VALID_PAGE(tmp_eptp)) {
1269 - tmp_eptp = to_vmx(vcpu)->ept_pointer;
1270 - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1271 - to_kvm_vmx(kvm)->ept_pointers_match
1272 - = EPT_POINTERS_MISMATCH;
1273 - return;
1274 - }
1275 - }
1276 -
1277 - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1278 -}
1279 -
1280 -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
1281 - void *data)
1282 -{
1283 - struct kvm_tlb_range *range = data;
1284 -
1285 - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
1286 - range->pages);
1287 -}
1288 -
1289 -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
1290 - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
1291 -{
1292 - u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
1293 -
1294 - /*
1295 - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
1296 - * of the base of EPT PML4 table, strip off EPT configuration
1297 - * information.
1298 - */
1299 - if (range)
1300 - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
1301 - kvm_fill_hv_flush_list_func, (void *)range);
1302 - else
1303 - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
1304 -}
1305 -
1306 -static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
1307 - struct kvm_tlb_range *range)
1308 -{
1309 - struct kvm_vcpu *vcpu;
1310 - int ret = 0, i;
1311 -
1312 - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1313 -
1314 - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1315 - check_ept_pointer_match(kvm);
1316 -
1317 - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
1318 - kvm_for_each_vcpu(i, vcpu, kvm) {
1319 - /* If ept_pointer is invalid pointer, bypass flush request. */
1320 - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
1321 - ret |= __hv_remote_flush_tlb_with_range(
1322 - kvm, vcpu, range);
1323 - }
1324 - } else {
1325 - ret = __hv_remote_flush_tlb_with_range(kvm,
1326 - kvm_get_vcpu(kvm, 0), range);
1327 - }
1328 -
1329 - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1330 - return ret;
1331 -}
1332 -static int hv_remote_flush_tlb(struct kvm *kvm)
1333 -{
1334 - return hv_remote_flush_tlb_with_range(kvm, NULL);
1335 -}
1336 -
1337 -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
1338 -{
1339 - struct hv_enlightened_vmcs *evmcs;
1340 - struct hv_partition_assist_pg **p_hv_pa_pg =
1341 - &vcpu->kvm->arch.hyperv.hv_pa_pg;
1342 - /*
1343 - * Synthetic VM-Exit is not enabled in current code and so All
1344 - * evmcs in singe VM shares same assist page.
1345 - */
1346 - if (!*p_hv_pa_pg)
1347 - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1348 -
1349 - if (!*p_hv_pa_pg)
1350 - return -ENOMEM;
1351 -
1352 - evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
1353 -
1354 - evmcs->partition_assist_page =
1355 - __pa(*p_hv_pa_pg);
1356 - evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
1357 - evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
1358 -
1359 - return 0;
1360 -}
1361 -
1362 -#endif /* IS_ENABLED(CONFIG_HYPERV) */
1363 -
1364 -/*
1365 - * Comment's format: document - errata name - stepping - processor name.
1366 - * Refer from
1367 - * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1368 - */
1369 -static u32 vmx_preemption_cpu_tfms[] = {
1370 -/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
1371 -0x000206E6,
1372 -/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1373 -/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1374 -/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
1375 -0x00020652,
1376 -/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
1377 -0x00020655,
1378 -/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1379 -/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1380 -/*
1381 - * 320767.pdf - AAP86 - B1 -
1382 - * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1383 - */
1384 -0x000106E5,
1385 -/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
1386 -0x000106A0,
1387 -/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
1388 -0x000106A1,
1389 -/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
1390 -0x000106A4,
1391 - /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1392 - /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1393 - /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
1394 -0x000106A5,
1395 - /* Xeon E3-1220 V2 */
1396 -0x000306A8,
1397 -};
1398 -
1399 -static inline bool cpu_has_broken_vmx_preemption_timer(void)
1400 -{
1401 - u32 eax = cpuid_eax(0x00000001), i;
1402 -
1403 - /* Clear the reserved bits */
1404 - eax &= ~(0x3U << 14 | 0xfU << 28);
1405 - for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
1406 - if (eax == vmx_preemption_cpu_tfms[i])
1407 - return true;
1408 -
1409 - return false;
1410 -}
1411 -
1412 -static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
1413 -{
1414 - return flexpriority_enabled && lapic_in_kernel(vcpu);
1415 -}
1416 -
1417 -static inline bool report_flexpriority(void)
1418 -{
1419 - return flexpriority_enabled;
1420 -}
1421 -
1422 -static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
1423 -{
1424 - int i;
1425 -
1426 - for (i = 0; i < vmx->nmsrs; ++i)
1427 - if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
1428 - return i;
1429 - return -1;
1430 -}
1431 -
1432 -struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
1433 -{
1434 - int i;
1435 -
1436 - i = __find_msr_index(vmx, msr);
1437 - if (i >= 0)
1438 - return &vmx->guest_msrs[i];
1439 - return NULL;
1440 -}
1441 -
1442 -static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
1443 -{
1444 - int ret = 0;
1445 -
1446 - u64 old_msr_data = msr->data;
1447 - msr->data = data;
1448 - if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
1449 - preempt_disable();
1450 - ret = kvm_set_shared_msr(msr->index, msr->data,
1451 - msr->mask);
1452 - preempt_enable();
1453 - if (ret)
1454 - msr->data = old_msr_data;
1455 - }
1456 - return ret;
1457 -}
1458 -
1459 -void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
1460 -{
1461 - vmcs_clear(loaded_vmcs->vmcs);
1462 - if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
1463 - vmcs_clear(loaded_vmcs->shadow_vmcs);
1464 - loaded_vmcs->cpu = -1;
1465 - loaded_vmcs->launched = 0;
1466 -}
1467 -
1468 -#ifdef CONFIG_KEXEC_CORE
1469 -/*
1470 - * This bitmap is used to indicate whether the vmclear
1471 - * operation is enabled on all cpus. All disabled by
1472 - * default.
1473 - */
1474 -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
1475 -
1476 -static inline void crash_enable_local_vmclear(int cpu)
1477 -{
1478 - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1479 -}
1480 -
1481 -static inline void crash_disable_local_vmclear(int cpu)
1482 -{
1483 - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
1484 -}
1485 -
1486 -static inline int crash_local_vmclear_enabled(int cpu)
1487 -{
1488 - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
1489 -}
1490 -
1491 -static void crash_vmclear_local_loaded_vmcss(void)
1492 -{
1493 - int cpu = raw_smp_processor_id();
1494 - struct loaded_vmcs *v;
1495 -
1496 - if (!crash_local_vmclear_enabled(cpu))
1497 - return;
1498 -
1499 - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
1500 - loaded_vmcss_on_cpu_link)
1501 - vmcs_clear(v->vmcs);
1502 -}
1503 -#else
1504 -static inline void crash_enable_local_vmclear(int cpu) { }
1505 -static inline void crash_disable_local_vmclear(int cpu) { }
1506 -#endif /* CONFIG_KEXEC_CORE */
1507 -
1508 -static void __loaded_vmcs_clear(void *arg)
1509 -{
1510 - struct loaded_vmcs *loaded_vmcs = arg;
1511 - int cpu = raw_smp_processor_id();
1512 -
1513 - if (loaded_vmcs->cpu != cpu)
1514 - return; /* vcpu migration can race with cpu offline */
1515 - if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
1516 - per_cpu(current_vmcs, cpu) = NULL;
1517 - crash_disable_local_vmclear(cpu);
1518 - list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
1519 -
1520 - /*
1521 - * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
1522 - * is before setting loaded_vmcs->vcpu to -1 which is done in
1523 - * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
1524 - * then adds the vmcs into percpu list before it is deleted.
1525 - */
1526 - smp_wmb();
1527 -
1528 - loaded_vmcs_init(loaded_vmcs);
1529 - crash_enable_local_vmclear(cpu);
1530 -}
1531 -
1532 -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
1533 -{
1534 - int cpu = loaded_vmcs->cpu;
1535 -
1536 - if (cpu != -1)
1537 - smp_call_function_single(cpu,
1538 - __loaded_vmcs_clear, loaded_vmcs, 1);
1539 -}
1540 -
1541 -static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
1542 - unsigned field)
1543 -{
1544 - bool ret;
1545 - u32 mask = 1 << (seg * SEG_FIELD_NR + field);
1546 -
1547 - if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
1548 - kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
1549 - vmx->segment_cache.bitmask = 0;
1550 - }
1551 - ret = vmx->segment_cache.bitmask & mask;
1552 - vmx->segment_cache.bitmask |= mask;
1553 - return ret;
1554 -}
1555 -
1556 -static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
1557 -{
1558 - u16 *p = &vmx->segment_cache.seg[seg].selector;
1559 -
1560 - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
1561 - *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
1562 - return *p;
1563 -}
1564 -
1565 -static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
1566 -{
1567 - ulong *p = &vmx->segment_cache.seg[seg].base;
1568 -
1569 - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
1570 - *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
1571 - return *p;
1572 -}
1573 -
1574 -static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
1575 -{
1576 - u32 *p = &vmx->segment_cache.seg[seg].limit;
1577 -
1578 - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
1579 - *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
1580 - return *p;
1581 -}
1582 -
1583 -static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
1584 -{
1585 - u32 *p = &vmx->segment_cache.seg[seg].ar;
1586 -
1587 - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
1588 - *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
1589 - return *p;
1590 -}
1591 -
1592 -void update_exception_bitmap(struct kvm_vcpu *vcpu)
1593 -{
1594 - u32 eb;
1595 -
1596 - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
1597 - (1u << DB_VECTOR) | (1u << AC_VECTOR);
1598 - /*
1599 - * Guest access to VMware backdoor ports could legitimately
1600 - * trigger #GP because of TSS I/O permission bitmap.
1601 - * We intercept those #GP and allow access to them anyway
1602 - * as VMware does.
1603 - */
1604 - if (enable_vmware_backdoor)
1605 - eb |= (1u << GP_VECTOR);
1606 - if ((vcpu->guest_debug &
1607 - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
1608 - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
1609 - eb |= 1u << BP_VECTOR;
1610 - if (to_vmx(vcpu)->rmode.vm86_active)
1611 - eb = ~0;
1612 - if (enable_ept)
1613 - eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
1614 -
1615 - /* When we are running a nested L2 guest and L1 specified for it a
1616 - * certain exception bitmap, we must trap the same exceptions and pass
1617 - * them to L1. When running L2, we will only handle the exceptions
1618 - * specified above if L1 did not want them.
1619 - */
1620 - if (is_guest_mode(vcpu))
1621 - eb |= get_vmcs12(vcpu)->exception_bitmap;
1622 -
1623 - vmcs_write32(EXCEPTION_BITMAP, eb);
1624 -}
1625 -
1626 -/*
1627 - * Check if MSR is intercepted for currently loaded MSR bitmap.
1628 - */
1629 -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
1630 -{
1631 - unsigned long *msr_bitmap;
1632 - int f = sizeof(unsigned long);
1633 -
1634 - if (!cpu_has_vmx_msr_bitmap())
1635 - return true;
1636 -
1637 - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
1638 -
1639 - if (msr <= 0x1fff) {
1640 - return !!test_bit(msr, msr_bitmap + 0x800 / f);
1641 - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1642 - msr &= 0x1fff;
1643 - return !!test_bit(msr, msr_bitmap + 0xc00 / f);
1644 - }
1645 -
1646 - return true;
1647 -}
1648 -
1649 -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1650 - unsigned long entry, unsigned long exit)
1651 -{
1652 - vm_entry_controls_clearbit(vmx, entry);
1653 - vm_exit_controls_clearbit(vmx, exit);
1654 -}
1655 -
1656 -int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
1657 -{
1658 - unsigned int i;
1659 -
1660 - for (i = 0; i < m->nr; ++i) {
1661 - if (m->val[i].index == msr)
1662 - return i;
1663 - }
1664 - return -ENOENT;
1665 -}
1666 -
1667 -static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1668 -{
1669 - int i;
1670 - struct msr_autoload *m = &vmx->msr_autoload;
1671 -
1672 - switch (msr) {
1673 - case MSR_EFER:
1674 - if (cpu_has_load_ia32_efer()) {
1675 - clear_atomic_switch_msr_special(vmx,
1676 - VM_ENTRY_LOAD_IA32_EFER,
1677 - VM_EXIT_LOAD_IA32_EFER);
1678 - return;
1679 - }
1680 - break;
1681 - case MSR_CORE_PERF_GLOBAL_CTRL:
1682 - if (cpu_has_load_perf_global_ctrl()) {
1683 - clear_atomic_switch_msr_special(vmx,
1684 - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1685 - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1686 - return;
1687 - }
1688 - break;
1689 - }
1690 - i = vmx_find_msr_index(&m->guest, msr);
1691 - if (i < 0)
1692 - goto skip_guest;
1693 - --m->guest.nr;
1694 - m->guest.val[i] = m->guest.val[m->guest.nr];
1695 - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1696 -
1697 -skip_guest:
1698 - i = vmx_find_msr_index(&m->host, msr);
1699 - if (i < 0)
1700 - return;
1701 -
1702 - --m->host.nr;
1703 - m->host.val[i] = m->host.val[m->host.nr];
1704 - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1705 -}
1706 -
1707 -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1708 - unsigned long entry, unsigned long exit,
1709 - unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1710 - u64 guest_val, u64 host_val)
1711 -{
1712 - vmcs_write64(guest_val_vmcs, guest_val);
1713 - if (host_val_vmcs != HOST_IA32_EFER)
1714 - vmcs_write64(host_val_vmcs, host_val);
1715 - vm_entry_controls_setbit(vmx, entry);
1716 - vm_exit_controls_setbit(vmx, exit);
1717 -}
1718 -
1719 -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1720 - u64 guest_val, u64 host_val, bool entry_only)
1721 -{
1722 - int i, j = 0;
1723 - struct msr_autoload *m = &vmx->msr_autoload;
1724 -
1725 - switch (msr) {
1726 - case MSR_EFER:
1727 - if (cpu_has_load_ia32_efer()) {
1728 - add_atomic_switch_msr_special(vmx,
1729 - VM_ENTRY_LOAD_IA32_EFER,
1730 - VM_EXIT_LOAD_IA32_EFER,
1731 - GUEST_IA32_EFER,
1732 - HOST_IA32_EFER,
1733 - guest_val, host_val);
1734 - return;
1735 - }
1736 - break;
1737 - case MSR_CORE_PERF_GLOBAL_CTRL:
1738 - if (cpu_has_load_perf_global_ctrl()) {
1739 - add_atomic_switch_msr_special(vmx,
1740 - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1741 - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1742 - GUEST_IA32_PERF_GLOBAL_CTRL,
1743 - HOST_IA32_PERF_GLOBAL_CTRL,
1744 - guest_val, host_val);
1745 - return;
1746 - }
1747 - break;
1748 - case MSR_IA32_PEBS_ENABLE:
1749 - /* PEBS needs a quiescent period after being disabled (to write
1750 - * a record). Disabling PEBS through VMX MSR swapping doesn't
1751 - * provide that period, so a CPU could write host's record into
1752 - * guest's memory.
1753 - */
1754 - wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1755 - }
1756 -
1757 - i = vmx_find_msr_index(&m->guest, msr);
1758 - if (!entry_only)
1759 - j = vmx_find_msr_index(&m->host, msr);
1760 -
1761 - if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
1762 - (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
1763 - printk_once(KERN_WARNING "Not enough msr switch entries. "
1764 - "Can't add msr %x\n", msr);
1765 - return;
1766 - }
1767 - if (i < 0) {
1768 - i = m->guest.nr++;
1769 - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1770 - }
1771 - m->guest.val[i].index = msr;
1772 - m->guest.val[i].value = guest_val;
1773 -
1774 - if (entry_only)
1775 - return;
1776 -
1777 - if (j < 0) {
1778 - j = m->host.nr++;
1779 - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1780 - }
1781 - m->host.val[j].index = msr;
1782 - m->host.val[j].value = host_val;
1783 -}
1784 -
1785 -static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
1786 -{
1787 - u64 guest_efer = vmx->vcpu.arch.efer;
1788 - u64 ignore_bits = 0;
1789 -
1790 - /* Shadow paging assumes NX to be available. */
1791 - if (!enable_ept)
1792 - guest_efer |= EFER_NX;
1793 -
1794 - /*
1795 - * LMA and LME handled by hardware; SCE meaningless outside long mode.
1796 - */
1797 - ignore_bits |= EFER_SCE;
1798 -#ifdef CONFIG_X86_64
1799 - ignore_bits |= EFER_LMA | EFER_LME;
1800 - /* SCE is meaningful only in long mode on Intel */
1801 - if (guest_efer & EFER_LMA)
1802 - ignore_bits &= ~(u64)EFER_SCE;
1803 -#endif
1804 -
1805 - /*
1806 - * On EPT, we can't emulate NX, so we must switch EFER atomically.
1807 - * On CPUs that support "load IA32_EFER", always switch EFER
1808 - * atomically, since it's faster than switching it manually.
1809 - */
1810 - if (cpu_has_load_ia32_efer() ||
1811 - (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
1812 - if (!(guest_efer & EFER_LMA))
1813 - guest_efer &= ~EFER_LME;
1814 - if (guest_efer != host_efer)
1815 - add_atomic_switch_msr(vmx, MSR_EFER,
1816 - guest_efer, host_efer, false);
1817 - else
1818 - clear_atomic_switch_msr(vmx, MSR_EFER);
1819 - return false;
1820 - } else {
1821 - clear_atomic_switch_msr(vmx, MSR_EFER);
1822 -
1823 - guest_efer &= ~ignore_bits;
1824 - guest_efer |= host_efer & ignore_bits;
1825 -
1826 - vmx->guest_msrs[efer_offset].data = guest_efer;
1827 - vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
1828 -
1829 - return true;
1830 - }
1831 -}
1832 -
1833 -#ifdef CONFIG_X86_32
1834 -/*
1835 - * On 32-bit kernels, VM exits still load the FS and GS bases from the
1836 - * VMCS rather than the segment table. KVM uses this helper to figure
1837 - * out the current bases to poke them into the VMCS before entry.
1838 - */
1839 -static unsigned long segment_base(u16 selector)
1840 -{
1841 - struct desc_struct *table;
1842 - unsigned long v;
1843 -
1844 - if (!(selector & ~SEGMENT_RPL_MASK))
1845 - return 0;
1846 -
1847 - table = get_current_gdt_ro();
1848 -
1849 - if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1850 - u16 ldt_selector = kvm_read_ldt();
1851 -
1852 - if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1853 - return 0;
1854 -
1855 - table = (struct desc_struct *)segment_base(ldt_selector);
1856 - }
1857 - v = get_desc_base(&table[selector >> 3]);
1858 - return v;
1859 -}
1860 -#endif
1861 -
1862 -static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
1863 -{
1864 - u32 i;
1865 -
1866 - wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1867 - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1868 - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1869 - wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1870 - for (i = 0; i < addr_range; i++) {
1871 - wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1872 - wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1873 - }
1874 -}
1875 -
1876 -static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
1877 -{
1878 - u32 i;
1879 -
1880 - rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1881 - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1882 - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1883 - rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1884 - for (i = 0; i < addr_range; i++) {
1885 - rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1886 - rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1887 - }
1888 -}
1889 -
1890 -static void pt_guest_enter(struct vcpu_vmx *vmx)
1891 -{
1892 - if (pt_mode == PT_MODE_SYSTEM)
1893 - return;
1894 -
1895 - /*
1896 - * GUEST_IA32_RTIT_CTL is already set in the VMCS.
1897 - * Save host state before VM entry.
1898 - */
1899 - rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1900 - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1901 - wrmsrl(MSR_IA32_RTIT_CTL, 0);
1902 - pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1903 - pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1904 - }
1905 -}
1906 -
1907 -static void pt_guest_exit(struct vcpu_vmx *vmx)
1908 -{
1909 - if (pt_mode == PT_MODE_SYSTEM)
1910 - return;
1911 -
1912 - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1913 - pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1914 - pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1915 - }
1916 -
1917 - /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */
1918 - wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1919 -}
1920 -
1921 -void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
1922 - unsigned long fs_base, unsigned long gs_base)
1923 -{
1924 - if (unlikely(fs_sel != host->fs_sel)) {
1925 - if (!(fs_sel & 7))
1926 - vmcs_write16(HOST_FS_SELECTOR, fs_sel);
1927 - else
1928 - vmcs_write16(HOST_FS_SELECTOR, 0);
1929 - host->fs_sel = fs_sel;
1930 - }
1931 - if (unlikely(gs_sel != host->gs_sel)) {
1932 - if (!(gs_sel & 7))
1933 - vmcs_write16(HOST_GS_SELECTOR, gs_sel);
1934 - else
1935 - vmcs_write16(HOST_GS_SELECTOR, 0);
1936 - host->gs_sel = gs_sel;
1937 - }
1938 - if (unlikely(fs_base != host->fs_base)) {
1939 - vmcs_writel(HOST_FS_BASE, fs_base);
1940 - host->fs_base = fs_base;
1941 - }
1942 - if (unlikely(gs_base != host->gs_base)) {
1943 - vmcs_writel(HOST_GS_BASE, gs_base);
1944 - host->gs_base = gs_base;
1945 - }
1946 -}
1947 -
1948 -void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1949 -{
1950 - struct vcpu_vmx *vmx = to_vmx(vcpu);
1951 - struct vmcs_host_state *host_state;
1952 -#ifdef CONFIG_X86_64
1953 - int cpu = raw_smp_processor_id();
1954 -#endif
1955 - unsigned long fs_base, gs_base;
1956 - u16 fs_sel, gs_sel;
1957 - int i;
1958 -
1959 - vmx->req_immediate_exit = false;
1960 -
1961 - /*
1962 - * Note that guest MSRs to be saved/restored can also be changed
1963 - * when guest state is loaded. This happens when guest transitions
1964 - * to/from long-mode by setting MSR_EFER.LMA.
1965 - */
1966 - if (!vmx->guest_msrs_ready) {
1967 - vmx->guest_msrs_ready = true;
1968 - for (i = 0; i < vmx->save_nmsrs; ++i)
1969 - kvm_set_shared_msr(vmx->guest_msrs[i].index,
1970 - vmx->guest_msrs[i].data,
1971 - vmx->guest_msrs[i].mask);
1972 -
1973 - }
1974 - if (vmx->guest_state_loaded)
1975 - return;
1976 -
1977 - host_state = &vmx->loaded_vmcs->host_state;
1978 -
1979 - /*
1980 - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
1981 - * allow segment selectors with cpl > 0 or ti == 1.
1982 - */
1983 - host_state->ldt_sel = kvm_read_ldt();
1984 -
1985 -#ifdef CONFIG_X86_64
1986 - savesegment(ds, host_state->ds_sel);
1987 - savesegment(es, host_state->es_sel);
1988 -
1989 - gs_base = cpu_kernelmode_gs_base(cpu);
1990 - if (likely(is_64bit_mm(current->mm))) {
1991 - save_fsgs_for_kvm();
1992 - fs_sel = current->thread.fsindex;
1993 - gs_sel = current->thread.gsindex;
1994 - fs_base = current->thread.fsbase;
1995 - vmx->msr_host_kernel_gs_base = current->thread.gsbase;
1996 - } else {
1997 - savesegment(fs, fs_sel);
1998 - savesegment(gs, gs_sel);
1999 - fs_base = read_msr(MSR_FS_BASE);
2000 - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
2001 - }
2002 -
2003 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2004 -#else
2005 - savesegment(fs, fs_sel);
2006 - savesegment(gs, gs_sel);
2007 - fs_base = segment_base(fs_sel);
2008 - gs_base = segment_base(gs_sel);
2009 -#endif
2010 -
2011 - vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
2012 - vmx->guest_state_loaded = true;
2013 -}
2014 -
2015 -static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
2016 -{
2017 - struct vmcs_host_state *host_state;
2018 -
2019 - if (!vmx->guest_state_loaded)
2020 - return;
2021 -
2022 - host_state = &vmx->loaded_vmcs->host_state;
2023 -
2024 - ++vmx->vcpu.stat.host_state_reload;
2025 -
2026 -#ifdef CONFIG_X86_64
2027 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2028 -#endif
2029 - if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2030 - kvm_load_ldt(host_state->ldt_sel);
2031 -#ifdef CONFIG_X86_64
2032 - load_gs_index(host_state->gs_sel);
2033 -#else
2034 - loadsegment(gs, host_state->gs_sel);
2035 -#endif
2036 - }
2037 - if (host_state->fs_sel & 7)
2038 - loadsegment(fs, host_state->fs_sel);
2039 -#ifdef CONFIG_X86_64
2040 - if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2041 - loadsegment(ds, host_state->ds_sel);
2042 - loadsegment(es, host_state->es_sel);
2043 - }
2044 -#endif
2045 - invalidate_tss_limit();
2046 -#ifdef CONFIG_X86_64
2047 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
2048 -#endif
2049 - load_fixmap_gdt(raw_smp_processor_id());
2050 - vmx->guest_state_loaded = false;
2051 - vmx->guest_msrs_ready = false;
2052 -}
2053 -
2054 -#ifdef CONFIG_X86_64
2055 -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
2056 -{
2057 - preempt_disable();
2058 - if (vmx->guest_state_loaded)
2059 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2060 - preempt_enable();
2061 - return vmx->msr_guest_kernel_gs_base;
2062 -}
2063 -
2064 -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
2065 -{
2066 - preempt_disable();
2067 - if (vmx->guest_state_loaded)
2068 - wrmsrl(MSR_KERNEL_GS_BASE, data);
2069 - preempt_enable();
2070 - vmx->msr_guest_kernel_gs_base = data;
2071 -}
2072 -#endif
2073 -
2074 -static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2075 -{
2076 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2077 - struct pi_desc old, new;
2078 - unsigned int dest;
2079 -
2080 - /*
2081 - * In case of hot-plug or hot-unplug, we may have to undo
2082 - * vmx_vcpu_pi_put even if there is no assigned device. And we
2083 - * always keep PI.NDST up to date for simplicity: it makes the
2084 - * code easier, and CPU migration is not a fast path.
2085 - */
2086 - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
2087 - return;
2088 -
2089 - /*
2090 - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2091 - * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
2092 - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
2093 - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
2094 - * correctly.
2095 - */
2096 - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
2097 - pi_clear_sn(pi_desc);
2098 - goto after_clear_sn;
2099 - }
2100 -
2101 - /* The full case. */
2102 - do {
2103 - old.control = new.control = pi_desc->control;
2104 -
2105 - dest = cpu_physical_id(cpu);
2106 -
2107 - if (x2apic_enabled())
2108 - new.ndst = dest;
2109 - else
2110 - new.ndst = (dest << 8) & 0xFF00;
2111 -
2112 - new.sn = 0;
2113 - } while (cmpxchg64(&pi_desc->control, old.control,
2114 - new.control) != old.control);
2115 -
2116 -after_clear_sn:
2117 -
2118 - /*
2119 - * Clear SN before reading the bitmap. The VT-d firmware
2120 - * writes the bitmap and reads SN atomically (5.2.3 in the
2121 - * spec), so it doesn't really have a memory barrier that
2122 - * pairs with this, but we cannot do that and we need one.
2123 - */
2124 - smp_mb__after_atomic();
2125 -
2126 - if (!pi_is_pir_empty(pi_desc))
2127 - pi_set_on(pi_desc);
2128 -}
2129 -
2130 -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
2131 -{
2132 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2133 - bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
2134 -
2135 - if (!already_loaded) {
2136 - loaded_vmcs_clear(vmx->loaded_vmcs);
2137 - local_irq_disable();
2138 - crash_disable_local_vmclear(cpu);
2139 -
2140 - /*
2141 - * Read loaded_vmcs->cpu should be before fetching
2142 - * loaded_vmcs->loaded_vmcss_on_cpu_link.
2143 - * See the comments in __loaded_vmcs_clear().
2144 - */
2145 - smp_rmb();
2146 -
2147 - list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2148 - &per_cpu(loaded_vmcss_on_cpu, cpu));
2149 - crash_enable_local_vmclear(cpu);
2150 - local_irq_enable();
2151 - }
2152 -
2153 - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2154 - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2155 - vmcs_load(vmx->loaded_vmcs->vmcs);
2156 - indirect_branch_prediction_barrier();
2157 - }
2158 -
2159 - if (!already_loaded) {
2160 - void *gdt = get_current_gdt_ro();
2161 - unsigned long sysenter_esp;
2162 -
2163 - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2164 -
2165 - /*
2166 - * Linux uses per-cpu TSS and GDT, so set these when switching
2167 - * processors. See 22.2.4.
2168 - */
2169 - vmcs_writel(HOST_TR_BASE,
2170 - (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2171 - vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2172 -
2173 - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2174 - vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
2175 -
2176 - vmx->loaded_vmcs->cpu = cpu;
2177 - }
2178 -
2179 - /* Setup TSC multiplier */
2180 - if (kvm_has_tsc_control &&
2181 - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2182 - decache_tsc_multiplier(vmx);
2183 -}
2184 -
2185 -/*
2186 - * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2187 - * vcpu mutex is already taken.
2188 - */
2189 -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2190 -{
2191 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2192 -
2193 - vmx_vcpu_load_vmcs(vcpu, cpu);
2194 -
2195 - vmx_vcpu_pi_load(vcpu, cpu);
2196 -
2197 - vmx->host_pkru = read_pkru();
2198 - vmx->host_debugctlmsr = get_debugctlmsr();
2199 -}
2200 -
2201 -static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2202 -{
2203 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2204 -
2205 - if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
2206 - !irq_remapping_cap(IRQ_POSTING_CAP) ||
2207 - !kvm_vcpu_apicv_active(vcpu))
2208 - return;
2209 -
2210 - /* Set SN when the vCPU is preempted */
2211 - if (vcpu->preempted)
2212 - pi_set_sn(pi_desc);
2213 -}
2214 -
2215 -static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2216 -{
2217 - vmx_vcpu_pi_put(vcpu);
2218 -
2219 - vmx_prepare_switch_to_host(to_vmx(vcpu));
2220 -}
2221 -
2222 -static bool emulation_required(struct kvm_vcpu *vcpu)
2223 -{
2224 - return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2225 -}
2226 -
2227 -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2228 -
2229 -unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2230 -{
2231 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2232 - unsigned long rflags, save_rflags;
2233 -
2234 - if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
2235 - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2236 - rflags = vmcs_readl(GUEST_RFLAGS);
2237 - if (vmx->rmode.vm86_active) {
2238 - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2239 - save_rflags = vmx->rmode.save_rflags;
2240 - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2241 - }
2242 - vmx->rflags = rflags;
2243 - }
2244 - return vmx->rflags;
2245 -}
2246 -
2247 -void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2248 -{
2249 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2250 - unsigned long old_rflags;
2251 -
2252 - if (enable_unrestricted_guest) {
2253 - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2254 - vmx->rflags = rflags;
2255 - vmcs_writel(GUEST_RFLAGS, rflags);
2256 - return;
2257 - }
2258 -
2259 - old_rflags = vmx_get_rflags(vcpu);
2260 - vmx->rflags = rflags;
2261 - if (vmx->rmode.vm86_active) {
2262 - vmx->rmode.save_rflags = rflags;
2263 - rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2264 - }
2265 - vmcs_writel(GUEST_RFLAGS, rflags);
2266 -
2267 - if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
2268 - vmx->emulation_required = emulation_required(vcpu);
2269 -}
2270 -
2271 -u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2272 -{
2273 - u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2274 - int ret = 0;
2275 -
2276 - if (interruptibility & GUEST_INTR_STATE_STI)
2277 - ret |= KVM_X86_SHADOW_INT_STI;
2278 - if (interruptibility & GUEST_INTR_STATE_MOV_SS)
2279 - ret |= KVM_X86_SHADOW_INT_MOV_SS;
2280 -
2281 - return ret;
2282 -}
2283 -
2284 -void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
2285 -{
2286 - u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2287 - u32 interruptibility = interruptibility_old;
2288 -
2289 - interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
2290 -
2291 - if (mask & KVM_X86_SHADOW_INT_MOV_SS)
2292 - interruptibility |= GUEST_INTR_STATE_MOV_SS;
2293 - else if (mask & KVM_X86_SHADOW_INT_STI)
2294 - interruptibility |= GUEST_INTR_STATE_STI;
2295 -
2296 - if ((interruptibility != interruptibility_old))
2297 - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
2298 -}
2299 -
2300 -static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
2301 -{
2302 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2303 - unsigned long value;
2304 -
2305 - /*
2306 - * Any MSR write that attempts to change bits marked reserved will
2307 - * case a #GP fault.
2308 - */
2309 - if (data & vmx->pt_desc.ctl_bitmask)
2310 - return 1;
2311 -
2312 - /*
2313 - * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
2314 - * result in a #GP unless the same write also clears TraceEn.
2315 - */
2316 - if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
2317 - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
2318 - return 1;
2319 -
2320 - /*
2321 - * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
2322 - * and FabricEn would cause #GP, if
2323 - * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
2324 - */
2325 - if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
2326 - !(data & RTIT_CTL_FABRIC_EN) &&
2327 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2328 - PT_CAP_single_range_output))
2329 - return 1;
2330 -
2331 - /*
2332 - * MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that
2333 - * utilize encodings marked reserved will casue a #GP fault.
2334 - */
2335 - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
2336 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
2337 - !test_bit((data & RTIT_CTL_MTC_RANGE) >>
2338 - RTIT_CTL_MTC_RANGE_OFFSET, &value))
2339 - return 1;
2340 - value = intel_pt_validate_cap(vmx->pt_desc.caps,
2341 - PT_CAP_cycle_thresholds);
2342 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2343 - !test_bit((data & RTIT_CTL_CYC_THRESH) >>
2344 - RTIT_CTL_CYC_THRESH_OFFSET, &value))
2345 - return 1;
2346 - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
2347 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2348 - !test_bit((data & RTIT_CTL_PSB_FREQ) >>
2349 - RTIT_CTL_PSB_FREQ_OFFSET, &value))
2350 - return 1;
2351 -
2352 - /*
2353 - * If ADDRx_CFG is reserved or the encodings is >2 will
2354 - * cause a #GP fault.
2355 - */
2356 - value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
2357 - if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
2358 - return 1;
2359 - value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
2360 - if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
2361 - return 1;
2362 - value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
2363 - if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
2364 - return 1;
2365 - value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
2366 - if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
2367 - return 1;
2368 -
2369 - return 0;
2370 -}
2371 -
2372 -static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
2373 -{
2374 - unsigned long rip;
2375 -
2376 - /*
2377 - * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
2378 - * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
2379 - * set when EPT misconfig occurs. In practice, real hardware updates
2380 - * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
2381 - * (namely Hyper-V) don't set it due to it being undefined behavior,
2382 - * i.e. we end up advancing IP with some random value.
2383 - */
2384 - if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
2385 - to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
2386 - rip = kvm_rip_read(vcpu);
2387 - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2388 - kvm_rip_write(vcpu, rip);
2389 - } else {
2390 - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
2391 - return 0;
2392 - }
2393 -
2394 - /* skipping an emulated instruction also counts */
2395 - vmx_set_interrupt_shadow(vcpu, 0);
2396 -
2397 - return 1;
2398 -}
2399 -
2400 -static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
2401 -{
2402 - /*
2403 - * Ensure that we clear the HLT state in the VMCS. We don't need to
2404 - * explicitly skip the instruction because if the HLT state is set,
2405 - * then the instruction is already executing and RIP has already been
2406 - * advanced.
2407 - */
2408 - if (kvm_hlt_in_guest(vcpu->kvm) &&
2409 - vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
2410 - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
2411 -}
2412 -
2413 -static void vmx_queue_exception(struct kvm_vcpu *vcpu)
2414 -{
2415 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2416 - unsigned nr = vcpu->arch.exception.nr;
2417 - bool has_error_code = vcpu->arch.exception.has_error_code;
2418 - u32 error_code = vcpu->arch.exception.error_code;
2419 - u32 intr_info = nr | INTR_INFO_VALID_MASK;
2420 -
2421 - kvm_deliver_exception_payload(vcpu);
2422 -
2423 - if (has_error_code) {
2424 - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
2425 - intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2426 - }
2427 -
2428 - if (vmx->rmode.vm86_active) {
2429 - int inc_eip = 0;
2430 - if (kvm_exception_is_soft(nr))
2431 - inc_eip = vcpu->arch.event_exit_inst_len;
2432 - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
2433 - return;
2434 - }
2435 -
2436 - WARN_ON_ONCE(vmx->emulation_required);
2437 -
2438 - if (kvm_exception_is_soft(nr)) {
2439 - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2440 - vmx->vcpu.arch.event_exit_inst_len);
2441 - intr_info |= INTR_TYPE_SOFT_EXCEPTION;
2442 - } else
2443 - intr_info |= INTR_TYPE_HARD_EXCEPTION;
2444 -
2445 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
2446 -
2447 - vmx_clear_hlt(vcpu);
2448 -}
2449 -
2450 -static bool vmx_rdtscp_supported(void)
2451 -{
2452 - return cpu_has_vmx_rdtscp();
2453 -}
2454 -
2455 -static bool vmx_invpcid_supported(void)
2456 -{
2457 - return cpu_has_vmx_invpcid();
2458 -}
2459 -
2460 -/*
2461 - * Swap MSR entry in host/guest MSR entry array.
2462 - */
2463 -static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
2464 -{
2465 - struct shared_msr_entry tmp;
2466 -
2467 - tmp = vmx->guest_msrs[to];
2468 - vmx->guest_msrs[to] = vmx->guest_msrs[from];
2469 - vmx->guest_msrs[from] = tmp;
2470 -}
2471 -
2472 -/*
2473 - * Set up the vmcs to automatically save and restore system
2474 - * msrs. Don't touch the 64-bit msrs if the guest is in legacy
2475 - * mode, as fiddling with msrs is very expensive.
2476 - */
2477 -static void setup_msrs(struct vcpu_vmx *vmx)
2478 -{
2479 - int save_nmsrs, index;
2480 -
2481 - save_nmsrs = 0;
2482 -#ifdef CONFIG_X86_64
2483 - /*
2484 - * The SYSCALL MSRs are only needed on long mode guests, and only
2485 - * when EFER.SCE is set.
2486 - */
2487 - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
2488 - index = __find_msr_index(vmx, MSR_STAR);
2489 - if (index >= 0)
2490 - move_msr_up(vmx, index, save_nmsrs++);
2491 - index = __find_msr_index(vmx, MSR_LSTAR);
2492 - if (index >= 0)
2493 - move_msr_up(vmx, index, save_nmsrs++);
2494 - index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
2495 - if (index >= 0)
2496 - move_msr_up(vmx, index, save_nmsrs++);
2497 - }
2498 -#endif
2499 - index = __find_msr_index(vmx, MSR_EFER);
2500 - if (index >= 0 && update_transition_efer(vmx, index))
2501 - move_msr_up(vmx, index, save_nmsrs++);
2502 - index = __find_msr_index(vmx, MSR_TSC_AUX);
2503 - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
2504 - move_msr_up(vmx, index, save_nmsrs++);
2505 - index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
2506 - if (index >= 0)
2507 - move_msr_up(vmx, index, save_nmsrs++);
2508 -
2509 - vmx->save_nmsrs = save_nmsrs;
2510 - vmx->guest_msrs_ready = false;
2511 -
2512 - if (cpu_has_vmx_msr_bitmap())
2513 - vmx_update_msr_bitmap(&vmx->vcpu);
2514 -}
2515 -
2516 -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
2517 -{
2518 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2519 -
2520 - if (is_guest_mode(vcpu) &&
2521 - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
2522 - return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
2523 -
2524 - return vcpu->arch.tsc_offset;
2525 -}
2526 -
2527 -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2528 -{
2529 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2530 - u64 g_tsc_offset = 0;
2531 -
2532 - /*
2533 - * We're here if L1 chose not to trap WRMSR to TSC. According
2534 - * to the spec, this should set L1's TSC; The offset that L1
2535 - * set for L2 remains unchanged, and still needs to be added
2536 - * to the newly set TSC to get L2's TSC.
2537 - */
2538 - if (is_guest_mode(vcpu) &&
2539 - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
2540 - g_tsc_offset = vmcs12->tsc_offset;
2541 -
2542 - trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2543 - vcpu->arch.tsc_offset - g_tsc_offset,
2544 - offset);
2545 - vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
2546 - return offset + g_tsc_offset;
2547 -}
2548 -
2549 -/*
2550 - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
2551 - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
2552 - * all guests if the "nested" module option is off, and can also be disabled
2553 - * for a single guest by disabling its VMX cpuid bit.
2554 - */
2555 -bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
2556 -{
2557 - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
2558 -}
2559 -
2560 -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
2561 - uint64_t val)
2562 -{
2563 - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
2564 -
2565 - return !(val & ~valid_bits);
2566 -}
2567 -
2568 -static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
2569 -{
2570 - switch (msr->index) {
2571 - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2572 - if (!nested)
2573 - return 1;
2574 - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
2575 - default:
2576 - return 1;
2577 - }
2578 -}
2579 -
2580 -/*
2581 - * Reads an msr value (of 'msr_index') into 'pdata'.
2582 - * Returns 0 on success, non-0 otherwise.
2583 - * Assumes vcpu_load() was already called.
2584 - */
2585 -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2586 -{
2587 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2588 - struct shared_msr_entry *msr;
2589 - u32 index;
2590 -
2591 - switch (msr_info->index) {
2592 -#ifdef CONFIG_X86_64
2593 - case MSR_FS_BASE:
2594 - msr_info->data = vmcs_readl(GUEST_FS_BASE);
2595 - break;
2596 - case MSR_GS_BASE:
2597 - msr_info->data = vmcs_readl(GUEST_GS_BASE);
2598 - break;
2599 - case MSR_KERNEL_GS_BASE:
2600 - msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
2601 - break;
2602 -#endif
2603 - case MSR_EFER:
2604 - return kvm_get_msr_common(vcpu, msr_info);
2605 - case MSR_IA32_TSX_CTRL:
2606 - if (!msr_info->host_initiated &&
2607 - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
2608 - return 1;
2609 - goto find_shared_msr;
2610 - case MSR_IA32_UMWAIT_CONTROL:
2611 - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
2612 - return 1;
2613 -
2614 - msr_info->data = vmx->msr_ia32_umwait_control;
2615 - break;
2616 - case MSR_IA32_SPEC_CTRL:
2617 - if (!msr_info->host_initiated &&
2618 - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2619 - return 1;
2620 -
2621 - msr_info->data = to_vmx(vcpu)->spec_ctrl;
2622 - break;
2623 - case MSR_IA32_SYSENTER_CS:
2624 - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
2625 - break;
2626 - case MSR_IA32_SYSENTER_EIP:
2627 - msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
2628 - break;
2629 - case MSR_IA32_SYSENTER_ESP:
2630 - msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
2631 - break;
2632 - case MSR_IA32_BNDCFGS:
2633 - if (!kvm_mpx_supported() ||
2634 - (!msr_info->host_initiated &&
2635 - !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
2636 - return 1;
2637 - msr_info->data = vmcs_read64(GUEST_BNDCFGS);
2638 - break;
2639 - case MSR_IA32_MCG_EXT_CTL:
2640 - if (!msr_info->host_initiated &&
2641 - !(vmx->msr_ia32_feature_control &
2642 - FEATURE_CONTROL_LMCE))
2643 - return 1;
2644 - msr_info->data = vcpu->arch.mcg_ext_ctl;
2645 - break;
2646 - case MSR_IA32_FEATURE_CONTROL:
2647 - msr_info->data = vmx->msr_ia32_feature_control;
2648 - break;
2649 - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2650 - if (!nested_vmx_allowed(vcpu))
2651 - return 1;
2652 - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
2653 - &msr_info->data);
2654 - case MSR_IA32_RTIT_CTL:
2655 - if (pt_mode != PT_MODE_HOST_GUEST)
2656 - return 1;
2657 - msr_info->data = vmx->pt_desc.guest.ctl;
2658 - break;
2659 - case MSR_IA32_RTIT_STATUS:
2660 - if (pt_mode != PT_MODE_HOST_GUEST)
2661 - return 1;
2662 - msr_info->data = vmx->pt_desc.guest.status;
2663 - break;
2664 - case MSR_IA32_RTIT_CR3_MATCH:
2665 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2666 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2667 - PT_CAP_cr3_filtering))
2668 - return 1;
2669 - msr_info->data = vmx->pt_desc.guest.cr3_match;
2670 - break;
2671 - case MSR_IA32_RTIT_OUTPUT_BASE:
2672 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2673 - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2674 - PT_CAP_topa_output) &&
2675 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2676 - PT_CAP_single_range_output)))
2677 - return 1;
2678 - msr_info->data = vmx->pt_desc.guest.output_base;
2679 - break;
2680 - case MSR_IA32_RTIT_OUTPUT_MASK:
2681 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2682 - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2683 - PT_CAP_topa_output) &&
2684 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2685 - PT_CAP_single_range_output)))
2686 - return 1;
2687 - msr_info->data = vmx->pt_desc.guest.output_mask;
2688 - break;
2689 - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2690 - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2691 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2692 - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2693 - PT_CAP_num_address_ranges)))
2694 - return 1;
2695 - if (is_noncanonical_address(data, vcpu))
2696 - return 1;
2697 - if (index % 2)
2698 - msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
2699 - else
2700 - msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
2701 - break;
2702 - case MSR_TSC_AUX:
2703 - if (!msr_info->host_initiated &&
2704 - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2705 - return 1;
2706 - goto find_shared_msr;
2707 - default:
2708 - find_shared_msr:
2709 - msr = find_msr_entry(vmx, msr_info->index);
2710 - if (msr) {
2711 - msr_info->data = msr->data;
2712 - break;
2713 - }
2714 - return kvm_get_msr_common(vcpu, msr_info);
2715 - }
2716 -
2717 - return 0;
2718 -}
2719 -
2720 -/*
2721 - * Writes msr value into the appropriate "register".
2722 - * Returns 0 on success, non-0 otherwise.
2723 - * Assumes vcpu_load() was already called.
2724 - */
2725 -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2726 -{
2727 - struct vcpu_vmx *vmx = to_vmx(vcpu);
2728 - struct shared_msr_entry *msr;
2729 - int ret = 0;
2730 - u32 msr_index = msr_info->index;
2731 - u64 data = msr_info->data;
2732 - u32 index;
2733 -
2734 - switch (msr_index) {
2735 - case MSR_EFER:
2736 - ret = kvm_set_msr_common(vcpu, msr_info);
2737 - break;
2738 -#ifdef CONFIG_X86_64
2739 - case MSR_FS_BASE:
2740 - vmx_segment_cache_clear(vmx);
2741 - vmcs_writel(GUEST_FS_BASE, data);
2742 - break;
2743 - case MSR_GS_BASE:
2744 - vmx_segment_cache_clear(vmx);
2745 - vmcs_writel(GUEST_GS_BASE, data);
2746 - break;
2747 - case MSR_KERNEL_GS_BASE:
2748 - vmx_write_guest_kernel_gs_base(vmx, data);
2749 - break;
2750 -#endif
2751 - case MSR_IA32_SYSENTER_CS:
2752 - if (is_guest_mode(vcpu))
2753 - get_vmcs12(vcpu)->guest_sysenter_cs = data;
2754 - vmcs_write32(GUEST_SYSENTER_CS, data);
2755 - break;
2756 - case MSR_IA32_SYSENTER_EIP:
2757 - if (is_guest_mode(vcpu))
2758 - get_vmcs12(vcpu)->guest_sysenter_eip = data;
2759 - vmcs_writel(GUEST_SYSENTER_EIP, data);
2760 - break;
2761 - case MSR_IA32_SYSENTER_ESP:
2762 - if (is_guest_mode(vcpu))
2763 - get_vmcs12(vcpu)->guest_sysenter_esp = data;
2764 - vmcs_writel(GUEST_SYSENTER_ESP, data);
2765 - break;
2766 - case MSR_IA32_DEBUGCTLMSR:
2767 - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
2768 - VM_EXIT_SAVE_DEBUG_CONTROLS)
2769 - get_vmcs12(vcpu)->guest_ia32_debugctl = data;
2770 -
2771 - ret = kvm_set_msr_common(vcpu, msr_info);
2772 - break;
2773 -
2774 - case MSR_IA32_BNDCFGS:
2775 - if (!kvm_mpx_supported() ||
2776 - (!msr_info->host_initiated &&
2777 - !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
2778 - return 1;
2779 - if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
2780 - (data & MSR_IA32_BNDCFGS_RSVD))
2781 - return 1;
2782 - vmcs_write64(GUEST_BNDCFGS, data);
2783 - break;
2784 - case MSR_IA32_UMWAIT_CONTROL:
2785 - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
2786 - return 1;
2787 -
2788 - /* The reserved bit 1 and non-32 bit [63:32] should be zero */
2789 - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
2790 - return 1;
2791 -
2792 - vmx->msr_ia32_umwait_control = data;
2793 - break;
2794 - case MSR_IA32_SPEC_CTRL:
2795 - if (!msr_info->host_initiated &&
2796 - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2797 - return 1;
2798 -
2799 - /* The STIBP bit doesn't fault even if it's not advertised */
2800 - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
2801 - return 1;
2802 -
2803 - vmx->spec_ctrl = data;
2804 -
2805 - if (!data)
2806 - break;
2807 -
2808 - /*
2809 - * For non-nested:
2810 - * When it's written (to non-zero) for the first time, pass
2811 - * it through.
2812 - *
2813 - * For nested:
2814 - * The handling of the MSR bitmap for L2 guests is done in
2815 - * nested_vmx_prepare_msr_bitmap. We should not touch the
2816 - * vmcs02.msr_bitmap here since it gets completely overwritten
2817 - * in the merging. We update the vmcs01 here for L1 as well
2818 - * since it will end up touching the MSR anyway now.
2819 - */
2820 - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
2821 - MSR_IA32_SPEC_CTRL,
2822 - MSR_TYPE_RW);
2823 - break;
2824 - case MSR_IA32_TSX_CTRL:
2825 - if (!msr_info->host_initiated &&
2826 - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
2827 - return 1;
2828 - if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
2829 - return 1;
2830 - goto find_shared_msr;
2831 - case MSR_IA32_PRED_CMD:
2832 - if (!msr_info->host_initiated &&
2833 - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2834 - return 1;
2835 -
2836 - if (data & ~PRED_CMD_IBPB)
2837 - return 1;
2838 -
2839 - if (!data)
2840 - break;
2841 -
2842 - wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2843 -
2844 - /*
2845 - * For non-nested:
2846 - * When it's written (to non-zero) for the first time, pass
2847 - * it through.
2848 - *
2849 - * For nested:
2850 - * The handling of the MSR bitmap for L2 guests is done in
2851 - * nested_vmx_prepare_msr_bitmap. We should not touch the
2852 - * vmcs02.msr_bitmap here since it gets completely overwritten
2853 - * in the merging.
2854 - */
2855 - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
2856 - MSR_TYPE_W);
2857 - break;
2858 - case MSR_IA32_CR_PAT:
2859 - if (!kvm_pat_valid(data))
2860 - return 1;
2861 -
2862 - if (is_guest_mode(vcpu) &&
2863 - get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
2864 - get_vmcs12(vcpu)->guest_ia32_pat = data;
2865 -
2866 - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2867 - vmcs_write64(GUEST_IA32_PAT, data);
2868 - vcpu->arch.pat = data;
2869 - break;
2870 - }
2871 - ret = kvm_set_msr_common(vcpu, msr_info);
2872 - break;
2873 - case MSR_IA32_TSC_ADJUST:
2874 - ret = kvm_set_msr_common(vcpu, msr_info);
2875 - break;
2876 - case MSR_IA32_MCG_EXT_CTL:
2877 - if ((!msr_info->host_initiated &&
2878 - !(to_vmx(vcpu)->msr_ia32_feature_control &
2879 - FEATURE_CONTROL_LMCE)) ||
2880 - (data & ~MCG_EXT_CTL_LMCE_EN))
2881 - return 1;
2882 - vcpu->arch.mcg_ext_ctl = data;
2883 - break;
2884 - case MSR_IA32_FEATURE_CONTROL:
2885 - if (!vmx_feature_control_msr_valid(vcpu, data) ||
2886 - (to_vmx(vcpu)->msr_ia32_feature_control &
2887 - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2888 - return 1;
2889 - vmx->msr_ia32_feature_control = data;
2890 - if (msr_info->host_initiated && data == 0)
2891 - vmx_leave_nested(vcpu);
2892 - break;
2893 - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2894 - if (!msr_info->host_initiated)
2895 - return 1; /* they are read-only */
2896 - if (!nested_vmx_allowed(vcpu))
2897 - return 1;
2898 - return vmx_set_vmx_msr(vcpu, msr_index, data);
2899 - case MSR_IA32_RTIT_CTL:
2900 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2901 - vmx_rtit_ctl_check(vcpu, data) ||
2902 - vmx->nested.vmxon)
2903 - return 1;
2904 - vmcs_write64(GUEST_IA32_RTIT_CTL, data);
2905 - vmx->pt_desc.guest.ctl = data;
2906 - pt_update_intercept_for_msr(vmx);
2907 - break;
2908 - case MSR_IA32_RTIT_STATUS:
2909 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2910 - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2911 - (data & MSR_IA32_RTIT_STATUS_MASK))
2912 - return 1;
2913 - vmx->pt_desc.guest.status = data;
2914 - break;
2915 - case MSR_IA32_RTIT_CR3_MATCH:
2916 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2917 - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2918 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2919 - PT_CAP_cr3_filtering))
2920 - return 1;
2921 - vmx->pt_desc.guest.cr3_match = data;
2922 - break;
2923 - case MSR_IA32_RTIT_OUTPUT_BASE:
2924 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2925 - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2926 - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2927 - PT_CAP_topa_output) &&
2928 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2929 - PT_CAP_single_range_output)) ||
2930 - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
2931 - return 1;
2932 - vmx->pt_desc.guest.output_base = data;
2933 - break;
2934 - case MSR_IA32_RTIT_OUTPUT_MASK:
2935 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2936 - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2937 - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2938 - PT_CAP_topa_output) &&
2939 - !intel_pt_validate_cap(vmx->pt_desc.caps,
2940 - PT_CAP_single_range_output)))
2941 - return 1;
2942 - vmx->pt_desc.guest.output_mask = data;
2943 - break;
2944 - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2945 - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2946 - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2947 - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2948 - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2949 - PT_CAP_num_address_ranges)))
2950 - return 1;
2951 - if (is_noncanonical_address(data, vcpu))
2952 - return 1;
2953 - if (index % 2)
2954 - vmx->pt_desc.guest.addr_b[index / 2] = data;
2955 - else
2956 - vmx->pt_desc.guest.addr_a[index / 2] = data;
2957 - break;
2958 - case MSR_TSC_AUX:
2959 - if (!msr_info->host_initiated &&
2960 - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2961 - return 1;
2962 - /* Check reserved bit, higher 32 bits should be zero */
2963 - if ((data >> 32) != 0)
2964 - return 1;
2965 - goto find_shared_msr;
2966 -
2967 - default:
2968 - find_shared_msr:
2969 - msr = find_msr_entry(vmx, msr_index);
2970 - if (msr)
2971 - ret = vmx_set_guest_msr(vmx, msr, data);
2972 - else
2973 - ret = kvm_set_msr_common(vcpu, msr_info);
2974 - }
2975 -
2976 - return ret;
2977 -}
2978 -
2979 -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2980 -{
2981 - kvm_register_mark_available(vcpu, reg);
2982 -
2983 - switch (reg) {
2984 - case VCPU_REGS_RSP:
2985 - vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
2986 - break;
2987 - case VCPU_REGS_RIP:
2988 - vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
2989 - break;
2990 - case VCPU_EXREG_PDPTR:
2991 - if (enable_ept)
2992 - ept_save_pdptrs(vcpu);
2993 - break;
2994 - case VCPU_EXREG_CR3:
2995 - if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
2996 - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2997 - break;
2998 - default:
2999 - WARN_ON_ONCE(1);
3000 - break;
3001 - }
3002 -}
3003 -
3004 -static __init int cpu_has_kvm_support(void)
3005 -{
3006 - return cpu_has_vmx();
3007 -}
3008 -
3009 -static __init int vmx_disabled_by_bios(void)
3010 -{
3011 - u64 msr;
3012 -
3013 - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
3014 - if (msr & FEATURE_CONTROL_LOCKED) {
3015 - /* launched w/ TXT and VMX disabled */
3016 - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3017 - && tboot_enabled())
3018 - return 1;
3019 - /* launched w/o TXT and VMX only enabled w/ TXT */
3020 - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3021 - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3022 - && !tboot_enabled()) {
3023 - printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
3024 - "activate TXT before enabling KVM\n");
3025 - return 1;
3026 - }
3027 - /* launched w/o TXT and VMX disabled */
3028 - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3029 - && !tboot_enabled())
3030 - return 1;
3031 - }
3032 -
3033 - return 0;
3034 -}
3035 -
3036 -static void kvm_cpu_vmxon(u64 addr)
3037 -{
3038 - cr4_set_bits(X86_CR4_VMXE);
3039 - intel_pt_handle_vmx(1);
3040 -
3041 - asm volatile ("vmxon %0" : : "m"(addr));
3042 -}
3043 -
3044 -static int hardware_enable(void)
3045 -{
3046 - int cpu = raw_smp_processor_id();
3047 - u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
3048 - u64 old, test_bits;
3049 -
3050 - if (cr4_read_shadow() & X86_CR4_VMXE)
3051 - return -EBUSY;
3052 -
3053 - /*
3054 - * This can happen if we hot-added a CPU but failed to allocate
3055 - * VP assist page for it.
3056 - */
3057 - if (static_branch_unlikely(&enable_evmcs) &&
3058 - !hv_get_vp_assist_page(cpu))
3059 - return -EFAULT;
3060 -
3061 - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
3062 - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
3063 - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
3064 -
3065 - /*
3066 - * Now we can enable the vmclear operation in kdump
3067 - * since the loaded_vmcss_on_cpu list on this cpu
3068 - * has been initialized.
3069 - *
3070 - * Though the cpu is not in VMX operation now, there
3071 - * is no problem to enable the vmclear operation
3072 - * for the loaded_vmcss_on_cpu list is empty!
3073 - */
3074 - crash_enable_local_vmclear(cpu);
3075 -
3076 - rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
3077 -
3078 - test_bits = FEATURE_CONTROL_LOCKED;
3079 - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
3080 - if (tboot_enabled())
3081 - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
3082 -
3083 - if ((old & test_bits) != test_bits) {
3084 - /* enable and lock */
3085 - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
3086 - }
3087 - kvm_cpu_vmxon(phys_addr);
3088 - if (enable_ept)
3089 - ept_sync_global();
3090 -
3091 - return 0;
3092 -}
3093 -
3094 -static void vmclear_local_loaded_vmcss(void)
3095 -{
3096 - int cpu = raw_smp_processor_id();
3097 - struct loaded_vmcs *v, *n;
3098 -
3099 - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
3100 - loaded_vmcss_on_cpu_link)
3101 - __loaded_vmcs_clear(v);
3102 -}
3103 -
3104 -
3105 -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
3106 - * tricks.
3107 - */
3108 -static void kvm_cpu_vmxoff(void)
3109 -{
3110 - asm volatile (__ex("vmxoff"));
3111 -
3112 - intel_pt_handle_vmx(0);
3113 - cr4_clear_bits(X86_CR4_VMXE);
3114 -}
3115 -
3116 -static void hardware_disable(void)
3117 -{
3118 - vmclear_local_loaded_vmcss();
3119 - kvm_cpu_vmxoff();
3120 -}
3121 -
3122 -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
3123 - u32 msr, u32 *result)
3124 -{
3125 - u32 vmx_msr_low, vmx_msr_high;
3126 - u32 ctl = ctl_min | ctl_opt;
3127 -
3128 - rdmsr(msr, vmx_msr_low, vmx_msr_high);
3129 -
3130 - ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
3131 - ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
3132 -
3133 - /* Ensure minimum (required) set of control bits are supported. */
3134 - if (ctl_min & ~ctl)
3135 - return -EIO;
3136 -
3137 - *result = ctl;
3138 - return 0;
3139 -}
3140 -
3141 -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
3142 - struct vmx_capability *vmx_cap)
3143 -{
3144 - u32 vmx_msr_low, vmx_msr_high;
3145 - u32 min, opt, min2, opt2;
3146 - u32 _pin_based_exec_control = 0;
3147 - u32 _cpu_based_exec_control = 0;
3148 - u32 _cpu_based_2nd_exec_control = 0;
3149 - u32 _vmexit_control = 0;
3150 - u32 _vmentry_control = 0;
3151 -
3152 - memset(vmcs_conf, 0, sizeof(*vmcs_conf));
3153 - min = CPU_BASED_HLT_EXITING |
3154 -#ifdef CONFIG_X86_64
3155 - CPU_BASED_CR8_LOAD_EXITING |
3156 - CPU_BASED_CR8_STORE_EXITING |
3157 -#endif
3158 - CPU_BASED_CR3_LOAD_EXITING |
3159 - CPU_BASED_CR3_STORE_EXITING |
3160 - CPU_BASED_UNCOND_IO_EXITING |
3161 - CPU_BASED_MOV_DR_EXITING |
3162 - CPU_BASED_USE_TSC_OFFSETTING |
3163 - CPU_BASED_MWAIT_EXITING |
3164 - CPU_BASED_MONITOR_EXITING |
3165 - CPU_BASED_INVLPG_EXITING |
3166 - CPU_BASED_RDPMC_EXITING;
3167 -
3168 - opt = CPU_BASED_TPR_SHADOW |
3169 - CPU_BASED_USE_MSR_BITMAPS |
3170 - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
3171 - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
3172 - &_cpu_based_exec_control) < 0)
3173 - return -EIO;
3174 -#ifdef CONFIG_X86_64
3175 - if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3176 - _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
3177 - ~CPU_BASED_CR8_STORE_EXITING;
3178 -#endif
3179 - if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
3180 - min2 = 0;
3181 - opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
3182 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3183 - SECONDARY_EXEC_WBINVD_EXITING |
3184 - SECONDARY_EXEC_ENABLE_VPID |
3185 - SECONDARY_EXEC_ENABLE_EPT |
3186 - SECONDARY_EXEC_UNRESTRICTED_GUEST |
3187 - SECONDARY_EXEC_PAUSE_LOOP_EXITING |
3188 - SECONDARY_EXEC_DESC |
3189 - SECONDARY_EXEC_RDTSCP |
3190 - SECONDARY_EXEC_ENABLE_INVPCID |
3191 - SECONDARY_EXEC_APIC_REGISTER_VIRT |
3192 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
3193 - SECONDARY_EXEC_SHADOW_VMCS |
3194 - SECONDARY_EXEC_XSAVES |
3195 - SECONDARY_EXEC_RDSEED_EXITING |
3196 - SECONDARY_EXEC_RDRAND_EXITING |
3197 - SECONDARY_EXEC_ENABLE_PML |
3198 - SECONDARY_EXEC_TSC_SCALING |
3199 - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
3200 - SECONDARY_EXEC_PT_USE_GPA |
3201 - SECONDARY_EXEC_PT_CONCEAL_VMX |
3202 - SECONDARY_EXEC_ENABLE_VMFUNC |
3203 - SECONDARY_EXEC_ENCLS_EXITING;
3204 - if (adjust_vmx_controls(min2, opt2,
3205 - MSR_IA32_VMX_PROCBASED_CTLS2,
3206 - &_cpu_based_2nd_exec_control) < 0)
3207 - return -EIO;
3208 - }
3209 -#ifndef CONFIG_X86_64
3210 - if (!(_cpu_based_2nd_exec_control &
3211 - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
3212 - _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
3213 -#endif
3214 -
3215 - if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3216 - _cpu_based_2nd_exec_control &= ~(
3217 - SECONDARY_EXEC_APIC_REGISTER_VIRT |
3218 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3219 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3220 -
3221 - rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
3222 - &vmx_cap->ept, &vmx_cap->vpid);
3223 -
3224 - if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
3225 - /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
3226 - enabled */
3227 - _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
3228 - CPU_BASED_CR3_STORE_EXITING |
3229 - CPU_BASED_INVLPG_EXITING);
3230 - } else if (vmx_cap->ept) {
3231 - vmx_cap->ept = 0;
3232 - pr_warn_once("EPT CAP should not exist if not support "
3233 - "1-setting enable EPT VM-execution control\n");
3234 - }
3235 - if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
3236 - vmx_cap->vpid) {
3237 - vmx_cap->vpid = 0;
3238 - pr_warn_once("VPID CAP should not exist if not support "
3239 - "1-setting enable VPID VM-execution control\n");
3240 - }
3241 -
3242 - min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
3243 -#ifdef CONFIG_X86_64
3244 - min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
3245 -#endif
3246 - opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
3247 - VM_EXIT_LOAD_IA32_PAT |
3248 - VM_EXIT_LOAD_IA32_EFER |
3249 - VM_EXIT_CLEAR_BNDCFGS |
3250 - VM_EXIT_PT_CONCEAL_PIP |
3251 - VM_EXIT_CLEAR_IA32_RTIT_CTL;
3252 - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
3253 - &_vmexit_control) < 0)
3254 - return -EIO;
3255 -
3256 - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
3257 - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
3258 - PIN_BASED_VMX_PREEMPTION_TIMER;
3259 - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
3260 - &_pin_based_exec_control) < 0)
3261 - return -EIO;
3262 -
3263 - if (cpu_has_broken_vmx_preemption_timer())
3264 - _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
3265 - if (!(_cpu_based_2nd_exec_control &
3266 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
3267 - _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
3268 -
3269 - min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
3270 - opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
3271 - VM_ENTRY_LOAD_IA32_PAT |
3272 - VM_ENTRY_LOAD_IA32_EFER |
3273 - VM_ENTRY_LOAD_BNDCFGS |
3274 - VM_ENTRY_PT_CONCEAL_PIP |
3275 - VM_ENTRY_LOAD_IA32_RTIT_CTL;
3276 - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
3277 - &_vmentry_control) < 0)
3278 - return -EIO;
3279 -
3280 - /*
3281 - * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
3282 - * can't be used due to an errata where VM Exit may incorrectly clear
3283 - * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the
3284 - * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
3285 - */
3286 - if (boot_cpu_data.x86 == 0x6) {
3287 - switch (boot_cpu_data.x86_model) {
3288 - case 26: /* AAK155 */
3289 - case 30: /* AAP115 */
3290 - case 37: /* AAT100 */
3291 - case 44: /* BC86,AAY89,BD102 */
3292 - case 46: /* BA97 */
3293 - _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
3294 - _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
3295 - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
3296 - "does not work properly. Using workaround\n");
3297 - break;
3298 - default:
3299 - break;
3300 - }
3301 - }
3302 -
3303 -
3304 - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
3305 -
3306 - /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
3307 - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
3308 - return -EIO;
3309 -
3310 -#ifdef CONFIG_X86_64
3311 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
3312 - if (vmx_msr_high & (1u<<16))
3313 - return -EIO;
3314 -#endif
3315 -
3316 - /* Require Write-Back (WB) memory type for VMCS accesses. */
3317 - if (((vmx_msr_high >> 18) & 15) != 6)
3318 - return -EIO;
3319 -
3320 - vmcs_conf->size = vmx_msr_high & 0x1fff;
3321 - vmcs_conf->order = get_order(vmcs_conf->size);
3322 - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
3323 -
3324 - vmcs_conf->revision_id = vmx_msr_low;
3325 -
3326 - vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
3327 - vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
3328 - vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
3329 - vmcs_conf->vmexit_ctrl = _vmexit_control;
3330 - vmcs_conf->vmentry_ctrl = _vmentry_control;
3331 -
3332 - if (static_branch_unlikely(&enable_evmcs))
3333 - evmcs_sanitize_exec_ctrls(vmcs_conf);
3334 -
3335 - return 0;
3336 -}
3337 -
3338 -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
3339 -{
3340 - int node = cpu_to_node(cpu);
3341 - struct page *pages;
3342 - struct vmcs *vmcs;
3343 -
3344 - pages = __alloc_pages_node(node, flags, vmcs_config.order);
3345 - if (!pages)
3346 - return NULL;
3347 - vmcs = page_address(pages);
3348 - memset(vmcs, 0, vmcs_config.size);
3349 -
3350 - /* KVM supports Enlightened VMCS v1 only */
3351 - if (static_branch_unlikely(&enable_evmcs))
3352 - vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
3353 - else
3354 - vmcs->hdr.revision_id = vmcs_config.revision_id;
3355 -
3356 - if (shadow)
3357 - vmcs->hdr.shadow_vmcs = 1;
3358 - return vmcs;
3359 -}
3360 -
3361 -void free_vmcs(struct vmcs *vmcs)
3362 -{
3363 - free_pages((unsigned long)vmcs, vmcs_config.order);
3364 -}
3365 -
3366 -/*
3367 - * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
3368 - */
3369 -void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3370 -{
3371 - if (!loaded_vmcs->vmcs)
3372 - return;
3373 - loaded_vmcs_clear(loaded_vmcs);
3374 - free_vmcs(loaded_vmcs->vmcs);
3375 - loaded_vmcs->vmcs = NULL;
3376 - if (loaded_vmcs->msr_bitmap)
3377 - free_page((unsigned long)loaded_vmcs->msr_bitmap);
3378 - WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
3379 -}
3380 -
3381 -int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3382 -{
3383 - loaded_vmcs->vmcs = alloc_vmcs(false);
3384 - if (!loaded_vmcs->vmcs)
3385 - return -ENOMEM;
3386 -
3387 - loaded_vmcs->shadow_vmcs = NULL;
3388 - loaded_vmcs->hv_timer_soft_disabled = false;
3389 - loaded_vmcs_init(loaded_vmcs);
3390 -
3391 - if (cpu_has_vmx_msr_bitmap()) {
3392 - loaded_vmcs->msr_bitmap = (unsigned long *)
3393 - __get_free_page(GFP_KERNEL_ACCOUNT);
3394 - if (!loaded_vmcs->msr_bitmap)
3395 - goto out_vmcs;
3396 - memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
3397 -
3398 - if (IS_ENABLED(CONFIG_HYPERV) &&
3399 - static_branch_unlikely(&enable_evmcs) &&
3400 - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
3401 - struct hv_enlightened_vmcs *evmcs =
3402 - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
3403 -
3404 - evmcs->hv_enlightenments_control.msr_bitmap = 1;
3405 - }
3406 - }
3407 -
3408 - memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
3409 - memset(&loaded_vmcs->controls_shadow, 0,
3410 - sizeof(struct vmcs_controls_shadow));
3411 -
3412 - return 0;
3413 -
3414 -out_vmcs:
3415 - free_loaded_vmcs(loaded_vmcs);
3416 - return -ENOMEM;
3417 -}
3418 -
3419 -static void free_kvm_area(void)
3420 -{
3421 - int cpu;
3422 -
3423 - for_each_possible_cpu(cpu) {
3424 - free_vmcs(per_cpu(vmxarea, cpu));
3425 - per_cpu(vmxarea, cpu) = NULL;
3426 - }
3427 -}
3428 -
3429 -static __init int alloc_kvm_area(void)
3430 -{
3431 - int cpu;
3432 -
3433 - for_each_possible_cpu(cpu) {
3434 - struct vmcs *vmcs;
3435 -
3436 - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
3437 - if (!vmcs) {
3438 - free_kvm_area();
3439 - return -ENOMEM;
3440 - }
3441 -
3442 - /*
3443 - * When eVMCS is enabled, alloc_vmcs_cpu() sets
3444 - * vmcs->revision_id to KVM_EVMCS_VERSION instead of
3445 - * revision_id reported by MSR_IA32_VMX_BASIC.
3446 - *
3447 - * However, even though not explicitly documented by
3448 - * TLFS, VMXArea passed as VMXON argument should
3449 - * still be marked with revision_id reported by
3450 - * physical CPU.
3451 - */
3452 - if (static_branch_unlikely(&enable_evmcs))
3453 - vmcs->hdr.revision_id = vmcs_config.revision_id;
3454 -
3455 - per_cpu(vmxarea, cpu) = vmcs;
3456 - }
3457 - return 0;
3458 -}
3459 -
3460 -static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
3461 - struct kvm_segment *save)
3462 -{
3463 - if (!emulate_invalid_guest_state) {
3464 - /*
3465 - * CS and SS RPL should be equal during guest entry according
3466 - * to VMX spec, but in reality it is not always so. Since vcpu
3467 - * is in the middle of the transition from real mode to
3468 - * protected mode it is safe to assume that RPL 0 is a good
3469 - * default value.
3470 - */
3471 - if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
3472 - save->selector &= ~SEGMENT_RPL_MASK;
3473 - save->dpl = save->selector & SEGMENT_RPL_MASK;
3474 - save->s = 1;
3475 - }
3476 - vmx_set_segment(vcpu, save, seg);
3477 -}
3478 -
3479 -static void enter_pmode(struct kvm_vcpu *vcpu)
3480 -{
3481 - unsigned long flags;
3482 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3483 -
3484 - /*
3485 - * Update real mode segment cache. It may be not up-to-date if sement
3486 - * register was written while vcpu was in a guest mode.
3487 - */
3488 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
3489 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
3490 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
3491 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
3492 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
3493 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
3494 -
3495 - vmx->rmode.vm86_active = 0;
3496 -
3497 - vmx_segment_cache_clear(vmx);
3498 -
3499 - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
3500 -
3501 - flags = vmcs_readl(GUEST_RFLAGS);
3502 - flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
3503 - flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
3504 - vmcs_writel(GUEST_RFLAGS, flags);
3505 -
3506 - vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
3507 - (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
3508 -
3509 - update_exception_bitmap(vcpu);
3510 -
3511 - fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
3512 - fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
3513 - fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
3514 - fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3515 - fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3516 - fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3517 -}
3518 -
3519 -static void fix_rmode_seg(int seg, struct kvm_segment *save)
3520 -{
3521 - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3522 - struct kvm_segment var = *save;
3523 -
3524 - var.dpl = 0x3;
3525 - if (seg == VCPU_SREG_CS)
3526 - var.type = 0x3;
3527 -
3528 - if (!emulate_invalid_guest_state) {
3529 - var.selector = var.base >> 4;
3530 - var.base = var.base & 0xffff0;
3531 - var.limit = 0xffff;
3532 - var.g = 0;
3533 - var.db = 0;
3534 - var.present = 1;
3535 - var.s = 1;
3536 - var.l = 0;
3537 - var.unusable = 0;
3538 - var.type = 0x3;
3539 - var.avl = 0;
3540 - if (save->base & 0xf)
3541 - printk_once(KERN_WARNING "kvm: segment base is not "
3542 - "paragraph aligned when entering "
3543 - "protected mode (seg=%d)", seg);
3544 - }
3545 -
3546 - vmcs_write16(sf->selector, var.selector);
3547 - vmcs_writel(sf->base, var.base);
3548 - vmcs_write32(sf->limit, var.limit);
3549 - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
3550 -}
3551 -
3552 -static void enter_rmode(struct kvm_vcpu *vcpu)
3553 -{
3554 - unsigned long flags;
3555 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3556 - struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
3557 -
3558 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
3559 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
3560 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
3561 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
3562 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
3563 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
3564 - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
3565 -
3566 - vmx->rmode.vm86_active = 1;
3567 -
3568 - /*
3569 - * Very old userspace does not call KVM_SET_TSS_ADDR before entering
3570 - * vcpu. Warn the user that an update is overdue.
3571 - */
3572 - if (!kvm_vmx->tss_addr)
3573 - printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
3574 - "called before entering vcpu\n");
3575 -
3576 - vmx_segment_cache_clear(vmx);
3577 -
3578 - vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
3579 - vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
3580 - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
3581 -
3582 - flags = vmcs_readl(GUEST_RFLAGS);
3583 - vmx->rmode.save_rflags = flags;
3584 -
3585 - flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
3586 -
3587 - vmcs_writel(GUEST_RFLAGS, flags);
3588 - vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
3589 - update_exception_bitmap(vcpu);
3590 -
3591 - fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
3592 - fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
3593 - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
3594 - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3595 - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3596 - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3597 -
3598 - kvm_mmu_reset_context(vcpu);
3599 -}
3600 -
3601 -void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3602 -{
3603 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3604 - struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
3605 -
3606 - if (!msr)
3607 - return;
3608 -
3609 - vcpu->arch.efer = efer;
3610 - if (efer & EFER_LMA) {
3611 - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3612 - msr->data = efer;
3613 - } else {
3614 - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3615 -
3616 - msr->data = efer & ~EFER_LME;
3617 - }
3618 - setup_msrs(vmx);
3619 -}
3620 -
3621 -#ifdef CONFIG_X86_64
3622 -
3623 -static void enter_lmode(struct kvm_vcpu *vcpu)
3624 -{
3625 - u32 guest_tr_ar;
3626 -
3627 - vmx_segment_cache_clear(to_vmx(vcpu));
3628 -
3629 - guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
3630 - if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
3631 - pr_debug_ratelimited("%s: tss fixup for long mode. \n",
3632 - __func__);
3633 - vmcs_write32(GUEST_TR_AR_BYTES,
3634 - (guest_tr_ar & ~VMX_AR_TYPE_MASK)
3635 - | VMX_AR_TYPE_BUSY_64_TSS);
3636 - }
3637 - vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
3638 -}
3639 -
3640 -static void exit_lmode(struct kvm_vcpu *vcpu)
3641 -{
3642 - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3643 - vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3644 -}
3645 -
3646 -#endif
3647 -
3648 -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
3649 -{
3650 - int vpid = to_vmx(vcpu)->vpid;
3651 -
3652 - if (!vpid_sync_vcpu_addr(vpid, addr))
3653 - vpid_sync_context(vpid);
3654 -
3655 - /*
3656 - * If VPIDs are not supported or enabled, then the above is a no-op.
3657 - * But we don't really need a TLB flush in that case anyway, because
3658 - * each VM entry/exit includes an implicit flush when VPID is 0.
3659 - */
3660 -}
3661 -
3662 -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
3663 -{
3664 - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
3665 -
3666 - vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
3667 - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
3668 -}
3669 -
3670 -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
3671 -{
3672 - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
3673 -
3674 - vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
3675 - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
3676 -}
3677 -
3678 -static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
3679 -{
3680 - struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3681 -
3682 - if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
3683 - return;
3684 -
3685 - if (is_pae_paging(vcpu)) {
3686 - vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
3687 - vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
3688 - vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
3689 - vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
3690 - }
3691 -}
3692 -
3693 -void ept_save_pdptrs(struct kvm_vcpu *vcpu)
3694 -{
3695 - struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3696 -
3697 - if (is_pae_paging(vcpu)) {
3698 - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
3699 - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
3700 - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
3701 - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
3702 - }
3703 -
3704 - kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
3705 -}
3706 -
3707 -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
3708 - unsigned long cr0,
3709 - struct kvm_vcpu *vcpu)
3710 -{
3711 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3712 -
3713 - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
3714 - vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
3715 - if (!(cr0 & X86_CR0_PG)) {
3716 - /* From paging/starting to nonpaging */
3717 - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
3718 - CPU_BASED_CR3_STORE_EXITING);
3719 - vcpu->arch.cr0 = cr0;
3720 - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
3721 - } else if (!is_paging(vcpu)) {
3722 - /* From nonpaging to paging */
3723 - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
3724 - CPU_BASED_CR3_STORE_EXITING);
3725 - vcpu->arch.cr0 = cr0;
3726 - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
3727 - }
3728 -
3729 - if (!(cr0 & X86_CR0_WP))
3730 - *hw_cr0 &= ~X86_CR0_WP;
3731 -}
3732 -
3733 -void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3734 -{
3735 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3736 - unsigned long hw_cr0;
3737 -
3738 - hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
3739 - if (enable_unrestricted_guest)
3740 - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3741 - else {
3742 - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3743 -
3744 - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3745 - enter_pmode(vcpu);
3746 -
3747 - if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3748 - enter_rmode(vcpu);
3749 - }
3750 -
3751 -#ifdef CONFIG_X86_64
3752 - if (vcpu->arch.efer & EFER_LME) {
3753 - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
3754 - enter_lmode(vcpu);
3755 - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
3756 - exit_lmode(vcpu);
3757 - }
3758 -#endif
3759 -
3760 - if (enable_ept && !enable_unrestricted_guest)
3761 - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
3762 -
3763 - vmcs_writel(CR0_READ_SHADOW, cr0);
3764 - vmcs_writel(GUEST_CR0, hw_cr0);
3765 - vcpu->arch.cr0 = cr0;
3766 -
3767 - /* depends on vcpu->arch.cr0 to be set to a new value */
3768 - vmx->emulation_required = emulation_required(vcpu);
3769 -}
3770 -
3771 -static int get_ept_level(struct kvm_vcpu *vcpu)
3772 -{
3773 - if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
3774 - return 5;
3775 - return 4;
3776 -}
3777 -
3778 -u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
3779 -{
3780 - u64 eptp = VMX_EPTP_MT_WB;
3781 -
3782 - eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
3783 -
3784 - if (enable_ept_ad_bits &&
3785 - (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
3786 - eptp |= VMX_EPTP_AD_ENABLE_BIT;
3787 - eptp |= (root_hpa & PAGE_MASK);
3788 -
3789 - return eptp;
3790 -}
3791 -
3792 -void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3793 -{
3794 - struct kvm *kvm = vcpu->kvm;
3795 - bool update_guest_cr3 = true;
3796 - unsigned long guest_cr3;
3797 - u64 eptp;
3798 -
3799 - guest_cr3 = cr3;
3800 - if (enable_ept) {
3801 - eptp = construct_eptp(vcpu, cr3);
3802 - vmcs_write64(EPT_POINTER, eptp);
3803 -
3804 - if (kvm_x86_ops->tlb_remote_flush) {
3805 - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
3806 - to_vmx(vcpu)->ept_pointer = eptp;
3807 - to_kvm_vmx(kvm)->ept_pointers_match
3808 - = EPT_POINTERS_CHECK;
3809 - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
3810 - }
3811 -
3812 - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
3813 - if (is_guest_mode(vcpu))
3814 - update_guest_cr3 = false;
3815 - else if (!enable_unrestricted_guest && !is_paging(vcpu))
3816 - guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
3817 - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
3818 - guest_cr3 = vcpu->arch.cr3;
3819 - else /* vmcs01.GUEST_CR3 is already up-to-date. */
3820 - update_guest_cr3 = false;
3821 - ept_load_pdptrs(vcpu);
3822 - }
3823 -
3824 - if (update_guest_cr3)
3825 - vmcs_writel(GUEST_CR3, guest_cr3);
3826 -}
3827 -
3828 -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3829 -{
3830 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3831 - /*
3832 - * Pass through host's Machine Check Enable value to hw_cr4, which
3833 - * is in force while we are in guest mode. Do not let guests control
3834 - * this bit, even if host CR4.MCE == 0.
3835 - */
3836 - unsigned long hw_cr4;
3837 -
3838 - hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
3839 - if (enable_unrestricted_guest)
3840 - hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
3841 - else if (vmx->rmode.vm86_active)
3842 - hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
3843 - else
3844 - hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
3845 -
3846 - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
3847 - if (cr4 & X86_CR4_UMIP) {
3848 - secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
3849 - hw_cr4 &= ~X86_CR4_UMIP;
3850 - } else if (!is_guest_mode(vcpu) ||
3851 - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
3852 - secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
3853 - }
3854 - }
3855 -
3856 - if (cr4 & X86_CR4_VMXE) {
3857 - /*
3858 - * To use VMXON (and later other VMX instructions), a guest
3859 - * must first be able to turn on cr4.VMXE (see handle_vmon()).
3860 - * So basically the check on whether to allow nested VMX
3861 - * is here. We operate under the default treatment of SMM,
3862 - * so VMX cannot be enabled under SMM.
3863 - */
3864 - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
3865 - return 1;
3866 - }
3867 -
3868 - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
3869 - return 1;
3870 -
3871 - vcpu->arch.cr4 = cr4;
3872 -
3873 - if (!enable_unrestricted_guest) {
3874 - if (enable_ept) {
3875 - if (!is_paging(vcpu)) {
3876 - hw_cr4 &= ~X86_CR4_PAE;
3877 - hw_cr4 |= X86_CR4_PSE;
3878 - } else if (!(cr4 & X86_CR4_PAE)) {
3879 - hw_cr4 &= ~X86_CR4_PAE;
3880 - }
3881 - }
3882 -
3883 - /*
3884 - * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
3885 - * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
3886 - * to be manually disabled when guest switches to non-paging
3887 - * mode.
3888 - *
3889 - * If !enable_unrestricted_guest, the CPU is always running
3890 - * with CR0.PG=1 and CR4 needs to be modified.
3891 - * If enable_unrestricted_guest, the CPU automatically
3892 - * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
3893 - */
3894 - if (!is_paging(vcpu))
3895 - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
3896 - }
3897 -
3898 - vmcs_writel(CR4_READ_SHADOW, cr4);
3899 - vmcs_writel(GUEST_CR4, hw_cr4);
3900 - return 0;
3901 -}
3902 -
3903 -void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3904 -{
3905 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3906 - u32 ar;
3907 -
3908 - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3909 - *var = vmx->rmode.segs[seg];
3910 - if (seg == VCPU_SREG_TR
3911 - || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3912 - return;
3913 - var->base = vmx_read_guest_seg_base(vmx, seg);
3914 - var->selector = vmx_read_guest_seg_selector(vmx, seg);
3915 - return;
3916 - }
3917 - var->base = vmx_read_guest_seg_base(vmx, seg);
3918 - var->limit = vmx_read_guest_seg_limit(vmx, seg);
3919 - var->selector = vmx_read_guest_seg_selector(vmx, seg);
3920 - ar = vmx_read_guest_seg_ar(vmx, seg);
3921 - var->unusable = (ar >> 16) & 1;
3922 - var->type = ar & 15;
3923 - var->s = (ar >> 4) & 1;
3924 - var->dpl = (ar >> 5) & 3;
3925 - /*
3926 - * Some userspaces do not preserve unusable property. Since usable
3927 - * segment has to be present according to VMX spec we can use present
3928 - * property to amend userspace bug by making unusable segment always
3929 - * nonpresent. vmx_segment_access_rights() already marks nonpresent
3930 - * segment as unusable.
3931 - */
3932 - var->present = !var->unusable;
3933 - var->avl = (ar >> 12) & 1;
3934 - var->l = (ar >> 13) & 1;
3935 - var->db = (ar >> 14) & 1;
3936 - var->g = (ar >> 15) & 1;
3937 -}
3938 -
3939 -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3940 -{
3941 - struct kvm_segment s;
3942 -
3943 - if (to_vmx(vcpu)->rmode.vm86_active) {
3944 - vmx_get_segment(vcpu, &s, seg);
3945 - return s.base;
3946 - }
3947 - return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3948 -}
3949 -
3950 -int vmx_get_cpl(struct kvm_vcpu *vcpu)
3951 -{
3952 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3953 -
3954 - if (unlikely(vmx->rmode.vm86_active))
3955 - return 0;
3956 - else {
3957 - int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3958 - return VMX_AR_DPL(ar);
3959 - }
3960 -}
3961 -
3962 -static u32 vmx_segment_access_rights(struct kvm_segment *var)
3963 -{
3964 - u32 ar;
3965 -
3966 - if (var->unusable || !var->present)
3967 - ar = 1 << 16;
3968 - else {
3969 - ar = var->type & 15;
3970 - ar |= (var->s & 1) << 4;
3971 - ar |= (var->dpl & 3) << 5;
3972 - ar |= (var->present & 1) << 7;
3973 - ar |= (var->avl & 1) << 12;
3974 - ar |= (var->l & 1) << 13;
3975 - ar |= (var->db & 1) << 14;
3976 - ar |= (var->g & 1) << 15;
3977 - }
3978 -
3979 - return ar;
3980 -}
3981 -
3982 -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3983 -{
3984 - struct vcpu_vmx *vmx = to_vmx(vcpu);
3985 - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3986 -
3987 - vmx_segment_cache_clear(vmx);
3988 -
3989 - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3990 - vmx->rmode.segs[seg] = *var;
3991 - if (seg == VCPU_SREG_TR)
3992 - vmcs_write16(sf->selector, var->selector);
3993 - else if (var->s)
3994 - fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3995 - goto out;
3996 - }
3997 -
3998 - vmcs_writel(sf->base, var->base);
3999 - vmcs_write32(sf->limit, var->limit);
4000 - vmcs_write16(sf->selector, var->selector);
4001 -
4002 - /*
4003 - * Fix the "Accessed" bit in AR field of segment registers for older
4004 - * qemu binaries.
4005 - * IA32 arch specifies that at the time of processor reset the
4006 - * "Accessed" bit in the AR field of segment registers is 1. And qemu
4007 - * is setting it to 0 in the userland code. This causes invalid guest
4008 - * state vmexit when "unrestricted guest" mode is turned on.
4009 - * Fix for this setup issue in cpu_reset is being pushed in the qemu
4010 - * tree. Newer qemu binaries with that qemu fix would not need this
4011 - * kvm hack.
4012 - */
4013 - if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
4014 - var->type |= 0x1; /* Accessed */
4015 -
4016 - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
4017 -
4018 -out:
4019 - vmx->emulation_required = emulation_required(vcpu);
4020 -}
4021 -
4022 -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4023 -{
4024 - u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
4025 -
4026 - *db = (ar >> 14) & 1;
4027 - *l = (ar >> 13) & 1;
4028 -}
4029 -
4030 -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4031 -{
4032 - dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
4033 - dt->address = vmcs_readl(GUEST_IDTR_BASE);
4034 -}
4035 -
4036 -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4037 -{
4038 - vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
4039 - vmcs_writel(GUEST_IDTR_BASE, dt->address);
4040 -}
4041 -
4042 -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4043 -{
4044 - dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
4045 - dt->address = vmcs_readl(GUEST_GDTR_BASE);
4046 -}
4047 -
4048 -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4049 -{
4050 - vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
4051 - vmcs_writel(GUEST_GDTR_BASE, dt->address);
4052 -}
4053 -
4054 -static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
4055 -{
4056 - struct kvm_segment var;
4057 - u32 ar;
4058 -
4059 - vmx_get_segment(vcpu, &var, seg);
4060 - var.dpl = 0x3;
4061 - if (seg == VCPU_SREG_CS)
4062 - var.type = 0x3;
4063 - ar = vmx_segment_access_rights(&var);
4064 -
4065 - if (var.base != (var.selector << 4))
4066 - return false;
4067 - if (var.limit != 0xffff)
4068 - return false;
4069 - if (ar != 0xf3)
4070 - return false;
4071 -
4072 - return true;
4073 -}
4074 -
4075 -static bool code_segment_valid(struct kvm_vcpu *vcpu)
4076 -{
4077 - struct kvm_segment cs;
4078 - unsigned int cs_rpl;
4079 -
4080 - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4081 - cs_rpl = cs.selector & SEGMENT_RPL_MASK;
4082 -
4083 - if (cs.unusable)
4084 - return false;
4085 - if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
4086 - return false;
4087 - if (!cs.s)
4088 - return false;
4089 - if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
4090 - if (cs.dpl > cs_rpl)
4091 - return false;
4092 - } else {
4093 - if (cs.dpl != cs_rpl)
4094 - return false;
4095 - }
4096 - if (!cs.present)
4097 - return false;
4098 -
4099 - /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
4100 - return true;
4101 -}
4102 -
4103 -static bool stack_segment_valid(struct kvm_vcpu *vcpu)
4104 -{
4105 - struct kvm_segment ss;
4106 - unsigned int ss_rpl;
4107 -
4108 - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4109 - ss_rpl = ss.selector & SEGMENT_RPL_MASK;
4110 -
4111 - if (ss.unusable)
4112 - return true;
4113 - if (ss.type != 3 && ss.type != 7)
4114 - return false;
4115 - if (!ss.s)
4116 - return false;
4117 - if (ss.dpl != ss_rpl) /* DPL != RPL */
4118 - return false;
4119 - if (!ss.present)
4120 - return false;
4121 -
4122 - return true;
4123 -}
4124 -
4125 -static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
4126 -{
4127 - struct kvm_segment var;
4128 - unsigned int rpl;
4129 -
4130 - vmx_get_segment(vcpu, &var, seg);
4131 - rpl = var.selector & SEGMENT_RPL_MASK;
4132 -
4133 - if (var.unusable)
4134 - return true;
4135 - if (!var.s)
4136 - return false;
4137 - if (!var.present)
4138 - return false;
4139 - if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
4140 - if (var.dpl < rpl) /* DPL < RPL */
4141 - return false;
4142 - }
4143 -
4144 - /* TODO: Add other members to kvm_segment_field to allow checking for other access
4145 - * rights flags
4146 - */
4147 - return true;
4148 -}
4149 -
4150 -static bool tr_valid(struct kvm_vcpu *vcpu)
4151 -{
4152 - struct kvm_segment tr;
4153 -
4154 - vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
4155 -
4156 - if (tr.unusable)
4157 - return false;
4158 - if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4159 - return false;
4160 - if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
4161 - return false;
4162 - if (!tr.present)
4163 - return false;
4164 -
4165 - return true;
4166 -}
4167 -
4168 -static bool ldtr_valid(struct kvm_vcpu *vcpu)
4169 -{
4170 - struct kvm_segment ldtr;
4171 -
4172 - vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
4173 -
4174 - if (ldtr.unusable)
4175 - return true;
4176 - if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4177 - return false;
4178 - if (ldtr.type != 2)
4179 - return false;
4180 - if (!ldtr.present)
4181 - return false;
4182 -
4183 - return true;
4184 -}
4185 -
4186 -static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
4187 -{
4188 - struct kvm_segment cs, ss;
4189 -
4190 - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4191 - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4192 -
4193 - return ((cs.selector & SEGMENT_RPL_MASK) ==
4194 - (ss.selector & SEGMENT_RPL_MASK));
4195 -}
4196 -
4197 -/*
4198 - * Check if guest state is valid. Returns true if valid, false if
4199 - * not.
4200 - * We assume that registers are always usable
4201 - */
4202 -static bool guest_state_valid(struct kvm_vcpu *vcpu)
4203 -{
4204 - if (enable_unrestricted_guest)
4205 - return true;
4206 -
4207 - /* real mode guest state checks */
4208 - if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
4209 - if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
4210 - return false;
4211 - if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
4212 - return false;
4213 - if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
4214 - return false;
4215 - if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
4216 - return false;
4217 - if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
4218 - return false;
4219 - if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
4220 - return false;
4221 - } else {
4222 - /* protected mode guest state checks */
4223 - if (!cs_ss_rpl_check(vcpu))
4224 - return false;
4225 - if (!code_segment_valid(vcpu))
4226 - return false;
4227 - if (!stack_segment_valid(vcpu))
4228 - return false;
4229 - if (!data_segment_valid(vcpu, VCPU_SREG_DS))
4230 - return false;
4231 - if (!data_segment_valid(vcpu, VCPU_SREG_ES))
4232 - return false;
4233 - if (!data_segment_valid(vcpu, VCPU_SREG_FS))
4234 - return false;
4235 - if (!data_segment_valid(vcpu, VCPU_SREG_GS))
4236 - return false;
4237 - if (!tr_valid(vcpu))
4238 - return false;
4239 - if (!ldtr_valid(vcpu))
4240 - return false;
4241 - }
4242 - /* TODO:
4243 - * - Add checks on RIP
4244 - * - Add checks on RFLAGS
4245 - */
4246 -
4247 - return true;
4248 -}
4249 -
4250 -static int init_rmode_tss(struct kvm *kvm)
4251 -{
4252 - gfn_t fn;
4253 - u16 data = 0;
4254 - int idx, r;
4255 -
4256 - idx = srcu_read_lock(&kvm->srcu);
4257 - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
4258 - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4259 - if (r < 0)
4260 - goto out;
4261 - data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
4262 - r = kvm_write_guest_page(kvm, fn++, &data,
4263 - TSS_IOPB_BASE_OFFSET, sizeof(u16));
4264 - if (r < 0)
4265 - goto out;
4266 - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
4267 - if (r < 0)
4268 - goto out;
4269 - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4270 - if (r < 0)
4271 - goto out;
4272 - data = ~0;
4273 - r = kvm_write_guest_page(kvm, fn, &data,
4274 - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
4275 - sizeof(u8));
4276 -out:
4277 - srcu_read_unlock(&kvm->srcu, idx);
4278 - return r;
4279 -}
4280 -
4281 -static int init_rmode_identity_map(struct kvm *kvm)
4282 -{
4283 - struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
4284 - int i, idx, r = 0;
4285 - kvm_pfn_t identity_map_pfn;
4286 - u32 tmp;
4287 -
4288 - /* Protect kvm_vmx->ept_identity_pagetable_done. */
4289 - mutex_lock(&kvm->slots_lock);
4290 -
4291 - if (likely(kvm_vmx->ept_identity_pagetable_done))
4292 - goto out2;
4293 -
4294 - if (!kvm_vmx->ept_identity_map_addr)
4295 - kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
4296 - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
4297 -
4298 - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
4299 - kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
4300 - if (r < 0)
4301 - goto out2;
4302 -
4303 - idx = srcu_read_lock(&kvm->srcu);
4304 - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
4305 - if (r < 0)
4306 - goto out;
4307 - /* Set up identity-mapping pagetable for EPT in real mode */
4308 - for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
4309 - tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
4310 - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
4311 - r = kvm_write_guest_page(kvm, identity_map_pfn,
4312 - &tmp, i * sizeof(tmp), sizeof(tmp));
4313 - if (r < 0)
4314 - goto out;
4315 - }
4316 - kvm_vmx->ept_identity_pagetable_done = true;
4317 -
4318 -out:
4319 - srcu_read_unlock(&kvm->srcu, idx);
4320 -
4321 -out2:
4322 - mutex_unlock(&kvm->slots_lock);
4323 - return r;
4324 -}
4325 -
4326 -static void seg_setup(int seg)
4327 -{
4328 - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4329 - unsigned int ar;
4330 -
4331 - vmcs_write16(sf->selector, 0);
4332 - vmcs_writel(sf->base, 0);
4333 - vmcs_write32(sf->limit, 0xffff);
4334 - ar = 0x93;
4335 - if (seg == VCPU_SREG_CS)
4336 - ar |= 0x08; /* code segment */
4337 -
4338 - vmcs_write32(sf->ar_bytes, ar);
4339 -}
4340 -
4341 -static int alloc_apic_access_page(struct kvm *kvm)
4342 -{
4343 - struct page *page;
4344 - int r = 0;
4345 -
4346 - mutex_lock(&kvm->slots_lock);
4347 - if (kvm->arch.apic_access_page_done)
4348 - goto out;
4349 - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
4350 - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
4351 - if (r)
4352 - goto out;
4353 -
4354 - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
4355 - if (is_error_page(page)) {
4356 - r = -EFAULT;
4357 - goto out;
4358 - }
4359 -
4360 - /*
4361 - * Do not pin the page in memory, so that memory hot-unplug
4362 - * is able to migrate it.
4363 - */
4364 - put_page(page);
4365 - kvm->arch.apic_access_page_done = true;
4366 -out:
4367 - mutex_unlock(&kvm->slots_lock);
4368 - return r;
4369 -}
4370 -
4371 -int allocate_vpid(void)
4372 -{
4373 - int vpid;
4374 -
4375 - if (!enable_vpid)
4376 - return 0;
4377 - spin_lock(&vmx_vpid_lock);
4378 - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
4379 - if (vpid < VMX_NR_VPIDS)
4380 - __set_bit(vpid, vmx_vpid_bitmap);
4381 - else
4382 - vpid = 0;
4383 - spin_unlock(&vmx_vpid_lock);
4384 - return vpid;
4385 -}
4386 -
4387 -void free_vpid(int vpid)
4388 -{
4389 - if (!enable_vpid || vpid == 0)
4390 - return;
4391 - spin_lock(&vmx_vpid_lock);
4392 - __clear_bit(vpid, vmx_vpid_bitmap);
4393 - spin_unlock(&vmx_vpid_lock);
4394 -}
4395 -
4396 -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
4397 - u32 msr, int type)
4398 -{
4399 - int f = sizeof(unsigned long);
4400 -
4401 - if (!cpu_has_vmx_msr_bitmap())
4402 - return;
4403 -
4404 - if (static_branch_unlikely(&enable_evmcs))
4405 - evmcs_touch_msr_bitmap();
4406 -
4407 - /*
4408 - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4409 - * have the write-low and read-high bitmap offsets the wrong way round.
4410 - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4411 - */
4412 - if (msr <= 0x1fff) {
4413 - if (type & MSR_TYPE_R)
4414 - /* read-low */
4415 - __clear_bit(msr, msr_bitmap + 0x000 / f);
4416 -
4417 - if (type & MSR_TYPE_W)
4418 - /* write-low */
4419 - __clear_bit(msr, msr_bitmap + 0x800 / f);
4420 -
4421 - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4422 - msr &= 0x1fff;
4423 - if (type & MSR_TYPE_R)
4424 - /* read-high */
4425 - __clear_bit(msr, msr_bitmap + 0x400 / f);
4426 -
4427 - if (type & MSR_TYPE_W)
4428 - /* write-high */
4429 - __clear_bit(msr, msr_bitmap + 0xc00 / f);
4430 -
4431 - }
4432 -}
4433 -
4434 -static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
4435 - u32 msr, int type)
4436 -{
4437 - int f = sizeof(unsigned long);
4438 -
4439 - if (!cpu_has_vmx_msr_bitmap())
4440 - return;
4441 -
4442 - if (static_branch_unlikely(&enable_evmcs))
4443 - evmcs_touch_msr_bitmap();
4444 -
4445 - /*
4446 - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4447 - * have the write-low and read-high bitmap offsets the wrong way round.
4448 - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4449 - */
4450 - if (msr <= 0x1fff) {
4451 - if (type & MSR_TYPE_R)
4452 - /* read-low */
4453 - __set_bit(msr, msr_bitmap + 0x000 / f);
4454 -
4455 - if (type & MSR_TYPE_W)
4456 - /* write-low */
4457 - __set_bit(msr, msr_bitmap + 0x800 / f);
4458 -
4459 - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4460 - msr &= 0x1fff;
4461 - if (type & MSR_TYPE_R)
4462 - /* read-high */
4463 - __set_bit(msr, msr_bitmap + 0x400 / f);
4464 -
4465 - if (type & MSR_TYPE_W)
4466 - /* write-high */
4467 - __set_bit(msr, msr_bitmap + 0xc00 / f);
4468 -
4469 - }
4470 -}
4471 -
4472 -static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
4473 - u32 msr, int type, bool value)
4474 -{
4475 - if (value)
4476 - vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
4477 - else
4478 - vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
4479 -}
4480 -
4481 -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
4482 -{
4483 - u8 mode = 0;
4484 -
4485 - if (cpu_has_secondary_exec_ctrls() &&
4486 - (secondary_exec_controls_get(to_vmx(vcpu)) &
4487 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
4488 - mode |= MSR_BITMAP_MODE_X2APIC;
4489 - if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
4490 - mode |= MSR_BITMAP_MODE_X2APIC_APICV;
4491 - }
4492 -
4493 - return mode;
4494 -}
4495 -
4496 -static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
4497 - u8 mode)
4498 -{
4499 - int msr;
4500 -
4501 - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
4502 - unsigned word = msr / BITS_PER_LONG;
4503 - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
4504 - msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
4505 - }
4506 -
4507 - if (mode & MSR_BITMAP_MODE_X2APIC) {
4508 - /*
4509 - * TPR reads and writes can be virtualized even if virtual interrupt
4510 - * delivery is not in use.
4511 - */
4512 - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
4513 - if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
4514 - vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
4515 - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
4516 - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
4517 - }
4518 - }
4519 -}
4520 -
4521 -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
4522 -{
4523 - struct vcpu_vmx *vmx = to_vmx(vcpu);
4524 - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
4525 - u8 mode = vmx_msr_bitmap_mode(vcpu);
4526 - u8 changed = mode ^ vmx->msr_bitmap_mode;
4527 -
4528 - if (!changed)
4529 - return;
4530 -
4531 - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
4532 - vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
4533 -
4534 - vmx->msr_bitmap_mode = mode;
4535 -}
4536 -
4537 -void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
4538 -{
4539 - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
4540 - bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
4541 - u32 i;
4542 -
4543 - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
4544 - MSR_TYPE_RW, flag);
4545 - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
4546 - MSR_TYPE_RW, flag);
4547 - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
4548 - MSR_TYPE_RW, flag);
4549 - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
4550 - MSR_TYPE_RW, flag);
4551 - for (i = 0; i < vmx->pt_desc.addr_range; i++) {
4552 - vmx_set_intercept_for_msr(msr_bitmap,
4553 - MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
4554 - vmx_set_intercept_for_msr(msr_bitmap,
4555 - MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
4556 - }
4557 -}
4558 -
4559 -static bool vmx_get_enable_apicv(struct kvm *kvm)
4560 -{
4561 - return enable_apicv;
4562 -}
4563 -
4564 -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
4565 -{
4566 - struct vcpu_vmx *vmx = to_vmx(vcpu);
4567 - void *vapic_page;
4568 - u32 vppr;
4569 - int rvi;
4570 -
4571 - if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
4572 - !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
4573 - WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
4574 - return false;
4575 -
4576 - rvi = vmx_get_rvi();
4577 -
4578 - vapic_page = vmx->nested.virtual_apic_map.hva;
4579 - vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
4580 -
4581 - return ((rvi & 0xf0) > (vppr & 0xf0));
4582 -}
4583 -
4584 -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
4585 - bool nested)
4586 -{
4587 -#ifdef CONFIG_SMP
4588 - int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
4589 -
4590 - if (vcpu->mode == IN_GUEST_MODE) {
4591 - /*
4592 - * The vector of interrupt to be delivered to vcpu had
4593 - * been set in PIR before this function.
4594 - *
4595 - * Following cases will be reached in this block, and
4596 - * we always send a notification event in all cases as
4597 - * explained below.
4598 - *
4599 - * Case 1: vcpu keeps in non-root mode. Sending a
4600 - * notification event posts the interrupt to vcpu.
4601 - *
4602 - * Case 2: vcpu exits to root mode and is still
4603 - * runnable. PIR will be synced to vIRR before the
4604 - * next vcpu entry. Sending a notification event in
4605 - * this case has no effect, as vcpu is not in root
4606 - * mode.
4607 - *
4608 - * Case 3: vcpu exits to root mode and is blocked.
4609 - * vcpu_block() has already synced PIR to vIRR and
4610 - * never blocks vcpu if vIRR is not cleared. Therefore,
4611 - * a blocked vcpu here does not wait for any requested
4612 - * interrupts in PIR, and sending a notification event
4613 - * which has no effect is safe here.
4614 - */
4615 -
4616 - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
4617 - return true;
4618 - }
4619 -#endif
4620 - return false;
4621 -}
4622 -
4623 -static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
4624 - int vector)
4625 -{
4626 - struct vcpu_vmx *vmx = to_vmx(vcpu);
4627 -
4628 - if (is_guest_mode(vcpu) &&
4629 - vector == vmx->nested.posted_intr_nv) {
4630 - /*
4631 - * If a posted intr is not recognized by hardware,
4632 - * we will accomplish it in the next vmentry.
4633 - */
4634 - vmx->nested.pi_pending = true;
4635 - kvm_make_request(KVM_REQ_EVENT, vcpu);
4636 - /* the PIR and ON have been set by L1. */
4637 - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
4638 - kvm_vcpu_kick(vcpu);
4639 - return 0;
4640 - }
4641 - return -1;
4642 -}
4643 -/*
4644 - * Send interrupt to vcpu via posted interrupt way.
4645 - * 1. If target vcpu is running(non-root mode), send posted interrupt
4646 - * notification to vcpu and hardware will sync PIR to vIRR atomically.
4647 - * 2. If target vcpu isn't running(root mode), kick it to pick up the
4648 - * interrupt from PIR in next vmentry.
4649 - */
4650 -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
4651 -{
4652 - struct vcpu_vmx *vmx = to_vmx(vcpu);
4653 - int r;
4654 -
4655 - r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
4656 - if (!r)
4657 - return;
4658 -
4659 - if (pi_test_and_set_pir(vector, &vmx->pi_desc))
4660 - return;
4661 -
4662 - /* If a previous notification has sent the IPI, nothing to do. */
4663 - if (pi_test_and_set_on(&vmx->pi_desc))
4664 - return;
4665 -
4666 - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
4667 - kvm_vcpu_kick(vcpu);
4668 -}
4669 -
4670 -/*
4671 - * Set up the vmcs's constant host-state fields, i.e., host-state fields that
4672 - * will not change in the lifetime of the guest.
4673 - * Note that host-state that does change is set elsewhere. E.g., host-state
4674 - * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
4675 - */
4676 -void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
4677 -{
4678 - u32 low32, high32;
4679 - unsigned long tmpl;
4680 - unsigned long cr0, cr3, cr4;
4681 -
4682 - cr0 = read_cr0();
4683 - WARN_ON(cr0 & X86_CR0_TS);
4684 - vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
4685 -
4686 - /*
4687 - * Save the most likely value for this task's CR3 in the VMCS.
4688 - * We can't use __get_current_cr3_fast() because we're not atomic.
4689 - */
4690 - cr3 = __read_cr3();
4691 - vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
4692 - vmx->loaded_vmcs->host_state.cr3 = cr3;
4693 -
4694 - /* Save the most likely value for this task's CR4 in the VMCS. */
4695 - cr4 = cr4_read_shadow();
4696 - vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
4697 - vmx->loaded_vmcs->host_state.cr4 = cr4;
4698 -
4699 - vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
4700 -#ifdef CONFIG_X86_64
4701 - /*
4702 - * Load null selectors, so we can avoid reloading them in
4703 - * vmx_prepare_switch_to_host(), in case userspace uses
4704 - * the null selectors too (the expected case).
4705 - */
4706 - vmcs_write16(HOST_DS_SELECTOR, 0);
4707 - vmcs_write16(HOST_ES_SELECTOR, 0);
4708 -#else
4709 - vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4710 - vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4711 -#endif
4712 - vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4713 - vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
4714 -
4715 - vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
4716 -
4717 - vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
4718 -
4719 - rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
4720 - vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
4721 - rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
4722 - vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
4723 -
4724 - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
4725 - rdmsr(MSR_IA32_CR_PAT, low32, high32);
4726 - vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
4727 - }
4728 -
4729 - if (cpu_has_load_ia32_efer())
4730 - vmcs_write64(HOST_IA32_EFER, host_efer);
4731 -}
4732 -
4733 -void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
4734 -{
4735 - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
4736 - if (enable_ept)
4737 - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
4738 - if (is_guest_mode(&vmx->vcpu))
4739 - vmx->vcpu.arch.cr4_guest_owned_bits &=
4740 - ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
4741 - vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
4742 -}
4743 -
4744 -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4745 -{
4746 - u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
4747 -
4748 - if (!kvm_vcpu_apicv_active(&vmx->vcpu))
4749 - pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
4750 -
4751 - if (!enable_vnmi)
4752 - pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
4753 -
4754 - if (!enable_preemption_timer)
4755 - pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
4756 -
4757 - return pin_based_exec_ctrl;
4758 -}
4759 -
4760 -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
4761 -{
4762 - struct vcpu_vmx *vmx = to_vmx(vcpu);
4763 -
4764 - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4765 - if (cpu_has_secondary_exec_ctrls()) {
4766 - if (kvm_vcpu_apicv_active(vcpu))
4767 - secondary_exec_controls_setbit(vmx,
4768 - SECONDARY_EXEC_APIC_REGISTER_VIRT |
4769 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4770 - else
4771 - secondary_exec_controls_clearbit(vmx,
4772 - SECONDARY_EXEC_APIC_REGISTER_VIRT |
4773 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4774 - }
4775 -
4776 - if (cpu_has_vmx_msr_bitmap())
4777 - vmx_update_msr_bitmap(vcpu);
4778 -}
4779 -
4780 -u32 vmx_exec_control(struct vcpu_vmx *vmx)
4781 -{
4782 - u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
4783 -
4784 - if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
4785 - exec_control &= ~CPU_BASED_MOV_DR_EXITING;
4786 -
4787 - if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
4788 - exec_control &= ~CPU_BASED_TPR_SHADOW;
4789 -#ifdef CONFIG_X86_64
4790 - exec_control |= CPU_BASED_CR8_STORE_EXITING |
4791 - CPU_BASED_CR8_LOAD_EXITING;
4792 -#endif
4793 - }
4794 - if (!enable_ept)
4795 - exec_control |= CPU_BASED_CR3_STORE_EXITING |
4796 - CPU_BASED_CR3_LOAD_EXITING |
4797 - CPU_BASED_INVLPG_EXITING;
4798 - if (kvm_mwait_in_guest(vmx->vcpu.kvm))
4799 - exec_control &= ~(CPU_BASED_MWAIT_EXITING |
4800 - CPU_BASED_MONITOR_EXITING);
4801 - if (kvm_hlt_in_guest(vmx->vcpu.kvm))
4802 - exec_control &= ~CPU_BASED_HLT_EXITING;
4803 - return exec_control;
4804 -}
4805 -
4806 -
4807 -static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
4808 -{
4809 - struct kvm_vcpu *vcpu = &vmx->vcpu;
4810 -
4811 - u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
4812 -
4813 - if (pt_mode == PT_MODE_SYSTEM)
4814 - exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
4815 - if (!cpu_need_virtualize_apic_accesses(vcpu))
4816 - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
4817 - if (vmx->vpid == 0)
4818 - exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
4819 - if (!enable_ept) {
4820 - exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
4821 - enable_unrestricted_guest = 0;
4822 - }
4823 - if (!enable_unrestricted_guest)
4824 - exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
4825 - if (kvm_pause_in_guest(vmx->vcpu.kvm))
4826 - exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
4827 - if (!kvm_vcpu_apicv_active(vcpu))
4828 - exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
4829 - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4830 - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
4831 -
4832 - /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
4833 - * in vmx_set_cr4. */
4834 - exec_control &= ~SECONDARY_EXEC_DESC;
4835 -
4836 - /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
4837 - (handle_vmptrld).
4838 - We can NOT enable shadow_vmcs here because we don't have yet
4839 - a current VMCS12
4840 - */
4841 - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4842 -
4843 - if (!enable_pml)
4844 - exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4845 -
4846 - if (vmx_xsaves_supported()) {
4847 - /* Exposing XSAVES only when XSAVE is exposed */
4848 - bool xsaves_enabled =
4849 - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4850 - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
4851 -
4852 - vcpu->arch.xsaves_enabled = xsaves_enabled;
4853 -
4854 - if (!xsaves_enabled)
4855 - exec_control &= ~SECONDARY_EXEC_XSAVES;
4856 -
4857 - if (nested) {
4858 - if (xsaves_enabled)
4859 - vmx->nested.msrs.secondary_ctls_high |=
4860 - SECONDARY_EXEC_XSAVES;
4861 - else
4862 - vmx->nested.msrs.secondary_ctls_high &=
4863 - ~SECONDARY_EXEC_XSAVES;
4864 - }
4865 - }
4866 -
4867 - if (vmx_rdtscp_supported()) {
4868 - bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
4869 - if (!rdtscp_enabled)
4870 - exec_control &= ~SECONDARY_EXEC_RDTSCP;
4871 -
4872 - if (nested) {
4873 - if (rdtscp_enabled)
4874 - vmx->nested.msrs.secondary_ctls_high |=
4875 - SECONDARY_EXEC_RDTSCP;
4876 - else
4877 - vmx->nested.msrs.secondary_ctls_high &=
4878 - ~SECONDARY_EXEC_RDTSCP;
4879 - }
4880 - }
4881 -
4882 - if (vmx_invpcid_supported()) {
4883 - /* Exposing INVPCID only when PCID is exposed */
4884 - bool invpcid_enabled =
4885 - guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
4886 - guest_cpuid_has(vcpu, X86_FEATURE_PCID);
4887 -
4888 - if (!invpcid_enabled) {
4889 - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
4890 - guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
4891 - }
4892 -
4893 - if (nested) {
4894 - if (invpcid_enabled)
4895 - vmx->nested.msrs.secondary_ctls_high |=
4896 - SECONDARY_EXEC_ENABLE_INVPCID;
4897 - else
4898 - vmx->nested.msrs.secondary_ctls_high &=
4899 - ~SECONDARY_EXEC_ENABLE_INVPCID;
4900 - }
4901 - }
4902 -
4903 - if (vmx_rdrand_supported()) {
4904 - bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
4905 - if (rdrand_enabled)
4906 - exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
4907 -
4908 - if (nested) {
4909 - if (rdrand_enabled)
4910 - vmx->nested.msrs.secondary_ctls_high |=
4911 - SECONDARY_EXEC_RDRAND_EXITING;
4912 - else
4913 - vmx->nested.msrs.secondary_ctls_high &=
4914 - ~SECONDARY_EXEC_RDRAND_EXITING;
4915 - }
4916 - }
4917 -
4918 - if (vmx_rdseed_supported()) {
4919 - bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
4920 - if (rdseed_enabled)
4921 - exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
4922 -
4923 - if (nested) {
4924 - if (rdseed_enabled)
4925 - vmx->nested.msrs.secondary_ctls_high |=
4926 - SECONDARY_EXEC_RDSEED_EXITING;
4927 - else
4928 - vmx->nested.msrs.secondary_ctls_high &=
4929 - ~SECONDARY_EXEC_RDSEED_EXITING;
4930 - }
4931 - }
4932 -
4933 - if (vmx_waitpkg_supported()) {
4934 - bool waitpkg_enabled =
4935 - guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
4936 -
4937 - if (!waitpkg_enabled)
4938 - exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4939 -
4940 - if (nested) {
4941 - if (waitpkg_enabled)
4942 - vmx->nested.msrs.secondary_ctls_high |=
4943 - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4944 - else
4945 - vmx->nested.msrs.secondary_ctls_high &=
4946 - ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4947 - }
4948 - }
4949 -
4950 - vmx->secondary_exec_control = exec_control;
4951 -}
4952 -
4953 -static void ept_set_mmio_spte_mask(void)
4954 -{
4955 - /*
4956 - * EPT Misconfigurations can be generated if the value of bits 2:0
4957 - * of an EPT paging-structure entry is 110b (write/execute).
4958 - */
4959 - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
4960 - VMX_EPT_MISCONFIG_WX_VALUE, 0);
4961 -}
4962 -
4963 -#define VMX_XSS_EXIT_BITMAP 0
4964 -
4965 -/*
4966 - * Noting that the initialization of Guest-state Area of VMCS is in
4967 - * vmx_vcpu_reset().
4968 - */
4969 -static void init_vmcs(struct vcpu_vmx *vmx)
4970 -{
4971 - if (nested)
4972 - nested_vmx_set_vmcs_shadowing_bitmap();
4973 -
4974 - if (cpu_has_vmx_msr_bitmap())
4975 - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
4976 -
4977 - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
4978 -
4979 - /* Control */
4980 - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4981 -
4982 - exec_controls_set(vmx, vmx_exec_control(vmx));
4983 -
4984 - if (cpu_has_secondary_exec_ctrls()) {
4985 - vmx_compute_secondary_exec_control(vmx);
4986 - secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
4987 - }
4988 -
4989 - if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
4990 - vmcs_write64(EOI_EXIT_BITMAP0, 0);
4991 - vmcs_write64(EOI_EXIT_BITMAP1, 0);
4992 - vmcs_write64(EOI_EXIT_BITMAP2, 0);
4993 - vmcs_write64(EOI_EXIT_BITMAP3, 0);
4994 -
4995 - vmcs_write16(GUEST_INTR_STATUS, 0);
4996 -
4997 - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4998 - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4999 - }
5000 -
5001 - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
5002 - vmcs_write32(PLE_GAP, ple_gap);
5003 - vmx->ple_window = ple_window;
5004 - vmx->ple_window_dirty = true;
5005 - }
5006 -
5007 - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
5008 - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
5009 - vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
5010 -
5011 - vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
5012 - vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
5013 - vmx_set_constant_host_state(vmx);
5014 - vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
5015 - vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
5016 -
5017 - if (cpu_has_vmx_vmfunc())
5018 - vmcs_write64(VM_FUNCTION_CONTROL, 0);
5019 -
5020 - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
5021 - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
5022 - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
5023 - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
5024 - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
5025 -
5026 - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
5027 - vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
5028 -
5029 - vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
5030 -
5031 - /* 22.2.1, 20.8.1 */
5032 - vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
5033 -
5034 - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
5035 - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
5036 -
5037 - set_cr4_guest_host_mask(vmx);
5038 -
5039 - if (vmx->vpid != 0)
5040 - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
5041 -
5042 - if (vmx_xsaves_supported())
5043 - vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
5044 -
5045 - if (enable_pml) {
5046 - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
5047 - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5048 - }
5049 -
5050 - if (cpu_has_vmx_encls_vmexit())
5051 - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
5052 -
5053 - if (pt_mode == PT_MODE_HOST_GUEST) {
5054 - memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
5055 - /* Bit[6~0] are forced to 1, writes are ignored. */
5056 - vmx->pt_desc.guest.output_mask = 0x7F;
5057 - vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
5058 - }
5059 -}
5060 -
5061 -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5062 -{
5063 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5064 - struct msr_data apic_base_msr;
5065 - u64 cr0;
5066 -
5067 - vmx->rmode.vm86_active = 0;
5068 - vmx->spec_ctrl = 0;
5069 -
5070 - vmx->msr_ia32_umwait_control = 0;
5071 -
5072 - vcpu->arch.microcode_version = 0x100000000ULL;
5073 - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5074 - vmx->hv_deadline_tsc = -1;
5075 - kvm_set_cr8(vcpu, 0);
5076 -
5077 - if (!init_event) {
5078 - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
5079 - MSR_IA32_APICBASE_ENABLE;
5080 - if (kvm_vcpu_is_reset_bsp(vcpu))
5081 - apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
5082 - apic_base_msr.host_initiated = true;
5083 - kvm_set_apic_base(vcpu, &apic_base_msr);
5084 - }
5085 -
5086 - vmx_segment_cache_clear(vmx);
5087 -
5088 - seg_setup(VCPU_SREG_CS);
5089 - vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
5090 - vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
5091 -
5092 - seg_setup(VCPU_SREG_DS);
5093 - seg_setup(VCPU_SREG_ES);
5094 - seg_setup(VCPU_SREG_FS);
5095 - seg_setup(VCPU_SREG_GS);
5096 - seg_setup(VCPU_SREG_SS);
5097 -
5098 - vmcs_write16(GUEST_TR_SELECTOR, 0);
5099 - vmcs_writel(GUEST_TR_BASE, 0);
5100 - vmcs_write32(GUEST_TR_LIMIT, 0xffff);
5101 - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5102 -
5103 - vmcs_write16(GUEST_LDTR_SELECTOR, 0);
5104 - vmcs_writel(GUEST_LDTR_BASE, 0);
5105 - vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
5106 - vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
5107 -
5108 - if (!init_event) {
5109 - vmcs_write32(GUEST_SYSENTER_CS, 0);
5110 - vmcs_writel(GUEST_SYSENTER_ESP, 0);
5111 - vmcs_writel(GUEST_SYSENTER_EIP, 0);
5112 - vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5113 - }
5114 -
5115 - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
5116 - kvm_rip_write(vcpu, 0xfff0);
5117 -
5118 - vmcs_writel(GUEST_GDTR_BASE, 0);
5119 - vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
5120 -
5121 - vmcs_writel(GUEST_IDTR_BASE, 0);
5122 - vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
5123 -
5124 - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
5125 - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
5126 - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
5127 - if (kvm_mpx_supported())
5128 - vmcs_write64(GUEST_BNDCFGS, 0);
5129 -
5130 - setup_msrs(vmx);
5131 -
5132 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
5133 -
5134 - if (cpu_has_vmx_tpr_shadow() && !init_event) {
5135 - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
5136 - if (cpu_need_tpr_shadow(vcpu))
5137 - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
5138 - __pa(vcpu->arch.apic->regs));
5139 - vmcs_write32(TPR_THRESHOLD, 0);
5140 - }
5141 -
5142 - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
5143 -
5144 - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
5145 - vmx->vcpu.arch.cr0 = cr0;
5146 - vmx_set_cr0(vcpu, cr0); /* enter rmode */
5147 - vmx_set_cr4(vcpu, 0);
5148 - vmx_set_efer(vcpu, 0);
5149 -
5150 - update_exception_bitmap(vcpu);
5151 -
5152 - vpid_sync_context(vmx->vpid);
5153 - if (init_event)
5154 - vmx_clear_hlt(vcpu);
5155 -}
5156 -
5157 -static void enable_irq_window(struct kvm_vcpu *vcpu)
5158 -{
5159 - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5160 -}
5161 -
5162 -static void enable_nmi_window(struct kvm_vcpu *vcpu)
5163 -{
5164 - if (!enable_vnmi ||
5165 - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5166 - enable_irq_window(vcpu);
5167 - return;
5168 - }
5169 -
5170 - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5171 -}
5172 -
5173 -static void vmx_inject_irq(struct kvm_vcpu *vcpu)
5174 -{
5175 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5176 - uint32_t intr;
5177 - int irq = vcpu->arch.interrupt.nr;
5178 -
5179 - trace_kvm_inj_virq(irq);
5180 -
5181 - ++vcpu->stat.irq_injections;
5182 - if (vmx->rmode.vm86_active) {
5183 - int inc_eip = 0;
5184 - if (vcpu->arch.interrupt.soft)
5185 - inc_eip = vcpu->arch.event_exit_inst_len;
5186 - kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
5187 - return;
5188 - }
5189 - intr = irq | INTR_INFO_VALID_MASK;
5190 - if (vcpu->arch.interrupt.soft) {
5191 - intr |= INTR_TYPE_SOFT_INTR;
5192 - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
5193 - vmx->vcpu.arch.event_exit_inst_len);
5194 - } else
5195 - intr |= INTR_TYPE_EXT_INTR;
5196 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
5197 -
5198 - vmx_clear_hlt(vcpu);
5199 -}
5200 -
5201 -static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5202 -{
5203 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5204 -
5205 - if (!enable_vnmi) {
5206 - /*
5207 - * Tracking the NMI-blocked state in software is built upon
5208 - * finding the next open IRQ window. This, in turn, depends on
5209 - * well-behaving guests: They have to keep IRQs disabled at
5210 - * least as long as the NMI handler runs. Otherwise we may
5211 - * cause NMI nesting, maybe breaking the guest. But as this is
5212 - * highly unlikely, we can live with the residual risk.
5213 - */
5214 - vmx->loaded_vmcs->soft_vnmi_blocked = 1;
5215 - vmx->loaded_vmcs->vnmi_blocked_time = 0;
5216 - }
5217 -
5218 - ++vcpu->stat.nmi_injections;
5219 - vmx->loaded_vmcs->nmi_known_unmasked = false;
5220 -
5221 - if (vmx->rmode.vm86_active) {
5222 - kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
5223 - return;
5224 - }
5225 -
5226 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5227 - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
5228 -
5229 - vmx_clear_hlt(vcpu);
5230 -}
5231 -
5232 -bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
5233 -{
5234 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5235 - bool masked;
5236 -
5237 - if (!enable_vnmi)
5238 - return vmx->loaded_vmcs->soft_vnmi_blocked;
5239 - if (vmx->loaded_vmcs->nmi_known_unmasked)
5240 - return false;
5241 - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
5242 - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5243 - return masked;
5244 -}
5245 -
5246 -void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5247 -{
5248 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5249 -
5250 - if (!enable_vnmi) {
5251 - if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
5252 - vmx->loaded_vmcs->soft_vnmi_blocked = masked;
5253 - vmx->loaded_vmcs->vnmi_blocked_time = 0;
5254 - }
5255 - } else {
5256 - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5257 - if (masked)
5258 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5259 - GUEST_INTR_STATE_NMI);
5260 - else
5261 - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5262 - GUEST_INTR_STATE_NMI);
5263 - }
5264 -}
5265 -
5266 -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
5267 -{
5268 - if (to_vmx(vcpu)->nested.nested_run_pending)
5269 - return 0;
5270 -
5271 - if (!enable_vnmi &&
5272 - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
5273 - return 0;
5274 -
5275 - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5276 - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
5277 - | GUEST_INTR_STATE_NMI));
5278 -}
5279 -
5280 -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
5281 -{
5282 - return (!to_vmx(vcpu)->nested.nested_run_pending &&
5283 - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
5284 - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5285 - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
5286 -}
5287 -
5288 -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
5289 -{
5290 - int ret;
5291 -
5292 - if (enable_unrestricted_guest)
5293 - return 0;
5294 -
5295 - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
5296 - PAGE_SIZE * 3);
5297 - if (ret)
5298 - return ret;
5299 - to_kvm_vmx(kvm)->tss_addr = addr;
5300 - return init_rmode_tss(kvm);
5301 -}
5302 -
5303 -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5304 -{
5305 - to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
5306 - return 0;
5307 -}
5308 -
5309 -static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
5310 -{
5311 - switch (vec) {
5312 - case BP_VECTOR:
5313 - /*
5314 - * Update instruction length as we may reinject the exception
5315 - * from user space while in guest debugging mode.
5316 - */
5317 - to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
5318 - vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5319 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5320 - return false;
5321 - /* fall through */
5322 - case DB_VECTOR:
5323 - if (vcpu->guest_debug &
5324 - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5325 - return false;
5326 - /* fall through */
5327 - case DE_VECTOR:
5328 - case OF_VECTOR:
5329 - case BR_VECTOR:
5330 - case UD_VECTOR:
5331 - case DF_VECTOR:
5332 - case SS_VECTOR:
5333 - case GP_VECTOR:
5334 - case MF_VECTOR:
5335 - return true;
5336 - break;
5337 - }
5338 - return false;
5339 -}
5340 -
5341 -static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5342 - int vec, u32 err_code)
5343 -{
5344 - /*
5345 - * An instruction with the address size override prefix (opcode 0x67)
5346 - * causes a #SS fault with error code 0 in VM86 mode.
5347 - */
5348 - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
5349 - if (kvm_emulate_instruction(vcpu, 0)) {
5350 - if (vcpu->arch.halt_request) {
5351 - vcpu->arch.halt_request = 0;
5352 - return kvm_vcpu_halt(vcpu);
5353 - }
5354 - return 1;
5355 - }
5356 - return 0;
5357 - }
5358 -
5359 - /*
5360 - * Forward all other exceptions that are valid in real mode.
5361 - * FIXME: Breaks guest debugging in real mode, needs to be fixed with
5362 - * the required debugging infrastructure rework.
5363 - */
5364 - kvm_queue_exception(vcpu, vec);
5365 - return 1;
5366 -}
5367 -
5368 -/*
5369 - * Trigger machine check on the host. We assume all the MSRs are already set up
5370 - * by the CPU and that we still run on the same CPU as the MCE occurred on.
5371 - * We pass a fake environment to the machine check handler because we want
5372 - * the guest to be always treated like user space, no matter what context
5373 - * it used internally.
5374 - */
5375 -static void kvm_machine_check(void)
5376 -{
5377 -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
5378 - struct pt_regs regs = {
5379 - .cs = 3, /* Fake ring 3 no matter what the guest ran on */
5380 - .flags = X86_EFLAGS_IF,
5381 - };
5382 -
5383 - do_machine_check(&regs, 0);
5384 -#endif
5385 -}
5386 -
5387 -static int handle_machine_check(struct kvm_vcpu *vcpu)
5388 -{
5389 - /* handled by vmx_vcpu_run() */
5390 - return 1;
5391 -}
5392 -
5393 -static int handle_exception_nmi(struct kvm_vcpu *vcpu)
5394 -{
5395 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5396 - struct kvm_run *kvm_run = vcpu->run;
5397 - u32 intr_info, ex_no, error_code;
5398 - unsigned long cr2, rip, dr6;
5399 - u32 vect_info;
5400 -
5401 - vect_info = vmx->idt_vectoring_info;
5402 - intr_info = vmx->exit_intr_info;
5403 -
5404 - if (is_machine_check(intr_info) || is_nmi(intr_info))
5405 - return 1; /* handled by handle_exception_nmi_irqoff() */
5406 -
5407 - if (is_invalid_opcode(intr_info))
5408 - return handle_ud(vcpu);
5409 -
5410 - error_code = 0;
5411 - if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
5412 - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5413 -
5414 - if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
5415 - WARN_ON_ONCE(!enable_vmware_backdoor);
5416 -
5417 - /*
5418 - * VMware backdoor emulation on #GP interception only handles
5419 - * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
5420 - * error code on #GP.
5421 - */
5422 - if (error_code) {
5423 - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
5424 - return 1;
5425 - }
5426 - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
5427 - }
5428 -
5429 - /*
5430 - * A #PF with PFEC.RSVD = 1 indicates the guest is accessing
5431 - * MMIO, so it is better to report an internal error.
5432 - * See the comments in vmx_handle_exit.
5433 - */
5434 - if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5435 - !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
5436 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5437 - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
5438 - vcpu->run->internal.ndata = 3;
5439 - vcpu->run->internal.data[0] = vect_info;
5440 - vcpu->run->internal.data[1] = intr_info;
5441 - vcpu->run->internal.data[2] = error_code;
5442 - return 0;
5443 - }
5444 -
5445 - if (is_page_fault(intr_info)) {
5446 - cr2 = vmcs_readl(EXIT_QUALIFICATION);
5447 - /* EPT won't cause page fault directly */
5448 - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
5449 - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
5450 - }
5451 -
5452 - ex_no = intr_info & INTR_INFO_VECTOR_MASK;
5453 -
5454 - if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
5455 - return handle_rmode_exception(vcpu, ex_no, error_code);
5456 -
5457 - switch (ex_no) {
5458 - case AC_VECTOR:
5459 - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
5460 - return 1;
5461 - case DB_VECTOR:
5462 - dr6 = vmcs_readl(EXIT_QUALIFICATION);
5463 - if (!(vcpu->guest_debug &
5464 - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
5465 - vcpu->arch.dr6 &= ~DR_TRAP_BITS;
5466 - vcpu->arch.dr6 |= dr6 | DR6_RTM;
5467 - if (is_icebp(intr_info))
5468 - WARN_ON(!skip_emulated_instruction(vcpu));
5469 -
5470 - kvm_queue_exception(vcpu, DB_VECTOR);
5471 - return 1;
5472 - }
5473 - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
5474 - kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
5475 - /* fall through */
5476 - case BP_VECTOR:
5477 - /*
5478 - * Update instruction length as we may reinject #BP from
5479 - * user space while in guest debugging mode. Reading it for
5480 - * #DB as well causes no harm; it is not used in that case.
5481 - */
5482 - vmx->vcpu.arch.event_exit_inst_len =
5483 - vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5484 - kvm_run->exit_reason = KVM_EXIT_DEBUG;
5485 - rip = kvm_rip_read(vcpu);
5486 - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
5487 - kvm_run->debug.arch.exception = ex_no;
5488 - break;
5489 - default:
5490 - kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
5491 - kvm_run->ex.exception = ex_no;
5492 - kvm_run->ex.error_code = error_code;
5493 - break;
5494 - }
5495 - return 0;
5496 -}
5497 -
5498 -static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
5499 -{
5500 - ++vcpu->stat.irq_exits;
5501 - return 1;
5502 -}
5503 -
5504 -static int handle_triple_fault(struct kvm_vcpu *vcpu)
5505 -{
5506 - vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5507 - vcpu->mmio_needed = 0;
5508 - return 0;
5509 -}
5510 -
5511 -static int handle_io(struct kvm_vcpu *vcpu)
5512 -{
5513 - unsigned long exit_qualification;
5514 - int size, in, string;
5515 - unsigned port;
5516 -
5517 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5518 - string = (exit_qualification & 16) != 0;
5519 -
5520 - ++vcpu->stat.io_exits;
5521 -
5522 - if (string)
5523 - return kvm_emulate_instruction(vcpu, 0);
5524 -
5525 - port = exit_qualification >> 16;
5526 - size = (exit_qualification & 7) + 1;
5527 - in = (exit_qualification & 8) != 0;
5528 -
5529 - return kvm_fast_pio(vcpu, size, port, in);
5530 -}
5531 -
5532 -static void
5533 -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5534 -{
5535 - /*
5536 - * Patch in the VMCALL instruction:
5537 - */
5538 - hypercall[0] = 0x0f;
5539 - hypercall[1] = 0x01;
5540 - hypercall[2] = 0xc1;
5541 -}
5542 -
5543 -/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
5544 -static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
5545 -{
5546 - if (is_guest_mode(vcpu)) {
5547 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5548 - unsigned long orig_val = val;
5549 -
5550 - /*
5551 - * We get here when L2 changed cr0 in a way that did not change
5552 - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
5553 - * but did change L0 shadowed bits. So we first calculate the
5554 - * effective cr0 value that L1 would like to write into the
5555 - * hardware. It consists of the L2-owned bits from the new
5556 - * value combined with the L1-owned bits from L1's guest_cr0.
5557 - */
5558 - val = (val & ~vmcs12->cr0_guest_host_mask) |
5559 - (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
5560 -
5561 - if (!nested_guest_cr0_valid(vcpu, val))
5562 - return 1;
5563 -
5564 - if (kvm_set_cr0(vcpu, val))
5565 - return 1;
5566 - vmcs_writel(CR0_READ_SHADOW, orig_val);
5567 - return 0;
5568 - } else {
5569 - if (to_vmx(vcpu)->nested.vmxon &&
5570 - !nested_host_cr0_valid(vcpu, val))
5571 - return 1;
5572 -
5573 - return kvm_set_cr0(vcpu, val);
5574 - }
5575 -}
5576 -
5577 -static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
5578 -{
5579 - if (is_guest_mode(vcpu)) {
5580 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5581 - unsigned long orig_val = val;
5582 -
5583 - /* analogously to handle_set_cr0 */
5584 - val = (val & ~vmcs12->cr4_guest_host_mask) |
5585 - (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
5586 - if (kvm_set_cr4(vcpu, val))
5587 - return 1;
5588 - vmcs_writel(CR4_READ_SHADOW, orig_val);
5589 - return 0;
5590 - } else
5591 - return kvm_set_cr4(vcpu, val);
5592 -}
5593 -
5594 -static int handle_desc(struct kvm_vcpu *vcpu)
5595 -{
5596 - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
5597 - return kvm_emulate_instruction(vcpu, 0);
5598 -}
5599 -
5600 -static int handle_cr(struct kvm_vcpu *vcpu)
5601 -{
5602 - unsigned long exit_qualification, val;
5603 - int cr;
5604 - int reg;
5605 - int err;
5606 - int ret;
5607 -
5608 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5609 - cr = exit_qualification & 15;
5610 - reg = (exit_qualification >> 8) & 15;
5611 - switch ((exit_qualification >> 4) & 3) {
5612 - case 0: /* mov to cr */
5613 - val = kvm_register_readl(vcpu, reg);
5614 - trace_kvm_cr_write(cr, val);
5615 - switch (cr) {
5616 - case 0:
5617 - err = handle_set_cr0(vcpu, val);
5618 - return kvm_complete_insn_gp(vcpu, err);
5619 - case 3:
5620 - WARN_ON_ONCE(enable_unrestricted_guest);
5621 - err = kvm_set_cr3(vcpu, val);
5622 - return kvm_complete_insn_gp(vcpu, err);
5623 - case 4:
5624 - err = handle_set_cr4(vcpu, val);
5625 - return kvm_complete_insn_gp(vcpu, err);
5626 - case 8: {
5627 - u8 cr8_prev = kvm_get_cr8(vcpu);
5628 - u8 cr8 = (u8)val;
5629 - err = kvm_set_cr8(vcpu, cr8);
5630 - ret = kvm_complete_insn_gp(vcpu, err);
5631 - if (lapic_in_kernel(vcpu))
5632 - return ret;
5633 - if (cr8_prev <= cr8)
5634 - return ret;
5635 - /*
5636 - * TODO: we might be squashing a
5637 - * KVM_GUESTDBG_SINGLESTEP-triggered
5638 - * KVM_EXIT_DEBUG here.
5639 - */
5640 - vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
5641 - return 0;
5642 - }
5643 - }
5644 - break;
5645 - case 2: /* clts */
5646 - WARN_ONCE(1, "Guest should always own CR0.TS");
5647 - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
5648 - trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
5649 - return kvm_skip_emulated_instruction(vcpu);
5650 - case 1: /*mov from cr*/
5651 - switch (cr) {
5652 - case 3:
5653 - WARN_ON_ONCE(enable_unrestricted_guest);
5654 - val = kvm_read_cr3(vcpu);
5655 - kvm_register_write(vcpu, reg, val);
5656 - trace_kvm_cr_read(cr, val);
5657 - return kvm_skip_emulated_instruction(vcpu);
5658 - case 8:
5659 - val = kvm_get_cr8(vcpu);
5660 - kvm_register_write(vcpu, reg, val);
5661 - trace_kvm_cr_read(cr, val);
5662 - return kvm_skip_emulated_instruction(vcpu);
5663 - }
5664 - break;
5665 - case 3: /* lmsw */
5666 - val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5667 - trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
5668 - kvm_lmsw(vcpu, val);
5669 -
5670 - return kvm_skip_emulated_instruction(vcpu);
5671 - default:
5672 - break;
5673 - }
5674 - vcpu->run->exit_reason = 0;
5675 - vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
5676 - (int)(exit_qualification >> 4) & 3, cr);
5677 - return 0;
5678 -}
5679 -
5680 -static int handle_dr(struct kvm_vcpu *vcpu)
5681 -{
5682 - unsigned long exit_qualification;
5683 - int dr, dr7, reg;
5684 -
5685 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5686 - dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5687 -
5688 - /* First, if DR does not exist, trigger UD */
5689 - if (!kvm_require_dr(vcpu, dr))
5690 - return 1;
5691 -
5692 - /* Do not handle if the CPL > 0, will trigger GP on re-entry */
5693 - if (!kvm_require_cpl(vcpu, 0))
5694 - return 1;
5695 - dr7 = vmcs_readl(GUEST_DR7);
5696 - if (dr7 & DR7_GD) {
5697 - /*
5698 - * As the vm-exit takes precedence over the debug trap, we
5699 - * need to emulate the latter, either for the host or the
5700 - * guest debugging itself.
5701 - */
5702 - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5703 - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
5704 - vcpu->run->debug.arch.dr7 = dr7;
5705 - vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
5706 - vcpu->run->debug.arch.exception = DB_VECTOR;
5707 - vcpu->run->exit_reason = KVM_EXIT_DEBUG;
5708 - return 0;
5709 - } else {
5710 - vcpu->arch.dr6 &= ~DR_TRAP_BITS;
5711 - vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
5712 - kvm_queue_exception(vcpu, DB_VECTOR);
5713 - return 1;
5714 - }
5715 - }
5716 -
5717 - if (vcpu->guest_debug == 0) {
5718 - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5719 -
5720 - /*
5721 - * No more DR vmexits; force a reload of the debug registers
5722 - * and reenter on this instruction. The next vmexit will
5723 - * retrieve the full state of the debug registers.
5724 - */
5725 - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
5726 - return 1;
5727 - }
5728 -
5729 - reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5730 - if (exit_qualification & TYPE_MOV_FROM_DR) {
5731 - unsigned long val;
5732 -
5733 - if (kvm_get_dr(vcpu, dr, &val))
5734 - return 1;
5735 - kvm_register_write(vcpu, reg, val);
5736 - } else
5737 - if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
5738 - return 1;
5739 -
5740 - return kvm_skip_emulated_instruction(vcpu);
5741 -}
5742 -
5743 -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5744 -{
5745 - return vcpu->arch.dr6;
5746 -}
5747 -
5748 -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5749 -{
5750 -}
5751 -
5752 -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5753 -{
5754 - get_debugreg(vcpu->arch.db[0], 0);
5755 - get_debugreg(vcpu->arch.db[1], 1);
5756 - get_debugreg(vcpu->arch.db[2], 2);
5757 - get_debugreg(vcpu->arch.db[3], 3);
5758 - get_debugreg(vcpu->arch.dr6, 6);
5759 - vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5760 -
5761 - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5762 - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5763 -}
5764 -
5765 -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5766 -{
5767 - vmcs_writel(GUEST_DR7, val);
5768 -}
5769 -
5770 -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
5771 -{
5772 - kvm_apic_update_ppr(vcpu);
5773 - return 1;
5774 -}
5775 -
5776 -static int handle_interrupt_window(struct kvm_vcpu *vcpu)
5777 -{
5778 - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5779 -
5780 - kvm_make_request(KVM_REQ_EVENT, vcpu);
5781 -
5782 - ++vcpu->stat.irq_window_exits;
5783 - return 1;
5784 -}
5785 -
5786 -static int handle_vmcall(struct kvm_vcpu *vcpu)
5787 -{
5788 - return kvm_emulate_hypercall(vcpu);
5789 -}
5790 -
5791 -static int handle_invd(struct kvm_vcpu *vcpu)
5792 -{
5793 - return kvm_emulate_instruction(vcpu, 0);
5794 -}
5795 -
5796 -static int handle_invlpg(struct kvm_vcpu *vcpu)
5797 -{
5798 - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5799 -
5800 - kvm_mmu_invlpg(vcpu, exit_qualification);
5801 - return kvm_skip_emulated_instruction(vcpu);
5802 -}
5803 -
5804 -static int handle_rdpmc(struct kvm_vcpu *vcpu)
5805 -{
5806 - int err;
5807 -
5808 - err = kvm_rdpmc(vcpu);
5809 - return kvm_complete_insn_gp(vcpu, err);
5810 -}
5811 -
5812 -static int handle_wbinvd(struct kvm_vcpu *vcpu)
5813 -{
5814 - return kvm_emulate_wbinvd(vcpu);
5815 -}
5816 -
5817 -static int handle_xsetbv(struct kvm_vcpu *vcpu)
5818 -{
5819 - u64 new_bv = kvm_read_edx_eax(vcpu);
5820 - u32 index = kvm_rcx_read(vcpu);
5821 -
5822 - if (kvm_set_xcr(vcpu, index, new_bv) == 0)
5823 - return kvm_skip_emulated_instruction(vcpu);
5824 - return 1;
5825 -}
5826 -
5827 -static int handle_apic_access(struct kvm_vcpu *vcpu)
5828 -{
5829 - if (likely(fasteoi)) {
5830 - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5831 - int access_type, offset;
5832 -
5833 - access_type = exit_qualification & APIC_ACCESS_TYPE;
5834 - offset = exit_qualification & APIC_ACCESS_OFFSET;
5835 - /*
5836 - * A sane guest uses MOV to write EOI; the written value
5837 - * doesn't matter. So take a short-circuit here and avoid
5838 - * heavy instruction emulation.
5839 - */
5840 - if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
5841 - (offset == APIC_EOI)) {
5842 - kvm_lapic_set_eoi(vcpu);
5843 - return kvm_skip_emulated_instruction(vcpu);
5844 - }
5845 - }
5846 - return kvm_emulate_instruction(vcpu, 0);
5847 -}
5848 -
5849 -static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
5850 -{
5851 - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5852 - int vector = exit_qualification & 0xff;
5853 -
5854 - /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
5855 - kvm_apic_set_eoi_accelerated(vcpu, vector);
5856 - return 1;
5857 -}
5858 -
5859 -static int handle_apic_write(struct kvm_vcpu *vcpu)
5860 -{
5861 - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5862 - u32 offset = exit_qualification & 0xfff;
5863 -
5864 - /* APIC-write VM exit is trap-like and thus no need to adjust IP */
5865 - kvm_apic_write_nodecode(vcpu, offset);
5866 - return 1;
5867 -}
5868 -
5869 -static int handle_task_switch(struct kvm_vcpu *vcpu)
5870 -{
5871 - struct vcpu_vmx *vmx = to_vmx(vcpu);
5872 - unsigned long exit_qualification;
5873 - bool has_error_code = false;
5874 - u32 error_code = 0;
5875 - u16 tss_selector;
5876 - int reason, type, idt_v, idt_index;
5877 -
5878 - idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
5879 - idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
5880 - type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
5881 -
5882 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5883 -
5884 - reason = (u32)exit_qualification >> 30;
5885 - if (reason == TASK_SWITCH_GATE && idt_v) {
5886 - switch (type) {
5887 - case INTR_TYPE_NMI_INTR:
5888 - vcpu->arch.nmi_injected = false;
5889 - vmx_set_nmi_mask(vcpu, true);
5890 - break;
5891 - case INTR_TYPE_EXT_INTR:
5892 - case INTR_TYPE_SOFT_INTR:
5893 - kvm_clear_interrupt_queue(vcpu);
5894 - break;
5895 - case INTR_TYPE_HARD_EXCEPTION:
5896 - if (vmx->idt_vectoring_info &
5897 - VECTORING_INFO_DELIVER_CODE_MASK) {
5898 - has_error_code = true;
5899 - error_code =
5900 - vmcs_read32(IDT_VECTORING_ERROR_CODE);
5901 - }
5902 - /* fall through */
5903 - case INTR_TYPE_SOFT_EXCEPTION:
5904 - kvm_clear_exception_queue(vcpu);
5905 - break;
5906 - default:
5907 - break;
5908 - }
5909 - }
5910 - tss_selector = exit_qualification;
5911 -
5912 - if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
5913 - type != INTR_TYPE_EXT_INTR &&
5914 - type != INTR_TYPE_NMI_INTR))
5915 - WARN_ON(!skip_emulated_instruction(vcpu));
5916 -
5917 - /*
5918 - * TODO: What about debug traps on tss switch?
5919 - * Are we supposed to inject them and update dr6?
5920 - */
5921 - return kvm_task_switch(vcpu, tss_selector,
5922 - type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
5923 - reason, has_error_code, error_code);
5924 -}
5925 -
5926 -static int handle_ept_violation(struct kvm_vcpu *vcpu)
5927 -{
5928 - unsigned long exit_qualification;
5929 - gpa_t gpa;
5930 - u64 error_code;
5931 -
5932 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5933 -
5934 - /*
5935 - * EPT violation happened while executing iret from NMI,
5936 - * "blocked by NMI" bit has to be set before next VM entry.
5937 - * There are errata that may cause this bit to not be set:
5938 - * AAK134, BY25.
5939 - */
5940 - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5941 - enable_vnmi &&
5942 - (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5943 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
5944 -
5945 - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5946 - trace_kvm_page_fault(gpa, exit_qualification);
5947 -
5948 - /* Is it a read fault? */
5949 - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
5950 - ? PFERR_USER_MASK : 0;
5951 - /* Is it a write fault? */
5952 - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
5953 - ? PFERR_WRITE_MASK : 0;
5954 - /* Is it a fetch fault? */
5955 - error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
5956 - ? PFERR_FETCH_MASK : 0;
5957 - /* ept page table entry is present? */
5958 - error_code |= (exit_qualification &
5959 - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
5960 - EPT_VIOLATION_EXECUTABLE))
5961 - ? PFERR_PRESENT_MASK : 0;
5962 -
5963 - error_code |= (exit_qualification & 0x100) != 0 ?
5964 - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
5965 -
5966 - vcpu->arch.exit_qualification = exit_qualification;
5967 - return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5968 -}
5969 -
5970 -static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5971 -{
5972 - gpa_t gpa;
5973 -
5974 - /*
5975 - * A nested guest cannot optimize MMIO vmexits, because we have an
5976 - * nGPA here instead of the required GPA.
5977 - */
5978 - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5979 - if (!is_guest_mode(vcpu) &&
5980 - !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5981 - trace_kvm_fast_mmio(gpa);
5982 - return kvm_skip_emulated_instruction(vcpu);
5983 - }
5984 -
5985 - return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
5986 -}
5987 -
5988 -static int handle_nmi_window(struct kvm_vcpu *vcpu)
5989 -{
5990 - WARN_ON_ONCE(!enable_vnmi);
5991 - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5992 - ++vcpu->stat.nmi_window_exits;
5993 - kvm_make_request(KVM_REQ_EVENT, vcpu);
5994 -
5995 - return 1;
5996 -}
5997 -
5998 -static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5999 -{
6000 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6001 - bool intr_window_requested;
6002 - unsigned count = 130;
6003 -
6004 - /*
6005 - * We should never reach the point where we are emulating L2
6006 - * due to invalid guest state as that means we incorrectly
6007 - * allowed a nested VMEntry with an invalid vmcs12.
6008 - */
6009 - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
6010 -
6011 - intr_window_requested = exec_controls_get(vmx) &
6012 - CPU_BASED_INTR_WINDOW_EXITING;
6013 -
6014 - while (vmx->emulation_required && count-- != 0) {
6015 - if (intr_window_requested && vmx_interrupt_allowed(vcpu))
6016 - return handle_interrupt_window(&vmx->vcpu);
6017 -
6018 - if (kvm_test_request(KVM_REQ_EVENT, vcpu))
6019 - return 1;
6020 -
6021 - if (!kvm_emulate_instruction(vcpu, 0))
6022 - return 0;
6023 -
6024 - if (vmx->emulation_required && !vmx->rmode.vm86_active &&
6025 - vcpu->arch.exception.pending) {
6026 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6027 - vcpu->run->internal.suberror =
6028 - KVM_INTERNAL_ERROR_EMULATION;
6029 - vcpu->run->internal.ndata = 0;
6030 - return 0;
6031 - }
6032 -
6033 - if (vcpu->arch.halt_request) {
6034 - vcpu->arch.halt_request = 0;
6035 - return kvm_vcpu_halt(vcpu);
6036 - }
6037 -
6038 - /*
6039 - * Note, return 1 and not 0, vcpu_run() is responsible for
6040 - * morphing the pending signal into the proper return code.
6041 - */
6042 - if (signal_pending(current))
6043 - return 1;
6044 -
6045 - if (need_resched())
6046 - schedule();
6047 - }
6048 -
6049 - return 1;
6050 -}
6051 -
6052 -static void grow_ple_window(struct kvm_vcpu *vcpu)
6053 -{
6054 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6055 - unsigned int old = vmx->ple_window;
6056 -
6057 - vmx->ple_window = __grow_ple_window(old, ple_window,
6058 - ple_window_grow,
6059 - ple_window_max);
6060 -
6061 - if (vmx->ple_window != old) {
6062 - vmx->ple_window_dirty = true;
6063 - trace_kvm_ple_window_update(vcpu->vcpu_id,
6064 - vmx->ple_window, old);
6065 - }
6066 -}
6067 -
6068 -static void shrink_ple_window(struct kvm_vcpu *vcpu)
6069 -{
6070 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6071 - unsigned int old = vmx->ple_window;
6072 -
6073 - vmx->ple_window = __shrink_ple_window(old, ple_window,
6074 - ple_window_shrink,
6075 - ple_window);
6076 -
6077 - if (vmx->ple_window != old) {
6078 - vmx->ple_window_dirty = true;
6079 - trace_kvm_ple_window_update(vcpu->vcpu_id,
6080 - vmx->ple_window, old);
6081 - }
6082 -}
6083 -
6084 -/*
6085 - * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
6086 - */
6087 -static void wakeup_handler(void)
6088 -{
6089 - struct kvm_vcpu *vcpu;
6090 - int cpu = smp_processor_id();
6091 -
6092 - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6093 - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
6094 - blocked_vcpu_list) {
6095 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6096 -
6097 - if (pi_test_on(pi_desc) == 1)
6098 - kvm_vcpu_kick(vcpu);
6099 - }
6100 - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6101 -}
6102 -
6103 -static void vmx_enable_tdp(void)
6104 -{
6105 - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
6106 - enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
6107 - enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
6108 - 0ull, VMX_EPT_EXECUTABLE_MASK,
6109 - cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
6110 - VMX_EPT_RWX_MASK, 0ull);
6111 -
6112 - ept_set_mmio_spte_mask();
6113 - kvm_enable_tdp();
6114 -}
6115 -
6116 -/*
6117 - * Indicates a vcpu busy-waiting on a spinlock. We do not enable PAUSE
6118 - * exiting, so we only get here on a CPU with PAUSE-Loop-Exiting.
6119 - */
6120 -static int handle_pause(struct kvm_vcpu *vcpu)
6121 -{
6122 - if (!kvm_pause_in_guest(vcpu->kvm))
6123 - grow_ple_window(vcpu);
6124 -
6125 - /*
6126 - * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
6127 - * VM-execution control is ignored if CPL > 0. OTOH, KVM
6128 - * never sets PAUSE_EXITING and only sets PLE if supported,
6129 - * so the vcpu must be CPL=0 if it gets a PAUSE exit.
6130 - */
6131 - kvm_vcpu_on_spin(vcpu, true);
6132 - return kvm_skip_emulated_instruction(vcpu);
6133 -}
6134 -
6135 -static int handle_nop(struct kvm_vcpu *vcpu)
6136 -{
6137 - return kvm_skip_emulated_instruction(vcpu);
6138 -}
6139 -
6140 -static int handle_mwait(struct kvm_vcpu *vcpu)
6141 -{
6142 - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
6143 - return handle_nop(vcpu);
6144 -}
6145 -
6146 -static int handle_invalid_op(struct kvm_vcpu *vcpu)
6147 -{
6148 - kvm_queue_exception(vcpu, UD_VECTOR);
6149 - return 1;
6150 -}
6151 -
6152 -static int handle_monitor_trap(struct kvm_vcpu *vcpu)
6153 -{
6154 - return 1;
6155 -}
6156 -
6157 -static int handle_monitor(struct kvm_vcpu *vcpu)
6158 -{
6159 - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
6160 - return handle_nop(vcpu);
6161 -}
6162 -
6163 -static int handle_invpcid(struct kvm_vcpu *vcpu)
6164 -{
6165 - u32 vmx_instruction_info;
6166 - unsigned long type;
6167 - bool pcid_enabled;
6168 - gva_t gva;
6169 - struct x86_exception e;
6170 - unsigned i;
6171 - unsigned long roots_to_free = 0;
6172 - struct {
6173 - u64 pcid;
6174 - u64 gla;
6175 - } operand;
6176 -
6177 - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
6178 - kvm_queue_exception(vcpu, UD_VECTOR);
6179 - return 1;
6180 - }
6181 -
6182 - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
6183 - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
6184 -
6185 - if (type > 3) {
6186 - kvm_inject_gp(vcpu, 0);
6187 - return 1;
6188 - }
6189 -
6190 - /* According to the Intel instruction reference, the memory operand
6191 - * is read even if it isn't needed (e.g., for type==all)
6192 - */
6193 - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
6194 - vmx_instruction_info, false,
6195 - sizeof(operand), &gva))
6196 - return 1;
6197 -
6198 - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
6199 - kvm_inject_page_fault(vcpu, &e);
6200 - return 1;
6201 - }
6202 -
6203 - if (operand.pcid >> 12 != 0) {
6204 - kvm_inject_gp(vcpu, 0);
6205 - return 1;
6206 - }
6207 -
6208 - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
6209 -
6210 - switch (type) {
6211 - case INVPCID_TYPE_INDIV_ADDR:
6212 - if ((!pcid_enabled && (operand.pcid != 0)) ||
6213 - is_noncanonical_address(operand.gla, vcpu)) {
6214 - kvm_inject_gp(vcpu, 0);
6215 - return 1;
6216 - }
6217 - kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
6218 - return kvm_skip_emulated_instruction(vcpu);
6219 -
6220 - case INVPCID_TYPE_SINGLE_CTXT:
6221 - if (!pcid_enabled && (operand.pcid != 0)) {
6222 - kvm_inject_gp(vcpu, 0);
6223 - return 1;
6224 - }
6225 -
6226 - if (kvm_get_active_pcid(vcpu) == operand.pcid) {
6227 - kvm_mmu_sync_roots(vcpu);
6228 - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
6229 - }
6230 -
6231 - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
6232 - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
6233 - == operand.pcid)
6234 - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
6235 -
6236 - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
6237 - /*
6238 - * If neither the current cr3 nor any of the prev_roots use the
6239 - * given PCID, then nothing needs to be done here because a
6240 - * resync will happen anyway before switching to any other CR3.
6241 - */
6242 -
6243 - return kvm_skip_emulated_instruction(vcpu);
6244 -
6245 - case INVPCID_TYPE_ALL_NON_GLOBAL:
6246 - /*
6247 - * Currently, KVM doesn't mark global entries in the shadow
6248 - * page tables, so a non-global flush just degenerates to a
6249 - * global flush. If needed, we could optimize this later by
6250 - * keeping track of global entries in shadow page tables.
6251 - */
6252 -
6253 - /* fall-through */
6254 - case INVPCID_TYPE_ALL_INCL_GLOBAL:
6255 - kvm_mmu_unload(vcpu);
6256 - return kvm_skip_emulated_instruction(vcpu);
6257 -
6258 - default:
6259 - BUG(); /* We have already checked above that type <= 3 */
6260 - }
6261 -}
6262 -
6263 -static int handle_pml_full(struct kvm_vcpu *vcpu)
6264 -{
6265 - unsigned long exit_qualification;
6266 -
6267 - trace_kvm_pml_full(vcpu->vcpu_id);
6268 -
6269 - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6270 -
6271 - /*
6272 - * PML buffer FULL happened while executing iret from NMI,
6273 - * "blocked by NMI" bit has to be set before next VM entry.
6274 - */
6275 - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
6276 - enable_vnmi &&
6277 - (exit_qualification & INTR_INFO_UNBLOCK_NMI))
6278 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6279 - GUEST_INTR_STATE_NMI);
6280 -
6281 - /*
6282 - * PML buffer already flushed at beginning of VMEXIT. Nothing to do
6283 - * here, and there's no userspace involvement needed for PML.
6284 - */
6285 - return 1;
6286 -}
6287 -
6288 -static int handle_preemption_timer(struct kvm_vcpu *vcpu)
6289 -{
6290 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6291 -
6292 - if (!vmx->req_immediate_exit &&
6293 - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
6294 - kvm_lapic_expired_hv_timer(vcpu);
6295 -
6296 - return 1;
6297 -}
6298 -
6299 -/*
6300 - * When nested=0, all VMX instruction VM Exits filter here. The handlers
6301 - * are overwritten by nested_vmx_setup() when nested=1.
6302 - */
6303 -static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
6304 -{
6305 - kvm_queue_exception(vcpu, UD_VECTOR);
6306 - return 1;
6307 -}
6308 -
6309 -static int handle_encls(struct kvm_vcpu *vcpu)
6310 -{
6311 - /*
6312 - * SGX virtualization is not yet supported. There is no software
6313 - * enable bit for SGX, so we have to trap ENCLS and inject a #UD
6314 - * to prevent the guest from executing ENCLS.
6315 - */
6316 - kvm_queue_exception(vcpu, UD_VECTOR);
6317 - return 1;
6318 -}
6319 -
6320 -/*
6321 - * The exit handlers return 1 if the exit was handled fully and guest execution
6322 - * may resume. Otherwise they set the kvm_run parameter to indicate what needs
6323 - * to be done to userspace and return 0.
6324 - */
6325 -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6326 - [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
6327 - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
6328 - [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
6329 - [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
6330 - [EXIT_REASON_IO_INSTRUCTION] = handle_io,
6331 - [EXIT_REASON_CR_ACCESS] = handle_cr,
6332 - [EXIT_REASON_DR_ACCESS] = handle_dr,
6333 - [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
6334 - [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
6335 - [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
6336 - [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
6337 - [EXIT_REASON_HLT] = kvm_emulate_halt,
6338 - [EXIT_REASON_INVD] = handle_invd,
6339 - [EXIT_REASON_INVLPG] = handle_invlpg,
6340 - [EXIT_REASON_RDPMC] = handle_rdpmc,
6341 - [EXIT_REASON_VMCALL] = handle_vmcall,
6342 - [EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
6343 - [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
6344 - [EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
6345 - [EXIT_REASON_VMPTRST] = handle_vmx_instruction,
6346 - [EXIT_REASON_VMREAD] = handle_vmx_instruction,
6347 - [EXIT_REASON_VMRESUME] = handle_vmx_instruction,
6348 - [EXIT_REASON_VMWRITE] = handle_vmx_instruction,
6349 - [EXIT_REASON_VMOFF] = handle_vmx_instruction,
6350 - [EXIT_REASON_VMON] = handle_vmx_instruction,
6351 - [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
6352 - [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
6353 - [EXIT_REASON_APIC_WRITE] = handle_apic_write,
6354 - [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
6355 - [EXIT_REASON_WBINVD] = handle_wbinvd,
6356 - [EXIT_REASON_XSETBV] = handle_xsetbv,
6357 - [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
6358 - [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
6359 - [EXIT_REASON_GDTR_IDTR] = handle_desc,
6360 - [EXIT_REASON_LDTR_TR] = handle_desc,
6361 - [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
6362 - [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
6363 - [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
6364 - [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6365 - [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
6366 - [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6367 - [EXIT_REASON_INVEPT] = handle_vmx_instruction,
6368 - [EXIT_REASON_INVVPID] = handle_vmx_instruction,
6369 - [EXIT_REASON_RDRAND] = handle_invalid_op,
6370 - [EXIT_REASON_RDSEED] = handle_invalid_op,
6371 - [EXIT_REASON_PML_FULL] = handle_pml_full,
6372 - [EXIT_REASON_INVPCID] = handle_invpcid,
6373 - [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
6374 - [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
6375 - [EXIT_REASON_ENCLS] = handle_encls,
6376 -};
6377 -
6378 -static const int kvm_vmx_max_exit_handlers =
6379 - ARRAY_SIZE(kvm_vmx_exit_handlers);
6380 -
6381 -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6382 -{
6383 - *info1 = vmcs_readl(EXIT_QUALIFICATION);
6384 - *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6385 -}
6386 -
6387 -static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
6388 -{
6389 - if (vmx->pml_pg) {
6390 - __free_page(vmx->pml_pg);
6391 - vmx->pml_pg = NULL;
6392 - }
6393 -}
6394 -
6395 -static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
6396 -{
6397 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6398 - u64 *pml_buf;
6399 - u16 pml_idx;
6400 -
6401 - pml_idx = vmcs_read16(GUEST_PML_INDEX);
6402 -
6403 - /* Do nothing if PML buffer is empty */
6404 - if (pml_idx == (PML_ENTITY_NUM - 1))
6405 - return;
6406 -
6407 - /* PML index always points to next available PML buffer entity */
6408 - if (pml_idx >= PML_ENTITY_NUM)
6409 - pml_idx = 0;
6410 - else
6411 - pml_idx++;
6412 -
6413 - pml_buf = page_address(vmx->pml_pg);
6414 - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
6415 - u64 gpa;
6416 -
6417 - gpa = pml_buf[pml_idx];
6418 - WARN_ON(gpa & (PAGE_SIZE - 1));
6419 - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
6420 - }
6421 -
6422 - /* reset PML index */
6423 - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6424 -}
6425 -
6426 -/*
6427 - * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
6428 - * Called before reporting dirty_bitmap to userspace.
6429 - */
6430 -static void kvm_flush_pml_buffers(struct kvm *kvm)
6431 -{
6432 - int i;
6433 - struct kvm_vcpu *vcpu;
6434 - /*
6435 - * We only need to kick vcpus out of guest mode here, as the PML buffer
6436 - * is flushed at the beginning of every VMEXIT; only vcpus currently
6437 - * running in guest mode can have unflushed GPAs in their PML
6438 - * buffer.
6439 - */
6440 - kvm_for_each_vcpu(i, vcpu, kvm)
6441 - kvm_vcpu_kick(vcpu);
6442 -}
6443 -
6444 -static void vmx_dump_sel(char *name, uint32_t sel)
6445 -{
6446 - pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
6447 - name, vmcs_read16(sel),
6448 - vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
6449 - vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
6450 - vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
6451 -}
6452 -
6453 -static void vmx_dump_dtsel(char *name, uint32_t limit)
6454 -{
6455 - pr_err("%s limit=0x%08x, base=0x%016lx\n",
6456 - name, vmcs_read32(limit),
6457 - vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
6458 -}
6459 -
6460 -void dump_vmcs(void)
6461 -{
6462 - u32 vmentry_ctl, vmexit_ctl;
6463 - u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
6464 - unsigned long cr4;
6465 - u64 efer;
6466 - int i, n;
6467 -
6468 - if (!dump_invalid_vmcs) {
6469 - pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
6470 - return;
6471 - }
6472 -
6473 - vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
6474 - vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
6475 - cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6476 - pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
6477 - cr4 = vmcs_readl(GUEST_CR4);
6478 - efer = vmcs_read64(GUEST_IA32_EFER);
6479 - secondary_exec_control = 0;
6480 - if (cpu_has_secondary_exec_ctrls())
6481 - secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6482 -
6483 - pr_err("*** Guest State ***\n");
6484 - pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6485 - vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
6486 - vmcs_readl(CR0_GUEST_HOST_MASK));
6487 - pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6488 - cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
6489 - pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
6490 - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
6491 - (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
6492 - {
6493 - pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
6494 - vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
6495 - pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
6496 - vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
6497 - }
6498 - pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
6499 - vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
6500 - pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
6501 - vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
6502 - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6503 - vmcs_readl(GUEST_SYSENTER_ESP),
6504 - vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
6505 - vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
6506 - vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
6507 - vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
6508 - vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
6509 - vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
6510 - vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
6511 - vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
6512 - vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
6513 - vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
6514 - vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
6515 - if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
6516 - (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
6517 - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
6518 - efer, vmcs_read64(GUEST_IA32_PAT));
6519 - pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
6520 - vmcs_read64(GUEST_IA32_DEBUGCTL),
6521 - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
6522 - if (cpu_has_load_perf_global_ctrl() &&
6523 - vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
6524 - pr_err("PerfGlobCtl = 0x%016llx\n",
6525 - vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
6526 - if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
6527 - pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
6528 - pr_err("Interruptibility = %08x ActivityState = %08x\n",
6529 - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
6530 - vmcs_read32(GUEST_ACTIVITY_STATE));
6531 - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
6532 - pr_err("InterruptStatus = %04x\n",
6533 - vmcs_read16(GUEST_INTR_STATUS));
6534 -
6535 - pr_err("*** Host State ***\n");
6536 - pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
6537 - vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
6538 - pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
6539 - vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
6540 - vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
6541 - vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
6542 - vmcs_read16(HOST_TR_SELECTOR));
6543 - pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
6544 - vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
6545 - vmcs_readl(HOST_TR_BASE));
6546 - pr_err("GDTBase=%016lx IDTBase=%016lx\n",
6547 - vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
6548 - pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
6549 - vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
6550 - vmcs_readl(HOST_CR4));
6551 - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6552 - vmcs_readl(HOST_IA32_SYSENTER_ESP),
6553 - vmcs_read32(HOST_IA32_SYSENTER_CS),
6554 - vmcs_readl(HOST_IA32_SYSENTER_EIP));
6555 - if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
6556 - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
6557 - vmcs_read64(HOST_IA32_EFER),
6558 - vmcs_read64(HOST_IA32_PAT));
6559 - if (cpu_has_load_perf_global_ctrl() &&
6560 - vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
6561 - pr_err("PerfGlobCtl = 0x%016llx\n",
6562 - vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
6563 -
6564 - pr_err("*** Control State ***\n");
6565 - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
6566 - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
6567 - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
6568 - pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
6569 - vmcs_read32(EXCEPTION_BITMAP),
6570 - vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
6571 - vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
6572 - pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
6573 - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6574 - vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
6575 - vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
6576 - pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
6577 - vmcs_read32(VM_EXIT_INTR_INFO),
6578 - vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6579 - vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
6580 - pr_err(" reason=%08x qualification=%016lx\n",
6581 - vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
6582 - pr_err("IDTVectoring: info=%08x errcode=%08x\n",
6583 - vmcs_read32(IDT_VECTORING_INFO_FIELD),
6584 - vmcs_read32(IDT_VECTORING_ERROR_CODE));
6585 - pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
6586 - if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
6587 - pr_err("TSC Multiplier = 0x%016llx\n",
6588 - vmcs_read64(TSC_MULTIPLIER));
6589 - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
6590 - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
6591 - u16 status = vmcs_read16(GUEST_INTR_STATUS);
6592 - pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
6593 - }
6594 - pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
6595 - if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
6596 - pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
6597 - pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
6598 - }
6599 - if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
6600 - pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
6601 - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
6602 - pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
6603 - n = vmcs_read32(CR3_TARGET_COUNT);
6604 - for (i = 0; i + 1 < n; i += 4)
6605 - pr_err("CR3 target%u=%016lx target%u=%016lx\n",
6606 - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
6607 - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
6608 - if (i < n)
6609 - pr_err("CR3 target%u=%016lx\n",
6610 - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
6611 - if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
6612 - pr_err("PLE Gap=%08x Window=%08x\n",
6613 - vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
6614 - if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
6615 - pr_err("Virtual processor ID = 0x%04x\n",
6616 - vmcs_read16(VIRTUAL_PROCESSOR_ID));
6617 -}
6618 -
6619 -/*
6620 - * The guest has exited. See if we can fix it or if we need userspace
6621 - * assistance.
6622 - */
6623 -static int vmx_handle_exit(struct kvm_vcpu *vcpu,
6624 - enum exit_fastpath_completion exit_fastpath)
6625 -{
6626 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6627 - u32 exit_reason = vmx->exit_reason;
6628 - u32 vectoring_info = vmx->idt_vectoring_info;
6629 -
6630 - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
6631 -
6632 - /*
6633 - * Flush the PML buffer of logged GPAs; this keeps dirty_bitmap more
6634 - * up to date. Another benefit: in kvm_vm_ioctl_get_dirty_log, before
6635 - * querying dirty_bitmap, we only need to kick all vcpus out of guest
6636 - * mode, because once a vcpu is back in root mode its PML buffer must
6637 - * already have been flushed.
6638 - */
6639 - if (enable_pml)
6640 - vmx_flush_pml_buffer(vcpu);
6641 -
6642 - /* If guest state is invalid, start emulating */
6643 - if (vmx->emulation_required)
6644 - return handle_invalid_guest_state(vcpu);
6645 -
6646 - if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
6647 - return nested_vmx_reflect_vmexit(vcpu, exit_reason);
6648 -
6649 - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
6650 - dump_vmcs();
6651 - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6652 - vcpu->run->fail_entry.hardware_entry_failure_reason
6653 - = exit_reason;
6654 - return 0;
6655 - }
6656 -
6657 - if (unlikely(vmx->fail)) {
6658 - dump_vmcs();
6659 - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6660 - vcpu->run->fail_entry.hardware_entry_failure_reason
6661 - = vmcs_read32(VM_INSTRUCTION_ERROR);
6662 - return 0;
6663 - }
6664 -
6665 - /*
6666 - * Note:
6667 - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by
6668 - * an event delivery, since that indicates the guest is accessing MMIO.
6669 - * The vm-exit could be triggered again after returning to the guest,
6670 - * which would cause an infinite loop.
6671 - */
6672 - if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
6673 - (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
6674 - exit_reason != EXIT_REASON_EPT_VIOLATION &&
6675 - exit_reason != EXIT_REASON_PML_FULL &&
6676 - exit_reason != EXIT_REASON_TASK_SWITCH)) {
6677 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6678 - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
6679 - vcpu->run->internal.ndata = 3;
6680 - vcpu->run->internal.data[0] = vectoring_info;
6681 - vcpu->run->internal.data[1] = exit_reason;
6682 - vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
6683 - if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
6684 - vcpu->run->internal.ndata++;
6685 - vcpu->run->internal.data[3] =
6686 - vmcs_read64(GUEST_PHYSICAL_ADDRESS);
6687 - }
6688 - return 0;
6689 - }
6690 -
6691 - if (unlikely(!enable_vnmi &&
6692 - vmx->loaded_vmcs->soft_vnmi_blocked)) {
6693 - if (vmx_interrupt_allowed(vcpu)) {
6694 - vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6695 - } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
6696 - vcpu->arch.nmi_pending) {
6697 - /*
6698 - * This CPU doesn't help us find the end of an
6699 - * NMI-blocked window if the guest runs with IRQs
6700 - * disabled. So we pull the trigger after 1 s of
6701 - * futile waiting, but inform the user about this.
6702 - */
6703 - printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
6704 - "state on VCPU %d after 1 s timeout\n",
6705 - __func__, vcpu->vcpu_id);
6706 - vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6707 - }
6708 - }
6709 -
6710 - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
6711 - kvm_skip_emulated_instruction(vcpu);
6712 - return 1;
6713 - } else if (exit_reason < kvm_vmx_max_exit_handlers
6714 - && kvm_vmx_exit_handlers[exit_reason]) {
6715 -#ifdef CONFIG_RETPOLINE
6716 - if (exit_reason == EXIT_REASON_MSR_WRITE)
6717 - return kvm_emulate_wrmsr(vcpu);
6718 - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
6719 - return handle_preemption_timer(vcpu);
6720 - else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
6721 - return handle_interrupt_window(vcpu);
6722 - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
6723 - return handle_external_interrupt(vcpu);
6724 - else if (exit_reason == EXIT_REASON_HLT)
6725 - return kvm_emulate_halt(vcpu);
6726 - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
6727 - return handle_ept_misconfig(vcpu);
6728 -#endif
6729 - return kvm_vmx_exit_handlers[exit_reason](vcpu);
6730 - } else {
6731 - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
6732 - exit_reason);
6733 - dump_vmcs();
6734 - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6735 - vcpu->run->internal.suberror =
6736 - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
6737 - vcpu->run->internal.ndata = 1;
6738 - vcpu->run->internal.data[0] = exit_reason;
6739 - return 0;
6740 - }
6741 -}
6742 -
6743 -/*
6744 - * Software-based L1D cache flush, used when the microcode providing
6745 - * the cache control MSR is not loaded.
6746 - *
6747 - * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
6748 - * flushing it requires reading in 64 KiB because the replacement algorithm
6749 - * is not exactly LRU. This could be sized at runtime via topology
6750 - * information, but as all relevant affected CPUs have a 32 KiB L1D cache
6751 - * there is no point in doing so.
6752 - */
6753 -static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
6754 -{
6755 - int size = PAGE_SIZE << L1D_CACHE_ORDER;
6756 -
6757 - /*
6758 - * This code is only executed when the flush mode is 'cond' or
6759 - * 'always'
6760 - */
6761 - if (static_branch_likely(&vmx_l1d_flush_cond)) {
6762 - bool flush_l1d;
6763 -
6764 - /*
6765 - * Clear the per-vcpu flush bit, it gets set again
6766 - * either from vcpu_run() or from one of the unsafe
6767 - * VMEXIT handlers.
6768 - */
6769 - flush_l1d = vcpu->arch.l1tf_flush_l1d;
6770 - vcpu->arch.l1tf_flush_l1d = false;
6771 -
6772 - /*
6773 - * Clear the per-cpu flush bit, it gets set again from
6774 - * the interrupt handlers.
6775 - */
6776 - flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
6777 - kvm_clear_cpu_l1tf_flush_l1d();
6778 -
6779 - if (!flush_l1d)
6780 - return;
6781 - }
6782 -
6783 - vcpu->stat.l1d_flush++;
6784 -
6785 - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
6786 - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
6787 - return;
6788 - }
6789 -
6790 - asm volatile(
6791 - /* First ensure the pages are in the TLB */
6792 - "xorl %%eax, %%eax\n"
6793 - ".Lpopulate_tlb:\n\t"
6794 - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6795 - "addl $4096, %%eax\n\t"
6796 - "cmpl %%eax, %[size]\n\t"
6797 - "jne .Lpopulate_tlb\n\t"
6798 - "xorl %%eax, %%eax\n\t"
6799 - "cpuid\n\t"
6800 - /* Now fill the cache */
6801 - "xorl %%eax, %%eax\n"
6802 - ".Lfill_cache:\n"
6803 - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6804 - "addl $64, %%eax\n\t"
6805 - "cmpl %%eax, %[size]\n\t"
6806 - "jne .Lfill_cache\n\t"
6807 - "lfence\n"
6808 - :: [flush_pages] "r" (vmx_l1d_flush_pages),
6809 - [size] "r" (size)
6810 - : "eax", "ebx", "ecx", "edx");
6811 -}
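
A minimal standalone sketch of the read pattern vmx_l1d_flush() uses above, assuming L1D_CACHE_ORDER is 4 so that PAGE_SIZE << 4 = 64 KiB gets read: one access per page first to populate the TLB, then one access per 64-byte line to displace the 32 KiB L1D (the SKETCH_* names are made up for the example).

#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096
#define SKETCH_L1D_ORDER 4                      /* assumed L1D_CACHE_ORDER */

static unsigned char flush_pages[SKETCH_PAGE_SIZE << SKETCH_L1D_ORDER];

int main(void)
{
	int size = SKETCH_PAGE_SIZE << SKETCH_L1D_ORDER;        /* 64 KiB */
	volatile unsigned char sink;
	int i;

	/* Pass 1: touch each page so its TLB entry is present. */
	for (i = 0; i < size; i += SKETCH_PAGE_SIZE)
		sink = flush_pages[i];

	/* Pass 2: touch each 64-byte line to fill, and so flush, the L1D. */
	for (i = 0; i < size; i += 64)
		sink = flush_pages[i];

	(void)sink;
	printf("read %d bytes in two passes\n", size);
	return 0;
}
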
6812 -
6813 -static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6814 -{
6815 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6816 - int tpr_threshold;
6817 -
6818 - if (is_guest_mode(vcpu) &&
6819 - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
6820 - return;
6821 -
6822 - tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
6823 - if (is_guest_mode(vcpu))
6824 - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
6825 - else
6826 - vmcs_write32(TPR_THRESHOLD, tpr_threshold);
6827 -}
6828 -
6829 -void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
6830 -{
6831 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6832 - u32 sec_exec_control;
6833 -
6834 - if (!lapic_in_kernel(vcpu))
6835 - return;
6836 -
6837 - if (!flexpriority_enabled &&
6838 - !cpu_has_vmx_virtualize_x2apic_mode())
6839 - return;
6840 -
6841 - /* Postpone execution until vmcs01 is the current VMCS. */
6842 - if (is_guest_mode(vcpu)) {
6843 - vmx->nested.change_vmcs01_virtual_apic_mode = true;
6844 - return;
6845 - }
6846 -
6847 - sec_exec_control = secondary_exec_controls_get(vmx);
6848 - sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6849 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
6850 -
6851 - switch (kvm_get_apic_mode(vcpu)) {
6852 - case LAPIC_MODE_INVALID:
6853 - WARN_ONCE(true, "Invalid local APIC state");
6854 - case LAPIC_MODE_DISABLED:
6855 - break;
6856 - case LAPIC_MODE_XAPIC:
6857 - if (flexpriority_enabled) {
6858 - sec_exec_control |=
6859 - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6860 - vmx_flush_tlb(vcpu, true);
6861 - }
6862 - break;
6863 - case LAPIC_MODE_X2APIC:
6864 - if (cpu_has_vmx_virtualize_x2apic_mode())
6865 - sec_exec_control |=
6866 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6867 - break;
6868 - }
6869 - secondary_exec_controls_set(vmx, sec_exec_control);
6870 -
6871 - vmx_update_msr_bitmap(vcpu);
6872 -}
6873 -
6874 -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
6875 -{
6876 - if (!is_guest_mode(vcpu)) {
6877 - vmcs_write64(APIC_ACCESS_ADDR, hpa);
6878 - vmx_flush_tlb(vcpu, true);
6879 - }
6880 -}
6881 -
6882 -static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
6883 -{
6884 - u16 status;
6885 - u8 old;
6886 -
6887 - if (max_isr == -1)
6888 - max_isr = 0;
6889 -
6890 - status = vmcs_read16(GUEST_INTR_STATUS);
6891 - old = status >> 8;
6892 - if (max_isr != old) {
6893 - status &= 0xff;
6894 - status |= max_isr << 8;
6895 - vmcs_write16(GUEST_INTR_STATUS, status);
6896 - }
6897 -}
6898 -
6899 -static void vmx_set_rvi(int vector)
6900 -{
6901 - u16 status;
6902 - u8 old;
6903 -
6904 - if (vector == -1)
6905 - vector = 0;
6906 -
6907 - status = vmcs_read16(GUEST_INTR_STATUS);
6908 - old = (u8)status & 0xff;
6909 - if ((u8)vector != old) {
6910 - status &= ~0xff;
6911 - status |= (u8)vector;
6912 - vmcs_write16(GUEST_INTR_STATUS, status);
6913 - }
6914 -}
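
vmx_hwapic_isr_update() and vmx_set_rvi() above both edit the 16-bit GUEST_INTR_STATUS word, which keeps RVI (requesting virtual interrupt) in the low byte and SVI (servicing virtual interrupt) in the high byte. A small sketch of that packing; pack_intr_status() is a hypothetical helper, not a kernel function.

#include <stdint.h>
#include <stdio.h>

/* Pack the two sub-fields the way the functions above update them. */
static uint16_t pack_intr_status(uint8_t rvi, uint8_t svi)
{
	return (uint16_t)((svi << 8) | rvi);    /* SVI: bits 15:8, RVI: bits 7:0 */
}

int main(void)
{
	uint16_t status = pack_intr_status(0x41, 0x30);

	printf("RVI=0x%02x SVI=0x%02x\n", status & 0xff, status >> 8);
	return 0;
}
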
6915 -
6916 -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6917 -{
6918 - /*
6919 - * When running L2, updating RVI is only relevant when
6920 - * vmcs12 virtual-interrupt-delivery enabled.
6921 - * However, it can be enabled only when L1 also
6922 - * intercepts external-interrupts and in that case
6923 - * we should not update vmcs02 RVI but instead intercept
6924 - * interrupt. Therefore, do nothing when running L2.
6925 - */
6926 - if (!is_guest_mode(vcpu))
6927 - vmx_set_rvi(max_irr);
6928 -}
6929 -
6930 -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
6931 -{
6932 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6933 - int max_irr;
6934 - bool max_irr_updated;
6935 -
6936 - WARN_ON(!vcpu->arch.apicv_active);
6937 - if (pi_test_on(&vmx->pi_desc)) {
6938 - pi_clear_on(&vmx->pi_desc);
6939 - /*
6940 - * IOMMU can write to PID.ON, so the barrier matters even on UP.
6941 - * But on x86 this is just a compiler barrier anyway.
6942 - */
6943 - smp_mb__after_atomic();
6944 - max_irr_updated =
6945 - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
6946 -
6947 - /*
6948 - * If we are running L2 and L1 has a new pending interrupt
6949 - * which can be injected, we should re-evaluate
6950 - * what should be done with this new L1 interrupt.
6951 - * If L1 intercepts external-interrupts, we should
6952 - * exit from L2 to L1. Otherwise, interrupt should be
6953 - * delivered directly to L2.
6954 - */
6955 - if (is_guest_mode(vcpu) && max_irr_updated) {
6956 - if (nested_exit_on_intr(vcpu))
6957 - kvm_vcpu_exiting_guest_mode(vcpu);
6958 - else
6959 - kvm_make_request(KVM_REQ_EVENT, vcpu);
6960 - }
6961 - } else {
6962 - max_irr = kvm_lapic_find_highest_irr(vcpu);
6963 - }
6964 - vmx_hwapic_irr_update(vcpu, max_irr);
6965 - return max_irr;
6966 -}
6967 -
6968 -static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
6969 -{
6970 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6971 -
6972 - return pi_test_on(pi_desc) ||
6973 - (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
6974 -}
6975 -
6976 -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6977 -{
6978 - if (!kvm_vcpu_apicv_active(vcpu))
6979 - return;
6980 -
6981 - vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6982 - vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6983 - vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6984 - vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6985 -}
6986 -
6987 -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
6988 -{
6989 - struct vcpu_vmx *vmx = to_vmx(vcpu);
6990 -
6991 - pi_clear_on(&vmx->pi_desc);
6992 - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
6993 -}
6994 -
6995 -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
6996 -{
6997 - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6998 -
6999 - /* if exit due to PF check for async PF */
7000 - if (is_page_fault(vmx->exit_intr_info))
7001 - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
7002 -
7003 - /* Handle machine checks before interrupts are enabled */
7004 - if (is_machine_check(vmx->exit_intr_info))
7005 - kvm_machine_check();
7006 -
7007 - /* We need to handle NMIs before interrupts are enabled */
7008 - if (is_nmi(vmx->exit_intr_info)) {
7009 - kvm_before_interrupt(&vmx->vcpu);
7010 - asm("int $2");
7011 - kvm_after_interrupt(&vmx->vcpu);
7012 - }
7013 -}
7014 -
7015 -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
7016 -{
7017 - unsigned int vector;
7018 - unsigned long entry;
7019 -#ifdef CONFIG_X86_64
7020 - unsigned long tmp;
7021 -#endif
7022 - gate_desc *desc;
7023 - u32 intr_info;
7024 -
7025 - intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7026 - if (WARN_ONCE(!is_external_intr(intr_info),
7027 - "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
7028 - return;
7029 -
7030 - vector = intr_info & INTR_INFO_VECTOR_MASK;
7031 - desc = (gate_desc *)host_idt_base + vector;
7032 - entry = gate_offset(desc);
7033 -
7034 - kvm_before_interrupt(vcpu);
7035 -
7036 - asm volatile(
7037 -#ifdef CONFIG_X86_64
7038 - "mov %%" _ASM_SP ", %[sp]\n\t"
7039 - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
7040 - "push $%c[ss]\n\t"
7041 - "push %[sp]\n\t"
7042 -#endif
7043 - "pushf\n\t"
7044 - __ASM_SIZE(push) " $%c[cs]\n\t"
7045 - CALL_NOSPEC
7046 - :
7047 -#ifdef CONFIG_X86_64
7048 - [sp]"=&r"(tmp),
7049 -#endif
7050 - ASM_CALL_CONSTRAINT
7051 - :
7052 - THUNK_TARGET(entry),
7053 - [ss]"i"(__KERNEL_DS),
7054 - [cs]"i"(__KERNEL_CS)
7055 - );
7056 -
7057 - kvm_after_interrupt(vcpu);
7058 -}
7059 -STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
7060 -
7061 -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
7062 - enum exit_fastpath_completion *exit_fastpath)
7063 -{
7064 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7065 -
7066 - if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
7067 - handle_external_interrupt_irqoff(vcpu);
7068 - else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
7069 - handle_exception_nmi_irqoff(vmx);
7070 - else if (!is_guest_mode(vcpu) &&
7071 - vmx->exit_reason == EXIT_REASON_MSR_WRITE)
7072 - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
7073 -}
7074 -
7075 -static bool vmx_has_emulated_msr(int index)
7076 -{
7077 - switch (index) {
7078 - case MSR_IA32_SMBASE:
7079 - /*
7080 - * We cannot do SMM unless we can run the guest in big
7081 - * real mode.
7082 - */
7083 - return enable_unrestricted_guest || emulate_invalid_guest_state;
7084 - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
7085 - return nested;
7086 - case MSR_AMD64_VIRT_SPEC_CTRL:
7087 - /* This is AMD only. */
7088 - return false;
7089 - default:
7090 - return true;
7091 - }
7092 -}
7093 -
7094 -static bool vmx_pt_supported(void)
7095 -{
7096 - return pt_mode == PT_MODE_HOST_GUEST;
7097 -}
7098 -
7099 -static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7100 -{
7101 - u32 exit_intr_info;
7102 - bool unblock_nmi;
7103 - u8 vector;
7104 - bool idtv_info_valid;
7105 -
7106 - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7107 -
7108 - if (enable_vnmi) {
7109 - if (vmx->loaded_vmcs->nmi_known_unmasked)
7110 - return;
7111 - /*
7112 - * Can't use vmx->exit_intr_info since we're not sure what
7113 - * the exit reason is.
7114 - */
7115 - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7116 - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
7117 - vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
7118 - /*
7119 - * SDM 3: 27.7.1.2 (September 2008)
7120 - * Re-set bit "block by NMI" before VM entry if vmexit caused by
7121 - * a guest IRET fault.
7122 - * SDM 3: 23.2.2 (September 2008)
7123 - * Bit 12 is undefined in any of the following cases:
7124 - * If the VM exit sets the valid bit in the IDT-vectoring
7125 - * information field.
7126 - * If the VM exit is due to a double fault.
7127 - */
7128 - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
7129 - vector != DF_VECTOR && !idtv_info_valid)
7130 - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7131 - GUEST_INTR_STATE_NMI);
7132 - else
7133 - vmx->loaded_vmcs->nmi_known_unmasked =
7134 - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
7135 - & GUEST_INTR_STATE_NMI);
7136 - } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
7137 - vmx->loaded_vmcs->vnmi_blocked_time +=
7138 - ktime_to_ns(ktime_sub(ktime_get(),
7139 - vmx->loaded_vmcs->entry_time));
7140 -}
7141 -
7142 -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7143 - u32 idt_vectoring_info,
7144 - int instr_len_field,
7145 - int error_code_field)
7146 -{
7147 - u8 vector;
7148 - int type;
7149 - bool idtv_info_valid;
7150 -
7151 - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7152 -
7153 - vcpu->arch.nmi_injected = false;
7154 - kvm_clear_exception_queue(vcpu);
7155 - kvm_clear_interrupt_queue(vcpu);
7156 -
7157 - if (!idtv_info_valid)
7158 - return;
7159 -
7160 - kvm_make_request(KVM_REQ_EVENT, vcpu);
7161 -
7162 - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
7163 - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
7164 -
7165 - switch (type) {
7166 - case INTR_TYPE_NMI_INTR:
7167 - vcpu->arch.nmi_injected = true;
7168 - /*
7169 - * SDM 3: 27.7.1.2 (September 2008)
7170 - * Clear bit "block by NMI" before VM entry if a NMI
7171 - * Clear bit "block by NMI" before VM entry if an NMI
7172 - */
7173 - vmx_set_nmi_mask(vcpu, false);
7174 - break;
7175 - case INTR_TYPE_SOFT_EXCEPTION:
7176 - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7177 - /* fall through */
7178 - case INTR_TYPE_HARD_EXCEPTION:
7179 - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7180 - u32 err = vmcs_read32(error_code_field);
7181 - kvm_requeue_exception_e(vcpu, vector, err);
7182 - } else
7183 - kvm_requeue_exception(vcpu, vector);
7184 - break;
7185 - case INTR_TYPE_SOFT_INTR:
7186 - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7187 - /* fall through */
7188 - case INTR_TYPE_EXT_INTR:
7189 - kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
7190 - break;
7191 - default:
7192 - break;
7193 - }
7194 -}
7195 -
7196 -static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
7197 -{
7198 - __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
7199 - VM_EXIT_INSTRUCTION_LEN,
7200 - IDT_VECTORING_ERROR_CODE);
7201 -}
7202 -
7203 -static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
7204 -{
7205 - __vmx_complete_interrupts(vcpu,
7206 - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
7207 - VM_ENTRY_INSTRUCTION_LEN,
7208 - VM_ENTRY_EXCEPTION_ERROR_CODE);
7209 -
7210 - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
7211 -}
7212 -
7213 -static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
7214 -{
7215 - int i, nr_msrs;
7216 - struct perf_guest_switch_msr *msrs;
7217 -
7218 - msrs = perf_guest_get_msrs(&nr_msrs);
7219 -
7220 - if (!msrs)
7221 - return;
7222 -
7223 - for (i = 0; i < nr_msrs; i++)
7224 - if (msrs[i].host == msrs[i].guest)
7225 - clear_atomic_switch_msr(vmx, msrs[i].msr);
7226 - else
7227 - add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
7228 - msrs[i].host, false);
7229 -}
7230 -
7231 -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
7232 -{
7233 - u32 host_umwait_control;
7234 -
7235 - if (!vmx_has_waitpkg(vmx))
7236 - return;
7237 -
7238 - host_umwait_control = get_umwait_control_msr();
7239 -
7240 - if (vmx->msr_ia32_umwait_control != host_umwait_control)
7241 - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
7242 - vmx->msr_ia32_umwait_control,
7243 - host_umwait_control, false);
7244 - else
7245 - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
7246 -}
7247 -
7248 -static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
7249 -{
7250 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7251 - u64 tscl;
7252 - u32 delta_tsc;
7253 -
7254 - if (vmx->req_immediate_exit) {
7255 - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
7256 - vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7257 - } else if (vmx->hv_deadline_tsc != -1) {
7258 - tscl = rdtsc();
7259 - if (vmx->hv_deadline_tsc > tscl)
7260 - /* set_hv_timer ensures the delta fits in 32-bits */
7261 - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
7262 - cpu_preemption_timer_multi);
7263 - else
7264 - delta_tsc = 0;
7265 -
7266 - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
7267 - vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7268 - } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
7269 - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
7270 - vmx->loaded_vmcs->hv_timer_soft_disabled = true;
7271 - }
7272 -}
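
The value written to VMX_PREEMPTION_TIMER_VALUE above is the remaining TSC delta scaled down by the hardware rate, i.e. (hv_deadline_tsc - tscl) >> cpu_preemption_timer_multi. A worked example, with all numbers assumed for illustration and a rate field of 5 (one timer tick per 32 TSC cycles):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hv_deadline_tsc = 1000000;     /* assumed deadline, in TSC cycles */
	uint64_t tscl            = 360000;      /* assumed current TSC */
	unsigned int timer_multi = 5;           /* assumed VMX_MISC rate field */

	uint32_t delta = (uint32_t)((hv_deadline_tsc - tscl) >> timer_multi);

	/* 640000 TSC cycles ahead => 20000 preemption-timer ticks. */
	printf("VMX_PREEMPTION_TIMER_VALUE = %u\n", delta);
	return 0;
}
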
7273 -
7274 -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
7275 -{
7276 - if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
7277 - vmx->loaded_vmcs->host_state.rsp = host_rsp;
7278 - vmcs_writel(HOST_RSP, host_rsp);
7279 - }
7280 -}
7281 -
7282 -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
7283 -
7284 -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
7285 -{
7286 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7287 - unsigned long cr3, cr4;
7288 -
7289 - /* Record the guest's net vcpu time for enforced NMI injections. */
7290 - if (unlikely(!enable_vnmi &&
7291 - vmx->loaded_vmcs->soft_vnmi_blocked))
7292 - vmx->loaded_vmcs->entry_time = ktime_get();
7293 -
7294 - /* Don't enter VMX if guest state is invalid, let the exit handler
7295 - start emulation until we arrive back to a valid state */
7296 - if (vmx->emulation_required)
7297 - return;
7298 -
7299 - if (vmx->ple_window_dirty) {
7300 - vmx->ple_window_dirty = false;
7301 - vmcs_write32(PLE_WINDOW, vmx->ple_window);
7302 - }
7303 -
7304 - if (vmx->nested.need_vmcs12_to_shadow_sync)
7305 - nested_sync_vmcs12_to_shadow(vcpu);
7306 -
7307 - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
7308 - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
7309 - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
7310 - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
7311 -
7312 - cr3 = __get_current_cr3_fast();
7313 - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
7314 - vmcs_writel(HOST_CR3, cr3);
7315 - vmx->loaded_vmcs->host_state.cr3 = cr3;
7316 - }
7317 -
7318 - cr4 = cr4_read_shadow();
7319 - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
7320 - vmcs_writel(HOST_CR4, cr4);
7321 - vmx->loaded_vmcs->host_state.cr4 = cr4;
7322 - }
7323 -
7324 - /* When single-stepping over STI and MOV SS, we must clear the
7325 - * corresponding interruptibility bits in the guest state. Otherwise
7326 - * vmentry fails as it then expects bit 14 (BS) in pending debug
7327 - * exceptions being set, but that's not correct for the guest debugging
7328 - * case. */
7329 - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7330 - vmx_set_interrupt_shadow(vcpu, 0);
7331 -
7332 - kvm_load_guest_xsave_state(vcpu);
7333 -
7334 - if (static_cpu_has(X86_FEATURE_PKU) &&
7335 - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
7336 - vcpu->arch.pkru != vmx->host_pkru)
7337 - __write_pkru(vcpu->arch.pkru);
7338 -
7339 - pt_guest_enter(vmx);
7340 -
7341 - atomic_switch_perf_msrs(vmx);
7342 - atomic_switch_umwait_control_msr(vmx);
7343 -
7344 - if (enable_preemption_timer)
7345 - vmx_update_hv_timer(vcpu);
7346 -
7347 - if (lapic_in_kernel(vcpu) &&
7348 - vcpu->arch.apic->lapic_timer.timer_advance_ns)
7349 - kvm_wait_lapic_expire(vcpu);
7350 -
7351 - /*
7352 - * If this vCPU has touched SPEC_CTRL, restore the guest's value if
7353 - * it's non-zero. Since vmentry is serialising on affected CPUs, there
7354 - * is no need to worry about the conditional branch over the wrmsr
7355 - * being speculatively taken.
7356 - */
7357 - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
7358 -
7359 - /* L1D Flush includes CPU buffer clear to mitigate MDS */
7360 - if (static_branch_unlikely(&vmx_l1d_should_flush))
7361 - vmx_l1d_flush(vcpu);
7362 - else if (static_branch_unlikely(&mds_user_clear))
7363 - mds_clear_cpu_buffers();
7364 -
7365 - if (vcpu->arch.cr2 != read_cr2())
7366 - write_cr2(vcpu->arch.cr2);
7367 -
7368 - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
7369 - vmx->loaded_vmcs->launched);
7370 -
7371 - vcpu->arch.cr2 = read_cr2();
7372 -
7373 - /*
7374 - * We do not use IBRS in the kernel. If this vCPU has used the
7375 - * SPEC_CTRL MSR it may have left it on; save the value and
7376 - * turn it off. This is much more efficient than blindly adding
7377 - * it to the atomic save/restore list. Especially as the former
7378 - * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
7379 - *
7380 - * For non-nested case:
7381 - * If the L01 MSR bitmap does not intercept the MSR, then we need to
7382 - * save it.
7383 - *
7384 - * For nested case:
7385 - * If the L02 MSR bitmap does not intercept the MSR, then we need to
7386 - * save it.
7387 - */
7388 - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
7389 - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
7390 -
7391 - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
7392 -
7393 - /* All fields are clean at this point */
7394 - if (static_branch_unlikely(&enable_evmcs))
7395 - current_evmcs->hv_clean_fields |=
7396 - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
7397 -
7398 - if (static_branch_unlikely(&enable_evmcs))
7399 - current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
7400 -
7401 - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
7402 - if (vmx->host_debugctlmsr)
7403 - update_debugctlmsr(vmx->host_debugctlmsr);
7404 -
7405 -#ifndef CONFIG_X86_64
7406 - /*
7407 - * The sysexit path does not restore ds/es, so we must set them to
7408 - * a reasonable value ourselves.
7409 - *
7410 - * We can't defer this to vmx_prepare_switch_to_host() since that
7411 - * function may be executed in interrupt context, which saves and
7412 - * restores segments around it, nullifying its effect.
7413 - */
7414 - loadsegment(ds, __USER_DS);
7415 - loadsegment(es, __USER_DS);
7416 -#endif
7417 -
7418 - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
7419 - | (1 << VCPU_EXREG_RFLAGS)
7420 - | (1 << VCPU_EXREG_PDPTR)
7421 - | (1 << VCPU_EXREG_SEGMENTS)
7422 - | (1 << VCPU_EXREG_CR3));
7423 - vcpu->arch.regs_dirty = 0;
7424 -
7425 - pt_guest_exit(vmx);
7426 -
7427 - /*
7428 - * eager fpu is enabled if PKEY is supported and CR4 is switched
7429 - * back on host, so it is safe to read guest PKRU from current
7430 - * XSAVE.
7431 - */
7432 - if (static_cpu_has(X86_FEATURE_PKU) &&
7433 - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
7434 - vcpu->arch.pkru = rdpkru();
7435 - if (vcpu->arch.pkru != vmx->host_pkru)
7436 - __write_pkru(vmx->host_pkru);
7437 - }
7438 -
7439 - kvm_load_host_xsave_state(vcpu);
7440 -
7441 - vmx->nested.nested_run_pending = 0;
7442 - vmx->idt_vectoring_info = 0;
7443 -
7444 - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
7445 - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
7446 - kvm_machine_check();
7447 -
7448 - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
7449 - return;
7450 -
7451 - vmx->loaded_vmcs->launched = 1;
7452 - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
7453 -
7454 - vmx_recover_nmi_blocking(vmx);
7455 - vmx_complete_interrupts(vmx);
7456 -}
7457 -
7458 -static struct kvm *vmx_vm_alloc(void)
7459 -{
7460 - struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
7461 - GFP_KERNEL_ACCOUNT | __GFP_ZERO,
7462 - PAGE_KERNEL);
7463 - return &kvm_vmx->kvm;
7464 -}
7465 -
7466 -static void vmx_vm_free(struct kvm *kvm)
7467 -{
7468 - kfree(kvm->arch.hyperv.hv_pa_pg);
7469 - vfree(to_kvm_vmx(kvm));
7470 -}
7471 -
7472 -static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7473 -{
7474 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7475 -
7476 - if (enable_pml)
7477 - vmx_destroy_pml_buffer(vmx);
7478 - free_vpid(vmx->vpid);
7479 - nested_vmx_free_vcpu(vcpu);
7480 - free_loaded_vmcs(vmx->loaded_vmcs);
7481 - kvm_vcpu_uninit(vcpu);
7482 - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
7483 - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
7484 - kmem_cache_free(kvm_vcpu_cache, vmx);
7485 -}
7486 -
7487 -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7488 -{
7489 - int err;
7490 - struct vcpu_vmx *vmx;
7491 - unsigned long *msr_bitmap;
7492 - int i, cpu;
7493 -
7494 - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
7495 - "struct kvm_vcpu must be at offset 0 for arch usercopy region");
7496 -
7497 - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
7498 - if (!vmx)
7499 - return ERR_PTR(-ENOMEM);
7500 -
7501 - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
7502 - GFP_KERNEL_ACCOUNT);
7503 - if (!vmx->vcpu.arch.user_fpu) {
7504 - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
7505 - err = -ENOMEM;
7506 - goto free_partial_vcpu;
7507 - }
7508 -
7509 - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
7510 - GFP_KERNEL_ACCOUNT);
7511 - if (!vmx->vcpu.arch.guest_fpu) {
7512 - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
7513 - err = -ENOMEM;
7514 - goto free_user_fpu;
7515 - }
7516 -
7517 - vmx->vpid = allocate_vpid();
7518 -
7519 - err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
7520 - if (err)
7521 - goto free_vcpu;
7522 -
7523 - err = -ENOMEM;
7524 -
7525 - /*
7526 - * If PML is turned on, failure on enabling PML just results in failure
7527 - * of creating the vcpu, therefore we can simplify PML logic (by
7528 - * avoiding dealing with cases, such as enabling PML partially on vcpus
7529 - * for the guest), etc.
7530 - */
7531 - if (enable_pml) {
7532 - vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
7533 - if (!vmx->pml_pg)
7534 - goto uninit_vcpu;
7535 - }
7536 -
7537 - BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
7538 -
7539 - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
7540 - u32 index = vmx_msr_index[i];
7541 - u32 data_low, data_high;
7542 - int j = vmx->nmsrs;
7543 -
7544 - if (rdmsr_safe(index, &data_low, &data_high) < 0)
7545 - continue;
7546 - if (wrmsr_safe(index, data_low, data_high) < 0)
7547 - continue;
7548 -
7549 - vmx->guest_msrs[j].index = i;
7550 - vmx->guest_msrs[j].data = 0;
7551 - switch (index) {
7552 - case MSR_IA32_TSX_CTRL:
7553 - /*
7554 - * No need to pass TSX_CTRL_CPUID_CLEAR through, so
7555 - * let's avoid changing CPUID bits under the host
7556 - * kernel's feet.
7557 - */
7558 - vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
7559 - break;
7560 - default:
7561 - vmx->guest_msrs[j].mask = -1ull;
7562 - break;
7563 - }
7564 - ++vmx->nmsrs;
7565 - }
7566 -
7567 - err = alloc_loaded_vmcs(&vmx->vmcs01);
7568 - if (err < 0)
7569 - goto free_pml;
7570 -
7571 - msr_bitmap = vmx->vmcs01.msr_bitmap;
7572 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
7573 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
7574 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
7575 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
7576 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
7577 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
7578 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
7579 - if (kvm_cstate_in_guest(kvm)) {
7580 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
7581 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
7582 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
7583 - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
7584 - }
7585 - vmx->msr_bitmap_mode = 0;
7586 -
7587 - vmx->loaded_vmcs = &vmx->vmcs01;
7588 - cpu = get_cpu();
7589 - vmx_vcpu_load(&vmx->vcpu, cpu);
7590 - vmx->vcpu.cpu = cpu;
7591 - init_vmcs(vmx);
7592 - vmx_vcpu_put(&vmx->vcpu);
7593 - put_cpu();
7594 - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
7595 - err = alloc_apic_access_page(kvm);
7596 - if (err)
7597 - goto free_vmcs;
7598 - }
7599 -
7600 - if (enable_ept && !enable_unrestricted_guest) {
7601 - err = init_rmode_identity_map(kvm);
7602 - if (err)
7603 - goto free_vmcs;
7604 - }
7605 -
7606 - if (nested)
7607 - nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
7608 - vmx_capability.ept,
7609 - kvm_vcpu_apicv_active(&vmx->vcpu));
7610 - else
7611 - memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
7612 -
7613 - vmx->nested.posted_intr_nv = -1;
7614 - vmx->nested.current_vmptr = -1ull;
7615 -
7616 - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
7617 -
7618 - /*
7619 - * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
7620 - * or POSTED_INTR_WAKEUP_VECTOR.
7621 - */
7622 - vmx->pi_desc.nv = POSTED_INTR_VECTOR;
7623 - vmx->pi_desc.sn = 1;
7624 -
7625 - vmx->ept_pointer = INVALID_PAGE;
7626 -
7627 - return &vmx->vcpu;
7628 -
7629 -free_vmcs:
7630 - free_loaded_vmcs(vmx->loaded_vmcs);
7631 -free_pml:
7632 - vmx_destroy_pml_buffer(vmx);
7633 -uninit_vcpu:
7634 - kvm_vcpu_uninit(&vmx->vcpu);
7635 -free_vcpu:
7636 - free_vpid(vmx->vpid);
7637 - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
7638 -free_user_fpu:
7639 - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
7640 -free_partial_vcpu:
7641 - kmem_cache_free(kvm_vcpu_cache, vmx);
7642 - return ERR_PTR(err);
7643 -}
7644 -
7645 -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7646 -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7647 -
7648 -static int vmx_vm_init(struct kvm *kvm)
7649 -{
7650 - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
7651 -
7652 - if (!ple_gap)
7653 - kvm->arch.pause_in_guest = true;
7654 -
7655 - if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
7656 - switch (l1tf_mitigation) {
7657 - case L1TF_MITIGATION_OFF:
7658 - case L1TF_MITIGATION_FLUSH_NOWARN:
7659 - /* 'I explicitly don't care' is set */
7660 - break;
7661 - case L1TF_MITIGATION_FLUSH:
7662 - case L1TF_MITIGATION_FLUSH_NOSMT:
7663 - case L1TF_MITIGATION_FULL:
7664 - /*
7665 - * Warn upon starting the first VM in a potentially
7666 - * insecure environment.
7667 - */
7668 - if (sched_smt_active())
7669 - pr_warn_once(L1TF_MSG_SMT);
7670 - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
7671 - pr_warn_once(L1TF_MSG_L1D);
7672 - break;
7673 - case L1TF_MITIGATION_FULL_FORCE:
7674 - /* Flush is enforced */
7675 - break;
7676 - }
7677 - }
7678 - return 0;
7679 -}
7680 -
7681 -static int __init vmx_check_processor_compat(void)
7682 -{
7683 - struct vmcs_config vmcs_conf;
7684 - struct vmx_capability vmx_cap;
7685 -
7686 - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
7687 - return -EIO;
7688 - if (nested)
7689 - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
7690 - enable_apicv);
7691 - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
7692 - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
7693 - smp_processor_id());
7694 - return -EIO;
7695 - }
7696 - return 0;
7697 -}
7698 -
7699 -static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7700 -{
7701 - u8 cache;
7702 - u64 ipat = 0;
7703 -
7704 - /* For VT-d and EPT combination
7705 - * 1. MMIO: always map as UC
7706 - * 2. EPT with VT-d:
7707 - * a. VT-d without snooping control feature: can't guarantee the
7708 - * result, try to trust guest.
7709 - * b. VT-d with snooping control feature: snooping control feature of
7710 - * VT-d engine can guarantee the cache correctness. Just set it
7711 - * to WB to keep consistent with host. So the same as item 3.
7712 - * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
7713 - * consistent with host MTRR
7714 - */
7715 - if (is_mmio) {
7716 - cache = MTRR_TYPE_UNCACHABLE;
7717 - goto exit;
7718 - }
7719 -
7720 - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
7721 - ipat = VMX_EPT_IPAT_BIT;
7722 - cache = MTRR_TYPE_WRBACK;
7723 - goto exit;
7724 - }
7725 -
7726 - if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
7727 - ipat = VMX_EPT_IPAT_BIT;
7728 - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
7729 - cache = MTRR_TYPE_WRBACK;
7730 - else
7731 - cache = MTRR_TYPE_UNCACHABLE;
7732 - goto exit;
7733 - }
7734 -
7735 - cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
7736 -
7737 -exit:
7738 - return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
7739 -}
7740 -
7741 -static int vmx_get_lpage_level(void)
7742 -{
7743 - if (enable_ept && !cpu_has_vmx_ept_1g_page())
7744 - return PT_DIRECTORY_LEVEL;
7745 - else
7746 - /* For shadow and EPT supported 1GB page */
7747 - return PT_PDPE_LEVEL;
7748 -}
7749 -
7750 -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
7751 -{
7752 - /*
7753 - * These bits in the secondary execution controls field
7754 - * are dynamic, the others are mostly based on the hypervisor
7755 - * architecture and the guest's CPUID. Do not touch the
7756 - * dynamic bits.
7757 - */
7758 - u32 mask =
7759 - SECONDARY_EXEC_SHADOW_VMCS |
7760 - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
7761 - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
7762 - SECONDARY_EXEC_DESC;
7763 -
7764 - u32 new_ctl = vmx->secondary_exec_control;
7765 - u32 cur_ctl = secondary_exec_controls_get(vmx);
7766 -
7767 - secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
7768 -}
7769 -
7770 -/*
7771 - * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
7772 - * (indicating "allowed-1") if they are supported in the guest's CPUID.
7773 - */
7774 -static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
7775 -{
7776 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7777 - struct kvm_cpuid_entry2 *entry;
7778 -
7779 - vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
7780 - vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
7781 -
7782 -#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
7783 - if (entry && (entry->_reg & (_cpuid_mask))) \
7784 - vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
7785 -} while (0)
7786 -
7787 - entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
7788 - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
7789 - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
7790 - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
7791 - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
7792 - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
7793 - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
7794 - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
7795 - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
7796 - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
7797 - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
7798 - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
7799 - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
7800 - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
7801 - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
7802 -
7803 - entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
7804 - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
7805 - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
7806 - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
7807 - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
7808 - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
7809 - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
7810 -
7811 -#undef cr4_fixed1_update
7812 -}
7813 -
7814 -static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
7815 -{
7816 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7817 -
7818 - if (kvm_mpx_supported()) {
7819 - bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
7820 -
7821 - if (mpx_enabled) {
7822 - vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
7823 - vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
7824 - } else {
7825 - vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
7826 - vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
7827 - }
7828 - }
7829 -}
7830 -
7831 -static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
7832 -{
7833 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7834 - struct kvm_cpuid_entry2 *best = NULL;
7835 - int i;
7836 -
7837 - for (i = 0; i < PT_CPUID_LEAVES; i++) {
7838 - best = kvm_find_cpuid_entry(vcpu, 0x14, i);
7839 - if (!best)
7840 - return;
7841 - vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
7842 - vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
7843 - vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
7844 - vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
7845 - }
7846 -
7847 - /* Get the number of configurable Address Ranges for filtering */
7848 - vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps,
7849 - PT_CAP_num_address_ranges);
7850 -
7851 - /* Initialize and clear the no dependency bits */
7852 - vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
7853 - RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC);
7854 -
7855 - /*
7856 - * If CPUID.(EAX=14H,ECX=0):EBX[0]=1, CR3Filter can be set; otherwise
7857 - * setting it will inject a #GP
7858 - */
7859 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
7860 - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
7861 -
7862 - /*
7863 - * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and
7864 - * PSBFreq can be set
7865 - */
7866 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
7867 - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
7868 - RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
7869 -
7870 - /*
7871 - * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and
7872 - * MTCFreq can be set
7873 - */
7874 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
7875 - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
7876 - RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE);
7877 -
7878 - /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */
7879 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
7880 - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
7881 - RTIT_CTL_PTW_EN);
7882 -
7883 - /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */
7884 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
7885 - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
7886 -
7887 - /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */
7888 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
7889 - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
7890 -
7891 - /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabricEn can be set */
7892 - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
7893 - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
7894 -
7895 - /* unmask address range configure area */
7896 - for (i = 0; i < vmx->pt_desc.addr_range; i++)
7897 - vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
7898 -}
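
The final loop in update_intel_pt_cfg() above clears one 4-bit ADDRn_CFG nibble of the RTIT_CTL bitmask per supported address range, starting at bit 32. A short illustration of which bits become writable, with the range count assumed to be 2:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ctl_bitmask = ~0ull;   /* pretend every bit starts out reserved */
	unsigned int addr_range = 2;    /* assumed PT_CAP_num_address_ranges */
	unsigned int i;

	/* Same unmasking as above: 4 bits per address range, starting at bit 32. */
	for (i = 0; i < addr_range; i++)
		ctl_bitmask &= ~(0xfULL << (32 + i * 4));

	/* Two ranges leave bits 39:32 configurable: 0x000000ff00000000. */
	printf("writable RTIT_CTL bits: 0x%016llx\n",
	       (unsigned long long)~ctl_bitmask);
	return 0;
}
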
7899 -
7900 -static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
7901 -{
7902 - struct vcpu_vmx *vmx = to_vmx(vcpu);
7903 -
7904 - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
7905 - vcpu->arch.xsaves_enabled = false;
7906 -
7907 - if (cpu_has_secondary_exec_ctrls()) {
7908 - vmx_compute_secondary_exec_control(vmx);
7909 - vmcs_set_secondary_exec_control(vmx);
7910 - }
7911 -
7912 - if (nested_vmx_allowed(vcpu))
7913 - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7914 - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
7915 - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7916 - else
7917 - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7918 - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
7919 - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
7920 -
7921 - if (nested_vmx_allowed(vcpu)) {
7922 - nested_vmx_cr_fixed1_bits_update(vcpu);
7923 - nested_vmx_entry_exit_ctls_update(vcpu);
7924 - }
7925 -
7926 - if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
7927 - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
7928 - update_intel_pt_cfg(vcpu);
7929 -
7930 - if (boot_cpu_has(X86_FEATURE_RTM)) {
7931 - struct shared_msr_entry *msr;
7932 - msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
7933 - if (msr) {
7934 - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
7935 - vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
7936 - }
7937 - }
7938 -}
7939 -
7940 -static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7941 -{
7942 - if (func == 1 && nested)
7943 - entry->ecx |= bit(X86_FEATURE_VMX);
7944 -}
7945 -
7946 -static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
7947 -{
7948 - to_vmx(vcpu)->req_immediate_exit = true;
7949 -}
7950 -
7951 -static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7952 - struct x86_instruction_info *info,
7953 - enum x86_intercept_stage stage)
7954 -{
7955 - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7956 - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7957 -
7958 - /*
7959 - * RDPID causes #UD if disabled through secondary execution controls.
7960 - * Because it is marked as EmulateOnUD, we need to intercept it here.
7961 - */
7962 - if (info->intercept == x86_intercept_rdtscp &&
7963 - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
7964 - ctxt->exception.vector = UD_VECTOR;
7965 - ctxt->exception.error_code_valid = false;
7966 - return X86EMUL_PROPAGATE_FAULT;
7967 - }
7968 -
7969 - /* TODO: check more intercepts... */
7970 - return X86EMUL_CONTINUE;
7971 -}
7972 -
7973 -#ifdef CONFIG_X86_64
7974 -/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
7975 -static inline int u64_shl_div_u64(u64 a, unsigned int shift,
7976 - u64 divisor, u64 *result)
7977 -{
7978 - u64 low = a << shift, high = a >> (64 - shift);
7979 -
7980 - /* To avoid the overflow on divq */
7981 - if (high >= divisor)
7982 - return 1;
7983 -
7984 - /* Low holds the result, high holds the remainder, which is discarded */
7985 - asm("divq %2\n\t" : "=a" (low), "=d" (high) :
7986 - "rm" (divisor), "0" (low), "1" (high));
7987 - *result = low;
7988 -
7989 - return 0;
7990 -}
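
u64_shl_div_u64() above computes (a << shift) / divisor through a 128-bit intermediate by handing divq the high:low pair, and reports overflow when the quotient would not fit in 64 bits (high >= divisor). A portable sketch of the same computation using a compiler-provided 128-bit type; that type is an assumption, the original uses inline asm because it only targets x86-64.

#include <stdint.h>
#include <stdio.h>

static int shl_div_u64(uint64_t a, unsigned int shift, uint64_t divisor,
		       uint64_t *result)
{
	unsigned __int128 n = (unsigned __int128)a << shift;

	if (n / divisor > UINT64_MAX)
		return 1;                       /* quotient overflows 64 bits */

	*result = (uint64_t)(n / divisor);
	return 0;
}

int main(void)
{
	uint64_t q;

	if (!shl_div_u64(3, 48, 1000, &q))
		printf("(3 << 48) / 1000 = %llu\n", (unsigned long long)q);
	return 0;
}
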
7991 -
7992 -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
7993 - bool *expired)
7994 -{
7995 - struct vcpu_vmx *vmx;
7996 - u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7997 - struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7998 -
7999 - if (kvm_mwait_in_guest(vcpu->kvm) ||
8000 - kvm_can_post_timer_interrupt(vcpu))
8001 - return -EOPNOTSUPP;
8002 -
8003 - vmx = to_vmx(vcpu);
8004 - tscl = rdtsc();
8005 - guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
8006 - delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
8007 - lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
8008 - ktimer->timer_advance_ns);
8009 -
8010 - if (delta_tsc > lapic_timer_advance_cycles)
8011 - delta_tsc -= lapic_timer_advance_cycles;
8012 - else
8013 - delta_tsc = 0;
8014 -
8015 - /* Convert to host delta tsc if tsc scaling is enabled */
8016 - if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
8017 - delta_tsc && u64_shl_div_u64(delta_tsc,
8018 - kvm_tsc_scaling_ratio_frac_bits,
8019 - vcpu->arch.tsc_scaling_ratio, &delta_tsc))
8020 - return -ERANGE;
8021 -
8022 - /*
8023 - * If the delta tsc can't fit in 32 bits after the multiplier shift,
8024 - * we can't use the preemption timer.
8025 - * It's possible that it fits on later vmentries, but checking
8026 - * on every vmentry is costly so we just use an hrtimer.
8027 - */
8028 - if (delta_tsc >> (cpu_preemption_timer_multi + 32))
8029 - return -ERANGE;
8030 -
8031 - vmx->hv_deadline_tsc = tscl + delta_tsc;
8032 - *expired = !delta_tsc;
8033 - return 0;
8034 -}
8035 -
8036 -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
8037 -{
8038 - to_vmx(vcpu)->hv_deadline_tsc = -1;
8039 -}
8040 -#endif
8041 -
8042 -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
8043 -{
8044 - if (!kvm_pause_in_guest(vcpu->kvm))
8045 - shrink_ple_window(vcpu);
8046 -}
8047 -
8048 -static void vmx_slot_enable_log_dirty(struct kvm *kvm,
8049 - struct kvm_memory_slot *slot)
8050 -{
8051 - kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
8052 - kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
8053 -}
8054 -
8055 -static void vmx_slot_disable_log_dirty(struct kvm *kvm,
8056 - struct kvm_memory_slot *slot)
8057 -{
8058 - kvm_mmu_slot_set_dirty(kvm, slot);
8059 -}
8060 -
8061 -static void vmx_flush_log_dirty(struct kvm *kvm)
8062 -{
8063 - kvm_flush_pml_buffers(kvm);
8064 -}
8065 -
8066 -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
8067 -{
8068 - struct vmcs12 *vmcs12;
8069 - struct vcpu_vmx *vmx = to_vmx(vcpu);
8070 - gpa_t gpa, dst;
8071 -
8072 - if (is_guest_mode(vcpu)) {
8073 - WARN_ON_ONCE(vmx->nested.pml_full);
8074 -
8075 - /*
8076 - * Check if PML is enabled for the nested guest.
8077 - * Whether eptp bit 6 is set is already checked
8078 - * as part of A/D emulation.
8079 - */
8080 - vmcs12 = get_vmcs12(vcpu);
8081 - if (!nested_cpu_has_pml(vmcs12))
8082 - return 0;
8083 -
8084 - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
8085 - vmx->nested.pml_full = true;
8086 - return 1;
8087 - }
8088 -
8089 - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
8090 - dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
8091 -
8092 - if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
8093 - offset_in_page(dst), sizeof(gpa)))
8094 - return 0;
8095 -
8096 - vmcs12->guest_pml_index--;
8097 - }
8098 -
8099 - return 0;
8100 -}
8101 -
8102 -static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
8103 - struct kvm_memory_slot *memslot,
8104 - gfn_t offset, unsigned long mask)
8105 -{
8106 - kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
8107 -}
8108 -
8109 -static void __pi_post_block(struct kvm_vcpu *vcpu)
8110 -{
8111 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8112 - struct pi_desc old, new;
8113 - unsigned int dest;
8114 -
8115 - do {
8116 - old.control = new.control = pi_desc->control;
8117 - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
8118 - "Wakeup handler not enabled while the VCPU is blocked\n");
8119 -
8120 - dest = cpu_physical_id(vcpu->cpu);
8121 -
8122 - if (x2apic_enabled())
8123 - new.ndst = dest;
8124 - else
8125 - new.ndst = (dest << 8) & 0xFF00;
8126 -
8127 - /* set 'NV' to 'notification vector' */
8128 - new.nv = POSTED_INTR_VECTOR;
8129 - } while (cmpxchg64(&pi_desc->control, old.control,
8130 - new.control) != old.control);
8131 -
8132 - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
8133 - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8134 - list_del(&vcpu->blocked_vcpu_list);
8135 - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8136 - vcpu->pre_pcpu = -1;
8137 - }
8138 -}
8139 -
8140 -/*
8141 - * This routine does the following things for a vCPU which is going
8142 - * to be blocked if VT-d PI is enabled.
8143 - * - Store the vCPU to the wakeup list, so when interrupts happen
8144 - * we can find the right vCPU to wake up.
8145 - * - Change the Posted-interrupt descriptor as below:
8146 - * 'NDST' <-- vcpu->pre_pcpu
8147 - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
8148 - * - If 'ON' is set during this process, meaning at least one
8149 - * interrupt is posted for this vCPU, we cannot block it; in
8150 - * that case return 1, otherwise return 0.
8151 - *
8152 - */
8153 -static int pi_pre_block(struct kvm_vcpu *vcpu)
8154 -{
8155 - unsigned int dest;
8156 - struct pi_desc old, new;
8157 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8158 -
8159 - if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
8160 - !irq_remapping_cap(IRQ_POSTING_CAP) ||
8161 - !kvm_vcpu_apicv_active(vcpu))
8162 - return 0;
8163 -
8164 - WARN_ON(irqs_disabled());
8165 - local_irq_disable();
8166 - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
8167 - vcpu->pre_pcpu = vcpu->cpu;
8168 - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8169 - list_add_tail(&vcpu->blocked_vcpu_list,
8170 - &per_cpu(blocked_vcpu_on_cpu,
8171 - vcpu->pre_pcpu));
8172 - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8173 - }
8174 -
8175 - do {
8176 - old.control = new.control = pi_desc->control;
8177 -
8178 - WARN((pi_desc->sn == 1),
8179 - "Warning: SN field of posted-interrupts "
8180 - "is set before blocking\n");
8181 -
8182 - /*
8183 - * Since vCPU can be preempted during this process,
8184 - * vcpu->cpu could be different from pre_pcpu, so we
8185 - * need to set pre_pcpu as the destination of wakeup
8186 - * notification event, then we can find the right vCPU
8187 - * to wake up in the wakeup handler if interrupts happen
8188 - * when the vCPU is in blocked state.
8189 - */
8190 - dest = cpu_physical_id(vcpu->pre_pcpu);
8191 -
8192 - if (x2apic_enabled())
8193 - new.ndst = dest;
8194 - else
8195 - new.ndst = (dest << 8) & 0xFF00;
8196 -
8197 - /* set 'NV' to 'wakeup vector' */
8198 - new.nv = POSTED_INTR_WAKEUP_VECTOR;
8199 - } while (cmpxchg64(&pi_desc->control, old.control,
8200 - new.control) != old.control);
8201 -
8202 - /* We should not block the vCPU if an interrupt is posted for it. */
8203 - if (pi_test_on(pi_desc) == 1)
8204 - __pi_post_block(vcpu);
8205 -
8206 - local_irq_enable();
8207 - return (vcpu->pre_pcpu == -1);
8208 -}
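
__pi_post_block() and pi_pre_block() above both rewrite the posted-interrupt descriptor's NDST field: x2APIC mode takes the 32-bit APIC ID directly, while xAPIC mode wants the 8-bit ID in bits 15:8 of the destination field. A small sketch of that encoding; pi_encode_ndst() is a hypothetical helper, not kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t pi_encode_ndst(uint32_t apic_id, bool x2apic)
{
	return x2apic ? apic_id : (apic_id << 8) & 0xFF00;
}

int main(void)
{
	printf("xAPIC  id 5   -> NDST 0x%04x\n", pi_encode_ndst(5, false));
	printf("x2APIC id 257 -> NDST 0x%x\n", pi_encode_ndst(257, true));
	return 0;
}
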
8209 -
8210 -static int vmx_pre_block(struct kvm_vcpu *vcpu)
8211 -{
8212 - if (pi_pre_block(vcpu))
8213 - return 1;
8214 -
8215 - if (kvm_lapic_hv_timer_in_use(vcpu))
8216 - kvm_lapic_switch_to_sw_timer(vcpu);
8217 -
8218 - return 0;
8219 -}
8220 -
8221 -static void pi_post_block(struct kvm_vcpu *vcpu)
8222 -{
8223 - if (vcpu->pre_pcpu == -1)
8224 - return;
8225 -
8226 - WARN_ON(irqs_disabled());
8227 - local_irq_disable();
8228 - __pi_post_block(vcpu);
8229 - local_irq_enable();
8230 -}
8231 -
8232 -static void vmx_post_block(struct kvm_vcpu *vcpu)
8233 -{
8234 - if (kvm_x86_ops->set_hv_timer)
8235 - kvm_lapic_switch_to_hv_timer(vcpu);
8236 -
8237 - pi_post_block(vcpu);
8238 -}
8239 -
8240 -/*
8241 - * vmx_update_pi_irte - set IRTE for Posted-Interrupts
8242 - *
8243 - * @kvm: kvm
8244 - * @host_irq: host irq of the interrupt
8245 - * @guest_irq: gsi of the interrupt
8246 - * @set: set or unset PI
8247 - * returns 0 on success, < 0 on failure
8248 - */
8249 -static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
8250 - uint32_t guest_irq, bool set)
8251 -{
8252 - struct kvm_kernel_irq_routing_entry *e;
8253 - struct kvm_irq_routing_table *irq_rt;
8254 - struct kvm_lapic_irq irq;
8255 - struct kvm_vcpu *vcpu;
8256 - struct vcpu_data vcpu_info;
8257 - int idx, ret = 0;
8258 -
8259 - if (!kvm_arch_has_assigned_device(kvm) ||
8260 - !irq_remapping_cap(IRQ_POSTING_CAP) ||
8261 - !kvm_vcpu_apicv_active(kvm->vcpus[0]))
8262 - return 0;
8263 -
8264 - idx = srcu_read_lock(&kvm->irq_srcu);
8265 - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
8266 - if (guest_irq >= irq_rt->nr_rt_entries ||
8267 - hlist_empty(&irq_rt->map[guest_irq])) {
8268 - pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
8269 - guest_irq, irq_rt->nr_rt_entries);
8270 - goto out;
8271 - }
8272 -
8273 - hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
8274 - if (e->type != KVM_IRQ_ROUTING_MSI)
8275 - continue;
8276 - /*
8277 - * VT-d PI cannot support posting multicast/broadcast
8278 - * interrupts to a vCPU; we still use interrupt remapping
8279 - * for these kinds of interrupts.
8280 - *
8281 - * For lowest-priority interrupts, we only support
8282 - * those with single CPU as the destination, e.g. user
8283 - * configures the interrupts via /proc/irq or uses
8284 - * irqbalance to make the interrupts single-CPU.
8285 - *
8286 - * We will support full lowest-priority interrupt later.
8287 - *
8288 - * In addition, we can only inject generic interrupts using
8289 - * the PI mechanism, refuse to route others through it.
8290 - */
8291 -
8292 - kvm_set_msi_irq(kvm, e, &irq);
8293 - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
8294 - !kvm_irq_is_postable(&irq)) {
8295 - /*
8296 - * Make sure the IRTE is in remapped mode if
8297 - * we don't handle it in posted mode.
8298 - */
8299 - ret = irq_set_vcpu_affinity(host_irq, NULL);
8300 - if (ret < 0) {
8301 - printk(KERN_INFO
8302 - "failed to fall back to remapped mode, irq: %u\n",
8303 - host_irq);
8304 - goto out;
8305 - }
8306 -
8307 - continue;
8308 - }
8309 -
8310 - vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
8311 - vcpu_info.vector = irq.vector;
8312 -
8313 - trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
8314 - vcpu_info.vector, vcpu_info.pi_desc_addr, set);
8315 -
8316 - if (set)
8317 - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
8318 - else
8319 - ret = irq_set_vcpu_affinity(host_irq, NULL);
8320 -
8321 - if (ret < 0) {
8322 - printk(KERN_INFO "%s: failed to update PI IRTE\n",
8323 - __func__);
8324 - goto out;
8325 - }
8326 - }
8327 -
8328 - ret = 0;
8329 -out:
8330 - srcu_read_unlock(&kvm->irq_srcu, idx);
8331 - return ret;
8332 -}
8333 -
8334 -static void vmx_setup_mce(struct kvm_vcpu *vcpu)
8335 -{
8336 - if (vcpu->arch.mcg_cap & MCG_LMCE_P)
8337 - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
8338 - FEATURE_CONTROL_LMCE;
8339 - else
8340 - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
8341 - ~FEATURE_CONTROL_LMCE;
8342 -}
8343 -
8344 -static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
8345 -{
8346 - /* we need a nested vmexit to enter SMM, postpone if run is pending */
8347 - if (to_vmx(vcpu)->nested.nested_run_pending)
8348 - return 0;
8349 - return 1;
8350 -}
8351 -
8352 -static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
8353 -{
8354 - struct vcpu_vmx *vmx = to_vmx(vcpu);
8355 -
8356 - vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
8357 - if (vmx->nested.smm.guest_mode)
8358 - nested_vmx_vmexit(vcpu, -1, 0, 0);
8359 -
8360 - vmx->nested.smm.vmxon = vmx->nested.vmxon;
8361 - vmx->nested.vmxon = false;
8362 - vmx_clear_hlt(vcpu);
8363 - return 0;
8364 -}
8365 -
8366 -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
8367 -{
8368 - struct vcpu_vmx *vmx = to_vmx(vcpu);
8369 - int ret;
8370 -
8371 - if (vmx->nested.smm.vmxon) {
8372 - vmx->nested.vmxon = true;
8373 - vmx->nested.smm.vmxon = false;
8374 - }
8375 -
8376 - if (vmx->nested.smm.guest_mode) {
8377 - ret = nested_vmx_enter_non_root_mode(vcpu, false);
8378 - if (ret)
8379 - return ret;
8380 -
8381 - vmx->nested.smm.guest_mode = false;
8382 - }
8383 - return 0;
8384 -}
8385 -
8386 -static int enable_smi_window(struct kvm_vcpu *vcpu)
8387 -{
8388 - return 0;
8389 -}
8390 -
8391 -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
8392 -{
8393 - return false;
8394 -}
8395 -
8396 -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
8397 -{
8398 - return to_vmx(vcpu)->nested.vmxon;
8399 -}
8400 -
8401 -static __init int hardware_setup(void)
8402 -{
8403 - unsigned long host_bndcfgs;
8404 - struct desc_ptr dt;
8405 - int r, i;
8406 -
8407 - rdmsrl_safe(MSR_EFER, &host_efer);
8408 -
8409 - store_idt(&dt);
8410 - host_idt_base = dt.address;
8411 -
8412 - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
8413 - kvm_define_shared_msr(i, vmx_msr_index[i]);
8414 -
8415 - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
8416 - return -EIO;
8417 -
8418 - if (boot_cpu_has(X86_FEATURE_NX))
8419 - kvm_enable_efer_bits(EFER_NX);
8420 -
8421 - if (boot_cpu_has(X86_FEATURE_MPX)) {
8422 - rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
8423 - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
8424 - }
8425 -
8426 - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
8427 - !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
8428 - enable_vpid = 0;
8429 -
8430 - if (!cpu_has_vmx_ept() ||
8431 - !cpu_has_vmx_ept_4levels() ||
8432 - !cpu_has_vmx_ept_mt_wb() ||
8433 - !cpu_has_vmx_invept_global())
8434 - enable_ept = 0;
8435 -
8436 - if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
8437 - enable_ept_ad_bits = 0;
8438 -
8439 - if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
8440 - enable_unrestricted_guest = 0;
8441 -
8442 - if (!cpu_has_vmx_flexpriority())
8443 - flexpriority_enabled = 0;
8444 -
8445 - if (!cpu_has_virtual_nmis())
8446 - enable_vnmi = 0;
8447 -
8448 - /*
8449 - * set_apic_access_page_addr() is used to reload apic access
8450 - * page upon invalidation. No need to do anything if not
8451 - * using the APIC_ACCESS_ADDR VMCS field.
8452 - */
8453 - if (!flexpriority_enabled)
8454 - kvm_x86_ops->set_apic_access_page_addr = NULL;
8455 -
8456 - if (!cpu_has_vmx_tpr_shadow())
8457 - kvm_x86_ops->update_cr8_intercept = NULL;
8458 -
8459 - if (enable_ept && !cpu_has_vmx_ept_2m_page())
8460 - kvm_disable_largepages();
8461 -
8462 -#if IS_ENABLED(CONFIG_HYPERV)
8463 - if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
8464 - && enable_ept) {
8465 - kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
8466 - kvm_x86_ops->tlb_remote_flush_with_range =
8467 - hv_remote_flush_tlb_with_range;
8468 - }
8469 -#endif
8470 -
8471 - if (!cpu_has_vmx_ple()) {
8472 - ple_gap = 0;
8473 - ple_window = 0;
8474 - ple_window_grow = 0;
8475 - ple_window_max = 0;
8476 - ple_window_shrink = 0;
8477 - }
8478 -
8479 - if (!cpu_has_vmx_apicv()) {
8480 - enable_apicv = 0;
8481 - kvm_x86_ops->sync_pir_to_irr = NULL;
8482 - }
8483 -
8484 - if (cpu_has_vmx_tsc_scaling()) {
8485 - kvm_has_tsc_control = true;
8486 - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
8487 - kvm_tsc_scaling_ratio_frac_bits = 48;
8488 - }
8489 -
8490 - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
8491 -
8492 - if (enable_ept)
8493 - vmx_enable_tdp();
8494 - else
8495 - kvm_disable_tdp();
8496 -
8497 - /*
8498 - * Only enable PML when hardware supports PML feature, and both EPT
8499 - * and EPT A/D bit features are enabled -- PML depends on them to work.
8500 - */
8501 - if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
8502 - enable_pml = 0;
8503 -
8504 - if (!enable_pml) {
8505 - kvm_x86_ops->slot_enable_log_dirty = NULL;
8506 - kvm_x86_ops->slot_disable_log_dirty = NULL;
8507 - kvm_x86_ops->flush_log_dirty = NULL;
8508 - kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
8509 - }
8510 -
8511 - if (!cpu_has_vmx_preemption_timer())
8512 - enable_preemption_timer = false;
8513 -
8514 - if (enable_preemption_timer) {
8515 - u64 use_timer_freq = 5000ULL * 1000 * 1000;
8516 - u64 vmx_msr;
8517 -
8518 - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
8519 - cpu_preemption_timer_multi =
8520 - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
8521 -
8522 - if (tsc_khz)
8523 - use_timer_freq = (u64)tsc_khz * 1000;
8524 - use_timer_freq >>= cpu_preemption_timer_multi;
8525 -
8526 - /*
8527 - * KVM "disables" the preemption timer by setting it to its max
8528 - * value. Don't use the timer if it might cause spurious exits
8529 - * at a rate faster than 0.1 Hz (of uninterrupted guest time).
8530 - */
8531 - if (use_timer_freq > 0xffffffffu / 10)
8532 - enable_preemption_timer = false;
8533 - }
8534 -
8535 - if (!enable_preemption_timer) {
8536 - kvm_x86_ops->set_hv_timer = NULL;
8537 - kvm_x86_ops->cancel_hv_timer = NULL;
8538 - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
8539 - }
8540 -
8541 - kvm_set_posted_intr_wakeup_handler(wakeup_handler);
8542 -
8543 - kvm_mce_cap_supported |= MCG_LMCE_P;
8544 -
8545 - if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
8546 - return -EINVAL;
8547 - if (!enable_ept || !cpu_has_vmx_intel_pt())
8548 - pt_mode = PT_MODE_SYSTEM;
8549 -
8550 - if (nested) {
8551 - nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
8552 - vmx_capability.ept, enable_apicv);
8553 -
8554 - r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
8555 - if (r)
8556 - return r;
8557 - }
8558 -
8559 - r = alloc_kvm_area();
8560 - if (r)
8561 - nested_vmx_hardware_unsetup();
8562 - return r;
8563 -}
8564 -
8565 -static __exit void hardware_unsetup(void)
8566 -{
8567 - if (nested)
8568 - nested_vmx_hardware_unsetup();
8569 -
8570 - free_kvm_area();
8571 -}
8572 -
8573 -static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
8574 - .cpu_has_kvm_support = cpu_has_kvm_support,
8575 - .disabled_by_bios = vmx_disabled_by_bios,
8576 - .hardware_setup = hardware_setup,
8577 - .hardware_unsetup = hardware_unsetup,
8578 - .check_processor_compatibility = vmx_check_processor_compat,
8579 - .hardware_enable = hardware_enable,
8580 - .hardware_disable = hardware_disable,
8581 - .cpu_has_accelerated_tpr = report_flexpriority,
8582 - .has_emulated_msr = vmx_has_emulated_msr,
8583 -
8584 - .vm_init = vmx_vm_init,
8585 - .vm_alloc = vmx_vm_alloc,
8586 - .vm_free = vmx_vm_free,
8587 -
8588 - .vcpu_create = vmx_create_vcpu,
8589 - .vcpu_free = vmx_free_vcpu,
8590 - .vcpu_reset = vmx_vcpu_reset,
8591 -
8592 - .prepare_guest_switch = vmx_prepare_switch_to_guest,
8593 - .vcpu_load = vmx_vcpu_load,
8594 - .vcpu_put = vmx_vcpu_put,
8595 -
8596 - .update_bp_intercept = update_exception_bitmap,
8597 - .get_msr_feature = vmx_get_msr_feature,
8598 - .get_msr = vmx_get_msr,
8599 - .set_msr = vmx_set_msr,
8600 - .get_segment_base = vmx_get_segment_base,
8601 - .get_segment = vmx_get_segment,
8602 - .set_segment = vmx_set_segment,
8603 - .get_cpl = vmx_get_cpl,
8604 - .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
8605 - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
8606 - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
8607 - .set_cr0 = vmx_set_cr0,
8608 - .set_cr3 = vmx_set_cr3,
8609 - .set_cr4 = vmx_set_cr4,
8610 - .set_efer = vmx_set_efer,
8611 - .get_idt = vmx_get_idt,
8612 - .set_idt = vmx_set_idt,
8613 - .get_gdt = vmx_get_gdt,
8614 - .set_gdt = vmx_set_gdt,
8615 - .get_dr6 = vmx_get_dr6,
8616 - .set_dr6 = vmx_set_dr6,
8617 - .set_dr7 = vmx_set_dr7,
8618 - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
8619 - .cache_reg = vmx_cache_reg,
8620 - .get_rflags = vmx_get_rflags,
8621 - .set_rflags = vmx_set_rflags,
8622 -
8623 - .tlb_flush = vmx_flush_tlb,
8624 - .tlb_flush_gva = vmx_flush_tlb_gva,
8625 -
8626 - .run = vmx_vcpu_run,
8627 - .handle_exit = vmx_handle_exit,
8628 - .skip_emulated_instruction = skip_emulated_instruction,
8629 - .set_interrupt_shadow = vmx_set_interrupt_shadow,
8630 - .get_interrupt_shadow = vmx_get_interrupt_shadow,
8631 - .patch_hypercall = vmx_patch_hypercall,
8632 - .set_irq = vmx_inject_irq,
8633 - .set_nmi = vmx_inject_nmi,
8634 - .queue_exception = vmx_queue_exception,
8635 - .cancel_injection = vmx_cancel_injection,
8636 - .interrupt_allowed = vmx_interrupt_allowed,
8637 - .nmi_allowed = vmx_nmi_allowed,
8638 - .get_nmi_mask = vmx_get_nmi_mask,
8639 - .set_nmi_mask = vmx_set_nmi_mask,
8640 - .enable_nmi_window = enable_nmi_window,
8641 - .enable_irq_window = enable_irq_window,
8642 - .update_cr8_intercept = update_cr8_intercept,
8643 - .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
8644 - .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
8645 - .get_enable_apicv = vmx_get_enable_apicv,
8646 - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
8647 - .load_eoi_exitmap = vmx_load_eoi_exitmap,
8648 - .apicv_post_state_restore = vmx_apicv_post_state_restore,
8649 - .hwapic_irr_update = vmx_hwapic_irr_update,
8650 - .hwapic_isr_update = vmx_hwapic_isr_update,
8651 - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
8652 - .sync_pir_to_irr = vmx_sync_pir_to_irr,
8653 - .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
8654 - .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
8655 -
8656 - .set_tss_addr = vmx_set_tss_addr,
8657 - .set_identity_map_addr = vmx_set_identity_map_addr,
8658 - .get_tdp_level = get_ept_level,
8659 - .get_mt_mask = vmx_get_mt_mask,
8660 -
8661 - .get_exit_info = vmx_get_exit_info,
8662 -
8663 - .get_lpage_level = vmx_get_lpage_level,
8664 -
8665 - .cpuid_update = vmx_cpuid_update,
8666 -
8667 - .rdtscp_supported = vmx_rdtscp_supported,
8668 - .invpcid_supported = vmx_invpcid_supported,
8669 -
8670 - .set_supported_cpuid = vmx_set_supported_cpuid,
8671 -
8672 - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
8673 -
8674 - .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
8675 - .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
8676 -
8677 - .set_tdp_cr3 = vmx_set_cr3,
8678 -
8679 - .check_intercept = vmx_check_intercept,
8680 - .handle_exit_irqoff = vmx_handle_exit_irqoff,
8681 - .mpx_supported = vmx_mpx_supported,
8682 - .xsaves_supported = vmx_xsaves_supported,
8683 - .umip_emulated = vmx_umip_emulated,
8684 - .pt_supported = vmx_pt_supported,
8685 -
8686 - .request_immediate_exit = vmx_request_immediate_exit,
8687 -
8688 - .sched_in = vmx_sched_in,
8689 -
8690 - .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
8691 - .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
8692 - .flush_log_dirty = vmx_flush_log_dirty,
8693 - .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
8694 - .write_log_dirty = vmx_write_pml_buffer,
8695 -
8696 - .pre_block = vmx_pre_block,
8697 - .post_block = vmx_post_block,
8698 -
8699 - .pmu_ops = &intel_pmu_ops,
8700 -
8701 - .update_pi_irte = vmx_update_pi_irte,
8702 -
8703 -#ifdef CONFIG_X86_64
8704 - .set_hv_timer = vmx_set_hv_timer,
8705 - .cancel_hv_timer = vmx_cancel_hv_timer,
8706 -#endif
8707 -
8708 - .setup_mce = vmx_setup_mce,
8709 -
8710 - .smi_allowed = vmx_smi_allowed,
8711 - .pre_enter_smm = vmx_pre_enter_smm,
8712 - .pre_leave_smm = vmx_pre_leave_smm,
8713 - .enable_smi_window = enable_smi_window,
8714 -
8715 - .check_nested_events = NULL,
8716 - .get_nested_state = NULL,
8717 - .set_nested_state = NULL,
8718 - .get_vmcs12_pages = NULL,
8719 - .nested_enable_evmcs = NULL,
8720 - .nested_get_evmcs_version = NULL,
8721 - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
8722 - .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
8723 -};
8724 -
8725 -static void vmx_cleanup_l1d_flush(void)
8726 -{
8727 - if (vmx_l1d_flush_pages) {
8728 - free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
8729 - vmx_l1d_flush_pages = NULL;
8730 - }
8731 - /* Restore state so sysfs ignores VMX */
8732 - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
8733 -}
8734 -
8735 -static void vmx_exit(void)
8736 -{
8737 -#ifdef CONFIG_KEXEC_CORE
8738 - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
8739 - synchronize_rcu();
8740 -#endif
8741 -
8742 - kvm_exit();
8743 -
8744 -#if IS_ENABLED(CONFIG_HYPERV)
8745 - if (static_branch_unlikely(&enable_evmcs)) {
8746 - int cpu;
8747 - struct hv_vp_assist_page *vp_ap;
8748 - /*
8749 - * Reset everything to support using non-enlightened VMCS
8750 - * access later (e.g. when we reload the module with
8751 - * enlightened_vmcs=0)
8752 - */
8753 - for_each_online_cpu(cpu) {
8754 - vp_ap = hv_get_vp_assist_page(cpu);
8755 -
8756 - if (!vp_ap)
8757 - continue;
8758 -
8759 - vp_ap->nested_control.features.directhypercall = 0;
8760 - vp_ap->current_nested_vmcs = 0;
8761 - vp_ap->enlighten_vmentry = 0;
8762 - }
8763 -
8764 - static_branch_disable(&enable_evmcs);
8765 - }
8766 -#endif
8767 - vmx_cleanup_l1d_flush();
8768 -}
8769 -module_exit(vmx_exit);
8770 -
8771 -static int __init vmx_init(void)
8772 -{
8773 - int r;
8774 -
8775 -#if IS_ENABLED(CONFIG_HYPERV)
8776 - /*
8777 - * Enlightened VMCS usage should be recommended and the host needs
8778 - * to support eVMCS v1 or above. We can also disable eVMCS support
8779 - * with module parameter.
8780 - */
8781 - if (enlightened_vmcs &&
8782 - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
8783 - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
8784 - KVM_EVMCS_VERSION) {
8785 - int cpu;
8786 -
8787 - /* Check that we have assist pages on all online CPUs */
8788 - for_each_online_cpu(cpu) {
8789 - if (!hv_get_vp_assist_page(cpu)) {
8790 - enlightened_vmcs = false;
8791 - break;
8792 - }
8793 - }
8794 -
8795 - if (enlightened_vmcs) {
8796 - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
8797 - static_branch_enable(&enable_evmcs);
8798 - }
8799 -
8800 - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
8801 - vmx_x86_ops.enable_direct_tlbflush
8802 - = hv_enable_direct_tlbflush;
8803 -
8804 - } else {
8805 - enlightened_vmcs = false;
8806 - }
8807 -#endif
8808 -
8809 - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
8810 - __alignof__(struct vcpu_vmx), THIS_MODULE);
8811 - if (r)
8812 - return r;
8813 -
8814 - /*
8815 - * Must be called after kvm_init() so enable_ept is properly set
8816 - * up. Hand the parameter mitigation value in which was stored in
8817 - * the pre module init parser. If no parameter was given, it will
8818 - * contain 'auto' which will be turned into the default 'cond'
8819 - * mitigation mode.
8820 - */
8821 - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
8822 - if (r) {
8823 - vmx_exit();
8824 - return r;
8825 - }
8826 -
8827 -#ifdef CONFIG_KEXEC_CORE
8828 - rcu_assign_pointer(crash_vmclear_loaded_vmcss,
8829 - crash_vmclear_local_loaded_vmcss);
8830 -#endif
8831 - vmx_check_vmcs12_offsets();
8832 -
8833 - return 0;
8834 -}
8835 -module_init(vmx_init);
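
For reference, a minimal userspace sketch (not kernel code) of the pattern the removed hardware_setup() relies on: an ops table whose optional callbacks are set to NULL when a hardware feature is absent, so callers must test the pointer before invoking it. Struct and function names here are illustrative stand-ins, not the KVM API.

#include <stdio.h>

struct hv_ops {
	void (*flush_log_dirty)(void);   /* optional: depends on PML support */
	void (*set_hv_timer)(int msec);  /* optional: depends on preemption timer */
};

static void pml_flush(void) { puts("flushing dirty log"); }

int main(void)
{
	struct hv_ops ops = { .flush_log_dirty = pml_flush, .set_hv_timer = NULL };
	int have_pml = 1, have_timer = 0;   /* assumed probe results */

	/* Feature probing clears callbacks, as hardware_setup() does above. */
	if (!have_pml)
		ops.flush_log_dirty = NULL;
	if (!have_timer)
		ops.set_hv_timer = NULL;

	if (ops.flush_log_dirty)
		ops.flush_log_dirty();
	if (ops.set_hv_timer)
		ops.set_hv_timer(10);
	else
		puts("preemption timer unavailable, using fallback");
	return 0;
}

Keeping the decision in one setup routine means every later call site only needs a NULL check rather than repeating the feature test.
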
8836 diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
8837 index 0f7eb4f5bdb7..82e105b284e0 100644
8838 --- a/arch/x86/lib/x86-opcode-map.txt
8839 +++ b/arch/x86/lib/x86-opcode-map.txt
8840 @@ -909,7 +909,7 @@ EndTable
8841
8842 GrpTable: Grp3_2
8843 0: TEST Ev,Iz
8844 -1:
8845 +1: TEST Ev,Iz
8846 2: NOT Ev
8847 3: NEG Ev
8848 4: MUL rAX,Ev
8849 diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
8850 index b6669d326545..f08abdf8bb67 100644
8851 --- a/arch/x86/platform/efi/efi.c
8852 +++ b/arch/x86/platform/efi/efi.c
8853 @@ -478,7 +478,6 @@ void __init efi_init(void)
8854 efi_char16_t *c16;
8855 char vendor[100] = "unknown";
8856 int i = 0;
8857 - void *tmp;
8858
8859 #ifdef CONFIG_X86_32
8860 if (boot_params.efi_info.efi_systab_hi ||
8861 @@ -503,14 +502,16 @@ void __init efi_init(void)
8862 /*
8863 * Show what we know for posterity
8864 */
8865 - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
8866 + c16 = early_memremap_ro(efi.systab->fw_vendor,
8867 + sizeof(vendor) * sizeof(efi_char16_t));
8868 if (c16) {
8869 - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
8870 - vendor[i] = *c16++;
8871 + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
8872 + vendor[i] = c16[i];
8873 vendor[i] = '\0';
8874 - } else
8875 + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
8876 + } else {
8877 pr_err("Could not map the firmware vendor!\n");
8878 - early_memunmap(tmp, 2);
8879 + }
8880
8881 pr_info("EFI v%u.%.02u by %s\n",
8882 efi.systab->hdr.revision >> 16,
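
A userspace sketch of the bounded UTF-16-to-ASCII copy the hunk above performs: the whole vendor buffer is mapped (instead of only two bytes) and indexed directly, so the unmap length matches the map length. A plain array stands in for early_memremap_ro()/early_memunmap(), and uint16_t for efi_char16_t; the vendor data is made up.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t fw_vendor[] = { 'A', 'c', 'm', 'e', ' ', 'E', 'F', 'I', 0 }; /* assumed data */
	char vendor[100] = "unknown";
	size_t i;

	for (i = 0; i < sizeof(vendor) - 1 && fw_vendor[i]; ++i)
		vendor[i] = (char)fw_vendor[i];   /* keep the low byte of each UTF-16 unit */
	vendor[i] = '\0';

	printf("EFI vendor: %s\n", vendor);
	return 0;
}
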
8883 diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
8884 index 6a4b603d0e83..10bbf6ca082a 100644
8885 --- a/drivers/acpi/acpica/dsfield.c
8886 +++ b/drivers/acpi/acpica/dsfield.c
8887 @@ -272,7 +272,7 @@ cleanup:
8888 * FUNCTION: acpi_ds_get_field_names
8889 *
8890 * PARAMETERS: info - create_field info structure
8891 - * ` walk_state - Current method state
8892 + * walk_state - Current method state
8893 * arg - First parser arg for the field name list
8894 *
8895 * RETURN: Status
8896 diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
8897 index fd34040d4f44..9c41d2153d0f 100644
8898 --- a/drivers/acpi/acpica/dswload.c
8899 +++ b/drivers/acpi/acpica/dswload.c
8900 @@ -440,6 +440,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state)
8901 ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op,
8902 walk_state));
8903
8904 + /*
8905 + * Disassembler: handle create field operators here.
8906 + *
8907 + * create_buffer_field is a deferred op that is typically processed in load
8908 + * pass 2. However, disassembly of control method contents walks the parse
8909 + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed
8910 + * in a later walk. This is a problem when there is a control method that
8911 + * has the same name as the AML_CREATE object. In this case, any use of the
8912 + * name segment will be detected as a method call rather than a reference
8913 + * to a buffer field.
8914 + *
8915 + * This earlier creation during disassembly solves this issue by inserting
8916 + * the named object in the ACPI namespace so that references to this name
8917 + * would be a name string rather than a method call.
8918 + */
8919 + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) &&
8920 + (walk_state->op_info->flags & AML_CREATE)) {
8921 + status = acpi_ds_create_buffer_field(op, walk_state);
8922 + return_ACPI_STATUS(status);
8923 + }
8924 +
8925 /* We are only interested in opcodes that have an associated name */
8926
8927 if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) {
8928 diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
8929 index 5408a292078b..89e62043d02e 100644
8930 --- a/drivers/ata/ahci.c
8931 +++ b/drivers/ata/ahci.c
8932 @@ -86,6 +86,7 @@ enum board_ids {
8933
8934 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
8935 static void ahci_remove_one(struct pci_dev *dev);
8936 +static void ahci_shutdown_one(struct pci_dev *dev);
8937 static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
8938 unsigned long deadline);
8939 static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
8940 @@ -582,6 +583,7 @@ static struct pci_driver ahci_pci_driver = {
8941 .id_table = ahci_pci_tbl,
8942 .probe = ahci_init_one,
8943 .remove = ahci_remove_one,
8944 + .shutdown = ahci_shutdown_one,
8945 .driver = {
8946 .pm = &ahci_pci_pm_ops,
8947 },
8948 @@ -1775,6 +1777,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
8949 return 0;
8950 }
8951
8952 +static void ahci_shutdown_one(struct pci_dev *pdev)
8953 +{
8954 + ata_pci_shutdown_one(pdev);
8955 +}
8956 +
8957 static void ahci_remove_one(struct pci_dev *pdev)
8958 {
8959 pm_runtime_get_noresume(&pdev->dev);
8960 diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
8961 index b1582f161171..ba0cffbd0bb6 100644
8962 --- a/drivers/ata/libata-core.c
8963 +++ b/drivers/ata/libata-core.c
8964 @@ -6580,6 +6580,26 @@ void ata_pci_remove_one(struct pci_dev *pdev)
8965 ata_host_detach(host);
8966 }
8967
8968 +void ata_pci_shutdown_one(struct pci_dev *pdev)
8969 +{
8970 + struct ata_host *host = pci_get_drvdata(pdev);
8971 + int i;
8972 +
8973 + for (i = 0; i < host->n_ports; i++) {
8974 + struct ata_port *ap = host->ports[i];
8975 +
8976 + ap->pflags |= ATA_PFLAG_FROZEN;
8977 +
8978 + /* Disable port interrupts */
8979 + if (ap->ops->freeze)
8980 + ap->ops->freeze(ap);
8981 +
8982 + /* Stop the port DMA engines */
8983 + if (ap->ops->port_stop)
8984 + ap->ops->port_stop(ap);
8985 + }
8986 +}
8987 +
8988 /* move to PCI subsystem */
8989 int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
8990 {
8991 @@ -7200,6 +7220,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode);
8992
8993 #ifdef CONFIG_PCI
8994 EXPORT_SYMBOL_GPL(pci_test_config_bits);
8995 +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
8996 EXPORT_SYMBOL_GPL(ata_pci_remove_one);
8997 #ifdef CONFIG_PM
8998 EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
8999 diff --git a/drivers/base/dd.c b/drivers/base/dd.c
9000 index ee25a69630c3..854d218ea76a 100644
9001 --- a/drivers/base/dd.c
9002 +++ b/drivers/base/dd.c
9003 @@ -341,7 +341,10 @@ static int really_probe(struct device *dev, struct device_driver *drv)
9004 atomic_inc(&probe_count);
9005 pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
9006 drv->bus->name, __func__, drv->name, dev_name(dev));
9007 - WARN_ON(!list_empty(&dev->devres_head));
9008 + if (!list_empty(&dev->devres_head)) {
9009 + dev_crit(dev, "Resources present before probing\n");
9010 + return -EBUSY;
9011 + }
9012
9013 re_probe:
9014 dev->driver = drv;
9015 diff --git a/drivers/base/platform.c b/drivers/base/platform.c
9016 index f90b1b9bbad0..bef299ef6227 100644
9017 --- a/drivers/base/platform.c
9018 +++ b/drivers/base/platform.c
9019 @@ -28,6 +28,7 @@
9020 #include <linux/limits.h>
9021 #include <linux/property.h>
9022 #include <linux/kmemleak.h>
9023 +#include <linux/types.h>
9024
9025 #include "base.h"
9026 #include "power/power.h"
9027 @@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
9028 struct resource *platform_get_resource(struct platform_device *dev,
9029 unsigned int type, unsigned int num)
9030 {
9031 - int i;
9032 + u32 i;
9033
9034 for (i = 0; i < dev->num_resources; i++) {
9035 struct resource *r = &dev->resource[i];
9036 @@ -153,7 +154,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
9037 unsigned int type,
9038 const char *name)
9039 {
9040 - int i;
9041 + u32 i;
9042
9043 for (i = 0; i < dev->num_resources; i++) {
9044 struct resource *r = &dev->resource[i];
9045 @@ -350,7 +351,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties);
9046 */
9047 int platform_device_add(struct platform_device *pdev)
9048 {
9049 - int i, ret;
9050 + u32 i;
9051 + int ret;
9052
9053 if (!pdev)
9054 return -EINVAL;
9055 @@ -416,7 +418,7 @@ int platform_device_add(struct platform_device *pdev)
9056 pdev->id = PLATFORM_DEVID_AUTO;
9057 }
9058
9059 - while (--i >= 0) {
9060 + while (i--) {
9061 struct resource *r = &pdev->resource[i];
9062 if (r->parent)
9063 release_resource(r);
9064 @@ -437,7 +439,7 @@ EXPORT_SYMBOL_GPL(platform_device_add);
9065 */
9066 void platform_device_del(struct platform_device *pdev)
9067 {
9068 - int i;
9069 + u32 i;
9070
9071 if (pdev) {
9072 device_remove_properties(&pdev->dev);
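
A short sketch of why the cleanup loop above changes from "while (--i >= 0)" to "while (i--)": once the counter is unsigned, the first form can never terminate because an unsigned value is always >= 0, while the second tests the old value, decrements, and visits i-1 down to 0.

#include <stdio.h>

int main(void)
{
	unsigned int i = 3;

	/* "while (--i >= 0)" would loop forever with an unsigned counter. */
	while (i--)                      /* tests the old value, then decrements */
		printf("releasing resource %u\n", i);
	return 0;
}
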
9073 diff --git a/drivers/block/brd.c b/drivers/block/brd.c
9074 index 0c76d4016eeb..7e35574a17df 100644
9075 --- a/drivers/block/brd.c
9076 +++ b/drivers/block/brd.c
9077 @@ -581,6 +581,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
9078 return kobj;
9079 }
9080
9081 +static inline void brd_check_and_reset_par(void)
9082 +{
9083 + if (unlikely(!max_part))
9084 + max_part = 1;
9085 +
9086 + /*
9087 + * make sure 'max_part' can be divided exactly by (1U << MINORBITS),
9088 + * otherwise, it is possible to get the same dev_t when adding partitions.
9089 + */
9090 + if ((1U << MINORBITS) % max_part != 0)
9091 + max_part = 1UL << fls(max_part);
9092 +
9093 + if (max_part > DISK_MAX_PARTS) {
9094 + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
9095 + DISK_MAX_PARTS, DISK_MAX_PARTS);
9096 + max_part = DISK_MAX_PARTS;
9097 + }
9098 +}
9099 +
9100 static int __init brd_init(void)
9101 {
9102 struct brd_device *brd, *next;
9103 @@ -604,8 +623,7 @@ static int __init brd_init(void)
9104 if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
9105 return -EIO;
9106
9107 - if (unlikely(!max_part))
9108 - max_part = 1;
9109 + brd_check_and_reset_par();
9110
9111 for (i = 0; i < rd_nr; i++) {
9112 brd = brd_alloc(i);
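
A userspace sketch of brd_check_and_reset_par() above: max_part is forced to a power of two so it divides 1 << MINORBITS evenly, then capped. fls() is modeled with a small helper; MINORBITS and DISK_MAX_PARTS mirror the usual kernel values but are hard-coded here.

#include <stdio.h>

#define MINORBITS      20
#define DISK_MAX_PARTS 256

static unsigned int fls_u32(unsigned int x)   /* position of highest set bit, 1-based */
{
	unsigned int r = 0;
	while (x) { r++; x >>= 1; }
	return r;
}

static unsigned int check_and_reset_par(unsigned int max_part)
{
	if (!max_part)
		max_part = 1;
	if ((1U << MINORBITS) % max_part != 0)
		max_part = 1U << fls_u32(max_part);   /* round up to the next power of two */
	if (max_part > DISK_MAX_PARTS)
		max_part = DISK_MAX_PARTS;
	return max_part;
}

int main(void)
{
	printf("%u -> %u\n", 3u, check_and_reset_par(3));       /* 3 -> 4 */
	printf("%u -> %u\n", 1000u, check_and_reset_par(1000)); /* 1000 -> 256 */
	return 0;
}
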
9113 diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
9114 index ece4f706b38f..4496e7a49235 100644
9115 --- a/drivers/block/floppy.c
9116 +++ b/drivers/block/floppy.c
9117 @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode)
9118 /* selects the fdc and drive, and enables the fdc's input/dma. */
9119 static void set_fdc(int drive)
9120 {
9121 + unsigned int new_fdc = fdc;
9122 +
9123 if (drive >= 0 && drive < N_DRIVE) {
9124 - fdc = FDC(drive);
9125 + new_fdc = FDC(drive);
9126 current_drive = drive;
9127 }
9128 - if (fdc != 1 && fdc != 0) {
9129 + if (new_fdc >= N_FDC) {
9130 pr_info("bad fdc value\n");
9131 return;
9132 }
9133 + fdc = new_fdc;
9134 set_dor(fdc, ~0, 8);
9135 #if N_FDC > 1
9136 set_dor(1 - fdc, ~8, 0);
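
The floppy change above follows a validate-then-commit shape: the new controller index is computed into a local and the shared "fdc" variable is only written once the bounds check passes, so a bad argument can no longer leave the global out of range. A simplified model, with hypothetical constants chosen so the rejection path is exercised:

#include <stdio.h>

#define N_FDC 2

static unsigned int fdc;   /* currently selected controller, shared state */

static void select_controller(unsigned int candidate)
{
	unsigned int new_fdc = candidate;

	if (new_fdc >= N_FDC) {      /* reject before touching the global */
		puts("bad fdc value");
		return;
	}
	fdc = new_fdc;               /* commit only after validation */
	printf("selected fdc %u\n", fdc);
}

int main(void)
{
	select_controller(1);  /* accepted */
	select_controller(3);  /* rejected; fdc still holds 1 */
	return 0;
}
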
9137 diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
9138 index 0ae1b0a66eb5..d8601b138dc1 100644
9139 --- a/drivers/clk/qcom/clk-rcg2.c
9140 +++ b/drivers/clk/qcom/clk-rcg2.c
9141 @@ -194,6 +194,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw,
9142
9143 clk_flags = clk_hw_get_flags(hw);
9144 p = clk_hw_get_parent_by_index(hw, index);
9145 + if (!p)
9146 + return -EINVAL;
9147 +
9148 if (clk_flags & CLK_SET_RATE_PARENT) {
9149 if (f->pre_div) {
9150 if (!rate)
9151 diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
9152 index 41254e702f1e..2ce7cc94d78b 100644
9153 --- a/drivers/devfreq/Kconfig
9154 +++ b/drivers/devfreq/Kconfig
9155 @@ -102,7 +102,8 @@ config ARM_TEGRA_DEVFREQ
9156
9157 config ARM_RK3399_DMC_DEVFREQ
9158 tristate "ARM RK3399 DMC DEVFREQ Driver"
9159 - depends on ARCH_ROCKCHIP
9160 + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \
9161 + (COMPILE_TEST && HAVE_ARM_SMCCC)
9162 select DEVFREQ_EVENT_ROCKCHIP_DFI
9163 select DEVFREQ_GOV_SIMPLE_ONDEMAND
9164 select PM_DEVFREQ_EVENT
9165 diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
9166 index cd949800eed9..8851bc4e8e3e 100644
9167 --- a/drivers/devfreq/event/Kconfig
9168 +++ b/drivers/devfreq/event/Kconfig
9169 @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU
9170
9171 config DEVFREQ_EVENT_ROCKCHIP_DFI
9172 tristate "ROCKCHIP DFI DEVFREQ event Driver"
9173 - depends on ARCH_ROCKCHIP
9174 + depends on ARCH_ROCKCHIP || COMPILE_TEST
9175 help
9176 This add the devfreq-event driver for Rockchip SoC. It provides DFI
9177 (DDR Monitor Module) driver to count ddr load.
9178 diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
9179 index 7847dd34f86f..036a78b70427 100644
9180 --- a/drivers/gpio/gpio-grgpio.c
9181 +++ b/drivers/gpio/gpio-grgpio.c
9182 @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
9183 lirq->irq = irq;
9184 uirq = &priv->uirqs[lirq->index];
9185 if (uirq->refcnt == 0) {
9186 + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9187 ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
9188 dev_name(priv->dev), priv);
9189 if (ret) {
9190 dev_err(priv->dev,
9191 "Could not request underlying irq %d\n",
9192 uirq->uirq);
9193 -
9194 - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9195 -
9196 return ret;
9197 }
9198 + spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
9199 }
9200 uirq->refcnt++;
9201
9202 @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
9203 if (index >= 0) {
9204 uirq = &priv->uirqs[lirq->index];
9205 uirq->refcnt--;
9206 - if (uirq->refcnt == 0)
9207 + if (uirq->refcnt == 0) {
9208 + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9209 free_irq(uirq->uirq, priv);
9210 + return;
9211 + }
9212 }
9213
9214 spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9215 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9216 index 26afdffab5a0..ac8885562919 100644
9217 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9218 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9219 @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
9220 path_size += le16_to_cpu(path->usSize);
9221
9222 if (device_support & le16_to_cpu(path->usDeviceTag)) {
9223 - uint8_t con_obj_id, con_obj_num, con_obj_type;
9224 -
9225 - con_obj_id =
9226 + uint8_t con_obj_id =
9227 (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
9228 >> OBJECT_ID_SHIFT;
9229 - con_obj_num =
9230 - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
9231 - >> ENUM_ID_SHIFT;
9232 - con_obj_type =
9233 - (le16_to_cpu(path->usConnObjectId) &
9234 - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
9235
9236 /* Skip TV/CV support */
9237 if ((le16_to_cpu(path->usDeviceTag) ==
9238 @@ -371,14 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
9239 router.ddc_valid = false;
9240 router.cd_valid = false;
9241 for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
9242 - uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
9243 -
9244 - grph_obj_id =
9245 - (le16_to_cpu(path->usGraphicObjIds[j]) &
9246 - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
9247 - grph_obj_num =
9248 - (le16_to_cpu(path->usGraphicObjIds[j]) &
9249 - ENUM_ID_MASK) >> ENUM_ID_SHIFT;
9250 + uint8_t grph_obj_type=
9251 grph_obj_type =
9252 (le16_to_cpu(path->usGraphicObjIds[j]) &
9253 OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
9254 diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
9255 index 3a44e705db53..d224fc12b757 100644
9256 --- a/drivers/gpu/drm/gma500/framebuffer.c
9257 +++ b/drivers/gpu/drm/gma500/framebuffer.c
9258 @@ -516,6 +516,7 @@ static int psbfb_probe(struct drm_fb_helper *helper,
9259 container_of(helper, struct psb_fbdev, psb_fb_helper);
9260 struct drm_device *dev = psb_fbdev->psb_fb_helper.dev;
9261 struct drm_psb_private *dev_priv = dev->dev_private;
9262 + unsigned int fb_size;
9263 int bytespp;
9264
9265 bytespp = sizes->surface_bpp / 8;
9266 @@ -525,8 +526,11 @@ static int psbfb_probe(struct drm_fb_helper *helper,
9267 /* If the mode will not fit in 32bit then switch to 16bit to get
9268 a console on full resolution. The X mode setting server will
9269 allocate its own 32bit GEM framebuffer */
9270 - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height >
9271 - dev_priv->vram_stolen_size) {
9272 + fb_size = ALIGN(sizes->surface_width * bytespp, 64) *
9273 + sizes->surface_height;
9274 + fb_size = ALIGN(fb_size, PAGE_SIZE);
9275 +
9276 + if (fb_size > dev_priv->vram_stolen_size) {
9277 sizes->surface_bpp = 16;
9278 sizes->surface_depth = 16;
9279 }
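
A sketch of the size check above: the candidate framebuffer is sized from the surface dimensions (64-byte-aligned stride, rounded up to whole pages) and compared against the stolen VRAM before falling back to 16 bpp. ALIGN() mirrors the kernel macro for power-of-two alignments; the dimensions and VRAM size are made up.

#include <stdio.h>

#define ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))
#define PAGE_SIZE    4096u

int main(void)
{
	unsigned int width = 1920, height = 1080, bytespp = 4;
	unsigned int vram_stolen_size = 8u << 20;             /* 8 MiB, assumed */

	unsigned int fb_size = ALIGN(width * bytespp, 64) * height;
	fb_size = ALIGN(fb_size, PAGE_SIZE);

	printf("fb_size=%u bytes, fits=%s\n", fb_size,
	       fb_size > vram_stolen_size ? "no (drop to 16 bpp)" : "yes");
	return 0;
}

Computing the full padded size (rather than width * bytespp alone) is what makes the comparison against vram_stolen_size meaningful.
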
9280 diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9281 index 01a21dd835b5..1ed60da76a0c 100644
9282 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9283 +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9284 @@ -306,6 +306,7 @@ err_pm_runtime_put:
9285 static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
9286 {
9287 struct drm_device *drm = mtk_crtc->base.dev;
9288 + struct drm_crtc *crtc = &mtk_crtc->base;
9289 int i;
9290
9291 DRM_DEBUG_DRIVER("%s\n", __func__);
9292 @@ -327,6 +328,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
9293 mtk_disp_mutex_unprepare(mtk_crtc->mutex);
9294
9295 pm_runtime_put(drm->dev);
9296 +
9297 + if (crtc->state->event && !crtc->state->active) {
9298 + spin_lock_irq(&crtc->dev->event_lock);
9299 + drm_crtc_send_vblank_event(crtc, crtc->state->event);
9300 + crtc->state->event = NULL;
9301 + spin_unlock_irq(&crtc->dev->event_lock);
9302 + }
9303 }
9304
9305 static void mtk_drm_crtc_enable(struct drm_crtc *crtc)
9306 diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
9307 index 4bb9ab892ae1..78e521d00251 100644
9308 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
9309 +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
9310 @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
9311
9312 fence = list_entry(fctx->pending.next, typeof(*fence), head);
9313 chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
9314 - if (nouveau_fence_update(fence->channel, fctx))
9315 + if (nouveau_fence_update(chan, fctx))
9316 ret = NVIF_NOTIFY_DROP;
9317 }
9318 spin_unlock_irqrestore(&fctx->lock, flags);
9319 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9320 index 9d90d8b4b7e6..f5a8db1bb8b7 100644
9321 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9322 +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9323 @@ -72,6 +72,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug)
9324
9325 if (debug > subdev->debug)
9326 return;
9327 + if (!mthd)
9328 + return;
9329
9330 for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) {
9331 u32 base = chan->head * mthd->addr;
9332 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9333 index de8b806b88fd..7618b2eb4fdf 100644
9334 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9335 +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9336 @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
9337
9338 nent = (fuc.size / sizeof(struct gk20a_fw_av));
9339
9340 - pack = vzalloc((sizeof(*pack) * max_classes) +
9341 - (sizeof(*init) * (nent + 1)));
9342 + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) +
9343 + (sizeof(*init) * (nent + max_classes + 1)));
9344 if (!pack) {
9345 ret = -ENOMEM;
9346 goto end;
9347 }
9348
9349 - init = (void *)(pack + max_classes);
9350 + init = (void *)(pack + max_classes + 1);
9351
9352 - for (i = 0; i < nent; i++) {
9353 - struct gf100_gr_init *ent = &init[i];
9354 + for (i = 0; i < nent; i++, init++) {
9355 struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i];
9356 u32 class = av->addr & 0xffff;
9357 u32 addr = (av->addr & 0xffff0000) >> 14;
9358
9359 if (prevclass != class) {
9360 - pack[classidx].init = ent;
9361 + if (prevclass) /* Add terminator to the method list. */
9362 + init++;
9363 + pack[classidx].init = init;
9364 pack[classidx].type = class;
9365 prevclass = class;
9366 if (++classidx >= max_classes) {
9367 @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
9368 }
9369 }
9370
9371 - ent->addr = addr;
9372 - ent->data = av->data;
9373 - ent->count = 1;
9374 - ent->pitch = 1;
9375 + init->addr = addr;
9376 + init->data = av->data;
9377 + init->count = 1;
9378 + init->pitch = 1;
9379 }
9380
9381 *ppack = pack;
9382 diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
9383 index 8b6f8aa23806..432ad7d73cb9 100644
9384 --- a/drivers/gpu/drm/radeon/radeon_display.c
9385 +++ b/drivers/gpu/drm/radeon/radeon_display.c
9386 @@ -110,6 +110,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc)
9387
9388 DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id);
9389
9390 + msleep(10);
9391 +
9392 WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset,
9393 (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) |
9394 NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS)));
9395 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9396 index 1f013d45c9e9..0c7c3005594c 100644
9397 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9398 +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9399 @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man,
9400
9401 cres->hash.key = user_key | (res_type << 24);
9402 ret = drm_ht_insert_item(&man->resources, &cres->hash);
9403 - if (unlikely(ret != 0))
9404 + if (unlikely(ret != 0)) {
9405 + kfree(cres);
9406 goto out_invalid_key;
9407 + }
9408
9409 cres->state = VMW_CMDBUF_RES_ADD;
9410 cres->res = vmw_resource_reference(res);
9411 diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c
9412 index 58b789c28b48..94eea2ac6251 100644
9413 --- a/drivers/hwmon/pmbus/ltc2978.c
9414 +++ b/drivers/hwmon/pmbus/ltc2978.c
9415 @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
9416
9417 #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */
9418
9419 -#define LTC_NOT_BUSY BIT(5)
9420 -#define LTC_NOT_PENDING BIT(4)
9421 +#define LTC_NOT_BUSY BIT(6)
9422 +#define LTC_NOT_PENDING BIT(5)
9423
9424 /*
9425 * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which
9426 diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
9427 index b127ed60c733..9dde8390da09 100644
9428 --- a/drivers/ide/cmd64x.c
9429 +++ b/drivers/ide/cmd64x.c
9430 @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
9431 struct ide_timing t;
9432 u8 arttim = 0;
9433
9434 + if (drive->dn >= ARRAY_SIZE(drwtim_regs))
9435 + return;
9436 +
9437 ide_timing_compute(drive, mode, &t, T, 0);
9438
9439 /*
9440 diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
9441 index a97affca18ab..0f57d45484d1 100644
9442 --- a/drivers/ide/serverworks.c
9443 +++ b/drivers/ide/serverworks.c
9444 @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
9445 struct pci_dev *dev = to_pci_dev(hwif->dev);
9446 const u8 pio = drive->pio_mode - XFER_PIO_0;
9447
9448 + if (drive->dn >= ARRAY_SIZE(drive_pci))
9449 + return;
9450 +
9451 pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
9452
9453 if (svwks_csb_check(dev)) {
9454 @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
9455
9456 u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0;
9457
9458 + if (drive->dn >= ARRAY_SIZE(drive_pci2))
9459 + return;
9460 +
9461 pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
9462 pci_read_config_byte(dev, 0x54, &ultra_enable);
9463
9464 diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
9465 index 47003d2a4a46..dee3853163b6 100644
9466 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
9467 +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
9468 @@ -422,7 +422,7 @@ struct rxe_dev {
9469 struct list_head pending_mmaps;
9470
9471 spinlock_t mmap_offset_lock; /* guard mmap_offset */
9472 - int mmap_offset;
9473 + u64 mmap_offset;
9474
9475 struct rxe_port port;
9476 struct list_head list;
9477 diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
9478 index 0d2ab9a2cf44..02a5e2d7e574 100644
9479 --- a/drivers/infiniband/ulp/isert/ib_isert.c
9480 +++ b/drivers/infiniband/ulp/isert/ib_isert.c
9481 @@ -2555,6 +2555,17 @@ isert_wait4logout(struct isert_conn *isert_conn)
9482 }
9483 }
9484
9485 +static void
9486 +isert_wait4cmds(struct iscsi_conn *conn)
9487 +{
9488 + isert_info("iscsi_conn %p\n", conn);
9489 +
9490 + if (conn->sess) {
9491 + target_sess_cmd_list_set_waiting(conn->sess->se_sess);
9492 + target_wait_for_sess_cmds(conn->sess->se_sess);
9493 + }
9494 +}
9495 +
9496 /**
9497 * isert_put_unsol_pending_cmds() - Drop commands waiting for
9498 * unsolicitate dataout
9499 @@ -2602,6 +2613,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
9500
9501 ib_drain_qp(isert_conn->qp);
9502 isert_put_unsol_pending_cmds(conn);
9503 + isert_wait4cmds(conn);
9504 isert_wait4logout(isert_conn);
9505
9506 queue_work(isert_release_wq, &isert_conn->release_work);
9507 diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
9508 index 28466e358fee..22c8d2070faa 100644
9509 --- a/drivers/input/touchscreen/edt-ft5x06.c
9510 +++ b/drivers/input/touchscreen/edt-ft5x06.c
9511 @@ -887,6 +887,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
9512 {
9513 const struct edt_i2c_chip_data *chip_data;
9514 struct edt_ft5x06_ts_data *tsdata;
9515 + u8 buf[2] = { 0xfc, 0x00 };
9516 struct input_dev *input;
9517 unsigned long irq_flags;
9518 int error;
9519 @@ -956,6 +957,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
9520 return error;
9521 }
9522
9523 + /*
9524 + * Dummy read access. EP0700MLP1 returns bogus data on the first
9525 + * register read access and ignores writes.
9526 + */
9527 + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf);
9528 +
9529 edt_ft5x06_ts_set_regs(tsdata);
9530 edt_ft5x06_ts_get_defaults(&client->dev, tsdata);
9531 edt_ft5x06_ts_get_parameters(tsdata);
9532 diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
9533 index 7bd98585d78d..48d382008788 100644
9534 --- a/drivers/iommu/arm-smmu-v3.c
9535 +++ b/drivers/iommu/arm-smmu-v3.c
9536 @@ -1103,7 +1103,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
9537 }
9538
9539 arm_smmu_sync_ste_for_sid(smmu, sid);
9540 - dst[0] = cpu_to_le64(val);
9541 + /* See comment in arm_smmu_write_ctx_desc() */
9542 + WRITE_ONCE(dst[0], cpu_to_le64(val));
9543 arm_smmu_sync_ste_for_sid(smmu, sid);
9544
9545 /* It's likely that we'll want to use the new STE soon */
9546 diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
9547 index 0c0cd2768d6e..d1efbb8dadc5 100644
9548 --- a/drivers/irqchip/irq-gic-v3-its.c
9549 +++ b/drivers/irqchip/irq-gic-v3-its.c
9550 @@ -365,7 +365,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
9551 struct its_cmd_desc *desc)
9552 {
9553 its_encode_cmd(cmd, GITS_CMD_INVALL);
9554 - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id);
9555 + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id);
9556
9557 its_fixup_cmd(cmd);
9558
9559 diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
9560 index f7b8681aed3f..2ab6060031a4 100644
9561 --- a/drivers/irqchip/irq-gic-v3.c
9562 +++ b/drivers/irqchip/irq-gic-v3.c
9563 @@ -1195,6 +1195,7 @@ static struct
9564 struct redist_region *redist_regs;
9565 u32 nr_redist_regions;
9566 bool single_redist;
9567 + int enabled_rdists;
9568 u32 maint_irq;
9569 int maint_irq_mode;
9570 phys_addr_t vcpu_base;
9571 @@ -1289,8 +1290,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
9572 * If GICC is enabled and has valid gicr base address, then it means
9573 * GICR base is presented via GICC
9574 */
9575 - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address)
9576 + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
9577 + acpi_data.enabled_rdists++;
9578 return 0;
9579 + }
9580
9581 /*
9582 * It's perfectly valid firmware can pass disabled GICC entry, driver
9583 @@ -1320,8 +1323,10 @@ static int __init gic_acpi_count_gicr_regions(void)
9584
9585 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
9586 gic_acpi_match_gicc, 0);
9587 - if (count > 0)
9588 + if (count > 0) {
9589 acpi_data.single_redist = true;
9590 + count = acpi_data.enabled_rdists;
9591 + }
9592
9593 return count;
9594 }
9595 diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
9596 index b935839ab79c..f483041eed98 100644
9597 --- a/drivers/md/bcache/bset.h
9598 +++ b/drivers/md/bcache/bset.h
9599 @@ -380,7 +380,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
9600
9601 /* Bkey utility code */
9602
9603 -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
9604 +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
9605 + (unsigned int)(i)->keys)
9606
9607 static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
9608 {
9609 diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
9610 index 58eb62f1ba21..a018a76662df 100644
9611 --- a/drivers/media/i2c/mt9v032.c
9612 +++ b/drivers/media/i2c/mt9v032.c
9613 @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev,
9614 struct v4l2_subdev_pad_config *cfg,
9615 struct v4l2_subdev_mbus_code_enum *code)
9616 {
9617 + struct mt9v032 *mt9v032 = to_mt9v032(subdev);
9618 +
9619 if (code->index > 0)
9620 return -EINVAL;
9621
9622 - code->code = MEDIA_BUS_FMT_SGRBG10_1X10;
9623 + code->code = mt9v032->format.code;
9624 return 0;
9625 }
9626
9627 @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev,
9628 struct v4l2_subdev_pad_config *cfg,
9629 struct v4l2_subdev_frame_size_enum *fse)
9630 {
9631 - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10)
9632 + struct mt9v032 *mt9v032 = to_mt9v032(subdev);
9633 +
9634 + if (fse->index >= 3)
9635 + return -EINVAL;
9636 + if (mt9v032->format.code != fse->code)
9637 return -EINVAL;
9638
9639 fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index);
9640 diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c
9641 index b7892f3efd98..5c4c3f0c57be 100644
9642 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c
9643 +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c
9644 @@ -14,8 +14,8 @@
9645 #define MAX_SRC_WIDTH 2048
9646
9647 /* Reset & boot poll config */
9648 -#define POLL_RST_MAX 50
9649 -#define POLL_RST_DELAY_MS 20
9650 +#define POLL_RST_MAX 500
9651 +#define POLL_RST_DELAY_MS 2
9652
9653 enum bdisp_target_plan {
9654 BDISP_RGB,
9655 @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp)
9656 for (i = 0; i < POLL_RST_MAX; i++) {
9657 if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE)
9658 break;
9659 - msleep(POLL_RST_DELAY_MS);
9660 + udelay(POLL_RST_DELAY_MS * 1000);
9661 }
9662 if (i == POLL_RST_MAX)
9663 dev_err(bdisp->dev, "Reset timeout\n");
9664 diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
9665 index b73d9ba9496c..96290b83dfde 100644
9666 --- a/drivers/net/ethernet/cisco/enic/enic_main.c
9667 +++ b/drivers/net/ethernet/cisco/enic/enic_main.c
9668 @@ -1806,10 +1806,10 @@ static int enic_stop(struct net_device *netdev)
9669 }
9670
9671 netif_carrier_off(netdev);
9672 - netif_tx_disable(netdev);
9673 if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
9674 for (i = 0; i < enic->wq_count; i++)
9675 napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
9676 + netif_tx_disable(netdev);
9677
9678 if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
9679 enic_dev_del_station_addr(enic);
9680 diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
9681 index 60bd1b36df60..b665d27f8e29 100644
9682 --- a/drivers/net/ethernet/freescale/gianfar.c
9683 +++ b/drivers/net/ethernet/freescale/gianfar.c
9684 @@ -2688,13 +2688,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9685 skb_dirtytx = tx_queue->skb_dirtytx;
9686
9687 while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
9688 + bool do_tstamp;
9689 +
9690 + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
9691 + priv->hwts_tx_en;
9692
9693 frags = skb_shinfo(skb)->nr_frags;
9694
9695 /* When time stamping, one additional TxBD must be freed.
9696 * Also, we need to dma_unmap_single() the TxPAL.
9697 */
9698 - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
9699 + if (unlikely(do_tstamp))
9700 nr_txbds = frags + 2;
9701 else
9702 nr_txbds = frags + 1;
9703 @@ -2708,7 +2712,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9704 (lstatus & BD_LENGTH_MASK))
9705 break;
9706
9707 - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
9708 + if (unlikely(do_tstamp)) {
9709 next = next_txbd(bdp, base, tx_ring_size);
9710 buflen = be16_to_cpu(next->length) +
9711 GMAC_FCB_LEN + GMAC_TXPAL_LEN;
9712 @@ -2718,7 +2722,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9713 dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
9714 buflen, DMA_TO_DEVICE);
9715
9716 - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
9717 + if (unlikely(do_tstamp)) {
9718 struct skb_shared_hwtstamps shhwtstamps;
9719 u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
9720 ~0x7UL);
9721 diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
9722 index af85a1b3135e..87bf05a81db5 100644
9723 --- a/drivers/net/wan/fsl_ucc_hdlc.c
9724 +++ b/drivers/net/wan/fsl_ucc_hdlc.c
9725 @@ -209,6 +209,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
9726 ret = -ENOMEM;
9727 goto free_riptr;
9728 }
9729 + if (riptr != (u16)riptr || tiptr != (u16)tiptr) {
9730 + dev_err(priv->dev, "MURAM allocation out of addressable range\n");
9731 + ret = -ENOMEM;
9732 + goto free_tiptr;
9733 + }
9734
9735 /* Set RIPTR, TIPTR */
9736 iowrite16be(riptr, &priv->ucc_pram->riptr);
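
A sketch of the added range check above: the MURAM offsets are written into 16-bit RIPTR/TIPTR registers, so the driver now verifies each allocation fits by comparing the value against itself truncated to u16. Plain stdint types stand in for the kernel's.

#include <stdint.h>
#include <stdio.h>

static int fits_in_u16(uint32_t off)
{
	return off == (uint16_t)off;   /* true iff the upper 16 bits are zero */
}

int main(void)
{
	uint32_t riptr = 0x1f00, tiptr = 0x12340;   /* assumed offsets */

	if (!fits_in_u16(riptr) || !fits_in_u16(tiptr))
		puts("MURAM allocation out of addressable range");
	else
		puts("offsets fit in the 16-bit RIPTR/TIPTR registers");
	return 0;
}
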
9737 diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
9738 index e7bbdb7af53a..97968e6a6a4e 100644
9739 --- a/drivers/net/wan/ixp4xx_hss.c
9740 +++ b/drivers/net/wan/ixp4xx_hss.c
9741 @@ -261,7 +261,7 @@ struct port {
9742 struct hss_plat_info *plat;
9743 buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
9744 struct desc *desc_tab; /* coherent */
9745 - u32 desc_tab_phys;
9746 + dma_addr_t desc_tab_phys;
9747 unsigned int id;
9748 unsigned int clock_type, clock_rate, loopback;
9749 unsigned int initialized, carrier;
9750 @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
9751 dev->stats.tx_dropped++;
9752 return NETDEV_TX_OK;
9753 }
9754 - memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
9755 + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
9756 dev_kfree_skb(skb);
9757 #endif
9758
9759 diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
9760 index 83770d2ea057..9da8bd792702 100644
9761 --- a/drivers/net/wireless/broadcom/b43legacy/main.c
9762 +++ b/drivers/net/wireless/broadcom/b43legacy/main.c
9763 @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev)
9764 }
9765
9766 /* Interrupt handler bottom-half */
9767 -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev)
9768 +static void b43legacy_interrupt_tasklet(unsigned long data)
9769 {
9770 + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data;
9771 u32 reason;
9772 u32 dma_reason[ARRAY_SIZE(dev->dma_reason)];
9773 u32 merged_dma_reason = 0;
9774 @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev,
9775 b43legacy_set_status(wldev, B43legacy_STAT_UNINIT);
9776 wldev->bad_frames_preempt = modparam_bad_frames_preempt;
9777 tasklet_init(&wldev->isr_tasklet,
9778 - (void (*)(unsigned long))b43legacy_interrupt_tasklet,
9779 + b43legacy_interrupt_tasklet,
9780 (unsigned long)wldev);
9781 if (modparam_pio)
9782 wldev->__using_pio = true;
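
A userspace model of the tasklet fix above (and of the matching ipw2x00/iwlegacy/rtlwifi hunks below): the callback keeps the "unsigned long data" signature the core expects and recovers its context by casting the argument, instead of casting the function pointer itself, which is undefined behaviour. The driver structure and the tasklet runner here are stand-ins.

#include <stdio.h>

struct wldev {
	const char *name;
};

/* Correct shape: matches void (*)(unsigned long) and casts the data argument. */
static void interrupt_tasklet(unsigned long data)
{
	struct wldev *dev = (struct wldev *)data;

	printf("servicing interrupts for %s\n", dev->name);
}

/* Stand-in for tasklet_init()/tasklet_schedule(): just calls the handler. */
static void run_tasklet(void (*func)(unsigned long), unsigned long data)
{
	func(data);
}

int main(void)
{
	struct wldev dev = { .name = "b43legacy0" };

	run_tasklet(interrupt_tasklet, (unsigned long)&dev);
	return 0;
}
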
9783 diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9784 index de52d826eb24..998a4bd6db78 100644
9785 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9786 +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9787 @@ -1921,6 +1921,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
9788 BRCMF_SDIO_FT_NORMAL)) {
9789 rd->len = 0;
9790 brcmu_pkt_buf_free_skb(pkt);
9791 + continue;
9792 }
9793 bus->sdcnt.rx_readahead_cnt++;
9794 if (rd->len != roundup(rd_new.len, 16)) {
9795 diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9796 index bfa542c8d6f1..86c84b11218d 100644
9797 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9798 +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9799 @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv)
9800 }
9801 }
9802
9803 -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv)
9804 +static void ipw2100_irq_tasklet(unsigned long data)
9805 {
9806 + struct ipw2100_priv *priv = (struct ipw2100_priv *)data;
9807 struct net_device *dev = priv->net_dev;
9808 unsigned long flags;
9809 u32 inta, tmp;
9810 @@ -6029,7 +6030,7 @@ static void ipw2100_rf_kill(struct work_struct *work)
9811 spin_unlock_irqrestore(&priv->low_lock, flags);
9812 }
9813
9814 -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv);
9815 +static void ipw2100_irq_tasklet(unsigned long data);
9816
9817 static const struct net_device_ops ipw2100_netdev_ops = {
9818 .ndo_open = ipw2100_open,
9819 @@ -6158,7 +6159,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
9820 INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill);
9821 INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event);
9822
9823 - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
9824 + tasklet_init(&priv->irq_tasklet,
9825 ipw2100_irq_tasklet, (unsigned long)priv);
9826
9827 /* NOTE: We do not start the deferred work for status checks yet */
9828 diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9829 index bfd68612a535..48edb2b6eb7d 100644
9830 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9831 +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9832 @@ -1968,8 +1968,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv)
9833 wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL);
9834 }
9835
9836 -static void ipw_irq_tasklet(struct ipw_priv *priv)
9837 +static void ipw_irq_tasklet(unsigned long data)
9838 {
9839 + struct ipw_priv *priv = (struct ipw_priv *)data;
9840 u32 inta, inta_mask, handled = 0;
9841 unsigned long flags;
9842 int rc = 0;
9843 @@ -10705,7 +10706,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv)
9844 INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate);
9845 #endif /* CONFIG_IPW2200_QOS */
9846
9847 - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
9848 + tasklet_init(&priv->irq_tasklet,
9849 ipw_irq_tasklet, (unsigned long)priv);
9850
9851 return ret;
9852 diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9853 index 466912eb2d87..d853ccbf74cb 100644
9854 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9855 +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9856 @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il)
9857 }
9858
9859 static void
9860 -il3945_irq_tasklet(struct il_priv *il)
9861 +il3945_irq_tasklet(unsigned long data)
9862 {
9863 + struct il_priv *il = (struct il_priv *)data;
9864 u32 inta, handled = 0;
9865 u32 inta_fh;
9866 unsigned long flags;
9867 @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il)
9868 setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
9869
9870 tasklet_init(&il->irq_tasklet,
9871 - (void (*)(unsigned long))il3945_irq_tasklet,
9872 + il3945_irq_tasklet,
9873 (unsigned long)il);
9874 }
9875
9876 diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9877 index a91d170a614b..6c2dcd236713 100644
9878 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9879 +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9880 @@ -4361,8 +4361,9 @@ il4965_synchronize_irq(struct il_priv *il)
9881 }
9882
9883 static void
9884 -il4965_irq_tasklet(struct il_priv *il)
9885 +il4965_irq_tasklet(unsigned long data)
9886 {
9887 + struct il_priv *il = (struct il_priv *)data;
9888 u32 inta, handled = 0;
9889 u32 inta_fh;
9890 unsigned long flags;
9891 @@ -6260,7 +6261,7 @@ il4965_setup_deferred_work(struct il_priv *il)
9892 setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
9893
9894 tasklet_init(&il->irq_tasklet,
9895 - (void (*)(unsigned long))il4965_irq_tasklet,
9896 + il4965_irq_tasklet,
9897 (unsigned long)il);
9898 }
9899
9900 diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
9901 index 140b6ea8f7cc..db2373fe8ac3 100644
9902 --- a/drivers/net/wireless/intel/iwlegacy/common.c
9903 +++ b/drivers/net/wireless/intel/iwlegacy/common.c
9904 @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il)
9905 u32 gp = _il_rd(il, CSR_EEPROM_GP);
9906 int sz;
9907 int ret;
9908 - u16 addr;
9909 + int addr;
9910
9911 /* allocate eeprom */
9912 sz = il->cfg->eeprom_size;
9913 diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9914 index c5203568a47a..f0f205c3aadb 100644
9915 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9916 +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9917 @@ -736,7 +736,8 @@ static struct thermal_zone_device_ops tzone_ops = {
9918 static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
9919 {
9920 int i;
9921 - char name[] = "iwlwifi";
9922 + char name[16];
9923 + static atomic_t counter = ATOMIC_INIT(0);
9924
9925 if (!iwl_mvm_is_tt_in_fw(mvm)) {
9926 mvm->tz_device.tzone = NULL;
9927 @@ -746,6 +747,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
9928
9929 BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH);
9930
9931 + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF);
9932 mvm->tz_device.tzone = thermal_zone_device_register(name,
9933 IWL_MAX_DTS_TRIPS,
9934 IWL_WRITABLE_TRIPS_MSK,
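
A sketch of the unique-name scheme above: each registration appends an 8-bit sequence number from a shared counter, so two NICs no longer try to register a thermal zone under the same "iwlwifi" name. C11 atomics stand in for the kernel's atomic_t; the buffer size matches the 16-byte name used above.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint counter;

static void make_tz_name(char name[16])
{
	unsigned int id = atomic_fetch_add(&counter, 1) + 1;  /* like atomic_inc_return() */

	snprintf(name, 16, "iwlwifi_%u", id & 0xFF);           /* at most "iwlwifi_255" */
}

int main(void)
{
	char a[16], b[16];

	make_tz_name(a);
	make_tz_name(b);
	printf("%s %s\n", a, b);
	return 0;
}
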
9935 diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
9936 index c995ace153ee..30171d4c4718 100644
9937 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
9938 +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
9939 @@ -2570,7 +2570,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap,
9940 sta->supported_rates[0] = 2;
9941 if (sta->tx_supp_rates & WLAN_RATE_2M)
9942 sta->supported_rates[1] = 4;
9943 - if (sta->tx_supp_rates & WLAN_RATE_5M5)
9944 + if (sta->tx_supp_rates & WLAN_RATE_5M5)
9945 sta->supported_rates[2] = 11;
9946 if (sta->tx_supp_rates & WLAN_RATE_11M)
9947 sta->supported_rates[3] = 22;
9948 diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9949 index 8244d8262951..4e91c74fcfad 100644
9950 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9951 +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9952 @@ -1351,7 +1351,8 @@ static int ezusb_init(struct hermes *hw)
9953 int retval;
9954
9955 BUG_ON(in_interrupt());
9956 - BUG_ON(!upriv);
9957 + if (!upriv)
9958 + return -EINVAL;
9959
9960 upriv->reply_count = 0;
9961 /* Write the MAGIC number on the simulated registers to keep
9962 diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
9963 index e15b462d096b..21b7cb845bf4 100644
9964 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c
9965 +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
9966 @@ -1095,13 +1095,15 @@ done:
9967 return ret;
9968 }
9969
9970 -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw)
9971 +static void _rtl_pci_irq_tasklet(unsigned long data)
9972 {
9973 + struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
9974 _rtl_pci_tx_chk_waitq(hw);
9975 }
9976
9977 -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
9978 +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data)
9979 {
9980 + struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
9981 struct rtl_priv *rtlpriv = rtl_priv(hw);
9982 struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
9983 struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
9984 @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw,
9985
9986 /*task */
9987 tasklet_init(&rtlpriv->works.irq_tasklet,
9988 - (void (*)(unsigned long))_rtl_pci_irq_tasklet,
9989 + _rtl_pci_irq_tasklet,
9990 (unsigned long)hw);
9991 tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet,
9992 - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet,
9993 + _rtl_pci_prepare_bcn_tasklet,
9994 (unsigned long)hw);
9995 INIT_WORK(&rtlpriv->works.lps_change_work,
9996 rtl_lps_change_work_callback);
9997 diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
9998 index 3cd995de1bbb..151b220381f9 100644
9999 --- a/drivers/nfc/port100.c
10000 +++ b/drivers/nfc/port100.c
10001 @@ -573,7 +573,7 @@ static void port100_tx_update_payload_len(void *_frame, int len)
10002 {
10003 struct port100_frame *frame = _frame;
10004
10005 - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len);
10006 + le16_add_cpu(&frame->datalen, len);
10007 }
10008
10009 static bool port100_rx_frame_is_valid(void *_frame)
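
A sketch of what le16_add_cpu() does in the hunk above: the length field is stored little-endian on the wire, so it is decoded to host order, the delta added, and the result stored back little-endian, independent of the host's own byte order. The helpers below model the conversion explicitly.

#include <stdint.h>
#include <stdio.h>

static uint16_t le16_to_host(uint16_t v)
{
	const uint8_t *b = (const uint8_t *)&v;

	return (uint16_t)(b[0] | (b[1] << 8));     /* byte 0 is the LSB on the wire */
}

static uint16_t host_to_le16(uint16_t v)
{
	uint16_t out;
	uint8_t *b = (uint8_t *)&out;

	b[0] = v & 0xff;
	b[1] = v >> 8;
	return out;
}

static void le16_add(uint16_t *field, int delta)
{
	*field = host_to_le16((uint16_t)(le16_to_host(*field) + delta));
}

int main(void)
{
	uint16_t datalen = host_to_le16(10);

	le16_add(&datalen, 4);
	printf("datalen is now %u\n", le16_to_host(datalen));  /* 14 */
	return 0;
}
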
10010 diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
10011 index 1d32fe2d97aa..9ec3cb628b0b 100644
10012 --- a/drivers/pci/iov.c
10013 +++ b/drivers/pci/iov.c
10014 @@ -181,6 +181,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
10015 failed2:
10016 sysfs_remove_link(&dev->dev.kobj, buf);
10017 failed1:
10018 + pci_stop_and_remove_bus_device(virtfn);
10019 pci_dev_put(dev);
10020 mutex_lock(&iov->dev->sriov->lock);
10021 pci_stop_and_remove_bus_device(virtfn);
10022 diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
10023 index f83a2a60d9c9..1e945aa77734 100644
10024 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c
10025 +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
10026 @@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset)
10027
10028 raw_spin_lock_irqsave(&byt_lock, flags);
10029 value = readl(reg);
10030 - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
10031 +
10032 + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */
10033 + if (value & BYT_DIRECT_IRQ_EN)
10034 + /* nothing to do */ ;
10035 + else
10036 + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
10037 +
10038 writel(value, reg);
10039 raw_spin_unlock_irqrestore(&byt_lock, flags);
10040 }
10041 diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10042 index e1c34e19222e..3ddb9565ed80 100644
10043 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10044 +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10045 @@ -500,17 +500,15 @@ enum {
10046 SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK,
10047 CRX0_MARK, CRX1_MARK,
10048 CTX0_MARK, CTX1_MARK,
10049 + CRX0_CRX1_MARK, CTX0_CTX1_MARK,
10050
10051 PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK,
10052 PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK,
10053 PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK,
10054 PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK,
10055 IERXD_MARK, IETXD_MARK,
10056 - CRX0_CRX1_MARK,
10057 WDTOVF_MARK,
10058
10059 - CRX0X1_MARK,
10060 -
10061 /* DMAC */
10062 TEND0_MARK, DACK0_MARK, DREQ0_MARK,
10063 TEND1_MARK, DACK1_MARK, DREQ1_MARK,
10064 @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = {
10065
10066 PINMUX_DATA(PJ3_DATA, PJ3MD_00),
10067 PINMUX_DATA(CRX1_MARK, PJ3MD_01),
10068 - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10),
10069 + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10),
10070 PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11),
10071
10072 PINMUX_DATA(PJ2_DATA, PJ2MD_000),
10073 PINMUX_DATA(CTX1_MARK, PJ2MD_001),
10074 - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010),
10075 + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010),
10076 PINMUX_DATA(CS2_MARK, PJ2MD_011),
10077 PINMUX_DATA(SCK0_MARK, PJ2MD_100),
10078 PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101),
10079 @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = {
10080 GPIO_FN(CTX1),
10081 GPIO_FN(CRX1),
10082 GPIO_FN(CTX0),
10083 + GPIO_FN(CTX0_CTX1),
10084 GPIO_FN(CRX0),
10085 GPIO_FN(CRX0_CRX1),
10086
10087 diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10088 index cfdb4fc177c3..3df0c0d139d0 100644
10089 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10090 +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10091 @@ -740,13 +740,12 @@ enum {
10092 CRX0_MARK, CTX0_MARK,
10093 CRX1_MARK, CTX1_MARK,
10094 CRX2_MARK, CTX2_MARK,
10095 - CRX0_CRX1_MARK,
10096 - CRX0_CRX1_CRX2_MARK,
10097 - CTX0CTX1CTX2_MARK,
10098 + CRX0_CRX1_MARK, CTX0_CTX1_MARK,
10099 + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK,
10100 CRX1_PJ22_MARK, CTX1_PJ23_MARK,
10101 CRX2_PJ20_MARK, CTX2_PJ21_MARK,
10102 - CRX0CRX1_PJ22_MARK,
10103 - CRX0CRX1CRX2_PJ20_MARK,
10104 + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK,
10105 + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK,
10106
10107 /* VDC */
10108 DV_CLK_MARK,
10109 @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = {
10110 PINMUX_DATA(CS3_MARK, PC8MD_001),
10111 PINMUX_DATA(TXD7_MARK, PC8MD_010),
10112 PINMUX_DATA(CTX1_MARK, PC8MD_011),
10113 + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100),
10114
10115 PINMUX_DATA(PC7_DATA, PC7MD_000),
10116 PINMUX_DATA(CKE_MARK, PC7MD_001),
10117 @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = {
10118 PINMUX_DATA(CAS_MARK, PC6MD_001),
10119 PINMUX_DATA(SCK7_MARK, PC6MD_010),
10120 PINMUX_DATA(CTX0_MARK, PC6MD_011),
10121 + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100),
10122
10123 PINMUX_DATA(PC5_DATA, PC5MD_000),
10124 PINMUX_DATA(RAS_MARK, PC5MD_001),
10125 PINMUX_DATA(CRX0_MARK, PC5MD_011),
10126 - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100),
10127 + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100),
10128 PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101),
10129
10130 PINMUX_DATA(PC4_DATA, PC4MD_00),
10131 @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = {
10132 PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010),
10133 PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011),
10134 PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100),
10135 - PINMUX_DATA(CTX1_MARK, PJ23MD_101),
10136 + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101),
10137 + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110),
10138
10139 PINMUX_DATA(PJ22_DATA, PJ22MD_000),
10140 PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001),
10141 PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010),
10142 PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011),
10143 PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100),
10144 - PINMUX_DATA(CRX1_MARK, PJ22MD_101),
10145 - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110),
10146 + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101),
10147 + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110),
10148
10149 PINMUX_DATA(PJ21_DATA, PJ21MD_000),
10150 PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001),
10151 PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010),
10152 PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011),
10153 PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100),
10154 - PINMUX_DATA(CTX2_MARK, PJ21MD_101),
10155 + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101),
10156 + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110),
10157
10158 PINMUX_DATA(PJ20_DATA, PJ20MD_000),
10159 PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001),
10160 PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010),
10161 PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011),
10162 PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100),
10163 - PINMUX_DATA(CRX2_MARK, PJ20MD_101),
10164 - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110),
10165 + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101),
10166 + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110),
10167
10168 PINMUX_DATA(PJ19_DATA, PJ19MD_000),
10169 PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001),
10170 @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = {
10171 GPIO_FN(WDTOVF),
10172
10173 /* CAN */
10174 + GPIO_FN(CTX2),
10175 + GPIO_FN(CRX2),
10176 GPIO_FN(CTX1),
10177 GPIO_FN(CRX1),
10178 GPIO_FN(CTX0),
10179 GPIO_FN(CRX0),
10180 + GPIO_FN(CTX0_CTX1),
10181 GPIO_FN(CRX0_CRX1),
10182 + GPIO_FN(CTX0_CTX1_CTX2),
10183 GPIO_FN(CRX0_CRX1_CRX2),
10184 + GPIO_FN(CTX2_PJ21),
10185 + GPIO_FN(CRX2_PJ20),
10186 + GPIO_FN(CTX1_PJ23),
10187 + GPIO_FN(CRX1_PJ22),
10188 + GPIO_FN(CTX0_CTX1_PJ23),
10189 + GPIO_FN(CRX0_CRX1_PJ22),
10190 + GPIO_FN(CTX0_CTX1_CTX2_PJ21),
10191 + GPIO_FN(CRX0_CRX1_CRX2_PJ20),
10192
10193 /* DMAC */
10194 GPIO_FN(TEND0),
10195 diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
10196 index 5ad42f33e70c..2e15acf13893 100644
10197 --- a/drivers/pwm/pwm-omap-dmtimer.c
10198 +++ b/drivers/pwm/pwm-omap-dmtimer.c
10199 @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev)
10200 static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
10201 {
10202 struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev);
10203 + int ret;
10204 +
10205 + ret = pwmchip_remove(&omap->chip);
10206 + if (ret)
10207 + return ret;
10208
10209 if (pm_runtime_active(&omap->dm_timer_pdev->dev))
10210 omap->pdata->stop(omap->dm_timer);
10211 @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
10212
10213 mutex_destroy(&omap->mutex);
10214
10215 - return pwmchip_remove(&omap->chip);
10216 + return 0;
10217 }
10218
10219 static const struct of_device_id pwm_omap_dmtimer_of_match[] = {
10220 diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
10221 index dfa8d50a5d74..28646e4cf3ba 100644
10222 --- a/drivers/regulator/rk808-regulator.c
10223 +++ b/drivers/regulator/rk808-regulator.c
10224 @@ -589,7 +589,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev,
10225 }
10226
10227 if (!pdata->dvs_gpio[i]) {
10228 - dev_warn(dev, "there is no dvs%d gpio\n", i);
10229 + dev_info(dev, "there is no dvs%d gpio\n", i);
10230 continue;
10231 }
10232
10233 diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
10234 index c6bfb3496684..b99780574044 100644
10235 --- a/drivers/remoteproc/remoteproc_core.c
10236 +++ b/drivers/remoteproc/remoteproc_core.c
10237 @@ -1488,7 +1488,7 @@ static int __init remoteproc_init(void)
10238
10239 return 0;
10240 }
10241 -module_init(remoteproc_init);
10242 +subsys_initcall(remoteproc_init);
10243
10244 static void __exit remoteproc_exit(void)
10245 {
10246 diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
10247 index 64ab9eaec428..def3208dd290 100644
10248 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c
10249 +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
10250 @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period,
10251 * At some speeds, we only support
10252 * ST transfers.
10253 */
10254 - if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
10255 + if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
10256 *ppr_options &= ~MSG_EXT_PPR_DT_REQ;
10257 break;
10258 }
10259 diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
10260 index d60564397be5..60c3e2bf8761 100644
10261 --- a/drivers/scsi/iscsi_tcp.c
10262 +++ b/drivers/scsi/iscsi_tcp.c
10263 @@ -882,6 +882,10 @@ free_host:
10264 static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
10265 {
10266 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
10267 + struct iscsi_session *session = cls_session->dd_data;
10268 +
10269 + if (WARN_ON_ONCE(session->leadconn))
10270 + return;
10271
10272 iscsi_tcp_r2tpool_free(cls_session->dd_data);
10273 iscsi_session_teardown(cls_session);
10274 diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
10275 index e730aabc26d0..65bbca715f57 100644
10276 --- a/drivers/scsi/qla2xxx/qla_os.c
10277 +++ b/drivers/scsi/qla2xxx/qla_os.c
10278 @@ -451,6 +451,12 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10279 goto fail;
10280 }
10281 if (ql2xmultique_tag) {
10282 + ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
10283 + if (unlikely(!ha->wq)) {
10284 + ql_log(ql_log_warn, vha, 0x01e0,
10285 + "Failed to alloc workqueue.\n");
10286 + goto fail;
10287 + }
10288 /* create a request queue for IO */
10289 options |= BIT_7;
10290 req = qla25xx_create_req_que(ha, options, 0, 0, -1,
10291 @@ -458,9 +464,8 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10292 if (!req) {
10293 ql_log(ql_log_warn, vha, 0x00e0,
10294 "Failed to create request queue.\n");
10295 - goto fail;
10296 + goto fail2;
10297 }
10298 - ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
10299 vha->req = ha->req_q_map[req];
10300 options |= BIT_1;
10301 for (ques = 1; ques < ha->max_rsp_queues; ques++) {
10302 @@ -468,7 +473,7 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10303 if (!ret) {
10304 ql_log(ql_log_warn, vha, 0x00e8,
10305 "Failed to create response queue.\n");
10306 - goto fail2;
10307 + goto fail3;
10308 }
10309 }
10310 ha->flags.cpu_affinity_enabled = 1;
10311 @@ -482,11 +487,13 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10312 ha->max_rsp_queues, ha->max_req_queues);
10313 }
10314 return 0;
10315 -fail2:
10316 +
10317 +fail3:
10318 qla25xx_delete_queues(vha);
10319 - destroy_workqueue(ha->wq);
10320 - ha->wq = NULL;
10321 vha->req = ha->req_q_map[0];
10322 +fail2:
10323 + destroy_workqueue(ha->wq);
10324 + ha->wq = NULL;
10325 fail:
10326 ha->mqenable = 0;
10327 kfree(ha->req_q_map);
10328 diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
10329 index ab7bc4e63425..fff9c4d0f7c8 100644
10330 --- a/drivers/scsi/scsi_transport_iscsi.c
10331 +++ b/drivers/scsi/scsi_transport_iscsi.c
10332 @@ -2964,6 +2964,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
10333 return err;
10334 }
10335
10336 +static int iscsi_session_has_conns(int sid)
10337 +{
10338 + struct iscsi_cls_conn *conn;
10339 + unsigned long flags;
10340 + int found = 0;
10341 +
10342 + spin_lock_irqsave(&connlock, flags);
10343 + list_for_each_entry(conn, &connlist, conn_list) {
10344 + if (iscsi_conn_get_sid(conn) == sid) {
10345 + found = 1;
10346 + break;
10347 + }
10348 + }
10349 + spin_unlock_irqrestore(&connlock, flags);
10350 +
10351 + return found;
10352 +}
10353 +
10354 static int
10355 iscsi_set_iface_params(struct iscsi_transport *transport,
10356 struct iscsi_uevent *ev, uint32_t len)
10357 @@ -3538,10 +3556,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
10358 break;
10359 case ISCSI_UEVENT_DESTROY_SESSION:
10360 session = iscsi_session_lookup(ev->u.d_session.sid);
10361 - if (session)
10362 - transport->destroy_session(session);
10363 - else
10364 + if (!session)
10365 err = -EINVAL;
10366 + else if (iscsi_session_has_conns(ev->u.d_session.sid))
10367 + err = -EBUSY;
10368 + else
10369 + transport->destroy_session(session);
10370 break;
10371 case ISCSI_UEVENT_UNBIND_SESSION:
10372 session = iscsi_session_lookup(ev->u.d_session.sid);
10373 diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
10374 index 5b18f6ffa45c..cd61c883c19f 100644
10375 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
10376 +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
10377 @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void)
10378 apbmisc.flags = IORESOURCE_MEM;
10379
10380 /* strapping options */
10381 - if (tegra_get_chip_id() == TEGRA124) {
10382 + if (of_machine_is_compatible("nvidia,tegra124")) {
10383 straps.start = 0x7000e864;
10384 straps.end = 0x7000e867;
10385 } else {
10386 diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
10387 index c6314d1552ea..99fd4f53c856 100644
10388 --- a/drivers/staging/android/ashmem.c
10389 +++ b/drivers/staging/android/ashmem.c
10390 @@ -370,8 +370,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot)
10391 _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC);
10392 }
10393
10394 +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma)
10395 +{
10396 + /* do not allow to mmap ashmem backing shmem file directly */
10397 + return -EPERM;
10398 +}
10399 +
10400 +static unsigned long
10401 +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr,
10402 + unsigned long len, unsigned long pgoff,
10403 + unsigned long flags)
10404 +{
10405 + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
10406 +}
10407 +
10408 static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
10409 {
10410 + static struct file_operations vmfile_fops;
10411 struct ashmem_area *asma = file->private_data;
10412 int ret = 0;
10413
10414 @@ -412,6 +427,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
10415 }
10416 vmfile->f_mode |= FMODE_LSEEK;
10417 asma->file = vmfile;
10418 + /*
10419 + * override mmap operation of the vmfile so that it can't be
10420 + * remapped which would lead to creation of a new vma with no
10421 + * asma permission checks. Have to override get_unmapped_area
10422 + * as well to prevent VM_BUG_ON check for f_ops modification.
10423 + */
10424 + if (!vmfile_fops.mmap) {
10425 + vmfile_fops = *vmfile->f_op;
10426 + vmfile_fops.mmap = ashmem_vmfile_mmap;
10427 + vmfile_fops.get_unmapped_area =
10428 + ashmem_vmfile_get_unmapped_area;
10429 + }
10430 + vmfile->f_op = &vmfile_fops;
10431 }
10432 get_file(asma->file);
10433
10434 diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c
10435 index aa6508b44fab..ed7c32542cb3 100644
10436 --- a/drivers/staging/greybus/audio_manager.c
10437 +++ b/drivers/staging/greybus/audio_manager.c
10438 @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void)
10439
10440 list_for_each_entry_safe(module, next, &modules_list, list) {
10441 list_del(&module->list);
10442 - kobject_put(&module->kobj);
10443 ida_simple_remove(&module_id, module->id);
10444 + kobject_put(&module->kobj);
10445 }
10446
10447 is_empty = list_empty(&modules_list);
10448 diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10449 index c7bf8ab26192..50793c9df1b3 100644
10450 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10451 +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10452 @@ -2052,7 +2052,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
10453 struct ieee_param *param;
10454 uint ret = 0;
10455
10456 - if (p->length < sizeof(struct ieee_param) || !p->pointer) {
10457 + if (!p->pointer || p->length != sizeof(struct ieee_param)) {
10458 ret = -EINVAL;
10459 goto out;
10460 }
10461 @@ -2859,7 +2859,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
10462 goto out;
10463 }
10464
10465 - if (!p->pointer) {
10466 + if (!p->pointer || p->length != sizeof(struct ieee_param)) {
10467 ret = -EINVAL;
10468 goto out;
10469 }
10470 diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c
10471 index 655f0002f880..7b73fa2f8834 100644
10472 --- a/drivers/staging/vt6656/dpc.c
10473 +++ b/drivers/staging/vt6656/dpc.c
10474 @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb,
10475
10476 vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm);
10477
10478 - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1;
10479 + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1;
10480 priv->current_rssi = priv->bb_pre_ed_rssi;
10481
10482 frame = skb_data + 8;
10483 diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
10484 index 9636d8744347..b6c4f55f79e7 100644
10485 --- a/drivers/target/iscsi/iscsi_target.c
10486 +++ b/drivers/target/iscsi/iscsi_target.c
10487 @@ -1168,9 +1168,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
10488 hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length,
10489 conn->cid);
10490
10491 - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
10492 - return iscsit_add_reject_cmd(cmd,
10493 - ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
10494 + target_get_sess_cmd(&cmd->se_cmd, true);
10495
10496 cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd,
10497 scsilun_to_int(&hdr->lun));
10498 @@ -1988,9 +1986,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
10499 conn->sess->se_sess, 0, DMA_NONE,
10500 TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
10501
10502 - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
10503 - return iscsit_add_reject_cmd(cmd,
10504 - ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
10505 + target_get_sess_cmd(&cmd->se_cmd, true);
10506
10507 /*
10508 * TASK_REASSIGN for ERL=2 / connection stays inside of
10509 @@ -4162,6 +4158,9 @@ int iscsit_close_connection(
10510 iscsit_stop_nopin_response_timer(conn);
10511 iscsit_stop_nopin_timer(conn);
10512
10513 + if (conn->conn_transport->iscsit_wait_conn)
10514 + conn->conn_transport->iscsit_wait_conn(conn);
10515 +
10516 /*
10517 * During Connection recovery drop unacknowledged out of order
10518 * commands for this connection, and prepare the other commands
10519 @@ -4244,11 +4243,6 @@ int iscsit_close_connection(
10520 * must wait until they have completed.
10521 */
10522 iscsit_check_conn_usage_count(conn);
10523 - target_sess_cmd_list_set_waiting(sess->se_sess);
10524 - target_wait_for_sess_cmds(sess->se_sess);
10525 -
10526 - if (conn->conn_transport->iscsit_wait_conn)
10527 - conn->conn_transport->iscsit_wait_conn(conn);
10528
10529 ahash_request_free(conn->conn_tx_hash);
10530 if (conn->conn_rx_hash) {
10531 diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
10532 index 325f9db2da86..4a7eb85f7c85 100644
10533 --- a/drivers/tty/serial/atmel_serial.c
10534 +++ b/drivers/tty/serial/atmel_serial.c
10535 @@ -501,7 +501,8 @@ static void atmel_stop_tx(struct uart_port *port)
10536 atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
10537
10538 if (atmel_uart_is_half_duplex(port))
10539 - atmel_start_rx(port);
10540 + if (!atomic_read(&atmel_port->tasklet_shutdown))
10541 + atmel_start_rx(port);
10542
10543 }
10544
10545 diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
10546 index e75bd8d7e6f6..325c38c9b451 100644
10547 --- a/drivers/tty/serial/imx.c
10548 +++ b/drivers/tty/serial/imx.c
10549 @@ -532,7 +532,7 @@ static void imx_dma_tx(struct imx_port *sport)
10550
10551 sport->tx_bytes = uart_circ_chars_pending(xmit);
10552
10553 - if (xmit->tail < xmit->head) {
10554 + if (xmit->tail < xmit->head || xmit->head == 0) {
10555 sport->dma_tx_nents = 1;
10556 sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes);
10557 } else {
10558 diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
10559 index e645ee1cfd98..7446ce29f677 100644
10560 --- a/drivers/tty/synclink_gt.c
10561 +++ b/drivers/tty/synclink_gt.c
10562 @@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty)
10563 DBGINFO(("%s throttle\n", info->device_name));
10564 if (I_IXOFF(tty))
10565 send_xchar(tty, STOP_CHAR(tty));
10566 - if (C_CRTSCTS(tty)) {
10567 + if (C_CRTSCTS(tty)) {
10568 spin_lock_irqsave(&info->lock,flags);
10569 info->signals &= ~SerialSignal_RTS;
10570 - set_signals(info);
10571 + set_signals(info);
10572 spin_unlock_irqrestore(&info->lock,flags);
10573 }
10574 }
10575 @@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty)
10576 else
10577 send_xchar(tty, START_CHAR(tty));
10578 }
10579 - if (C_CRTSCTS(tty)) {
10580 + if (C_CRTSCTS(tty)) {
10581 spin_lock_irqsave(&info->lock,flags);
10582 info->signals |= SerialSignal_RTS;
10583 - set_signals(info);
10584 + set_signals(info);
10585 spin_unlock_irqrestore(&info->lock,flags);
10586 }
10587 }
10588 @@ -2576,8 +2576,8 @@ static void change_params(struct slgt_info *info)
10589 info->read_status_mask = IRQ_RXOVER;
10590 if (I_INPCK(info->port.tty))
10591 info->read_status_mask |= MASK_PARITY | MASK_FRAMING;
10592 - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10593 - info->read_status_mask |= MASK_BREAK;
10594 + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10595 + info->read_status_mask |= MASK_BREAK;
10596 if (I_IGNPAR(info->port.tty))
10597 info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING;
10598 if (I_IGNBRK(info->port.tty)) {
10599 @@ -3208,7 +3208,7 @@ static int tiocmset(struct tty_struct *tty,
10600 info->signals &= ~SerialSignal_DTR;
10601
10602 spin_lock_irqsave(&info->lock,flags);
10603 - set_signals(info);
10604 + set_signals(info);
10605 spin_unlock_irqrestore(&info->lock,flags);
10606 return 0;
10607 }
10608 @@ -3219,7 +3219,7 @@ static int carrier_raised(struct tty_port *port)
10609 struct slgt_info *info = container_of(port, struct slgt_info, port);
10610
10611 spin_lock_irqsave(&info->lock,flags);
10612 - get_signals(info);
10613 + get_signals(info);
10614 spin_unlock_irqrestore(&info->lock,flags);
10615 return (info->signals & SerialSignal_DCD) ? 1 : 0;
10616 }
10617 @@ -3234,7 +3234,7 @@ static void dtr_rts(struct tty_port *port, int on)
10618 info->signals |= SerialSignal_RTS | SerialSignal_DTR;
10619 else
10620 info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
10621 - set_signals(info);
10622 + set_signals(info);
10623 spin_unlock_irqrestore(&info->lock,flags);
10624 }
10625
10626 diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
10627 index dec156586de1..2f6df8d74b4a 100644
10628 --- a/drivers/tty/synclinkmp.c
10629 +++ b/drivers/tty/synclinkmp.c
10630 @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty)
10631 if (I_IXOFF(tty))
10632 send_xchar(tty, STOP_CHAR(tty));
10633
10634 - if (C_CRTSCTS(tty)) {
10635 + if (C_CRTSCTS(tty)) {
10636 spin_lock_irqsave(&info->lock,flags);
10637 info->serial_signals &= ~SerialSignal_RTS;
10638 - set_signals(info);
10639 + set_signals(info);
10640 spin_unlock_irqrestore(&info->lock,flags);
10641 }
10642 }
10643 @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty)
10644 send_xchar(tty, START_CHAR(tty));
10645 }
10646
10647 - if (C_CRTSCTS(tty)) {
10648 + if (C_CRTSCTS(tty)) {
10649 spin_lock_irqsave(&info->lock,flags);
10650 info->serial_signals |= SerialSignal_RTS;
10651 - set_signals(info);
10652 + set_signals(info);
10653 spin_unlock_irqrestore(&info->lock,flags);
10654 }
10655 }
10656 @@ -2485,7 +2485,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
10657 if (status & SerialSignal_CTS) {
10658 if ( debug_level >= DEBUG_LEVEL_ISR )
10659 printk("CTS tx start...");
10660 - info->port.tty->hw_stopped = 0;
10661 + info->port.tty->hw_stopped = 0;
10662 tx_start(info);
10663 info->pending_bh |= BH_TRANSMIT;
10664 return;
10665 @@ -2494,7 +2494,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
10666 if (!(status & SerialSignal_CTS)) {
10667 if ( debug_level >= DEBUG_LEVEL_ISR )
10668 printk("CTS tx stop...");
10669 - info->port.tty->hw_stopped = 1;
10670 + info->port.tty->hw_stopped = 1;
10671 tx_stop(info);
10672 }
10673 }
10674 @@ -2821,8 +2821,8 @@ static void change_params(SLMP_INFO *info)
10675 info->read_status_mask2 = OVRN;
10676 if (I_INPCK(info->port.tty))
10677 info->read_status_mask2 |= PE | FRME;
10678 - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10679 - info->read_status_mask1 |= BRKD;
10680 + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10681 + info->read_status_mask1 |= BRKD;
10682 if (I_IGNPAR(info->port.tty))
10683 info->ignore_status_mask2 |= PE | FRME;
10684 if (I_IGNBRK(info->port.tty)) {
10685 @@ -3192,7 +3192,7 @@ static int tiocmget(struct tty_struct *tty)
10686 unsigned long flags;
10687
10688 spin_lock_irqsave(&info->lock,flags);
10689 - get_signals(info);
10690 + get_signals(info);
10691 spin_unlock_irqrestore(&info->lock,flags);
10692
10693 result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) |
10694 @@ -3230,7 +3230,7 @@ static int tiocmset(struct tty_struct *tty,
10695 info->serial_signals &= ~SerialSignal_DTR;
10696
10697 spin_lock_irqsave(&info->lock,flags);
10698 - set_signals(info);
10699 + set_signals(info);
10700 spin_unlock_irqrestore(&info->lock,flags);
10701
10702 return 0;
10703 @@ -3242,7 +3242,7 @@ static int carrier_raised(struct tty_port *port)
10704 unsigned long flags;
10705
10706 spin_lock_irqsave(&info->lock,flags);
10707 - get_signals(info);
10708 + get_signals(info);
10709 spin_unlock_irqrestore(&info->lock,flags);
10710
10711 return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
10712 @@ -3258,7 +3258,7 @@ static void dtr_rts(struct tty_port *port, int on)
10713 info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR;
10714 else
10715 info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
10716 - set_signals(info);
10717 + set_signals(info);
10718 spin_unlock_irqrestore(&info->lock,flags);
10719 }
10720
10721 diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
10722 index 368ce1803e8f..6ac05021c4a7 100644
10723 --- a/drivers/tty/vt/selection.c
10724 +++ b/drivers/tty/vt/selection.c
10725 @@ -341,6 +341,7 @@ int paste_selection(struct tty_struct *tty)
10726 unsigned int count;
10727 struct tty_ldisc *ld;
10728 DECLARE_WAITQUEUE(wait, current);
10729 + int ret = 0;
10730
10731 console_lock();
10732 poke_blanked_console();
10733 @@ -354,6 +355,10 @@ int paste_selection(struct tty_struct *tty)
10734 add_wait_queue(&vc->paste_wait, &wait);
10735 while (sel_buffer && sel_buffer_lth > pasted) {
10736 set_current_state(TASK_INTERRUPTIBLE);
10737 + if (signal_pending(current)) {
10738 + ret = -EINTR;
10739 + break;
10740 + }
10741 if (tty_throttled(tty)) {
10742 schedule();
10743 continue;
10744 @@ -369,5 +374,5 @@ int paste_selection(struct tty_struct *tty)
10745
10746 tty_buffer_unlock_exclusive(&vc->port);
10747 tty_ldisc_deref(ld);
10748 - return 0;
10749 + return ret;
10750 }
10751 diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
10752 index 638eb9bbd59f..e8efb270dc8f 100644
10753 --- a/drivers/tty/vt/vt_ioctl.c
10754 +++ b/drivers/tty/vt/vt_ioctl.c
10755 @@ -850,58 +850,49 @@ int vt_ioctl(struct tty_struct *tty,
10756
10757 case VT_RESIZEX:
10758 {
10759 - struct vt_consize __user *vtconsize = up;
10760 - ushort ll,cc,vlin,clin,vcol,ccol;
10761 + struct vt_consize v;
10762 if (!perm)
10763 return -EPERM;
10764 - if (!access_ok(VERIFY_READ, vtconsize,
10765 - sizeof(struct vt_consize))) {
10766 - ret = -EFAULT;
10767 - break;
10768 - }
10769 + if (copy_from_user(&v, up, sizeof(struct vt_consize)))
10770 + return -EFAULT;
10771 /* FIXME: Should check the copies properly */
10772 - __get_user(ll, &vtconsize->v_rows);
10773 - __get_user(cc, &vtconsize->v_cols);
10774 - __get_user(vlin, &vtconsize->v_vlin);
10775 - __get_user(clin, &vtconsize->v_clin);
10776 - __get_user(vcol, &vtconsize->v_vcol);
10777 - __get_user(ccol, &vtconsize->v_ccol);
10778 - vlin = vlin ? vlin : vc->vc_scan_lines;
10779 - if (clin) {
10780 - if (ll) {
10781 - if (ll != vlin/clin) {
10782 - /* Parameters don't add up */
10783 - ret = -EINVAL;
10784 - break;
10785 - }
10786 - } else
10787 - ll = vlin/clin;
10788 + if (!v.v_vlin)
10789 + v.v_vlin = vc->vc_scan_lines;
10790 + if (v.v_clin) {
10791 + int rows = v.v_vlin/v.v_clin;
10792 + if (v.v_rows != rows) {
10793 + if (v.v_rows) /* Parameters don't add up */
10794 + return -EINVAL;
10795 + v.v_rows = rows;
10796 + }
10797 }
10798 - if (vcol && ccol) {
10799 - if (cc) {
10800 - if (cc != vcol/ccol) {
10801 - ret = -EINVAL;
10802 - break;
10803 - }
10804 - } else
10805 - cc = vcol/ccol;
10806 + if (v.v_vcol && v.v_ccol) {
10807 + int cols = v.v_vcol/v.v_ccol;
10808 + if (v.v_cols != cols) {
10809 + if (v.v_cols)
10810 + return -EINVAL;
10811 + v.v_cols = cols;
10812 + }
10813 }
10814
10815 - if (clin > 32) {
10816 - ret = -EINVAL;
10817 - break;
10818 - }
10819 -
10820 + if (v.v_clin > 32)
10821 + return -EINVAL;
10822 +
10823 for (i = 0; i < MAX_NR_CONSOLES; i++) {
10824 + struct vc_data *vcp;
10825 +
10826 if (!vc_cons[i].d)
10827 continue;
10828 console_lock();
10829 - if (vlin)
10830 - vc_cons[i].d->vc_scan_lines = vlin;
10831 - if (clin)
10832 - vc_cons[i].d->vc_font.height = clin;
10833 - vc_cons[i].d->vc_resize_user = 1;
10834 - vc_resize(vc_cons[i].d, cc, ll);
10835 + vcp = vc_cons[i].d;
10836 + if (vcp) {
10837 + if (v.v_vlin)
10838 + vcp->vc_scan_lines = v.v_vlin;
10839 + if (v.v_clin)
10840 + vcp->vc_font.height = v.v_clin;
10841 + vcp->vc_resize_user = 1;
10842 + vc_resize(vcp, v.v_cols, v.v_rows);
10843 + }
10844 console_unlock();
10845 }
10846 break;
10847 diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
10848 index e1134a4d97f3..a00b4aee6c79 100644
10849 --- a/drivers/uio/uio_dmem_genirq.c
10850 +++ b/drivers/uio/uio_dmem_genirq.c
10851 @@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
10852 if (irq_on) {
10853 if (test_and_clear_bit(0, &priv->flags))
10854 enable_irq(dev_info->irq);
10855 + spin_unlock_irqrestore(&priv->lock, flags);
10856 } else {
10857 - if (!test_and_set_bit(0, &priv->flags))
10858 + if (!test_and_set_bit(0, &priv->flags)) {
10859 + spin_unlock_irqrestore(&priv->lock, flags);
10860 disable_irq(dev_info->irq);
10861 + }
10862 }
10863 - spin_unlock_irqrestore(&priv->lock, flags);
10864
10865 return 0;
10866 }
10867 diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
10868 index 9f05f9a81f69..3fcc3e74ae2e 100644
10869 --- a/drivers/usb/core/hub.c
10870 +++ b/drivers/usb/core/hub.c
10871 @@ -1187,11 +1187,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
10872 #ifdef CONFIG_PM
10873 udev->reset_resume = 1;
10874 #endif
10875 - /* Don't set the change_bits when the device
10876 - * was powered off.
10877 - */
10878 - if (test_bit(port1, hub->power_bits))
10879 - set_bit(port1, hub->change_bits);
10880
10881 } else {
10882 /* The power session is gone; tell hub_wq */
10883 diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
10884 index 19e819aa2419..ad8307140df8 100644
10885 --- a/drivers/usb/core/quirks.c
10886 +++ b/drivers/usb/core/quirks.c
10887 @@ -291,6 +291,9 @@ static const struct usb_device_id usb_quirk_list[] = {
10888 /* INTEL VALUE SSD */
10889 { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
10890
10891 + /* novation SoundControl XL */
10892 + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
10893 +
10894 { } /* terminating entry must be last */
10895 };
10896
10897 diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
10898 index 854c4ec0af2c..4d7df2f6caf5 100644
10899 --- a/drivers/usb/gadget/composite.c
10900 +++ b/drivers/usb/gadget/composite.c
10901 @@ -437,12 +437,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
10902 val = CONFIG_USB_GADGET_VBUS_DRAW;
10903 if (!val)
10904 return 0;
10905 - switch (speed) {
10906 - case USB_SPEED_SUPER:
10907 - return DIV_ROUND_UP(val, 8);
10908 - default:
10909 + if (speed < USB_SPEED_SUPER)
10910 return DIV_ROUND_UP(val, 2);
10911 - }
10912 + else
10913 + return DIV_ROUND_UP(val, 8);
10914 }
10915
10916 static int config_buf(struct usb_configuration *config,
10917 diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
10918 index 39b7136d31d9..9e246d2e55ca 100644
10919 --- a/drivers/usb/gadget/udc/gr_udc.c
10920 +++ b/drivers/usb/gadget/udc/gr_udc.c
10921 @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev)
10922 return -ENOMEM;
10923 }
10924
10925 - spin_lock(&dev->lock);
10926 -
10927 /* Inside lock so that no gadget can use this udc until probe is done */
10928 retval = usb_add_gadget_udc(dev->dev, &dev->gadget);
10929 if (retval) {
10930 @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev)
10931 }
10932 dev->added = 1;
10933
10934 + spin_lock(&dev->lock);
10935 +
10936 retval = gr_udc_init(dev);
10937 - if (retval)
10938 + if (retval) {
10939 + spin_unlock(&dev->lock);
10940 goto out;
10941 -
10942 - gr_dfs_create(dev);
10943 + }
10944
10945 /* Clear all interrupt enables that might be left on since last boot */
10946 gr_disable_interrupts_and_pullup(dev);
10947
10948 + spin_unlock(&dev->lock);
10949 +
10950 + gr_dfs_create(dev);
10951 +
10952 retval = gr_request_irq(dev, dev->irq);
10953 if (retval) {
10954 dev_err(dev->dev, "Failed to request irq %d\n", dev->irq);
10955 @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev)
10956 dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq);
10957
10958 out:
10959 - spin_unlock(&dev->lock);
10960 -
10961 if (retval)
10962 gr_remove(pdev);
10963
10964 diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
10965 index aad64a26a767..3cca60b845a8 100644
10966 --- a/drivers/usb/host/xhci-mem.c
10967 +++ b/drivers/usb/host/xhci-mem.c
10968 @@ -1532,9 +1532,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
10969 /* Allow 3 retries for everything but isoc, set CErr = 3 */
10970 if (!usb_endpoint_xfer_isoc(&ep->desc))
10971 err_count = 3;
10972 - /* Some devices get this wrong */
10973 - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH)
10974 - max_packet = 512;
10975 + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */
10976 + if (usb_endpoint_xfer_bulk(&ep->desc)) {
10977 + if (udev->speed == USB_SPEED_HIGH)
10978 + max_packet = 512;
10979 + if (udev->speed == USB_SPEED_FULL) {
10980 + max_packet = rounddown_pow_of_two(max_packet);
10981 + max_packet = clamp_val(max_packet, 8, 64);
10982 + }
10983 + }
10984 /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */
10985 if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100)
10986 avg_trb_len = 8;
10987 diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
10988 index aec6b20262e9..4355fbc36fce 100644
10989 --- a/drivers/usb/host/xhci-pci.c
10990 +++ b/drivers/usb/host/xhci-pci.c
10991 @@ -53,6 +53,7 @@
10992 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8
10993 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
10994 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0
10995 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af
10996
10997 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
10998
10999 @@ -170,7 +171,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
11000 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
11001 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
11002 pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
11003 - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
11004 + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI ||
11005 + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) {
11006 xhci->quirks |= XHCI_PME_STUCK_QUIRK;
11007 }
11008 if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
11009 diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
11010 index e8be8e39ab8f..457ad33f4caa 100644
11011 --- a/drivers/usb/musb/omap2430.c
11012 +++ b/drivers/usb/musb/omap2430.c
11013 @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = {
11014 .init = omap2430_musb_init,
11015 .exit = omap2430_musb_exit,
11016
11017 - .set_vbus = omap2430_musb_set_vbus,
11018 -
11019 .enable = omap2430_musb_enable,
11020 .disable = omap2430_musb_disable,
11021
11022 diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
11023 index a6999042e7ad..d022b5ff4cd0 100644
11024 --- a/drivers/usb/storage/uas.c
11025 +++ b/drivers/usb/storage/uas.c
11026 @@ -46,6 +46,7 @@ struct uas_dev_info {
11027 struct scsi_cmnd *cmnd[MAX_CMNDS];
11028 spinlock_t lock;
11029 struct work_struct work;
11030 + struct work_struct scan_work; /* for async scanning */
11031 };
11032
11033 enum {
11034 @@ -115,6 +116,17 @@ out:
11035 spin_unlock_irqrestore(&devinfo->lock, flags);
11036 }
11037
11038 +static void uas_scan_work(struct work_struct *work)
11039 +{
11040 + struct uas_dev_info *devinfo =
11041 + container_of(work, struct uas_dev_info, scan_work);
11042 + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf);
11043 +
11044 + dev_dbg(&devinfo->intf->dev, "starting scan\n");
11045 + scsi_scan_host(shost);
11046 + dev_dbg(&devinfo->intf->dev, "scan complete\n");
11047 +}
11048 +
11049 static void uas_add_work(struct uas_cmd_info *cmdinfo)
11050 {
11051 struct scsi_pointer *scp = (void *)cmdinfo;
11052 @@ -989,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
11053 init_usb_anchor(&devinfo->data_urbs);
11054 spin_lock_init(&devinfo->lock);
11055 INIT_WORK(&devinfo->work, uas_do_work);
11056 + INIT_WORK(&devinfo->scan_work, uas_scan_work);
11057
11058 result = uas_configure_endpoints(devinfo);
11059 if (result)
11060 @@ -1005,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
11061 if (result)
11062 goto free_streams;
11063
11064 - scsi_scan_host(shost);
11065 + /* Submit the delayed_work for SCSI-device scanning */
11066 + schedule_work(&devinfo->scan_work);
11067 +
11068 return result;
11069
11070 free_streams:
11071 @@ -1173,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf)
11072 usb_kill_anchored_urbs(&devinfo->data_urbs);
11073 uas_zap_pending(devinfo, DID_NO_CONNECT);
11074
11075 + /*
11076 + * Prevent SCSI scanning (if it hasn't started yet)
11077 + * or wait for the SCSI-scanning routine to stop.
11078 + */
11079 + cancel_work_sync(&devinfo->scan_work);
11080 +
11081 scsi_remove_host(shost);
11082 uas_free_streams(devinfo);
11083 scsi_host_put(shost);
11084 diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
11085 index d059d04c63ac..20195d3dbf08 100644
11086 --- a/drivers/video/fbdev/pxa168fb.c
11087 +++ b/drivers/video/fbdev/pxa168fb.c
11088 @@ -769,8 +769,8 @@ failed_free_cmap:
11089 failed_free_clk:
11090 clk_disable_unprepare(fbi->clk);
11091 failed_free_fbmem:
11092 - dma_free_coherent(fbi->dev, info->fix.smem_len,
11093 - info->screen_base, fbi->fb_start_dma);
11094 + dma_free_wc(fbi->dev, info->fix.smem_len,
11095 + info->screen_base, fbi->fb_start_dma);
11096 failed_free_info:
11097 kfree(info);
11098
11099 @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev)
11100
11101 irq = platform_get_irq(pdev, 0);
11102
11103 - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
11104 + dma_free_wc(fbi->dev, info->fix.smem_len,
11105 info->screen_base, info->fix.smem_start);
11106
11107 clk_disable_unprepare(fbi->clk);
11108 diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
11109 index 30b3acc93833..e81ec763b555 100644
11110 --- a/drivers/vme/bridges/vme_fake.c
11111 +++ b/drivers/vme/bridges/vme_fake.c
11112 @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
11113 }
11114 }
11115
11116 -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
11117 - u32 aspace, u32 cycle)
11118 +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge,
11119 + unsigned long long addr,
11120 + u32 aspace, u32 cycle)
11121 {
11122 u8 retval = 0xff;
11123 int i;
11124 @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
11125 return retval;
11126 }
11127
11128 -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
11129 - u32 aspace, u32 cycle)
11130 +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge,
11131 + unsigned long long addr,
11132 + u32 aspace, u32 cycle)
11133 {
11134 u16 retval = 0xffff;
11135 int i;
11136 @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
11137 return retval;
11138 }
11139
11140 -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr,
11141 - u32 aspace, u32 cycle)
11142 +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge,
11143 + unsigned long long addr,
11144 + u32 aspace, u32 cycle)
11145 {
11146 u32 retval = 0xffffffff;
11147 int i;
11148 @@ -613,8 +616,9 @@ out:
11149 return retval;
11150 }
11151
11152 -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
11153 - unsigned long long addr, u32 aspace, u32 cycle)
11154 +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge,
11155 + u8 *buf, unsigned long long addr,
11156 + u32 aspace, u32 cycle)
11157 {
11158 int i;
11159 unsigned long long start, end, offset;
11160 @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
11161
11162 }
11163
11164 -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
11165 - unsigned long long addr, u32 aspace, u32 cycle)
11166 +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge,
11167 + u16 *buf, unsigned long long addr,
11168 + u32 aspace, u32 cycle)
11169 {
11170 int i;
11171 unsigned long long start, end, offset;
11172 @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
11173
11174 }
11175
11176 -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
11177 - unsigned long long addr, u32 aspace, u32 cycle)
11178 +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge,
11179 + u32 *buf, unsigned long long addr,
11180 + u32 aspace, u32 cycle)
11181 {
11182 int i;
11183 unsigned long long start, end, offset;
11184 diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
11185 index 08cb419eb4e6..5f6b77ea34fb 100644
11186 --- a/drivers/xen/preempt.c
11187 +++ b/drivers/xen/preempt.c
11188 @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
11189 * cpu.
11190 */
11191 __this_cpu_write(xen_in_preemptible_hcall, false);
11192 - _cond_resched();
11193 + local_irq_enable();
11194 + cond_resched();
11195 + local_irq_disable();
11196 __this_cpu_write(xen_in_preemptible_hcall, true);
11197 }
11198 }
11199 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
11200 index e3524ecce3d7..390053557d4d 100644
11201 --- a/fs/btrfs/disk-io.c
11202 +++ b/fs/btrfs/disk-io.c
11203 @@ -2979,6 +2979,7 @@ retry_root_backup:
11204 /* do not make disk changes in broken FS or nologreplay is given */
11205 if (btrfs_super_log_root(disk_super) != 0 &&
11206 !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
11207 + btrfs_info(fs_info, "start tree-log replay");
11208 ret = btrfs_replay_log(fs_info, fs_devices);
11209 if (ret) {
11210 err = ret;
11211 diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
11212 index 26f9ac719d20..4f59b4089eb0 100644
11213 --- a/fs/btrfs/extent_map.c
11214 +++ b/fs/btrfs/extent_map.c
11215 @@ -227,6 +227,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
11216 struct extent_map *merge = NULL;
11217 struct rb_node *rb;
11218
11219 + /*
11220 + * We can't modify an extent map that is in the tree and that is being
11221 + * used by another task, as it can cause that other task to see it in
11222 + * inconsistent state during the merging. We always have 1 reference for
11223 + * the tree and 1 for this task (which is unpinning the extent map or
11224 + * clearing the logging flag), so anything > 2 means it's being used by
11225 + * other tasks too.
11226 + */
11227 + if (atomic_read(&em->refs) > 2)
11228 + return;
11229 +
11230 if (em->start != 0) {
11231 rb = rb_prev(&em->rb_node);
11232 if (rb)
11233 diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
11234 index b2d1e95de7be..7dc2284017fa 100644
11235 --- a/fs/btrfs/ordered-data.c
11236 +++ b/fs/btrfs/ordered-data.c
11237 @@ -837,10 +837,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
11238 }
11239 btrfs_start_ordered_extent(inode, ordered, 1);
11240 end = ordered->file_offset;
11241 + /*
11242 + * If the ordered extent had an error save the error but don't
11243 + * exit without waiting first for all other ordered extents in
11244 + * the range to complete.
11245 + */
11246 if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
11247 ret = -EIO;
11248 btrfs_put_ordered_extent(ordered);
11249 - if (ret || end == 0 || end == start)
11250 + if (end == 0 || end == start)
11251 break;
11252 end--;
11253 }
11254 diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
11255 index 0c71cdd3f98b..9286603a6a98 100644
11256 --- a/fs/btrfs/super.c
11257 +++ b/fs/btrfs/super.c
11258 @@ -1809,6 +1809,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
11259 }
11260
11261 if (btrfs_super_log_root(fs_info->super_copy) != 0) {
11262 + btrfs_warn(fs_info,
11263 + "mount required to replay tree-log, cannot remount read-write");
11264 ret = -EINVAL;
11265 goto restore;
11266 }
11267 diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
11268 index 751bdde6515d..961fcb40183a 100644
11269 --- a/fs/cifs/connect.c
11270 +++ b/fs/cifs/connect.c
11271 @@ -2927,8 +2927,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
11272 {
11273 struct cifs_sb_info *old = CIFS_SB(sb);
11274 struct cifs_sb_info *new = mnt_data->cifs_sb;
11275 - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
11276 - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
11277 + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
11278 + old->prepath;
11279 + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
11280 + new->prepath;
11281
11282 if (old_set && new_set && !strcmp(new->prepath, old->prepath))
11283 return 1;
11284 diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
11285 index cb77e7ee2c9f..ff6cf23be8a2 100644
11286 --- a/fs/ecryptfs/crypto.c
11287 +++ b/fs/ecryptfs/crypto.c
11288 @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
11289 struct extent_crypt_result ecr;
11290 int rc = 0;
11291
11292 - BUG_ON(!crypt_stat || !crypt_stat->tfm
11293 - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
11294 + if (!crypt_stat || !crypt_stat->tfm
11295 + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
11296 + return -EINVAL;
11297 +
11298 if (unlikely(ecryptfs_verbosity > 0)) {
11299 ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
11300 crypt_stat->key_size);
11301 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
11302 index fa218cd64f74..3f3ec50bf773 100644
11303 --- a/fs/ecryptfs/keystore.c
11304 +++ b/fs/ecryptfs/keystore.c
11305 @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
11306 printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n",
11307 ECRYPTFS_TAG_1_PACKET_TYPE);
11308 rc = -EINVAL;
11309 - goto out;
11310 + goto out_free;
11311 }
11312 /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
11313 * at end of function upon failure */
11314 diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
11315 index 4f457d5c4933..26464f9d9b76 100644
11316 --- a/fs/ecryptfs/messaging.c
11317 +++ b/fs/ecryptfs/messaging.c
11318 @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void)
11319 * ecryptfs_message_buf_len),
11320 GFP_KERNEL);
11321 if (!ecryptfs_msg_ctx_arr) {
11322 + kfree(ecryptfs_daemon_hash);
11323 rc = -ENOMEM;
11324 printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
11325 goto out;
11326 diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
11327 index 7fb8df7b6a43..6b3a32f75dad 100644
11328 --- a/fs/ext4/dir.c
11329 +++ b/fs/ext4/dir.c
11330 @@ -124,12 +124,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
11331 if (err != ERR_BAD_DX_DIR) {
11332 return err;
11333 }
11334 - /*
11335 - * We don't set the inode dirty flag since it's not
11336 - * critical that it get flushed back to the disk.
11337 - */
11338 - ext4_clear_inode_flag(file_inode(file),
11339 - EXT4_INODE_INDEX);
11340 + /* Can we just clear INDEX flag to ignore htree information? */
11341 + if (!ext4_has_metadata_csum(sb)) {
11342 + /*
11343 + * We don't set the inode dirty flag since it's not
11344 + * critical that it gets flushed back to the disk.
11345 + */
11346 + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
11347 + }
11348 }
11349
11350 if (ext4_has_inline_data(inode)) {
11351 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
11352 index 567a6c7af677..9713d3d41412 100644
11353 --- a/fs/ext4/ext4.h
11354 +++ b/fs/ext4/ext4.h
11355 @@ -1514,8 +1514,11 @@ struct ext4_sb_info {
11356 struct ratelimit_state s_warning_ratelimit_state;
11357 struct ratelimit_state s_msg_ratelimit_state;
11358
11359 - /* Barrier between changing inodes' journal flags and writepages ops. */
11360 - struct percpu_rw_semaphore s_journal_flag_rwsem;
11361 + /*
11362 + * Barrier between writepages ops and changing any inode's JOURNAL_DATA
11363 + * or EXTENTS flag.
11364 + */
11365 + struct percpu_rw_semaphore s_writepages_rwsem;
11366
11367 /* Encryption support */
11368 #ifdef CONFIG_EXT4_FS_ENCRYPTION
11369 @@ -2375,8 +2378,11 @@ int ext4_insert_dentry(struct inode *dir,
11370 struct ext4_filename *fname);
11371 static inline void ext4_update_dx_flag(struct inode *inode)
11372 {
11373 - if (!ext4_has_feature_dir_index(inode->i_sb))
11374 + if (!ext4_has_feature_dir_index(inode->i_sb)) {
11375 + /* ext4_iget() should have caught this... */
11376 + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
11377 ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
11378 + }
11379 }
11380 static unsigned char ext4_filetype_table[] = {
11381 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
11382 @@ -2848,7 +2854,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
11383 !inode_is_locked(inode));
11384 down_write(&EXT4_I(inode)->i_data_sem);
11385 if (newsize > EXT4_I(inode)->i_disksize)
11386 - EXT4_I(inode)->i_disksize = newsize;
11387 + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
11388 up_write(&EXT4_I(inode)->i_data_sem);
11389 }
11390
11391 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
11392 index 8133e6529994..911a49e861d2 100644
11393 --- a/fs/ext4/inode.c
11394 +++ b/fs/ext4/inode.c
11395 @@ -2475,7 +2475,7 @@ update_disksize:
11396 * truncate are avoided by checking i_size under i_data_sem.
11397 */
11398 disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
11399 - if (disksize > EXT4_I(inode)->i_disksize) {
11400 + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
11401 int err2;
11402 loff_t i_size;
11403
11404 @@ -2652,7 +2652,7 @@ static int ext4_writepages(struct address_space *mapping,
11405 struct blk_plug plug;
11406 bool give_up_on_write = false;
11407
11408 - percpu_down_read(&sbi->s_journal_flag_rwsem);
11409 + percpu_down_read(&sbi->s_writepages_rwsem);
11410 trace_ext4_writepages(inode, wbc);
11411
11412 if (dax_mapping(mapping)) {
11413 @@ -2853,7 +2853,7 @@ retry:
11414 out_writepages:
11415 trace_ext4_writepages_result(inode, wbc, ret,
11416 nr_to_write - wbc->nr_to_write);
11417 - percpu_up_read(&sbi->s_journal_flag_rwsem);
11418 + percpu_up_read(&sbi->s_writepages_rwsem);
11419 return ret;
11420 }
11421
11422 @@ -4594,6 +4594,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
11423 ret = -EFSCORRUPTED;
11424 goto bad_inode;
11425 }
11426 + /*
11427 + * If dir_index is not enabled but there's dir with INDEX flag set,
11428 + * we'd normally treat htree data as empty space. But with metadata
11429 + * checksumming that corrupts checksums so forbid that.
11430 + */
11431 + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
11432 + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
11433 + EXT4_ERROR_INODE(inode,
11434 + "iget: Dir with htree data on filesystem without dir_index feature.");
11435 + ret = -EFSCORRUPTED;
11436 + goto bad_inode;
11437 + }
11438 ei->i_disksize = inode->i_size;
11439 #ifdef CONFIG_QUOTA
11440 ei->i_reserved_quota = 0;
11441 @@ -5676,7 +5688,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11442 }
11443 }
11444
11445 - percpu_down_write(&sbi->s_journal_flag_rwsem);
11446 + percpu_down_write(&sbi->s_writepages_rwsem);
11447 jbd2_journal_lock_updates(journal);
11448
11449 /*
11450 @@ -5693,7 +5705,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11451 err = jbd2_journal_flush(journal);
11452 if (err < 0) {
11453 jbd2_journal_unlock_updates(journal);
11454 - percpu_up_write(&sbi->s_journal_flag_rwsem);
11455 + percpu_up_write(&sbi->s_writepages_rwsem);
11456 ext4_inode_resume_unlocked_dio(inode);
11457 return err;
11458 }
11459 @@ -5702,7 +5714,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11460 ext4_set_aops(inode);
11461
11462 jbd2_journal_unlock_updates(journal);
11463 - percpu_up_write(&sbi->s_journal_flag_rwsem);
11464 + percpu_up_write(&sbi->s_writepages_rwsem);
11465
11466 if (val)
11467 up_write(&EXT4_I(inode)->i_mmap_sem);
11468 diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
11469 index 364ea4d4a943..bce2d696d6b9 100644
11470 --- a/fs/ext4/migrate.c
11471 +++ b/fs/ext4/migrate.c
11472 @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
11473
11474 int ext4_ext_migrate(struct inode *inode)
11475 {
11476 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
11477 handle_t *handle;
11478 int retval = 0, i;
11479 __le32 *i_data;
11480 @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode)
11481 */
11482 return retval;
11483
11484 + percpu_down_write(&sbi->s_writepages_rwsem);
11485 +
11486 /*
11487 * Worst case we can touch the allocation bitmaps, a bgd
11488 * block, and a block to link in the orphan list. We do need
11489 @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
11490
11491 if (IS_ERR(handle)) {
11492 retval = PTR_ERR(handle);
11493 - return retval;
11494 + goto out_unlock;
11495 }
11496 goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
11497 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
11498 @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode)
11499 if (IS_ERR(tmp_inode)) {
11500 retval = PTR_ERR(tmp_inode);
11501 ext4_journal_stop(handle);
11502 - return retval;
11503 + goto out_unlock;
11504 }
11505 i_size_write(tmp_inode, i_size_read(inode));
11506 /*
11507 @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode)
11508 */
11509 ext4_orphan_del(NULL, tmp_inode);
11510 retval = PTR_ERR(handle);
11511 - goto out;
11512 + goto out_tmp_inode;
11513 }
11514
11515 ei = EXT4_I(inode);
11516 @@ -602,10 +605,11 @@ err_out:
11517 /* Reset the extent details */
11518 ext4_ext_tree_init(handle, tmp_inode);
11519 ext4_journal_stop(handle);
11520 -out:
11521 +out_tmp_inode:
11522 unlock_new_inode(tmp_inode);
11523 iput(tmp_inode);
11524 -
11525 +out_unlock:
11526 + percpu_up_write(&sbi->s_writepages_rwsem);
11527 return retval;
11528 }
11529
11530 @@ -615,7 +619,8 @@ out:
11531 int ext4_ind_migrate(struct inode *inode)
11532 {
11533 struct ext4_extent_header *eh;
11534 - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
11535 + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
11536 + struct ext4_super_block *es = sbi->s_es;
11537 struct ext4_inode_info *ei = EXT4_I(inode);
11538 struct ext4_extent *ex;
11539 unsigned int i, len;
11540 @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode)
11541 if (test_opt(inode->i_sb, DELALLOC))
11542 ext4_alloc_da_blocks(inode);
11543
11544 + percpu_down_write(&sbi->s_writepages_rwsem);
11545 +
11546 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
11547 - if (IS_ERR(handle))
11548 - return PTR_ERR(handle);
11549 + if (IS_ERR(handle)) {
11550 + ret = PTR_ERR(handle);
11551 + goto out_unlock;
11552 + }
11553
11554 down_write(&EXT4_I(inode)->i_data_sem);
11555 ret = ext4_ext_check_inode(inode);
11556 @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode)
11557 errout:
11558 ext4_journal_stop(handle);
11559 up_write(&EXT4_I(inode)->i_data_sem);
11560 +out_unlock:
11561 + percpu_up_write(&sbi->s_writepages_rwsem);
11562 return ret;
11563 }
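
The migrate.c hunks above take the superblock's s_writepages_rwsem for writing around the whole conversion and route every failure path through out_unlock/out_tmp_inode, so the semaphore is released exactly once on each exit. A minimal userspace sketch of that pattern, using a POSIX rwlock in place of the kernel's percpu rw_semaphore (the function names and the conversion step are illustrative, not taken from the patch):

#include <pthread.h>
#include <stdio.h>

/* Stand-in for the per-filesystem lock (kernel: percpu_rw_semaphore). */
static pthread_rwlock_t writepages_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Hot path: writeback-style work takes the lock shared. */
static void writepages(void)
{
	pthread_rwlock_rdlock(&writepages_lock);
	/* ... write dirty pages using the current inode format ... */
	pthread_rwlock_unlock(&writepages_lock);
}

/* Slow path: the format migration takes the lock exclusive and uses a
 * single unlock label so every error path releases it exactly once. */
static int migrate_inode(int simulate_error)
{
	int ret = 0;

	pthread_rwlock_wrlock(&writepages_lock);

	if (simulate_error) {		/* e.g. journal start failed */
		ret = -1;
		goto out_unlock;
	}
	/* ... convert the block mapping while no writeback can run ... */

out_unlock:
	pthread_rwlock_unlock(&writepages_lock);
	return ret;
}

int main(void)
{
	writepages();
	printf("migrate: %d\n", migrate_inode(0));
	printf("migrate (error path): %d\n", migrate_inode(1));
	return 0;
}
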
11564 diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
11565 index c2e830a6206d..fb1ad9510c5f 100644
11566 --- a/fs/ext4/mmp.c
11567 +++ b/fs/ext4/mmp.c
11568 @@ -119,10 +119,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
11569 {
11570 __ext4_warning(sb, function, line, "%s", msg);
11571 __ext4_warning(sb, function, line,
11572 - "MMP failure info: last update time: %llu, last update "
11573 - "node: %s, last update device: %s",
11574 - (long long unsigned int) le64_to_cpu(mmp->mmp_time),
11575 - mmp->mmp_nodename, mmp->mmp_bdevname);
11576 + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
11577 + (unsigned long long)le64_to_cpu(mmp->mmp_time),
11578 + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
11579 + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
11580 }
11581
11582 /*
11583 @@ -153,6 +153,7 @@ static int kmmpd(void *data)
11584 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
11585 EXT4_MMP_MIN_CHECK_INTERVAL);
11586 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
11587 + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
11588 bdevname(bh->b_bdev, mmp->mmp_bdevname);
11589
11590 memcpy(mmp->mmp_nodename, init_utsname()->nodename,
11591 @@ -377,7 +378,8 @@ skip:
11592 /*
11593 * Start a kernel thread to update the MMP block periodically.
11594 */
11595 - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
11596 + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
11597 + (int)sizeof(mmp->mmp_bdevname),
11598 bdevname(bh->b_bdev,
11599 mmp->mmp_bdevname));
11600 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
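
The mmp.c changes above print mmp_nodename and mmp_bdevname with an explicit "%.*s" precision because those on-disk fields are fixed-size character arrays that need not be NUL-terminated. A small standalone illustration of the same printf idiom (the 16-byte field and its contents are made up for the example):

#include <stdio.h>
#include <string.h>

struct record {
	char name[16];	/* fixed-size on-disk field, no NUL terminator */
};

int main(void)
{
	struct record r;

	/* Fill all 16 bytes; there is no terminating NUL in the field. */
	memcpy(r.name, "node0123456789ab", sizeof(r.name));

	/* "%.*s" prints at most sizeof(r.name) bytes, so the read never
	 * runs past the array even though the string is unterminated. */
	printf("name: %.*s\n", (int)sizeof(r.name), r.name);
	return 0;
}
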
11601 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
11602 index f0ce535d514c..339ede11896a 100644
11603 --- a/fs/ext4/namei.c
11604 +++ b/fs/ext4/namei.c
11605 @@ -1445,6 +1445,7 @@ restart:
11606 /*
11607 * We deal with the read-ahead logic here.
11608 */
11609 + cond_resched();
11610 if (ra_ptr >= ra_max) {
11611 /* Refill the readahead buffer */
11612 ra_ptr = 0;
11613 @@ -2148,6 +2149,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
11614 retval = ext4_dx_add_entry(handle, &fname, dir, inode);
11615 if (!retval || (retval != ERR_BAD_DX_DIR))
11616 goto out;
11617 + /* Can we just ignore htree data? */
11618 + if (ext4_has_metadata_csum(sb)) {
11619 + EXT4_ERROR_INODE(dir,
11620 + "Directory has corrupted htree index.");
11621 + retval = -EFSCORRUPTED;
11622 + goto out;
11623 + }
11624 ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
11625 dx_fallback++;
11626 ext4_mark_inode_dirty(handle, dir);
11627 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
11628 index 391ab55808c9..b69a78c061cb 100644
11629 --- a/fs/ext4/super.c
11630 +++ b/fs/ext4/super.c
11631 @@ -865,7 +865,7 @@ static void ext4_put_super(struct super_block *sb)
11632 percpu_counter_destroy(&sbi->s_freeinodes_counter);
11633 percpu_counter_destroy(&sbi->s_dirs_counter);
11634 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
11635 - percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
11636 + percpu_free_rwsem(&sbi->s_writepages_rwsem);
11637 brelse(sbi->s_sbh);
11638 #ifdef CONFIG_QUOTA
11639 for (i = 0; i < EXT4_MAXQUOTAS; i++)
11640 @@ -2743,17 +2743,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
11641 return 0;
11642 }
11643
11644 -#ifndef CONFIG_QUOTA
11645 - if (ext4_has_feature_quota(sb) && !readonly) {
11646 +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
11647 + if (!readonly && (ext4_has_feature_quota(sb) ||
11648 + ext4_has_feature_project(sb))) {
11649 ext4_msg(sb, KERN_ERR,
11650 - "Filesystem with quota feature cannot be mounted RDWR "
11651 - "without CONFIG_QUOTA");
11652 - return 0;
11653 - }
11654 - if (ext4_has_feature_project(sb) && !readonly) {
11655 - ext4_msg(sb, KERN_ERR,
11656 - "Filesystem with project quota feature cannot be mounted RDWR "
11657 - "without CONFIG_QUOTA");
11658 + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
11659 return 0;
11660 }
11661 #endif /* CONFIG_QUOTA */
11662 @@ -4229,7 +4223,7 @@ no_journal:
11663 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
11664 GFP_KERNEL);
11665 if (!err)
11666 - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
11667 + err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
11668
11669 if (err) {
11670 ext4_msg(sb, KERN_ERR, "insufficient memory");
11671 @@ -4328,7 +4322,7 @@ failed_mount6:
11672 percpu_counter_destroy(&sbi->s_freeinodes_counter);
11673 percpu_counter_destroy(&sbi->s_dirs_counter);
11674 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
11675 - percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
11676 + percpu_free_rwsem(&sbi->s_writepages_rwsem);
11677 failed_mount5:
11678 ext4_ext_release(sb);
11679 ext4_release_system_zone(sb);
11680 diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
11681 index 4d5a5a4cc017..addb0784dd1c 100644
11682 --- a/fs/jbd2/checkpoint.c
11683 +++ b/fs/jbd2/checkpoint.c
11684 @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
11685 "journal space in %s\n", __func__,
11686 journal->j_devname);
11687 WARN_ON(1);
11688 - jbd2_journal_abort(journal, 0);
11689 + jbd2_journal_abort(journal, -EIO);
11690 }
11691 write_lock(&journal->j_state_lock);
11692 } else {
11693 diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
11694 index d002b2b6895f..1d06f81ee8b4 100644
11695 --- a/fs/jbd2/commit.c
11696 +++ b/fs/jbd2/commit.c
11697 @@ -779,7 +779,7 @@ start_journal_io:
11698 err = journal_submit_commit_record(journal, commit_transaction,
11699 &cbh, crc32_sum);
11700 if (err)
11701 - __jbd2_journal_abort_hard(journal);
11702 + jbd2_journal_abort(journal, err);
11703 }
11704
11705 blk_finish_plug(&plug);
11706 @@ -872,7 +872,7 @@ start_journal_io:
11707 err = journal_submit_commit_record(journal, commit_transaction,
11708 &cbh, crc32_sum);
11709 if (err)
11710 - __jbd2_journal_abort_hard(journal);
11711 + jbd2_journal_abort(journal, err);
11712 }
11713 if (cbh)
11714 err = journal_wait_on_commit_record(journal, cbh);
11715 @@ -969,29 +969,33 @@ restart_loop:
11716 * it. */
11717
11718 /*
11719 - * A buffer which has been freed while still being journaled by
11720 - * a previous transaction.
11721 - */
11722 - if (buffer_freed(bh)) {
11723 + * A buffer which has been freed while still being journaled
11724 + * by a previous transaction: refile the buffer to BJ_Forget of
11725 + * the running transaction. If the just committed transaction
11726 + * contains an "add to orphan" operation, we can completely
11727 + * invalidate the buffer now. We are rather thorough in that,
11728 + * since the buffer may still be accessible when blocksize <
11729 + * pagesize and it is attached to the last partial page.

11730 + */
11731 + if (buffer_freed(bh) && !jh->b_next_transaction) {
11732 + struct address_space *mapping;
11733 +
11734 + clear_buffer_freed(bh);
11735 + clear_buffer_jbddirty(bh);
11736 +
11737 /*
11738 - * If the running transaction is the one containing
11739 - * "add to orphan" operation (b_next_transaction !=
11740 - * NULL), we have to wait for that transaction to
11741 - * commit before we can really get rid of the buffer.
11742 - * So just clear b_modified to not confuse transaction
11743 - * credit accounting and refile the buffer to
11744 - * BJ_Forget of the running transaction. If the just
11745 - * committed transaction contains "add to orphan"
11746 - * operation, we can completely invalidate the buffer
11747 - * now. We are rather through in that since the
11748 - * buffer may be still accessible when blocksize <
11749 - * pagesize and it is attached to the last partial
11750 - * page.
11751 + * Block device buffers need to stay mapped all the
11752 + * time, so it is enough to clear buffer_jbddirty and
11753 + * buffer_freed bits. For the file mapping buffers (i.e.
11754 + * journalled data) we need to unmap buffer and clear
11755 + * more bits. We also need to be careful about the check
11756 + * because the data page mapping can get cleared under
11757 + * our hands; in that case we need not clear more bits
11758 + * because the page and buffers will be freed and can
11759 + * never be reused once we are done with them.
11760 */
11761 - jh->b_modified = 0;
11762 - if (!jh->b_next_transaction) {
11763 - clear_buffer_freed(bh);
11764 - clear_buffer_jbddirty(bh);
11765 + mapping = READ_ONCE(bh->b_page->mapping);
11766 + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
11767 clear_buffer_mapped(bh);
11768 clear_buffer_new(bh);
11769 clear_buffer_req(bh);
11770 diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
11771 index 3cbcf649ac66..efc8cfd06073 100644
11772 --- a/fs/jbd2/journal.c
11773 +++ b/fs/jbd2/journal.c
11774 @@ -1670,6 +1670,11 @@ int jbd2_journal_load(journal_t *journal)
11775 journal->j_devname);
11776 return -EFSCORRUPTED;
11777 }
11778 + /*
11779 + * clear the JBD2_ABORT flag set in journal_init_common here so
11780 + * that the log tail information is updated with the newest seq.
11781 + */
11782 + journal->j_flags &= ~JBD2_ABORT;
11783
11784 /* OK, we've finished with the dynamic journal bits:
11785 * reinitialise the dynamic contents of the superblock in memory
11786 @@ -1677,7 +1682,6 @@ int jbd2_journal_load(journal_t *journal)
11787 if (journal_reset(journal))
11788 goto recovery_error;
11789
11790 - journal->j_flags &= ~JBD2_ABORT;
11791 journal->j_flags |= JBD2_LOADED;
11792 return 0;
11793
11794 @@ -2096,12 +2100,10 @@ static void __journal_abort_soft (journal_t *journal, int errno)
11795
11796 __jbd2_journal_abort_hard(journal);
11797
11798 - if (errno) {
11799 - jbd2_journal_update_sb_errno(journal);
11800 - write_lock(&journal->j_state_lock);
11801 - journal->j_flags |= JBD2_REC_ERR;
11802 - write_unlock(&journal->j_state_lock);
11803 - }
11804 + jbd2_journal_update_sb_errno(journal);
11805 + write_lock(&journal->j_state_lock);
11806 + journal->j_flags |= JBD2_REC_ERR;
11807 + write_unlock(&journal->j_state_lock);
11808 }
11809
11810 /**
11811 @@ -2143,11 +2145,6 @@ static void __journal_abort_soft (journal_t *journal, int errno)
11812 * failure to disk. ext3_error, for example, now uses this
11813 * functionality.
11814 *
11815 - * Errors which originate from within the journaling layer will NOT
11816 - * supply an errno; a null errno implies that absolutely no further
11817 - * writes are done to the journal (unless there are any already in
11818 - * progress).
11819 - *
11820 */
11821
11822 void jbd2_journal_abort(journal_t *journal, int errno)
11823 diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
11824 index 799f96c67211..04dd0652bb5c 100644
11825 --- a/fs/jbd2/transaction.c
11826 +++ b/fs/jbd2/transaction.c
11827 @@ -2213,14 +2213,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
11828 return -EBUSY;
11829 }
11830 /*
11831 - * OK, buffer won't be reachable after truncate. We just set
11832 - * j_next_transaction to the running transaction (if there is
11833 - * one) and mark buffer as freed so that commit code knows it
11834 - * should clear dirty bits when it is done with the buffer.
11835 + * OK, buffer won't be reachable after truncate. We just clear
11836 + * b_modified to not confuse transaction credit accounting, and
11837 + * set j_next_transaction to the running transaction (if there
11838 + * is one) and mark buffer as freed so that commit code knows
11839 + * it should clear dirty bits when it is done with the buffer.
11840 */
11841 set_buffer_freed(bh);
11842 if (journal->j_running_transaction && buffer_jbddirty(bh))
11843 jh->b_next_transaction = journal->j_running_transaction;
11844 + jh->b_modified = 0;
11845 jbd2_journal_put_journal_head(jh);
11846 spin_unlock(&journal->j_list_lock);
11847 jbd_unlock_bh_state(bh);
11848 diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
11849 index 497a4171ef61..bfb50fc51528 100644
11850 --- a/fs/ocfs2/journal.h
11851 +++ b/fs/ocfs2/journal.h
11852 @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
11853 {
11854 struct ocfs2_inode_info *oi = OCFS2_I(inode);
11855
11856 - oi->i_sync_tid = handle->h_transaction->t_tid;
11857 - if (datasync)
11858 - oi->i_datasync_tid = handle->h_transaction->t_tid;
11859 + if (!is_handle_aborted(handle)) {
11860 + oi->i_sync_tid = handle->h_transaction->t_tid;
11861 + if (datasync)
11862 + oi->i_datasync_tid = handle->h_transaction->t_tid;
11863 + }
11864 }
11865
11866 #endif /* OCFS2_JOURNAL_H */
11867 diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
11868 index 0748a26598fc..7d7df003f9d8 100644
11869 --- a/fs/orangefs/orangefs-debugfs.c
11870 +++ b/fs/orangefs/orangefs-debugfs.c
11871 @@ -304,6 +304,7 @@ static void *help_start(struct seq_file *m, loff_t *pos)
11872
11873 static void *help_next(struct seq_file *m, void *v, loff_t *pos)
11874 {
11875 + (*pos)++;
11876 gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
11877
11878 return NULL;
11879 diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
11880 index a97e352d05d3..5f5fff068877 100644
11881 --- a/fs/reiserfs/stree.c
11882 +++ b/fs/reiserfs/stree.c
11883 @@ -2249,7 +2249,8 @@ error_out:
11884 /* also releases the path */
11885 unfix_nodes(&s_ins_balance);
11886 #ifdef REISERQUOTA_DEBUG
11887 - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
11888 + if (inode)
11889 + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
11890 "reiserquota insert_item(): freeing %u id=%u type=%c",
11891 quota_bytes, inode->i_uid, head2type(ih));
11892 #endif
11893 diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
11894 index bfed2a700015..677608a89b08 100644
11895 --- a/fs/reiserfs/super.c
11896 +++ b/fs/reiserfs/super.c
11897 @@ -1928,7 +1928,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
11898 if (!sbi->s_jdev) {
11899 SWARN(silent, s, "", "Cannot allocate memory for "
11900 "journal device name");
11901 - goto error;
11902 + goto error_unlocked;
11903 }
11904 }
11905 #ifdef CONFIG_QUOTA
11906 diff --git a/fs/udf/super.c b/fs/udf/super.c
11907 index 03369a89600e..4abdba453885 100644
11908 --- a/fs/udf/super.c
11909 +++ b/fs/udf/super.c
11910 @@ -2460,17 +2460,29 @@ static unsigned int udf_count_free_table(struct super_block *sb,
11911 static unsigned int udf_count_free(struct super_block *sb)
11912 {
11913 unsigned int accum = 0;
11914 - struct udf_sb_info *sbi;
11915 + struct udf_sb_info *sbi = UDF_SB(sb);
11916 struct udf_part_map *map;
11917 + unsigned int part = sbi->s_partition;
11918 + int ptype = sbi->s_partmaps[part].s_partition_type;
11919 +
11920 + if (ptype == UDF_METADATA_MAP25) {
11921 + part = sbi->s_partmaps[part].s_type_specific.s_metadata.
11922 + s_phys_partition_ref;
11923 + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) {
11924 + /*
11925 + * Filesystems with VAT are append-only and we cannot write to
11926 + * them. Let's just report 0 here.
11927 + */
11928 + return 0;
11929 + }
11930
11931 - sbi = UDF_SB(sb);
11932 if (sbi->s_lvid_bh) {
11933 struct logicalVolIntegrityDesc *lvid =
11934 (struct logicalVolIntegrityDesc *)
11935 sbi->s_lvid_bh->b_data;
11936 - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) {
11937 + if (le32_to_cpu(lvid->numOfPartitions) > part) {
11938 accum = le32_to_cpu(
11939 - lvid->freeSpaceTable[sbi->s_partition]);
11940 + lvid->freeSpaceTable[part]);
11941 if (accum == 0xFFFFFFFF)
11942 accum = 0;
11943 }
11944 @@ -2479,7 +2491,7 @@ static unsigned int udf_count_free(struct super_block *sb)
11945 if (accum)
11946 return accum;
11947
11948 - map = &sbi->s_partmaps[sbi->s_partition];
11949 + map = &sbi->s_partmaps[part];
11950 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
11951 accum += udf_count_free_bitmap(sb,
11952 map->s_uspace.s_bitmap);
11953 diff --git a/include/linux/libata.h b/include/linux/libata.h
11954 index df58b01e6962..cdfb67b22317 100644
11955 --- a/include/linux/libata.h
11956 +++ b/include/linux/libata.h
11957 @@ -1222,6 +1222,7 @@ struct pci_bits {
11958 };
11959
11960 extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits);
11961 +extern void ata_pci_shutdown_one(struct pci_dev *pdev);
11962 extern void ata_pci_remove_one(struct pci_dev *pdev);
11963
11964 #ifdef CONFIG_PM
11965 diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
11966 index 87ff4f58a2f0..9e20bf7f46a2 100644
11967 --- a/include/linux/list_nulls.h
11968 +++ b/include/linux/list_nulls.h
11969 @@ -71,10 +71,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
11970 struct hlist_nulls_node *first = h->first;
11971
11972 n->next = first;
11973 - n->pprev = &h->first;
11974 + WRITE_ONCE(n->pprev, &h->first);
11975 h->first = n;
11976 if (!is_a_nulls(first))
11977 - first->pprev = &n->next;
11978 + WRITE_ONCE(first->pprev, &n->next);
11979 }
11980
11981 static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
11982 @@ -84,13 +84,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
11983
11984 WRITE_ONCE(*pprev, next);
11985 if (!is_a_nulls(next))
11986 - next->pprev = pprev;
11987 + WRITE_ONCE(next->pprev, pprev);
11988 }
11989
11990 static inline void hlist_nulls_del(struct hlist_nulls_node *n)
11991 {
11992 __hlist_nulls_del(n);
11993 - n->pprev = LIST_POISON2;
11994 + WRITE_ONCE(n->pprev, LIST_POISON2);
11995 }
11996
11997 /**
11998 diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
11999 index 106f4e0d7bd3..4d71e3687d1e 100644
12000 --- a/include/linux/rculist_nulls.h
12001 +++ b/include/linux/rculist_nulls.h
12002 @@ -33,7 +33,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
12003 {
12004 if (!hlist_nulls_unhashed(n)) {
12005 __hlist_nulls_del(n);
12006 - n->pprev = NULL;
12007 + WRITE_ONCE(n->pprev, NULL);
12008 }
12009 }
12010
12011 @@ -65,7 +65,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
12012 static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
12013 {
12014 __hlist_nulls_del(n);
12015 - n->pprev = LIST_POISON2;
12016 + WRITE_ONCE(n->pprev, LIST_POISON2);
12017 }
12018
12019 /**
12020 @@ -93,10 +93,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
12021 struct hlist_nulls_node *first = h->first;
12022
12023 n->next = first;
12024 - n->pprev = &h->first;
12025 + WRITE_ONCE(n->pprev, &h->first);
12026 rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
12027 if (!is_a_nulls(first))
12028 - first->pprev = &n->next;
12029 + WRITE_ONCE(first->pprev, &n->next);
12030 }
12031
12032 /**
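
The list_nulls/rculist_nulls hunks above convert the plain stores to ->pprev into WRITE_ONCE() so that concurrent lockless accesses observe a single, untorn store that the compiler cannot split or reorder. A rough userspace sketch of the idea, with simplified macros modelled on the kernel helpers (the node layout is illustrative):

#include <stdio.h>

/* Simplified versions of the kernel helpers: force a single access
 * through a volatile-qualified pointer. */
#define WRITE_ONCE(x, val)  (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)        (*(volatile __typeof__(x) *)&(x))

struct node {
	struct node *next;
	struct node **pprev;
};

int main(void)
{
	static struct node *head;
	struct node n = { 0 };

	/* Publisher side: update the back-pointer with a marked store,
	 * as hlist_nulls_add_head_rcu() now does for pprev. */
	WRITE_ONCE(n.pprev, &head);
	head = &n;

	/* Reader side: a concurrent reader would pair this with READ_ONCE. */
	printf("pprev points at head: %d\n", READ_ONCE(n.pprev) == &head);
	return 0;
}
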
12033 diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
12034 index 8ffa94009d1a..76002416cead 100644
12035 --- a/include/media/v4l2-device.h
12036 +++ b/include/media/v4l2-device.h
12037 @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12038 struct v4l2_subdev *__sd; \
12039 \
12040 __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
12041 - !(grpid) || __sd->grp_id == (grpid), o, f , \
12042 + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
12043 ##args); \
12044 } while (0)
12045
12046 @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12047 ({ \
12048 struct v4l2_subdev *__sd; \
12049 __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
12050 - !(grpid) || __sd->grp_id == (grpid), o, f , \
12051 + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
12052 ##args); \
12053 })
12054
12055 @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12056 struct v4l2_subdev *__sd; \
12057 \
12058 __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
12059 - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
12060 - ##args); \
12061 + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
12062 + f , ##args); \
12063 } while (0)
12064
12065 /*
12066 @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12067 ({ \
12068 struct v4l2_subdev *__sd; \
12069 __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
12070 - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
12071 - ##args); \
12072 + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
12073 + f , ##args); \
12074 })
12075
12076 /*
12077 diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
12078 index 1a2ae0862e23..c1260d80ef30 100644
12079 --- a/include/scsi/iscsi_proto.h
12080 +++ b/include/scsi/iscsi_proto.h
12081 @@ -638,7 +638,6 @@ struct iscsi_reject {
12082 #define ISCSI_REASON_BOOKMARK_INVALID 9
12083 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10
12084 #define ISCSI_REASON_NEGOTIATION_RESET 11
12085 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12
12086
12087 /* Max. number of Key=Value pairs in a text message */
12088 #define MAX_KEY_VALUE_PAIRS 8192
12089 diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
12090 index f730b91e472f..5432111c8761 100644
12091 --- a/include/sound/rawmidi.h
12092 +++ b/include/sound/rawmidi.h
12093 @@ -92,9 +92,9 @@ struct snd_rawmidi_substream {
12094 struct list_head list; /* list of all substream for given stream */
12095 int stream; /* direction */
12096 int number; /* substream number */
12097 - unsigned int opened: 1, /* open flag */
12098 - append: 1, /* append flag (merge more streams) */
12099 - active_sensing: 1; /* send active sensing when close */
12100 + bool opened; /* open flag */
12101 + bool append; /* append flag (merge more streams) */
12102 + bool active_sensing; /* send active sensing when close */
12103 int use_count; /* use counter (for output) */
12104 size_t bytes;
12105 struct snd_rawmidi *rmidi;
12106 diff --git a/ipc/sem.c b/ipc/sem.c
12107 index 10b94bc59d4a..5cd9d802592f 100644
12108 --- a/ipc/sem.c
12109 +++ b/ipc/sem.c
12110 @@ -2159,11 +2159,9 @@ void exit_sem(struct task_struct *tsk)
12111 ipc_assert_locked_object(&sma->sem_perm);
12112 list_del(&un->list_id);
12113
12114 - /* we are the last process using this ulp, acquiring ulp->lock
12115 - * isn't required. Besides that, we are also protected against
12116 - * IPC_RMID as we hold sma->sem_perm lock now
12117 - */
12118 + spin_lock(&ulp->lock);
12119 list_del_rcu(&un->list_proc);
12120 + spin_unlock(&ulp->lock);
12121
12122 /* perform adjustments registered in un */
12123 for (i = 0; i < sma->sem_nsems; i++) {
12124 diff --git a/kernel/cpu.c b/kernel/cpu.c
12125 index c2573e858009..1fbe93fefc1f 100644
12126 --- a/kernel/cpu.c
12127 +++ b/kernel/cpu.c
12128 @@ -515,8 +515,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
12129 if (WARN_ON_ONCE((!cpu_online(cpu))))
12130 return -ECANCELED;
12131
12132 - /* Unpark the stopper thread and the hotplug thread of the target cpu */
12133 - stop_machine_unpark(cpu);
12134 + /* Unpark the hotplug thread of the target cpu */
12135 kthread_unpark(st->thread);
12136
12137 /*
12138 @@ -1115,8 +1114,8 @@ void notify_cpu_starting(unsigned int cpu)
12139
12140 /*
12141 * Called from the idle task. Wake up the controlling task which brings the
12142 - * stopper and the hotplug thread of the upcoming CPU up and then delegates
12143 - * the rest of the online bringup to the hotplug thread.
12144 + * hotplug thread of the upcoming CPU up and then delegates the rest of the
12145 + * online bringup to the hotplug thread.
12146 */
12147 void cpuhp_online_idle(enum cpuhp_state state)
12148 {
12149 @@ -1126,6 +1125,12 @@ void cpuhp_online_idle(enum cpuhp_state state)
12150 if (state != CPUHP_AP_ONLINE_IDLE)
12151 return;
12152
12153 + /*
12154 + * Unpark the stopper thread before we start the idle loop (and start
12155 + * scheduling); this ensures the stopper task is always available.
12156 + */
12157 + stop_machine_unpark(smp_processor_id());
12158 +
12159 st->state = CPUHP_AP_ONLINE_IDLE;
12160 complete(&st->done);
12161 }
12162 diff --git a/kernel/padata.c b/kernel/padata.c
12163 index 63449fc584da..286c5142a0f7 100644
12164 --- a/kernel/padata.c
12165 +++ b/kernel/padata.c
12166 @@ -34,6 +34,8 @@
12167
12168 #define MAX_OBJ_NUM 1000
12169
12170 +static void padata_free_pd(struct parallel_data *pd);
12171 +
12172 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
12173 {
12174 int cpu, target_cpu;
12175 @@ -301,6 +303,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
12176 struct padata_serial_queue *squeue;
12177 struct parallel_data *pd;
12178 LIST_HEAD(local_list);
12179 + int cnt;
12180
12181 local_bh_disable();
12182 squeue = container_of(serial_work, struct padata_serial_queue, work);
12183 @@ -310,6 +313,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
12184 list_replace_init(&squeue->serial.list, &local_list);
12185 spin_unlock(&squeue->serial.lock);
12186
12187 + cnt = 0;
12188 +
12189 while (!list_empty(&local_list)) {
12190 struct padata_priv *padata;
12191
12192 @@ -319,9 +324,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
12193 list_del_init(&padata->list);
12194
12195 padata->serial(padata);
12196 - atomic_dec(&pd->refcnt);
12197 + cnt++;
12198 }
12199 local_bh_enable();
12200 +
12201 + if (atomic_sub_and_test(cnt, &pd->refcnt))
12202 + padata_free_pd(pd);
12203 }
12204
12205 /**
12206 @@ -444,7 +452,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
12207 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
12208 atomic_set(&pd->seq_nr, -1);
12209 atomic_set(&pd->reorder_objects, 0);
12210 - atomic_set(&pd->refcnt, 0);
12211 + atomic_set(&pd->refcnt, 1);
12212 pd->pinst = pinst;
12213 spin_lock_init(&pd->lock);
12214
12215 @@ -469,31 +477,6 @@ static void padata_free_pd(struct parallel_data *pd)
12216 kfree(pd);
12217 }
12218
12219 -/* Flush all objects out of the padata queues. */
12220 -static void padata_flush_queues(struct parallel_data *pd)
12221 -{
12222 - int cpu;
12223 - struct padata_parallel_queue *pqueue;
12224 - struct padata_serial_queue *squeue;
12225 -
12226 - for_each_cpu(cpu, pd->cpumask.pcpu) {
12227 - pqueue = per_cpu_ptr(pd->pqueue, cpu);
12228 - flush_work(&pqueue->work);
12229 - }
12230 -
12231 - del_timer_sync(&pd->timer);
12232 -
12233 - if (atomic_read(&pd->reorder_objects))
12234 - padata_reorder(pd);
12235 -
12236 - for_each_cpu(cpu, pd->cpumask.cbcpu) {
12237 - squeue = per_cpu_ptr(pd->squeue, cpu);
12238 - flush_work(&squeue->work);
12239 - }
12240 -
12241 - BUG_ON(atomic_read(&pd->refcnt) != 0);
12242 -}
12243 -
12244 static void __padata_start(struct padata_instance *pinst)
12245 {
12246 pinst->flags |= PADATA_INIT;
12247 @@ -507,10 +490,6 @@ static void __padata_stop(struct padata_instance *pinst)
12248 pinst->flags &= ~PADATA_INIT;
12249
12250 synchronize_rcu();
12251 -
12252 - get_online_cpus();
12253 - padata_flush_queues(pinst->pd);
12254 - put_online_cpus();
12255 }
12256
12257 /* Replace the internal control structure with a new one. */
12258 @@ -531,8 +510,8 @@ static void padata_replace(struct padata_instance *pinst,
12259 if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
12260 notification_mask |= PADATA_CPU_SERIAL;
12261
12262 - padata_flush_queues(pd_old);
12263 - padata_free_pd(pd_old);
12264 + if (atomic_dec_and_test(&pd_old->refcnt))
12265 + padata_free_pd(pd_old);
12266
12267 if (notification_mask)
12268 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
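
In the padata hunks above the parallel_data refcount now starts at 1 (the instance's own reference), serial workers drop one reference per completed object in a batch, and whoever brings the count to zero frees the structure; padata_replace simply drops the initial reference instead of flushing queues. A stand-alone sketch of that lifetime rule using C11 atomics (structure and function names are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pdata {
	atomic_int refcnt;	/* 1 == owner's own reference */
};

static struct pdata *pdata_alloc(void)
{
	struct pdata *pd = malloc(sizeof(*pd));

	atomic_init(&pd->refcnt, 1);	/* owner holds the initial ref */
	return pd;
}

static void pdata_free(struct pdata *pd)
{
	printf("freeing pdata\n");
	free(pd);
}

/* Worker: finished `cnt` queued objects, drop that many references. */
static void worker_done(struct pdata *pd, int cnt)
{
	if (atomic_fetch_sub(&pd->refcnt, cnt) == cnt)
		pdata_free(pd);		/* we dropped the last reference */
}

/* Owner: stop using pd and drop the initial reference. */
static void owner_release(struct pdata *pd)
{
	if (atomic_fetch_sub(&pd->refcnt, 1) == 1)
		pdata_free(pd);
}

int main(void)
{
	struct pdata *pd = pdata_alloc();

	atomic_fetch_add(&pd->refcnt, 3);	/* three objects queued */
	worker_done(pd, 3);			/* all three completed */
	owner_release(pd);			/* last ref -> freed here */
	return 0;
}
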
12269 diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
12270 index 71a40e5c3a9f..2ae98f8bce81 100644
12271 --- a/kernel/trace/ftrace.c
12272 +++ b/kernel/trace/ftrace.c
12273 @@ -5455,9 +5455,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
12274 struct trace_array *tr = m->private;
12275 struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids);
12276
12277 - if (v == FTRACE_NO_PIDS)
12278 + if (v == FTRACE_NO_PIDS) {
12279 + (*pos)++;
12280 return NULL;
12281 -
12282 + }
12283 return trace_pid_next(pid_list, v, pos);
12284 }
12285
12286 diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
12287 index 7e6971ba9541..8a88e85c8c61 100644
12288 --- a/kernel/trace/trace_events_trigger.c
12289 +++ b/kernel/trace/trace_events_trigger.c
12290 @@ -126,9 +126,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
12291 {
12292 struct trace_event_file *event_file = event_file_data(m->private);
12293
12294 - if (t == SHOW_AVAILABLE_TRIGGERS)
12295 + if (t == SHOW_AVAILABLE_TRIGGERS) {
12296 + (*pos)++;
12297 return NULL;
12298 -
12299 + }
12300 return seq_list_next(t, &event_file->triggers, pos);
12301 }
12302
12303 diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
12304 index 413ff108fbd0..d19f2191960e 100644
12305 --- a/kernel/trace/trace_stat.c
12306 +++ b/kernel/trace/trace_stat.c
12307 @@ -277,18 +277,22 @@ static int tracing_stat_init(void)
12308
12309 d_tracing = tracing_init_dentry();
12310 if (IS_ERR(d_tracing))
12311 - return 0;
12312 + return -ENODEV;
12313
12314 stat_dir = tracefs_create_dir("trace_stat", d_tracing);
12315 - if (!stat_dir)
12316 + if (!stat_dir) {
12317 pr_warn("Could not create tracefs 'trace_stat' entry\n");
12318 + return -ENOMEM;
12319 + }
12320 return 0;
12321 }
12322
12323 static int init_stat_file(struct stat_session *session)
12324 {
12325 - if (!stat_dir && tracing_stat_init())
12326 - return -ENODEV;
12327 + int ret;
12328 +
12329 + if (!stat_dir && (ret = tracing_stat_init()))
12330 + return ret;
12331
12332 session->file = tracefs_create_file(session->ts->name, 0644,
12333 stat_dir,
12334 @@ -301,7 +305,7 @@ static int init_stat_file(struct stat_session *session)
12335 int register_stat_tracer(struct tracer_stat *trace)
12336 {
12337 struct stat_session *session, *node;
12338 - int ret;
12339 + int ret = -EINVAL;
12340
12341 if (!trace)
12342 return -EINVAL;
12343 @@ -312,17 +316,15 @@ int register_stat_tracer(struct tracer_stat *trace)
12344 /* Already registered? */
12345 mutex_lock(&all_stat_sessions_mutex);
12346 list_for_each_entry(node, &all_stat_sessions, session_list) {
12347 - if (node->ts == trace) {
12348 - mutex_unlock(&all_stat_sessions_mutex);
12349 - return -EINVAL;
12350 - }
12351 + if (node->ts == trace)
12352 + goto out;
12353 }
12354 - mutex_unlock(&all_stat_sessions_mutex);
12355
12356 + ret = -ENOMEM;
12357 /* Init the session */
12358 session = kzalloc(sizeof(*session), GFP_KERNEL);
12359 if (!session)
12360 - return -ENOMEM;
12361 + goto out;
12362
12363 session->ts = trace;
12364 INIT_LIST_HEAD(&session->session_list);
12365 @@ -331,15 +333,16 @@ int register_stat_tracer(struct tracer_stat *trace)
12366 ret = init_stat_file(session);
12367 if (ret) {
12368 destroy_session(session);
12369 - return ret;
12370 + goto out;
12371 }
12372
12373 + ret = 0;
12374 /* Register */
12375 - mutex_lock(&all_stat_sessions_mutex);
12376 list_add_tail(&session->session_list, &all_stat_sessions);
12377 + out:
12378 mutex_unlock(&all_stat_sessions_mutex);
12379
12380 - return 0;
12381 + return ret;
12382 }
12383
12384 void unregister_stat_tracer(struct tracer_stat *trace)
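
The trace_stat.c changes above keep all_stat_sessions_mutex held from the duplicate check through the list insertion and funnel every exit through a single out label, which closes the window where two registrations of the same tracer could both pass the check. A small sketch of that structure with a pthread mutex (the fixed-size array standing in for the session list is purely illustrative):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t reg_lock = PTHREAD_MUTEX_INITIALIZER;
static const char *registered[8];
static int nregistered;

/* Register a name unless it is already present; the lock is held across
 * the lookup and the insertion, and released on every path via `out`. */
static int register_name(const char *name)
{
	int i, ret = -1;	/* stands in for -EINVAL */

	pthread_mutex_lock(&reg_lock);
	for (i = 0; i < nregistered; i++)
		if (strcmp(registered[i], name) == 0)
			goto out;	/* already registered */

	if (nregistered == 8) {
		ret = -2;		/* stands in for -ENOMEM */
		goto out;
	}
	registered[nregistered++] = name;
	ret = 0;
out:
	pthread_mutex_unlock(&reg_lock);
	return ret;
}

int main(void)
{
	printf("%d\n", register_name("branch"));	/* 0 */
	printf("%d\n", register_name("branch"));	/* -1, duplicate */
	return 0;
}
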
12385 diff --git a/lib/scatterlist.c b/lib/scatterlist.c
12386 index a854cc39f084..ef8c14a56d0a 100644
12387 --- a/lib/scatterlist.c
12388 +++ b/lib/scatterlist.c
12389 @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
12390 if (prv)
12391 table->nents = ++table->orig_nents;
12392
12393 - return -ENOMEM;
12394 + return -ENOMEM;
12395 }
12396
12397 sg_init_table(sg, alloc_size);
12398 diff --git a/lib/stackdepot.c b/lib/stackdepot.c
12399 index f87d138e9672..759ff419fe61 100644
12400 --- a/lib/stackdepot.c
12401 +++ b/lib/stackdepot.c
12402 @@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc)
12403 return true;
12404 if (stack_slabs[depot_index] == NULL) {
12405 stack_slabs[depot_index] = *prealloc;
12406 + *prealloc = NULL;
12407 } else {
12408 - stack_slabs[depot_index + 1] = *prealloc;
12409 + /* If this is the last depot slab, do not touch the next one. */
12410 + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
12411 + stack_slabs[depot_index + 1] = *prealloc;
12412 + *prealloc = NULL;
12413 + }
12414 /*
12415 * This smp_store_release pairs with smp_load_acquire() from
12416 * |next_slab_inited| above and in depot_save_stack().
12417 */
12418 smp_store_release(&next_slab_inited, 1);
12419 }
12420 - *prealloc = NULL;
12421 return true;
12422 }
12423
12424 diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
12425 index dffee9d47ec4..7b993f25aab9 100644
12426 --- a/net/netfilter/xt_bpf.c
12427 +++ b/net/netfilter/xt_bpf.c
12428 @@ -25,6 +25,9 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
12429 struct xt_bpf_info *info = par->matchinfo;
12430 struct sock_fprog_kern program;
12431
12432 + if (info->bpf_program_num_elem > XT_BPF_MAX_NUM_INSTR)
12433 + return -EINVAL;
12434 +
12435 program.len = info->bpf_program_num_elem;
12436 program.filter = info->bpf_program;
12437
12438 diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
12439 index a1a29cdc58fc..140a9ae262ef 100644
12440 --- a/net/netfilter/xt_hashlimit.c
12441 +++ b/net/netfilter/xt_hashlimit.c
12442 @@ -735,6 +735,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
12443 return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
12444 }
12445
12446 +#define HASHLIMIT_MAX_SIZE 1048576
12447 +
12448 static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
12449 struct xt_hashlimit_htable **hinfo,
12450 struct hashlimit_cfg2 *cfg,
12451 @@ -745,6 +747,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
12452
12453 if (cfg->gc_interval == 0 || cfg->expire == 0)
12454 return -EINVAL;
12455 + if (cfg->size > HASHLIMIT_MAX_SIZE) {
12456 + cfg->size = HASHLIMIT_MAX_SIZE;
12457 + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
12458 + }
12459 + if (cfg->max > HASHLIMIT_MAX_SIZE) {
12460 + cfg->max = HASHLIMIT_MAX_SIZE;
12461 + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
12462 + }
12463 if (par->family == NFPROTO_IPV4) {
12464 if (cfg->srcmask > 32 || cfg->dstmask > 32)
12465 return -EINVAL;
12466 diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
12467 index eee299bb6bcf..de03b7b49e05 100644
12468 --- a/net/sched/cls_flower.c
12469 +++ b/net/sched/cls_flower.c
12470 @@ -364,6 +364,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
12471 [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
12472 [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
12473 [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
12474 + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
12475 };
12476
12477 static void fl_set_key_val(struct nlattr **tb,
12478 diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
12479 index 61ddfbad2aae..fe29c576e494 100644
12480 --- a/net/sched/cls_matchall.c
12481 +++ b/net/sched/cls_matchall.c
12482 @@ -111,6 +111,7 @@ static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
12483 static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
12484 [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
12485 [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
12486 + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
12487 };
12488
12489 static int mall_set_parms(struct net *net, struct tcf_proto *tp,
12490 diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
12491 index 27aac273205b..fa423fcd1a92 100644
12492 --- a/scripts/kconfig/confdata.c
12493 +++ b/scripts/kconfig/confdata.c
12494 @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode)
12495
12496 sym_calc_value(csym);
12497 if (mode == def_random)
12498 - has_changed = randomize_choice_values(csym);
12499 + has_changed |= randomize_choice_values(csym);
12500 else {
12501 set_all_choice_values(csym);
12502 has_changed = true;
12503 diff --git a/security/selinux/avc.c b/security/selinux/avc.c
12504 index 52f3c550abcc..f3c473791b69 100644
12505 --- a/security/selinux/avc.c
12506 +++ b/security/selinux/avc.c
12507 @@ -865,7 +865,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid,
12508 if (orig->ae.xp_node) {
12509 rc = avc_xperms_populate(node, orig->ae.xp_node);
12510 if (rc) {
12511 - kmem_cache_free(avc_node_cachep, node);
12512 + avc_node_kill(node);
12513 goto out_unlock;
12514 }
12515 }
12516 diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
12517 index eee4ea17a8f5..198eea5c8c2f 100644
12518 --- a/sound/core/seq/seq_clientmgr.c
12519 +++ b/sound/core/seq/seq_clientmgr.c
12520 @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event,
12521 event->queue = queue;
12522 event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK;
12523 if (real_time) {
12524 - event->time.time = snd_seq_timer_get_cur_time(q->timer);
12525 + event->time.time = snd_seq_timer_get_cur_time(q->timer, true);
12526 event->flags |= SNDRV_SEQ_TIME_STAMP_REAL;
12527 } else {
12528 event->time.tick = snd_seq_timer_get_cur_tick(q->timer);
12529 @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client,
12530 tmr = queue->timer;
12531 status->events = queue->tickq->cells + queue->timeq->cells;
12532
12533 - status->time = snd_seq_timer_get_cur_time(tmr);
12534 + status->time = snd_seq_timer_get_cur_time(tmr, true);
12535 status->tick = snd_seq_timer_get_cur_tick(tmr);
12536
12537 status->running = tmr->running;
12538 diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
12539 index 1a6dc4ff44a6..ea1aa0796276 100644
12540 --- a/sound/core/seq/seq_queue.c
12541 +++ b/sound/core/seq/seq_queue.c
12542 @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
12543 {
12544 unsigned long flags;
12545 struct snd_seq_event_cell *cell;
12546 + snd_seq_tick_time_t cur_tick;
12547 + snd_seq_real_time_t cur_time;
12548
12549 if (q == NULL)
12550 return;
12551 @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
12552
12553 __again:
12554 /* Process tick queue... */
12555 + cur_tick = snd_seq_timer_get_cur_tick(q->timer);
12556 for (;;) {
12557 - cell = snd_seq_prioq_cell_out(q->tickq,
12558 - &q->timer->tick.cur_tick);
12559 + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick);
12560 if (!cell)
12561 break;
12562 snd_seq_dispatch_event(cell, atomic, hop);
12563 }
12564
12565 /* Process time queue... */
12566 + cur_time = snd_seq_timer_get_cur_time(q->timer, false);
12567 for (;;) {
12568 - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
12569 + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time);
12570 if (!cell)
12571 break;
12572 snd_seq_dispatch_event(cell, atomic, hop);
12573 @@ -415,6 +418,7 @@ int snd_seq_queue_check_access(int queueid, int client)
12574 int snd_seq_queue_set_owner(int queueid, int client, int locked)
12575 {
12576 struct snd_seq_queue *q = queueptr(queueid);
12577 + unsigned long flags;
12578
12579 if (q == NULL)
12580 return -EINVAL;
12581 @@ -424,8 +428,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked)
12582 return -EPERM;
12583 }
12584
12585 + spin_lock_irqsave(&q->owner_lock, flags);
12586 q->locked = locked ? 1 : 0;
12587 q->owner = client;
12588 + spin_unlock_irqrestore(&q->owner_lock, flags);
12589 queue_access_unlock(q);
12590 queuefree(q);
12591
12592 @@ -564,15 +570,17 @@ void snd_seq_queue_client_termination(int client)
12593 unsigned long flags;
12594 int i;
12595 struct snd_seq_queue *q;
12596 + bool matched;
12597
12598 for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
12599 if ((q = queueptr(i)) == NULL)
12600 continue;
12601 spin_lock_irqsave(&q->owner_lock, flags);
12602 - if (q->owner == client)
12603 + matched = (q->owner == client);
12604 + if (matched)
12605 q->klocked = 1;
12606 spin_unlock_irqrestore(&q->owner_lock, flags);
12607 - if (q->owner == client) {
12608 + if (matched) {
12609 if (q->timer->running)
12610 snd_seq_timer_stop(q->timer);
12611 snd_seq_timer_reset(q->timer);
12612 @@ -764,6 +772,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
12613 int i, bpm;
12614 struct snd_seq_queue *q;
12615 struct snd_seq_timer *tmr;
12616 + bool locked;
12617 + int owner;
12618
12619 for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
12620 if ((q = queueptr(i)) == NULL)
12621 @@ -775,9 +785,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
12622 else
12623 bpm = 0;
12624
12625 + spin_lock_irq(&q->owner_lock);
12626 + locked = q->locked;
12627 + owner = q->owner;
12628 + spin_unlock_irq(&q->owner_lock);
12629 +
12630 snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name);
12631 - snd_iprintf(buffer, "owned by client : %d\n", q->owner);
12632 - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free");
12633 + snd_iprintf(buffer, "owned by client : %d\n", owner);
12634 + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free");
12635 snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq));
12636 snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq));
12637 snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped");
12638 diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
12639 index 0e1feb597586..bd5e5a5d52a8 100644
12640 --- a/sound/core/seq/seq_timer.c
12641 +++ b/sound/core/seq/seq_timer.c
12642 @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr)
12643 }
12644
12645 /* return current 'real' time. use timeofday() to get better granularity. */
12646 -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
12647 +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
12648 + bool adjust_ktime)
12649 {
12650 snd_seq_real_time_t cur_time;
12651 unsigned long flags;
12652
12653 spin_lock_irqsave(&tmr->lock, flags);
12654 cur_time = tmr->cur_time;
12655 - if (tmr->running) {
12656 + if (adjust_ktime && tmr->running) {
12657 struct timespec64 tm;
12658
12659 ktime_get_ts64(&tm);
12660 @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
12661 high PPQ values) */
12662 snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr)
12663 {
12664 - return tmr->tick.cur_tick;
12665 + snd_seq_tick_time_t cur_tick;
12666 + unsigned long flags;
12667 +
12668 + spin_lock_irqsave(&tmr->lock, flags);
12669 + cur_tick = tmr->tick.cur_tick;
12670 + spin_unlock_irqrestore(&tmr->lock, flags);
12671 + return cur_tick;
12672 }
12673
12674
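
The seq_timer.c hunk above stops callers from reading tmr->tick.cur_tick directly and instead returns a copy taken under tmr->lock, so a concurrent timer update can never be observed half-done. The same pattern in plain C with a pthread mutex (types and the update step are illustrative):

#include <pthread.h>
#include <stdio.h>

struct seq_timer {
	pthread_mutex_t lock;
	unsigned int cur_tick;
};

/* Writer: advance the tick under the lock. */
static void timer_advance(struct seq_timer *tmr)
{
	pthread_mutex_lock(&tmr->lock);
	tmr->cur_tick++;
	pthread_mutex_unlock(&tmr->lock);
}

/* Reader: take a consistent snapshot under the lock and return it by
 * value instead of exposing the shared field to the caller. */
static unsigned int timer_get_cur_tick(struct seq_timer *tmr)
{
	unsigned int tick;

	pthread_mutex_lock(&tmr->lock);
	tick = tmr->cur_tick;
	pthread_mutex_unlock(&tmr->lock);
	return tick;
}

int main(void)
{
	struct seq_timer tmr = { PTHREAD_MUTEX_INITIALIZER, 0 };

	timer_advance(&tmr);
	printf("cur_tick = %u\n", timer_get_cur_tick(&tmr));
	return 0;
}
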
12675 diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h
12676 index 9506b661fe5b..5d47d559465e 100644
12677 --- a/sound/core/seq/seq_timer.h
12678 +++ b/sound/core/seq/seq_timer.h
12679 @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq);
12680 int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position);
12681 int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position);
12682 int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base);
12683 -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr);
12684 +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
12685 + bool adjust_ktime);
12686 snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr);
12687
12688 extern int seq_default_timer_class;
12689 diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c
12690 index f21633cd9b38..acbe61b8db7b 100644
12691 --- a/sound/hda/hdmi_chmap.c
12692 +++ b/sound/hda/hdmi_chmap.c
12693 @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen)
12694
12695 for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) {
12696 if (spk_alloc & (1 << i))
12697 - j += snprintf(buf + j, buflen - j, " %s",
12698 + j += scnprintf(buf + j, buflen - j, " %s",
12699 cea_speaker_allocation_names[i]);
12700 }
12701 buf[j] = '\0'; /* necessary when j == 0 */
12702 diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
12703 index 1b5e217d1bb2..2ad28ce7ff49 100644
12704 --- a/sound/pci/hda/hda_codec.c
12705 +++ b/sound/pci/hda/hda_codec.c
12706 @@ -4104,7 +4104,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen)
12707
12708 for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++)
12709 if (pcm & (AC_SUPPCM_BITS_8 << i))
12710 - j += snprintf(buf + j, buflen - j, " %d", bits[i]);
12711 + j += scnprintf(buf + j, buflen - j, " %d", bits[i]);
12712
12713 buf[j] = '\0'; /* necessary when j == 0 */
12714 }
12715 diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
12716 index ba7fe9b6655c..864cc8c9ada0 100644
12717 --- a/sound/pci/hda/hda_eld.c
12718 +++ b/sound/pci/hda/hda_eld.c
12719 @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen)
12720
12721 for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++)
12722 if (pcm & (1 << i))
12723 - j += snprintf(buf + j, buflen - j, " %d",
12724 + j += scnprintf(buf + j, buflen - j, " %d",
12725 alsa_rates[i]);
12726
12727 buf[j] = '\0'; /* necessary when j == 0 */
12728 diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
12729 index 9739fce9e032..f3ac19d33bd4 100644
12730 --- a/sound/pci/hda/hda_sysfs.c
12731 +++ b/sound/pci/hda/hda_sysfs.c
12732 @@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev,
12733 mutex_lock(&codec->user_mutex);
12734 for (i = 0; i < codec->init_verbs.used; i++) {
12735 struct hda_verb *v = snd_array_elem(&codec->init_verbs, i);
12736 - len += snprintf(buf + len, PAGE_SIZE - len,
12737 + len += scnprintf(buf + len, PAGE_SIZE - len,
12738 "0x%02x 0x%03x 0x%04x\n",
12739 v->nid, v->verb, v->param);
12740 }
12741 @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev,
12742 mutex_lock(&codec->user_mutex);
12743 for (i = 0; i < codec->hints.used; i++) {
12744 struct hda_hint *hint = snd_array_elem(&codec->hints, i);
12745 - len += snprintf(buf + len, PAGE_SIZE - len,
12746 + len += scnprintf(buf + len, PAGE_SIZE - len,
12747 "%s = %s\n", hint->key, hint->val);
12748 }
12749 mutex_unlock(&codec->user_mutex);
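
The snprintf() -> scnprintf() conversions in the sound/ hunks above matter because snprintf() returns the length the full string would have had, so when output is truncated the accumulated offset can end up past the buffer and the next write starts out of bounds; scnprintf() returns only the number of bytes actually stored. A userspace demonstration, with a minimal scnprintf-style wrapper standing in for the kernel helper:

#include <stdarg.h>
#include <stdio.h>

/* Minimal stand-in for the kernel's scnprintf(): never reports more
 * than the space that was actually used (excluding the NUL). */
static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int ret;

	if (size == 0)
		return 0;
	va_start(ap, fmt);
	ret = vsnprintf(buf, size, fmt, ap);
	va_end(ap);

	if (ret < 0)
		return 0;
	return (size_t)ret >= size ? (int)size - 1 : ret;
}

int main(void)
{
	char buf[8];
	int j;

	/* snprintf: the return value counts the truncated part too, so the
	 * running offset (16 here) already points past the 8-byte buffer. */
	j = snprintf(buf, sizeof(buf), "0123456789abcdef");
	printf("snprintf  offset: %d (buffer is %zu bytes)\n", j, sizeof(buf));

	/* scnprintf-style: the offset stays within the buffer. */
	j = my_scnprintf(buf, sizeof(buf), "0123456789abcdef");
	printf("scnprintf offset: %d\n", j);
	return 0;
}
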
12750 diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
12751 index 8557b94e462c..1e99500dbb6c 100644
12752 --- a/sound/pci/hda/patch_conexant.c
12753 +++ b/sound/pci/hda/patch_conexant.c
12754 @@ -853,6 +853,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
12755 SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410),
12756 SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410),
12757 SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410),
12758 + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410),
12759 SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410),
12760 SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410),
12761 SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD),
12762 diff --git a/sound/sh/aica.c b/sound/sh/aica.c
12763 index fbbc25279559..2a127feb8e29 100644
12764 --- a/sound/sh/aica.c
12765 +++ b/sound/sh/aica.c
12766 @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length)
12767 }
12768
12769 /* spu_memload - write to SPU address space */
12770 -static void spu_memload(u32 toi, void *from, int length)
12771 +static void spu_memload(u32 toi, const void *from, int length)
12772 {
12773 unsigned long flags;
12774 - u32 *froml = from;
12775 + const u32 *froml = from;
12776 u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi);
12777 int i;
12778 u32 val;
12779 diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
12780 index 22aec9a1e9a4..838d03a138ca 100644
12781 --- a/sound/soc/atmel/Kconfig
12782 +++ b/sound/soc/atmel/Kconfig
12783 @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA
12784
12785 config SND_ATMEL_SOC_SSC_DMA
12786 tristate
12787 + select SND_ATMEL_SOC_DMA
12788 + select SND_ATMEL_SOC_PDC
12789
12790 config SND_ATMEL_SOC_SSC
12791 tristate
12792 diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
12793 index a5299cbb09ba..064f3485a977 100644
12794 --- a/sound/usb/quirks.c
12795 +++ b/sound/usb/quirks.c
12796 @@ -1149,6 +1149,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
12797 case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
12798 case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
12799 case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
12800 + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */
12801 return true;
12802 }
12803 return false;
12804 diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
12805 index 0b34dbc8f302..7dcb33d3886b 100644
12806 --- a/sound/usb/usx2y/usX2Yhwdep.c
12807 +++ b/sound/usb/usx2y/usX2Yhwdep.c
12808 @@ -132,7 +132,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw,
12809 info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code
12810 if (us428->chip_status & USX2Y_STAT_CHIP_INIT)
12811 info->chip_ready = 1;
12812 - info->version = USX2Y_DRIVER_VERSION;
12813 + info->version = USX2Y_DRIVER_VERSION;
12814 return 0;
12815 }
12816
12817 diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
12818 index f99f49e4a31e..21e714cf0126 100644
12819 --- a/tools/lib/api/fs/fs.c
12820 +++ b/tools/lib/api/fs/fs.c
12821 @@ -194,6 +194,7 @@ static bool fs__env_override(struct fs *fs)
12822 size_t name_len = strlen(fs->name);
12823 /* name + "_PATH" + '\0' */
12824 char upper_name[name_len + 5 + 1];
12825 +
12826 memcpy(upper_name, fs->name, name_len);
12827 mem_toupper(upper_name, name_len);
12828 strcpy(&upper_name[name_len], "_PATH");
12829 @@ -203,7 +204,8 @@ static bool fs__env_override(struct fs *fs)
12830 return false;
12831
12832 fs->found = true;
12833 - strncpy(fs->path, override_path, sizeof(fs->path));
12834 + strncpy(fs->path, override_path, sizeof(fs->path) - 1);
12835 + fs->path[sizeof(fs->path) - 1] = '\0';
12836 return true;
12837 }
12838
12839 diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12840 index 0f7eb4f5bdb7..82e105b284e0 100644
12841 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12842 +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12843 @@ -909,7 +909,7 @@ EndTable
12844
12845 GrpTable: Grp3_2
12846 0: TEST Ev,Iz
12847 -1:
12848 +1: TEST Ev,Iz
12849 2: NOT Ev
12850 3: NEG Ev
12851 4: MUL rAX,Ev
12852 diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c
12853 index b4c37e76a6e0..187dfaa67d0a 100644
12854 --- a/tools/usb/usbip/src/usbip_network.c
12855 +++ b/tools/usb/usbip/src/usbip_network.c
12856 @@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg)
12857 info("using port %d (\"%s\")", usbip_port, usbip_port_string);
12858 }
12859
12860 -void usbip_net_pack_uint32_t(int pack, uint32_t *num)
12861 +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num)
12862 {
12863 uint32_t i;
12864
12865 if (pack)
12866 - i = htonl(*num);
12867 + i = htonl(num);
12868 else
12869 - i = ntohl(*num);
12870 + i = ntohl(num);
12871
12872 - *num = i;
12873 + return i;
12874 }
12875
12876 -void usbip_net_pack_uint16_t(int pack, uint16_t *num)
12877 +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num)
12878 {
12879 uint16_t i;
12880
12881 if (pack)
12882 - i = htons(*num);
12883 + i = htons(num);
12884 else
12885 - i = ntohs(*num);
12886 + i = ntohs(num);
12887
12888 - *num = i;
12889 + return i;
12890 }
12891
12892 void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev)
12893 {
12894 - usbip_net_pack_uint32_t(pack, &udev->busnum);
12895 - usbip_net_pack_uint32_t(pack, &udev->devnum);
12896 - usbip_net_pack_uint32_t(pack, &udev->speed);
12897 + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum);
12898 + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum);
12899 + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed);
12900
12901 - usbip_net_pack_uint16_t(pack, &udev->idVendor);
12902 - usbip_net_pack_uint16_t(pack, &udev->idProduct);
12903 - usbip_net_pack_uint16_t(pack, &udev->bcdDevice);
12904 + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor);
12905 + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct);
12906 + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice);
12907 }
12908
12909 void usbip_net_pack_usb_interface(int pack __attribute__((unused)),
12910 @@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen)
12911 return usbip_net_xmit(sockfd, buff, bufflen, 1);
12912 }
12913
12914 +static inline void usbip_net_pack_op_common(int pack,
12915 + struct op_common *op_common)
12916 +{
12917 + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version);
12918 + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code);
12919 + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status);
12920 +}
12921 +
12922 int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
12923 {
12924 struct op_common op_common;
12925 @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
12926 op_common.code = code;
12927 op_common.status = status;
12928
12929 - PACK_OP_COMMON(1, &op_common);
12930 + usbip_net_pack_op_common(1, &op_common);
12931
12932 rc = usbip_net_send(sockfd, &op_common, sizeof(op_common));
12933 if (rc < 0) {
12934 @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code)
12935 goto err;
12936 }
12937
12938 - PACK_OP_COMMON(0, &op_common);
12939 + usbip_net_pack_op_common(0, &op_common);
12940
12941 if (op_common.version != USBIP_VERSION) {
12942 dbg("version mismatch: %d %d", op_common.version,
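
The usbip changes above convert the byte-order pack helpers from mutating through a pointer to returning the converted value, so call sites become plain assignments and no address of a structure member (which may sit unaligned inside a packed header) is ever formed. A compact userspace sketch of the value-returning style; the header layout here is invented for illustration:

#include <arpa/inet.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Value-returning byte-order helper: pack == 1 converts host -> network
 * order, pack == 0 converts back (mirrors usbip_net_pack_uint32_t). */
static uint32_t pack_uint32(int pack, uint32_t num)
{
	return pack ? htonl(num) : ntohl(num);
}

struct __attribute__((packed)) header {
	uint16_t version;
	uint32_t status;	/* misaligned because of the packed layout */
};

int main(void)
{
	struct header h = { .version = 0x0111, .status = 5 };

	/* Call sites become plain assignments; no pointer to the packed,
	 * possibly misaligned member is ever taken. */
	h.status = pack_uint32(1, h.status);
	printf("status on the wire: 0x%08" PRIx32 "\n", h.status);

	h.status = pack_uint32(0, h.status);
	printf("status decoded:     %" PRIu32 "\n", h.status);
	return 0;
}
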
12943 diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h
12944 index c1e875cf1078..573fa839b66b 100644
12945 --- a/tools/usb/usbip/src/usbip_network.h
12946 +++ b/tools/usb/usbip/src/usbip_network.h
12947 @@ -33,12 +33,6 @@ struct op_common {
12948
12949 } __attribute__((packed));
12950
12951 -#define PACK_OP_COMMON(pack, op_common) do {\
12952 - usbip_net_pack_uint16_t(pack, &(op_common)->version);\
12953 - usbip_net_pack_uint16_t(pack, &(op_common)->code);\
12954 - usbip_net_pack_uint32_t(pack, &(op_common)->status);\
12955 -} while (0)
12956 -
12957 /* ---------------------------------------------------------------------- */
12958 /* Dummy Code */
12959 #define OP_UNSPEC 0x00
12960 @@ -164,11 +158,11 @@ struct op_devlist_reply_extra {
12961 } while (0)
12962
12963 #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\
12964 - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\
12965 + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\
12966 } while (0)
12967
12968 -void usbip_net_pack_uint32_t(int pack, uint32_t *num);
12969 -void usbip_net_pack_uint16_t(int pack, uint16_t *num);
12970 +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num);
12971 +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num);
12972 void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev);
12973 void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf);
12974