Magellan Linux

Annotation of /trunk/kernel-alx-legacy/patches-4.9/0314-4.9.215-all-fixes.patch



Revision 3608
Fri Aug 14 07:34:29 2020 UTC by niro
File size: 401597 bytes
-added kernel-alx-legacy pkg
1 niro 3608 diff --git a/Makefile b/Makefile
2     index 9a6aa41a9ec1..b594484788a8 100644
3     --- a/Makefile
4     +++ b/Makefile
5     @@ -1,6 +1,6 @@
6     VERSION = 4
7     PATCHLEVEL = 9
8     -SUBLEVEL = 214
9     +SUBLEVEL = 215
10     EXTRAVERSION =
11     NAME = Roaring Lionus
12    
13     diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
14     index 74a70f91b01a..56bd9beb6a35 100644
15     --- a/arch/arm/Kconfig
16     +++ b/arch/arm/Kconfig
17     @@ -2020,7 +2020,7 @@ config XIP_PHYS_ADDR
18     config KEXEC
19     bool "Kexec system call (EXPERIMENTAL)"
20     depends on (!SMP || PM_SLEEP_SMP)
21     - depends on !CPU_V7M
22     + depends on MMU
23     select KEXEC_CORE
24     help
25     kexec is a system call that implements the ability to shutdown your
26     diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi
27     index b9bbcce69dfb..6c6d4893e92d 100644
28     --- a/arch/arm/boot/dts/r8a7779.dtsi
29     +++ b/arch/arm/boot/dts/r8a7779.dtsi
30     @@ -67,6 +67,14 @@
31     <0xf0000100 0x100>;
32     };
33    
34     + timer@f0000200 {
35     + compatible = "arm,cortex-a9-global-timer";
36     + reg = <0xf0000200 0x100>;
37     + interrupts = <GIC_PPI 11
38     + (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
39     + clocks = <&cpg_clocks R8A7779_CLK_ZS>;
40     + };
41     +
42     timer@f0000600 {
43     compatible = "arm,cortex-a9-twd-timer";
44     reg = <0xf0000600 0x20>;
45     diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
46     index 7e842dcae450..3626655175a2 100644
47     --- a/arch/arm64/include/asm/alternative.h
48     +++ b/arch/arm64/include/asm/alternative.h
49     @@ -29,13 +29,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt,
50     void __init apply_alternatives_all(void);
51     void apply_alternatives(void *start, size_t length);
52    
53     -#define ALTINSTR_ENTRY(feature,cb) \
54     +#define ALTINSTR_ENTRY(feature) \
55     " .word 661b - .\n" /* label */ \
56     - " .if " __stringify(cb) " == 0\n" \
57     " .word 663f - .\n" /* new instruction */ \
58     - " .else\n" \
59     + " .hword " __stringify(feature) "\n" /* feature bit */ \
60     + " .byte 662b-661b\n" /* source len */ \
61     + " .byte 664f-663f\n" /* replacement len */
62     +
63     +#define ALTINSTR_ENTRY_CB(feature, cb) \
64     + " .word 661b - .\n" /* label */ \
65     " .word " __stringify(cb) "- .\n" /* callback */ \
66     - " .endif\n" \
67     " .hword " __stringify(feature) "\n" /* feature bit */ \
68     " .byte 662b-661b\n" /* source len */ \
69     " .byte 664f-663f\n" /* replacement len */
70     @@ -56,15 +59,14 @@ void apply_alternatives(void *start, size_t length);
71     *
72     * Alternatives with callbacks do not generate replacement instructions.
73     */
74     -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
75     +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
76     ".if "__stringify(cfg_enabled)" == 1\n" \
77     "661:\n\t" \
78     oldinstr "\n" \
79     "662:\n" \
80     ".pushsection .altinstructions,\"a\"\n" \
81     - ALTINSTR_ENTRY(feature,cb) \
82     + ALTINSTR_ENTRY(feature) \
83     ".popsection\n" \
84     - " .if " __stringify(cb) " == 0\n" \
85     ".pushsection .altinstr_replacement, \"a\"\n" \
86     "663:\n\t" \
87     newinstr "\n" \
88     @@ -72,17 +74,25 @@ void apply_alternatives(void *start, size_t length);
89     ".popsection\n\t" \
90     ".org . - (664b-663b) + (662b-661b)\n\t" \
91     ".org . - (662b-661b) + (664b-663b)\n" \
92     - ".else\n\t" \
93     + ".endif\n"
94     +
95     +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \
96     + ".if "__stringify(cfg_enabled)" == 1\n" \
97     + "661:\n\t" \
98     + oldinstr "\n" \
99     + "662:\n" \
100     + ".pushsection .altinstructions,\"a\"\n" \
101     + ALTINSTR_ENTRY_CB(feature, cb) \
102     + ".popsection\n" \
103     "663:\n\t" \
104     "664:\n\t" \
105     - ".endif\n" \
106     ".endif\n"
107    
108     #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
109     - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
110     + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
111    
112     #define ALTERNATIVE_CB(oldinstr, cb) \
113     - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
114     + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
115     #else
116    
117     #include <asm/assembler.h>
118     diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c
119     index 0bde47e4fa69..dcba53803fa5 100644
120     --- a/arch/microblaze/kernel/cpu/cache.c
121     +++ b/arch/microblaze/kernel/cpu/cache.c
122     @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void)
123     #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \
124     do { \
125     int align = ~(cache_line_length - 1); \
126     - end = min(start + cache_size, end); \
127     + if (start < UINT_MAX - cache_size) \
128     + end = min(start + cache_size, end); \
129     start &= align; \
130     } while (0)
131    
132     diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c
133     index 25a97cc0ee33..0db4cc3196eb 100644
134     --- a/arch/mips/loongson64/loongson-3/platform.c
135     +++ b/arch/mips/loongson64/loongson-3/platform.c
136     @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void)
137     continue;
138    
139     pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL);
140     + if (!pdev)
141     + return -ENOMEM;
142     +
143     pdev->name = loongson_sysconf.sensors[i].name;
144     pdev->id = loongson_sysconf.sensors[i].id;
145     pdev->dev.platform_data = &loongson_sysconf.sensors[i];
146     diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
147     index 620e08d4eb6e..adac3dee4c57 100644
148     --- a/arch/powerpc/kernel/eeh_driver.c
149     +++ b/arch/powerpc/kernel/eeh_driver.c
150     @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata)
151    
152     pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0);
153     edev->pdev = NULL;
154     -
155     - /*
156     - * We have to set the VF PE number to invalid one, which is
157     - * required to plug the VF successfully.
158     - */
159     - pdn->pe_number = IODA_INVALID_PE;
160     #endif
161     if (rmv_data)
162     list_add(&edev->rmv_list, &rmv_data->edev_list);
163     diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
164     index 592693437070..c8f1b78fbd0e 100644
165     --- a/arch/powerpc/kernel/pci_dn.c
166     +++ b/arch/powerpc/kernel/pci_dn.c
167     @@ -271,9 +271,22 @@ void remove_dev_pci_data(struct pci_dev *pdev)
168     continue;
169    
170     #ifdef CONFIG_EEH
171     - /* Release EEH device for the VF */
172     + /*
173     + * Release EEH state for this VF. The PCI core
174     + * has already torn down the pci_dev for this VF, but
175     + * we're responsible to removing the eeh_dev since it
176     + * has the same lifetime as the pci_dn that spawned it.
177     + */
178     edev = pdn_to_eeh_dev(pdn);
179     if (edev) {
180     + /*
181     + * We allocate pci_dn's for the totalvfs count,
182     + * but only only the vfs that were activated
183     + * have a configured PE.
184     + */
185     + if (edev->pe)
186     + eeh_rmv_from_parent_pe(edev);
187     +
188     pdn->edev = NULL;
189     kfree(edev);
190     }
191     diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
192     index 3ec673b4ca6c..b787a669a1e2 100644
193     --- a/arch/powerpc/platforms/powernv/pci-ioda.c
194     +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
195     @@ -1524,6 +1524,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
196    
197     /* Reserve PE for each VF */
198     for (vf_index = 0; vf_index < num_vfs; vf_index++) {
199     + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
200     + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
201     + struct pci_dn *vf_pdn;
202     +
203     if (pdn->m64_single_mode)
204     pe_num = pdn->pe_num_map[vf_index];
205     else
206     @@ -1536,13 +1540,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
207     pe->pbus = NULL;
208     pe->parent_dev = pdev;
209     pe->mve_number = -1;
210     - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
211     - pci_iov_virtfn_devfn(pdev, vf_index);
212     + pe->rid = (vf_bus << 8) | vf_devfn;
213    
214     pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
215     hose->global_number, pdev->bus->number,
216     - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
217     - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
218     + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
219    
220     if (pnv_ioda_configure_pe(phb, pe)) {
221     /* XXX What do we do here ? */
222     @@ -1556,6 +1558,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
223     list_add_tail(&pe->list, &phb->ioda.pe_list);
224     mutex_unlock(&phb->ioda.pe_list_mutex);
225    
226     + /* associate this pe to it's pdn */
227     + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
228     + if (vf_pdn->busno == vf_bus &&
229     + vf_pdn->devfn == vf_devfn) {
230     + vf_pdn->pe_number = pe_num;
231     + break;
232     + }
233     + }
234     +
235     pnv_pci_ioda2_setup_dma_pe(phb, pe);
236     }
237     }
238     diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
239     index 00dbf1e895a9..2ed7627e991e 100644
240     --- a/arch/powerpc/platforms/powernv/pci.c
241     +++ b/arch/powerpc/platforms/powernv/pci.c
242     @@ -856,16 +856,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
243     struct pnv_phb *phb = hose->private_data;
244     #ifdef CONFIG_PCI_IOV
245     struct pnv_ioda_pe *pe;
246     - struct pci_dn *pdn;
247    
248     /* Fix the VF pdn PE number */
249     if (pdev->is_virtfn) {
250     - pdn = pci_get_pdn(pdev);
251     - WARN_ON(pdn->pe_number != IODA_INVALID_PE);
252     list_for_each_entry(pe, &phb->ioda.pe_list, list) {
253     if (pe->rid == ((pdev->bus->number << 8) |
254     (pdev->devfn & 0xff))) {
255     - pdn->pe_number = pe->pe_number;
256     pe->pdev = pdev;
257     break;
258     }
259     diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
260     index 69b8a41fca84..e094c0cf6936 100644
261     --- a/arch/s390/include/asm/page.h
262     +++ b/arch/s390/include/asm/page.h
263     @@ -35,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end);
264    
265     static inline void storage_key_init_range(unsigned long start, unsigned long end)
266     {
267     - if (PAGE_DEFAULT_KEY)
268     + if (PAGE_DEFAULT_KEY != 0)
269     __storage_key_init_range(start, end);
270     }
271    
272     diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
273     index 0bb08f341c09..f1330245b584 100644
274     --- a/arch/s390/include/asm/timex.h
275     +++ b/arch/s390/include/asm/timex.h
276     @@ -146,7 +146,7 @@ static inline void get_tod_clock_ext(char *clk)
277    
278     static inline unsigned long long get_tod_clock(void)
279     {
280     - unsigned char clk[STORE_CLOCK_EXT_SIZE];
281     + char clk[STORE_CLOCK_EXT_SIZE];
282    
283     get_tod_clock_ext(clk);
284     return *((unsigned long long *)&clk[1]);
285     diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
286     index be75e8e49e43..802a4ded9a62 100644
287     --- a/arch/s390/kernel/mcount.S
288     +++ b/arch/s390/kernel/mcount.S
289     @@ -24,6 +24,12 @@ ENTRY(ftrace_stub)
290     #define STACK_PTREGS (STACK_FRAME_OVERHEAD)
291     #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
292     #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
293     +#ifdef __PACK_STACK
294     +/* allocate just enough for r14, r15 and backchain */
295     +#define TRACED_FUNC_FRAME_SIZE 24
296     +#else
297     +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
298     +#endif
299    
300     ENTRY(_mcount)
301     BR_EX %r14
302     @@ -37,9 +43,16 @@ ENTRY(ftrace_caller)
303     #ifndef CC_USING_HOTPATCH
304     aghi %r0,MCOUNT_RETURN_FIXUP
305     #endif
306     - aghi %r15,-STACK_FRAME_SIZE
307     + # allocate stack frame for ftrace_caller to contain traced function
308     + aghi %r15,-TRACED_FUNC_FRAME_SIZE
309     stg %r1,__SF_BACKCHAIN(%r15)
310     + stg %r0,(__SF_GPRS+8*8)(%r15)
311     + stg %r15,(__SF_GPRS+9*8)(%r15)
312     + # allocate pt_regs and stack frame for ftrace_trace_function
313     + aghi %r15,-STACK_FRAME_SIZE
314     stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
315     + aghi %r1,-TRACED_FUNC_FRAME_SIZE
316     + stg %r1,__SF_BACKCHAIN(%r15)
317     stg %r0,(STACK_PTREGS_PSW+8)(%r15)
318     stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
319     #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
320     diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
321     index 2a0ca8780f0d..e4caddd443da 100644
322     --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h
323     +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h
324     @@ -79,8 +79,15 @@ enum {
325     GPIO_FN_WDTOVF,
326    
327     /* CAN */
328     - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1,
329     - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2,
330     + GPIO_FN_CTX2, GPIO_FN_CRX2,
331     + GPIO_FN_CTX1, GPIO_FN_CRX1,
332     + GPIO_FN_CTX0, GPIO_FN_CRX0,
333     + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1,
334     + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2,
335     + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20,
336     + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22,
337     + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22,
338     + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20,
339    
340     /* DMAC */
341     GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0,
342     diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
343     index 572db686f845..385d6d04564d 100644
344     --- a/arch/sparc/kernel/vmlinux.lds.S
345     +++ b/arch/sparc/kernel/vmlinux.lds.S
346     @@ -151,12 +151,14 @@ SECTIONS
347     }
348     PERCPU_SECTION(SMP_CACHE_BYTES)
349    
350     -#ifdef CONFIG_JUMP_LABEL
351     . = ALIGN(PAGE_SIZE);
352     .exit.text : {
353     EXIT_TEXT
354     }
355     -#endif
356     +
357     + .exit.data : {
358     + EXIT_DATA
359     + }
360    
361     . = ALIGN(PAGE_SIZE);
362     __init_end = .;
363     diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
364     index 3f9d1a83891a..50c1f77cab15 100644
365     --- a/arch/x86/entry/vdso/vdso32-setup.c
366     +++ b/arch/x86/entry/vdso/vdso32-setup.c
367     @@ -10,6 +10,7 @@
368     #include <linux/smp.h>
369     #include <linux/kernel.h>
370     #include <linux/mm_types.h>
371     +#include <linux/elf.h>
372    
373     #include <asm/processor.h>
374     #include <asm/vdso.h>
375     diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
376     index 00b56cc69d37..836b7e4a2005 100644
377     --- a/arch/x86/events/amd/core.c
378     +++ b/arch/x86/events/amd/core.c
379     @@ -239,6 +239,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
380     [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
381     [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
382     [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
383     + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964,
384     [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
385     [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
386     [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
387     diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
388     index ad31c01f810f..f562ddbeb20c 100644
389     --- a/arch/x86/events/intel/ds.c
390     +++ b/arch/x86/events/intel/ds.c
391     @@ -1326,6 +1326,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
392     old = ((s64)(prev_raw_count << shift) >> shift);
393     local64_add(new - old + count * period, &event->count);
394    
395     + local64_set(&hwc->period_left, -new);
396     +
397     perf_event_update_userpage(event);
398    
399     return 0;
400     diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
401     index ccc4420f051b..fb457ba8ccc6 100644
402     --- a/arch/x86/include/asm/cpufeatures.h
403     +++ b/arch/x86/include/asm/cpufeatures.h
404     @@ -305,6 +305,7 @@
405     /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
406     #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
407     #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
408     +#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
409    
410     /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
411     #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
412     diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
413     index e728699db774..3a01996db58f 100644
414     --- a/arch/x86/include/asm/vgtod.h
415     +++ b/arch/x86/include/asm/vgtod.h
416     @@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
417     * works on all CPUs. This is volatile so that it orders
418     * correctly wrt barrier() and to keep gcc from cleverly
419     * hoisting it out of the calling function.
420     + *
421     + * If RDPID is available, use it.
422     */
423     - asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
424     + alternative_io ("lsl %[p],%[seg]",
425     + ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
426     + X86_FEATURE_RDPID,
427     + [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
428    
429     return p;
430     }
431     diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
432     index 2a473cda3977..775d5f028fe8 100644
433     --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
434     +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
435     @@ -846,9 +846,12 @@ static const struct sysfs_ops threshold_ops = {
436     .store = store,
437     };
438    
439     +static void threshold_block_release(struct kobject *kobj);
440     +
441     static struct kobj_type threshold_ktype = {
442     .sysfs_ops = &threshold_ops,
443     .default_attrs = default_attrs,
444     + .release = threshold_block_release,
445     };
446    
447     static const char *get_name(unsigned int bank, struct threshold_block *b)
448     @@ -879,8 +882,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
449     return buf_mcatype;
450     }
451    
452     -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
453     - unsigned int block, u32 address)
454     +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb,
455     + unsigned int bank, unsigned int block,
456     + u32 address)
457     {
458     struct threshold_block *b = NULL;
459     u32 low, high;
460     @@ -924,16 +928,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
461    
462     INIT_LIST_HEAD(&b->miscj);
463    
464     - if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
465     - list_add(&b->miscj,
466     - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
467     - } else {
468     - per_cpu(threshold_banks, cpu)[bank]->blocks = b;
469     - }
470     + if (tb->blocks)
471     + list_add(&b->miscj, &tb->blocks->miscj);
472     + else
473     + tb->blocks = b;
474    
475     - err = kobject_init_and_add(&b->kobj, &threshold_ktype,
476     - per_cpu(threshold_banks, cpu)[bank]->kobj,
477     - get_name(bank, b));
478     + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b));
479     if (err)
480     goto out_free;
481     recurse:
482     @@ -941,7 +941,7 @@ recurse:
483     if (!address)
484     return 0;
485    
486     - err = allocate_threshold_blocks(cpu, bank, block, address);
487     + err = allocate_threshold_blocks(cpu, tb, bank, block, address);
488     if (err)
489     goto out_free;
490    
491     @@ -1026,8 +1026,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
492     goto out_free;
493     }
494    
495     - per_cpu(threshold_banks, cpu)[bank] = b;
496     -
497     if (is_shared_bank(bank)) {
498     atomic_set(&b->cpus, 1);
499    
500     @@ -1038,9 +1036,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
501     }
502     }
503    
504     - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
505     - if (!err)
506     - goto out;
507     + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
508     + if (err)
509     + goto out_free;
510     +
511     + per_cpu(threshold_banks, cpu)[bank] = b;
512     +
513     + return 0;
514    
515     out_free:
516     kfree(b);
517     @@ -1074,8 +1076,12 @@ static int threshold_create_device(unsigned int cpu)
518     return err;
519     }
520    
521     -static void deallocate_threshold_block(unsigned int cpu,
522     - unsigned int bank)
523     +static void threshold_block_release(struct kobject *kobj)
524     +{
525     + kfree(to_block(kobj));
526     +}
527     +
528     +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank)
529     {
530     struct threshold_block *pos = NULL;
531     struct threshold_block *tmp = NULL;
532     @@ -1085,13 +1091,11 @@ static void deallocate_threshold_block(unsigned int cpu,
533     return;
534    
535     list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
536     - kobject_put(&pos->kobj);
537     list_del(&pos->miscj);
538     - kfree(pos);
539     + kobject_put(&pos->kobj);
540     }
541    
542     - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
543     - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
544     + kobject_put(&head->blocks->kobj);
545     }
546    
547     static void __threshold_remove_blocks(struct threshold_bank *b)
548     diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
549     index 85195d447a92..f3215346e47f 100644
550     --- a/arch/x86/kernel/sysfb_simplefb.c
551     +++ b/arch/x86/kernel/sysfb_simplefb.c
552     @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si,
553     if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
554     size <<= 16;
555     length = mode->height * mode->stride;
556     - length = PAGE_ALIGN(length);
557     if (length > size) {
558     printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
559     return -EINVAL;
560     }
561     + length = PAGE_ALIGN(length);
562    
563     /* setup IORESOURCE_MEM as framebuffer memory */
564     memset(&res, 0, sizeof(res));
565     diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
566     index 242ad06fbe1a..c57dab0884fe 100644
567     --- a/arch/x86/kvm/cpuid.c
568     +++ b/arch/x86/kvm/cpuid.c
569     @@ -279,13 +279,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
570     {
571     switch (func) {
572     case 0:
573     - entry->eax = 1; /* only one leaf currently */
574     + entry->eax = 7;
575     ++*nent;
576     break;
577     case 1:
578     entry->ecx = F(MOVBE);
579     ++*nent;
580     break;
581     + case 7:
582     + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
583     + if (index == 0)
584     + entry->ecx = F(RDPID);
585     + ++*nent;
586     default:
587     break;
588     }
589     diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
590     index c456a9dbade8..e9c7090858d6 100644
591     --- a/arch/x86/kvm/emulate.c
592     +++ b/arch/x86/kvm/emulate.c
593     @@ -3531,6 +3531,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
594     return X86EMUL_CONTINUE;
595     }
596    
597     +static int em_rdpid(struct x86_emulate_ctxt *ctxt)
598     +{
599     + u64 tsc_aux = 0;
600     +
601     + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
602     + return emulate_gp(ctxt, 0);
603     + ctxt->dst.val = tsc_aux;
604     + return X86EMUL_CONTINUE;
605     +}
606     +
607     static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
608     {
609     u64 tsc = 0;
610     @@ -4391,10 +4401,20 @@ static const struct opcode group8[] = {
611     F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
612     };
613    
614     +/*
615     + * The "memory" destination is actually always a register, since we come
616     + * from the register case of group9.
617     + */
618     +static const struct gprefix pfx_0f_c7_7 = {
619     + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
620     +};
621     +
622     +
623     static const struct group_dual group9 = { {
624     N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
625     }, {
626     - N, N, N, N, N, N, N, N,
627     + N, N, N, N, N, N, N,
628     + GP(0, &pfx_0f_c7_7),
629     } };
630    
631     static const struct opcode group11[] = {
632     diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
633     index 6c0191615f23..cf8b3c17657a 100644
634     --- a/arch/x86/kvm/irq_comm.c
635     +++ b/arch/x86/kvm/irq_comm.c
636     @@ -436,7 +436,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
637    
638     kvm_set_msi_irq(vcpu->kvm, entry, &irq);
639    
640     - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
641     + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0,
642     irq.dest_id, irq.dest_mode))
643     __set_bit(irq.vector, ioapic_handled_vectors);
644     }
645     diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
646     index caa17f8d4221..3988e26af3b5 100644
647     --- a/arch/x86/kvm/lapic.c
648     +++ b/arch/x86/kvm/lapic.c
649     @@ -532,9 +532,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
650     static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
651     {
652     u8 val;
653     - if (pv_eoi_get_user(vcpu, &val) < 0)
654     + if (pv_eoi_get_user(vcpu, &val) < 0) {
655     apic_debug("Can't read EOI MSR value: 0x%llx\n",
656     (unsigned long long)vcpu->arch.pv_eoi.msr_val);
657     + return false;
658     + }
659     return val & 0x1;
660     }
661    
662     diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
663     index 67cdb08a736f..8bd336651de5 100644
664     --- a/arch/x86/kvm/vmx.c
665     +++ b/arch/x86/kvm/vmx.c
666     @@ -4641,6 +4641,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
667     (ss.selector & SEGMENT_RPL_MASK));
668     }
669    
670     +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu,
671     + unsigned int port, int size);
672     +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
673     + struct vmcs12 *vmcs12)
674     +{
675     + unsigned long exit_qualification;
676     + unsigned short port;
677     + int size;
678     +
679     + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
680     + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
681     +
682     + exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
683     +
684     + port = exit_qualification >> 16;
685     + size = (exit_qualification & 7) + 1;
686     +
687     + return nested_vmx_check_io_bitmaps(vcpu, port, size);
688     +}
689     +
690     /*
691     * Check if guest state is valid. Returns true if valid, false if
692     * not.
693     @@ -8026,23 +8046,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
694     static const int kvm_vmx_max_exit_handlers =
695     ARRAY_SIZE(kvm_vmx_exit_handlers);
696    
697     -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
698     - struct vmcs12 *vmcs12)
699     +/*
700     + * Return true if an IO instruction with the specified port and size should cause
701     + * a VM-exit into L1.
702     + */
703     +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
704     + int size)
705     {
706     - unsigned long exit_qualification;
707     + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
708     gpa_t bitmap, last_bitmap;
709     - unsigned int port;
710     - int size;
711     u8 b;
712    
713     - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
714     - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
715     -
716     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
717     -
718     - port = exit_qualification >> 16;
719     - size = (exit_qualification & 7) + 1;
720     -
721     last_bitmap = (gpa_t)-1;
722     b = -1;
723    
724     @@ -11335,11 +11349,71 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
725     to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
726     }
727    
728     +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
729     + struct x86_instruction_info *info)
730     +{
731     + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
732     + unsigned short port;
733     + bool intercept;
734     + int size;
735     +
736     + if (info->intercept == x86_intercept_in ||
737     + info->intercept == x86_intercept_ins) {
738     + port = info->src_val;
739     + size = info->dst_bytes;
740     + } else {
741     + port = info->dst_val;
742     + size = info->src_bytes;
743     + }
744     +
745     + /*
746     + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
747     + * VM-exits depend on the 'unconditional IO exiting' VM-execution
748     + * control.
749     + *
750     + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
751     + */
752     + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
753     + intercept = nested_cpu_has(vmcs12,
754     + CPU_BASED_UNCOND_IO_EXITING);
755     + else
756     + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
757     +
758     + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
759     +}
760     +
761     static int vmx_check_intercept(struct kvm_vcpu *vcpu,
762     struct x86_instruction_info *info,
763     enum x86_intercept_stage stage)
764     {
765     - return X86EMUL_CONTINUE;
766     + struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
767     + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
768     +
769     + switch (info->intercept) {
770     + /*
771     + * RDPID causes #UD if disabled through secondary execution controls.
772     + * Because it is marked as EmulateOnUD, we need to intercept it here.
773     + */
774     + case x86_intercept_rdtscp:
775     + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
776     + ctxt->exception.vector = UD_VECTOR;
777     + ctxt->exception.error_code_valid = false;
778     + return X86EMUL_PROPAGATE_FAULT;
779     + }
780     + break;
781     +
782     + case x86_intercept_in:
783     + case x86_intercept_ins:
784     + case x86_intercept_out:
785     + case x86_intercept_outs:
786     + return vmx_check_intercept_io(vcpu, info);
787     +
788     + /* TODO: check more intercepts... */
789     + default:
790     + break;
791     + }
792     +
793     + return X86EMUL_UNHANDLEABLE;
794     }
795    
796     #ifdef CONFIG_X86_64
797     diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
798     deleted file mode 100644
799     index 3791ce8d269e..000000000000
800     --- a/arch/x86/kvm/vmx/vmx.c
801     +++ /dev/null
802     @@ -1,8033 +0,0 @@
803     -// SPDX-License-Identifier: GPL-2.0-only
804     -/*
805     - * Kernel-based Virtual Machine driver for Linux
806     - *
807     - * This module enables machines with Intel VT-x extensions to run virtual
808     - * machines without emulation or binary translation.
809     - *
810     - * Copyright (C) 2006 Qumranet, Inc.
811     - * Copyright 2010 Red Hat, Inc. and/or its affiliates.
812     - *
813     - * Authors:
814     - * Avi Kivity <avi@qumranet.com>
815     - * Yaniv Kamay <yaniv@qumranet.com>
816     - */
817     -
818     -#include <linux/frame.h>
819     -#include <linux/highmem.h>
820     -#include <linux/hrtimer.h>
821     -#include <linux/kernel.h>
822     -#include <linux/kvm_host.h>
823     -#include <linux/module.h>
824     -#include <linux/moduleparam.h>
825     -#include <linux/mod_devicetable.h>
826     -#include <linux/mm.h>
827     -#include <linux/sched.h>
828     -#include <linux/sched/smt.h>
829     -#include <linux/slab.h>
830     -#include <linux/tboot.h>
831     -#include <linux/trace_events.h>
832     -
833     -#include <asm/apic.h>
834     -#include <asm/asm.h>
835     -#include <asm/cpu.h>
836     -#include <asm/debugreg.h>
837     -#include <asm/desc.h>
838     -#include <asm/fpu/internal.h>
839     -#include <asm/io.h>
840     -#include <asm/irq_remapping.h>
841     -#include <asm/kexec.h>
842     -#include <asm/perf_event.h>
843     -#include <asm/mce.h>
844     -#include <asm/mmu_context.h>
845     -#include <asm/mshyperv.h>
846     -#include <asm/spec-ctrl.h>
847     -#include <asm/virtext.h>
848     -#include <asm/vmx.h>
849     -
850     -#include "capabilities.h"
851     -#include "cpuid.h"
852     -#include "evmcs.h"
853     -#include "irq.h"
854     -#include "kvm_cache_regs.h"
855     -#include "lapic.h"
856     -#include "mmu.h"
857     -#include "nested.h"
858     -#include "ops.h"
859     -#include "pmu.h"
860     -#include "trace.h"
861     -#include "vmcs.h"
862     -#include "vmcs12.h"
863     -#include "vmx.h"
864     -#include "x86.h"
865     -
866     -MODULE_AUTHOR("Qumranet");
867     -MODULE_LICENSE("GPL");
868     -
869     -static const struct x86_cpu_id vmx_cpu_id[] = {
870     - X86_FEATURE_MATCH(X86_FEATURE_VMX),
871     - {}
872     -};
873     -MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
874     -
875     -bool __read_mostly enable_vpid = 1;
876     -module_param_named(vpid, enable_vpid, bool, 0444);
877     -
878     -static bool __read_mostly enable_vnmi = 1;
879     -module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
880     -
881     -bool __read_mostly flexpriority_enabled = 1;
882     -module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
883     -
884     -bool __read_mostly enable_ept = 1;
885     -module_param_named(ept, enable_ept, bool, S_IRUGO);
886     -
887     -bool __read_mostly enable_unrestricted_guest = 1;
888     -module_param_named(unrestricted_guest,
889     - enable_unrestricted_guest, bool, S_IRUGO);
890     -
891     -bool __read_mostly enable_ept_ad_bits = 1;
892     -module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
893     -
894     -static bool __read_mostly emulate_invalid_guest_state = true;
895     -module_param(emulate_invalid_guest_state, bool, S_IRUGO);
896     -
897     -static bool __read_mostly fasteoi = 1;
898     -module_param(fasteoi, bool, S_IRUGO);
899     -
900     -static bool __read_mostly enable_apicv = 1;
901     -module_param(enable_apicv, bool, S_IRUGO);
902     -
903     -/*
904     - * If nested=1, nested virtualization is supported, i.e., guests may use
905     - * VMX and be a hypervisor for its own guests. If nested=0, guests may not
906     - * use VMX instructions.
907     - */
908     -static bool __read_mostly nested = 1;
909     -module_param(nested, bool, S_IRUGO);
910     -
911     -bool __read_mostly enable_pml = 1;
912     -module_param_named(pml, enable_pml, bool, S_IRUGO);
913     -
914     -static bool __read_mostly dump_invalid_vmcs = 0;
915     -module_param(dump_invalid_vmcs, bool, 0644);
916     -
917     -#define MSR_BITMAP_MODE_X2APIC 1
918     -#define MSR_BITMAP_MODE_X2APIC_APICV 2
919     -
920     -#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
921     -
922     -/* Guest_tsc -> host_tsc conversion requires 64-bit division. */
923     -static int __read_mostly cpu_preemption_timer_multi;
924     -static bool __read_mostly enable_preemption_timer = 1;
925     -#ifdef CONFIG_X86_64
926     -module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
927     -#endif
928     -
929     -#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
930     -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
931     -#define KVM_VM_CR0_ALWAYS_ON \
932     - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
933     - X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
934     -#define KVM_CR4_GUEST_OWNED_BITS \
935     - (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
936     - | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
937     -
938     -#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
939     -#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
940     -#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
941     -
942     -#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
943     -
944     -#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
945     - RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
946     - RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
947     - RTIT_STATUS_BYTECNT))
948     -
949     -#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
950     - (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
951     -
952     -/*
953     - * These 2 parameters are used to config the controls for Pause-Loop Exiting:
954     - * ple_gap: upper bound on the amount of time between two successive
955     - * executions of PAUSE in a loop. Also indicate if ple enabled.
956     - * According to test, this time is usually smaller than 128 cycles.
957     - * ple_window: upper bound on the amount of time a guest is allowed to execute
958     - * in a PAUSE loop. Tests indicate that most spinlocks are held for
959     - * less than 2^12 cycles
960     - * Time is measured based on a counter that runs at the same rate as the TSC,
961     - * refer SDM volume 3b section 21.6.13 & 22.1.3.
962     - */
963     -static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
964     -module_param(ple_gap, uint, 0444);
965     -
966     -static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
967     -module_param(ple_window, uint, 0444);
968     -
969     -/* Default doubles per-vcpu window every exit. */
970     -static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
971     -module_param(ple_window_grow, uint, 0444);
972     -
973     -/* Default resets per-vcpu window every exit to ple_window. */
974     -static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
975     -module_param(ple_window_shrink, uint, 0444);
976     -
977     -/* Default is to compute the maximum so we can never overflow. */
978     -static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
979     -module_param(ple_window_max, uint, 0444);
980     -
981     -/* Default is SYSTEM mode, 1 for host-guest mode */
982     -int __read_mostly pt_mode = PT_MODE_SYSTEM;
983     -module_param(pt_mode, int, S_IRUGO);
984     -
985     -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
986     -static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
987     -static DEFINE_MUTEX(vmx_l1d_flush_mutex);
988     -
989     -/* Storage for pre module init parameter parsing */
990     -static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
991     -
992     -static const struct {
993     - const char *option;
994     - bool for_parse;
995     -} vmentry_l1d_param[] = {
996     - [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
997     - [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
998     - [VMENTER_L1D_FLUSH_COND] = {"cond", true},
999     - [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
1000     - [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
1001     - [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
1002     -};
1003     -
1004     -#define L1D_CACHE_ORDER 4
1005     -static void *vmx_l1d_flush_pages;
1006     -
1007     -static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
1008     -{
1009     - struct page *page;
1010     - unsigned int i;
1011     -
1012     - if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
1013     - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1014     - return 0;
1015     - }
1016     -
1017     - if (!enable_ept) {
1018     - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
1019     - return 0;
1020     - }
1021     -
1022     - if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
1023     - u64 msr;
1024     -
1025     - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
1026     - if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
1027     - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
1028     - return 0;
1029     - }
1030     - }
1031     -
1032     - /* If set to auto use the default l1tf mitigation method */
1033     - if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
1034     - switch (l1tf_mitigation) {
1035     - case L1TF_MITIGATION_OFF:
1036     - l1tf = VMENTER_L1D_FLUSH_NEVER;
1037     - break;
1038     - case L1TF_MITIGATION_FLUSH_NOWARN:
1039     - case L1TF_MITIGATION_FLUSH:
1040     - case L1TF_MITIGATION_FLUSH_NOSMT:
1041     - l1tf = VMENTER_L1D_FLUSH_COND;
1042     - break;
1043     - case L1TF_MITIGATION_FULL:
1044     - case L1TF_MITIGATION_FULL_FORCE:
1045     - l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1046     - break;
1047     - }
1048     - } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
1049     - l1tf = VMENTER_L1D_FLUSH_ALWAYS;
1050     - }
1051     -
1052     - if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
1053     - !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
1054     - /*
1055     - * This allocation for vmx_l1d_flush_pages is not tied to a VM
1056     - * lifetime and so should not be charged to a memcg.
1057     - */
1058     - page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
1059     - if (!page)
1060     - return -ENOMEM;
1061     - vmx_l1d_flush_pages = page_address(page);
1062     -
1063     - /*
1064     - * Initialize each page with a different pattern in
1065     - * order to protect against KSM in the nested
1066     - * virtualization case.
1067     - */
1068     - for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
1069     - memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
1070     - PAGE_SIZE);
1071     - }
1072     - }
1073     -
1074     - l1tf_vmx_mitigation = l1tf;
1075     -
1076     - if (l1tf != VMENTER_L1D_FLUSH_NEVER)
1077     - static_branch_enable(&vmx_l1d_should_flush);
1078     - else
1079     - static_branch_disable(&vmx_l1d_should_flush);
1080     -
1081     - if (l1tf == VMENTER_L1D_FLUSH_COND)
1082     - static_branch_enable(&vmx_l1d_flush_cond);
1083     - else
1084     - static_branch_disable(&vmx_l1d_flush_cond);
1085     - return 0;
1086     -}
1087     -
1088     -static int vmentry_l1d_flush_parse(const char *s)
1089     -{
1090     - unsigned int i;
1091     -
1092     - if (s) {
1093     - for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
1094     - if (vmentry_l1d_param[i].for_parse &&
1095     - sysfs_streq(s, vmentry_l1d_param[i].option))
1096     - return i;
1097     - }
1098     - }
1099     - return -EINVAL;
1100     -}
1101     -
1102     -static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
1103     -{
1104     - int l1tf, ret;
1105     -
1106     - l1tf = vmentry_l1d_flush_parse(s);
1107     - if (l1tf < 0)
1108     - return l1tf;
1109     -
1110     - if (!boot_cpu_has(X86_BUG_L1TF))
1111     - return 0;
1112     -
1113     - /*
1114     - * Has vmx_init() run already? If not then this is the pre init
1115     - * parameter parsing. In that case just store the value and let
1116     - * vmx_init() do the proper setup after enable_ept has been
1117     - * established.
1118     - */
1119     - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
1120     - vmentry_l1d_flush_param = l1tf;
1121     - return 0;
1122     - }
1123     -
1124     - mutex_lock(&vmx_l1d_flush_mutex);
1125     - ret = vmx_setup_l1d_flush(l1tf);
1126     - mutex_unlock(&vmx_l1d_flush_mutex);
1127     - return ret;
1128     -}
1129     -
1130     -static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
1131     -{
1132     - if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
1133     - return sprintf(s, "???\n");
1134     -
1135     - return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
1136     -}
1137     -
1138     -static const struct kernel_param_ops vmentry_l1d_flush_ops = {
1139     - .set = vmentry_l1d_flush_set,
1140     - .get = vmentry_l1d_flush_get,
1141     -};
1142     -module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
1143     -
1144     -static bool guest_state_valid(struct kvm_vcpu *vcpu);
1145     -static u32 vmx_segment_access_rights(struct kvm_segment *var);
1146     -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
1147     - u32 msr, int type);
1148     -
1149     -void vmx_vmexit(void);
1150     -
1151     -#define vmx_insn_failed(fmt...) \
1152     -do { \
1153     - WARN_ONCE(1, fmt); \
1154     - pr_warn_ratelimited(fmt); \
1155     -} while (0)
1156     -
1157     -asmlinkage void vmread_error(unsigned long field, bool fault)
1158     -{
1159     - if (fault)
1160     - kvm_spurious_fault();
1161     - else
1162     - vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
1163     -}
1164     -
1165     -noinline void vmwrite_error(unsigned long field, unsigned long value)
1166     -{
1167     - vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
1168     - field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
1169     -}
1170     -
1171     -noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
1172     -{
1173     - vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
1174     -}
1175     -
1176     -noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
1177     -{
1178     - vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
1179     -}
1180     -
1181     -noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
1182     -{
1183     - vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
1184     - ext, vpid, gva);
1185     -}
1186     -
1187     -noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
1188     -{
1189     - vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
1190     - ext, eptp, gpa);
1191     -}
1192     -
1193     -static DEFINE_PER_CPU(struct vmcs *, vmxarea);
1194     -DEFINE_PER_CPU(struct vmcs *, current_vmcs);
1195     -/*
1196     - * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
1197     - * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
1198     - */
1199     -static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
1200     -
1201     -/*
1202     - * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
1203     - * can find which vCPU should be waken up.
1204     - */
1205     -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
1206     -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
1207     -
1208     -static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
1209     -static DEFINE_SPINLOCK(vmx_vpid_lock);
1210     -
1211     -struct vmcs_config vmcs_config;
1212     -struct vmx_capability vmx_capability;
1213     -
1214     -#define VMX_SEGMENT_FIELD(seg) \
1215     - [VCPU_SREG_##seg] = { \
1216     - .selector = GUEST_##seg##_SELECTOR, \
1217     - .base = GUEST_##seg##_BASE, \
1218     - .limit = GUEST_##seg##_LIMIT, \
1219     - .ar_bytes = GUEST_##seg##_AR_BYTES, \
1220     - }
1221     -
1222     -static const struct kvm_vmx_segment_field {
1223     - unsigned selector;
1224     - unsigned base;
1225     - unsigned limit;
1226     - unsigned ar_bytes;
1227     -} kvm_vmx_segment_fields[] = {
1228     - VMX_SEGMENT_FIELD(CS),
1229     - VMX_SEGMENT_FIELD(DS),
1230     - VMX_SEGMENT_FIELD(ES),
1231     - VMX_SEGMENT_FIELD(FS),
1232     - VMX_SEGMENT_FIELD(GS),
1233     - VMX_SEGMENT_FIELD(SS),
1234     - VMX_SEGMENT_FIELD(TR),
1235     - VMX_SEGMENT_FIELD(LDTR),
1236     -};
1237     -
1238     -u64 host_efer;
1239     -static unsigned long host_idt_base;
1240     -
1241     -/*
1242     - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
1243     - * will emulate SYSCALL in legacy mode if the vendor string in guest
1244     - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
1245     - * support this emulation, IA32_STAR must always be included in
1246     - * vmx_msr_index[], even in i386 builds.
1247     - */
1248     -const u32 vmx_msr_index[] = {
1249     -#ifdef CONFIG_X86_64
1250     - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
1251     -#endif
1252     - MSR_EFER, MSR_TSC_AUX, MSR_STAR,
1253     - MSR_IA32_TSX_CTRL,
1254     -};
1255     -
1256     -#if IS_ENABLED(CONFIG_HYPERV)
1257     -static bool __read_mostly enlightened_vmcs = true;
1258     -module_param(enlightened_vmcs, bool, 0444);
1259     -
1260     -/* check_ept_pointer() should be under protection of ept_pointer_lock. */
1261     -static void check_ept_pointer_match(struct kvm *kvm)
1262     -{
1263     - struct kvm_vcpu *vcpu;
1264     - u64 tmp_eptp = INVALID_PAGE;
1265     - int i;
1266     -
1267     - kvm_for_each_vcpu(i, vcpu, kvm) {
1268     - if (!VALID_PAGE(tmp_eptp)) {
1269     - tmp_eptp = to_vmx(vcpu)->ept_pointer;
1270     - } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
1271     - to_kvm_vmx(kvm)->ept_pointers_match
1272     - = EPT_POINTERS_MISMATCH;
1273     - return;
1274     - }
1275     - }
1276     -
1277     - to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
1278     -}
1279     -
1280     -static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
1281     - void *data)
1282     -{
1283     - struct kvm_tlb_range *range = data;
1284     -
1285     - return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
1286     - range->pages);
1287     -}
1288     -
1289     -static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
1290     - struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
1291     -{
1292     - u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
1293     -
1294     - /*
1295     - * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
1296     - * of the base of EPT PML4 table, strip off EPT configuration
1297     - * information.
1298     - */
1299     - if (range)
1300     - return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
1301     - kvm_fill_hv_flush_list_func, (void *)range);
1302     - else
1303     - return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
1304     -}
1305     -
1306     -static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
1307     - struct kvm_tlb_range *range)
1308     -{
1309     - struct kvm_vcpu *vcpu;
1310     - int ret = 0, i;
1311     -
1312     - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1313     -
1314     - if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
1315     - check_ept_pointer_match(kvm);
1316     -
1317     - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
1318     - kvm_for_each_vcpu(i, vcpu, kvm) {
1319     - /* If ept_pointer is invalid pointer, bypass flush request. */
1320     - if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
1321     - ret |= __hv_remote_flush_tlb_with_range(
1322     - kvm, vcpu, range);
1323     - }
1324     - } else {
1325     - ret = __hv_remote_flush_tlb_with_range(kvm,
1326     - kvm_get_vcpu(kvm, 0), range);
1327     - }
1328     -
1329     - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
1330     - return ret;
1331     -}
1332     -static int hv_remote_flush_tlb(struct kvm *kvm)
1333     -{
1334     - return hv_remote_flush_tlb_with_range(kvm, NULL);
1335     -}
1336     -
1337     -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
1338     -{
1339     - struct hv_enlightened_vmcs *evmcs;
1340     - struct hv_partition_assist_pg **p_hv_pa_pg =
1341     - &vcpu->kvm->arch.hyperv.hv_pa_pg;
1342     - /*
1343     - * Synthetic VM-Exit is not enabled in current code and so All
1344     - * evmcs in singe VM shares same assist page.
1345     - */
1346     - if (!*p_hv_pa_pg)
1347     - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
1348     -
1349     - if (!*p_hv_pa_pg)
1350     - return -ENOMEM;
1351     -
1352     - evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
1353     -
1354     - evmcs->partition_assist_page =
1355     - __pa(*p_hv_pa_pg);
1356     - evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
1357     - evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
1358     -
1359     - return 0;
1360     -}
1361     -
1362     -#endif /* IS_ENABLED(CONFIG_HYPERV) */
1363     -
1364     -/*
1365     - * Comment's format: document - errata name - stepping - processor name.
1366     - * Refer from
1367     - * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
1368     - */
1369     -static u32 vmx_preemption_cpu_tfms[] = {
1370     -/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
1371     -0x000206E6,
1372     -/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
1373     -/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
1374     -/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
1375     -0x00020652,
1376     -/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
1377     -0x00020655,
1378     -/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
1379     -/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
1380     -/*
1381     - * 320767.pdf - AAP86 - B1 -
1382     - * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
1383     - */
1384     -0x000106E5,
1385     -/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
1386     -0x000106A0,
1387     -/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
1388     -0x000106A1,
1389     -/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
1390     -0x000106A4,
1391     - /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
1392     - /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
1393     - /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
1394     -0x000106A5,
1395     - /* Xeon E3-1220 V2 */
1396     -0x000306A8,
1397     -};
1398     -
1399     -static inline bool cpu_has_broken_vmx_preemption_timer(void)
1400     -{
1401     - u32 eax = cpuid_eax(0x00000001), i;
1402     -
1403     - /* Clear the reserved bits */
1404     - eax &= ~(0x3U << 14 | 0xfU << 28);
1405     - for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
1406     - if (eax == vmx_preemption_cpu_tfms[i])
1407     - return true;
1408     -
1409     - return false;
1410     -}
1411     -
1412     -static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
1413     -{
1414     - return flexpriority_enabled && lapic_in_kernel(vcpu);
1415     -}
1416     -
1417     -static inline bool report_flexpriority(void)
1418     -{
1419     - return flexpriority_enabled;
1420     -}
1421     -
1422     -static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
1423     -{
1424     - int i;
1425     -
1426     - for (i = 0; i < vmx->nmsrs; ++i)
1427     - if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
1428     - return i;
1429     - return -1;
1430     -}
1431     -
1432     -struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
1433     -{
1434     - int i;
1435     -
1436     - i = __find_msr_index(vmx, msr);
1437     - if (i >= 0)
1438     - return &vmx->guest_msrs[i];
1439     - return NULL;
1440     -}
1441     -
1442     -static int vmx_set_guest_msr(struct vcpu_vmx *vmx, struct shared_msr_entry *msr, u64 data)
1443     -{
1444     - int ret = 0;
1445     -
1446     - u64 old_msr_data = msr->data;
1447     - msr->data = data;
1448     - if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
1449     - preempt_disable();
1450     - ret = kvm_set_shared_msr(msr->index, msr->data,
1451     - msr->mask);
1452     - preempt_enable();
1453     - if (ret)
1454     - msr->data = old_msr_data;
1455     - }
1456     - return ret;
1457     -}
1458     -
1459     -void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
1460     -{
1461     - vmcs_clear(loaded_vmcs->vmcs);
1462     - if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
1463     - vmcs_clear(loaded_vmcs->shadow_vmcs);
1464     - loaded_vmcs->cpu = -1;
1465     - loaded_vmcs->launched = 0;
1466     -}
1467     -
1468     -#ifdef CONFIG_KEXEC_CORE
1469     -/*
1470     - * This bitmap is used to indicate whether the vmclear
1471     - * operation is enabled on all cpus. All disabled by
1472     - * default.
1473     - */
1474     -static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
1475     -
1476     -static inline void crash_enable_local_vmclear(int cpu)
1477     -{
1478     - cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
1479     -}
1480     -
1481     -static inline void crash_disable_local_vmclear(int cpu)
1482     -{
1483     - cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
1484     -}
1485     -
1486     -static inline int crash_local_vmclear_enabled(int cpu)
1487     -{
1488     - return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
1489     -}
1490     -
1491     -static void crash_vmclear_local_loaded_vmcss(void)
1492     -{
1493     - int cpu = raw_smp_processor_id();
1494     - struct loaded_vmcs *v;
1495     -
1496     - if (!crash_local_vmclear_enabled(cpu))
1497     - return;
1498     -
1499     - list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
1500     - loaded_vmcss_on_cpu_link)
1501     - vmcs_clear(v->vmcs);
1502     -}
1503     -#else
1504     -static inline void crash_enable_local_vmclear(int cpu) { }
1505     -static inline void crash_disable_local_vmclear(int cpu) { }
1506     -#endif /* CONFIG_KEXEC_CORE */
1507     -
1508     -static void __loaded_vmcs_clear(void *arg)
1509     -{
1510     - struct loaded_vmcs *loaded_vmcs = arg;
1511     - int cpu = raw_smp_processor_id();
1512     -
1513     - if (loaded_vmcs->cpu != cpu)
1514     - return; /* vcpu migration can race with cpu offline */
1515     - if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
1516     - per_cpu(current_vmcs, cpu) = NULL;
1517     - crash_disable_local_vmclear(cpu);
1518     - list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
1519     -
1520     - /*
1521     - * we should ensure that updating loaded_vmcs->loaded_vmcss_on_cpu_link
1522     - * happens before setting loaded_vmcs->vcpu to -1, which is done in
1523     - * loaded_vmcs_init. Otherwise, another cpu could see vcpu = -1 first
1524     - * and then add the vmcs into the percpu list before it is deleted.
1525     - */
1526     - smp_wmb();
1527     -
1528     - loaded_vmcs_init(loaded_vmcs);
1529     - crash_enable_local_vmclear(cpu);
1530     -}
1531     -
1532     -void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
1533     -{
1534     - int cpu = loaded_vmcs->cpu;
1535     -
1536     - if (cpu != -1)
1537     - smp_call_function_single(cpu,
1538     - __loaded_vmcs_clear, loaded_vmcs, 1);
1539     -}
1540     -
1541     -static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
1542     - unsigned field)
1543     -{
1544     - bool ret;
1545     - u32 mask = 1 << (seg * SEG_FIELD_NR + field);
1546     -
1547     - if (!kvm_register_is_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS)) {
1548     - kvm_register_mark_available(&vmx->vcpu, VCPU_EXREG_SEGMENTS);
1549     - vmx->segment_cache.bitmask = 0;
1550     - }
1551     - ret = vmx->segment_cache.bitmask & mask;
1552     - vmx->segment_cache.bitmask |= mask;
1553     - return ret;
1554     -}
1555     -
1556     -static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
1557     -{
1558     - u16 *p = &vmx->segment_cache.seg[seg].selector;
1559     -
1560     - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
1561     - *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
1562     - return *p;
1563     -}
1564     -
1565     -static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
1566     -{
1567     - ulong *p = &vmx->segment_cache.seg[seg].base;
1568     -
1569     - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
1570     - *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
1571     - return *p;
1572     -}
1573     -
1574     -static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
1575     -{
1576     - u32 *p = &vmx->segment_cache.seg[seg].limit;
1577     -
1578     - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
1579     - *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
1580     - return *p;
1581     -}
1582     -
1583     -static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
1584     -{
1585     - u32 *p = &vmx->segment_cache.seg[seg].ar;
1586     -
1587     - if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
1588     - *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
1589     - return *p;
1590     -}
1591     -
1592     -void update_exception_bitmap(struct kvm_vcpu *vcpu)
1593     -{
1594     - u32 eb;
1595     -
1596     - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
1597     - (1u << DB_VECTOR) | (1u << AC_VECTOR);
1598     - /*
1599     - * Guest access to VMware backdoor ports could legitimately
1600     - * trigger #GP because of TSS I/O permission bitmap.
1601     - * We intercept those #GP and allow access to them anyway
1602     - * as VMware does.
1603     - */
1604     - if (enable_vmware_backdoor)
1605     - eb |= (1u << GP_VECTOR);
1606     - if ((vcpu->guest_debug &
1607     - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
1608     - (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
1609     - eb |= 1u << BP_VECTOR;
1610     - if (to_vmx(vcpu)->rmode.vm86_active)
1611     - eb = ~0;
1612     - if (enable_ept)
1613     - eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
1614     -
1615     - /* When we are running a nested L2 guest and L1 specified for it a
1616     - * certain exception bitmap, we must trap the same exceptions and pass
1617     - * them to L1. When running L2, we will only handle the exceptions
1618     - * specified above if L1 did not want them.
1619     - */
1620     - if (is_guest_mode(vcpu))
1621     - eb |= get_vmcs12(vcpu)->exception_bitmap;
1622     -
1623     - vmcs_write32(EXCEPTION_BITMAP, eb);
1624     -}
1625     -
1626     -/*
1627     - * Check if MSR is intercepted for currently loaded MSR bitmap.
1628     - */
1629     -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
1630     -{
1631     - unsigned long *msr_bitmap;
1632     - int f = sizeof(unsigned long);
1633     -
1634     - if (!cpu_has_vmx_msr_bitmap())
1635     - return true;
1636     -
1637     - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
1638     -
1639     - if (msr <= 0x1fff) {
1640     - return !!test_bit(msr, msr_bitmap + 0x800 / f);
1641     - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
1642     - msr &= 0x1fff;
1643     - return !!test_bit(msr, msr_bitmap + 0xc00 / f);
1644     - }
1645     -
1646     - return true;
1647     -}
1648     -
1649     -static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1650     - unsigned long entry, unsigned long exit)
1651     -{
1652     - vm_entry_controls_clearbit(vmx, entry);
1653     - vm_exit_controls_clearbit(vmx, exit);
1654     -}
1655     -
1656     -int vmx_find_msr_index(struct vmx_msrs *m, u32 msr)
1657     -{
1658     - unsigned int i;
1659     -
1660     - for (i = 0; i < m->nr; ++i) {
1661     - if (m->val[i].index == msr)
1662     - return i;
1663     - }
1664     - return -ENOENT;
1665     -}
1666     -
1667     -static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
1668     -{
1669     - int i;
1670     - struct msr_autoload *m = &vmx->msr_autoload;
1671     -
1672     - switch (msr) {
1673     - case MSR_EFER:
1674     - if (cpu_has_load_ia32_efer()) {
1675     - clear_atomic_switch_msr_special(vmx,
1676     - VM_ENTRY_LOAD_IA32_EFER,
1677     - VM_EXIT_LOAD_IA32_EFER);
1678     - return;
1679     - }
1680     - break;
1681     - case MSR_CORE_PERF_GLOBAL_CTRL:
1682     - if (cpu_has_load_perf_global_ctrl()) {
1683     - clear_atomic_switch_msr_special(vmx,
1684     - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1685     - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
1686     - return;
1687     - }
1688     - break;
1689     - }
1690     - i = vmx_find_msr_index(&m->guest, msr);
1691     - if (i < 0)
1692     - goto skip_guest;
1693     - --m->guest.nr;
1694     - m->guest.val[i] = m->guest.val[m->guest.nr];
1695     - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1696     -
1697     -skip_guest:
1698     - i = vmx_find_msr_index(&m->host, msr);
1699     - if (i < 0)
1700     - return;
1701     -
1702     - --m->host.nr;
1703     - m->host.val[i] = m->host.val[m->host.nr];
1704     - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1705     -}
1706     -
1707     -static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
1708     - unsigned long entry, unsigned long exit,
1709     - unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
1710     - u64 guest_val, u64 host_val)
1711     -{
1712     - vmcs_write64(guest_val_vmcs, guest_val);
1713     - if (host_val_vmcs != HOST_IA32_EFER)
1714     - vmcs_write64(host_val_vmcs, host_val);
1715     - vm_entry_controls_setbit(vmx, entry);
1716     - vm_exit_controls_setbit(vmx, exit);
1717     -}
1718     -
1719     -static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
1720     - u64 guest_val, u64 host_val, bool entry_only)
1721     -{
1722     - int i, j = 0;
1723     - struct msr_autoload *m = &vmx->msr_autoload;
1724     -
1725     - switch (msr) {
1726     - case MSR_EFER:
1727     - if (cpu_has_load_ia32_efer()) {
1728     - add_atomic_switch_msr_special(vmx,
1729     - VM_ENTRY_LOAD_IA32_EFER,
1730     - VM_EXIT_LOAD_IA32_EFER,
1731     - GUEST_IA32_EFER,
1732     - HOST_IA32_EFER,
1733     - guest_val, host_val);
1734     - return;
1735     - }
1736     - break;
1737     - case MSR_CORE_PERF_GLOBAL_CTRL:
1738     - if (cpu_has_load_perf_global_ctrl()) {
1739     - add_atomic_switch_msr_special(vmx,
1740     - VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
1741     - VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
1742     - GUEST_IA32_PERF_GLOBAL_CTRL,
1743     - HOST_IA32_PERF_GLOBAL_CTRL,
1744     - guest_val, host_val);
1745     - return;
1746     - }
1747     - break;
1748     - case MSR_IA32_PEBS_ENABLE:
1749     - /* PEBS needs a quiescent period after being disabled (to write
1750     - * a record). Disabling PEBS through VMX MSR swapping doesn't
1751     - * provide that period, so a CPU could write host's record into
1752     - * guest's memory.
1753     - */
1754     - wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
1755     - }
1756     -
1757     - i = vmx_find_msr_index(&m->guest, msr);
1758     - if (!entry_only)
1759     - j = vmx_find_msr_index(&m->host, msr);
1760     -
1761     - if ((i < 0 && m->guest.nr == NR_LOADSTORE_MSRS) ||
1762     - (j < 0 && m->host.nr == NR_LOADSTORE_MSRS)) {
1763     - printk_once(KERN_WARNING "Not enough msr switch entries. "
1764     - "Can't add msr %x\n", msr);
1765     - return;
1766     - }
1767     - if (i < 0) {
1768     - i = m->guest.nr++;
1769     - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
1770     - }
1771     - m->guest.val[i].index = msr;
1772     - m->guest.val[i].value = guest_val;
1773     -
1774     - if (entry_only)
1775     - return;
1776     -
1777     - if (j < 0) {
1778     - j = m->host.nr++;
1779     - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
1780     - }
1781     - m->host.val[j].index = msr;
1782     - m->host.val[j].value = host_val;
1783     -}
1784     -
1785     -static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
1786     -{
1787     - u64 guest_efer = vmx->vcpu.arch.efer;
1788     - u64 ignore_bits = 0;
1789     -
1790     - /* Shadow paging assumes NX to be available. */
1791     - if (!enable_ept)
1792     - guest_efer |= EFER_NX;
1793     -
1794     - /*
1795     - * LMA and LME handled by hardware; SCE meaningless outside long mode.
1796     - */
1797     - ignore_bits |= EFER_SCE;
1798     -#ifdef CONFIG_X86_64
1799     - ignore_bits |= EFER_LMA | EFER_LME;
1800     - /* SCE is meaningful only in long mode on Intel */
1801     - if (guest_efer & EFER_LMA)
1802     - ignore_bits &= ~(u64)EFER_SCE;
1803     -#endif
1804     -
1805     - /*
1806     - * On EPT, we can't emulate NX, so we must switch EFER atomically.
1807     - * On CPUs that support "load IA32_EFER", always switch EFER
1808     - * atomically, since it's faster than switching it manually.
1809     - */
1810     - if (cpu_has_load_ia32_efer() ||
1811     - (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
1812     - if (!(guest_efer & EFER_LMA))
1813     - guest_efer &= ~EFER_LME;
1814     - if (guest_efer != host_efer)
1815     - add_atomic_switch_msr(vmx, MSR_EFER,
1816     - guest_efer, host_efer, false);
1817     - else
1818     - clear_atomic_switch_msr(vmx, MSR_EFER);
1819     - return false;
1820     - } else {
1821     - clear_atomic_switch_msr(vmx, MSR_EFER);
1822     -
1823     - guest_efer &= ~ignore_bits;
1824     - guest_efer |= host_efer & ignore_bits;
1825     -
1826     - vmx->guest_msrs[efer_offset].data = guest_efer;
1827     - vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
1828     -
1829     - return true;
1830     - }
1831     -}
1832     -
1833     -#ifdef CONFIG_X86_32
1834     -/*
1835     - * On 32-bit kernels, VM exits still load the FS and GS bases from the
1836     - * VMCS rather than the segment table. KVM uses this helper to figure
1837     - * out the current bases to poke them into the VMCS before entry.
1838     - */
1839     -static unsigned long segment_base(u16 selector)
1840     -{
1841     - struct desc_struct *table;
1842     - unsigned long v;
1843     -
1844     - if (!(selector & ~SEGMENT_RPL_MASK))
1845     - return 0;
1846     -
1847     - table = get_current_gdt_ro();
1848     -
1849     - if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1850     - u16 ldt_selector = kvm_read_ldt();
1851     -
1852     - if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1853     - return 0;
1854     -
1855     - table = (struct desc_struct *)segment_base(ldt_selector);
1856     - }
1857     - v = get_desc_base(&table[selector >> 3]);
1858     - return v;
1859     -}
1860     -#endif
1861     -
1862     -static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
1863     -{
1864     - u32 i;
1865     -
1866     - wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1867     - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1868     - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1869     - wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1870     - for (i = 0; i < addr_range; i++) {
1871     - wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1872     - wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1873     - }
1874     -}
1875     -
1876     -static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
1877     -{
1878     - u32 i;
1879     -
1880     - rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1881     - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1882     - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1883     - rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1884     - for (i = 0; i < addr_range; i++) {
1885     - rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1886     - rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1887     - }
1888     -}
1889     -
1890     -static void pt_guest_enter(struct vcpu_vmx *vmx)
1891     -{
1892     - if (pt_mode == PT_MODE_SYSTEM)
1893     - return;
1894     -
1895     - /*
1896     - * GUEST_IA32_RTIT_CTL is already set in the VMCS.
1897     - * Save host state before VM entry.
1898     - */
1899     - rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1900     - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1901     - wrmsrl(MSR_IA32_RTIT_CTL, 0);
1902     - pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1903     - pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1904     - }
1905     -}
1906     -
1907     -static void pt_guest_exit(struct vcpu_vmx *vmx)
1908     -{
1909     - if (pt_mode == PT_MODE_SYSTEM)
1910     - return;
1911     -
1912     - if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1913     - pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1914     - pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1915     - }
1916     -
1917     - /* Reload host state (IA32_RTIT_CTL will be cleared on VM exit). */
1918     - wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1919     -}
1920     -
1921     -void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
1922     - unsigned long fs_base, unsigned long gs_base)
1923     -{
1924     - if (unlikely(fs_sel != host->fs_sel)) {
1925     - if (!(fs_sel & 7))
1926     - vmcs_write16(HOST_FS_SELECTOR, fs_sel);
1927     - else
1928     - vmcs_write16(HOST_FS_SELECTOR, 0);
1929     - host->fs_sel = fs_sel;
1930     - }
1931     - if (unlikely(gs_sel != host->gs_sel)) {
1932     - if (!(gs_sel & 7))
1933     - vmcs_write16(HOST_GS_SELECTOR, gs_sel);
1934     - else
1935     - vmcs_write16(HOST_GS_SELECTOR, 0);
1936     - host->gs_sel = gs_sel;
1937     - }
1938     - if (unlikely(fs_base != host->fs_base)) {
1939     - vmcs_writel(HOST_FS_BASE, fs_base);
1940     - host->fs_base = fs_base;
1941     - }
1942     - if (unlikely(gs_base != host->gs_base)) {
1943     - vmcs_writel(HOST_GS_BASE, gs_base);
1944     - host->gs_base = gs_base;
1945     - }
1946     -}
1947     -
1948     -void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1949     -{
1950     - struct vcpu_vmx *vmx = to_vmx(vcpu);
1951     - struct vmcs_host_state *host_state;
1952     -#ifdef CONFIG_X86_64
1953     - int cpu = raw_smp_processor_id();
1954     -#endif
1955     - unsigned long fs_base, gs_base;
1956     - u16 fs_sel, gs_sel;
1957     - int i;
1958     -
1959     - vmx->req_immediate_exit = false;
1960     -
1961     - /*
1962     - * Note that guest MSRs to be saved/restored can also be changed
1963     - * when guest state is loaded. This happens when guest transitions
1964     - * to/from long-mode by setting MSR_EFER.LMA.
1965     - */
1966     - if (!vmx->guest_msrs_ready) {
1967     - vmx->guest_msrs_ready = true;
1968     - for (i = 0; i < vmx->save_nmsrs; ++i)
1969     - kvm_set_shared_msr(vmx->guest_msrs[i].index,
1970     - vmx->guest_msrs[i].data,
1971     - vmx->guest_msrs[i].mask);
1972     -
1973     - }
1974     - if (vmx->guest_state_loaded)
1975     - return;
1976     -
1977     - host_state = &vmx->loaded_vmcs->host_state;
1978     -
1979     - /*
1980     - * Set host fs and gs selectors. Unfortunately, 22.2.3 does not
1981     - * allow segment selectors with cpl > 0 or ti == 1.
1982     - */
1983     - host_state->ldt_sel = kvm_read_ldt();
1984     -
1985     -#ifdef CONFIG_X86_64
1986     - savesegment(ds, host_state->ds_sel);
1987     - savesegment(es, host_state->es_sel);
1988     -
1989     - gs_base = cpu_kernelmode_gs_base(cpu);
1990     - if (likely(is_64bit_mm(current->mm))) {
1991     - save_fsgs_for_kvm();
1992     - fs_sel = current->thread.fsindex;
1993     - gs_sel = current->thread.gsindex;
1994     - fs_base = current->thread.fsbase;
1995     - vmx->msr_host_kernel_gs_base = current->thread.gsbase;
1996     - } else {
1997     - savesegment(fs, fs_sel);
1998     - savesegment(gs, gs_sel);
1999     - fs_base = read_msr(MSR_FS_BASE);
2000     - vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
2001     - }
2002     -
2003     - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2004     -#else
2005     - savesegment(fs, fs_sel);
2006     - savesegment(gs, gs_sel);
2007     - fs_base = segment_base(fs_sel);
2008     - gs_base = segment_base(gs_sel);
2009     -#endif
2010     -
2011     - vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
2012     - vmx->guest_state_loaded = true;
2013     -}
2014     -
2015     -static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
2016     -{
2017     - struct vmcs_host_state *host_state;
2018     -
2019     - if (!vmx->guest_state_loaded)
2020     - return;
2021     -
2022     - host_state = &vmx->loaded_vmcs->host_state;
2023     -
2024     - ++vmx->vcpu.stat.host_state_reload;
2025     -
2026     -#ifdef CONFIG_X86_64
2027     - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2028     -#endif
2029     - if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
2030     - kvm_load_ldt(host_state->ldt_sel);
2031     -#ifdef CONFIG_X86_64
2032     - load_gs_index(host_state->gs_sel);
2033     -#else
2034     - loadsegment(gs, host_state->gs_sel);
2035     -#endif
2036     - }
2037     - if (host_state->fs_sel & 7)
2038     - loadsegment(fs, host_state->fs_sel);
2039     -#ifdef CONFIG_X86_64
2040     - if (unlikely(host_state->ds_sel | host_state->es_sel)) {
2041     - loadsegment(ds, host_state->ds_sel);
2042     - loadsegment(es, host_state->es_sel);
2043     - }
2044     -#endif
2045     - invalidate_tss_limit();
2046     -#ifdef CONFIG_X86_64
2047     - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
2048     -#endif
2049     - load_fixmap_gdt(raw_smp_processor_id());
2050     - vmx->guest_state_loaded = false;
2051     - vmx->guest_msrs_ready = false;
2052     -}
2053     -
2054     -#ifdef CONFIG_X86_64
2055     -static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
2056     -{
2057     - preempt_disable();
2058     - if (vmx->guest_state_loaded)
2059     - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
2060     - preempt_enable();
2061     - return vmx->msr_guest_kernel_gs_base;
2062     -}
2063     -
2064     -static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
2065     -{
2066     - preempt_disable();
2067     - if (vmx->guest_state_loaded)
2068     - wrmsrl(MSR_KERNEL_GS_BASE, data);
2069     - preempt_enable();
2070     - vmx->msr_guest_kernel_gs_base = data;
2071     -}
2072     -#endif
2073     -
2074     -static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
2075     -{
2076     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2077     - struct pi_desc old, new;
2078     - unsigned int dest;
2079     -
2080     - /*
2081     - * In case of hot-plug or hot-unplug, we may have to undo
2082     - * vmx_vcpu_pi_put even if there is no assigned device. And we
2083     - * always keep PI.NDST up to date for simplicity: it makes the
2084     - * code easier, and CPU migration is not a fast path.
2085     - */
2086     - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
2087     - return;
2088     -
2089     - /*
2090     - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
2091     - * PI.NDST: pi_post_block is the one expected to change PI.NDST and the
2092     - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
2093     - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
2094     - * correctly.
2095     - */
2096     - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
2097     - pi_clear_sn(pi_desc);
2098     - goto after_clear_sn;
2099     - }
2100     -
2101     - /* The full case. */
2102     - do {
2103     - old.control = new.control = pi_desc->control;
2104     -
2105     - dest = cpu_physical_id(cpu);
2106     -
2107     - if (x2apic_enabled())
2108     - new.ndst = dest;
2109     - else
2110     - new.ndst = (dest << 8) & 0xFF00;
2111     -
2112     - new.sn = 0;
2113     - } while (cmpxchg64(&pi_desc->control, old.control,
2114     - new.control) != old.control);
2115     -
2116     -after_clear_sn:
2117     -
2118     - /*
2119     - * Clear SN before reading the bitmap. The VT-d firmware
2120     - * writes the bitmap and reads SN atomically (5.2.3 in the
2121     - * spec), so it doesn't really have a memory barrier that
2122     - * pairs with this, but we cannot do that and we need one.
2123     - */
2124     - smp_mb__after_atomic();
2125     -
2126     - if (!pi_is_pir_empty(pi_desc))
2127     - pi_set_on(pi_desc);
2128     -}
2129     -
2130     -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
2131     -{
2132     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2133     - bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
2134     -
2135     - if (!already_loaded) {
2136     - loaded_vmcs_clear(vmx->loaded_vmcs);
2137     - local_irq_disable();
2138     - crash_disable_local_vmclear(cpu);
2139     -
2140     - /*
2141     - * Read loaded_vmcs->cpu should be before fetching
2142     - * loaded_vmcs->loaded_vmcss_on_cpu_link.
2143     - * See the comments in __loaded_vmcs_clear().
2144     - */
2145     - smp_rmb();
2146     -
2147     - list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
2148     - &per_cpu(loaded_vmcss_on_cpu, cpu));
2149     - crash_enable_local_vmclear(cpu);
2150     - local_irq_enable();
2151     - }
2152     -
2153     - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
2154     - per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
2155     - vmcs_load(vmx->loaded_vmcs->vmcs);
2156     - indirect_branch_prediction_barrier();
2157     - }
2158     -
2159     - if (!already_loaded) {
2160     - void *gdt = get_current_gdt_ro();
2161     - unsigned long sysenter_esp;
2162     -
2163     - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2164     -
2165     - /*
2166     - * Linux uses per-cpu TSS and GDT, so set these when switching
2167     - * processors. See 22.2.4.
2168     - */
2169     - vmcs_writel(HOST_TR_BASE,
2170     - (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
2171     - vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
2172     -
2173     - rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
2174     - vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
2175     -
2176     - vmx->loaded_vmcs->cpu = cpu;
2177     - }
2178     -
2179     - /* Setup TSC multiplier */
2180     - if (kvm_has_tsc_control &&
2181     - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
2182     - decache_tsc_multiplier(vmx);
2183     -}
2184     -
2185     -/*
2186     - * Switches to specified vcpu, until a matching vcpu_put(), but assumes
2187     - * vcpu mutex is already taken.
2188     - */
2189     -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2190     -{
2191     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2192     -
2193     - vmx_vcpu_load_vmcs(vcpu, cpu);
2194     -
2195     - vmx_vcpu_pi_load(vcpu, cpu);
2196     -
2197     - vmx->host_pkru = read_pkru();
2198     - vmx->host_debugctlmsr = get_debugctlmsr();
2199     -}
2200     -
2201     -static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
2202     -{
2203     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
2204     -
2205     - if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
2206     - !irq_remapping_cap(IRQ_POSTING_CAP) ||
2207     - !kvm_vcpu_apicv_active(vcpu))
2208     - return;
2209     -
2210     - /* Set SN when the vCPU is preempted */
2211     - if (vcpu->preempted)
2212     - pi_set_sn(pi_desc);
2213     -}
2214     -
2215     -static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
2216     -{
2217     - vmx_vcpu_pi_put(vcpu);
2218     -
2219     - vmx_prepare_switch_to_host(to_vmx(vcpu));
2220     -}
2221     -
2222     -static bool emulation_required(struct kvm_vcpu *vcpu)
2223     -{
2224     - return emulate_invalid_guest_state && !guest_state_valid(vcpu);
2225     -}
2226     -
2227     -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
2228     -
2229     -unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
2230     -{
2231     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2232     - unsigned long rflags, save_rflags;
2233     -
2234     - if (!kvm_register_is_available(vcpu, VCPU_EXREG_RFLAGS)) {
2235     - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2236     - rflags = vmcs_readl(GUEST_RFLAGS);
2237     - if (vmx->rmode.vm86_active) {
2238     - rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2239     - save_rflags = vmx->rmode.save_rflags;
2240     - rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2241     - }
2242     - vmx->rflags = rflags;
2243     - }
2244     - return vmx->rflags;
2245     -}
2246     -
2247     -void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2248     -{
2249     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2250     - unsigned long old_rflags;
2251     -
2252     - if (enable_unrestricted_guest) {
2253     - kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
2254     - vmx->rflags = rflags;
2255     - vmcs_writel(GUEST_RFLAGS, rflags);
2256     - return;
2257     - }
2258     -
2259     - old_rflags = vmx_get_rflags(vcpu);
2260     - vmx->rflags = rflags;
2261     - if (vmx->rmode.vm86_active) {
2262     - vmx->rmode.save_rflags = rflags;
2263     - rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2264     - }
2265     - vmcs_writel(GUEST_RFLAGS, rflags);
2266     -
2267     - if ((old_rflags ^ vmx->rflags) & X86_EFLAGS_VM)
2268     - vmx->emulation_required = emulation_required(vcpu);
2269     -}
2270     -
2271     -u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
2272     -{
2273     - u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2274     - int ret = 0;
2275     -
2276     - if (interruptibility & GUEST_INTR_STATE_STI)
2277     - ret |= KVM_X86_SHADOW_INT_STI;
2278     - if (interruptibility & GUEST_INTR_STATE_MOV_SS)
2279     - ret |= KVM_X86_SHADOW_INT_MOV_SS;
2280     -
2281     - return ret;
2282     -}
2283     -
2284     -void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
2285     -{
2286     - u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
2287     - u32 interruptibility = interruptibility_old;
2288     -
2289     - interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
2290     -
2291     - if (mask & KVM_X86_SHADOW_INT_MOV_SS)
2292     - interruptibility |= GUEST_INTR_STATE_MOV_SS;
2293     - else if (mask & KVM_X86_SHADOW_INT_STI)
2294     - interruptibility |= GUEST_INTR_STATE_STI;
2295     -
2296     - if ((interruptibility != interruptibility_old))
2297     - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
2298     -}
2299     -
2300     -static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
2301     -{
2302     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2303     - unsigned long value;
2304     -
2305     - /*
2306     - * Any MSR write that attempts to change bits marked reserved will
2307     - * cause a #GP fault.
2308     - */
2309     - if (data & vmx->pt_desc.ctl_bitmask)
2310     - return 1;
2311     -
2312     - /*
2313     - * Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
2314     - * result in a #GP unless the same write also clears TraceEn.
2315     - */
2316     - if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
2317     - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
2318     - return 1;
2319     -
2320     - /*
2321     - * WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
2322     - * and FabricEn would cause #GP, if
2323     - * CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
2324     - */
2325     - if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
2326     - !(data & RTIT_CTL_FABRIC_EN) &&
2327     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2328     - PT_CAP_single_range_output))
2329     - return 1;
2330     -
2331     - /*
2332     - * MTCFreq, CycThresh and PSBFreq encoding checks: any MSR write that
2333     - * utilizes encodings marked reserved will cause a #GP fault.
2334     - */
2335     - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
2336     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
2337     - !test_bit((data & RTIT_CTL_MTC_RANGE) >>
2338     - RTIT_CTL_MTC_RANGE_OFFSET, &value))
2339     - return 1;
2340     - value = intel_pt_validate_cap(vmx->pt_desc.caps,
2341     - PT_CAP_cycle_thresholds);
2342     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2343     - !test_bit((data & RTIT_CTL_CYC_THRESH) >>
2344     - RTIT_CTL_CYC_THRESH_OFFSET, &value))
2345     - return 1;
2346     - value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
2347     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
2348     - !test_bit((data & RTIT_CTL_PSB_FREQ) >>
2349     - RTIT_CTL_PSB_FREQ_OFFSET, &value))
2350     - return 1;
2351     -
2352     - /*
2353     - * If ADDRx_CFG is reserved or the encoding is >2, this will
2354     - * cause a #GP fault.
2355     - */
2356     - value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
2357     - if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
2358     - return 1;
2359     - value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
2360     - if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
2361     - return 1;
2362     - value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
2363     - if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
2364     - return 1;
2365     - value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
2366     - if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
2367     - return 1;
2368     -
2369     - return 0;
2370     -}
2371     -
2372     -static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
2373     -{
2374     - unsigned long rip;
2375     -
2376     - /*
2377     - * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
2378     - * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
2379     - * set when EPT misconfig occurs. In practice, real hardware updates
2380     - * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
2381     - * (namely Hyper-V) don't set it due to it being undefined behavior,
2382     - * i.e. we end up advancing IP with some random value.
2383     - */
2384     - if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
2385     - to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
2386     - rip = kvm_rip_read(vcpu);
2387     - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2388     - kvm_rip_write(vcpu, rip);
2389     - } else {
2390     - if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
2391     - return 0;
2392     - }
2393     -
2394     - /* skipping an emulated instruction also counts */
2395     - vmx_set_interrupt_shadow(vcpu, 0);
2396     -
2397     - return 1;
2398     -}
2399     -
2400     -static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
2401     -{
2402     - /*
2403     - * Ensure that we clear the HLT state in the VMCS. We don't need to
2404     - * explicitly skip the instruction because if the HLT state is set,
2405     - * then the instruction is already executing and RIP has already been
2406     - * advanced.
2407     - */
2408     - if (kvm_hlt_in_guest(vcpu->kvm) &&
2409     - vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
2410     - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
2411     -}
2412     -
2413     -static void vmx_queue_exception(struct kvm_vcpu *vcpu)
2414     -{
2415     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2416     - unsigned nr = vcpu->arch.exception.nr;
2417     - bool has_error_code = vcpu->arch.exception.has_error_code;
2418     - u32 error_code = vcpu->arch.exception.error_code;
2419     - u32 intr_info = nr | INTR_INFO_VALID_MASK;
2420     -
2421     - kvm_deliver_exception_payload(vcpu);
2422     -
2423     - if (has_error_code) {
2424     - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
2425     - intr_info |= INTR_INFO_DELIVER_CODE_MASK;
2426     - }
2427     -
2428     - if (vmx->rmode.vm86_active) {
2429     - int inc_eip = 0;
2430     - if (kvm_exception_is_soft(nr))
2431     - inc_eip = vcpu->arch.event_exit_inst_len;
2432     - kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
2433     - return;
2434     - }
2435     -
2436     - WARN_ON_ONCE(vmx->emulation_required);
2437     -
2438     - if (kvm_exception_is_soft(nr)) {
2439     - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
2440     - vmx->vcpu.arch.event_exit_inst_len);
2441     - intr_info |= INTR_TYPE_SOFT_EXCEPTION;
2442     - } else
2443     - intr_info |= INTR_TYPE_HARD_EXCEPTION;
2444     -
2445     - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
2446     -
2447     - vmx_clear_hlt(vcpu);
2448     -}
2449     -
2450     -static bool vmx_rdtscp_supported(void)
2451     -{
2452     - return cpu_has_vmx_rdtscp();
2453     -}
2454     -
2455     -static bool vmx_invpcid_supported(void)
2456     -{
2457     - return cpu_has_vmx_invpcid();
2458     -}
2459     -
2460     -/*
2461     - * Swap MSR entry in host/guest MSR entry array.
2462     - */
2463     -static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
2464     -{
2465     - struct shared_msr_entry tmp;
2466     -
2467     - tmp = vmx->guest_msrs[to];
2468     - vmx->guest_msrs[to] = vmx->guest_msrs[from];
2469     - vmx->guest_msrs[from] = tmp;
2470     -}
2471     -
2472     -/*
2473     - * Set up the vmcs to automatically save and restore system
2474     - * msrs. Don't touch the 64-bit msrs if the guest is in legacy
2475     - * mode, as fiddling with msrs is very expensive.
2476     - */
2477     -static void setup_msrs(struct vcpu_vmx *vmx)
2478     -{
2479     - int save_nmsrs, index;
2480     -
2481     - save_nmsrs = 0;
2482     -#ifdef CONFIG_X86_64
2483     - /*
2484     - * The SYSCALL MSRs are only needed on long mode guests, and only
2485     - * when EFER.SCE is set.
2486     - */
2487     - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
2488     - index = __find_msr_index(vmx, MSR_STAR);
2489     - if (index >= 0)
2490     - move_msr_up(vmx, index, save_nmsrs++);
2491     - index = __find_msr_index(vmx, MSR_LSTAR);
2492     - if (index >= 0)
2493     - move_msr_up(vmx, index, save_nmsrs++);
2494     - index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
2495     - if (index >= 0)
2496     - move_msr_up(vmx, index, save_nmsrs++);
2497     - }
2498     -#endif
2499     - index = __find_msr_index(vmx, MSR_EFER);
2500     - if (index >= 0 && update_transition_efer(vmx, index))
2501     - move_msr_up(vmx, index, save_nmsrs++);
2502     - index = __find_msr_index(vmx, MSR_TSC_AUX);
2503     - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
2504     - move_msr_up(vmx, index, save_nmsrs++);
2505     - index = __find_msr_index(vmx, MSR_IA32_TSX_CTRL);
2506     - if (index >= 0)
2507     - move_msr_up(vmx, index, save_nmsrs++);
2508     -
2509     - vmx->save_nmsrs = save_nmsrs;
2510     - vmx->guest_msrs_ready = false;
2511     -
2512     - if (cpu_has_vmx_msr_bitmap())
2513     - vmx_update_msr_bitmap(&vmx->vcpu);
2514     -}
2515     -
2516     -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
2517     -{
2518     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2519     -
2520     - if (is_guest_mode(vcpu) &&
2521     - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
2522     - return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
2523     -
2524     - return vcpu->arch.tsc_offset;
2525     -}
2526     -
2527     -static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
2528     -{
2529     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
2530     - u64 g_tsc_offset = 0;
2531     -
2532     - /*
2533     - * We're here if L1 chose not to trap WRMSR to TSC. According
2534     - * to the spec, this should set L1's TSC; The offset that L1
2535     - * set for L2 remains unchanged, and still needs to be added
2536     - * to the newly set TSC to get L2's TSC.
2537     - */
2538     - if (is_guest_mode(vcpu) &&
2539     - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
2540     - g_tsc_offset = vmcs12->tsc_offset;
2541     -
2542     - trace_kvm_write_tsc_offset(vcpu->vcpu_id,
2543     - vcpu->arch.tsc_offset - g_tsc_offset,
2544     - offset);
2545     - vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
2546     - return offset + g_tsc_offset;
2547     -}
2548     -
2549     -/*
2550     - * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
2551     - * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
2552     - * all guests if the "nested" module option is off, and can also be disabled
2553     - * for a single guest by disabling its VMX cpuid bit.
2554     - */
2555     -bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
2556     -{
2557     - return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
2558     -}
2559     -
2560     -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
2561     - uint64_t val)
2562     -{
2563     - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
2564     -
2565     - return !(val & ~valid_bits);
2566     -}
2567     -
2568     -static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
2569     -{
2570     - switch (msr->index) {
2571     - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2572     - if (!nested)
2573     - return 1;
2574     - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
2575     - default:
2576     - return 1;
2577     - }
2578     -}
2579     -
2580     -/*
2581     - * Reads an msr value (of 'msr_index') into 'pdata'.
2582     - * Returns 0 on success, non-0 otherwise.
2583     - * Assumes vcpu_load() was already called.
2584     - */
2585     -static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2586     -{
2587     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2588     - struct shared_msr_entry *msr;
2589     - u32 index;
2590     -
2591     - switch (msr_info->index) {
2592     -#ifdef CONFIG_X86_64
2593     - case MSR_FS_BASE:
2594     - msr_info->data = vmcs_readl(GUEST_FS_BASE);
2595     - break;
2596     - case MSR_GS_BASE:
2597     - msr_info->data = vmcs_readl(GUEST_GS_BASE);
2598     - break;
2599     - case MSR_KERNEL_GS_BASE:
2600     - msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
2601     - break;
2602     -#endif
2603     - case MSR_EFER:
2604     - return kvm_get_msr_common(vcpu, msr_info);
2605     - case MSR_IA32_TSX_CTRL:
2606     - if (!msr_info->host_initiated &&
2607     - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
2608     - return 1;
2609     - goto find_shared_msr;
2610     - case MSR_IA32_UMWAIT_CONTROL:
2611     - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
2612     - return 1;
2613     -
2614     - msr_info->data = vmx->msr_ia32_umwait_control;
2615     - break;
2616     - case MSR_IA32_SPEC_CTRL:
2617     - if (!msr_info->host_initiated &&
2618     - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2619     - return 1;
2620     -
2621     - msr_info->data = to_vmx(vcpu)->spec_ctrl;
2622     - break;
2623     - case MSR_IA32_SYSENTER_CS:
2624     - msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
2625     - break;
2626     - case MSR_IA32_SYSENTER_EIP:
2627     - msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
2628     - break;
2629     - case MSR_IA32_SYSENTER_ESP:
2630     - msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
2631     - break;
2632     - case MSR_IA32_BNDCFGS:
2633     - if (!kvm_mpx_supported() ||
2634     - (!msr_info->host_initiated &&
2635     - !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
2636     - return 1;
2637     - msr_info->data = vmcs_read64(GUEST_BNDCFGS);
2638     - break;
2639     - case MSR_IA32_MCG_EXT_CTL:
2640     - if (!msr_info->host_initiated &&
2641     - !(vmx->msr_ia32_feature_control &
2642     - FEATURE_CONTROL_LMCE))
2643     - return 1;
2644     - msr_info->data = vcpu->arch.mcg_ext_ctl;
2645     - break;
2646     - case MSR_IA32_FEATURE_CONTROL:
2647     - msr_info->data = vmx->msr_ia32_feature_control;
2648     - break;
2649     - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2650     - if (!nested_vmx_allowed(vcpu))
2651     - return 1;
2652     - return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
2653     - &msr_info->data);
2654     - case MSR_IA32_RTIT_CTL:
2655     - if (pt_mode != PT_MODE_HOST_GUEST)
2656     - return 1;
2657     - msr_info->data = vmx->pt_desc.guest.ctl;
2658     - break;
2659     - case MSR_IA32_RTIT_STATUS:
2660     - if (pt_mode != PT_MODE_HOST_GUEST)
2661     - return 1;
2662     - msr_info->data = vmx->pt_desc.guest.status;
2663     - break;
2664     - case MSR_IA32_RTIT_CR3_MATCH:
2665     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2666     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2667     - PT_CAP_cr3_filtering))
2668     - return 1;
2669     - msr_info->data = vmx->pt_desc.guest.cr3_match;
2670     - break;
2671     - case MSR_IA32_RTIT_OUTPUT_BASE:
2672     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2673     - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2674     - PT_CAP_topa_output) &&
2675     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2676     - PT_CAP_single_range_output)))
2677     - return 1;
2678     - msr_info->data = vmx->pt_desc.guest.output_base;
2679     - break;
2680     - case MSR_IA32_RTIT_OUTPUT_MASK:
2681     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2682     - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2683     - PT_CAP_topa_output) &&
2684     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2685     - PT_CAP_single_range_output)))
2686     - return 1;
2687     - msr_info->data = vmx->pt_desc.guest.output_mask;
2688     - break;
2689     - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2690     - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2691     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2692     - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2693     - PT_CAP_num_address_ranges)))
2694     - return 1;
2695     - if (is_noncanonical_address(data, vcpu))
2696     - return 1;
2697     - if (index % 2)
2698     - msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
2699     - else
2700     - msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
2701     - break;
2702     - case MSR_TSC_AUX:
2703     - if (!msr_info->host_initiated &&
2704     - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2705     - return 1;
2706     - goto find_shared_msr;
2707     - default:
2708     - find_shared_msr:
2709     - msr = find_msr_entry(vmx, msr_info->index);
2710     - if (msr) {
2711     - msr_info->data = msr->data;
2712     - break;
2713     - }
2714     - return kvm_get_msr_common(vcpu, msr_info);
2715     - }
2716     -
2717     - return 0;
2718     -}
2719     -
2720     -/*
2721     - * Writes msr value into the appropriate "register".
2722     - * Returns 0 on success, non-0 otherwise.
2723     - * Assumes vcpu_load() was already called.
2724     - */
2725     -static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2726     -{
2727     - struct vcpu_vmx *vmx = to_vmx(vcpu);
2728     - struct shared_msr_entry *msr;
2729     - int ret = 0;
2730     - u32 msr_index = msr_info->index;
2731     - u64 data = msr_info->data;
2732     - u32 index;
2733     -
2734     - switch (msr_index) {
2735     - case MSR_EFER:
2736     - ret = kvm_set_msr_common(vcpu, msr_info);
2737     - break;
2738     -#ifdef CONFIG_X86_64
2739     - case MSR_FS_BASE:
2740     - vmx_segment_cache_clear(vmx);
2741     - vmcs_writel(GUEST_FS_BASE, data);
2742     - break;
2743     - case MSR_GS_BASE:
2744     - vmx_segment_cache_clear(vmx);
2745     - vmcs_writel(GUEST_GS_BASE, data);
2746     - break;
2747     - case MSR_KERNEL_GS_BASE:
2748     - vmx_write_guest_kernel_gs_base(vmx, data);
2749     - break;
2750     -#endif
2751     - case MSR_IA32_SYSENTER_CS:
2752     - if (is_guest_mode(vcpu))
2753     - get_vmcs12(vcpu)->guest_sysenter_cs = data;
2754     - vmcs_write32(GUEST_SYSENTER_CS, data);
2755     - break;
2756     - case MSR_IA32_SYSENTER_EIP:
2757     - if (is_guest_mode(vcpu))
2758     - get_vmcs12(vcpu)->guest_sysenter_eip = data;
2759     - vmcs_writel(GUEST_SYSENTER_EIP, data);
2760     - break;
2761     - case MSR_IA32_SYSENTER_ESP:
2762     - if (is_guest_mode(vcpu))
2763     - get_vmcs12(vcpu)->guest_sysenter_esp = data;
2764     - vmcs_writel(GUEST_SYSENTER_ESP, data);
2765     - break;
2766     - case MSR_IA32_DEBUGCTLMSR:
2767     - if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
2768     - VM_EXIT_SAVE_DEBUG_CONTROLS)
2769     - get_vmcs12(vcpu)->guest_ia32_debugctl = data;
2770     -
2771     - ret = kvm_set_msr_common(vcpu, msr_info);
2772     - break;
2773     -
2774     - case MSR_IA32_BNDCFGS:
2775     - if (!kvm_mpx_supported() ||
2776     - (!msr_info->host_initiated &&
2777     - !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
2778     - return 1;
2779     - if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
2780     - (data & MSR_IA32_BNDCFGS_RSVD))
2781     - return 1;
2782     - vmcs_write64(GUEST_BNDCFGS, data);
2783     - break;
2784     - case MSR_IA32_UMWAIT_CONTROL:
2785     - if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
2786     - return 1;
2787     -
2788     - /* The reserved bit 1 and non-32 bit [63:32] should be zero */
2789     - if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
2790     - return 1;
2791     -
2792     - vmx->msr_ia32_umwait_control = data;
2793     - break;
2794     - case MSR_IA32_SPEC_CTRL:
2795     - if (!msr_info->host_initiated &&
2796     - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2797     - return 1;
2798     -
2799     - /* The STIBP bit doesn't fault even if it's not advertised */
2800     - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
2801     - return 1;
2802     -
2803     - vmx->spec_ctrl = data;
2804     -
2805     - if (!data)
2806     - break;
2807     -
2808     - /*
2809     - * For non-nested:
2810     - * When it's written (to non-zero) for the first time, pass
2811     - * it through.
2812     - *
2813     - * For nested:
2814     - * The handling of the MSR bitmap for L2 guests is done in
2815     - * nested_vmx_prepare_msr_bitmap. We should not touch the
2816     - * vmcs02.msr_bitmap here since it gets completely overwritten
2817     - * in the merging. We update the vmcs01 here for L1 as well
2818     - * since it will end up touching the MSR anyway now.
2819     - */
2820     - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
2821     - MSR_IA32_SPEC_CTRL,
2822     - MSR_TYPE_RW);
2823     - break;
2824     - case MSR_IA32_TSX_CTRL:
2825     - if (!msr_info->host_initiated &&
2826     - !(vcpu->arch.arch_capabilities & ARCH_CAP_TSX_CTRL_MSR))
2827     - return 1;
2828     - if (data & ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR))
2829     - return 1;
2830     - goto find_shared_msr;
2831     - case MSR_IA32_PRED_CMD:
2832     - if (!msr_info->host_initiated &&
2833     - !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
2834     - return 1;
2835     -
2836     - if (data & ~PRED_CMD_IBPB)
2837     - return 1;
2838     -
2839     - if (!data)
2840     - break;
2841     -
2842     - wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2843     -
2844     - /*
2845     - * For non-nested:
2846     - * When it's written (to non-zero) for the first time, pass
2847     - * it through.
2848     - *
2849     - * For nested:
2850     - * The handling of the MSR bitmap for L2 guests is done in
2851     - * nested_vmx_prepare_msr_bitmap. We should not touch the
2852     - * vmcs02.msr_bitmap here since it gets completely overwritten
2853     - * in the merging.
2854     - */
2855     - vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
2856     - MSR_TYPE_W);
2857     - break;
2858     - case MSR_IA32_CR_PAT:
2859     - if (!kvm_pat_valid(data))
2860     - return 1;
2861     -
2862     - if (is_guest_mode(vcpu) &&
2863     - get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
2864     - get_vmcs12(vcpu)->guest_ia32_pat = data;
2865     -
2866     - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2867     - vmcs_write64(GUEST_IA32_PAT, data);
2868     - vcpu->arch.pat = data;
2869     - break;
2870     - }
2871     - ret = kvm_set_msr_common(vcpu, msr_info);
2872     - break;
2873     - case MSR_IA32_TSC_ADJUST:
2874     - ret = kvm_set_msr_common(vcpu, msr_info);
2875     - break;
2876     - case MSR_IA32_MCG_EXT_CTL:
2877     - if ((!msr_info->host_initiated &&
2878     - !(to_vmx(vcpu)->msr_ia32_feature_control &
2879     - FEATURE_CONTROL_LMCE)) ||
2880     - (data & ~MCG_EXT_CTL_LMCE_EN))
2881     - return 1;
2882     - vcpu->arch.mcg_ext_ctl = data;
2883     - break;
2884     - case MSR_IA32_FEATURE_CONTROL:
2885     - if (!vmx_feature_control_msr_valid(vcpu, data) ||
2886     - (to_vmx(vcpu)->msr_ia32_feature_control &
2887     - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2888     - return 1;
2889     - vmx->msr_ia32_feature_control = data;
2890     - if (msr_info->host_initiated && data == 0)
2891     - vmx_leave_nested(vcpu);
2892     - break;
2893     - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2894     - if (!msr_info->host_initiated)
2895     - return 1; /* they are read-only */
2896     - if (!nested_vmx_allowed(vcpu))
2897     - return 1;
2898     - return vmx_set_vmx_msr(vcpu, msr_index, data);
2899     - case MSR_IA32_RTIT_CTL:
2900     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2901     - vmx_rtit_ctl_check(vcpu, data) ||
2902     - vmx->nested.vmxon)
2903     - return 1;
2904     - vmcs_write64(GUEST_IA32_RTIT_CTL, data);
2905     - vmx->pt_desc.guest.ctl = data;
2906     - pt_update_intercept_for_msr(vmx);
2907     - break;
2908     - case MSR_IA32_RTIT_STATUS:
2909     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2910     - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2911     - (data & MSR_IA32_RTIT_STATUS_MASK))
2912     - return 1;
2913     - vmx->pt_desc.guest.status = data;
2914     - break;
2915     - case MSR_IA32_RTIT_CR3_MATCH:
2916     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2917     - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2918     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2919     - PT_CAP_cr3_filtering))
2920     - return 1;
2921     - vmx->pt_desc.guest.cr3_match = data;
2922     - break;
2923     - case MSR_IA32_RTIT_OUTPUT_BASE:
2924     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2925     - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2926     - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2927     - PT_CAP_topa_output) &&
2928     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2929     - PT_CAP_single_range_output)) ||
2930     - (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
2931     - return 1;
2932     - vmx->pt_desc.guest.output_base = data;
2933     - break;
2934     - case MSR_IA32_RTIT_OUTPUT_MASK:
2935     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2936     - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2937     - (!intel_pt_validate_cap(vmx->pt_desc.caps,
2938     - PT_CAP_topa_output) &&
2939     - !intel_pt_validate_cap(vmx->pt_desc.caps,
2940     - PT_CAP_single_range_output)))
2941     - return 1;
2942     - vmx->pt_desc.guest.output_mask = data;
2943     - break;
2944     - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2945     - index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2946     - if ((pt_mode != PT_MODE_HOST_GUEST) ||
2947     - (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2948     - (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2949     - PT_CAP_num_address_ranges)))
2950     - return 1;
2951     - if (is_noncanonical_address(data, vcpu))
2952     - return 1;
2953     - if (index % 2)
2954     - vmx->pt_desc.guest.addr_b[index / 2] = data;
2955     - else
2956     - vmx->pt_desc.guest.addr_a[index / 2] = data;
2957     - break;
2958     - case MSR_TSC_AUX:
2959     - if (!msr_info->host_initiated &&
2960     - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2961     - return 1;
2962     - /* Check reserved bit, higher 32 bits should be zero */
2963     - if ((data >> 32) != 0)
2964     - return 1;
2965     - goto find_shared_msr;
2966     -
2967     - default:
2968     - find_shared_msr:
2969     - msr = find_msr_entry(vmx, msr_index);
2970     - if (msr)
2971     - ret = vmx_set_guest_msr(vmx, msr, data);
2972     - else
2973     - ret = kvm_set_msr_common(vcpu, msr_info);
2974     - }
2975     -
2976     - return ret;
2977     -}
2978     -
2979     -static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2980     -{
2981     - kvm_register_mark_available(vcpu, reg);
2982     -
2983     - switch (reg) {
2984     - case VCPU_REGS_RSP:
2985     - vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
2986     - break;
2987     - case VCPU_REGS_RIP:
2988     - vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
2989     - break;
2990     - case VCPU_EXREG_PDPTR:
2991     - if (enable_ept)
2992     - ept_save_pdptrs(vcpu);
2993     - break;
2994     - case VCPU_EXREG_CR3:
2995     - if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
2996     - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2997     - break;
2998     - default:
2999     - WARN_ON_ONCE(1);
3000     - break;
3001     - }
3002     -}
3003     -
3004     -static __init int cpu_has_kvm_support(void)
3005     -{
3006     - return cpu_has_vmx();
3007     -}
3008     -
3009     -static __init int vmx_disabled_by_bios(void)
3010     -{
3011     - u64 msr;
3012     -
3013     - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
3014     - if (msr & FEATURE_CONTROL_LOCKED) {
3015     - /* launched w/ TXT and VMX disabled */
3016     - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3017     - && tboot_enabled())
3018     - return 1;
3019     - /* launched w/o TXT and VMX only enabled w/ TXT */
3020     - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3021     - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
3022     - && !tboot_enabled()) {
3023     - printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
3024     - "activate TXT before enabling KVM\n");
3025     - return 1;
3026     - }
3027     - /* launched w/o TXT and VMX disabled */
3028     - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
3029     - && !tboot_enabled())
3030     - return 1;
3031     - }
3032     -
3033     - return 0;
3034     -}
3035     -
3036     -static void kvm_cpu_vmxon(u64 addr)
3037     -{
3038     - cr4_set_bits(X86_CR4_VMXE);
3039     - intel_pt_handle_vmx(1);
3040     -
3041     - asm volatile ("vmxon %0" : : "m"(addr));
3042     -}
3043     -
3044     -static int hardware_enable(void)
3045     -{
3046     - int cpu = raw_smp_processor_id();
3047     - u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
3048     - u64 old, test_bits;
3049     -
3050     - if (cr4_read_shadow() & X86_CR4_VMXE)
3051     - return -EBUSY;
3052     -
3053     - /*
3054     - * This can happen if we hot-added a CPU but failed to allocate
3055     - * VP assist page for it.
3056     - */
3057     - if (static_branch_unlikely(&enable_evmcs) &&
3058     - !hv_get_vp_assist_page(cpu))
3059     - return -EFAULT;
3060     -
3061     - INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
3062     - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
3063     - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
3064     -
3065     - /*
3066     - * Now we can enable the vmclear operation in kdump
3067     - * since the loaded_vmcss_on_cpu list on this cpu
3068     - * has been initialized.
3069     - *
3070     - * Though the cpu is not in VMX operation yet, it is
3071     - * safe to enable the vmclear operation because the
3072     - * loaded_vmcss_on_cpu list is empty.
3073     - */
3074     - crash_enable_local_vmclear(cpu);
3075     -
3076     - rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
3077     -
3078     - test_bits = FEATURE_CONTROL_LOCKED;
3079     - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
3080     - if (tboot_enabled())
3081     - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
3082     -
3083     - if ((old & test_bits) != test_bits) {
3084     - /* enable and lock */
3085     - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
3086     - }
3087     - kvm_cpu_vmxon(phys_addr);
3088     - if (enable_ept)
3089     - ept_sync_global();
3090     -
3091     - return 0;
3092     -}
3093     -
3094     -static void vmclear_local_loaded_vmcss(void)
3095     -{
3096     - int cpu = raw_smp_processor_id();
3097     - struct loaded_vmcs *v, *n;
3098     -
3099     - list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
3100     - loaded_vmcss_on_cpu_link)
3101     - __loaded_vmcs_clear(v);
3102     -}
3103     -
3104     -
3105     -/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
3106     - * tricks.
3107     - */
3108     -static void kvm_cpu_vmxoff(void)
3109     -{
3110     - asm volatile (__ex("vmxoff"));
3111     -
3112     - intel_pt_handle_vmx(0);
3113     - cr4_clear_bits(X86_CR4_VMXE);
3114     -}
3115     -
3116     -static void hardware_disable(void)
3117     -{
3118     - vmclear_local_loaded_vmcss();
3119     - kvm_cpu_vmxoff();
3120     -}
3121     -
3122     -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
3123     - u32 msr, u32 *result)
3124     -{
3125     - u32 vmx_msr_low, vmx_msr_high;
3126     - u32 ctl = ctl_min | ctl_opt;
3127     -
3128     - rdmsr(msr, vmx_msr_low, vmx_msr_high);
3129     -
3130     - ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
3131     - ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
3132     -
3133     - /* Ensure minimum (required) set of control bits are supported. */
3134     - if (ctl_min & ~ctl)
3135     - return -EIO;
3136     -
3137     - *result = ctl;
3138     - return 0;
3139     -}
3140     -
3141     -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
3142     - struct vmx_capability *vmx_cap)
3143     -{
3144     - u32 vmx_msr_low, vmx_msr_high;
3145     - u32 min, opt, min2, opt2;
3146     - u32 _pin_based_exec_control = 0;
3147     - u32 _cpu_based_exec_control = 0;
3148     - u32 _cpu_based_2nd_exec_control = 0;
3149     - u32 _vmexit_control = 0;
3150     - u32 _vmentry_control = 0;
3151     -
3152     - memset(vmcs_conf, 0, sizeof(*vmcs_conf));
3153     - min = CPU_BASED_HLT_EXITING |
3154     -#ifdef CONFIG_X86_64
3155     - CPU_BASED_CR8_LOAD_EXITING |
3156     - CPU_BASED_CR8_STORE_EXITING |
3157     -#endif
3158     - CPU_BASED_CR3_LOAD_EXITING |
3159     - CPU_BASED_CR3_STORE_EXITING |
3160     - CPU_BASED_UNCOND_IO_EXITING |
3161     - CPU_BASED_MOV_DR_EXITING |
3162     - CPU_BASED_USE_TSC_OFFSETTING |
3163     - CPU_BASED_MWAIT_EXITING |
3164     - CPU_BASED_MONITOR_EXITING |
3165     - CPU_BASED_INVLPG_EXITING |
3166     - CPU_BASED_RDPMC_EXITING;
3167     -
3168     - opt = CPU_BASED_TPR_SHADOW |
3169     - CPU_BASED_USE_MSR_BITMAPS |
3170     - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
3171     - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
3172     - &_cpu_based_exec_control) < 0)
3173     - return -EIO;
3174     -#ifdef CONFIG_X86_64
3175     - if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3176     - _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
3177     - ~CPU_BASED_CR8_STORE_EXITING;
3178     -#endif
3179     - if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
3180     - min2 = 0;
3181     - opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
3182     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3183     - SECONDARY_EXEC_WBINVD_EXITING |
3184     - SECONDARY_EXEC_ENABLE_VPID |
3185     - SECONDARY_EXEC_ENABLE_EPT |
3186     - SECONDARY_EXEC_UNRESTRICTED_GUEST |
3187     - SECONDARY_EXEC_PAUSE_LOOP_EXITING |
3188     - SECONDARY_EXEC_DESC |
3189     - SECONDARY_EXEC_RDTSCP |
3190     - SECONDARY_EXEC_ENABLE_INVPCID |
3191     - SECONDARY_EXEC_APIC_REGISTER_VIRT |
3192     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
3193     - SECONDARY_EXEC_SHADOW_VMCS |
3194     - SECONDARY_EXEC_XSAVES |
3195     - SECONDARY_EXEC_RDSEED_EXITING |
3196     - SECONDARY_EXEC_RDRAND_EXITING |
3197     - SECONDARY_EXEC_ENABLE_PML |
3198     - SECONDARY_EXEC_TSC_SCALING |
3199     - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
3200     - SECONDARY_EXEC_PT_USE_GPA |
3201     - SECONDARY_EXEC_PT_CONCEAL_VMX |
3202     - SECONDARY_EXEC_ENABLE_VMFUNC |
3203     - SECONDARY_EXEC_ENCLS_EXITING;
3204     - if (adjust_vmx_controls(min2, opt2,
3205     - MSR_IA32_VMX_PROCBASED_CTLS2,
3206     - &_cpu_based_2nd_exec_control) < 0)
3207     - return -EIO;
3208     - }
3209     -#ifndef CONFIG_X86_64
3210     - if (!(_cpu_based_2nd_exec_control &
3211     - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
3212     - _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
3213     -#endif
3214     -
3215     - if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
3216     - _cpu_based_2nd_exec_control &= ~(
3217     - SECONDARY_EXEC_APIC_REGISTER_VIRT |
3218     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
3219     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3220     -
3221     - rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
3222     - &vmx_cap->ept, &vmx_cap->vpid);
3223     -
3224     - if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
3225     - /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
3226     - enabled */
3227     - _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
3228     - CPU_BASED_CR3_STORE_EXITING |
3229     - CPU_BASED_INVLPG_EXITING);
3230     - } else if (vmx_cap->ept) {
3231     - vmx_cap->ept = 0;
3232     - pr_warn_once("EPT CAP should not exist if not support "
3233     - "1-setting enable EPT VM-execution control\n");
3234     - }
3235     - if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
3236     - vmx_cap->vpid) {
3237     - vmx_cap->vpid = 0;
3238     - pr_warn_once("VPID CAP should not exist if not support "
3239     - "1-setting enable VPID VM-execution control\n");
3240     - }
3241     -
3242     - min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
3243     -#ifdef CONFIG_X86_64
3244     - min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
3245     -#endif
3246     - opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
3247     - VM_EXIT_LOAD_IA32_PAT |
3248     - VM_EXIT_LOAD_IA32_EFER |
3249     - VM_EXIT_CLEAR_BNDCFGS |
3250     - VM_EXIT_PT_CONCEAL_PIP |
3251     - VM_EXIT_CLEAR_IA32_RTIT_CTL;
3252     - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
3253     - &_vmexit_control) < 0)
3254     - return -EIO;
3255     -
3256     - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
3257     - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
3258     - PIN_BASED_VMX_PREEMPTION_TIMER;
3259     - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
3260     - &_pin_based_exec_control) < 0)
3261     - return -EIO;
3262     -
3263     - if (cpu_has_broken_vmx_preemption_timer())
3264     - _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
3265     - if (!(_cpu_based_2nd_exec_control &
3266     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
3267     - _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
3268     -
3269     - min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
3270     - opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
3271     - VM_ENTRY_LOAD_IA32_PAT |
3272     - VM_ENTRY_LOAD_IA32_EFER |
3273     - VM_ENTRY_LOAD_BNDCFGS |
3274     - VM_ENTRY_PT_CONCEAL_PIP |
3275     - VM_ENTRY_LOAD_IA32_RTIT_CTL;
3276     - if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
3277     - &_vmentry_control) < 0)
3278     - return -EIO;
3279     -
3280     - /*
3281     - * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
3282     - * can't be used due to an errata where VM Exit may incorrectly clear
3283     - * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the
3284     - * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
3285     - */
3286     - if (boot_cpu_data.x86 == 0x6) {
3287     - switch (boot_cpu_data.x86_model) {
3288     - case 26: /* AAK155 */
3289     - case 30: /* AAP115 */
3290     - case 37: /* AAT100 */
3291     - case 44: /* BC86,AAY89,BD102 */
3292     - case 46: /* BA97 */
3293     - _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
3294     - _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
3295     - pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
3296     - "does not work properly. Using workaround\n");
3297     - break;
3298     - default:
3299     - break;
3300     - }
3301     - }
3302     -
3303     -
3304     - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
3305     -
3306     - /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
3307     - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
3308     - return -EIO;
3309     -
3310     -#ifdef CONFIG_X86_64
3311     - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
3312     - if (vmx_msr_high & (1u<<16))
3313     - return -EIO;
3314     -#endif
3315     -
3316     - /* Require Write-Back (WB) memory type for VMCS accesses. */
3317     - if (((vmx_msr_high >> 18) & 15) != 6)
3318     - return -EIO;
3319     -
3320     - vmcs_conf->size = vmx_msr_high & 0x1fff;
3321     - vmcs_conf->order = get_order(vmcs_conf->size);
3322     - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
3323     -
3324     - vmcs_conf->revision_id = vmx_msr_low;
3325     -
3326     - vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
3327     - vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
3328     - vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
3329     - vmcs_conf->vmexit_ctrl = _vmexit_control;
3330     - vmcs_conf->vmentry_ctrl = _vmentry_control;
3331     -
3332     - if (static_branch_unlikely(&enable_evmcs))
3333     - evmcs_sanitize_exec_ctrls(vmcs_conf);
3334     -
3335     - return 0;
3336     -}
3337     -
3338     -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
3339     -{
3340     - int node = cpu_to_node(cpu);
3341     - struct page *pages;
3342     - struct vmcs *vmcs;
3343     -
3344     - pages = __alloc_pages_node(node, flags, vmcs_config.order);
3345     - if (!pages)
3346     - return NULL;
3347     - vmcs = page_address(pages);
3348     - memset(vmcs, 0, vmcs_config.size);
3349     -
3350     - /* KVM supports Enlightened VMCS v1 only */
3351     - if (static_branch_unlikely(&enable_evmcs))
3352     - vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
3353     - else
3354     - vmcs->hdr.revision_id = vmcs_config.revision_id;
3355     -
3356     - if (shadow)
3357     - vmcs->hdr.shadow_vmcs = 1;
3358     - return vmcs;
3359     -}
3360     -
3361     -void free_vmcs(struct vmcs *vmcs)
3362     -{
3363     - free_pages((unsigned long)vmcs, vmcs_config.order);
3364     -}
3365     -
3366     -/*
3367     - * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
3368     - */
3369     -void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3370     -{
3371     - if (!loaded_vmcs->vmcs)
3372     - return;
3373     - loaded_vmcs_clear(loaded_vmcs);
3374     - free_vmcs(loaded_vmcs->vmcs);
3375     - loaded_vmcs->vmcs = NULL;
3376     - if (loaded_vmcs->msr_bitmap)
3377     - free_page((unsigned long)loaded_vmcs->msr_bitmap);
3378     - WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
3379     -}
3380     -
3381     -int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
3382     -{
3383     - loaded_vmcs->vmcs = alloc_vmcs(false);
3384     - if (!loaded_vmcs->vmcs)
3385     - return -ENOMEM;
3386     -
3387     - loaded_vmcs->shadow_vmcs = NULL;
3388     - loaded_vmcs->hv_timer_soft_disabled = false;
3389     - loaded_vmcs_init(loaded_vmcs);
3390     -
3391     - if (cpu_has_vmx_msr_bitmap()) {
3392     - loaded_vmcs->msr_bitmap = (unsigned long *)
3393     - __get_free_page(GFP_KERNEL_ACCOUNT);
3394     - if (!loaded_vmcs->msr_bitmap)
3395     - goto out_vmcs;
3396     - memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
3397     -
3398     - if (IS_ENABLED(CONFIG_HYPERV) &&
3399     - static_branch_unlikely(&enable_evmcs) &&
3400     - (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
3401     - struct hv_enlightened_vmcs *evmcs =
3402     - (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
3403     -
3404     - evmcs->hv_enlightenments_control.msr_bitmap = 1;
3405     - }
3406     - }
3407     -
3408     - memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
3409     - memset(&loaded_vmcs->controls_shadow, 0,
3410     - sizeof(struct vmcs_controls_shadow));
3411     -
3412     - return 0;
3413     -
3414     -out_vmcs:
3415     - free_loaded_vmcs(loaded_vmcs);
3416     - return -ENOMEM;
3417     -}
3418     -
3419     -static void free_kvm_area(void)
3420     -{
3421     - int cpu;
3422     -
3423     - for_each_possible_cpu(cpu) {
3424     - free_vmcs(per_cpu(vmxarea, cpu));
3425     - per_cpu(vmxarea, cpu) = NULL;
3426     - }
3427     -}
3428     -
3429     -static __init int alloc_kvm_area(void)
3430     -{
3431     - int cpu;
3432     -
3433     - for_each_possible_cpu(cpu) {
3434     - struct vmcs *vmcs;
3435     -
3436     - vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
3437     - if (!vmcs) {
3438     - free_kvm_area();
3439     - return -ENOMEM;
3440     - }
3441     -
3442     - /*
3443     - * When eVMCS is enabled, alloc_vmcs_cpu() sets
3444     - * vmcs->revision_id to KVM_EVMCS_VERSION instead of
3445     - * revision_id reported by MSR_IA32_VMX_BASIC.
3446     - *
3447     - * However, even though not explicitly documented by
3448     - * TLFS, VMXArea passed as VMXON argument should
3449     - * still be marked with revision_id reported by
3450     - * physical CPU.
3451     - */
3452     - if (static_branch_unlikely(&enable_evmcs))
3453     - vmcs->hdr.revision_id = vmcs_config.revision_id;
3454     -
3455     - per_cpu(vmxarea, cpu) = vmcs;
3456     - }
3457     - return 0;
3458     -}
3459     -
3460     -static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
3461     - struct kvm_segment *save)
3462     -{
3463     - if (!emulate_invalid_guest_state) {
3464     - /*
3465     - * CS and SS RPL should be equal during guest entry according
3466     - * to VMX spec, but in reality it is not always so. Since vcpu
3467     - * is in the middle of the transition from real mode to
3468     - * protected mode it is safe to assume that RPL 0 is a good
3469     - * default value.
3470     - */
3471     - if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
3472     - save->selector &= ~SEGMENT_RPL_MASK;
3473     - save->dpl = save->selector & SEGMENT_RPL_MASK;
3474     - save->s = 1;
3475     - }
3476     - vmx_set_segment(vcpu, save, seg);
3477     -}
3478     -
3479     -static void enter_pmode(struct kvm_vcpu *vcpu)
3480     -{
3481     - unsigned long flags;
3482     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3483     -
3484     - /*
3485     - * Update real mode segment cache. It may not be up-to-date if a segment
3486     - * register was written while vcpu was in a guest mode.
3487     - */
3488     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
3489     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
3490     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
3491     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
3492     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
3493     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
3494     -
3495     - vmx->rmode.vm86_active = 0;
3496     -
3497     - vmx_segment_cache_clear(vmx);
3498     -
3499     - vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
3500     -
3501     - flags = vmcs_readl(GUEST_RFLAGS);
3502     - flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
3503     - flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
3504     - vmcs_writel(GUEST_RFLAGS, flags);
3505     -
3506     - vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
3507     - (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
3508     -
3509     - update_exception_bitmap(vcpu);
3510     -
3511     - fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
3512     - fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
3513     - fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
3514     - fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3515     - fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3516     - fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3517     -}
3518     -
3519     -static void fix_rmode_seg(int seg, struct kvm_segment *save)
3520     -{
3521     - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3522     - struct kvm_segment var = *save;
3523     -
3524     - var.dpl = 0x3;
3525     - if (seg == VCPU_SREG_CS)
3526     - var.type = 0x3;
3527     -
3528     - if (!emulate_invalid_guest_state) {
3529     - var.selector = var.base >> 4;
3530     - var.base = var.base & 0xffff0;
3531     - var.limit = 0xffff;
3532     - var.g = 0;
3533     - var.db = 0;
3534     - var.present = 1;
3535     - var.s = 1;
3536     - var.l = 0;
3537     - var.unusable = 0;
3538     - var.type = 0x3;
3539     - var.avl = 0;
3540     - if (save->base & 0xf)
3541     - printk_once(KERN_WARNING "kvm: segment base is not "
3542     - "paragraph aligned when entering "
3543     - "protected mode (seg=%d)", seg);
3544     - }
3545     -
3546     - vmcs_write16(sf->selector, var.selector);
3547     - vmcs_writel(sf->base, var.base);
3548     - vmcs_write32(sf->limit, var.limit);
3549     - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
3550     -}
3551     -
3552     -static void enter_rmode(struct kvm_vcpu *vcpu)
3553     -{
3554     - unsigned long flags;
3555     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3556     - struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
3557     -
3558     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
3559     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
3560     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
3561     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
3562     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
3563     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
3564     - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
3565     -
3566     - vmx->rmode.vm86_active = 1;
3567     -
3568     - /*
3569     - * Very old userspace does not call KVM_SET_TSS_ADDR before entering
3570     - * vcpu. Warn the user that an update is overdue.
3571     - */
3572     - if (!kvm_vmx->tss_addr)
3573     - printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
3574     - "called before entering vcpu\n");
3575     -
3576     - vmx_segment_cache_clear(vmx);
3577     -
3578     - vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
3579     - vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
3580     - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
3581     -
3582     - flags = vmcs_readl(GUEST_RFLAGS);
3583     - vmx->rmode.save_rflags = flags;
3584     -
3585     - flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
3586     -
3587     - vmcs_writel(GUEST_RFLAGS, flags);
3588     - vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
3589     - update_exception_bitmap(vcpu);
3590     -
3591     - fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
3592     - fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
3593     - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
3594     - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
3595     - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
3596     - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
3597     -
3598     - kvm_mmu_reset_context(vcpu);
3599     -}
3600     -
3601     -void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
3602     -{
3603     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3604     - struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
3605     -
3606     - if (!msr)
3607     - return;
3608     -
3609     - vcpu->arch.efer = efer;
3610     - if (efer & EFER_LMA) {
3611     - vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3612     - msr->data = efer;
3613     - } else {
3614     - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3615     -
3616     - msr->data = efer & ~EFER_LME;
3617     - }
3618     - setup_msrs(vmx);
3619     -}
3620     -
3621     -#ifdef CONFIG_X86_64
3622     -
3623     -static void enter_lmode(struct kvm_vcpu *vcpu)
3624     -{
3625     - u32 guest_tr_ar;
3626     -
3627     - vmx_segment_cache_clear(to_vmx(vcpu));
3628     -
3629     - guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
3630     - if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
3631     - pr_debug_ratelimited("%s: tss fixup for long mode. \n",
3632     - __func__);
3633     - vmcs_write32(GUEST_TR_AR_BYTES,
3634     - (guest_tr_ar & ~VMX_AR_TYPE_MASK)
3635     - | VMX_AR_TYPE_BUSY_64_TSS);
3636     - }
3637     - vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
3638     -}
3639     -
3640     -static void exit_lmode(struct kvm_vcpu *vcpu)
3641     -{
3642     - vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
3643     - vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
3644     -}
3645     -
3646     -#endif
3647     -
3648     -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
3649     -{
3650     - int vpid = to_vmx(vcpu)->vpid;
3651     -
3652     - if (!vpid_sync_vcpu_addr(vpid, addr))
3653     - vpid_sync_context(vpid);
3654     -
3655     - /*
3656     - * If VPIDs are not supported or enabled, then the above is a no-op.
3657     - * But we don't really need a TLB flush in that case anyway, because
3658     - * each VM entry/exit includes an implicit flush when VPID is 0.
3659     - */
3660     -}
3661     -
3662     -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
3663     -{
3664     - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
3665     -
3666     - vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
3667     - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
3668     -}
3669     -
3670     -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
3671     -{
3672     - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
3673     -
3674     - vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
3675     - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
3676     -}
3677     -
3678     -static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
3679     -{
3680     - struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3681     -
3682     - if (!kvm_register_is_dirty(vcpu, VCPU_EXREG_PDPTR))
3683     - return;
3684     -
3685     - if (is_pae_paging(vcpu)) {
3686     - vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
3687     - vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
3688     - vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
3689     - vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
3690     - }
3691     -}
3692     -
3693     -void ept_save_pdptrs(struct kvm_vcpu *vcpu)
3694     -{
3695     - struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
3696     -
3697     - if (is_pae_paging(vcpu)) {
3698     - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
3699     - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
3700     - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
3701     - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
3702     - }
3703     -
3704     - kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
3705     -}
3706     -
3707     -static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
3708     - unsigned long cr0,
3709     - struct kvm_vcpu *vcpu)
3710     -{
3711     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3712     -
3713     - if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
3714     - vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
3715     - if (!(cr0 & X86_CR0_PG)) {
3716     - /* From paging/starting to nonpaging */
3717     - exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
3718     - CPU_BASED_CR3_STORE_EXITING);
3719     - vcpu->arch.cr0 = cr0;
3720     - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
3721     - } else if (!is_paging(vcpu)) {
3722     - /* From nonpaging to paging */
3723     - exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
3724     - CPU_BASED_CR3_STORE_EXITING);
3725     - vcpu->arch.cr0 = cr0;
3726     - vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
3727     - }
3728     -
3729     - if (!(cr0 & X86_CR0_WP))
3730     - *hw_cr0 &= ~X86_CR0_WP;
3731     -}
3732     -
3733     -void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
3734     -{
3735     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3736     - unsigned long hw_cr0;
3737     -
3738     - hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
3739     - if (enable_unrestricted_guest)
3740     - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
3741     - else {
3742     - hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
3743     -
3744     - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
3745     - enter_pmode(vcpu);
3746     -
3747     - if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
3748     - enter_rmode(vcpu);
3749     - }
3750     -
3751     -#ifdef CONFIG_X86_64
3752     - if (vcpu->arch.efer & EFER_LME) {
3753     - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
3754     - enter_lmode(vcpu);
3755     - if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
3756     - exit_lmode(vcpu);
3757     - }
3758     -#endif
3759     -
3760     - if (enable_ept && !enable_unrestricted_guest)
3761     - ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
3762     -
3763     - vmcs_writel(CR0_READ_SHADOW, cr0);
3764     - vmcs_writel(GUEST_CR0, hw_cr0);
3765     - vcpu->arch.cr0 = cr0;
3766     -
3767     - /* depends on vcpu->arch.cr0 to be set to a new value */
3768     - vmx->emulation_required = emulation_required(vcpu);
3769     -}
3770     -
3771     -static int get_ept_level(struct kvm_vcpu *vcpu)
3772     -{
3773     - if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
3774     - return 5;
3775     - return 4;
3776     -}
3777     -
3778     -u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
3779     -{
3780     - u64 eptp = VMX_EPTP_MT_WB;
3781     -
3782     - eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
3783     -
3784     - if (enable_ept_ad_bits &&
3785     - (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
3786     - eptp |= VMX_EPTP_AD_ENABLE_BIT;
3787     - eptp |= (root_hpa & PAGE_MASK);
3788     -
3789     - return eptp;
3790     -}
3791     -
3792     -void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
3793     -{
3794     - struct kvm *kvm = vcpu->kvm;
3795     - bool update_guest_cr3 = true;
3796     - unsigned long guest_cr3;
3797     - u64 eptp;
3798     -
3799     - guest_cr3 = cr3;
3800     - if (enable_ept) {
3801     - eptp = construct_eptp(vcpu, cr3);
3802     - vmcs_write64(EPT_POINTER, eptp);
3803     -
3804     - if (kvm_x86_ops->tlb_remote_flush) {
3805     - spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
3806     - to_vmx(vcpu)->ept_pointer = eptp;
3807     - to_kvm_vmx(kvm)->ept_pointers_match
3808     - = EPT_POINTERS_CHECK;
3809     - spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
3810     - }
3811     -
3812     - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */
3813     - if (is_guest_mode(vcpu))
3814     - update_guest_cr3 = false;
3815     - else if (!enable_unrestricted_guest && !is_paging(vcpu))
3816     - guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
3817     - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
3818     - guest_cr3 = vcpu->arch.cr3;
3819     - else /* vmcs01.GUEST_CR3 is already up-to-date. */
3820     - update_guest_cr3 = false;
3821     - ept_load_pdptrs(vcpu);
3822     - }
3823     -
3824     - if (update_guest_cr3)
3825     - vmcs_writel(GUEST_CR3, guest_cr3);
3826     -}
3827     -
3828     -int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3829     -{
3830     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3831     - /*
3832     - * Pass through host's Machine Check Enable value to hw_cr4, which
3833     - * is in force while we are in guest mode. Do not let guests control
3834     - * this bit, even if host CR4.MCE == 0.
3835     - */
3836     - unsigned long hw_cr4;
3837     -
3838     - hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
3839     - if (enable_unrestricted_guest)
3840     - hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
3841     - else if (vmx->rmode.vm86_active)
3842     - hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
3843     - else
3844     - hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
3845     -
3846     - if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
3847     - if (cr4 & X86_CR4_UMIP) {
3848     - secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
3849     - hw_cr4 &= ~X86_CR4_UMIP;
3850     - } else if (!is_guest_mode(vcpu) ||
3851     - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
3852     - secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
3853     - }
3854     - }
3855     -
3856     - if (cr4 & X86_CR4_VMXE) {
3857     - /*
3858     - * To use VMXON (and later other VMX instructions), a guest
3859     - * must first be able to turn on cr4.VMXE (see handle_vmon()).
3860     - * So basically the check on whether to allow nested VMX
3861     - * is here. We operate under the default treatment of SMM,
3862     - * so VMX cannot be enabled under SMM.
3863     - */
3864     - if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
3865     - return 1;
3866     - }
3867     -
3868     - if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
3869     - return 1;
3870     -
3871     - vcpu->arch.cr4 = cr4;
3872     -
3873     - if (!enable_unrestricted_guest) {
3874     - if (enable_ept) {
3875     - if (!is_paging(vcpu)) {
3876     - hw_cr4 &= ~X86_CR4_PAE;
3877     - hw_cr4 |= X86_CR4_PSE;
3878     - } else if (!(cr4 & X86_CR4_PAE)) {
3879     - hw_cr4 &= ~X86_CR4_PAE;
3880     - }
3881     - }
3882     -
3883     - /*
3884     - * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
3885     - * hardware. To emulate this behavior, SMEP/SMAP/PKU needs
3886     - * to be manually disabled when guest switches to non-paging
3887     - * mode.
3888     - *
3889     - * If !enable_unrestricted_guest, the CPU is always running
3890     - * with CR0.PG=1 and CR4 needs to be modified.
3891     - * If enable_unrestricted_guest, the CPU automatically
3892     - * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
3893     - */
3894     - if (!is_paging(vcpu))
3895     - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
3896     - }
3897     -
3898     - vmcs_writel(CR4_READ_SHADOW, cr4);
3899     - vmcs_writel(GUEST_CR4, hw_cr4);
3900     - return 0;
3901     -}
3902     -
3903     -void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3904     -{
3905     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3906     - u32 ar;
3907     -
3908     - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3909     - *var = vmx->rmode.segs[seg];
3910     - if (seg == VCPU_SREG_TR
3911     - || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3912     - return;
3913     - var->base = vmx_read_guest_seg_base(vmx, seg);
3914     - var->selector = vmx_read_guest_seg_selector(vmx, seg);
3915     - return;
3916     - }
3917     - var->base = vmx_read_guest_seg_base(vmx, seg);
3918     - var->limit = vmx_read_guest_seg_limit(vmx, seg);
3919     - var->selector = vmx_read_guest_seg_selector(vmx, seg);
3920     - ar = vmx_read_guest_seg_ar(vmx, seg);
3921     - var->unusable = (ar >> 16) & 1;
3922     - var->type = ar & 15;
3923     - var->s = (ar >> 4) & 1;
3924     - var->dpl = (ar >> 5) & 3;
3925     - /*
3926     - * Some userspaces do not preserve the unusable property. Since a usable
3927     - * segment has to be present according to the VMX spec, we can use the
3928     - * present property to work around the userspace bug by making an unusable
3929     - * segment always nonpresent. vmx_segment_access_rights() already marks a
3930     - * nonpresent segment as unusable.
3931     - */
3932     - var->present = !var->unusable;
3933     - var->avl = (ar >> 12) & 1;
3934     - var->l = (ar >> 13) & 1;
3935     - var->db = (ar >> 14) & 1;
3936     - var->g = (ar >> 15) & 1;
3937     -}
3938     -
3939     -static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3940     -{
3941     - struct kvm_segment s;
3942     -
3943     - if (to_vmx(vcpu)->rmode.vm86_active) {
3944     - vmx_get_segment(vcpu, &s, seg);
3945     - return s.base;
3946     - }
3947     - return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3948     -}
3949     -
3950     -int vmx_get_cpl(struct kvm_vcpu *vcpu)
3951     -{
3952     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3953     -
3954     - if (unlikely(vmx->rmode.vm86_active))
3955     - return 0;
3956     - else {
3957     - int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3958     - return VMX_AR_DPL(ar);
3959     - }
3960     -}
3961     -
3962     -static u32 vmx_segment_access_rights(struct kvm_segment *var)
3963     -{
3964     - u32 ar;
3965     -
3966     - if (var->unusable || !var->present)
3967     - ar = 1 << 16;
3968     - else {
3969     - ar = var->type & 15;
3970     - ar |= (var->s & 1) << 4;
3971     - ar |= (var->dpl & 3) << 5;
3972     - ar |= (var->present & 1) << 7;
3973     - ar |= (var->avl & 1) << 12;
3974     - ar |= (var->l & 1) << 13;
3975     - ar |= (var->db & 1) << 14;
3976     - ar |= (var->g & 1) << 15;
3977     - }
3978     -
3979     - return ar;
3980     -}
3981     -
3982     -void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3983     -{
3984     - struct vcpu_vmx *vmx = to_vmx(vcpu);
3985     - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3986     -
3987     - vmx_segment_cache_clear(vmx);
3988     -
3989     - if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3990     - vmx->rmode.segs[seg] = *var;
3991     - if (seg == VCPU_SREG_TR)
3992     - vmcs_write16(sf->selector, var->selector);
3993     - else if (var->s)
3994     - fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3995     - goto out;
3996     - }
3997     -
3998     - vmcs_writel(sf->base, var->base);
3999     - vmcs_write32(sf->limit, var->limit);
4000     - vmcs_write16(sf->selector, var->selector);
4001     -
4002     - /*
4003     - * Fix the "Accessed" bit in AR field of segment registers for older
4004     - * qemu binaries.
4005     - * IA32 arch specifies that at the time of processor reset the
4006     - * "Accessed" bit in the AR field of segment registers is 1. And qemu
4007     - * is setting it to 0 in the userland code. This causes invalid guest
4008     - * state vmexit when "unrestricted guest" mode is turned on.
4009     - * Fix for this setup issue in cpu_reset is being pushed in the qemu
4010     - * tree. Newer qemu binaries with that qemu fix would not need this
4011     - * kvm hack.
4012     - */
4013     - if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
4014     - var->type |= 0x1; /* Accessed */
4015     -
4016     - vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
4017     -
4018     -out:
4019     - vmx->emulation_required = emulation_required(vcpu);
4020     -}
4021     -
4022     -static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
4023     -{
4024     - u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
4025     -
4026     - *db = (ar >> 14) & 1;
4027     - *l = (ar >> 13) & 1;
4028     -}
4029     -
4030     -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4031     -{
4032     - dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
4033     - dt->address = vmcs_readl(GUEST_IDTR_BASE);
4034     -}
4035     -
4036     -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4037     -{
4038     - vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
4039     - vmcs_writel(GUEST_IDTR_BASE, dt->address);
4040     -}
4041     -
4042     -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4043     -{
4044     - dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
4045     - dt->address = vmcs_readl(GUEST_GDTR_BASE);
4046     -}
4047     -
4048     -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
4049     -{
4050     - vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
4051     - vmcs_writel(GUEST_GDTR_BASE, dt->address);
4052     -}
4053     -
4054     -static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
4055     -{
4056     - struct kvm_segment var;
4057     - u32 ar;
4058     -
4059     - vmx_get_segment(vcpu, &var, seg);
4060     - var.dpl = 0x3;
4061     - if (seg == VCPU_SREG_CS)
4062     - var.type = 0x3;
4063     - ar = vmx_segment_access_rights(&var);
4064     -
4065     - if (var.base != (var.selector << 4))
4066     - return false;
4067     - if (var.limit != 0xffff)
4068     - return false;
4069     - if (ar != 0xf3)
4070     - return false;
4071     -
4072     - return true;
4073     -}
4074     -
4075     -static bool code_segment_valid(struct kvm_vcpu *vcpu)
4076     -{
4077     - struct kvm_segment cs;
4078     - unsigned int cs_rpl;
4079     -
4080     - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4081     - cs_rpl = cs.selector & SEGMENT_RPL_MASK;
4082     -
4083     - if (cs.unusable)
4084     - return false;
4085     - if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
4086     - return false;
4087     - if (!cs.s)
4088     - return false;
4089     - if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
4090     - if (cs.dpl > cs_rpl)
4091     - return false;
4092     - } else {
4093     - if (cs.dpl != cs_rpl)
4094     - return false;
4095     - }
4096     - if (!cs.present)
4097     - return false;
4098     -
4099     - /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
4100     - return true;
4101     -}
4102     -
4103     -static bool stack_segment_valid(struct kvm_vcpu *vcpu)
4104     -{
4105     - struct kvm_segment ss;
4106     - unsigned int ss_rpl;
4107     -
4108     - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4109     - ss_rpl = ss.selector & SEGMENT_RPL_MASK;
4110     -
4111     - if (ss.unusable)
4112     - return true;
4113     - if (ss.type != 3 && ss.type != 7)
4114     - return false;
4115     - if (!ss.s)
4116     - return false;
4117     - if (ss.dpl != ss_rpl) /* DPL != RPL */
4118     - return false;
4119     - if (!ss.present)
4120     - return false;
4121     -
4122     - return true;
4123     -}
4124     -
4125     -static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
4126     -{
4127     - struct kvm_segment var;
4128     - unsigned int rpl;
4129     -
4130     - vmx_get_segment(vcpu, &var, seg);
4131     - rpl = var.selector & SEGMENT_RPL_MASK;
4132     -
4133     - if (var.unusable)
4134     - return true;
4135     - if (!var.s)
4136     - return false;
4137     - if (!var.present)
4138     - return false;
4139     - if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
4140     - if (var.dpl < rpl) /* DPL < RPL */
4141     - return false;
4142     - }
4143     -
4144     - /* TODO: Add other members to kvm_segment_field to allow checking for other access
4145     - * rights flags
4146     - */
4147     - return true;
4148     -}
4149     -
4150     -static bool tr_valid(struct kvm_vcpu *vcpu)
4151     -{
4152     - struct kvm_segment tr;
4153     -
4154     - vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
4155     -
4156     - if (tr.unusable)
4157     - return false;
4158     - if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4159     - return false;
4160     - if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
4161     - return false;
4162     - if (!tr.present)
4163     - return false;
4164     -
4165     - return true;
4166     -}
4167     -
4168     -static bool ldtr_valid(struct kvm_vcpu *vcpu)
4169     -{
4170     - struct kvm_segment ldtr;
4171     -
4172     - vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
4173     -
4174     - if (ldtr.unusable)
4175     - return true;
4176     - if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
4177     - return false;
4178     - if (ldtr.type != 2)
4179     - return false;
4180     - if (!ldtr.present)
4181     - return false;
4182     -
4183     - return true;
4184     -}
4185     -
4186     -static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
4187     -{
4188     - struct kvm_segment cs, ss;
4189     -
4190     - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
4191     - vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
4192     -
4193     - return ((cs.selector & SEGMENT_RPL_MASK) ==
4194     - (ss.selector & SEGMENT_RPL_MASK));
4195     -}
4196     -
4197     -/*
4198     - * Check if guest state is valid. Returns true if valid, false if
4199     - * not.
4200     - * We assume that registers are always usable
4201     - */
4202     -static bool guest_state_valid(struct kvm_vcpu *vcpu)
4203     -{
4204     - if (enable_unrestricted_guest)
4205     - return true;
4206     -
4207     - /* real mode guest state checks */
4208     - if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
4209     - if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
4210     - return false;
4211     - if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
4212     - return false;
4213     - if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
4214     - return false;
4215     - if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
4216     - return false;
4217     - if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
4218     - return false;
4219     - if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
4220     - return false;
4221     - } else {
4222     - /* protected mode guest state checks */
4223     - if (!cs_ss_rpl_check(vcpu))
4224     - return false;
4225     - if (!code_segment_valid(vcpu))
4226     - return false;
4227     - if (!stack_segment_valid(vcpu))
4228     - return false;
4229     - if (!data_segment_valid(vcpu, VCPU_SREG_DS))
4230     - return false;
4231     - if (!data_segment_valid(vcpu, VCPU_SREG_ES))
4232     - return false;
4233     - if (!data_segment_valid(vcpu, VCPU_SREG_FS))
4234     - return false;
4235     - if (!data_segment_valid(vcpu, VCPU_SREG_GS))
4236     - return false;
4237     - if (!tr_valid(vcpu))
4238     - return false;
4239     - if (!ldtr_valid(vcpu))
4240     - return false;
4241     - }
4242     - /* TODO:
4243     - * - Add checks on RIP
4244     - * - Add checks on RFLAGS
4245     - */
4246     -
4247     - return true;
4248     -}
4249     -
4250     -static int init_rmode_tss(struct kvm *kvm)
4251     -{
4252     - gfn_t fn;
4253     - u16 data = 0;
4254     - int idx, r;
4255     -
4256     - idx = srcu_read_lock(&kvm->srcu);
4257     - fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
4258     - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4259     - if (r < 0)
4260     - goto out;
4261     - data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
4262     - r = kvm_write_guest_page(kvm, fn++, &data,
4263     - TSS_IOPB_BASE_OFFSET, sizeof(u16));
4264     - if (r < 0)
4265     - goto out;
4266     - r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
4267     - if (r < 0)
4268     - goto out;
4269     - r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
4270     - if (r < 0)
4271     - goto out;
4272     - data = ~0;
4273     - r = kvm_write_guest_page(kvm, fn, &data,
4274     - RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
4275     - sizeof(u8));
4276     -out:
4277     - srcu_read_unlock(&kvm->srcu, idx);
4278     - return r;
4279     -}
4280     -
4281     -static int init_rmode_identity_map(struct kvm *kvm)
4282     -{
4283     - struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
4284     - int i, idx, r = 0;
4285     - kvm_pfn_t identity_map_pfn;
4286     - u32 tmp;
4287     -
4288     - /* Protect kvm_vmx->ept_identity_pagetable_done. */
4289     - mutex_lock(&kvm->slots_lock);
4290     -
4291     - if (likely(kvm_vmx->ept_identity_pagetable_done))
4292     - goto out2;
4293     -
4294     - if (!kvm_vmx->ept_identity_map_addr)
4295     - kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
4296     - identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
4297     -
4298     - r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
4299     - kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
4300     - if (r < 0)
4301     - goto out2;
4302     -
4303     - idx = srcu_read_lock(&kvm->srcu);
4304     - r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
4305     - if (r < 0)
4306     - goto out;
4307     - /* Set up identity-mapping pagetable for EPT in real mode */
4308     - for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
4309     - tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
4310     - _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
4311     - r = kvm_write_guest_page(kvm, identity_map_pfn,
4312     - &tmp, i * sizeof(tmp), sizeof(tmp));
4313     - if (r < 0)
4314     - goto out;
4315     - }
4316     - kvm_vmx->ept_identity_pagetable_done = true;
4317     -
4318     -out:
4319     - srcu_read_unlock(&kvm->srcu, idx);
4320     -
4321     -out2:
4322     - mutex_unlock(&kvm->slots_lock);
4323     - return r;
4324     -}
4325     -
4326     -static void seg_setup(int seg)
4327     -{
4328     - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
4329     - unsigned int ar;
4330     -
4331     - vmcs_write16(sf->selector, 0);
4332     - vmcs_writel(sf->base, 0);
4333     - vmcs_write32(sf->limit, 0xffff);
4334     - ar = 0x93;
4335     - if (seg == VCPU_SREG_CS)
4336     - ar |= 0x08; /* code segment */
4337     -
4338     - vmcs_write32(sf->ar_bytes, ar);
4339     -}
4340     -
4341     -static int alloc_apic_access_page(struct kvm *kvm)
4342     -{
4343     - struct page *page;
4344     - int r = 0;
4345     -
4346     - mutex_lock(&kvm->slots_lock);
4347     - if (kvm->arch.apic_access_page_done)
4348     - goto out;
4349     - r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
4350     - APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
4351     - if (r)
4352     - goto out;
4353     -
4354     - page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
4355     - if (is_error_page(page)) {
4356     - r = -EFAULT;
4357     - goto out;
4358     - }
4359     -
4360     - /*
4361     - * Do not pin the page in memory, so that memory hot-unplug
4362     - * is able to migrate it.
4363     - */
4364     - put_page(page);
4365     - kvm->arch.apic_access_page_done = true;
4366     -out:
4367     - mutex_unlock(&kvm->slots_lock);
4368     - return r;
4369     -}
4370     -
4371     -int allocate_vpid(void)
4372     -{
4373     - int vpid;
4374     -
4375     - if (!enable_vpid)
4376     - return 0;
4377     - spin_lock(&vmx_vpid_lock);
4378     - vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
4379     - if (vpid < VMX_NR_VPIDS)
4380     - __set_bit(vpid, vmx_vpid_bitmap);
4381     - else
4382     - vpid = 0;
4383     - spin_unlock(&vmx_vpid_lock);
4384     - return vpid;
4385     -}
4386     -
4387     -void free_vpid(int vpid)
4388     -{
4389     - if (!enable_vpid || vpid == 0)
4390     - return;
4391     - spin_lock(&vmx_vpid_lock);
4392     - __clear_bit(vpid, vmx_vpid_bitmap);
4393     - spin_unlock(&vmx_vpid_lock);
4394     -}
4395     -
4396     -static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
4397     - u32 msr, int type)
4398     -{
4399     - int f = sizeof(unsigned long);
4400     -
4401     - if (!cpu_has_vmx_msr_bitmap())
4402     - return;
4403     -
4404     - if (static_branch_unlikely(&enable_evmcs))
4405     - evmcs_touch_msr_bitmap();
4406     -
4407     - /*
4408     - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4409     - * have the write-low and read-high bitmap offsets the wrong way round.
4410     - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4411     - */
4412     - if (msr <= 0x1fff) {
4413     - if (type & MSR_TYPE_R)
4414     - /* read-low */
4415     - __clear_bit(msr, msr_bitmap + 0x000 / f);
4416     -
4417     - if (type & MSR_TYPE_W)
4418     - /* write-low */
4419     - __clear_bit(msr, msr_bitmap + 0x800 / f);
4420     -
4421     - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4422     - msr &= 0x1fff;
4423     - if (type & MSR_TYPE_R)
4424     - /* read-high */
4425     - __clear_bit(msr, msr_bitmap + 0x400 / f);
4426     -
4427     - if (type & MSR_TYPE_W)
4428     - /* write-high */
4429     - __clear_bit(msr, msr_bitmap + 0xc00 / f);
4430     -
4431     - }
4432     -}
4433     -
4434     -static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
4435     - u32 msr, int type)
4436     -{
4437     - int f = sizeof(unsigned long);
4438     -
4439     - if (!cpu_has_vmx_msr_bitmap())
4440     - return;
4441     -
4442     - if (static_branch_unlikely(&enable_evmcs))
4443     - evmcs_touch_msr_bitmap();
4444     -
4445     - /*
4446     - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
4447     - * have the write-low and read-high bitmap offsets the wrong way round.
4448     - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
4449     - */
4450     - if (msr <= 0x1fff) {
4451     - if (type & MSR_TYPE_R)
4452     - /* read-low */
4453     - __set_bit(msr, msr_bitmap + 0x000 / f);
4454     -
4455     - if (type & MSR_TYPE_W)
4456     - /* write-low */
4457     - __set_bit(msr, msr_bitmap + 0x800 / f);
4458     -
4459     - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
4460     - msr &= 0x1fff;
4461     - if (type & MSR_TYPE_R)
4462     - /* read-high */
4463     - __set_bit(msr, msr_bitmap + 0x400 / f);
4464     -
4465     - if (type & MSR_TYPE_W)
4466     - /* write-high */
4467     - __set_bit(msr, msr_bitmap + 0xc00 / f);
4468     -
4469     - }
4470     -}
4471     -
4472     -static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
4473     - u32 msr, int type, bool value)
4474     -{
4475     - if (value)
4476     - vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
4477     - else
4478     - vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
4479     -}
4480     -
4481     -static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
4482     -{
4483     - u8 mode = 0;
4484     -
4485     - if (cpu_has_secondary_exec_ctrls() &&
4486     - (secondary_exec_controls_get(to_vmx(vcpu)) &
4487     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
4488     - mode |= MSR_BITMAP_MODE_X2APIC;
4489     - if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
4490     - mode |= MSR_BITMAP_MODE_X2APIC_APICV;
4491     - }
4492     -
4493     - return mode;
4494     -}
4495     -
4496     -static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
4497     - u8 mode)
4498     -{
4499     - int msr;
4500     -
4501     - for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
4502     - unsigned word = msr / BITS_PER_LONG;
4503     - msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
4504     - msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
4505     - }
4506     -
4507     - if (mode & MSR_BITMAP_MODE_X2APIC) {
4508     - /*
4509     - * TPR reads and writes can be virtualized even if virtual interrupt
4510     - * delivery is not in use.
4511     - */
4512     - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
4513     - if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
4514     - vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
4515     - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
4516     - vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
4517     - }
4518     - }
4519     -}
4520     -
4521     -void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
4522     -{
4523     - struct vcpu_vmx *vmx = to_vmx(vcpu);
4524     - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
4525     - u8 mode = vmx_msr_bitmap_mode(vcpu);
4526     - u8 changed = mode ^ vmx->msr_bitmap_mode;
4527     -
4528     - if (!changed)
4529     - return;
4530     -
4531     - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
4532     - vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
4533     -
4534     - vmx->msr_bitmap_mode = mode;
4535     -}
4536     -
4537     -void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
4538     -{
4539     - unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
4540     - bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
4541     - u32 i;
4542     -
4543     - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
4544     - MSR_TYPE_RW, flag);
4545     - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
4546     - MSR_TYPE_RW, flag);
4547     - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
4548     - MSR_TYPE_RW, flag);
4549     - vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
4550     - MSR_TYPE_RW, flag);
4551     - for (i = 0; i < vmx->pt_desc.addr_range; i++) {
4552     - vmx_set_intercept_for_msr(msr_bitmap,
4553     - MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
4554     - vmx_set_intercept_for_msr(msr_bitmap,
4555     - MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
4556     - }
4557     -}
4558     -
4559     -static bool vmx_get_enable_apicv(struct kvm *kvm)
4560     -{
4561     - return enable_apicv;
4562     -}
4563     -
4564     -static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
4565     -{
4566     - struct vcpu_vmx *vmx = to_vmx(vcpu);
4567     - void *vapic_page;
4568     - u32 vppr;
4569     - int rvi;
4570     -
4571     - if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
4572     - !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
4573     - WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
4574     - return false;
4575     -
4576     - rvi = vmx_get_rvi();
4577     -
4578     - vapic_page = vmx->nested.virtual_apic_map.hva;
4579     - vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
4580     -
4581     - return ((rvi & 0xf0) > (vppr & 0xf0));
4582     -}
4583     -
4584     -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
4585     - bool nested)
4586     -{
4587     -#ifdef CONFIG_SMP
4588     - int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
4589     -
4590     - if (vcpu->mode == IN_GUEST_MODE) {
4591     - /*
4592     - * The vector of interrupt to be delivered to vcpu had
4593     - * been set in PIR before this function.
4594     - *
4595     - * Following cases will be reached in this block, and
4596     - * we always send a notification event in all cases as
4597     - * explained below.
4598     - *
4599     - * Case 1: vcpu keeps in non-root mode. Sending a
4600     - * notification event posts the interrupt to vcpu.
4601     - *
4602     - * Case 2: vcpu exits to root mode and is still
4603     - * runnable. PIR will be synced to vIRR before the
4604     - * next vcpu entry. Sending a notification event in
4605     - * this case has no effect, as vcpu is not in root
4606     - * mode.
4607     - *
4608     - * Case 3: vcpu exits to root mode and is blocked.
4609     - * vcpu_block() has already synced PIR to vIRR and
4610     - * never blocks vcpu if vIRR is not cleared. Therefore,
4611     - * a blocked vcpu here does not wait for any requested
4612     - * interrupts in PIR, and sending a notification event
4613     - * which has no effect is safe here.
4614     - */
4615     -
4616     - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
4617     - return true;
4618     - }
4619     -#endif
4620     - return false;
4621     -}
4622     -
4623     -static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
4624     - int vector)
4625     -{
4626     - struct vcpu_vmx *vmx = to_vmx(vcpu);
4627     -
4628     - if (is_guest_mode(vcpu) &&
4629     - vector == vmx->nested.posted_intr_nv) {
4630     - /*
4631     - * If a posted intr is not recognized by hardware,
4632     - * we will accomplish it in the next vmentry.
4633     - */
4634     - vmx->nested.pi_pending = true;
4635     - kvm_make_request(KVM_REQ_EVENT, vcpu);
4636     - /* the PIR and ON have been set by L1. */
4637     - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
4638     - kvm_vcpu_kick(vcpu);
4639     - return 0;
4640     - }
4641     - return -1;
4642     -}
4643     -/*
4644     - * Send interrupt to vcpu via posted interrupt way.
4645     - * 1. If target vcpu is running(non-root mode), send posted interrupt
4646     - * notification to vcpu and hardware will sync PIR to vIRR atomically.
4647     - * 2. If target vcpu isn't running(root mode), kick it to pick up the
4648     - * interrupt from PIR in next vmentry.
4649     - */
4650     -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
4651     -{
4652     - struct vcpu_vmx *vmx = to_vmx(vcpu);
4653     - int r;
4654     -
4655     - r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
4656     - if (!r)
4657     - return;
4658     -
4659     - if (pi_test_and_set_pir(vector, &vmx->pi_desc))
4660     - return;
4661     -
4662     - /* If a previous notification has sent the IPI, nothing to do. */
4663     - if (pi_test_and_set_on(&vmx->pi_desc))
4664     - return;
4665     -
4666     - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
4667     - kvm_vcpu_kick(vcpu);
4668     -}
4669     -
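vmx_deliver_posted_interrupt() above only sends the notification IPI when it is the first caller to set the outstanding-notification flag; later senders merely record their vector in the PIR. A minimal stand-alone sketch of that idempotent set-and-notify pattern, using C11 atomics in place of the kernel's pi_test_and_set_pir()/pi_test_and_set_on() helpers (the descriptor layout here is simplified and hypothetical):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct pi_desc {
	atomic_ulong pir[4];	/* 256 posted-interrupt request bits */
	atomic_uint  on;	/* outstanding-notification flag */
};

/* Returns true if this caller must send the notification IPI. */
static bool post_interrupt(struct pi_desc *pi, unsigned vector)
{
	unsigned long mask = 1ul << (vector % 64);

	/* Record the vector; if it was already pending, nothing to do. */
	if (atomic_fetch_or(&pi->pir[vector / 64], mask) & mask)
		return false;

	/* Only the first poster after ON was clear sends the IPI. */
	return atomic_exchange(&pi->on, 1) == 0;
}

int main(void)
{
	struct pi_desc pi = { 0 };

	printf("first post sends IPI: %d\n", post_interrupt(&pi, 0x31));
	printf("second post sends IPI: %d\n", post_interrupt(&pi, 0x32));
	return 0;
}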
4670     -/*
4671     - * Set up the vmcs's constant host-state fields, i.e., host-state fields that
4672     - * will not change in the lifetime of the guest.
4673     - * Note that host-state that does change is set elsewhere. E.g., host-state
4674     - * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
4675     - */
4676     -void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
4677     -{
4678     - u32 low32, high32;
4679     - unsigned long tmpl;
4680     - unsigned long cr0, cr3, cr4;
4681     -
4682     - cr0 = read_cr0();
4683     - WARN_ON(cr0 & X86_CR0_TS);
4684     - vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
4685     -
4686     - /*
4687     - * Save the most likely value for this task's CR3 in the VMCS.
4688     - * We can't use __get_current_cr3_fast() because we're not atomic.
4689     - */
4690     - cr3 = __read_cr3();
4691     - vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
4692     - vmx->loaded_vmcs->host_state.cr3 = cr3;
4693     -
4694     - /* Save the most likely value for this task's CR4 in the VMCS. */
4695     - cr4 = cr4_read_shadow();
4696     - vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
4697     - vmx->loaded_vmcs->host_state.cr4 = cr4;
4698     -
4699     - vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
4700     -#ifdef CONFIG_X86_64
4701     - /*
4702     - * Load null selectors, so we can avoid reloading them in
4703     - * vmx_prepare_switch_to_host(), in case userspace uses
4704     - * the null selectors too (the expected case).
4705     - */
4706     - vmcs_write16(HOST_DS_SELECTOR, 0);
4707     - vmcs_write16(HOST_ES_SELECTOR, 0);
4708     -#else
4709     - vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4710     - vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4711     -#endif
4712     - vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
4713     - vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
4714     -
4715     - vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
4716     -
4717     - vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
4718     -
4719     - rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
4720     - vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
4721     - rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
4722     - vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */
4723     -
4724     - if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
4725     - rdmsr(MSR_IA32_CR_PAT, low32, high32);
4726     - vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
4727     - }
4728     -
4729     - if (cpu_has_load_ia32_efer())
4730     - vmcs_write64(HOST_IA32_EFER, host_efer);
4731     -}
4732     -
4733     -void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
4734     -{
4735     - vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
4736     - if (enable_ept)
4737     - vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
4738     - if (is_guest_mode(&vmx->vcpu))
4739     - vmx->vcpu.arch.cr4_guest_owned_bits &=
4740     - ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
4741     - vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
4742     -}
4743     -
4744     -u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4745     -{
4746     - u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
4747     -
4748     - if (!kvm_vcpu_apicv_active(&vmx->vcpu))
4749     - pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
4750     -
4751     - if (!enable_vnmi)
4752     - pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
4753     -
4754     - if (!enable_preemption_timer)
4755     - pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
4756     -
4757     - return pin_based_exec_ctrl;
4758     -}
4759     -
4760     -static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
4761     -{
4762     - struct vcpu_vmx *vmx = to_vmx(vcpu);
4763     -
4764     - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4765     - if (cpu_has_secondary_exec_ctrls()) {
4766     - if (kvm_vcpu_apicv_active(vcpu))
4767     - secondary_exec_controls_setbit(vmx,
4768     - SECONDARY_EXEC_APIC_REGISTER_VIRT |
4769     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4770     - else
4771     - secondary_exec_controls_clearbit(vmx,
4772     - SECONDARY_EXEC_APIC_REGISTER_VIRT |
4773     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4774     - }
4775     -
4776     - if (cpu_has_vmx_msr_bitmap())
4777     - vmx_update_msr_bitmap(vcpu);
4778     -}
4779     -
4780     -u32 vmx_exec_control(struct vcpu_vmx *vmx)
4781     -{
4782     - u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
4783     -
4784     - if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
4785     - exec_control &= ~CPU_BASED_MOV_DR_EXITING;
4786     -
4787     - if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
4788     - exec_control &= ~CPU_BASED_TPR_SHADOW;
4789     -#ifdef CONFIG_X86_64
4790     - exec_control |= CPU_BASED_CR8_STORE_EXITING |
4791     - CPU_BASED_CR8_LOAD_EXITING;
4792     -#endif
4793     - }
4794     - if (!enable_ept)
4795     - exec_control |= CPU_BASED_CR3_STORE_EXITING |
4796     - CPU_BASED_CR3_LOAD_EXITING |
4797     - CPU_BASED_INVLPG_EXITING;
4798     - if (kvm_mwait_in_guest(vmx->vcpu.kvm))
4799     - exec_control &= ~(CPU_BASED_MWAIT_EXITING |
4800     - CPU_BASED_MONITOR_EXITING);
4801     - if (kvm_hlt_in_guest(vmx->vcpu.kvm))
4802     - exec_control &= ~CPU_BASED_HLT_EXITING;
4803     - return exec_control;
4804     -}
4805     -
4806     -
4807     -static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
4808     -{
4809     - struct kvm_vcpu *vcpu = &vmx->vcpu;
4810     -
4811     - u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
4812     -
4813     - if (pt_mode == PT_MODE_SYSTEM)
4814     - exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
4815     - if (!cpu_need_virtualize_apic_accesses(vcpu))
4816     - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
4817     - if (vmx->vpid == 0)
4818     - exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
4819     - if (!enable_ept) {
4820     - exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
4821     - enable_unrestricted_guest = 0;
4822     - }
4823     - if (!enable_unrestricted_guest)
4824     - exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
4825     - if (kvm_pause_in_guest(vmx->vcpu.kvm))
4826     - exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
4827     - if (!kvm_vcpu_apicv_active(vcpu))
4828     - exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
4829     - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4830     - exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
4831     -
4832     - /* SECONDARY_EXEC_DESC is enabled/disabled on writes to CR4.UMIP,
4833     - * in vmx_set_cr4. */
4834     - exec_control &= ~SECONDARY_EXEC_DESC;
4835     -
4836     - /* SECONDARY_EXEC_SHADOW_VMCS is enabled when L1 executes VMPTRLD
4837     - (handle_vmptrld).
4838     - We can NOT enable shadow_vmcs here because we don't have yet
4839     - a current VMCS12
4840     - */
4841     - exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4842     -
4843     - if (!enable_pml)
4844     - exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4845     -
4846     - if (vmx_xsaves_supported()) {
4847     - /* Exposing XSAVES only when XSAVE is exposed */
4848     - bool xsaves_enabled =
4849     - guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4850     - guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
4851     -
4852     - vcpu->arch.xsaves_enabled = xsaves_enabled;
4853     -
4854     - if (!xsaves_enabled)
4855     - exec_control &= ~SECONDARY_EXEC_XSAVES;
4856     -
4857     - if (nested) {
4858     - if (xsaves_enabled)
4859     - vmx->nested.msrs.secondary_ctls_high |=
4860     - SECONDARY_EXEC_XSAVES;
4861     - else
4862     - vmx->nested.msrs.secondary_ctls_high &=
4863     - ~SECONDARY_EXEC_XSAVES;
4864     - }
4865     - }
4866     -
4867     - if (vmx_rdtscp_supported()) {
4868     - bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
4869     - if (!rdtscp_enabled)
4870     - exec_control &= ~SECONDARY_EXEC_RDTSCP;
4871     -
4872     - if (nested) {
4873     - if (rdtscp_enabled)
4874     - vmx->nested.msrs.secondary_ctls_high |=
4875     - SECONDARY_EXEC_RDTSCP;
4876     - else
4877     - vmx->nested.msrs.secondary_ctls_high &=
4878     - ~SECONDARY_EXEC_RDTSCP;
4879     - }
4880     - }
4881     -
4882     - if (vmx_invpcid_supported()) {
4883     - /* Exposing INVPCID only when PCID is exposed */
4884     - bool invpcid_enabled =
4885     - guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
4886     - guest_cpuid_has(vcpu, X86_FEATURE_PCID);
4887     -
4888     - if (!invpcid_enabled) {
4889     - exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
4890     - guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
4891     - }
4892     -
4893     - if (nested) {
4894     - if (invpcid_enabled)
4895     - vmx->nested.msrs.secondary_ctls_high |=
4896     - SECONDARY_EXEC_ENABLE_INVPCID;
4897     - else
4898     - vmx->nested.msrs.secondary_ctls_high &=
4899     - ~SECONDARY_EXEC_ENABLE_INVPCID;
4900     - }
4901     - }
4902     -
4903     - if (vmx_rdrand_supported()) {
4904     - bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
4905     - if (rdrand_enabled)
4906     - exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
4907     -
4908     - if (nested) {
4909     - if (rdrand_enabled)
4910     - vmx->nested.msrs.secondary_ctls_high |=
4911     - SECONDARY_EXEC_RDRAND_EXITING;
4912     - else
4913     - vmx->nested.msrs.secondary_ctls_high &=
4914     - ~SECONDARY_EXEC_RDRAND_EXITING;
4915     - }
4916     - }
4917     -
4918     - if (vmx_rdseed_supported()) {
4919     - bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
4920     - if (rdseed_enabled)
4921     - exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
4922     -
4923     - if (nested) {
4924     - if (rdseed_enabled)
4925     - vmx->nested.msrs.secondary_ctls_high |=
4926     - SECONDARY_EXEC_RDSEED_EXITING;
4927     - else
4928     - vmx->nested.msrs.secondary_ctls_high &=
4929     - ~SECONDARY_EXEC_RDSEED_EXITING;
4930     - }
4931     - }
4932     -
4933     - if (vmx_waitpkg_supported()) {
4934     - bool waitpkg_enabled =
4935     - guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
4936     -
4937     - if (!waitpkg_enabled)
4938     - exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4939     -
4940     - if (nested) {
4941     - if (waitpkg_enabled)
4942     - vmx->nested.msrs.secondary_ctls_high |=
4943     - SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4944     - else
4945     - vmx->nested.msrs.secondary_ctls_high &=
4946     - ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4947     - }
4948     - }
4949     -
4950     - vmx->secondary_exec_control = exec_control;
4951     -}
4952     -
4953     -static void ept_set_mmio_spte_mask(void)
4954     -{
4955     - /*
4956     - * EPT Misconfigurations can be generated if the value of bits 2:0
4957     - * of an EPT paging-structure entry is 110b (write/execute).
4958     - */
4959     - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
4960     - VMX_EPT_MISCONFIG_WX_VALUE, 0);
4961     -}
4962     -
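ept_set_mmio_spte_mask() above relies on the architectural rule quoted in its comment: an EPT entry whose low three bits are 110b (writable and executable but not readable) is illegal and raises an EPT misconfiguration, which KVM reserves for MMIO. A tiny sketch of that bit test (mask values written out from the comment, not taken from kernel headers):

#include <stdio.h>
#include <stdint.h>

#define EPT_RWX_MASK		0x7ull	/* bits 2:0 = read/write/execute */
#define EPT_MISCONFIG_WX	0x6ull	/* write+execute, no read -> misconfig */

static int is_mmio_spte(uint64_t spte)
{
	return (spte & EPT_RWX_MASK) == EPT_MISCONFIG_WX;
}

int main(void)
{
	printf("%d\n", is_mmio_spte(0x0000000012345006ull));	/* 1: WX only */
	printf("%d\n", is_mmio_spte(0x0000000012345007ull));	/* 0: RWX is a valid mapping */
	return 0;
}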
4963     -#define VMX_XSS_EXIT_BITMAP 0
4964     -
4965     -/*
4966     - * Noting that the initialization of Guest-state Area of VMCS is in
4967     - * vmx_vcpu_reset().
4968     - */
4969     -static void init_vmcs(struct vcpu_vmx *vmx)
4970     -{
4971     - if (nested)
4972     - nested_vmx_set_vmcs_shadowing_bitmap();
4973     -
4974     - if (cpu_has_vmx_msr_bitmap())
4975     - vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
4976     -
4977     - vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
4978     -
4979     - /* Control */
4980     - pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4981     -
4982     - exec_controls_set(vmx, vmx_exec_control(vmx));
4983     -
4984     - if (cpu_has_secondary_exec_ctrls()) {
4985     - vmx_compute_secondary_exec_control(vmx);
4986     - secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
4987     - }
4988     -
4989     - if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
4990     - vmcs_write64(EOI_EXIT_BITMAP0, 0);
4991     - vmcs_write64(EOI_EXIT_BITMAP1, 0);
4992     - vmcs_write64(EOI_EXIT_BITMAP2, 0);
4993     - vmcs_write64(EOI_EXIT_BITMAP3, 0);
4994     -
4995     - vmcs_write16(GUEST_INTR_STATUS, 0);
4996     -
4997     - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4998     - vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4999     - }
5000     -
5001     - if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
5002     - vmcs_write32(PLE_GAP, ple_gap);
5003     - vmx->ple_window = ple_window;
5004     - vmx->ple_window_dirty = true;
5005     - }
5006     -
5007     - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
5008     - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
5009     - vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
5010     -
5011     - vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
5012     - vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
5013     - vmx_set_constant_host_state(vmx);
5014     - vmcs_writel(HOST_FS_BASE, 0); /* 22.2.4 */
5015     - vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
5016     -
5017     - if (cpu_has_vmx_vmfunc())
5018     - vmcs_write64(VM_FUNCTION_CONTROL, 0);
5019     -
5020     - vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
5021     - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
5022     - vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
5023     - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
5024     - vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
5025     -
5026     - if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
5027     - vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
5028     -
5029     - vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
5030     -
5031     - /* 22.2.1, 20.8.1 */
5032     - vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
5033     -
5034     - vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
5035     - vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
5036     -
5037     - set_cr4_guest_host_mask(vmx);
5038     -
5039     - if (vmx->vpid != 0)
5040     - vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
5041     -
5042     - if (vmx_xsaves_supported())
5043     - vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
5044     -
5045     - if (enable_pml) {
5046     - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
5047     - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5048     - }
5049     -
5050     - if (cpu_has_vmx_encls_vmexit())
5051     - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
5052     -
5053     - if (pt_mode == PT_MODE_HOST_GUEST) {
5054     - memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
5055     - /* Bit[6~0] are forced to 1, writes are ignored. */
5056     - vmx->pt_desc.guest.output_mask = 0x7F;
5057     - vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
5058     - }
5059     -}
5060     -
5061     -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5062     -{
5063     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5064     - struct msr_data apic_base_msr;
5065     - u64 cr0;
5066     -
5067     - vmx->rmode.vm86_active = 0;
5068     - vmx->spec_ctrl = 0;
5069     -
5070     - vmx->msr_ia32_umwait_control = 0;
5071     -
5072     - vcpu->arch.microcode_version = 0x100000000ULL;
5073     - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
5074     - vmx->hv_deadline_tsc = -1;
5075     - kvm_set_cr8(vcpu, 0);
5076     -
5077     - if (!init_event) {
5078     - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
5079     - MSR_IA32_APICBASE_ENABLE;
5080     - if (kvm_vcpu_is_reset_bsp(vcpu))
5081     - apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
5082     - apic_base_msr.host_initiated = true;
5083     - kvm_set_apic_base(vcpu, &apic_base_msr);
5084     - }
5085     -
5086     - vmx_segment_cache_clear(vmx);
5087     -
5088     - seg_setup(VCPU_SREG_CS);
5089     - vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
5090     - vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
5091     -
5092     - seg_setup(VCPU_SREG_DS);
5093     - seg_setup(VCPU_SREG_ES);
5094     - seg_setup(VCPU_SREG_FS);
5095     - seg_setup(VCPU_SREG_GS);
5096     - seg_setup(VCPU_SREG_SS);
5097     -
5098     - vmcs_write16(GUEST_TR_SELECTOR, 0);
5099     - vmcs_writel(GUEST_TR_BASE, 0);
5100     - vmcs_write32(GUEST_TR_LIMIT, 0xffff);
5101     - vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
5102     -
5103     - vmcs_write16(GUEST_LDTR_SELECTOR, 0);
5104     - vmcs_writel(GUEST_LDTR_BASE, 0);
5105     - vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
5106     - vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
5107     -
5108     - if (!init_event) {
5109     - vmcs_write32(GUEST_SYSENTER_CS, 0);
5110     - vmcs_writel(GUEST_SYSENTER_ESP, 0);
5111     - vmcs_writel(GUEST_SYSENTER_EIP, 0);
5112     - vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
5113     - }
5114     -
5115     - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
5116     - kvm_rip_write(vcpu, 0xfff0);
5117     -
5118     - vmcs_writel(GUEST_GDTR_BASE, 0);
5119     - vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
5120     -
5121     - vmcs_writel(GUEST_IDTR_BASE, 0);
5122     - vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
5123     -
5124     - vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
5125     - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
5126     - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
5127     - if (kvm_mpx_supported())
5128     - vmcs_write64(GUEST_BNDCFGS, 0);
5129     -
5130     - setup_msrs(vmx);
5131     -
5132     - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
5133     -
5134     - if (cpu_has_vmx_tpr_shadow() && !init_event) {
5135     - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
5136     - if (cpu_need_tpr_shadow(vcpu))
5137     - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
5138     - __pa(vcpu->arch.apic->regs));
5139     - vmcs_write32(TPR_THRESHOLD, 0);
5140     - }
5141     -
5142     - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
5143     -
5144     - cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
5145     - vmx->vcpu.arch.cr0 = cr0;
5146     - vmx_set_cr0(vcpu, cr0); /* enter rmode */
5147     - vmx_set_cr4(vcpu, 0);
5148     - vmx_set_efer(vcpu, 0);
5149     -
5150     - update_exception_bitmap(vcpu);
5151     -
5152     - vpid_sync_context(vmx->vpid);
5153     - if (init_event)
5154     - vmx_clear_hlt(vcpu);
5155     -}
5156     -
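vmx_vcpu_reset() above programs the architectural power-on state: CS selector 0xf000 with base 0xffff0000 and RIP 0xfff0, so the first instruction is fetched from the classic x86 reset vector. A small check of that arithmetic (plain C, just to make the resulting address explicit):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cs_base = 0xffff0000u;
	uint64_t rip     = 0xfff0u;

	/* Fetch address immediately after reset/INIT: base + RIP. */
	printf("reset vector = 0x%llx\n",
	       (unsigned long long)(cs_base + rip));	/* 0xfffffff0 */
	return 0;
}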
5157     -static void enable_irq_window(struct kvm_vcpu *vcpu)
5158     -{
5159     - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5160     -}
5161     -
5162     -static void enable_nmi_window(struct kvm_vcpu *vcpu)
5163     -{
5164     - if (!enable_vnmi ||
5165     - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
5166     - enable_irq_window(vcpu);
5167     - return;
5168     - }
5169     -
5170     - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5171     -}
5172     -
5173     -static void vmx_inject_irq(struct kvm_vcpu *vcpu)
5174     -{
5175     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5176     - uint32_t intr;
5177     - int irq = vcpu->arch.interrupt.nr;
5178     -
5179     - trace_kvm_inj_virq(irq);
5180     -
5181     - ++vcpu->stat.irq_injections;
5182     - if (vmx->rmode.vm86_active) {
5183     - int inc_eip = 0;
5184     - if (vcpu->arch.interrupt.soft)
5185     - inc_eip = vcpu->arch.event_exit_inst_len;
5186     - kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
5187     - return;
5188     - }
5189     - intr = irq | INTR_INFO_VALID_MASK;
5190     - if (vcpu->arch.interrupt.soft) {
5191     - intr |= INTR_TYPE_SOFT_INTR;
5192     - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
5193     - vmx->vcpu.arch.event_exit_inst_len);
5194     - } else
5195     - intr |= INTR_TYPE_EXT_INTR;
5196     - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
5197     -
5198     - vmx_clear_hlt(vcpu);
5199     -}
5200     -
5201     -static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
5202     -{
5203     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5204     -
5205     - if (!enable_vnmi) {
5206     - /*
5207     - * Tracking the NMI-blocked state in software is built upon
5208     - * finding the next open IRQ window. This, in turn, depends on
5209     - * well-behaving guests: They have to keep IRQs disabled at
5210     - * least as long as the NMI handler runs. Otherwise we may
5211     - * cause NMI nesting, maybe breaking the guest. But as this is
5212     - * highly unlikely, we can live with the residual risk.
5213     - */
5214     - vmx->loaded_vmcs->soft_vnmi_blocked = 1;
5215     - vmx->loaded_vmcs->vnmi_blocked_time = 0;
5216     - }
5217     -
5218     - ++vcpu->stat.nmi_injections;
5219     - vmx->loaded_vmcs->nmi_known_unmasked = false;
5220     -
5221     - if (vmx->rmode.vm86_active) {
5222     - kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
5223     - return;
5224     - }
5225     -
5226     - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
5227     - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
5228     -
5229     - vmx_clear_hlt(vcpu);
5230     -}
5231     -
5232     -bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
5233     -{
5234     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5235     - bool masked;
5236     -
5237     - if (!enable_vnmi)
5238     - return vmx->loaded_vmcs->soft_vnmi_blocked;
5239     - if (vmx->loaded_vmcs->nmi_known_unmasked)
5240     - return false;
5241     - masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
5242     - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5243     - return masked;
5244     -}
5245     -
5246     -void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5247     -{
5248     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5249     -
5250     - if (!enable_vnmi) {
5251     - if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
5252     - vmx->loaded_vmcs->soft_vnmi_blocked = masked;
5253     - vmx->loaded_vmcs->vnmi_blocked_time = 0;
5254     - }
5255     - } else {
5256     - vmx->loaded_vmcs->nmi_known_unmasked = !masked;
5257     - if (masked)
5258     - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5259     - GUEST_INTR_STATE_NMI);
5260     - else
5261     - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
5262     - GUEST_INTR_STATE_NMI);
5263     - }
5264     -}
5265     -
5266     -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
5267     -{
5268     - if (to_vmx(vcpu)->nested.nested_run_pending)
5269     - return 0;
5270     -
5271     - if (!enable_vnmi &&
5272     - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
5273     - return 0;
5274     -
5275     - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5276     - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
5277     - | GUEST_INTR_STATE_NMI));
5278     -}
5279     -
5280     -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
5281     -{
5282     - return (!to_vmx(vcpu)->nested.nested_run_pending &&
5283     - vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
5284     - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
5285     - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
5286     -}
5287     -
5288     -static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
5289     -{
5290     - int ret;
5291     -
5292     - if (enable_unrestricted_guest)
5293     - return 0;
5294     -
5295     - ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
5296     - PAGE_SIZE * 3);
5297     - if (ret)
5298     - return ret;
5299     - to_kvm_vmx(kvm)->tss_addr = addr;
5300     - return init_rmode_tss(kvm);
5301     -}
5302     -
5303     -static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5304     -{
5305     - to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
5306     - return 0;
5307     -}
5308     -
5309     -static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
5310     -{
5311     - switch (vec) {
5312     - case BP_VECTOR:
5313     - /*
5314     - * Update instruction length as we may reinject the exception
5315     - * from user space while in guest debugging mode.
5316     - */
5317     - to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
5318     - vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5319     - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
5320     - return false;
5321     - /* fall through */
5322     - case DB_VECTOR:
5323     - if (vcpu->guest_debug &
5324     - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
5325     - return false;
5326     - /* fall through */
5327     - case DE_VECTOR:
5328     - case OF_VECTOR:
5329     - case BR_VECTOR:
5330     - case UD_VECTOR:
5331     - case DF_VECTOR:
5332     - case SS_VECTOR:
5333     - case GP_VECTOR:
5334     - case MF_VECTOR:
5335     - return true;
5336     - break;
5337     - }
5338     - return false;
5339     -}
5340     -
5341     -static int handle_rmode_exception(struct kvm_vcpu *vcpu,
5342     - int vec, u32 err_code)
5343     -{
5344     - /*
5345     - * Instruction with address size override prefix opcode 0x67
5346     - * Cause the #SS fault with 0 error code in VM86 mode.
5347     - */
5348     - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
5349     - if (kvm_emulate_instruction(vcpu, 0)) {
5350     - if (vcpu->arch.halt_request) {
5351     - vcpu->arch.halt_request = 0;
5352     - return kvm_vcpu_halt(vcpu);
5353     - }
5354     - return 1;
5355     - }
5356     - return 0;
5357     - }
5358     -
5359     - /*
5360     - * Forward all other exceptions that are valid in real mode.
5361     - * FIXME: Breaks guest debugging in real mode, needs to be fixed with
5362     - * the required debugging infrastructure rework.
5363     - */
5364     - kvm_queue_exception(vcpu, vec);
5365     - return 1;
5366     -}
5367     -
5368     -/*
5369     - * Trigger machine check on the host. We assume all the MSRs are already set up
5370     - * by the CPU and that we still run on the same CPU as the MCE occurred on.
5371     - * We pass a fake environment to the machine check handler because we want
5372     - * the guest to be always treated like user space, no matter what context
5373     - * it used internally.
5374     - */
5375     -static void kvm_machine_check(void)
5376     -{
5377     -#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64)
5378     - struct pt_regs regs = {
5379     - .cs = 3, /* Fake ring 3 no matter what the guest ran on */
5380     - .flags = X86_EFLAGS_IF,
5381     - };
5382     -
5383     - do_machine_check(&regs, 0);
5384     -#endif
5385     -}
5386     -
5387     -static int handle_machine_check(struct kvm_vcpu *vcpu)
5388     -{
5389     - /* handled by vmx_vcpu_run() */
5390     - return 1;
5391     -}
5392     -
5393     -static int handle_exception_nmi(struct kvm_vcpu *vcpu)
5394     -{
5395     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5396     - struct kvm_run *kvm_run = vcpu->run;
5397     - u32 intr_info, ex_no, error_code;
5398     - unsigned long cr2, rip, dr6;
5399     - u32 vect_info;
5400     -
5401     - vect_info = vmx->idt_vectoring_info;
5402     - intr_info = vmx->exit_intr_info;
5403     -
5404     - if (is_machine_check(intr_info) || is_nmi(intr_info))
5405     - return 1; /* handled by handle_exception_nmi_irqoff() */
5406     -
5407     - if (is_invalid_opcode(intr_info))
5408     - return handle_ud(vcpu);
5409     -
5410     - error_code = 0;
5411     - if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
5412     - error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
5413     -
5414     - if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
5415     - WARN_ON_ONCE(!enable_vmware_backdoor);
5416     -
5417     - /*
5418     - * VMware backdoor emulation on #GP interception only handles
5419     - * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
5420     - * error code on #GP.
5421     - */
5422     - if (error_code) {
5423     - kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
5424     - return 1;
5425     - }
5426     - return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
5427     - }
5428     -
5429     - /*
5430     - * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
5431     - * MMIO, it is better to report an internal error.
5432     - * See the comments in vmx_handle_exit.
5433     - */
5434     - if ((vect_info & VECTORING_INFO_VALID_MASK) &&
5435     - !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
5436     - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5437     - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
5438     - vcpu->run->internal.ndata = 3;
5439     - vcpu->run->internal.data[0] = vect_info;
5440     - vcpu->run->internal.data[1] = intr_info;
5441     - vcpu->run->internal.data[2] = error_code;
5442     - return 0;
5443     - }
5444     -
5445     - if (is_page_fault(intr_info)) {
5446     - cr2 = vmcs_readl(EXIT_QUALIFICATION);
5447     - /* EPT won't cause page fault directly */
5448     - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
5449     - return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
5450     - }
5451     -
5452     - ex_no = intr_info & INTR_INFO_VECTOR_MASK;
5453     -
5454     - if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
5455     - return handle_rmode_exception(vcpu, ex_no, error_code);
5456     -
5457     - switch (ex_no) {
5458     - case AC_VECTOR:
5459     - kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
5460     - return 1;
5461     - case DB_VECTOR:
5462     - dr6 = vmcs_readl(EXIT_QUALIFICATION);
5463     - if (!(vcpu->guest_debug &
5464     - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
5465     - vcpu->arch.dr6 &= ~DR_TRAP_BITS;
5466     - vcpu->arch.dr6 |= dr6 | DR6_RTM;
5467     - if (is_icebp(intr_info))
5468     - WARN_ON(!skip_emulated_instruction(vcpu));
5469     -
5470     - kvm_queue_exception(vcpu, DB_VECTOR);
5471     - return 1;
5472     - }
5473     - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
5474     - kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
5475     - /* fall through */
5476     - case BP_VECTOR:
5477     - /*
5478     - * Update instruction length as we may reinject #BP from
5479     - * user space while in guest debugging mode. Reading it for
5480     - * #DB as well causes no harm, it is not used in that case.
5481     - */
5482     - vmx->vcpu.arch.event_exit_inst_len =
5483     - vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
5484     - kvm_run->exit_reason = KVM_EXIT_DEBUG;
5485     - rip = kvm_rip_read(vcpu);
5486     - kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
5487     - kvm_run->debug.arch.exception = ex_no;
5488     - break;
5489     - default:
5490     - kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
5491     - kvm_run->ex.exception = ex_no;
5492     - kvm_run->ex.error_code = error_code;
5493     - break;
5494     - }
5495     - return 0;
5496     -}
5497     -
5498     -static __always_inline int handle_external_interrupt(struct kvm_vcpu *vcpu)
5499     -{
5500     - ++vcpu->stat.irq_exits;
5501     - return 1;
5502     -}
5503     -
5504     -static int handle_triple_fault(struct kvm_vcpu *vcpu)
5505     -{
5506     - vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5507     - vcpu->mmio_needed = 0;
5508     - return 0;
5509     -}
5510     -
5511     -static int handle_io(struct kvm_vcpu *vcpu)
5512     -{
5513     - unsigned long exit_qualification;
5514     - int size, in, string;
5515     - unsigned port;
5516     -
5517     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5518     - string = (exit_qualification & 16) != 0;
5519     -
5520     - ++vcpu->stat.io_exits;
5521     -
5522     - if (string)
5523     - return kvm_emulate_instruction(vcpu, 0);
5524     -
5525     - port = exit_qualification >> 16;
5526     - size = (exit_qualification & 7) + 1;
5527     - in = (exit_qualification & 8) != 0;
5528     -
5529     - return kvm_fast_pio(vcpu, size, port, in);
5530     -}
5531     -
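handle_io() above decodes the VM-exit qualification for I/O instructions: bits 2:0 give the access size minus one, bit 3 distinguishes IN from OUT, bit 4 marks string instructions, and bits 31:16 carry the port. A small decoder mirroring those shifts (field layout taken from the code above; the example value is made up):

#include <stdio.h>

struct io_exit {
	int size;	/* access size in bytes */
	int in;		/* 1 = IN, 0 = OUT */
	int string;	/* 1 = INS/OUTS */
	unsigned port;
};

static struct io_exit decode_io_qual(unsigned long q)
{
	struct io_exit io = {
		.size   = (q & 7) + 1,
		.in     = (q & 8) != 0,
		.string = (q & 16) != 0,
		.port   = (unsigned)(q >> 16) & 0xffff,
	};
	return io;
}

int main(void)
{
	/* hypothetical qualification: 4-byte IN from port 0x64 */
	struct io_exit io = decode_io_qual(0x0064000bUL);

	printf("size=%d in=%d string=%d port=0x%x\n",
	       io.size, io.in, io.string, io.port);
	return 0;
}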
5532     -static void
5533     -vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5534     -{
5535     - /*
5536     - * Patch in the VMCALL instruction:
5537     - */
5538     - hypercall[0] = 0x0f;
5539     - hypercall[1] = 0x01;
5540     - hypercall[2] = 0xc1;
5541     -}
5542     -
5543     -/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
5544     -static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
5545     -{
5546     - if (is_guest_mode(vcpu)) {
5547     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5548     - unsigned long orig_val = val;
5549     -
5550     - /*
5551     - * We get here when L2 changed cr0 in a way that did not change
5552     - * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
5553     - * but did change L0 shadowed bits. So we first calculate the
5554     - * effective cr0 value that L1 would like to write into the
5555     - * hardware. It consists of the L2-owned bits from the new
5556     - * value combined with the L1-owned bits from L1's guest_cr0.
5557     - */
5558     - val = (val & ~vmcs12->cr0_guest_host_mask) |
5559     - (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
5560     -
5561     - if (!nested_guest_cr0_valid(vcpu, val))
5562     - return 1;
5563     -
5564     - if (kvm_set_cr0(vcpu, val))
5565     - return 1;
5566     - vmcs_writel(CR0_READ_SHADOW, orig_val);
5567     - return 0;
5568     - } else {
5569     - if (to_vmx(vcpu)->nested.vmxon &&
5570     - !nested_host_cr0_valid(vcpu, val))
5571     - return 1;
5572     -
5573     - return kvm_set_cr0(vcpu, val);
5574     - }
5575     -}
5576     -
5577     -static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
5578     -{
5579     - if (is_guest_mode(vcpu)) {
5580     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
5581     - unsigned long orig_val = val;
5582     -
5583     - /* analogously to handle_set_cr0 */
5584     - val = (val & ~vmcs12->cr4_guest_host_mask) |
5585     - (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
5586     - if (kvm_set_cr4(vcpu, val))
5587     - return 1;
5588     - vmcs_writel(CR4_READ_SHADOW, orig_val);
5589     - return 0;
5590     - } else
5591     - return kvm_set_cr4(vcpu, val);
5592     -}
5593     -
5594     -static int handle_desc(struct kvm_vcpu *vcpu)
5595     -{
5596     - WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
5597     - return kvm_emulate_instruction(vcpu, 0);
5598     -}
5599     -
5600     -static int handle_cr(struct kvm_vcpu *vcpu)
5601     -{
5602     - unsigned long exit_qualification, val;
5603     - int cr;
5604     - int reg;
5605     - int err;
5606     - int ret;
5607     -
5608     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5609     - cr = exit_qualification & 15;
5610     - reg = (exit_qualification >> 8) & 15;
5611     - switch ((exit_qualification >> 4) & 3) {
5612     - case 0: /* mov to cr */
5613     - val = kvm_register_readl(vcpu, reg);
5614     - trace_kvm_cr_write(cr, val);
5615     - switch (cr) {
5616     - case 0:
5617     - err = handle_set_cr0(vcpu, val);
5618     - return kvm_complete_insn_gp(vcpu, err);
5619     - case 3:
5620     - WARN_ON_ONCE(enable_unrestricted_guest);
5621     - err = kvm_set_cr3(vcpu, val);
5622     - return kvm_complete_insn_gp(vcpu, err);
5623     - case 4:
5624     - err = handle_set_cr4(vcpu, val);
5625     - return kvm_complete_insn_gp(vcpu, err);
5626     - case 8: {
5627     - u8 cr8_prev = kvm_get_cr8(vcpu);
5628     - u8 cr8 = (u8)val;
5629     - err = kvm_set_cr8(vcpu, cr8);
5630     - ret = kvm_complete_insn_gp(vcpu, err);
5631     - if (lapic_in_kernel(vcpu))
5632     - return ret;
5633     - if (cr8_prev <= cr8)
5634     - return ret;
5635     - /*
5636     - * TODO: we might be squashing a
5637     - * KVM_GUESTDBG_SINGLESTEP-triggered
5638     - * KVM_EXIT_DEBUG here.
5639     - */
5640     - vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
5641     - return 0;
5642     - }
5643     - }
5644     - break;
5645     - case 2: /* clts */
5646     - WARN_ONCE(1, "Guest should always own CR0.TS");
5647     - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
5648     - trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
5649     - return kvm_skip_emulated_instruction(vcpu);
5650     - case 1: /*mov from cr*/
5651     - switch (cr) {
5652     - case 3:
5653     - WARN_ON_ONCE(enable_unrestricted_guest);
5654     - val = kvm_read_cr3(vcpu);
5655     - kvm_register_write(vcpu, reg, val);
5656     - trace_kvm_cr_read(cr, val);
5657     - return kvm_skip_emulated_instruction(vcpu);
5658     - case 8:
5659     - val = kvm_get_cr8(vcpu);
5660     - kvm_register_write(vcpu, reg, val);
5661     - trace_kvm_cr_read(cr, val);
5662     - return kvm_skip_emulated_instruction(vcpu);
5663     - }
5664     - break;
5665     - case 3: /* lmsw */
5666     - val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5667     - trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
5668     - kvm_lmsw(vcpu, val);
5669     -
5670     - return kvm_skip_emulated_instruction(vcpu);
5671     - default:
5672     - break;
5673     - }
5674     - vcpu->run->exit_reason = 0;
5675     - vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
5676     - (int)(exit_qualification >> 4) & 3, cr);
5677     - return 0;
5678     -}
5679     -
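handle_cr() above pulls the control-register number, the access type and the general-purpose register out of the exit qualification (bits 3:0, 5:4 and 11:8 respectively). The same decode as a stand-alone sketch (field positions taken from the code above; the sample value is invented):

#include <stdio.h>

int main(void)
{
	unsigned long q = 0x0000000000000304UL;	/* hypothetical qualification */

	unsigned cr   = q & 15;		/* which control register */
	unsigned type = (q >> 4) & 3;	/* 0=mov to cr, 1=mov from cr,
					   2=clts, 3=lmsw */
	unsigned reg  = (q >> 8) & 15;	/* GPR used by the mov */

	printf("cr%u, access type %u, gpr %u\n", cr, type, reg);
	return 0;
}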
5680     -static int handle_dr(struct kvm_vcpu *vcpu)
5681     -{
5682     - unsigned long exit_qualification;
5683     - int dr, dr7, reg;
5684     -
5685     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5686     - dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5687     -
5688     - /* First, if DR does not exist, trigger UD */
5689     - if (!kvm_require_dr(vcpu, dr))
5690     - return 1;
5691     -
5692     - /* Do not handle if the CPL > 0, will trigger GP on re-entry */
5693     - if (!kvm_require_cpl(vcpu, 0))
5694     - return 1;
5695     - dr7 = vmcs_readl(GUEST_DR7);
5696     - if (dr7 & DR7_GD) {
5697     - /*
5698     - * As the vm-exit takes precedence over the debug trap, we
5699     - * need to emulate the latter, either for the host or the
5700     - * guest debugging itself.
5701     - */
5702     - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
5703     - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
5704     - vcpu->run->debug.arch.dr7 = dr7;
5705     - vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
5706     - vcpu->run->debug.arch.exception = DB_VECTOR;
5707     - vcpu->run->exit_reason = KVM_EXIT_DEBUG;
5708     - return 0;
5709     - } else {
5710     - vcpu->arch.dr6 &= ~DR_TRAP_BITS;
5711     - vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
5712     - kvm_queue_exception(vcpu, DB_VECTOR);
5713     - return 1;
5714     - }
5715     - }
5716     -
5717     - if (vcpu->guest_debug == 0) {
5718     - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5719     -
5720     - /*
5721     - * No more DR vmexits; force a reload of the debug registers
5722     - * and reenter on this instruction. The next vmexit will
5723     - * retrieve the full state of the debug registers.
5724     - */
5725     - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
5726     - return 1;
5727     - }
5728     -
5729     - reg = DEBUG_REG_ACCESS_REG(exit_qualification);
5730     - if (exit_qualification & TYPE_MOV_FROM_DR) {
5731     - unsigned long val;
5732     -
5733     - if (kvm_get_dr(vcpu, dr, &val))
5734     - return 1;
5735     - kvm_register_write(vcpu, reg, val);
5736     - } else
5737     - if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
5738     - return 1;
5739     -
5740     - return kvm_skip_emulated_instruction(vcpu);
5741     -}
5742     -
5743     -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
5744     -{
5745     - return vcpu->arch.dr6;
5746     -}
5747     -
5748     -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5749     -{
5750     -}
5751     -
5752     -static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5753     -{
5754     - get_debugreg(vcpu->arch.db[0], 0);
5755     - get_debugreg(vcpu->arch.db[1], 1);
5756     - get_debugreg(vcpu->arch.db[2], 2);
5757     - get_debugreg(vcpu->arch.db[3], 3);
5758     - get_debugreg(vcpu->arch.dr6, 6);
5759     - vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5760     -
5761     - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5762     - exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
5763     -}
5764     -
5765     -static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5766     -{
5767     - vmcs_writel(GUEST_DR7, val);
5768     -}
5769     -
5770     -static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
5771     -{
5772     - kvm_apic_update_ppr(vcpu);
5773     - return 1;
5774     -}
5775     -
5776     -static int handle_interrupt_window(struct kvm_vcpu *vcpu)
5777     -{
5778     - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
5779     -
5780     - kvm_make_request(KVM_REQ_EVENT, vcpu);
5781     -
5782     - ++vcpu->stat.irq_window_exits;
5783     - return 1;
5784     -}
5785     -
5786     -static int handle_vmcall(struct kvm_vcpu *vcpu)
5787     -{
5788     - return kvm_emulate_hypercall(vcpu);
5789     -}
5790     -
5791     -static int handle_invd(struct kvm_vcpu *vcpu)
5792     -{
5793     - return kvm_emulate_instruction(vcpu, 0);
5794     -}
5795     -
5796     -static int handle_invlpg(struct kvm_vcpu *vcpu)
5797     -{
5798     - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5799     -
5800     - kvm_mmu_invlpg(vcpu, exit_qualification);
5801     - return kvm_skip_emulated_instruction(vcpu);
5802     -}
5803     -
5804     -static int handle_rdpmc(struct kvm_vcpu *vcpu)
5805     -{
5806     - int err;
5807     -
5808     - err = kvm_rdpmc(vcpu);
5809     - return kvm_complete_insn_gp(vcpu, err);
5810     -}
5811     -
5812     -static int handle_wbinvd(struct kvm_vcpu *vcpu)
5813     -{
5814     - return kvm_emulate_wbinvd(vcpu);
5815     -}
5816     -
5817     -static int handle_xsetbv(struct kvm_vcpu *vcpu)
5818     -{
5819     - u64 new_bv = kvm_read_edx_eax(vcpu);
5820     - u32 index = kvm_rcx_read(vcpu);
5821     -
5822     - if (kvm_set_xcr(vcpu, index, new_bv) == 0)
5823     - return kvm_skip_emulated_instruction(vcpu);
5824     - return 1;
5825     -}
5826     -
5827     -static int handle_apic_access(struct kvm_vcpu *vcpu)
5828     -{
5829     - if (likely(fasteoi)) {
5830     - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5831     - int access_type, offset;
5832     -
5833     - access_type = exit_qualification & APIC_ACCESS_TYPE;
5834     - offset = exit_qualification & APIC_ACCESS_OFFSET;
5835     - /*
5836     - * Sane guest uses MOV to write EOI, with written value
5837     - * not cared. So make a short-circuit here by avoiding
5838     - * heavy instruction emulation.
5839     - */
5840     - if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
5841     - (offset == APIC_EOI)) {
5842     - kvm_lapic_set_eoi(vcpu);
5843     - return kvm_skip_emulated_instruction(vcpu);
5844     - }
5845     - }
5846     - return kvm_emulate_instruction(vcpu, 0);
5847     -}
5848     -
5849     -static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
5850     -{
5851     - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5852     - int vector = exit_qualification & 0xff;
5853     -
5854     - /* EOI-induced VM exit is trap-like and thus no need to adjust IP */
5855     - kvm_apic_set_eoi_accelerated(vcpu, vector);
5856     - return 1;
5857     -}
5858     -
5859     -static int handle_apic_write(struct kvm_vcpu *vcpu)
5860     -{
5861     - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5862     - u32 offset = exit_qualification & 0xfff;
5863     -
5864     - /* APIC-write VM exit is trap-like and thus no need to adjust IP */
5865     - kvm_apic_write_nodecode(vcpu, offset);
5866     - return 1;
5867     -}
5868     -
5869     -static int handle_task_switch(struct kvm_vcpu *vcpu)
5870     -{
5871     - struct vcpu_vmx *vmx = to_vmx(vcpu);
5872     - unsigned long exit_qualification;
5873     - bool has_error_code = false;
5874     - u32 error_code = 0;
5875     - u16 tss_selector;
5876     - int reason, type, idt_v, idt_index;
5877     -
5878     - idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
5879     - idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
5880     - type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
5881     -
5882     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5883     -
5884     - reason = (u32)exit_qualification >> 30;
5885     - if (reason == TASK_SWITCH_GATE && idt_v) {
5886     - switch (type) {
5887     - case INTR_TYPE_NMI_INTR:
5888     - vcpu->arch.nmi_injected = false;
5889     - vmx_set_nmi_mask(vcpu, true);
5890     - break;
5891     - case INTR_TYPE_EXT_INTR:
5892     - case INTR_TYPE_SOFT_INTR:
5893     - kvm_clear_interrupt_queue(vcpu);
5894     - break;
5895     - case INTR_TYPE_HARD_EXCEPTION:
5896     - if (vmx->idt_vectoring_info &
5897     - VECTORING_INFO_DELIVER_CODE_MASK) {
5898     - has_error_code = true;
5899     - error_code =
5900     - vmcs_read32(IDT_VECTORING_ERROR_CODE);
5901     - }
5902     - /* fall through */
5903     - case INTR_TYPE_SOFT_EXCEPTION:
5904     - kvm_clear_exception_queue(vcpu);
5905     - break;
5906     - default:
5907     - break;
5908     - }
5909     - }
5910     - tss_selector = exit_qualification;
5911     -
5912     - if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
5913     - type != INTR_TYPE_EXT_INTR &&
5914     - type != INTR_TYPE_NMI_INTR))
5915     - WARN_ON(!skip_emulated_instruction(vcpu));
5916     -
5917     - /*
5918     - * TODO: What about debug traps on tss switch?
5919     - * Are we supposed to inject them and update dr6?
5920     - */
5921     - return kvm_task_switch(vcpu, tss_selector,
5922     - type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
5923     - reason, has_error_code, error_code);
5924     -}
5925     -
5926     -static int handle_ept_violation(struct kvm_vcpu *vcpu)
5927     -{
5928     - unsigned long exit_qualification;
5929     - gpa_t gpa;
5930     - u64 error_code;
5931     -
5932     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5933     -
5934     - /*
5935     - * EPT violation happened while executing iret from NMI,
5936     - * "blocked by NMI" bit has to be set before next VM entry.
5937     - * There are errata that may cause this bit to not be set:
5938     - * AAK134, BY25.
5939     - */
5940     - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5941     - enable_vnmi &&
5942     - (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5943     - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
5944     -
5945     - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5946     - trace_kvm_page_fault(gpa, exit_qualification);
5947     -
5948     - /* Is it a read fault? */
5949     - error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
5950     - ? PFERR_USER_MASK : 0;
5951     - /* Is it a write fault? */
5952     - error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
5953     - ? PFERR_WRITE_MASK : 0;
5954     - /* Is it a fetch fault? */
5955     - error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
5956     - ? PFERR_FETCH_MASK : 0;
5957     - /* ept page table entry is present? */
5958     - error_code |= (exit_qualification &
5959     - (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
5960     - EPT_VIOLATION_EXECUTABLE))
5961     - ? PFERR_PRESENT_MASK : 0;
5962     -
5963     - error_code |= (exit_qualification & 0x100) != 0 ?
5964     - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
5965     -
5966     - vcpu->arch.exit_qualification = exit_qualification;
5967     - return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5968     -}
5969     -
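handle_ept_violation() above translates the exit-qualification access and permission bits into x86 page-fault error-code bits before handing the fault to the MMU. A compact sketch of that translation; the bit positions are defined locally as assumptions rather than taken from kernel headers, and the read-access-to-user-bit mapping mirrors what the removed code does:

#include <stdio.h>

/* exit-qualification bits (assumed layout) */
#define Q_ACC_READ	(1u << 0)
#define Q_ACC_WRITE	(1u << 1)
#define Q_ACC_INSTR	(1u << 2)
#define Q_READABLE	(1u << 3)
#define Q_WRITABLE	(1u << 4)
#define Q_EXECUTABLE	(1u << 5)

/* page-fault error-code bits (assumed layout) */
#define PF_PRESENT	(1u << 0)
#define PF_WRITE	(1u << 1)
#define PF_USER		(1u << 2)
#define PF_FETCH	(1u << 4)

static unsigned qual_to_error_code(unsigned q)
{
	unsigned ec = 0;

	if (q & Q_ACC_READ)
		ec |= PF_USER;		/* read access reported via the user bit */
	if (q & Q_ACC_WRITE)
		ec |= PF_WRITE;
	if (q & Q_ACC_INSTR)
		ec |= PF_FETCH;
	if (q & (Q_READABLE | Q_WRITABLE | Q_EXECUTABLE))
		ec |= PF_PRESENT;	/* some permission existed -> entry present */
	return ec;
}

int main(void)
{
	/* write to a present read/write page -> present + write */
	printf("error code = 0x%x\n",
	       qual_to_error_code(Q_ACC_WRITE | Q_READABLE | Q_WRITABLE));
	return 0;
}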
5970     -static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5971     -{
5972     - gpa_t gpa;
5973     -
5974     - /*
5975     - * A nested guest cannot optimize MMIO vmexits, because we have an
5976     - * nGPA here instead of the required GPA.
5977     - */
5978     - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5979     - if (!is_guest_mode(vcpu) &&
5980     - !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5981     - trace_kvm_fast_mmio(gpa);
5982     - return kvm_skip_emulated_instruction(vcpu);
5983     - }
5984     -
5985     - return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
5986     -}
5987     -
5988     -static int handle_nmi_window(struct kvm_vcpu *vcpu)
5989     -{
5990     - WARN_ON_ONCE(!enable_vnmi);
5991     - exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
5992     - ++vcpu->stat.nmi_window_exits;
5993     - kvm_make_request(KVM_REQ_EVENT, vcpu);
5994     -
5995     - return 1;
5996     -}
5997     -
5998     -static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5999     -{
6000     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6001     - bool intr_window_requested;
6002     - unsigned count = 130;
6003     -
6004     - /*
6005     - * We should never reach the point where we are emulating L2
6006     - * due to invalid guest state as that means we incorrectly
6007     - * allowed a nested VMEntry with an invalid vmcs12.
6008     - */
6009     - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
6010     -
6011     - intr_window_requested = exec_controls_get(vmx) &
6012     - CPU_BASED_INTR_WINDOW_EXITING;
6013     -
6014     - while (vmx->emulation_required && count-- != 0) {
6015     - if (intr_window_requested && vmx_interrupt_allowed(vcpu))
6016     - return handle_interrupt_window(&vmx->vcpu);
6017     -
6018     - if (kvm_test_request(KVM_REQ_EVENT, vcpu))
6019     - return 1;
6020     -
6021     - if (!kvm_emulate_instruction(vcpu, 0))
6022     - return 0;
6023     -
6024     - if (vmx->emulation_required && !vmx->rmode.vm86_active &&
6025     - vcpu->arch.exception.pending) {
6026     - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6027     - vcpu->run->internal.suberror =
6028     - KVM_INTERNAL_ERROR_EMULATION;
6029     - vcpu->run->internal.ndata = 0;
6030     - return 0;
6031     - }
6032     -
6033     - if (vcpu->arch.halt_request) {
6034     - vcpu->arch.halt_request = 0;
6035     - return kvm_vcpu_halt(vcpu);
6036     - }
6037     -
6038     - /*
6039     - * Note, return 1 and not 0, vcpu_run() is responsible for
6040     - * morphing the pending signal into the proper return code.
6041     - */
6042     - if (signal_pending(current))
6043     - return 1;
6044     -
6045     - if (need_resched())
6046     - schedule();
6047     - }
6048     -
6049     - return 1;
6050     -}
6051     -
6052     -static void grow_ple_window(struct kvm_vcpu *vcpu)
6053     -{
6054     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6055     - unsigned int old = vmx->ple_window;
6056     -
6057     - vmx->ple_window = __grow_ple_window(old, ple_window,
6058     - ple_window_grow,
6059     - ple_window_max);
6060     -
6061     - if (vmx->ple_window != old) {
6062     - vmx->ple_window_dirty = true;
6063     - trace_kvm_ple_window_update(vcpu->vcpu_id,
6064     - vmx->ple_window, old);
6065     - }
6066     -}
6067     -
6068     -static void shrink_ple_window(struct kvm_vcpu *vcpu)
6069     -{
6070     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6071     - unsigned int old = vmx->ple_window;
6072     -
6073     - vmx->ple_window = __shrink_ple_window(old, ple_window,
6074     - ple_window_shrink,
6075     - ple_window);
6076     -
6077     - if (vmx->ple_window != old) {
6078     - vmx->ple_window_dirty = true;
6079     - trace_kvm_ple_window_update(vcpu->vcpu_id,
6080     - vmx->ple_window, old);
6081     - }
6082     -}
6083     -
6084     -/*
6085     - * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
6086     - */
6087     -static void wakeup_handler(void)
6088     -{
6089     - struct kvm_vcpu *vcpu;
6090     - int cpu = smp_processor_id();
6091     -
6092     - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6093     - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
6094     - blocked_vcpu_list) {
6095     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6096     -
6097     - if (pi_test_on(pi_desc) == 1)
6098     - kvm_vcpu_kick(vcpu);
6099     - }
6100     - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
6101     -}
6102     -
6103     -static void vmx_enable_tdp(void)
6104     -{
6105     - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
6106     - enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
6107     - enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
6108     - 0ull, VMX_EPT_EXECUTABLE_MASK,
6109     - cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
6110     - VMX_EPT_RWX_MASK, 0ull);
6111     -
6112     - ept_set_mmio_spte_mask();
6113     - kvm_enable_tdp();
6114     -}
6115     -
6116     -/*
6117     - * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
6118     - * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
6119     - */
6120     -static int handle_pause(struct kvm_vcpu *vcpu)
6121     -{
6122     - if (!kvm_pause_in_guest(vcpu->kvm))
6123     - grow_ple_window(vcpu);
6124     -
6125     - /*
6126     - * Intel sdm vol3 ch-25.1.3 says: The "PAUSE-loop exiting"
6127     - * VM-execution control is ignored if CPL > 0. OTOH, KVM
6128     - * never set PAUSE_EXITING and just set PLE if supported,
6129     - * so the vcpu must be CPL=0 if it gets a PAUSE exit.
6130     - */
6131     - kvm_vcpu_on_spin(vcpu, true);
6132     - return kvm_skip_emulated_instruction(vcpu);
6133     -}
6134     -
6135     -static int handle_nop(struct kvm_vcpu *vcpu)
6136     -{
6137     - return kvm_skip_emulated_instruction(vcpu);
6138     -}
6139     -
6140     -static int handle_mwait(struct kvm_vcpu *vcpu)
6141     -{
6142     - printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
6143     - return handle_nop(vcpu);
6144     -}
6145     -
6146     -static int handle_invalid_op(struct kvm_vcpu *vcpu)
6147     -{
6148     - kvm_queue_exception(vcpu, UD_VECTOR);
6149     - return 1;
6150     -}
6151     -
6152     -static int handle_monitor_trap(struct kvm_vcpu *vcpu)
6153     -{
6154     - return 1;
6155     -}
6156     -
6157     -static int handle_monitor(struct kvm_vcpu *vcpu)
6158     -{
6159     - printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
6160     - return handle_nop(vcpu);
6161     -}
6162     -
6163     -static int handle_invpcid(struct kvm_vcpu *vcpu)
6164     -{
6165     - u32 vmx_instruction_info;
6166     - unsigned long type;
6167     - bool pcid_enabled;
6168     - gva_t gva;
6169     - struct x86_exception e;
6170     - unsigned i;
6171     - unsigned long roots_to_free = 0;
6172     - struct {
6173     - u64 pcid;
6174     - u64 gla;
6175     - } operand;
6176     -
6177     - if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
6178     - kvm_queue_exception(vcpu, UD_VECTOR);
6179     - return 1;
6180     - }
6181     -
6182     - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
6183     - type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
6184     -
6185     - if (type > 3) {
6186     - kvm_inject_gp(vcpu, 0);
6187     - return 1;
6188     - }
6189     -
6190     - /* According to the Intel instruction reference, the memory operand
6191     - * is read even if it isn't needed (e.g., for type==all)
6192     - */
6193     - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
6194     - vmx_instruction_info, false,
6195     - sizeof(operand), &gva))
6196     - return 1;
6197     -
6198     - if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
6199     - kvm_inject_page_fault(vcpu, &e);
6200     - return 1;
6201     - }
6202     -
6203     - if (operand.pcid >> 12 != 0) {
6204     - kvm_inject_gp(vcpu, 0);
6205     - return 1;
6206     - }
6207     -
6208     - pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
6209     -
6210     - switch (type) {
6211     - case INVPCID_TYPE_INDIV_ADDR:
6212     - if ((!pcid_enabled && (operand.pcid != 0)) ||
6213     - is_noncanonical_address(operand.gla, vcpu)) {
6214     - kvm_inject_gp(vcpu, 0);
6215     - return 1;
6216     - }
6217     - kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
6218     - return kvm_skip_emulated_instruction(vcpu);
6219     -
6220     - case INVPCID_TYPE_SINGLE_CTXT:
6221     - if (!pcid_enabled && (operand.pcid != 0)) {
6222     - kvm_inject_gp(vcpu, 0);
6223     - return 1;
6224     - }
6225     -
6226     - if (kvm_get_active_pcid(vcpu) == operand.pcid) {
6227     - kvm_mmu_sync_roots(vcpu);
6228     - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
6229     - }
6230     -
6231     - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
6232     - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
6233     - == operand.pcid)
6234     - roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
6235     -
6236     - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
6237     - /*
6238     - * If neither the current cr3 nor any of the prev_roots use the
6239     - * given PCID, then nothing needs to be done here because a
6240     - * resync will happen anyway before switching to any other CR3.
6241     - */
6242     -
6243     - return kvm_skip_emulated_instruction(vcpu);
6244     -
6245     - case INVPCID_TYPE_ALL_NON_GLOBAL:
6246     - /*
6247     - * Currently, KVM doesn't mark global entries in the shadow
6248     - * page tables, so a non-global flush just degenerates to a
6249     - * global flush. If needed, we could optimize this later by
6250     - * keeping track of global entries in shadow page tables.
6251     - */
6252     -
6253     - /* fall-through */
6254     - case INVPCID_TYPE_ALL_INCL_GLOBAL:
6255     - kvm_mmu_unload(vcpu);
6256     - return kvm_skip_emulated_instruction(vcpu);
6257     -
6258     - default:
6259     - BUG(); /* We have already checked above that type <= 3 */
6260     - }
6261     -}
6262     -
6263     -static int handle_pml_full(struct kvm_vcpu *vcpu)
6264     -{
6265     - unsigned long exit_qualification;
6266     -
6267     - trace_kvm_pml_full(vcpu->vcpu_id);
6268     -
6269     - exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6270     -
6271     - /*
6272     - * If the PML buffer filled up while the guest was executing IRET from an NMI,
6273     - * the "blocked by NMI" bit has to be set before the next VM entry.
6274     - */
6275     - if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
6276     - enable_vnmi &&
6277     - (exit_qualification & INTR_INFO_UNBLOCK_NMI))
6278     - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6279     - GUEST_INTR_STATE_NMI);
6280     -
6281     - /*
6282     - * PML buffer already flushed at beginning of VMEXIT. Nothing to do
6283     - * here, and there's no userspace involvement needed for PML.
6284     - */
6285     - return 1;
6286     -}
6287     -
6288     -static int handle_preemption_timer(struct kvm_vcpu *vcpu)
6289     -{
6290     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6291     -
6292     - if (!vmx->req_immediate_exit &&
6293     - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
6294     - kvm_lapic_expired_hv_timer(vcpu);
6295     -
6296     - return 1;
6297     -}
6298     -
6299     -/*
6300     - * When nested=0, all VMX instruction VM Exits filter here. The handlers
6301     - * are overwritten by nested_vmx_setup() when nested=1.
6302     - */
6303     -static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
6304     -{
6305     - kvm_queue_exception(vcpu, UD_VECTOR);
6306     - return 1;
6307     -}
6308     -
6309     -static int handle_encls(struct kvm_vcpu *vcpu)
6310     -{
6311     - /*
6312     - * SGX virtualization is not yet supported. There is no software
6313     - * enable bit for SGX, so we have to trap ENCLS and inject a #UD
6314     - * to prevent the guest from executing ENCLS.
6315     - */
6316     - kvm_queue_exception(vcpu, UD_VECTOR);
6317     - return 1;
6318     -}
6319     -
6320     -/*
6321     - * The exit handlers return 1 if the exit was handled fully and guest execution
6322     - * may resume. Otherwise they set the kvm_run parameter to indicate what needs
6323     - * to be done to userspace and return 0.
6324     - */
6325     -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6326     - [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
6327     - [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
6328     - [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
6329     - [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
6330     - [EXIT_REASON_IO_INSTRUCTION] = handle_io,
6331     - [EXIT_REASON_CR_ACCESS] = handle_cr,
6332     - [EXIT_REASON_DR_ACCESS] = handle_dr,
6333     - [EXIT_REASON_CPUID] = kvm_emulate_cpuid,
6334     - [EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
6335     - [EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
6336     - [EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
6337     - [EXIT_REASON_HLT] = kvm_emulate_halt,
6338     - [EXIT_REASON_INVD] = handle_invd,
6339     - [EXIT_REASON_INVLPG] = handle_invlpg,
6340     - [EXIT_REASON_RDPMC] = handle_rdpmc,
6341     - [EXIT_REASON_VMCALL] = handle_vmcall,
6342     - [EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
6343     - [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
6344     - [EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
6345     - [EXIT_REASON_VMPTRST] = handle_vmx_instruction,
6346     - [EXIT_REASON_VMREAD] = handle_vmx_instruction,
6347     - [EXIT_REASON_VMRESUME] = handle_vmx_instruction,
6348     - [EXIT_REASON_VMWRITE] = handle_vmx_instruction,
6349     - [EXIT_REASON_VMOFF] = handle_vmx_instruction,
6350     - [EXIT_REASON_VMON] = handle_vmx_instruction,
6351     - [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
6352     - [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
6353     - [EXIT_REASON_APIC_WRITE] = handle_apic_write,
6354     - [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
6355     - [EXIT_REASON_WBINVD] = handle_wbinvd,
6356     - [EXIT_REASON_XSETBV] = handle_xsetbv,
6357     - [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
6358     - [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
6359     - [EXIT_REASON_GDTR_IDTR] = handle_desc,
6360     - [EXIT_REASON_LDTR_TR] = handle_desc,
6361     - [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
6362     - [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
6363     - [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
6364     - [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6365     - [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
6366     - [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6367     - [EXIT_REASON_INVEPT] = handle_vmx_instruction,
6368     - [EXIT_REASON_INVVPID] = handle_vmx_instruction,
6369     - [EXIT_REASON_RDRAND] = handle_invalid_op,
6370     - [EXIT_REASON_RDSEED] = handle_invalid_op,
6371     - [EXIT_REASON_PML_FULL] = handle_pml_full,
6372     - [EXIT_REASON_INVPCID] = handle_invpcid,
6373     - [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
6374     - [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
6375     - [EXIT_REASON_ENCLS] = handle_encls,
6376     -};
6377     -
6378     -static const int kvm_vmx_max_exit_handlers =
6379     - ARRAY_SIZE(kvm_vmx_exit_handlers);
6380     -
6381     -static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6382     -{
6383     - *info1 = vmcs_readl(EXIT_QUALIFICATION);
6384     - *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6385     -}
6386     -
6387     -static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
6388     -{
6389     - if (vmx->pml_pg) {
6390     - __free_page(vmx->pml_pg);
6391     - vmx->pml_pg = NULL;
6392     - }
6393     -}
6394     -
6395     -static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
6396     -{
6397     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6398     - u64 *pml_buf;
6399     - u16 pml_idx;
6400     -
6401     - pml_idx = vmcs_read16(GUEST_PML_INDEX);
6402     -
6403     - /* Do nothing if PML buffer is empty */
6404     - if (pml_idx == (PML_ENTITY_NUM - 1))
6405     - return;
6406     -
6407     - /* PML index always points to next available PML buffer entity */
6408     - if (pml_idx >= PML_ENTITY_NUM)
6409     - pml_idx = 0;
6410     - else
6411     - pml_idx++;
6412     -
6413     - pml_buf = page_address(vmx->pml_pg);
6414     - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
6415     - u64 gpa;
6416     -
6417     - gpa = pml_buf[pml_idx];
6418     - WARN_ON(gpa & (PAGE_SIZE - 1));
6419     - kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
6420     - }
6421     -
6422     - /* reset PML index */
6423     - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
6424     -}
6425     -
6426     -/*
6427     - * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
6428     - * Called before reporting dirty_bitmap to userspace.
6429     - */
6430     -static void kvm_flush_pml_buffers(struct kvm *kvm)
6431     -{
6432     - int i;
6433     - struct kvm_vcpu *vcpu;
6434     - /*
6435     - * We only need to kick each vcpu out of guest mode here, as the PML buffer
6436     - * is flushed at the beginning of every VMEXIT, so only vcpus currently
6437     - * running in guest mode can have unflushed GPAs in their PML
6438     - * buffer.
6439     - */
6440     - kvm_for_each_vcpu(i, vcpu, kvm)
6441     - kvm_vcpu_kick(vcpu);
6442     -}
6443     -
6444     -static void vmx_dump_sel(char *name, uint32_t sel)
6445     -{
6446     - pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
6447     - name, vmcs_read16(sel),
6448     - vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
6449     - vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
6450     - vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
6451     -}
6452     -
6453     -static void vmx_dump_dtsel(char *name, uint32_t limit)
6454     -{
6455     - pr_err("%s limit=0x%08x, base=0x%016lx\n",
6456     - name, vmcs_read32(limit),
6457     - vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
6458     -}
6459     -
6460     -void dump_vmcs(void)
6461     -{
6462     - u32 vmentry_ctl, vmexit_ctl;
6463     - u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
6464     - unsigned long cr4;
6465     - u64 efer;
6466     - int i, n;
6467     -
6468     - if (!dump_invalid_vmcs) {
6469     - pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
6470     - return;
6471     - }
6472     -
6473     - vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
6474     - vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
6475     - cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
6476     - pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
6477     - cr4 = vmcs_readl(GUEST_CR4);
6478     - efer = vmcs_read64(GUEST_IA32_EFER);
6479     - secondary_exec_control = 0;
6480     - if (cpu_has_secondary_exec_ctrls())
6481     - secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6482     -
6483     - pr_err("*** Guest State ***\n");
6484     - pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6485     - vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
6486     - vmcs_readl(CR0_GUEST_HOST_MASK));
6487     - pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
6488     - cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
6489     - pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
6490     - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
6491     - (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
6492     - {
6493     - pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
6494     - vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
6495     - pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
6496     - vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
6497     - }
6498     - pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
6499     - vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
6500     - pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
6501     - vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
6502     - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6503     - vmcs_readl(GUEST_SYSENTER_ESP),
6504     - vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
6505     - vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
6506     - vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
6507     - vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
6508     - vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
6509     - vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
6510     - vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
6511     - vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
6512     - vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
6513     - vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
6514     - vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
6515     - if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
6516     - (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
6517     - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
6518     - efer, vmcs_read64(GUEST_IA32_PAT));
6519     - pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
6520     - vmcs_read64(GUEST_IA32_DEBUGCTL),
6521     - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
6522     - if (cpu_has_load_perf_global_ctrl() &&
6523     - vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
6524     - pr_err("PerfGlobCtl = 0x%016llx\n",
6525     - vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
6526     - if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
6527     - pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
6528     - pr_err("Interruptibility = %08x ActivityState = %08x\n",
6529     - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
6530     - vmcs_read32(GUEST_ACTIVITY_STATE));
6531     - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
6532     - pr_err("InterruptStatus = %04x\n",
6533     - vmcs_read16(GUEST_INTR_STATUS));
6534     -
6535     - pr_err("*** Host State ***\n");
6536     - pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
6537     - vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
6538     - pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
6539     - vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
6540     - vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
6541     - vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
6542     - vmcs_read16(HOST_TR_SELECTOR));
6543     - pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
6544     - vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
6545     - vmcs_readl(HOST_TR_BASE));
6546     - pr_err("GDTBase=%016lx IDTBase=%016lx\n",
6547     - vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
6548     - pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
6549     - vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
6550     - vmcs_readl(HOST_CR4));
6551     - pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
6552     - vmcs_readl(HOST_IA32_SYSENTER_ESP),
6553     - vmcs_read32(HOST_IA32_SYSENTER_CS),
6554     - vmcs_readl(HOST_IA32_SYSENTER_EIP));
6555     - if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
6556     - pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
6557     - vmcs_read64(HOST_IA32_EFER),
6558     - vmcs_read64(HOST_IA32_PAT));
6559     - if (cpu_has_load_perf_global_ctrl() &&
6560     - vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
6561     - pr_err("PerfGlobCtl = 0x%016llx\n",
6562     - vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
6563     -
6564     - pr_err("*** Control State ***\n");
6565     - pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
6566     - pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
6567     - pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
6568     - pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
6569     - vmcs_read32(EXCEPTION_BITMAP),
6570     - vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
6571     - vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
6572     - pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
6573     - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6574     - vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
6575     - vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
6576     - pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
6577     - vmcs_read32(VM_EXIT_INTR_INFO),
6578     - vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
6579     - vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
6580     - pr_err(" reason=%08x qualification=%016lx\n",
6581     - vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
6582     - pr_err("IDTVectoring: info=%08x errcode=%08x\n",
6583     - vmcs_read32(IDT_VECTORING_INFO_FIELD),
6584     - vmcs_read32(IDT_VECTORING_ERROR_CODE));
6585     - pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
6586     - if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
6587     - pr_err("TSC Multiplier = 0x%016llx\n",
6588     - vmcs_read64(TSC_MULTIPLIER));
6589     - if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
6590     - if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
6591     - u16 status = vmcs_read16(GUEST_INTR_STATUS);
6592     - pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
6593     - }
6594     - pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
6595     - if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
6596     - pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
6597     - pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
6598     - }
6599     - if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
6600     - pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
6601     - if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
6602     - pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
6603     - n = vmcs_read32(CR3_TARGET_COUNT);
6604     - for (i = 0; i + 1 < n; i += 4)
6605     - pr_err("CR3 target%u=%016lx target%u=%016lx\n",
6606     - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
6607     - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
6608     - if (i < n)
6609     - pr_err("CR3 target%u=%016lx\n",
6610     - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
6611     - if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
6612     - pr_err("PLE Gap=%08x Window=%08x\n",
6613     - vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
6614     - if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
6615     - pr_err("Virtual processor ID = 0x%04x\n",
6616     - vmcs_read16(VIRTUAL_PROCESSOR_ID));
6617     -}
6618     -
6619     -/*
6620     - * The guest has exited. See if we can fix it or if we need userspace
6621     - * assistance.
6622     - */
6623     -static int vmx_handle_exit(struct kvm_vcpu *vcpu,
6624     - enum exit_fastpath_completion exit_fastpath)
6625     -{
6626     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6627     - u32 exit_reason = vmx->exit_reason;
6628     - u32 vectoring_info = vmx->idt_vectoring_info;
6629     -
6630     - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
6631     -
6632     - /*
6633     - * Flush the PML buffer of logged GPAs; this keeps dirty_bitmap more
6634     - * up to date. Another benefit is that in kvm_vm_ioctl_get_dirty_log, before
6635     - * querying dirty_bitmap, we only need to kick all vcpus out of guest
6636     - * mode, because once a vcpu is in root mode its PML buffer must already
6637     - * have been flushed.
6638     - */
6639     - if (enable_pml)
6640     - vmx_flush_pml_buffer(vcpu);
6641     -
6642     - /* If guest state is invalid, start emulating */
6643     - if (vmx->emulation_required)
6644     - return handle_invalid_guest_state(vcpu);
6645     -
6646     - if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
6647     - return nested_vmx_reflect_vmexit(vcpu, exit_reason);
6648     -
6649     - if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
6650     - dump_vmcs();
6651     - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6652     - vcpu->run->fail_entry.hardware_entry_failure_reason
6653     - = exit_reason;
6654     - return 0;
6655     - }
6656     -
6657     - if (unlikely(vmx->fail)) {
6658     - dump_vmcs();
6659     - vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
6660     - vcpu->run->fail_entry.hardware_entry_failure_reason
6661     - = vmcs_read32(VM_INSTRUCTION_ERROR);
6662     - return 0;
6663     - }
6664     -
6665     - /*
6666     - * Note:
6667     - * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it was caused by an
6668     - * event delivery, since that indicates the guest is accessing MMIO.
6669     - * The vm-exit can be triggered again after returning to the guest,
6670     - * which would cause an infinite loop.
6671     - */
6672     - if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
6673     - (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
6674     - exit_reason != EXIT_REASON_EPT_VIOLATION &&
6675     - exit_reason != EXIT_REASON_PML_FULL &&
6676     - exit_reason != EXIT_REASON_TASK_SWITCH)) {
6677     - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6678     - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
6679     - vcpu->run->internal.ndata = 3;
6680     - vcpu->run->internal.data[0] = vectoring_info;
6681     - vcpu->run->internal.data[1] = exit_reason;
6682     - vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
6683     - if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
6684     - vcpu->run->internal.ndata++;
6685     - vcpu->run->internal.data[3] =
6686     - vmcs_read64(GUEST_PHYSICAL_ADDRESS);
6687     - }
6688     - return 0;
6689     - }
6690     -
6691     - if (unlikely(!enable_vnmi &&
6692     - vmx->loaded_vmcs->soft_vnmi_blocked)) {
6693     - if (vmx_interrupt_allowed(vcpu)) {
6694     - vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6695     - } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
6696     - vcpu->arch.nmi_pending) {
6697     - /*
6698     - * This CPU doesn't help us find the end of an
6699     - * NMI-blocked window if the guest runs with IRQs
6700     - * disabled. So we pull the trigger after 1 s of
6701     - * futile waiting, but inform the user about this.
6702     - */
6703     - printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
6704     - "state on VCPU %d after 1 s timeout\n",
6705     - __func__, vcpu->vcpu_id);
6706     - vmx->loaded_vmcs->soft_vnmi_blocked = 0;
6707     - }
6708     - }
6709     -
6710     - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
6711     - kvm_skip_emulated_instruction(vcpu);
6712     - return 1;
6713     - } else if (exit_reason < kvm_vmx_max_exit_handlers
6714     - && kvm_vmx_exit_handlers[exit_reason]) {
6715     -#ifdef CONFIG_RETPOLINE
6716     - if (exit_reason == EXIT_REASON_MSR_WRITE)
6717     - return kvm_emulate_wrmsr(vcpu);
6718     - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
6719     - return handle_preemption_timer(vcpu);
6720     - else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
6721     - return handle_interrupt_window(vcpu);
6722     - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
6723     - return handle_external_interrupt(vcpu);
6724     - else if (exit_reason == EXIT_REASON_HLT)
6725     - return kvm_emulate_halt(vcpu);
6726     - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
6727     - return handle_ept_misconfig(vcpu);
6728     -#endif
6729     - return kvm_vmx_exit_handlers[exit_reason](vcpu);
6730     - } else {
6731     - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
6732     - exit_reason);
6733     - dump_vmcs();
6734     - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6735     - vcpu->run->internal.suberror =
6736     - KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
6737     - vcpu->run->internal.ndata = 1;
6738     - vcpu->run->internal.data[0] = exit_reason;
6739     - return 0;
6740     - }
6741     -}
6742     -
6743     -/*
6744     - * Software based L1D cache flush which is used when microcode providing
6745     - * the cache control MSR is not loaded.
6746     - *
6747     - * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but
6748     - * flushing it requires reading in 64 KiB because the replacement algorithm
6749     - * is not exactly LRU. This could be sized at runtime via topology
6750     - * information, but as all relevant affected CPUs have a 32 KiB L1D cache
6751     - * there is no point in doing so.
6752     - */
6753     -static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
6754     -{
6755     - int size = PAGE_SIZE << L1D_CACHE_ORDER;
6756     -
6757     - /*
6758     - * This code is only executed when the flush mode is 'cond' or
6759     - * 'always'.
6760     - */
6761     - if (static_branch_likely(&vmx_l1d_flush_cond)) {
6762     - bool flush_l1d;
6763     -
6764     - /*
6765     - * Clear the per-vcpu flush bit, it gets set again
6766     - * either from vcpu_run() or from one of the unsafe
6767     - * VMEXIT handlers.
6768     - */
6769     - flush_l1d = vcpu->arch.l1tf_flush_l1d;
6770     - vcpu->arch.l1tf_flush_l1d = false;
6771     -
6772     - /*
6773     - * Clear the per-cpu flush bit, it gets set again from
6774     - * the interrupt handlers.
6775     - */
6776     - flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
6777     - kvm_clear_cpu_l1tf_flush_l1d();
6778     -
6779     - if (!flush_l1d)
6780     - return;
6781     - }
6782     -
6783     - vcpu->stat.l1d_flush++;
6784     -
6785     - if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
6786     - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
6787     - return;
6788     - }
6789     -
6790     - asm volatile(
6791     - /* First ensure the pages are in the TLB */
6792     - "xorl %%eax, %%eax\n"
6793     - ".Lpopulate_tlb:\n\t"
6794     - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6795     - "addl $4096, %%eax\n\t"
6796     - "cmpl %%eax, %[size]\n\t"
6797     - "jne .Lpopulate_tlb\n\t"
6798     - "xorl %%eax, %%eax\n\t"
6799     - "cpuid\n\t"
6800     - /* Now fill the cache */
6801     - "xorl %%eax, %%eax\n"
6802     - ".Lfill_cache:\n"
6803     - "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6804     - "addl $64, %%eax\n\t"
6805     - "cmpl %%eax, %[size]\n\t"
6806     - "jne .Lfill_cache\n\t"
6807     - "lfence\n"
6808     - :: [flush_pages] "r" (vmx_l1d_flush_pages),
6809     - [size] "r" (size)
6810     - : "eax", "ebx", "ecx", "edx");
6811     -}
6812     -
6813     -static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6814     -{
6815     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6816     - int tpr_threshold;
6817     -
6818     - if (is_guest_mode(vcpu) &&
6819     - nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
6820     - return;
6821     -
6822     - tpr_threshold = (irr == -1 || tpr < irr) ? 0 : irr;
6823     - if (is_guest_mode(vcpu))
6824     - to_vmx(vcpu)->nested.l1_tpr_threshold = tpr_threshold;
6825     - else
6826     - vmcs_write32(TPR_THRESHOLD, tpr_threshold);
6827     -}
6828     -
6829     -void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
6830     -{
6831     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6832     - u32 sec_exec_control;
6833     -
6834     - if (!lapic_in_kernel(vcpu))
6835     - return;
6836     -
6837     - if (!flexpriority_enabled &&
6838     - !cpu_has_vmx_virtualize_x2apic_mode())
6839     - return;
6840     -
6841     - /* Postpone execution until vmcs01 is the current VMCS. */
6842     - if (is_guest_mode(vcpu)) {
6843     - vmx->nested.change_vmcs01_virtual_apic_mode = true;
6844     - return;
6845     - }
6846     -
6847     - sec_exec_control = secondary_exec_controls_get(vmx);
6848     - sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6849     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
6850     -
6851     - switch (kvm_get_apic_mode(vcpu)) {
6852     - case LAPIC_MODE_INVALID:
6853     - WARN_ONCE(true, "Invalid local APIC state");
6854     - case LAPIC_MODE_DISABLED:
6855     - break;
6856     - case LAPIC_MODE_XAPIC:
6857     - if (flexpriority_enabled) {
6858     - sec_exec_control |=
6859     - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6860     - vmx_flush_tlb(vcpu, true);
6861     - }
6862     - break;
6863     - case LAPIC_MODE_X2APIC:
6864     - if (cpu_has_vmx_virtualize_x2apic_mode())
6865     - sec_exec_control |=
6866     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6867     - break;
6868     - }
6869     - secondary_exec_controls_set(vmx, sec_exec_control);
6870     -
6871     - vmx_update_msr_bitmap(vcpu);
6872     -}
6873     -
6874     -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
6875     -{
6876     - if (!is_guest_mode(vcpu)) {
6877     - vmcs_write64(APIC_ACCESS_ADDR, hpa);
6878     - vmx_flush_tlb(vcpu, true);
6879     - }
6880     -}
6881     -
6882     -static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
6883     -{
6884     - u16 status;
6885     - u8 old;
6886     -
6887     - if (max_isr == -1)
6888     - max_isr = 0;
6889     -
6890     - status = vmcs_read16(GUEST_INTR_STATUS);
6891     - old = status >> 8;
6892     - if (max_isr != old) {
6893     - status &= 0xff;
6894     - status |= max_isr << 8;
6895     - vmcs_write16(GUEST_INTR_STATUS, status);
6896     - }
6897     -}
6898     -
6899     -static void vmx_set_rvi(int vector)
6900     -{
6901     - u16 status;
6902     - u8 old;
6903     -
6904     - if (vector == -1)
6905     - vector = 0;
6906     -
6907     - status = vmcs_read16(GUEST_INTR_STATUS);
6908     - old = (u8)status & 0xff;
6909     - if ((u8)vector != old) {
6910     - status &= ~0xff;
6911     - status |= (u8)vector;
6912     - vmcs_write16(GUEST_INTR_STATUS, status);
6913     - }
6914     -}
6915     -
6916     -static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6917     -{
6918     - /*
6919     - * When running L2, updating RVI is only relevant when
6920     - * vmcs12 has virtual-interrupt-delivery enabled.
6921     - * However, it can be enabled only when L1 also
6922     - * intercepts external-interrupts and in that case
6923     - * we should not update vmcs02 RVI but instead intercept
6924     - * the interrupt. Therefore, do nothing when running L2.
6925     - */
6926     - if (!is_guest_mode(vcpu))
6927     - vmx_set_rvi(max_irr);
6928     -}
6929     -
6930     -static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
6931     -{
6932     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6933     - int max_irr;
6934     - bool max_irr_updated;
6935     -
6936     - WARN_ON(!vcpu->arch.apicv_active);
6937     - if (pi_test_on(&vmx->pi_desc)) {
6938     - pi_clear_on(&vmx->pi_desc);
6939     - /*
6940     - * IOMMU can write to PID.ON, so the barrier matters even on UP.
6941     - * But on x86 this is just a compiler barrier anyway.
6942     - */
6943     - smp_mb__after_atomic();
6944     - max_irr_updated =
6945     - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
6946     -
6947     - /*
6948     - * If we are running L2 and L1 has a new pending interrupt
6949     - * which can be injected, we should re-evaluate
6950     - * what should be done with this new L1 interrupt.
6951     - * If L1 intercepts external-interrupts, we should
6952     - * exit from L2 to L1. Otherwise, interrupt should be
6953     - * delivered directly to L2.
6954     - */
6955     - if (is_guest_mode(vcpu) && max_irr_updated) {
6956     - if (nested_exit_on_intr(vcpu))
6957     - kvm_vcpu_exiting_guest_mode(vcpu);
6958     - else
6959     - kvm_make_request(KVM_REQ_EVENT, vcpu);
6960     - }
6961     - } else {
6962     - max_irr = kvm_lapic_find_highest_irr(vcpu);
6963     - }
6964     - vmx_hwapic_irr_update(vcpu, max_irr);
6965     - return max_irr;
6966     -}
6967     -
6968     -static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
6969     -{
6970     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6971     -
6972     - return pi_test_on(pi_desc) ||
6973     - (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
6974     -}
6975     -
6976     -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6977     -{
6978     - if (!kvm_vcpu_apicv_active(vcpu))
6979     - return;
6980     -
6981     - vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6982     - vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6983     - vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6984     - vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6985     -}
6986     -
6987     -static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
6988     -{
6989     - struct vcpu_vmx *vmx = to_vmx(vcpu);
6990     -
6991     - pi_clear_on(&vmx->pi_desc);
6992     - memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
6993     -}
6994     -
6995     -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
6996     -{
6997     - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6998     -
6999     - /* If the exit was due to a page fault, check for an async PF */
7000     - if (is_page_fault(vmx->exit_intr_info))
7001     - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
7002     -
7003     - /* Handle machine checks before interrupts are enabled */
7004     - if (is_machine_check(vmx->exit_intr_info))
7005     - kvm_machine_check();
7006     -
7007     - /* We need to handle NMIs before interrupts are enabled */
7008     - if (is_nmi(vmx->exit_intr_info)) {
7009     - kvm_before_interrupt(&vmx->vcpu);
7010     - asm("int $2");
7011     - kvm_after_interrupt(&vmx->vcpu);
7012     - }
7013     -}
7014     -
7015     -static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
7016     -{
7017     - unsigned int vector;
7018     - unsigned long entry;
7019     -#ifdef CONFIG_X86_64
7020     - unsigned long tmp;
7021     -#endif
7022     - gate_desc *desc;
7023     - u32 intr_info;
7024     -
7025     - intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7026     - if (WARN_ONCE(!is_external_intr(intr_info),
7027     - "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
7028     - return;
7029     -
7030     - vector = intr_info & INTR_INFO_VECTOR_MASK;
7031     - desc = (gate_desc *)host_idt_base + vector;
7032     - entry = gate_offset(desc);
7033     -
7034     - kvm_before_interrupt(vcpu);
7035     -
7036     - asm volatile(
7037     -#ifdef CONFIG_X86_64
7038     - "mov %%" _ASM_SP ", %[sp]\n\t"
7039     - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
7040     - "push $%c[ss]\n\t"
7041     - "push %[sp]\n\t"
7042     -#endif
7043     - "pushf\n\t"
7044     - __ASM_SIZE(push) " $%c[cs]\n\t"
7045     - CALL_NOSPEC
7046     - :
7047     -#ifdef CONFIG_X86_64
7048     - [sp]"=&r"(tmp),
7049     -#endif
7050     - ASM_CALL_CONSTRAINT
7051     - :
7052     - THUNK_TARGET(entry),
7053     - [ss]"i"(__KERNEL_DS),
7054     - [cs]"i"(__KERNEL_CS)
7055     - );
7056     -
7057     - kvm_after_interrupt(vcpu);
7058     -}
7059     -STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
7060     -
7061     -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
7062     - enum exit_fastpath_completion *exit_fastpath)
7063     -{
7064     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7065     -
7066     - if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
7067     - handle_external_interrupt_irqoff(vcpu);
7068     - else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
7069     - handle_exception_nmi_irqoff(vmx);
7070     - else if (!is_guest_mode(vcpu) &&
7071     - vmx->exit_reason == EXIT_REASON_MSR_WRITE)
7072     - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
7073     -}
7074     -
7075     -static bool vmx_has_emulated_msr(int index)
7076     -{
7077     - switch (index) {
7078     - case MSR_IA32_SMBASE:
7079     - /*
7080     - * We cannot do SMM unless we can run the guest in big
7081     - * real mode.
7082     - */
7083     - return enable_unrestricted_guest || emulate_invalid_guest_state;
7084     - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
7085     - return nested;
7086     - case MSR_AMD64_VIRT_SPEC_CTRL:
7087     - /* This is AMD only. */
7088     - return false;
7089     - default:
7090     - return true;
7091     - }
7092     -}
7093     -
7094     -static bool vmx_pt_supported(void)
7095     -{
7096     - return pt_mode == PT_MODE_HOST_GUEST;
7097     -}
7098     -
7099     -static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7100     -{
7101     - u32 exit_intr_info;
7102     - bool unblock_nmi;
7103     - u8 vector;
7104     - bool idtv_info_valid;
7105     -
7106     - idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7107     -
7108     - if (enable_vnmi) {
7109     - if (vmx->loaded_vmcs->nmi_known_unmasked)
7110     - return;
7111     - /*
7112     - * Can't use vmx->exit_intr_info since we're not sure what
7113     - * the exit reason is.
7114     - */
7115     - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
7116     - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
7117     - vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
7118     - /*
7119     - * SDM 3: 27.7.1.2 (September 2008)
7120     - * Re-set bit "block by NMI" before VM entry if vmexit caused by
7121     - * a guest IRET fault.
7122     - * SDM 3: 23.2.2 (September 2008)
7123     - * Bit 12 is undefined in any of the following cases:
7124     - * If the VM exit sets the valid bit in the IDT-vectoring
7125     - * information field.
7126     - * If the VM exit is due to a double fault.
7127     - */
7128     - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
7129     - vector != DF_VECTOR && !idtv_info_valid)
7130     - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7131     - GUEST_INTR_STATE_NMI);
7132     - else
7133     - vmx->loaded_vmcs->nmi_known_unmasked =
7134     - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
7135     - & GUEST_INTR_STATE_NMI);
7136     - } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
7137     - vmx->loaded_vmcs->vnmi_blocked_time +=
7138     - ktime_to_ns(ktime_sub(ktime_get(),
7139     - vmx->loaded_vmcs->entry_time));
7140     -}
7141     -
7142     -static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
7143     - u32 idt_vectoring_info,
7144     - int instr_len_field,
7145     - int error_code_field)
7146     -{
7147     - u8 vector;
7148     - int type;
7149     - bool idtv_info_valid;
7150     -
7151     - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
7152     -
7153     - vcpu->arch.nmi_injected = false;
7154     - kvm_clear_exception_queue(vcpu);
7155     - kvm_clear_interrupt_queue(vcpu);
7156     -
7157     - if (!idtv_info_valid)
7158     - return;
7159     -
7160     - kvm_make_request(KVM_REQ_EVENT, vcpu);
7161     -
7162     - vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
7163     - type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
7164     -
7165     - switch (type) {
7166     - case INTR_TYPE_NMI_INTR:
7167     - vcpu->arch.nmi_injected = true;
7168     - /*
7169     - * SDM 3: 27.7.1.2 (September 2008)
7170     - * Clear bit "block by NMI" before VM entry if a NMI
7171     - * delivery faulted.
7172     - */
7173     - vmx_set_nmi_mask(vcpu, false);
7174     - break;
7175     - case INTR_TYPE_SOFT_EXCEPTION:
7176     - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7177     - /* fall through */
7178     - case INTR_TYPE_HARD_EXCEPTION:
7179     - if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
7180     - u32 err = vmcs_read32(error_code_field);
7181     - kvm_requeue_exception_e(vcpu, vector, err);
7182     - } else
7183     - kvm_requeue_exception(vcpu, vector);
7184     - break;
7185     - case INTR_TYPE_SOFT_INTR:
7186     - vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
7187     - /* fall through */
7188     - case INTR_TYPE_EXT_INTR:
7189     - kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
7190     - break;
7191     - default:
7192     - break;
7193     - }
7194     -}
7195     -
7196     -static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
7197     -{
7198     - __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
7199     - VM_EXIT_INSTRUCTION_LEN,
7200     - IDT_VECTORING_ERROR_CODE);
7201     -}
7202     -
7203     -static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
7204     -{
7205     - __vmx_complete_interrupts(vcpu,
7206     - vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
7207     - VM_ENTRY_INSTRUCTION_LEN,
7208     - VM_ENTRY_EXCEPTION_ERROR_CODE);
7209     -
7210     - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
7211     -}
7212     -
7213     -static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
7214     -{
7215     - int i, nr_msrs;
7216     - struct perf_guest_switch_msr *msrs;
7217     -
7218     - msrs = perf_guest_get_msrs(&nr_msrs);
7219     -
7220     - if (!msrs)
7221     - return;
7222     -
7223     - for (i = 0; i < nr_msrs; i++)
7224     - if (msrs[i].host == msrs[i].guest)
7225     - clear_atomic_switch_msr(vmx, msrs[i].msr);
7226     - else
7227     - add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
7228     - msrs[i].host, false);
7229     -}
7230     -
7231     -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
7232     -{
7233     - u32 host_umwait_control;
7234     -
7235     - if (!vmx_has_waitpkg(vmx))
7236     - return;
7237     -
7238     - host_umwait_control = get_umwait_control_msr();
7239     -
7240     - if (vmx->msr_ia32_umwait_control != host_umwait_control)
7241     - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
7242     - vmx->msr_ia32_umwait_control,
7243     - host_umwait_control, false);
7244     - else
7245     - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
7246     -}
7247     -
7248     -static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
7249     -{
7250     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7251     - u64 tscl;
7252     - u32 delta_tsc;
7253     -
7254     - if (vmx->req_immediate_exit) {
7255     - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
7256     - vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7257     - } else if (vmx->hv_deadline_tsc != -1) {
7258     - tscl = rdtsc();
7259     - if (vmx->hv_deadline_tsc > tscl)
7260     - /* set_hv_timer ensures the delta fits in 32-bits */
7261     - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
7262     - cpu_preemption_timer_multi);
7263     - else
7264     - delta_tsc = 0;
7265     -
7266     - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
7267     - vmx->loaded_vmcs->hv_timer_soft_disabled = false;
7268     - } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
7269     - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
7270     - vmx->loaded_vmcs->hv_timer_soft_disabled = true;
7271     - }
7272     -}
7273     -
7274     -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
7275     -{
7276     - if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
7277     - vmx->loaded_vmcs->host_state.rsp = host_rsp;
7278     - vmcs_writel(HOST_RSP, host_rsp);
7279     - }
7280     -}
7281     -
7282     -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
7283     -
7284     -static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
7285     -{
7286     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7287     - unsigned long cr3, cr4;
7288     -
7289     - /* Record the guest's net vcpu time for enforced NMI injections. */
7290     - if (unlikely(!enable_vnmi &&
7291     - vmx->loaded_vmcs->soft_vnmi_blocked))
7292     - vmx->loaded_vmcs->entry_time = ktime_get();
7293     -
7294     - /* Don't enter VMX if guest state is invalid; let the exit handler
7295     - start emulation until we arrive back at a valid state */
7296     - if (vmx->emulation_required)
7297     - return;
7298     -
7299     - if (vmx->ple_window_dirty) {
7300     - vmx->ple_window_dirty = false;
7301     - vmcs_write32(PLE_WINDOW, vmx->ple_window);
7302     - }
7303     -
7304     - if (vmx->nested.need_vmcs12_to_shadow_sync)
7305     - nested_sync_vmcs12_to_shadow(vcpu);
7306     -
7307     - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RSP))
7308     - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
7309     - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
7310     - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
7311     -
7312     - cr3 = __get_current_cr3_fast();
7313     - if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
7314     - vmcs_writel(HOST_CR3, cr3);
7315     - vmx->loaded_vmcs->host_state.cr3 = cr3;
7316     - }
7317     -
7318     - cr4 = cr4_read_shadow();
7319     - if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
7320     - vmcs_writel(HOST_CR4, cr4);
7321     - vmx->loaded_vmcs->host_state.cr4 = cr4;
7322     - }
7323     -
7324     - /* When single-stepping over STI and MOV SS, we must clear the
7325     - * corresponding interruptibility bits in the guest state. Otherwise
7326     - * vmentry fails as it then expects bit 14 (BS) of the pending debug
7327     - * exceptions field to be set, but that's not correct for the guest debugging
7328     - * case. */
7329     - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7330     - vmx_set_interrupt_shadow(vcpu, 0);
7331     -
7332     - kvm_load_guest_xsave_state(vcpu);
7333     -
7334     - if (static_cpu_has(X86_FEATURE_PKU) &&
7335     - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
7336     - vcpu->arch.pkru != vmx->host_pkru)
7337     - __write_pkru(vcpu->arch.pkru);
7338     -
7339     - pt_guest_enter(vmx);
7340     -
7341     - atomic_switch_perf_msrs(vmx);
7342     - atomic_switch_umwait_control_msr(vmx);
7343     -
7344     - if (enable_preemption_timer)
7345     - vmx_update_hv_timer(vcpu);
7346     -
7347     - if (lapic_in_kernel(vcpu) &&
7348     - vcpu->arch.apic->lapic_timer.timer_advance_ns)
7349     - kvm_wait_lapic_expire(vcpu);
7350     -
7351     - /*
7352     - * If this vCPU has touched SPEC_CTRL, restore the guest's value if
7353     - * it's non-zero. Since vmentry is serialising on affected CPUs, there
7354     - * is no need to worry about the conditional branch over the wrmsr
7355     - * being speculatively taken.
7356     - */
7357     - x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
7358     -
7359     - /* L1D Flush includes CPU buffer clear to mitigate MDS */
7360     - if (static_branch_unlikely(&vmx_l1d_should_flush))
7361     - vmx_l1d_flush(vcpu);
7362     - else if (static_branch_unlikely(&mds_user_clear))
7363     - mds_clear_cpu_buffers();
7364     -
7365     - if (vcpu->arch.cr2 != read_cr2())
7366     - write_cr2(vcpu->arch.cr2);
7367     -
7368     - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
7369     - vmx->loaded_vmcs->launched);
7370     -
7371     - vcpu->arch.cr2 = read_cr2();
7372     -
7373     - /*
7374     - * We do not use IBRS in the kernel. If this vCPU has used the
7375     - * SPEC_CTRL MSR it may have left it on; save the value and
7376     - * turn it off. This is much more efficient than blindly adding
7377     - * it to the atomic save/restore list. Especially as the former
7378     - * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
7379     - *
7380     - * For non-nested case:
7381     - * If the L01 MSR bitmap does not intercept the MSR, then we need to
7382     - * save it.
7383     - *
7384     - * For nested case:
7385     - * If the L02 MSR bitmap does not intercept the MSR, then we need to
7386     - * save it.
7387     - */
7388     - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
7389     - vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
7390     -
7391     - x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
7392     -
7393     - /* All fields are clean at this point */
7394     - if (static_branch_unlikely(&enable_evmcs))
7395     - current_evmcs->hv_clean_fields |=
7396     - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
7397     -
7398     - if (static_branch_unlikely(&enable_evmcs))
7399     - current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
7400     -
7401     - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
7402     - if (vmx->host_debugctlmsr)
7403     - update_debugctlmsr(vmx->host_debugctlmsr);
7404     -
7405     -#ifndef CONFIG_X86_64
7406     - /*
7407     - * The sysexit path does not restore ds/es, so we must set them to
7408     - * a reasonable value ourselves.
7409     - *
7410     - * We can't defer this to vmx_prepare_switch_to_host() since that
7411     - * function may be executed in interrupt context, which saves and
7412     - * restores segments around it, nullifying its effect.
7413     - */
7414     - loadsegment(ds, __USER_DS);
7415     - loadsegment(es, __USER_DS);
7416     -#endif
7417     -
7418     - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
7419     - | (1 << VCPU_EXREG_RFLAGS)
7420     - | (1 << VCPU_EXREG_PDPTR)
7421     - | (1 << VCPU_EXREG_SEGMENTS)
7422     - | (1 << VCPU_EXREG_CR3));
7423     - vcpu->arch.regs_dirty = 0;
7424     -
7425     - pt_guest_exit(vmx);
7426     -
7427     - /*
7428     - * The eager FPU is enabled if PKEY is supported, and CR4 has been switched
7429     - * back to the host value, so it is safe to read the guest PKRU from the
7430     - * current XSAVE area.
7431     - */
7432     - if (static_cpu_has(X86_FEATURE_PKU) &&
7433     - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
7434     - vcpu->arch.pkru = rdpkru();
7435     - if (vcpu->arch.pkru != vmx->host_pkru)
7436     - __write_pkru(vmx->host_pkru);
7437     - }
7438     -
7439     - kvm_load_host_xsave_state(vcpu);
7440     -
7441     - vmx->nested.nested_run_pending = 0;
7442     - vmx->idt_vectoring_info = 0;
7443     -
7444     - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
7445     - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
7446     - kvm_machine_check();
7447     -
7448     - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
7449     - return;
7450     -
7451     - vmx->loaded_vmcs->launched = 1;
7452     - vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
7453     -
7454     - vmx_recover_nmi_blocking(vmx);
7455     - vmx_complete_interrupts(vmx);
7456     -}
7457     -
7458     -static struct kvm *vmx_vm_alloc(void)
7459     -{
7460     - struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
7461     - GFP_KERNEL_ACCOUNT | __GFP_ZERO,
7462     - PAGE_KERNEL);
7463     - return &kvm_vmx->kvm;
7464     -}
7465     -
7466     -static void vmx_vm_free(struct kvm *kvm)
7467     -{
7468     - kfree(kvm->arch.hyperv.hv_pa_pg);
7469     - vfree(to_kvm_vmx(kvm));
7470     -}
7471     -
7472     -static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7473     -{
7474     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7475     -
7476     - if (enable_pml)
7477     - vmx_destroy_pml_buffer(vmx);
7478     - free_vpid(vmx->vpid);
7479     - nested_vmx_free_vcpu(vcpu);
7480     - free_loaded_vmcs(vmx->loaded_vmcs);
7481     - kvm_vcpu_uninit(vcpu);
7482     - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
7483     - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
7484     - kmem_cache_free(kvm_vcpu_cache, vmx);
7485     -}
7486     -
7487     -static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7488     -{
7489     - int err;
7490     - struct vcpu_vmx *vmx;
7491     - unsigned long *msr_bitmap;
7492     - int i, cpu;
7493     -
7494     - BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
7495     - "struct kvm_vcpu must be at offset 0 for arch usercopy region");
7496     -
7497     - vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
7498     - if (!vmx)
7499     - return ERR_PTR(-ENOMEM);
7500     -
7501     - vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
7502     - GFP_KERNEL_ACCOUNT);
7503     - if (!vmx->vcpu.arch.user_fpu) {
7504     - printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
7505     - err = -ENOMEM;
7506     - goto free_partial_vcpu;
7507     - }
7508     -
7509     - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
7510     - GFP_KERNEL_ACCOUNT);
7511     - if (!vmx->vcpu.arch.guest_fpu) {
7512     - printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
7513     - err = -ENOMEM;
7514     - goto free_user_fpu;
7515     - }
7516     -
7517     - vmx->vpid = allocate_vpid();
7518     -
7519     - err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
7520     - if (err)
7521     - goto free_vcpu;
7522     -
7523     - err = -ENOMEM;
7524     -
7525     - /*
7526     - * If PML is turned on, failure to enable PML just results in failure
7527     - * to create the vcpu, so we can simplify the PML logic (by
7528     - * avoiding having to deal with cases such as enabling PML on only some
7529     - * vcpus of the guest), etc.
7530     - */
7531     - if (enable_pml) {
7532     - vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
7533     - if (!vmx->pml_pg)
7534     - goto uninit_vcpu;
7535     - }
7536     -
7537     - BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
7538     -
7539     - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
7540     - u32 index = vmx_msr_index[i];
7541     - u32 data_low, data_high;
7542     - int j = vmx->nmsrs;
7543     -
7544     - if (rdmsr_safe(index, &data_low, &data_high) < 0)
7545     - continue;
7546     - if (wrmsr_safe(index, data_low, data_high) < 0)
7547     - continue;
7548     -
7549     - vmx->guest_msrs[j].index = i;
7550     - vmx->guest_msrs[j].data = 0;
7551     - switch (index) {
7552     - case MSR_IA32_TSX_CTRL:
7553     - /*
7554     - * No need to pass TSX_CTRL_CPUID_CLEAR through, so
7555     - * let's avoid changing CPUID bits under the host
7556     - * kernel's feet.
7557     - */
7558     - vmx->guest_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
7559     - break;
7560     - default:
7561     - vmx->guest_msrs[j].mask = -1ull;
7562     - break;
7563     - }
7564     - ++vmx->nmsrs;
7565     - }
7566     -
7567     - err = alloc_loaded_vmcs(&vmx->vmcs01);
7568     - if (err < 0)
7569     - goto free_pml;
7570     -
7571     - msr_bitmap = vmx->vmcs01.msr_bitmap;
7572     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
7573     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
7574     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
7575     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
7576     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
7577     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
7578     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
7579     - if (kvm_cstate_in_guest(kvm)) {
7580     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
7581     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
7582     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
7583     - vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
7584     - }
7585     - vmx->msr_bitmap_mode = 0;
7586     -
7587     - vmx->loaded_vmcs = &vmx->vmcs01;
7588     - cpu = get_cpu();
7589     - vmx_vcpu_load(&vmx->vcpu, cpu);
7590     - vmx->vcpu.cpu = cpu;
7591     - init_vmcs(vmx);
7592     - vmx_vcpu_put(&vmx->vcpu);
7593     - put_cpu();
7594     - if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
7595     - err = alloc_apic_access_page(kvm);
7596     - if (err)
7597     - goto free_vmcs;
7598     - }
7599     -
7600     - if (enable_ept && !enable_unrestricted_guest) {
7601     - err = init_rmode_identity_map(kvm);
7602     - if (err)
7603     - goto free_vmcs;
7604     - }
7605     -
7606     - if (nested)
7607     - nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
7608     - vmx_capability.ept,
7609     - kvm_vcpu_apicv_active(&vmx->vcpu));
7610     - else
7611     - memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
7612     -
7613     - vmx->nested.posted_intr_nv = -1;
7614     - vmx->nested.current_vmptr = -1ull;
7615     -
7616     - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
7617     -
7618     - /*
7619     - * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
7620     - * or POSTED_INTR_WAKEUP_VECTOR.
7621     - */
7622     - vmx->pi_desc.nv = POSTED_INTR_VECTOR;
7623     - vmx->pi_desc.sn = 1;
7624     -
7625     - vmx->ept_pointer = INVALID_PAGE;
7626     -
7627     - return &vmx->vcpu;
7628     -
7629     -free_vmcs:
7630     - free_loaded_vmcs(vmx->loaded_vmcs);
7631     -free_pml:
7632     - vmx_destroy_pml_buffer(vmx);
7633     -uninit_vcpu:
7634     - kvm_vcpu_uninit(&vmx->vcpu);
7635     -free_vcpu:
7636     - free_vpid(vmx->vpid);
7637     - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
7638     -free_user_fpu:
7639     - kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
7640     -free_partial_vcpu:
7641     - kmem_cache_free(kvm_vcpu_cache, vmx);
7642     - return ERR_PTR(err);
7643     -}
7644     -
7645     -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7646     -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
7647     -
7648     -static int vmx_vm_init(struct kvm *kvm)
7649     -{
7650     - spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
7651     -
7652     - if (!ple_gap)
7653     - kvm->arch.pause_in_guest = true;
7654     -
7655     - if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
7656     - switch (l1tf_mitigation) {
7657     - case L1TF_MITIGATION_OFF:
7658     - case L1TF_MITIGATION_FLUSH_NOWARN:
7659     - /* 'I explicitly don't care' is set */
7660     - break;
7661     - case L1TF_MITIGATION_FLUSH:
7662     - case L1TF_MITIGATION_FLUSH_NOSMT:
7663     - case L1TF_MITIGATION_FULL:
7664     - /*
7665     - * Warn upon starting the first VM in a potentially
7666     - * insecure environment.
7667     - */
7668     - if (sched_smt_active())
7669     - pr_warn_once(L1TF_MSG_SMT);
7670     - if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
7671     - pr_warn_once(L1TF_MSG_L1D);
7672     - break;
7673     - case L1TF_MITIGATION_FULL_FORCE:
7674     - /* Flush is enforced */
7675     - break;
7676     - }
7677     - }
7678     - return 0;
7679     -}
7680     -
7681     -static int __init vmx_check_processor_compat(void)
7682     -{
7683     - struct vmcs_config vmcs_conf;
7684     - struct vmx_capability vmx_cap;
7685     -
7686     - if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
7687     - return -EIO;
7688     - if (nested)
7689     - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
7690     - enable_apicv);
7691     - if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
7692     - printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
7693     - smp_processor_id());
7694     - return -EIO;
7695     - }
7696     - return 0;
7697     -}
7698     -
7699     -static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
7700     -{
7701     - u8 cache;
7702     - u64 ipat = 0;
7703     -
7704     - /* For VT-d and EPT combination
7705     - * 1. MMIO: always map as UC
7706     - * 2. EPT with VT-d:
7707     - * a. VT-d without snooping control feature: can't guarantee the
7708     - * result, try to trust guest.
7709     - * b. VT-d with snooping control feature: snooping control feature of
7710     - * VT-d engine can guarantee the cache correctness. Just set it
7711     - * to WB to keep consistent with host. So the same as item 3.
7712     - * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
7713     - * consistent with host MTRR
7714     - */
7715     - if (is_mmio) {
7716     - cache = MTRR_TYPE_UNCACHABLE;
7717     - goto exit;
7718     - }
7719     -
7720     - if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
7721     - ipat = VMX_EPT_IPAT_BIT;
7722     - cache = MTRR_TYPE_WRBACK;
7723     - goto exit;
7724     - }
7725     -
7726     - if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
7727     - ipat = VMX_EPT_IPAT_BIT;
7728     - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
7729     - cache = MTRR_TYPE_WRBACK;
7730     - else
7731     - cache = MTRR_TYPE_UNCACHABLE;
7732     - goto exit;
7733     - }
7734     -
7735     - cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
7736     -
7737     -exit:
7738     - return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
7739     -}
7740     -
7741     -static int vmx_get_lpage_level(void)
7742     -{
7743     - if (enable_ept && !cpu_has_vmx_ept_1g_page())
7744     - return PT_DIRECTORY_LEVEL;
7745     - else
7746     - /* For shadow and EPT supported 1GB page */
7747     - return PT_PDPE_LEVEL;
7748     -}
7749     -
7750     -static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
7751     -{
7752     - /*
7753     - * These bits in the secondary execution controls field
7754     - * are dynamic, the others are mostly based on the hypervisor
7755     - * architecture and the guest's CPUID. Do not touch the
7756     - * dynamic bits.
7757     - */
7758     - u32 mask =
7759     - SECONDARY_EXEC_SHADOW_VMCS |
7760     - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
7761     - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
7762     - SECONDARY_EXEC_DESC;
7763     -
7764     - u32 new_ctl = vmx->secondary_exec_control;
7765     - u32 cur_ctl = secondary_exec_controls_get(vmx);
7766     -
7767     - secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
7768     -}
7769     -
7770     -/*
7771     - * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
7772     - * (indicating "allowed-1") if they are supported in the guest's CPUID.
7773     - */
7774     -static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
7775     -{
7776     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7777     - struct kvm_cpuid_entry2 *entry;
7778     -
7779     - vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
7780     - vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
7781     -
7782     -#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
7783     - if (entry && (entry->_reg & (_cpuid_mask))) \
7784     - vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
7785     -} while (0)
7786     -
7787     - entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
7788     - cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
7789     - cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
7790     - cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
7791     - cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
7792     - cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
7793     - cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
7794     - cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
7795     - cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
7796     - cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
7797     - cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
7798     - cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
7799     - cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
7800     - cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
7801     - cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
7802     -
7803     - entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
7804     - cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
7805     - cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
7806     - cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
7807     - cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
7808     - cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
7809     - cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
7810     -
7811     -#undef cr4_fixed1_update
7812     -}
7813     -
7814     -static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
7815     -{
7816     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7817     -
7818     - if (kvm_mpx_supported()) {
7819     - bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
7820     -
7821     - if (mpx_enabled) {
7822     - vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
7823     - vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
7824     - } else {
7825     - vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
7826     - vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
7827     - }
7828     - }
7829     -}
7830     -
7831     -static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
7832     -{
7833     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7834     - struct kvm_cpuid_entry2 *best = NULL;
7835     - int i;
7836     -
7837     - for (i = 0; i < PT_CPUID_LEAVES; i++) {
7838     - best = kvm_find_cpuid_entry(vcpu, 0x14, i);
7839     - if (!best)
7840     - return;
7841     - vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
7842     - vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
7843     - vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
7844     - vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
7845     - }
7846     -
7847     - /* Get the number of configurable Address Ranges for filtering */
7848     - vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps,
7849     - PT_CAP_num_address_ranges);
7850     -
7851     - /* Initialize and clear the no dependency bits */
7852     - vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
7853     - RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC);
7854     -
7855     - /*
7856     - * If CPUID.(EAX=14H,ECX=0):EBX[0]=1 CR3Filter can be set otherwise
7857     - * will inject an #GP
7858     - */
7859     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
7860     - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
7861     -
7862     - /*
7863     - * If CPUID.(EAX=14H,ECX=0):EBX[1]=1 CYCEn, CycThresh and
7864     - * PSBFreq can be set
7865     - */
7866     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
7867     - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
7868     - RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
7869     -
7870     - /*
7871     - * If CPUID.(EAX=14H,ECX=0):EBX[3]=1 MTCEn BranchEn and
7872     - * MTCFreq can be set
7873     - */
7874     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
7875     - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
7876     - RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE);
7877     -
7878     - /* If CPUID.(EAX=14H,ECX=0):EBX[4]=1 FUPonPTW and PTWEn can be set */
7879     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
7880     - vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
7881     - RTIT_CTL_PTW_EN);
7882     -
7883     - /* If CPUID.(EAX=14H,ECX=0):EBX[5]=1 PwrEvEn can be set */
7884     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
7885     - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
7886     -
7887     - /* If CPUID.(EAX=14H,ECX=0):ECX[0]=1 ToPA can be set */
7888     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
7889     - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
7890     -
7891     - /* If CPUID.(EAX=14H,ECX=0):ECX[3]=1 FabircEn can be set */
7892     - if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
7893     - vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
7894     -
7895     - /* unmask address range configure area */
7896     - for (i = 0; i < vmx->pt_desc.addr_range; i++)
7897     - vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
7898     -}
7899     -
7900     -static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
7901     -{
7902     - struct vcpu_vmx *vmx = to_vmx(vcpu);
7903     -
7904     - /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
7905     - vcpu->arch.xsaves_enabled = false;
7906     -
7907     - if (cpu_has_secondary_exec_ctrls()) {
7908     - vmx_compute_secondary_exec_control(vmx);
7909     - vmcs_set_secondary_exec_control(vmx);
7910     - }
7911     -
7912     - if (nested_vmx_allowed(vcpu))
7913     - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7914     - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
7915     - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7916     - else
7917     - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7918     - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
7919     - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
7920     -
7921     - if (nested_vmx_allowed(vcpu)) {
7922     - nested_vmx_cr_fixed1_bits_update(vcpu);
7923     - nested_vmx_entry_exit_ctls_update(vcpu);
7924     - }
7925     -
7926     - if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
7927     - guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
7928     - update_intel_pt_cfg(vcpu);
7929     -
7930     - if (boot_cpu_has(X86_FEATURE_RTM)) {
7931     - struct shared_msr_entry *msr;
7932     - msr = find_msr_entry(vmx, MSR_IA32_TSX_CTRL);
7933     - if (msr) {
7934     - bool enabled = guest_cpuid_has(vcpu, X86_FEATURE_RTM);
7935     - vmx_set_guest_msr(vmx, msr, enabled ? 0 : TSX_CTRL_RTM_DISABLE);
7936     - }
7937     - }
7938     -}
7939     -
7940     -static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7941     -{
7942     - if (func == 1 && nested)
7943     - entry->ecx |= bit(X86_FEATURE_VMX);
7944     -}
7945     -
7946     -static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
7947     -{
7948     - to_vmx(vcpu)->req_immediate_exit = true;
7949     -}
7950     -
7951     -static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7952     - struct x86_instruction_info *info,
7953     - enum x86_intercept_stage stage)
7954     -{
7955     - struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7956     - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7957     -
7958     - /*
7959     - * RDPID causes #UD if disabled through secondary execution controls.
7960     - * Because it is marked as EmulateOnUD, we need to intercept it here.
7961     - */
7962     - if (info->intercept == x86_intercept_rdtscp &&
7963     - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
7964     - ctxt->exception.vector = UD_VECTOR;
7965     - ctxt->exception.error_code_valid = false;
7966     - return X86EMUL_PROPAGATE_FAULT;
7967     - }
7968     -
7969     - /* TODO: check more intercepts... */
7970     - return X86EMUL_CONTINUE;
7971     -}
7972     -
7973     -#ifdef CONFIG_X86_64
7974     -/* (a << shift) / divisor, return 1 if overflow otherwise 0 */
7975     -static inline int u64_shl_div_u64(u64 a, unsigned int shift,
7976     - u64 divisor, u64 *result)
7977     -{
7978     - u64 low = a << shift, high = a >> (64 - shift);
7979     -
7980     - /* To avoid the overflow on divq */
7981     - if (high >= divisor)
7982     - return 1;
7983     -
7984     - /* Low hold the result, high hold rem which is discarded */
7985     - asm("divq %2\n\t" : "=a" (low), "=d" (high) :
7986     - "rm" (divisor), "0" (low), "1" (high));
7987     - *result = low;
7988     -
7989     - return 0;
7990     -}
7991     -
7992     -static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
7993     - bool *expired)
7994     -{
7995     - struct vcpu_vmx *vmx;
7996     - u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7997     - struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7998     -
7999     - if (kvm_mwait_in_guest(vcpu->kvm) ||
8000     - kvm_can_post_timer_interrupt(vcpu))
8001     - return -EOPNOTSUPP;
8002     -
8003     - vmx = to_vmx(vcpu);
8004     - tscl = rdtsc();
8005     - guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
8006     - delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
8007     - lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
8008     - ktimer->timer_advance_ns);
8009     -
8010     - if (delta_tsc > lapic_timer_advance_cycles)
8011     - delta_tsc -= lapic_timer_advance_cycles;
8012     - else
8013     - delta_tsc = 0;
8014     -
8015     - /* Convert to host delta tsc if tsc scaling is enabled */
8016     - if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
8017     - delta_tsc && u64_shl_div_u64(delta_tsc,
8018     - kvm_tsc_scaling_ratio_frac_bits,
8019     - vcpu->arch.tsc_scaling_ratio, &delta_tsc))
8020     - return -ERANGE;
8021     -
8022     - /*
8023     - * If the delta tsc can't fit in the 32 bit after the multi shift,
8024     - * we can't use the preemption timer.
8025     - * It's possible that it fits on later vmentries, but checking
8026     - * on every vmentry is costly so we just use an hrtimer.
8027     - */
8028     - if (delta_tsc >> (cpu_preemption_timer_multi + 32))
8029     - return -ERANGE;
8030     -
8031     - vmx->hv_deadline_tsc = tscl + delta_tsc;
8032     - *expired = !delta_tsc;
8033     - return 0;
8034     -}
8035     -
8036     -static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
8037     -{
8038     - to_vmx(vcpu)->hv_deadline_tsc = -1;
8039     -}
8040     -#endif
8041     -
8042     -static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
8043     -{
8044     - if (!kvm_pause_in_guest(vcpu->kvm))
8045     - shrink_ple_window(vcpu);
8046     -}
8047     -
8048     -static void vmx_slot_enable_log_dirty(struct kvm *kvm,
8049     - struct kvm_memory_slot *slot)
8050     -{
8051     - kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
8052     - kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
8053     -}
8054     -
8055     -static void vmx_slot_disable_log_dirty(struct kvm *kvm,
8056     - struct kvm_memory_slot *slot)
8057     -{
8058     - kvm_mmu_slot_set_dirty(kvm, slot);
8059     -}
8060     -
8061     -static void vmx_flush_log_dirty(struct kvm *kvm)
8062     -{
8063     - kvm_flush_pml_buffers(kvm);
8064     -}
8065     -
8066     -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
8067     -{
8068     - struct vmcs12 *vmcs12;
8069     - struct vcpu_vmx *vmx = to_vmx(vcpu);
8070     - gpa_t gpa, dst;
8071     -
8072     - if (is_guest_mode(vcpu)) {
8073     - WARN_ON_ONCE(vmx->nested.pml_full);
8074     -
8075     - /*
8076     - * Check if PML is enabled for the nested guest.
8077     - * Whether eptp bit 6 is set is already checked
8078     - * as part of A/D emulation.
8079     - */
8080     - vmcs12 = get_vmcs12(vcpu);
8081     - if (!nested_cpu_has_pml(vmcs12))
8082     - return 0;
8083     -
8084     - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
8085     - vmx->nested.pml_full = true;
8086     - return 1;
8087     - }
8088     -
8089     - gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
8090     - dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
8091     -
8092     - if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
8093     - offset_in_page(dst), sizeof(gpa)))
8094     - return 0;
8095     -
8096     - vmcs12->guest_pml_index--;
8097     - }
8098     -
8099     - return 0;
8100     -}
8101     -
8102     -static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
8103     - struct kvm_memory_slot *memslot,
8104     - gfn_t offset, unsigned long mask)
8105     -{
8106     - kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
8107     -}
8108     -
8109     -static void __pi_post_block(struct kvm_vcpu *vcpu)
8110     -{
8111     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8112     - struct pi_desc old, new;
8113     - unsigned int dest;
8114     -
8115     - do {
8116     - old.control = new.control = pi_desc->control;
8117     - WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
8118     - "Wakeup handler not enabled while the VCPU is blocked\n");
8119     -
8120     - dest = cpu_physical_id(vcpu->cpu);
8121     -
8122     - if (x2apic_enabled())
8123     - new.ndst = dest;
8124     - else
8125     - new.ndst = (dest << 8) & 0xFF00;
8126     -
8127     - /* set 'NV' to 'notification vector' */
8128     - new.nv = POSTED_INTR_VECTOR;
8129     - } while (cmpxchg64(&pi_desc->control, old.control,
8130     - new.control) != old.control);
8131     -
8132     - if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
8133     - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8134     - list_del(&vcpu->blocked_vcpu_list);
8135     - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8136     - vcpu->pre_pcpu = -1;
8137     - }
8138     -}
8139     -
8140     -/*
8141     - * This routine does the following things for vCPU which is going
8142     - * to be blocked if VT-d PI is enabled.
8143     - * - Store the vCPU to the wakeup list, so when interrupts happen
8144     - * we can find the right vCPU to wake up.
8145     - * - Change the Posted-interrupt descriptor as below:
8146     - * 'NDST' <-- vcpu->pre_pcpu
8147     - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR
8148     - * - If 'ON' is set during this process, which means at least one
8149     - * interrupt is posted for this vCPU, we cannot block it, in
8150     - * this case, return 1, otherwise, return 0.
8151     - *
8152     - */
8153     -static int pi_pre_block(struct kvm_vcpu *vcpu)
8154     -{
8155     - unsigned int dest;
8156     - struct pi_desc old, new;
8157     - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
8158     -
8159     - if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
8160     - !irq_remapping_cap(IRQ_POSTING_CAP) ||
8161     - !kvm_vcpu_apicv_active(vcpu))
8162     - return 0;
8163     -
8164     - WARN_ON(irqs_disabled());
8165     - local_irq_disable();
8166     - if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
8167     - vcpu->pre_pcpu = vcpu->cpu;
8168     - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8169     - list_add_tail(&vcpu->blocked_vcpu_list,
8170     - &per_cpu(blocked_vcpu_on_cpu,
8171     - vcpu->pre_pcpu));
8172     - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
8173     - }
8174     -
8175     - do {
8176     - old.control = new.control = pi_desc->control;
8177     -
8178     - WARN((pi_desc->sn == 1),
8179     - "Warning: SN field of posted-interrupts "
8180     - "is set before blocking\n");
8181     -
8182     - /*
8183     - * Since vCPU can be preempted during this process,
8184     - * vcpu->cpu could be different with pre_pcpu, we
8185     - * need to set pre_pcpu as the destination of wakeup
8186     - * notification event, then we can find the right vCPU
8187     - * to wakeup in wakeup handler if interrupts happen
8188     - * when the vCPU is in blocked state.
8189     - */
8190     - dest = cpu_physical_id(vcpu->pre_pcpu);
8191     -
8192     - if (x2apic_enabled())
8193     - new.ndst = dest;
8194     - else
8195     - new.ndst = (dest << 8) & 0xFF00;
8196     -
8197     - /* set 'NV' to 'wakeup vector' */
8198     - new.nv = POSTED_INTR_WAKEUP_VECTOR;
8199     - } while (cmpxchg64(&pi_desc->control, old.control,
8200     - new.control) != old.control);
8201     -
8202     - /* We should not block the vCPU if an interrupt is posted for it. */
8203     - if (pi_test_on(pi_desc) == 1)
8204     - __pi_post_block(vcpu);
8205     -
8206     - local_irq_enable();
8207     - return (vcpu->pre_pcpu == -1);
8208     -}
8209     -
8210     -static int vmx_pre_block(struct kvm_vcpu *vcpu)
8211     -{
8212     - if (pi_pre_block(vcpu))
8213     - return 1;
8214     -
8215     - if (kvm_lapic_hv_timer_in_use(vcpu))
8216     - kvm_lapic_switch_to_sw_timer(vcpu);
8217     -
8218     - return 0;
8219     -}
8220     -
8221     -static void pi_post_block(struct kvm_vcpu *vcpu)
8222     -{
8223     - if (vcpu->pre_pcpu == -1)
8224     - return;
8225     -
8226     - WARN_ON(irqs_disabled());
8227     - local_irq_disable();
8228     - __pi_post_block(vcpu);
8229     - local_irq_enable();
8230     -}
8231     -
8232     -static void vmx_post_block(struct kvm_vcpu *vcpu)
8233     -{
8234     - if (kvm_x86_ops->set_hv_timer)
8235     - kvm_lapic_switch_to_hv_timer(vcpu);
8236     -
8237     - pi_post_block(vcpu);
8238     -}
8239     -
8240     -/*
8241     - * vmx_update_pi_irte - set IRTE for Posted-Interrupts
8242     - *
8243     - * @kvm: kvm
8244     - * @host_irq: host irq of the interrupt
8245     - * @guest_irq: gsi of the interrupt
8246     - * @set: set or unset PI
8247     - * returns 0 on success, < 0 on failure
8248     - */
8249     -static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
8250     - uint32_t guest_irq, bool set)
8251     -{
8252     - struct kvm_kernel_irq_routing_entry *e;
8253     - struct kvm_irq_routing_table *irq_rt;
8254     - struct kvm_lapic_irq irq;
8255     - struct kvm_vcpu *vcpu;
8256     - struct vcpu_data vcpu_info;
8257     - int idx, ret = 0;
8258     -
8259     - if (!kvm_arch_has_assigned_device(kvm) ||
8260     - !irq_remapping_cap(IRQ_POSTING_CAP) ||
8261     - !kvm_vcpu_apicv_active(kvm->vcpus[0]))
8262     - return 0;
8263     -
8264     - idx = srcu_read_lock(&kvm->irq_srcu);
8265     - irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
8266     - if (guest_irq >= irq_rt->nr_rt_entries ||
8267     - hlist_empty(&irq_rt->map[guest_irq])) {
8268     - pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
8269     - guest_irq, irq_rt->nr_rt_entries);
8270     - goto out;
8271     - }
8272     -
8273     - hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
8274     - if (e->type != KVM_IRQ_ROUTING_MSI)
8275     - continue;
8276     - /*
8277     - * VT-d PI cannot support posting multicast/broadcast
8278     - * interrupts to a vCPU, we still use interrupt remapping
8279     - * for these kind of interrupts.
8280     - *
8281     - * For lowest-priority interrupts, we only support
8282     - * those with single CPU as the destination, e.g. user
8283     - * configures the interrupts via /proc/irq or uses
8284     - * irqbalance to make the interrupts single-CPU.
8285     - *
8286     - * We will support full lowest-priority interrupt later.
8287     - *
8288     - * In addition, we can only inject generic interrupts using
8289     - * the PI mechanism, refuse to route others through it.
8290     - */
8291     -
8292     - kvm_set_msi_irq(kvm, e, &irq);
8293     - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
8294     - !kvm_irq_is_postable(&irq)) {
8295     - /*
8296     - * Make sure the IRTE is in remapped mode if
8297     - * we don't handle it in posted mode.
8298     - */
8299     - ret = irq_set_vcpu_affinity(host_irq, NULL);
8300     - if (ret < 0) {
8301     - printk(KERN_INFO
8302     - "failed to back to remapped mode, irq: %u\n",
8303     - host_irq);
8304     - goto out;
8305     - }
8306     -
8307     - continue;
8308     - }
8309     -
8310     - vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
8311     - vcpu_info.vector = irq.vector;
8312     -
8313     - trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
8314     - vcpu_info.vector, vcpu_info.pi_desc_addr, set);
8315     -
8316     - if (set)
8317     - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
8318     - else
8319     - ret = irq_set_vcpu_affinity(host_irq, NULL);
8320     -
8321     - if (ret < 0) {
8322     - printk(KERN_INFO "%s: failed to update PI IRTE\n",
8323     - __func__);
8324     - goto out;
8325     - }
8326     - }
8327     -
8328     - ret = 0;
8329     -out:
8330     - srcu_read_unlock(&kvm->irq_srcu, idx);
8331     - return ret;
8332     -}
8333     -
8334     -static void vmx_setup_mce(struct kvm_vcpu *vcpu)
8335     -{
8336     - if (vcpu->arch.mcg_cap & MCG_LMCE_P)
8337     - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
8338     - FEATURE_CONTROL_LMCE;
8339     - else
8340     - to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
8341     - ~FEATURE_CONTROL_LMCE;
8342     -}
8343     -
8344     -static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
8345     -{
8346     - /* we need a nested vmexit to enter SMM, postpone if run is pending */
8347     - if (to_vmx(vcpu)->nested.nested_run_pending)
8348     - return 0;
8349     - return 1;
8350     -}
8351     -
8352     -static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
8353     -{
8354     - struct vcpu_vmx *vmx = to_vmx(vcpu);
8355     -
8356     - vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
8357     - if (vmx->nested.smm.guest_mode)
8358     - nested_vmx_vmexit(vcpu, -1, 0, 0);
8359     -
8360     - vmx->nested.smm.vmxon = vmx->nested.vmxon;
8361     - vmx->nested.vmxon = false;
8362     - vmx_clear_hlt(vcpu);
8363     - return 0;
8364     -}
8365     -
8366     -static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
8367     -{
8368     - struct vcpu_vmx *vmx = to_vmx(vcpu);
8369     - int ret;
8370     -
8371     - if (vmx->nested.smm.vmxon) {
8372     - vmx->nested.vmxon = true;
8373     - vmx->nested.smm.vmxon = false;
8374     - }
8375     -
8376     - if (vmx->nested.smm.guest_mode) {
8377     - ret = nested_vmx_enter_non_root_mode(vcpu, false);
8378     - if (ret)
8379     - return ret;
8380     -
8381     - vmx->nested.smm.guest_mode = false;
8382     - }
8383     - return 0;
8384     -}
8385     -
8386     -static int enable_smi_window(struct kvm_vcpu *vcpu)
8387     -{
8388     - return 0;
8389     -}
8390     -
8391     -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
8392     -{
8393     - return false;
8394     -}
8395     -
8396     -static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
8397     -{
8398     - return to_vmx(vcpu)->nested.vmxon;
8399     -}
8400     -
8401     -static __init int hardware_setup(void)
8402     -{
8403     - unsigned long host_bndcfgs;
8404     - struct desc_ptr dt;
8405     - int r, i;
8406     -
8407     - rdmsrl_safe(MSR_EFER, &host_efer);
8408     -
8409     - store_idt(&dt);
8410     - host_idt_base = dt.address;
8411     -
8412     - for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
8413     - kvm_define_shared_msr(i, vmx_msr_index[i]);
8414     -
8415     - if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
8416     - return -EIO;
8417     -
8418     - if (boot_cpu_has(X86_FEATURE_NX))
8419     - kvm_enable_efer_bits(EFER_NX);
8420     -
8421     - if (boot_cpu_has(X86_FEATURE_MPX)) {
8422     - rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
8423     - WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
8424     - }
8425     -
8426     - if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
8427     - !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
8428     - enable_vpid = 0;
8429     -
8430     - if (!cpu_has_vmx_ept() ||
8431     - !cpu_has_vmx_ept_4levels() ||
8432     - !cpu_has_vmx_ept_mt_wb() ||
8433     - !cpu_has_vmx_invept_global())
8434     - enable_ept = 0;
8435     -
8436     - if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
8437     - enable_ept_ad_bits = 0;
8438     -
8439     - if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
8440     - enable_unrestricted_guest = 0;
8441     -
8442     - if (!cpu_has_vmx_flexpriority())
8443     - flexpriority_enabled = 0;
8444     -
8445     - if (!cpu_has_virtual_nmis())
8446     - enable_vnmi = 0;
8447     -
8448     - /*
8449     - * set_apic_access_page_addr() is used to reload apic access
8450     - * page upon invalidation. No need to do anything if not
8451     - * using the APIC_ACCESS_ADDR VMCS field.
8452     - */
8453     - if (!flexpriority_enabled)
8454     - kvm_x86_ops->set_apic_access_page_addr = NULL;
8455     -
8456     - if (!cpu_has_vmx_tpr_shadow())
8457     - kvm_x86_ops->update_cr8_intercept = NULL;
8458     -
8459     - if (enable_ept && !cpu_has_vmx_ept_2m_page())
8460     - kvm_disable_largepages();
8461     -
8462     -#if IS_ENABLED(CONFIG_HYPERV)
8463     - if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
8464     - && enable_ept) {
8465     - kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
8466     - kvm_x86_ops->tlb_remote_flush_with_range =
8467     - hv_remote_flush_tlb_with_range;
8468     - }
8469     -#endif
8470     -
8471     - if (!cpu_has_vmx_ple()) {
8472     - ple_gap = 0;
8473     - ple_window = 0;
8474     - ple_window_grow = 0;
8475     - ple_window_max = 0;
8476     - ple_window_shrink = 0;
8477     - }
8478     -
8479     - if (!cpu_has_vmx_apicv()) {
8480     - enable_apicv = 0;
8481     - kvm_x86_ops->sync_pir_to_irr = NULL;
8482     - }
8483     -
8484     - if (cpu_has_vmx_tsc_scaling()) {
8485     - kvm_has_tsc_control = true;
8486     - kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
8487     - kvm_tsc_scaling_ratio_frac_bits = 48;
8488     - }
8489     -
8490     - set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
8491     -
8492     - if (enable_ept)
8493     - vmx_enable_tdp();
8494     - else
8495     - kvm_disable_tdp();
8496     -
8497     - /*
8498     - * Only enable PML when hardware supports PML feature, and both EPT
8499     - * and EPT A/D bit features are enabled -- PML depends on them to work.
8500     - */
8501     - if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
8502     - enable_pml = 0;
8503     -
8504     - if (!enable_pml) {
8505     - kvm_x86_ops->slot_enable_log_dirty = NULL;
8506     - kvm_x86_ops->slot_disable_log_dirty = NULL;
8507     - kvm_x86_ops->flush_log_dirty = NULL;
8508     - kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
8509     - }
8510     -
8511     - if (!cpu_has_vmx_preemption_timer())
8512     - enable_preemption_timer = false;
8513     -
8514     - if (enable_preemption_timer) {
8515     - u64 use_timer_freq = 5000ULL * 1000 * 1000;
8516     - u64 vmx_msr;
8517     -
8518     - rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
8519     - cpu_preemption_timer_multi =
8520     - vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
8521     -
8522     - if (tsc_khz)
8523     - use_timer_freq = (u64)tsc_khz * 1000;
8524     - use_timer_freq >>= cpu_preemption_timer_multi;
8525     -
8526     - /*
8527     - * KVM "disables" the preemption timer by setting it to its max
8528     - * value. Don't use the timer if it might cause spurious exits
8529     - * at a rate faster than 0.1 Hz (of uninterrupted guest time).
8530     - */
8531     - if (use_timer_freq > 0xffffffffu / 10)
8532     - enable_preemption_timer = false;
8533     - }
8534     -
8535     - if (!enable_preemption_timer) {
8536     - kvm_x86_ops->set_hv_timer = NULL;
8537     - kvm_x86_ops->cancel_hv_timer = NULL;
8538     - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
8539     - }
8540     -
8541     - kvm_set_posted_intr_wakeup_handler(wakeup_handler);
8542     -
8543     - kvm_mce_cap_supported |= MCG_LMCE_P;
8544     -
8545     - if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
8546     - return -EINVAL;
8547     - if (!enable_ept || !cpu_has_vmx_intel_pt())
8548     - pt_mode = PT_MODE_SYSTEM;
8549     -
8550     - if (nested) {
8551     - nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
8552     - vmx_capability.ept, enable_apicv);
8553     -
8554     - r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
8555     - if (r)
8556     - return r;
8557     - }
8558     -
8559     - r = alloc_kvm_area();
8560     - if (r)
8561     - nested_vmx_hardware_unsetup();
8562     - return r;
8563     -}
8564     -
8565     -static __exit void hardware_unsetup(void)
8566     -{
8567     - if (nested)
8568     - nested_vmx_hardware_unsetup();
8569     -
8570     - free_kvm_area();
8571     -}
8572     -
8573     -static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
8574     - .cpu_has_kvm_support = cpu_has_kvm_support,
8575     - .disabled_by_bios = vmx_disabled_by_bios,
8576     - .hardware_setup = hardware_setup,
8577     - .hardware_unsetup = hardware_unsetup,
8578     - .check_processor_compatibility = vmx_check_processor_compat,
8579     - .hardware_enable = hardware_enable,
8580     - .hardware_disable = hardware_disable,
8581     - .cpu_has_accelerated_tpr = report_flexpriority,
8582     - .has_emulated_msr = vmx_has_emulated_msr,
8583     -
8584     - .vm_init = vmx_vm_init,
8585     - .vm_alloc = vmx_vm_alloc,
8586     - .vm_free = vmx_vm_free,
8587     -
8588     - .vcpu_create = vmx_create_vcpu,
8589     - .vcpu_free = vmx_free_vcpu,
8590     - .vcpu_reset = vmx_vcpu_reset,
8591     -
8592     - .prepare_guest_switch = vmx_prepare_switch_to_guest,
8593     - .vcpu_load = vmx_vcpu_load,
8594     - .vcpu_put = vmx_vcpu_put,
8595     -
8596     - .update_bp_intercept = update_exception_bitmap,
8597     - .get_msr_feature = vmx_get_msr_feature,
8598     - .get_msr = vmx_get_msr,
8599     - .set_msr = vmx_set_msr,
8600     - .get_segment_base = vmx_get_segment_base,
8601     - .get_segment = vmx_get_segment,
8602     - .set_segment = vmx_set_segment,
8603     - .get_cpl = vmx_get_cpl,
8604     - .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
8605     - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
8606     - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
8607     - .set_cr0 = vmx_set_cr0,
8608     - .set_cr3 = vmx_set_cr3,
8609     - .set_cr4 = vmx_set_cr4,
8610     - .set_efer = vmx_set_efer,
8611     - .get_idt = vmx_get_idt,
8612     - .set_idt = vmx_set_idt,
8613     - .get_gdt = vmx_get_gdt,
8614     - .set_gdt = vmx_set_gdt,
8615     - .get_dr6 = vmx_get_dr6,
8616     - .set_dr6 = vmx_set_dr6,
8617     - .set_dr7 = vmx_set_dr7,
8618     - .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
8619     - .cache_reg = vmx_cache_reg,
8620     - .get_rflags = vmx_get_rflags,
8621     - .set_rflags = vmx_set_rflags,
8622     -
8623     - .tlb_flush = vmx_flush_tlb,
8624     - .tlb_flush_gva = vmx_flush_tlb_gva,
8625     -
8626     - .run = vmx_vcpu_run,
8627     - .handle_exit = vmx_handle_exit,
8628     - .skip_emulated_instruction = skip_emulated_instruction,
8629     - .set_interrupt_shadow = vmx_set_interrupt_shadow,
8630     - .get_interrupt_shadow = vmx_get_interrupt_shadow,
8631     - .patch_hypercall = vmx_patch_hypercall,
8632     - .set_irq = vmx_inject_irq,
8633     - .set_nmi = vmx_inject_nmi,
8634     - .queue_exception = vmx_queue_exception,
8635     - .cancel_injection = vmx_cancel_injection,
8636     - .interrupt_allowed = vmx_interrupt_allowed,
8637     - .nmi_allowed = vmx_nmi_allowed,
8638     - .get_nmi_mask = vmx_get_nmi_mask,
8639     - .set_nmi_mask = vmx_set_nmi_mask,
8640     - .enable_nmi_window = enable_nmi_window,
8641     - .enable_irq_window = enable_irq_window,
8642     - .update_cr8_intercept = update_cr8_intercept,
8643     - .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
8644     - .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
8645     - .get_enable_apicv = vmx_get_enable_apicv,
8646     - .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
8647     - .load_eoi_exitmap = vmx_load_eoi_exitmap,
8648     - .apicv_post_state_restore = vmx_apicv_post_state_restore,
8649     - .hwapic_irr_update = vmx_hwapic_irr_update,
8650     - .hwapic_isr_update = vmx_hwapic_isr_update,
8651     - .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
8652     - .sync_pir_to_irr = vmx_sync_pir_to_irr,
8653     - .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
8654     - .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
8655     -
8656     - .set_tss_addr = vmx_set_tss_addr,
8657     - .set_identity_map_addr = vmx_set_identity_map_addr,
8658     - .get_tdp_level = get_ept_level,
8659     - .get_mt_mask = vmx_get_mt_mask,
8660     -
8661     - .get_exit_info = vmx_get_exit_info,
8662     -
8663     - .get_lpage_level = vmx_get_lpage_level,
8664     -
8665     - .cpuid_update = vmx_cpuid_update,
8666     -
8667     - .rdtscp_supported = vmx_rdtscp_supported,
8668     - .invpcid_supported = vmx_invpcid_supported,
8669     -
8670     - .set_supported_cpuid = vmx_set_supported_cpuid,
8671     -
8672     - .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
8673     -
8674     - .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
8675     - .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
8676     -
8677     - .set_tdp_cr3 = vmx_set_cr3,
8678     -
8679     - .check_intercept = vmx_check_intercept,
8680     - .handle_exit_irqoff = vmx_handle_exit_irqoff,
8681     - .mpx_supported = vmx_mpx_supported,
8682     - .xsaves_supported = vmx_xsaves_supported,
8683     - .umip_emulated = vmx_umip_emulated,
8684     - .pt_supported = vmx_pt_supported,
8685     -
8686     - .request_immediate_exit = vmx_request_immediate_exit,
8687     -
8688     - .sched_in = vmx_sched_in,
8689     -
8690     - .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
8691     - .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
8692     - .flush_log_dirty = vmx_flush_log_dirty,
8693     - .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
8694     - .write_log_dirty = vmx_write_pml_buffer,
8695     -
8696     - .pre_block = vmx_pre_block,
8697     - .post_block = vmx_post_block,
8698     -
8699     - .pmu_ops = &intel_pmu_ops,
8700     -
8701     - .update_pi_irte = vmx_update_pi_irte,
8702     -
8703     -#ifdef CONFIG_X86_64
8704     - .set_hv_timer = vmx_set_hv_timer,
8705     - .cancel_hv_timer = vmx_cancel_hv_timer,
8706     -#endif
8707     -
8708     - .setup_mce = vmx_setup_mce,
8709     -
8710     - .smi_allowed = vmx_smi_allowed,
8711     - .pre_enter_smm = vmx_pre_enter_smm,
8712     - .pre_leave_smm = vmx_pre_leave_smm,
8713     - .enable_smi_window = enable_smi_window,
8714     -
8715     - .check_nested_events = NULL,
8716     - .get_nested_state = NULL,
8717     - .set_nested_state = NULL,
8718     - .get_vmcs12_pages = NULL,
8719     - .nested_enable_evmcs = NULL,
8720     - .nested_get_evmcs_version = NULL,
8721     - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
8722     - .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
8723     -};
8724     -
8725     -static void vmx_cleanup_l1d_flush(void)
8726     -{
8727     - if (vmx_l1d_flush_pages) {
8728     - free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
8729     - vmx_l1d_flush_pages = NULL;
8730     - }
8731     - /* Restore state so sysfs ignores VMX */
8732     - l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
8733     -}
8734     -
8735     -static void vmx_exit(void)
8736     -{
8737     -#ifdef CONFIG_KEXEC_CORE
8738     - RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
8739     - synchronize_rcu();
8740     -#endif
8741     -
8742     - kvm_exit();
8743     -
8744     -#if IS_ENABLED(CONFIG_HYPERV)
8745     - if (static_branch_unlikely(&enable_evmcs)) {
8746     - int cpu;
8747     - struct hv_vp_assist_page *vp_ap;
8748     - /*
8749     - * Reset everything to support using non-enlightened VMCS
8750     - * access later (e.g. when we reload the module with
8751     - * enlightened_vmcs=0)
8752     - */
8753     - for_each_online_cpu(cpu) {
8754     - vp_ap = hv_get_vp_assist_page(cpu);
8755     -
8756     - if (!vp_ap)
8757     - continue;
8758     -
8759     - vp_ap->nested_control.features.directhypercall = 0;
8760     - vp_ap->current_nested_vmcs = 0;
8761     - vp_ap->enlighten_vmentry = 0;
8762     - }
8763     -
8764     - static_branch_disable(&enable_evmcs);
8765     - }
8766     -#endif
8767     - vmx_cleanup_l1d_flush();
8768     -}
8769     -module_exit(vmx_exit);
8770     -
8771     -static int __init vmx_init(void)
8772     -{
8773     - int r;
8774     -
8775     -#if IS_ENABLED(CONFIG_HYPERV)
8776     - /*
8777     - * Enlightened VMCS usage should be recommended and the host needs
8778     - * to support eVMCS v1 or above. We can also disable eVMCS support
8779     - * with module parameter.
8780     - */
8781     - if (enlightened_vmcs &&
8782     - ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
8783     - (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
8784     - KVM_EVMCS_VERSION) {
8785     - int cpu;
8786     -
8787     - /* Check that we have assist pages on all online CPUs */
8788     - for_each_online_cpu(cpu) {
8789     - if (!hv_get_vp_assist_page(cpu)) {
8790     - enlightened_vmcs = false;
8791     - break;
8792     - }
8793     - }
8794     -
8795     - if (enlightened_vmcs) {
8796     - pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
8797     - static_branch_enable(&enable_evmcs);
8798     - }
8799     -
8800     - if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
8801     - vmx_x86_ops.enable_direct_tlbflush
8802     - = hv_enable_direct_tlbflush;
8803     -
8804     - } else {
8805     - enlightened_vmcs = false;
8806     - }
8807     -#endif
8808     -
8809     - r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
8810     - __alignof__(struct vcpu_vmx), THIS_MODULE);
8811     - if (r)
8812     - return r;
8813     -
8814     - /*
8815     - * Must be called after kvm_init() so enable_ept is properly set
8816     - * up. Hand the parameter mitigation value in which was stored in
8817     - * the pre module init parser. If no parameter was given, it will
8818     - * contain 'auto' which will be turned into the default 'cond'
8819     - * mitigation mode.
8820     - */
8821     - r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
8822     - if (r) {
8823     - vmx_exit();
8824     - return r;
8825     - }
8826     -
8827     -#ifdef CONFIG_KEXEC_CORE
8828     - rcu_assign_pointer(crash_vmclear_loaded_vmcss,
8829     - crash_vmclear_local_loaded_vmcss);
8830     -#endif
8831     - vmx_check_vmcs12_offsets();
8832     -
8833     - return 0;
8834     -}
8835     -module_init(vmx_init);
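
The u64_shl_div_u64() helper removed above computes (a << shift) / divisor and reports overflow when the quotient would not fit in 64 bits; the preemption-timer code uses it to convert a guest TSC delta through the 48-bit fractional scaling ratio. The following is a minimal userspace sketch of that contract only, using the GCC/Clang unsigned __int128 type instead of the divq inline assembly; the shl_div_u64 name and the sample values are illustrative, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Sketch: return 1 on overflow (quotient does not fit in a u64), else 0. */
static int shl_div_u64(uint64_t a, unsigned int shift, uint64_t divisor,
                       uint64_t *result)
{
        unsigned __int128 q;

        if (!divisor)
                return 1;                       /* treat /0 like an overflow */
        q = ((unsigned __int128)a << shift) / divisor;
        if (q > UINT64_MAX)
                return 1;                       /* would overflow a u64 */
        *result = (uint64_t)q;
        return 0;
}

int main(void)
{
        uint64_t host_delta;

        /* A guest delta scaled by a ratio of 3.0 with 48 fractional bits. */
        if (!shl_div_u64(1000000, 48, 3ULL << 48, &host_delta))
                printf("host delta: %llu\n",    /* prints 333333 */
                       (unsigned long long)host_delta);
        return 0;
}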
8836     diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
8837     index 0f7eb4f5bdb7..82e105b284e0 100644
8838     --- a/arch/x86/lib/x86-opcode-map.txt
8839     +++ b/arch/x86/lib/x86-opcode-map.txt
8840     @@ -909,7 +909,7 @@ EndTable
8841    
8842     GrpTable: Grp3_2
8843     0: TEST Ev,Iz
8844     -1:
8845     +1: TEST Ev,Iz
8846     2: NOT Ev
8847     3: NEG Ev
8848     4: MUL rAX,Ev
8849     diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
8850     index b6669d326545..f08abdf8bb67 100644
8851     --- a/arch/x86/platform/efi/efi.c
8852     +++ b/arch/x86/platform/efi/efi.c
8853     @@ -478,7 +478,6 @@ void __init efi_init(void)
8854     efi_char16_t *c16;
8855     char vendor[100] = "unknown";
8856     int i = 0;
8857     - void *tmp;
8858    
8859     #ifdef CONFIG_X86_32
8860     if (boot_params.efi_info.efi_systab_hi ||
8861     @@ -503,14 +502,16 @@ void __init efi_init(void)
8862     /*
8863     * Show what we know for posterity
8864     */
8865     - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
8866     + c16 = early_memremap_ro(efi.systab->fw_vendor,
8867     + sizeof(vendor) * sizeof(efi_char16_t));
8868     if (c16) {
8869     - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
8870     - vendor[i] = *c16++;
8871     + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i)
8872     + vendor[i] = c16[i];
8873     vendor[i] = '\0';
8874     - } else
8875     + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t));
8876     + } else {
8877     pr_err("Could not map the firmware vendor!\n");
8878     - early_memunmap(tmp, 2);
8879     + }
8880    
8881     pr_info("EFI v%u.%.02u by %s\n",
8882     efi.systab->hdr.revision >> 16,
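
The efi.c hunk above maps the firmware vendor string read-only, sized for the whole vendor[] buffer, and copies it by index instead of walking a two-byte mapping past its end. Below is a small standalone sketch of that copy pattern; the fw_vendor array stands in for the firmware mapping and is made up for the demo.

#include <stdint.h>
#include <stdio.h>

typedef uint16_t efi_char16_t;          /* UCS-2 code unit, as in EFI */

int main(void)
{
        /* Stand-in for the mapping returned by early_memremap_ro(). */
        static const efi_char16_t fw_vendor[] = { 'A', 'c', 'm', 'e', 0 };
        char vendor[100] = "unknown";
        size_t i;

        /* Index the mapping and always NUL-terminate, like the patched
         * loop; truncation is bounded by sizeof(vendor) - 1. */
        for (i = 0; i < sizeof(vendor) - 1 && fw_vendor[i]; ++i)
                vendor[i] = (char)fw_vendor[i];
        vendor[i] = '\0';

        printf("EFI vendor: %s\n", vendor);
        return 0;
}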
8883     diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
8884     index 6a4b603d0e83..10bbf6ca082a 100644
8885     --- a/drivers/acpi/acpica/dsfield.c
8886     +++ b/drivers/acpi/acpica/dsfield.c
8887     @@ -272,7 +272,7 @@ cleanup:
8888     * FUNCTION: acpi_ds_get_field_names
8889     *
8890     * PARAMETERS: info - create_field info structure
8891     - * ` walk_state - Current method state
8892     + * walk_state - Current method state
8893     * arg - First parser arg for the field name list
8894     *
8895     * RETURN: Status
8896     diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
8897     index fd34040d4f44..9c41d2153d0f 100644
8898     --- a/drivers/acpi/acpica/dswload.c
8899     +++ b/drivers/acpi/acpica/dswload.c
8900     @@ -440,6 +440,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state)
8901     ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op,
8902     walk_state));
8903    
8904     + /*
8905     + * Disassembler: handle create field operators here.
8906     + *
8907     + * create_buffer_field is a deferred op that is typically processed in load
8908     + * pass 2. However, disassembly of control method contents walks the parse
8909     + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed
8910     + * in a later walk. This is a problem when there is a control method that
8911     + * has the same name as the AML_CREATE object. In this case, any use of the
8912     + * name segment will be detected as a method call rather than a reference
8913     + * to a buffer field.
8914     + *
8915     + * This earlier creation during disassembly solves this issue by inserting
8916     + * the named object in the ACPI namespace so that references to this name
8917     + * would be a name string rather than a method call.
8918     + */
8919     + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) &&
8920     + (walk_state->op_info->flags & AML_CREATE)) {
8921     + status = acpi_ds_create_buffer_field(op, walk_state);
8922     + return_ACPI_STATUS(status);
8923     + }
8924     +
8925     /* We are only interested in opcodes that have an associated name */
8926    
8927     if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) {
8928     diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
8929     index 5408a292078b..89e62043d02e 100644
8930     --- a/drivers/ata/ahci.c
8931     +++ b/drivers/ata/ahci.c
8932     @@ -86,6 +86,7 @@ enum board_ids {
8933    
8934     static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
8935     static void ahci_remove_one(struct pci_dev *dev);
8936     +static void ahci_shutdown_one(struct pci_dev *dev);
8937     static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class,
8938     unsigned long deadline);
8939     static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
8940     @@ -582,6 +583,7 @@ static struct pci_driver ahci_pci_driver = {
8941     .id_table = ahci_pci_tbl,
8942     .probe = ahci_init_one,
8943     .remove = ahci_remove_one,
8944     + .shutdown = ahci_shutdown_one,
8945     .driver = {
8946     .pm = &ahci_pci_pm_ops,
8947     },
8948     @@ -1775,6 +1777,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
8949     return 0;
8950     }
8951    
8952     +static void ahci_shutdown_one(struct pci_dev *pdev)
8953     +{
8954     + ata_pci_shutdown_one(pdev);
8955     +}
8956     +
8957     static void ahci_remove_one(struct pci_dev *pdev)
8958     {
8959     pm_runtime_get_noresume(&pdev->dev);
8960     diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
8961     index b1582f161171..ba0cffbd0bb6 100644
8962     --- a/drivers/ata/libata-core.c
8963     +++ b/drivers/ata/libata-core.c
8964     @@ -6580,6 +6580,26 @@ void ata_pci_remove_one(struct pci_dev *pdev)
8965     ata_host_detach(host);
8966     }
8967    
8968     +void ata_pci_shutdown_one(struct pci_dev *pdev)
8969     +{
8970     + struct ata_host *host = pci_get_drvdata(pdev);
8971     + int i;
8972     +
8973     + for (i = 0; i < host->n_ports; i++) {
8974     + struct ata_port *ap = host->ports[i];
8975     +
8976     + ap->pflags |= ATA_PFLAG_FROZEN;
8977     +
8978     + /* Disable port interrupts */
8979     + if (ap->ops->freeze)
8980     + ap->ops->freeze(ap);
8981     +
8982     + /* Stop the port DMA engines */
8983     + if (ap->ops->port_stop)
8984     + ap->ops->port_stop(ap);
8985     + }
8986     +}
8987     +
8988     /* move to PCI subsystem */
8989     int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
8990     {
8991     @@ -7200,6 +7220,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode);
8992    
8993     #ifdef CONFIG_PCI
8994     EXPORT_SYMBOL_GPL(pci_test_config_bits);
8995     +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one);
8996     EXPORT_SYMBOL_GPL(ata_pci_remove_one);
8997     #ifdef CONFIG_PM
8998     EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend);
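
The new ata_pci_shutdown_one() exported above freezes each port and stops its DMA engines so the controller is quiescent before the machine reboots or kexecs into a new kernel. A sketch of how a libata PCI driver would wire it up, mirroring the ahci.c hunk earlier in this patch; the foo_* names, the stub probe and the empty ID table are placeholders, not a real driver.

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/libata.h>

static int foo_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        return -ENODEV;                 /* placeholder probe, no hardware */
}

static void foo_shutdown_one(struct pci_dev *pdev)
{
        ata_pci_shutdown_one(pdev);     /* freeze ports, stop DMA engines */
}

static const struct pci_device_id foo_ids[] = { { } };

static struct pci_driver foo_pci_driver = {
        .name     = "foo-ata",
        .id_table = foo_ids,
        .probe    = foo_init_one,
        .remove   = ata_pci_remove_one,
        .shutdown = foo_shutdown_one,
};
module_pci_driver(foo_pci_driver);
MODULE_LICENSE("GPL");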
8999     diff --git a/drivers/base/dd.c b/drivers/base/dd.c
9000     index ee25a69630c3..854d218ea76a 100644
9001     --- a/drivers/base/dd.c
9002     +++ b/drivers/base/dd.c
9003     @@ -341,7 +341,10 @@ static int really_probe(struct device *dev, struct device_driver *drv)
9004     atomic_inc(&probe_count);
9005     pr_debug("bus: '%s': %s: probing driver %s with device %s\n",
9006     drv->bus->name, __func__, drv->name, dev_name(dev));
9007     - WARN_ON(!list_empty(&dev->devres_head));
9008     + if (!list_empty(&dev->devres_head)) {
9009     + dev_crit(dev, "Resources present before probing\n");
9010     + return -EBUSY;
9011     + }
9012    
9013     re_probe:
9014     dev->driver = drv;
9015     diff --git a/drivers/base/platform.c b/drivers/base/platform.c
9016     index f90b1b9bbad0..bef299ef6227 100644
9017     --- a/drivers/base/platform.c
9018     +++ b/drivers/base/platform.c
9019     @@ -28,6 +28,7 @@
9020     #include <linux/limits.h>
9021     #include <linux/property.h>
9022     #include <linux/kmemleak.h>
9023     +#include <linux/types.h>
9024    
9025     #include "base.h"
9026     #include "power/power.h"
9027     @@ -68,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
9028     struct resource *platform_get_resource(struct platform_device *dev,
9029     unsigned int type, unsigned int num)
9030     {
9031     - int i;
9032     + u32 i;
9033    
9034     for (i = 0; i < dev->num_resources; i++) {
9035     struct resource *r = &dev->resource[i];
9036     @@ -153,7 +154,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
9037     unsigned int type,
9038     const char *name)
9039     {
9040     - int i;
9041     + u32 i;
9042    
9043     for (i = 0; i < dev->num_resources; i++) {
9044     struct resource *r = &dev->resource[i];
9045     @@ -350,7 +351,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties);
9046     */
9047     int platform_device_add(struct platform_device *pdev)
9048     {
9049     - int i, ret;
9050     + u32 i;
9051     + int ret;
9052    
9053     if (!pdev)
9054     return -EINVAL;
9055     @@ -416,7 +418,7 @@ int platform_device_add(struct platform_device *pdev)
9056     pdev->id = PLATFORM_DEVID_AUTO;
9057     }
9058    
9059     - while (--i >= 0) {
9060     + while (i--) {
9061     struct resource *r = &pdev->resource[i];
9062     if (r->parent)
9063     release_resource(r);
9064     @@ -437,7 +439,7 @@ EXPORT_SYMBOL_GPL(platform_device_add);
9065     */
9066     void platform_device_del(struct platform_device *pdev)
9067     {
9068     - int i;
9069     + u32 i;
9070    
9071     if (pdev) {
9072     device_remove_properties(&pdev->dev);
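
The platform.c changes above switch the resource index to u32 and rewrite the error-unwind loop as "while (i--)", the idiom that still terminates with an unsigned counter. A tiny standalone illustration of the difference; the resources[] names are invented for the demo.

#include <stdio.h>

int main(void)
{
        const char *resources[] = { "mem", "irq", "dma" };
        unsigned int i = sizeof(resources) / sizeof(resources[0]);

        /* With an unsigned index, "while (i--)" visits i-1 .. 0 and stops;
         * the old "while (--i >= 0)" form never terminates for unsigned i. */
        while (i--)
                printf("release %s\n", resources[i]);   /* dma, irq, mem */

        return 0;
}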
9073     diff --git a/drivers/block/brd.c b/drivers/block/brd.c
9074     index 0c76d4016eeb..7e35574a17df 100644
9075     --- a/drivers/block/brd.c
9076     +++ b/drivers/block/brd.c
9077     @@ -581,6 +581,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
9078     return kobj;
9079     }
9080    
9081     +static inline void brd_check_and_reset_par(void)
9082     +{
9083     + if (unlikely(!max_part))
9084     + max_part = 1;
9085     +
9086     + /*
9087     + * make sure 'max_part' can be divided exactly by (1U << MINORBITS),
9088     + * make sure 'max_part' divides (1U << MINORBITS) exactly; otherwise,
9089     + * it is possible to get the same dev_t when adding partitions.
9090     + if ((1U << MINORBITS) % max_part != 0)
9091     + max_part = 1UL << fls(max_part);
9092     +
9093     + if (max_part > DISK_MAX_PARTS) {
9094     + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
9095     + DISK_MAX_PARTS, DISK_MAX_PARTS);
9096     + max_part = DISK_MAX_PARTS;
9097     + }
9098     +}
9099     +
9100     static int __init brd_init(void)
9101     {
9102     struct brd_device *brd, *next;
9103     @@ -604,8 +623,7 @@ static int __init brd_init(void)
9104     if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
9105     return -EIO;
9106    
9107     - if (unlikely(!max_part))
9108     - max_part = 1;
9109     + brd_check_and_reset_par();
9110    
9111     for (i = 0; i < rd_nr; i++) {
9112     brd = brd_alloc(i);
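The new brd_check_and_reset_par() forces max_part to a power of two so that it divides 1U << MINORBITS evenly. A small userspace check of that rounding; fls() here is a stand-in for the kernel helper (index of the highest set bit, 1-based) and max_part = 12 is just an example value:

	#include <stdio.h>

	#define MINORBITS 20

	/* stand-in for the kernel's fls(): highest set bit, 1-based, 0 for 0 */
	static int fls(unsigned int x)
	{
		int r = 0;

		while (x) {
			x >>= 1;
			r++;
		}
		return r;
	}

	int main(void)
	{
		unsigned int max_part = 12;	/* example module parameter */

		if ((1U << MINORBITS) % max_part != 0)
			max_part = 1UL << fls(max_part);

		/* 12 does not divide 2^20, so it is rounded up to 16 */
		printf("max_part = %u\n", max_part);
		return 0;
	}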
9113     diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
9114     index ece4f706b38f..4496e7a49235 100644
9115     --- a/drivers/block/floppy.c
9116     +++ b/drivers/block/floppy.c
9117     @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode)
9118     /* selects the fdc and drive, and enables the fdc's input/dma. */
9119     static void set_fdc(int drive)
9120     {
9121     + unsigned int new_fdc = fdc;
9122     +
9123     if (drive >= 0 && drive < N_DRIVE) {
9124     - fdc = FDC(drive);
9125     + new_fdc = FDC(drive);
9126     current_drive = drive;
9127     }
9128     - if (fdc != 1 && fdc != 0) {
9129     + if (new_fdc >= N_FDC) {
9130     pr_info("bad fdc value\n");
9131     return;
9132     }
9133     + fdc = new_fdc;
9134     set_dor(fdc, ~0, 8);
9135     #if N_FDC > 1
9136     set_dor(1 - fdc, ~8, 0);
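set_fdc() now validates the candidate controller number before it overwrites the global, instead of clobbering the global and only then noticing it is out of range. A rough sketch of that validate-then-commit pattern; N_FDC, the globals and the drive-to-controller mapping are simplified stand-ins, not the real driver:

	#define N_FDC 2
	#define N_DRIVE 8

	static unsigned int fdc;	/* currently selected controller */
	static int current_drive;

	static int select_fdc(int drive)
	{
		unsigned int new_fdc = fdc;	/* start from the current value */

		if (drive >= 0 && drive < N_DRIVE) {
			new_fdc = drive / 4;	/* illustrative drive-to-fdc mapping */
			current_drive = drive;
		}

		if (new_fdc >= N_FDC)
			return -1;	/* reject without touching the global */

		fdc = new_fdc;		/* commit only after validation */
		return 0;
	}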
9137     diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
9138     index 0ae1b0a66eb5..d8601b138dc1 100644
9139     --- a/drivers/clk/qcom/clk-rcg2.c
9140     +++ b/drivers/clk/qcom/clk-rcg2.c
9141     @@ -194,6 +194,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw,
9142    
9143     clk_flags = clk_hw_get_flags(hw);
9144     p = clk_hw_get_parent_by_index(hw, index);
9145     + if (!p)
9146     + return -EINVAL;
9147     +
9148     if (clk_flags & CLK_SET_RATE_PARENT) {
9149     if (f->pre_div) {
9150     if (!rate)
9151     diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
9152     index 41254e702f1e..2ce7cc94d78b 100644
9153     --- a/drivers/devfreq/Kconfig
9154     +++ b/drivers/devfreq/Kconfig
9155     @@ -102,7 +102,8 @@ config ARM_TEGRA_DEVFREQ
9156    
9157     config ARM_RK3399_DMC_DEVFREQ
9158     tristate "ARM RK3399 DMC DEVFREQ Driver"
9159     - depends on ARCH_ROCKCHIP
9160     + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \
9161     + (COMPILE_TEST && HAVE_ARM_SMCCC)
9162     select DEVFREQ_EVENT_ROCKCHIP_DFI
9163     select DEVFREQ_GOV_SIMPLE_ONDEMAND
9164     select PM_DEVFREQ_EVENT
9165     diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
9166     index cd949800eed9..8851bc4e8e3e 100644
9167     --- a/drivers/devfreq/event/Kconfig
9168     +++ b/drivers/devfreq/event/Kconfig
9169     @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU
9170    
9171     config DEVFREQ_EVENT_ROCKCHIP_DFI
9172     tristate "ROCKCHIP DFI DEVFREQ event Driver"
9173     - depends on ARCH_ROCKCHIP
9174     + depends on ARCH_ROCKCHIP || COMPILE_TEST
9175     help
9176     This add the devfreq-event driver for Rockchip SoC. It provides DFI
9177     (DDR Monitor Module) driver to count ddr load.
9178     diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c
9179     index 7847dd34f86f..036a78b70427 100644
9180     --- a/drivers/gpio/gpio-grgpio.c
9181     +++ b/drivers/gpio/gpio-grgpio.c
9182     @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq,
9183     lirq->irq = irq;
9184     uirq = &priv->uirqs[lirq->index];
9185     if (uirq->refcnt == 0) {
9186     + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9187     ret = request_irq(uirq->uirq, grgpio_irq_handler, 0,
9188     dev_name(priv->dev), priv);
9189     if (ret) {
9190     dev_err(priv->dev,
9191     "Could not request underlying irq %d\n",
9192     uirq->uirq);
9193     -
9194     - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9195     -
9196     return ret;
9197     }
9198     + spin_lock_irqsave(&priv->gc.bgpio_lock, flags);
9199     }
9200     uirq->refcnt++;
9201    
9202     @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq)
9203     if (index >= 0) {
9204     uirq = &priv->uirqs[lirq->index];
9205     uirq->refcnt--;
9206     - if (uirq->refcnt == 0)
9207     + if (uirq->refcnt == 0) {
9208     + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9209     free_irq(uirq->uirq, priv);
9210     + return;
9211     + }
9212     }
9213    
9214     spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags);
9215     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9216     index 26afdffab5a0..ac8885562919 100644
9217     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9218     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
9219     @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
9220     path_size += le16_to_cpu(path->usSize);
9221    
9222     if (device_support & le16_to_cpu(path->usDeviceTag)) {
9223     - uint8_t con_obj_id, con_obj_num, con_obj_type;
9224     -
9225     - con_obj_id =
9226     + uint8_t con_obj_id =
9227     (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
9228     >> OBJECT_ID_SHIFT;
9229     - con_obj_num =
9230     - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
9231     - >> ENUM_ID_SHIFT;
9232     - con_obj_type =
9233     - (le16_to_cpu(path->usConnObjectId) &
9234     - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
9235    
9236     /* Skip TV/CV support */
9237     if ((le16_to_cpu(path->usDeviceTag) ==
9238     @@ -371,14 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device *
9239     router.ddc_valid = false;
9240     router.cd_valid = false;
9241     for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) {
9242     - uint8_t grph_obj_id, grph_obj_num, grph_obj_type;
9243     -
9244     - grph_obj_id =
9245     - (le16_to_cpu(path->usGraphicObjIds[j]) &
9246     - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
9247     - grph_obj_num =
9248     - (le16_to_cpu(path->usGraphicObjIds[j]) &
9249     - ENUM_ID_MASK) >> ENUM_ID_SHIFT;
9250     + uint8_t grph_obj_type=
9251     grph_obj_type =
9252     (le16_to_cpu(path->usGraphicObjIds[j]) &
9253     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
9254     diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
9255     index 3a44e705db53..d224fc12b757 100644
9256     --- a/drivers/gpu/drm/gma500/framebuffer.c
9257     +++ b/drivers/gpu/drm/gma500/framebuffer.c
9258     @@ -516,6 +516,7 @@ static int psbfb_probe(struct drm_fb_helper *helper,
9259     container_of(helper, struct psb_fbdev, psb_fb_helper);
9260     struct drm_device *dev = psb_fbdev->psb_fb_helper.dev;
9261     struct drm_psb_private *dev_priv = dev->dev_private;
9262     + unsigned int fb_size;
9263     int bytespp;
9264    
9265     bytespp = sizes->surface_bpp / 8;
9266     @@ -525,8 +526,11 @@ static int psbfb_probe(struct drm_fb_helper *helper,
9267     /* If the mode will not fit in 32bit then switch to 16bit to get
9268     a console on full resolution. The X mode setting server will
9269     allocate its own 32bit GEM framebuffer */
9270     - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height >
9271     - dev_priv->vram_stolen_size) {
9272     + fb_size = ALIGN(sizes->surface_width * bytespp, 64) *
9273     + sizes->surface_height;
9274     + fb_size = ALIGN(fb_size, PAGE_SIZE);
9275     +
9276     + if (fb_size > dev_priv->vram_stolen_size) {
9277     sizes->surface_bpp = 16;
9278     sizes->surface_depth = 16;
9279     }
9280     diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9281     index 01a21dd835b5..1ed60da76a0c 100644
9282     --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9283     +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
9284     @@ -306,6 +306,7 @@ err_pm_runtime_put:
9285     static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
9286     {
9287     struct drm_device *drm = mtk_crtc->base.dev;
9288     + struct drm_crtc *crtc = &mtk_crtc->base;
9289     int i;
9290    
9291     DRM_DEBUG_DRIVER("%s\n", __func__);
9292     @@ -327,6 +328,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
9293     mtk_disp_mutex_unprepare(mtk_crtc->mutex);
9294    
9295     pm_runtime_put(drm->dev);
9296     +
9297     + if (crtc->state->event && !crtc->state->active) {
9298     + spin_lock_irq(&crtc->dev->event_lock);
9299     + drm_crtc_send_vblank_event(crtc, crtc->state->event);
9300     + crtc->state->event = NULL;
9301     + spin_unlock_irq(&crtc->dev->event_lock);
9302     + }
9303     }
9304    
9305     static void mtk_drm_crtc_enable(struct drm_crtc *crtc)
9306     diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
9307     index 4bb9ab892ae1..78e521d00251 100644
9308     --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
9309     +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
9310     @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
9311    
9312     fence = list_entry(fctx->pending.next, typeof(*fence), head);
9313     chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
9314     - if (nouveau_fence_update(fence->channel, fctx))
9315     + if (nouveau_fence_update(chan, fctx))
9316     ret = NVIF_NOTIFY_DROP;
9317     }
9318     spin_unlock_irqrestore(&fctx->lock, flags);
9319     diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9320     index 9d90d8b4b7e6..f5a8db1bb8b7 100644
9321     --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9322     +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
9323     @@ -72,6 +72,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug)
9324    
9325     if (debug > subdev->debug)
9326     return;
9327     + if (!mthd)
9328     + return;
9329    
9330     for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) {
9331     u32 base = chan->head * mthd->addr;
9332     diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9333     index de8b806b88fd..7618b2eb4fdf 100644
9334     --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9335     +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
9336     @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
9337    
9338     nent = (fuc.size / sizeof(struct gk20a_fw_av));
9339    
9340     - pack = vzalloc((sizeof(*pack) * max_classes) +
9341     - (sizeof(*init) * (nent + 1)));
9342     + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) +
9343     + (sizeof(*init) * (nent + max_classes + 1)));
9344     if (!pack) {
9345     ret = -ENOMEM;
9346     goto end;
9347     }
9348    
9349     - init = (void *)(pack + max_classes);
9350     + init = (void *)(pack + max_classes + 1);
9351    
9352     - for (i = 0; i < nent; i++) {
9353     - struct gf100_gr_init *ent = &init[i];
9354     + for (i = 0; i < nent; i++, init++) {
9355     struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i];
9356     u32 class = av->addr & 0xffff;
9357     u32 addr = (av->addr & 0xffff0000) >> 14;
9358    
9359     if (prevclass != class) {
9360     - pack[classidx].init = ent;
9361     + if (prevclass) /* Add terminator to the method list. */
9362     + init++;
9363     + pack[classidx].init = init;
9364     pack[classidx].type = class;
9365     prevclass = class;
9366     if (++classidx >= max_classes) {
9367     @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
9368     }
9369     }
9370    
9371     - ent->addr = addr;
9372     - ent->data = av->data;
9373     - ent->count = 1;
9374     - ent->pitch = 1;
9375     + init->addr = addr;
9376     + init->data = av->data;
9377     + init->count = 1;
9378     + init->pitch = 1;
9379     }
9380    
9381     *ppack = pack;
9382     diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
9383     index 8b6f8aa23806..432ad7d73cb9 100644
9384     --- a/drivers/gpu/drm/radeon/radeon_display.c
9385     +++ b/drivers/gpu/drm/radeon/radeon_display.c
9386     @@ -110,6 +110,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc)
9387    
9388     DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id);
9389    
9390     + msleep(10);
9391     +
9392     WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset,
9393     (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) |
9394     NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS)));
9395     diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9396     index 1f013d45c9e9..0c7c3005594c 100644
9397     --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9398     +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c
9399     @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man,
9400    
9401     cres->hash.key = user_key | (res_type << 24);
9402     ret = drm_ht_insert_item(&man->resources, &cres->hash);
9403     - if (unlikely(ret != 0))
9404     + if (unlikely(ret != 0)) {
9405     + kfree(cres);
9406     goto out_invalid_key;
9407     + }
9408    
9409     cres->state = VMW_CMDBUF_RES_ADD;
9410     cres->res = vmw_resource_reference(res);
9411     diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c
9412     index 58b789c28b48..94eea2ac6251 100644
9413     --- a/drivers/hwmon/pmbus/ltc2978.c
9414     +++ b/drivers/hwmon/pmbus/ltc2978.c
9415     @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882,
9416    
9417     #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */
9418    
9419     -#define LTC_NOT_BUSY BIT(5)
9420     -#define LTC_NOT_PENDING BIT(4)
9421     +#define LTC_NOT_BUSY BIT(6)
9422     +#define LTC_NOT_PENDING BIT(5)
9423    
9424     /*
9425     * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which
9426     diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
9427     index b127ed60c733..9dde8390da09 100644
9428     --- a/drivers/ide/cmd64x.c
9429     +++ b/drivers/ide/cmd64x.c
9430     @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
9431     struct ide_timing t;
9432     u8 arttim = 0;
9433    
9434     + if (drive->dn >= ARRAY_SIZE(drwtim_regs))
9435     + return;
9436     +
9437     ide_timing_compute(drive, mode, &t, T, 0);
9438    
9439     /*
9440     diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
9441     index a97affca18ab..0f57d45484d1 100644
9442     --- a/drivers/ide/serverworks.c
9443     +++ b/drivers/ide/serverworks.c
9444     @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
9445     struct pci_dev *dev = to_pci_dev(hwif->dev);
9446     const u8 pio = drive->pio_mode - XFER_PIO_0;
9447    
9448     + if (drive->dn >= ARRAY_SIZE(drive_pci))
9449     + return;
9450     +
9451     pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
9452    
9453     if (svwks_csb_check(dev)) {
9454     @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
9455    
9456     u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0;
9457    
9458     + if (drive->dn >= ARRAY_SIZE(drive_pci2))
9459     + return;
9460     +
9461     pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
9462     pci_read_config_byte(dev, 0x54, &ultra_enable);
9463    
9464     diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
9465     index 47003d2a4a46..dee3853163b6 100644
9466     --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
9467     +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
9468     @@ -422,7 +422,7 @@ struct rxe_dev {
9469     struct list_head pending_mmaps;
9470    
9471     spinlock_t mmap_offset_lock; /* guard mmap_offset */
9472     - int mmap_offset;
9473     + u64 mmap_offset;
9474    
9475     struct rxe_port port;
9476     struct list_head list;
9477     diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
9478     index 0d2ab9a2cf44..02a5e2d7e574 100644
9479     --- a/drivers/infiniband/ulp/isert/ib_isert.c
9480     +++ b/drivers/infiniband/ulp/isert/ib_isert.c
9481     @@ -2555,6 +2555,17 @@ isert_wait4logout(struct isert_conn *isert_conn)
9482     }
9483     }
9484    
9485     +static void
9486     +isert_wait4cmds(struct iscsi_conn *conn)
9487     +{
9488     + isert_info("iscsi_conn %p\n", conn);
9489     +
9490     + if (conn->sess) {
9491     + target_sess_cmd_list_set_waiting(conn->sess->se_sess);
9492     + target_wait_for_sess_cmds(conn->sess->se_sess);
9493     + }
9494     +}
9495     +
9496     /**
9497     * isert_put_unsol_pending_cmds() - Drop commands waiting for
9498     * unsolicitate dataout
9499     @@ -2602,6 +2613,7 @@ static void isert_wait_conn(struct iscsi_conn *conn)
9500    
9501     ib_drain_qp(isert_conn->qp);
9502     isert_put_unsol_pending_cmds(conn);
9503     + isert_wait4cmds(conn);
9504     isert_wait4logout(isert_conn);
9505    
9506     queue_work(isert_release_wq, &isert_conn->release_work);
9507     diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
9508     index 28466e358fee..22c8d2070faa 100644
9509     --- a/drivers/input/touchscreen/edt-ft5x06.c
9510     +++ b/drivers/input/touchscreen/edt-ft5x06.c
9511     @@ -887,6 +887,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
9512     {
9513     const struct edt_i2c_chip_data *chip_data;
9514     struct edt_ft5x06_ts_data *tsdata;
9515     + u8 buf[2] = { 0xfc, 0x00 };
9516     struct input_dev *input;
9517     unsigned long irq_flags;
9518     int error;
9519     @@ -956,6 +957,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client,
9520     return error;
9521     }
9522    
9523     + /*
9524     + * Dummy read access. EP0700MLP1 returns bogus data on the first
9525     + * register read access and ignores writes.
9526     + */
9527     + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf);
9528     +
9529     edt_ft5x06_ts_set_regs(tsdata);
9530     edt_ft5x06_ts_get_defaults(&client->dev, tsdata);
9531     edt_ft5x06_ts_get_parameters(tsdata);
9532     diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
9533     index 7bd98585d78d..48d382008788 100644
9534     --- a/drivers/iommu/arm-smmu-v3.c
9535     +++ b/drivers/iommu/arm-smmu-v3.c
9536     @@ -1103,7 +1103,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
9537     }
9538    
9539     arm_smmu_sync_ste_for_sid(smmu, sid);
9540     - dst[0] = cpu_to_le64(val);
9541     + /* See comment in arm_smmu_write_ctx_desc() */
9542     + WRITE_ONCE(dst[0], cpu_to_le64(val));
9543     arm_smmu_sync_ste_for_sid(smmu, sid);
9544    
9545     /* It's likely that we'll want to use the new STE soon */
9546     diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
9547     index 0c0cd2768d6e..d1efbb8dadc5 100644
9548     --- a/drivers/irqchip/irq-gic-v3-its.c
9549     +++ b/drivers/irqchip/irq-gic-v3-its.c
9550     @@ -365,7 +365,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd,
9551     struct its_cmd_desc *desc)
9552     {
9553     its_encode_cmd(cmd, GITS_CMD_INVALL);
9554     - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id);
9555     + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id);
9556    
9557     its_fixup_cmd(cmd);
9558    
9559     diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
9560     index f7b8681aed3f..2ab6060031a4 100644
9561     --- a/drivers/irqchip/irq-gic-v3.c
9562     +++ b/drivers/irqchip/irq-gic-v3.c
9563     @@ -1195,6 +1195,7 @@ static struct
9564     struct redist_region *redist_regs;
9565     u32 nr_redist_regions;
9566     bool single_redist;
9567     + int enabled_rdists;
9568     u32 maint_irq;
9569     int maint_irq_mode;
9570     phys_addr_t vcpu_base;
9571     @@ -1289,8 +1290,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
9572     * If GICC is enabled and has valid gicr base address, then it means
9573     * GICR base is presented via GICC
9574     */
9575     - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address)
9576     + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
9577     + acpi_data.enabled_rdists++;
9578     return 0;
9579     + }
9580    
9581     /*
9582     * It's perfectly valid firmware can pass disabled GICC entry, driver
9583     @@ -1320,8 +1323,10 @@ static int __init gic_acpi_count_gicr_regions(void)
9584    
9585     count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
9586     gic_acpi_match_gicc, 0);
9587     - if (count > 0)
9588     + if (count > 0) {
9589     acpi_data.single_redist = true;
9590     + count = acpi_data.enabled_rdists;
9591     + }
9592    
9593     return count;
9594     }
9595     diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
9596     index b935839ab79c..f483041eed98 100644
9597     --- a/drivers/md/bcache/bset.h
9598     +++ b/drivers/md/bcache/bset.h
9599     @@ -380,7 +380,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
9600    
9601     /* Bkey utility code */
9602    
9603     -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
9604     +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
9605     + (unsigned int)(i)->keys)
9606    
9607     static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
9608     {
9609     diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
9610     index 58eb62f1ba21..a018a76662df 100644
9611     --- a/drivers/media/i2c/mt9v032.c
9612     +++ b/drivers/media/i2c/mt9v032.c
9613     @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev,
9614     struct v4l2_subdev_pad_config *cfg,
9615     struct v4l2_subdev_mbus_code_enum *code)
9616     {
9617     + struct mt9v032 *mt9v032 = to_mt9v032(subdev);
9618     +
9619     if (code->index > 0)
9620     return -EINVAL;
9621    
9622     - code->code = MEDIA_BUS_FMT_SGRBG10_1X10;
9623     + code->code = mt9v032->format.code;
9624     return 0;
9625     }
9626    
9627     @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev,
9628     struct v4l2_subdev_pad_config *cfg,
9629     struct v4l2_subdev_frame_size_enum *fse)
9630     {
9631     - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10)
9632     + struct mt9v032 *mt9v032 = to_mt9v032(subdev);
9633     +
9634     + if (fse->index >= 3)
9635     + return -EINVAL;
9636     + if (mt9v032->format.code != fse->code)
9637     return -EINVAL;
9638    
9639     fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index);
9640     diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c
9641     index b7892f3efd98..5c4c3f0c57be 100644
9642     --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c
9643     +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c
9644     @@ -14,8 +14,8 @@
9645     #define MAX_SRC_WIDTH 2048
9646    
9647     /* Reset & boot poll config */
9648     -#define POLL_RST_MAX 50
9649     -#define POLL_RST_DELAY_MS 20
9650     +#define POLL_RST_MAX 500
9651     +#define POLL_RST_DELAY_MS 2
9652    
9653     enum bdisp_target_plan {
9654     BDISP_RGB,
9655     @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp)
9656     for (i = 0; i < POLL_RST_MAX; i++) {
9657     if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE)
9658     break;
9659     - msleep(POLL_RST_DELAY_MS);
9660     + udelay(POLL_RST_DELAY_MS * 1000);
9661     }
9662     if (i == POLL_RST_MAX)
9663     dev_err(bdisp->dev, "Reset timeout\n");
9664     diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
9665     index b73d9ba9496c..96290b83dfde 100644
9666     --- a/drivers/net/ethernet/cisco/enic/enic_main.c
9667     +++ b/drivers/net/ethernet/cisco/enic/enic_main.c
9668     @@ -1806,10 +1806,10 @@ static int enic_stop(struct net_device *netdev)
9669     }
9670    
9671     netif_carrier_off(netdev);
9672     - netif_tx_disable(netdev);
9673     if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX)
9674     for (i = 0; i < enic->wq_count; i++)
9675     napi_disable(&enic->napi[enic_cq_wq(enic, i)]);
9676     + netif_tx_disable(netdev);
9677    
9678     if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic))
9679     enic_dev_del_station_addr(enic);
9680     diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
9681     index 60bd1b36df60..b665d27f8e29 100644
9682     --- a/drivers/net/ethernet/freescale/gianfar.c
9683     +++ b/drivers/net/ethernet/freescale/gianfar.c
9684     @@ -2688,13 +2688,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9685     skb_dirtytx = tx_queue->skb_dirtytx;
9686    
9687     while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
9688     + bool do_tstamp;
9689     +
9690     + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
9691     + priv->hwts_tx_en;
9692    
9693     frags = skb_shinfo(skb)->nr_frags;
9694    
9695     /* When time stamping, one additional TxBD must be freed.
9696     * Also, we need to dma_unmap_single() the TxPAL.
9697     */
9698     - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
9699     + if (unlikely(do_tstamp))
9700     nr_txbds = frags + 2;
9701     else
9702     nr_txbds = frags + 1;
9703     @@ -2708,7 +2712,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9704     (lstatus & BD_LENGTH_MASK))
9705     break;
9706    
9707     - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
9708     + if (unlikely(do_tstamp)) {
9709     next = next_txbd(bdp, base, tx_ring_size);
9710     buflen = be16_to_cpu(next->length) +
9711     GMAC_FCB_LEN + GMAC_TXPAL_LEN;
9712     @@ -2718,7 +2722,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
9713     dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
9714     buflen, DMA_TO_DEVICE);
9715    
9716     - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
9717     + if (unlikely(do_tstamp)) {
9718     struct skb_shared_hwtstamps shhwtstamps;
9719     u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
9720     ~0x7UL);
9721     diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
9722     index af85a1b3135e..87bf05a81db5 100644
9723     --- a/drivers/net/wan/fsl_ucc_hdlc.c
9724     +++ b/drivers/net/wan/fsl_ucc_hdlc.c
9725     @@ -209,6 +209,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
9726     ret = -ENOMEM;
9727     goto free_riptr;
9728     }
9729     + if (riptr != (u16)riptr || tiptr != (u16)tiptr) {
9730     + dev_err(priv->dev, "MURAM allocation out of addressable range\n");
9731     + ret = -ENOMEM;
9732     + goto free_tiptr;
9733     + }
9734    
9735     /* Set RIPTR, TIPTR */
9736     iowrite16be(riptr, &priv->ucc_pram->riptr);
9737     diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
9738     index e7bbdb7af53a..97968e6a6a4e 100644
9739     --- a/drivers/net/wan/ixp4xx_hss.c
9740     +++ b/drivers/net/wan/ixp4xx_hss.c
9741     @@ -261,7 +261,7 @@ struct port {
9742     struct hss_plat_info *plat;
9743     buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
9744     struct desc *desc_tab; /* coherent */
9745     - u32 desc_tab_phys;
9746     + dma_addr_t desc_tab_phys;
9747     unsigned int id;
9748     unsigned int clock_type, clock_rate, loopback;
9749     unsigned int initialized, carrier;
9750     @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
9751     dev->stats.tx_dropped++;
9752     return NETDEV_TX_OK;
9753     }
9754     - memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
9755     + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
9756     dev_kfree_skb(skb);
9757     #endif
9758    
9759     diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
9760     index 83770d2ea057..9da8bd792702 100644
9761     --- a/drivers/net/wireless/broadcom/b43legacy/main.c
9762     +++ b/drivers/net/wireless/broadcom/b43legacy/main.c
9763     @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev)
9764     }
9765    
9766     /* Interrupt handler bottom-half */
9767     -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev)
9768     +static void b43legacy_interrupt_tasklet(unsigned long data)
9769     {
9770     + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data;
9771     u32 reason;
9772     u32 dma_reason[ARRAY_SIZE(dev->dma_reason)];
9773     u32 merged_dma_reason = 0;
9774     @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev,
9775     b43legacy_set_status(wldev, B43legacy_STAT_UNINIT);
9776     wldev->bad_frames_preempt = modparam_bad_frames_preempt;
9777     tasklet_init(&wldev->isr_tasklet,
9778     - (void (*)(unsigned long))b43legacy_interrupt_tasklet,
9779     + b43legacy_interrupt_tasklet,
9780     (unsigned long)wldev);
9781     if (modparam_pio)
9782     wldev->__using_pio = true;
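This and several later hunks (ipw2100, ipw2200, iwlegacy 3945/4965, rtlwifi) give tasklet handlers the real void (*)(unsigned long) prototype and recover the device pointer with a cast inside the handler, instead of casting the function pointer at tasklet_init() time. A stripped-down sketch of the pattern, assuming the 4.9-era tasklet API:

	#include <linux/types.h>
	#include <linux/interrupt.h>

	struct my_wldev {
		struct tasklet_struct isr_tasklet;
		u32 pending_reason;
	};

	/* correct prototype: take the cookie as unsigned long, cast it back */
	static void my_interrupt_tasklet(unsigned long data)
	{
		struct my_wldev *dev = (struct my_wldev *)data;

		dev->pending_reason = 0;
	}

	static void my_wldev_init(struct my_wldev *dev)
	{
		tasklet_init(&dev->isr_tasklet, my_interrupt_tasklet,
			     (unsigned long)dev);
	}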
9783     diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9784     index de52d826eb24..998a4bd6db78 100644
9785     --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9786     +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
9787     @@ -1921,6 +1921,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes)
9788     BRCMF_SDIO_FT_NORMAL)) {
9789     rd->len = 0;
9790     brcmu_pkt_buf_free_skb(pkt);
9791     + continue;
9792     }
9793     bus->sdcnt.rx_readahead_cnt++;
9794     if (rd->len != roundup(rd_new.len, 16)) {
9795     diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9796     index bfa542c8d6f1..86c84b11218d 100644
9797     --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9798     +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
9799     @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv)
9800     }
9801     }
9802    
9803     -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv)
9804     +static void ipw2100_irq_tasklet(unsigned long data)
9805     {
9806     + struct ipw2100_priv *priv = (struct ipw2100_priv *)data;
9807     struct net_device *dev = priv->net_dev;
9808     unsigned long flags;
9809     u32 inta, tmp;
9810     @@ -6029,7 +6030,7 @@ static void ipw2100_rf_kill(struct work_struct *work)
9811     spin_unlock_irqrestore(&priv->low_lock, flags);
9812     }
9813    
9814     -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv);
9815     +static void ipw2100_irq_tasklet(unsigned long data);
9816    
9817     static const struct net_device_ops ipw2100_netdev_ops = {
9818     .ndo_open = ipw2100_open,
9819     @@ -6158,7 +6159,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
9820     INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill);
9821     INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event);
9822    
9823     - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
9824     + tasklet_init(&priv->irq_tasklet,
9825     ipw2100_irq_tasklet, (unsigned long)priv);
9826    
9827     /* NOTE: We do not start the deferred work for status checks yet */
9828     diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9829     index bfd68612a535..48edb2b6eb7d 100644
9830     --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9831     +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
9832     @@ -1968,8 +1968,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv)
9833     wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL);
9834     }
9835    
9836     -static void ipw_irq_tasklet(struct ipw_priv *priv)
9837     +static void ipw_irq_tasklet(unsigned long data)
9838     {
9839     + struct ipw_priv *priv = (struct ipw_priv *)data;
9840     u32 inta, inta_mask, handled = 0;
9841     unsigned long flags;
9842     int rc = 0;
9843     @@ -10705,7 +10706,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv)
9844     INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate);
9845     #endif /* CONFIG_IPW2200_QOS */
9846    
9847     - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long))
9848     + tasklet_init(&priv->irq_tasklet,
9849     ipw_irq_tasklet, (unsigned long)priv);
9850    
9851     return ret;
9852     diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9853     index 466912eb2d87..d853ccbf74cb 100644
9854     --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9855     +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
9856     @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il)
9857     }
9858    
9859     static void
9860     -il3945_irq_tasklet(struct il_priv *il)
9861     +il3945_irq_tasklet(unsigned long data)
9862     {
9863     + struct il_priv *il = (struct il_priv *)data;
9864     u32 inta, handled = 0;
9865     u32 inta_fh;
9866     unsigned long flags;
9867     @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il)
9868     setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
9869    
9870     tasklet_init(&il->irq_tasklet,
9871     - (void (*)(unsigned long))il3945_irq_tasklet,
9872     + il3945_irq_tasklet,
9873     (unsigned long)il);
9874     }
9875    
9876     diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9877     index a91d170a614b..6c2dcd236713 100644
9878     --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9879     +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
9880     @@ -4361,8 +4361,9 @@ il4965_synchronize_irq(struct il_priv *il)
9881     }
9882    
9883     static void
9884     -il4965_irq_tasklet(struct il_priv *il)
9885     +il4965_irq_tasklet(unsigned long data)
9886     {
9887     + struct il_priv *il = (struct il_priv *)data;
9888     u32 inta, handled = 0;
9889     u32 inta_fh;
9890     unsigned long flags;
9891     @@ -6260,7 +6261,7 @@ il4965_setup_deferred_work(struct il_priv *il)
9892     setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il);
9893    
9894     tasklet_init(&il->irq_tasklet,
9895     - (void (*)(unsigned long))il4965_irq_tasklet,
9896     + il4965_irq_tasklet,
9897     (unsigned long)il);
9898     }
9899    
9900     diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c
9901     index 140b6ea8f7cc..db2373fe8ac3 100644
9902     --- a/drivers/net/wireless/intel/iwlegacy/common.c
9903     +++ b/drivers/net/wireless/intel/iwlegacy/common.c
9904     @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il)
9905     u32 gp = _il_rd(il, CSR_EEPROM_GP);
9906     int sz;
9907     int ret;
9908     - u16 addr;
9909     + int addr;
9910    
9911     /* allocate eeprom */
9912     sz = il->cfg->eeprom_size;
9913     diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9914     index c5203568a47a..f0f205c3aadb 100644
9915     --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9916     +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
9917     @@ -736,7 +736,8 @@ static struct thermal_zone_device_ops tzone_ops = {
9918     static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
9919     {
9920     int i;
9921     - char name[] = "iwlwifi";
9922     + char name[16];
9923     + static atomic_t counter = ATOMIC_INIT(0);
9924    
9925     if (!iwl_mvm_is_tt_in_fw(mvm)) {
9926     mvm->tz_device.tzone = NULL;
9927     @@ -746,6 +747,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
9928    
9929     BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH);
9930    
9931     + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF);
9932     mvm->tz_device.tzone = thermal_zone_device_register(name,
9933     IWL_MAX_DTS_TRIPS,
9934     IWL_WRITABLE_TRIPS_MSK,
9935     diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
9936     index c995ace153ee..30171d4c4718 100644
9937     --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
9938     +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
9939     @@ -2570,7 +2570,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap,
9940     sta->supported_rates[0] = 2;
9941     if (sta->tx_supp_rates & WLAN_RATE_2M)
9942     sta->supported_rates[1] = 4;
9943     - if (sta->tx_supp_rates & WLAN_RATE_5M5)
9944     + if (sta->tx_supp_rates & WLAN_RATE_5M5)
9945     sta->supported_rates[2] = 11;
9946     if (sta->tx_supp_rates & WLAN_RATE_11M)
9947     sta->supported_rates[3] = 22;
9948     diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9949     index 8244d8262951..4e91c74fcfad 100644
9950     --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9951     +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
9952     @@ -1351,7 +1351,8 @@ static int ezusb_init(struct hermes *hw)
9953     int retval;
9954    
9955     BUG_ON(in_interrupt());
9956     - BUG_ON(!upriv);
9957     + if (!upriv)
9958     + return -EINVAL;
9959    
9960     upriv->reply_count = 0;
9961     /* Write the MAGIC number on the simulated registers to keep
9962     diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
9963     index e15b462d096b..21b7cb845bf4 100644
9964     --- a/drivers/net/wireless/realtek/rtlwifi/pci.c
9965     +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
9966     @@ -1095,13 +1095,15 @@ done:
9967     return ret;
9968     }
9969    
9970     -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw)
9971     +static void _rtl_pci_irq_tasklet(unsigned long data)
9972     {
9973     + struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
9974     _rtl_pci_tx_chk_waitq(hw);
9975     }
9976    
9977     -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
9978     +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data)
9979     {
9980     + struct ieee80211_hw *hw = (struct ieee80211_hw *)data;
9981     struct rtl_priv *rtlpriv = rtl_priv(hw);
9982     struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
9983     struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
9984     @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw,
9985    
9986     /*task */
9987     tasklet_init(&rtlpriv->works.irq_tasklet,
9988     - (void (*)(unsigned long))_rtl_pci_irq_tasklet,
9989     + _rtl_pci_irq_tasklet,
9990     (unsigned long)hw);
9991     tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet,
9992     - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet,
9993     + _rtl_pci_prepare_bcn_tasklet,
9994     (unsigned long)hw);
9995     INIT_WORK(&rtlpriv->works.lps_change_work,
9996     rtl_lps_change_work_callback);
9997     diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
9998     index 3cd995de1bbb..151b220381f9 100644
9999     --- a/drivers/nfc/port100.c
10000     +++ b/drivers/nfc/port100.c
10001     @@ -573,7 +573,7 @@ static void port100_tx_update_payload_len(void *_frame, int len)
10002     {
10003     struct port100_frame *frame = _frame;
10004    
10005     - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len);
10006     + le16_add_cpu(&frame->datalen, len);
10007     }
10008    
10009     static bool port100_rx_frame_is_valid(void *_frame)
10010     diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
10011     index 1d32fe2d97aa..9ec3cb628b0b 100644
10012     --- a/drivers/pci/iov.c
10013     +++ b/drivers/pci/iov.c
10014     @@ -181,6 +181,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset)
10015     failed2:
10016     sysfs_remove_link(&dev->dev.kobj, buf);
10017     failed1:
10018     + pci_stop_and_remove_bus_device(virtfn);
10019     pci_dev_put(dev);
10020     mutex_lock(&iov->dev->sriov->lock);
10021     pci_stop_and_remove_bus_device(virtfn);
10022     diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
10023     index f83a2a60d9c9..1e945aa77734 100644
10024     --- a/drivers/pinctrl/intel/pinctrl-baytrail.c
10025     +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
10026     @@ -958,7 +958,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset)
10027    
10028     raw_spin_lock_irqsave(&byt_lock, flags);
10029     value = readl(reg);
10030     - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
10031     +
10032     + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */
10033     + if (value & BYT_DIRECT_IRQ_EN)
10034     + /* nothing to do */ ;
10035     + else
10036     + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
10037     +
10038     writel(value, reg);
10039     raw_spin_unlock_irqrestore(&byt_lock, flags);
10040     }
10041     diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10042     index e1c34e19222e..3ddb9565ed80 100644
10043     --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10044     +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c
10045     @@ -500,17 +500,15 @@ enum {
10046     SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK,
10047     CRX0_MARK, CRX1_MARK,
10048     CTX0_MARK, CTX1_MARK,
10049     + CRX0_CRX1_MARK, CTX0_CTX1_MARK,
10050    
10051     PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK,
10052     PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK,
10053     PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK,
10054     PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK,
10055     IERXD_MARK, IETXD_MARK,
10056     - CRX0_CRX1_MARK,
10057     WDTOVF_MARK,
10058    
10059     - CRX0X1_MARK,
10060     -
10061     /* DMAC */
10062     TEND0_MARK, DACK0_MARK, DREQ0_MARK,
10063     TEND1_MARK, DACK1_MARK, DREQ1_MARK,
10064     @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = {
10065    
10066     PINMUX_DATA(PJ3_DATA, PJ3MD_00),
10067     PINMUX_DATA(CRX1_MARK, PJ3MD_01),
10068     - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10),
10069     + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10),
10070     PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11),
10071    
10072     PINMUX_DATA(PJ2_DATA, PJ2MD_000),
10073     PINMUX_DATA(CTX1_MARK, PJ2MD_001),
10074     - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010),
10075     + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010),
10076     PINMUX_DATA(CS2_MARK, PJ2MD_011),
10077     PINMUX_DATA(SCK0_MARK, PJ2MD_100),
10078     PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101),
10079     @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = {
10080     GPIO_FN(CTX1),
10081     GPIO_FN(CRX1),
10082     GPIO_FN(CTX0),
10083     + GPIO_FN(CTX0_CTX1),
10084     GPIO_FN(CRX0),
10085     GPIO_FN(CRX0_CRX1),
10086    
10087     diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10088     index cfdb4fc177c3..3df0c0d139d0 100644
10089     --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10090     +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c
10091     @@ -740,13 +740,12 @@ enum {
10092     CRX0_MARK, CTX0_MARK,
10093     CRX1_MARK, CTX1_MARK,
10094     CRX2_MARK, CTX2_MARK,
10095     - CRX0_CRX1_MARK,
10096     - CRX0_CRX1_CRX2_MARK,
10097     - CTX0CTX1CTX2_MARK,
10098     + CRX0_CRX1_MARK, CTX0_CTX1_MARK,
10099     + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK,
10100     CRX1_PJ22_MARK, CTX1_PJ23_MARK,
10101     CRX2_PJ20_MARK, CTX2_PJ21_MARK,
10102     - CRX0CRX1_PJ22_MARK,
10103     - CRX0CRX1CRX2_PJ20_MARK,
10104     + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK,
10105     + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK,
10106    
10107     /* VDC */
10108     DV_CLK_MARK,
10109     @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = {
10110     PINMUX_DATA(CS3_MARK, PC8MD_001),
10111     PINMUX_DATA(TXD7_MARK, PC8MD_010),
10112     PINMUX_DATA(CTX1_MARK, PC8MD_011),
10113     + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100),
10114    
10115     PINMUX_DATA(PC7_DATA, PC7MD_000),
10116     PINMUX_DATA(CKE_MARK, PC7MD_001),
10117     @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = {
10118     PINMUX_DATA(CAS_MARK, PC6MD_001),
10119     PINMUX_DATA(SCK7_MARK, PC6MD_010),
10120     PINMUX_DATA(CTX0_MARK, PC6MD_011),
10121     + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100),
10122    
10123     PINMUX_DATA(PC5_DATA, PC5MD_000),
10124     PINMUX_DATA(RAS_MARK, PC5MD_001),
10125     PINMUX_DATA(CRX0_MARK, PC5MD_011),
10126     - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100),
10127     + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100),
10128     PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101),
10129    
10130     PINMUX_DATA(PC4_DATA, PC4MD_00),
10131     @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = {
10132     PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010),
10133     PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011),
10134     PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100),
10135     - PINMUX_DATA(CTX1_MARK, PJ23MD_101),
10136     + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101),
10137     + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110),
10138    
10139     PINMUX_DATA(PJ22_DATA, PJ22MD_000),
10140     PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001),
10141     PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010),
10142     PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011),
10143     PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100),
10144     - PINMUX_DATA(CRX1_MARK, PJ22MD_101),
10145     - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110),
10146     + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101),
10147     + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110),
10148    
10149     PINMUX_DATA(PJ21_DATA, PJ21MD_000),
10150     PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001),
10151     PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010),
10152     PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011),
10153     PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100),
10154     - PINMUX_DATA(CTX2_MARK, PJ21MD_101),
10155     + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101),
10156     + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110),
10157    
10158     PINMUX_DATA(PJ20_DATA, PJ20MD_000),
10159     PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001),
10160     PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010),
10161     PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011),
10162     PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100),
10163     - PINMUX_DATA(CRX2_MARK, PJ20MD_101),
10164     - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110),
10165     + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101),
10166     + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110),
10167    
10168     PINMUX_DATA(PJ19_DATA, PJ19MD_000),
10169     PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001),
10170     @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = {
10171     GPIO_FN(WDTOVF),
10172    
10173     /* CAN */
10174     + GPIO_FN(CTX2),
10175     + GPIO_FN(CRX2),
10176     GPIO_FN(CTX1),
10177     GPIO_FN(CRX1),
10178     GPIO_FN(CTX0),
10179     GPIO_FN(CRX0),
10180     + GPIO_FN(CTX0_CTX1),
10181     GPIO_FN(CRX0_CRX1),
10182     + GPIO_FN(CTX0_CTX1_CTX2),
10183     GPIO_FN(CRX0_CRX1_CRX2),
10184     + GPIO_FN(CTX2_PJ21),
10185     + GPIO_FN(CRX2_PJ20),
10186     + GPIO_FN(CTX1_PJ23),
10187     + GPIO_FN(CRX1_PJ22),
10188     + GPIO_FN(CTX0_CTX1_PJ23),
10189     + GPIO_FN(CRX0_CRX1_PJ22),
10190     + GPIO_FN(CTX0_CTX1_CTX2_PJ21),
10191     + GPIO_FN(CRX0_CRX1_CRX2_PJ20),
10192    
10193     /* DMAC */
10194     GPIO_FN(TEND0),
10195     diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
10196     index 5ad42f33e70c..2e15acf13893 100644
10197     --- a/drivers/pwm/pwm-omap-dmtimer.c
10198     +++ b/drivers/pwm/pwm-omap-dmtimer.c
10199     @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev)
10200     static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
10201     {
10202     struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev);
10203     + int ret;
10204     +
10205     + ret = pwmchip_remove(&omap->chip);
10206     + if (ret)
10207     + return ret;
10208    
10209     if (pm_runtime_active(&omap->dm_timer_pdev->dev))
10210     omap->pdata->stop(omap->dm_timer);
10211     @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
10212    
10213     mutex_destroy(&omap->mutex);
10214    
10215     - return pwmchip_remove(&omap->chip);
10216     + return 0;
10217     }
10218    
10219     static const struct of_device_id pwm_omap_dmtimer_of_match[] = {
10220     diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
10221     index dfa8d50a5d74..28646e4cf3ba 100644
10222     --- a/drivers/regulator/rk808-regulator.c
10223     +++ b/drivers/regulator/rk808-regulator.c
10224     @@ -589,7 +589,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev,
10225     }
10226    
10227     if (!pdata->dvs_gpio[i]) {
10228     - dev_warn(dev, "there is no dvs%d gpio\n", i);
10229     + dev_info(dev, "there is no dvs%d gpio\n", i);
10230     continue;
10231     }
10232    
10233     diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
10234     index c6bfb3496684..b99780574044 100644
10235     --- a/drivers/remoteproc/remoteproc_core.c
10236     +++ b/drivers/remoteproc/remoteproc_core.c
10237     @@ -1488,7 +1488,7 @@ static int __init remoteproc_init(void)
10238    
10239     return 0;
10240     }
10241     -module_init(remoteproc_init);
10242     +subsys_initcall(remoteproc_init);
10243    
10244     static void __exit remoteproc_exit(void)
10245     {
10246     diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c
10247     index 64ab9eaec428..def3208dd290 100644
10248     --- a/drivers/scsi/aic7xxx/aic7xxx_core.c
10249     +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
10250     @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period,
10251     * At some speeds, we only support
10252     * ST transfers.
10253     */
10254     - if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
10255     + if ((syncrate->sxfr_u2 & ST_SXFR) != 0)
10256     *ppr_options &= ~MSG_EXT_PPR_DT_REQ;
10257     break;
10258     }
10259     diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
10260     index d60564397be5..60c3e2bf8761 100644
10261     --- a/drivers/scsi/iscsi_tcp.c
10262     +++ b/drivers/scsi/iscsi_tcp.c
10263     @@ -882,6 +882,10 @@ free_host:
10264     static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session)
10265     {
10266     struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
10267     + struct iscsi_session *session = cls_session->dd_data;
10268     +
10269     + if (WARN_ON_ONCE(session->leadconn))
10270     + return;
10271    
10272     iscsi_tcp_r2tpool_free(cls_session->dd_data);
10273     iscsi_session_teardown(cls_session);
10274     diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
10275     index e730aabc26d0..65bbca715f57 100644
10276     --- a/drivers/scsi/qla2xxx/qla_os.c
10277     +++ b/drivers/scsi/qla2xxx/qla_os.c
10278     @@ -451,6 +451,12 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10279     goto fail;
10280     }
10281     if (ql2xmultique_tag) {
10282     + ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
10283     + if (unlikely(!ha->wq)) {
10284     + ql_log(ql_log_warn, vha, 0x01e0,
10285     + "Failed to alloc workqueue.\n");
10286     + goto fail;
10287     + }
10288     /* create a request queue for IO */
10289     options |= BIT_7;
10290     req = qla25xx_create_req_que(ha, options, 0, 0, -1,
10291     @@ -458,9 +464,8 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10292     if (!req) {
10293     ql_log(ql_log_warn, vha, 0x00e0,
10294     "Failed to create request queue.\n");
10295     - goto fail;
10296     + goto fail2;
10297     }
10298     - ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
10299     vha->req = ha->req_q_map[req];
10300     options |= BIT_1;
10301     for (ques = 1; ques < ha->max_rsp_queues; ques++) {
10302     @@ -468,7 +473,7 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10303     if (!ret) {
10304     ql_log(ql_log_warn, vha, 0x00e8,
10305     "Failed to create response queue.\n");
10306     - goto fail2;
10307     + goto fail3;
10308     }
10309     }
10310     ha->flags.cpu_affinity_enabled = 1;
10311     @@ -482,11 +487,13 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
10312     ha->max_rsp_queues, ha->max_req_queues);
10313     }
10314     return 0;
10315     -fail2:
10316     +
10317     +fail3:
10318     qla25xx_delete_queues(vha);
10319     - destroy_workqueue(ha->wq);
10320     - ha->wq = NULL;
10321     vha->req = ha->req_q_map[0];
10322     +fail2:
10323     + destroy_workqueue(ha->wq);
10324     + ha->wq = NULL;
10325     fail:
10326     ha->mqenable = 0;
10327     kfree(ha->req_q_map);
10328     diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
10329     index ab7bc4e63425..fff9c4d0f7c8 100644
10330     --- a/drivers/scsi/scsi_transport_iscsi.c
10331     +++ b/drivers/scsi/scsi_transport_iscsi.c
10332     @@ -2964,6 +2964,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev)
10333     return err;
10334     }
10335    
10336     +static int iscsi_session_has_conns(int sid)
10337     +{
10338     + struct iscsi_cls_conn *conn;
10339     + unsigned long flags;
10340     + int found = 0;
10341     +
10342     + spin_lock_irqsave(&connlock, flags);
10343     + list_for_each_entry(conn, &connlist, conn_list) {
10344     + if (iscsi_conn_get_sid(conn) == sid) {
10345     + found = 1;
10346     + break;
10347     + }
10348     + }
10349     + spin_unlock_irqrestore(&connlock, flags);
10350     +
10351     + return found;
10352     +}
10353     +
10354     static int
10355     iscsi_set_iface_params(struct iscsi_transport *transport,
10356     struct iscsi_uevent *ev, uint32_t len)
10357     @@ -3538,10 +3556,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
10358     break;
10359     case ISCSI_UEVENT_DESTROY_SESSION:
10360     session = iscsi_session_lookup(ev->u.d_session.sid);
10361     - if (session)
10362     - transport->destroy_session(session);
10363     - else
10364     + if (!session)
10365     err = -EINVAL;
10366     + else if (iscsi_session_has_conns(ev->u.d_session.sid))
10367     + err = -EBUSY;
10368     + else
10369     + transport->destroy_session(session);
10370     break;
10371     case ISCSI_UEVENT_UNBIND_SESSION:
10372     session = iscsi_session_lookup(ev->u.d_session.sid);
10373     diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c
10374     index 5b18f6ffa45c..cd61c883c19f 100644
10375     --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c
10376     +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c
10377     @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void)
10378     apbmisc.flags = IORESOURCE_MEM;
10379    
10380     /* strapping options */
10381     - if (tegra_get_chip_id() == TEGRA124) {
10382     + if (of_machine_is_compatible("nvidia,tegra124")) {
10383     straps.start = 0x7000e864;
10384     straps.end = 0x7000e867;
10385     } else {
10386     diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
10387     index c6314d1552ea..99fd4f53c856 100644
10388     --- a/drivers/staging/android/ashmem.c
10389     +++ b/drivers/staging/android/ashmem.c
10390     @@ -370,8 +370,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot)
10391     _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC);
10392     }
10393    
10394     +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma)
10395     +{
10396     + /* do not allow to mmap ashmem backing shmem file directly */
10397     + return -EPERM;
10398     +}
10399     +
10400     +static unsigned long
10401     +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr,
10402     + unsigned long len, unsigned long pgoff,
10403     + unsigned long flags)
10404     +{
10405     + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
10406     +}
10407     +
10408     static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
10409     {
10410     + static struct file_operations vmfile_fops;
10411     struct ashmem_area *asma = file->private_data;
10412     int ret = 0;
10413    
10414     @@ -412,6 +427,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
10415     }
10416     vmfile->f_mode |= FMODE_LSEEK;
10417     asma->file = vmfile;
10418     + /*
10419     + * override mmap operation of the vmfile so that it can't be
10420     + * remapped which would lead to creation of a new vma with no
10421     + * asma permission checks. Have to override get_unmapped_area
10422     + * as well to prevent VM_BUG_ON check for f_ops modification.
10423     + */
10424     + if (!vmfile_fops.mmap) {
10425     + vmfile_fops = *vmfile->f_op;
10426     + vmfile_fops.mmap = ashmem_vmfile_mmap;
10427     + vmfile_fops.get_unmapped_area =
10428     + ashmem_vmfile_get_unmapped_area;
10429     + }
10430     + vmfile->f_op = &vmfile_fops;
10431     }
10432     get_file(asma->file);
10433    
10434     diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c
10435     index aa6508b44fab..ed7c32542cb3 100644
10436     --- a/drivers/staging/greybus/audio_manager.c
10437     +++ b/drivers/staging/greybus/audio_manager.c
10438     @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void)
10439    
10440     list_for_each_entry_safe(module, next, &modules_list, list) {
10441     list_del(&module->list);
10442     - kobject_put(&module->kobj);
10443     ida_simple_remove(&module_id, module->id);
10444     + kobject_put(&module->kobj);
10445     }
10446    
10447     is_empty = list_empty(&modules_list);
10448     diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10449     index c7bf8ab26192..50793c9df1b3 100644
10450     --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10451     +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
10452     @@ -2052,7 +2052,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p)
10453     struct ieee_param *param;
10454     uint ret = 0;
10455    
10456     - if (p->length < sizeof(struct ieee_param) || !p->pointer) {
10457     + if (!p->pointer || p->length != sizeof(struct ieee_param)) {
10458     ret = -EINVAL;
10459     goto out;
10460     }
10461     @@ -2859,7 +2859,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p)
10462     goto out;
10463     }
10464    
10465     - if (!p->pointer) {
10466     + if (!p->pointer || p->length != sizeof(struct ieee_param)) {
10467     ret = -EINVAL;
10468     goto out;
10469     }
10470     diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c
10471     index 655f0002f880..7b73fa2f8834 100644
10472     --- a/drivers/staging/vt6656/dpc.c
10473     +++ b/drivers/staging/vt6656/dpc.c
10474     @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb,
10475    
10476     vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm);
10477    
10478     - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1;
10479     + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1;
10480     priv->current_rssi = priv->bb_pre_ed_rssi;
10481    
10482     frame = skb_data + 8;
10483     diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
10484     index 9636d8744347..b6c4f55f79e7 100644
10485     --- a/drivers/target/iscsi/iscsi_target.c
10486     +++ b/drivers/target/iscsi/iscsi_target.c
10487     @@ -1168,9 +1168,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
10488     hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length,
10489     conn->cid);
10490    
10491     - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
10492     - return iscsit_add_reject_cmd(cmd,
10493     - ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
10494     + target_get_sess_cmd(&cmd->se_cmd, true);
10495    
10496     cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd,
10497     scsilun_to_int(&hdr->lun));
10498     @@ -1988,9 +1986,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
10499     conn->sess->se_sess, 0, DMA_NONE,
10500     TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
10501    
10502     - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0)
10503     - return iscsit_add_reject_cmd(cmd,
10504     - ISCSI_REASON_WAITING_FOR_LOGOUT, buf);
10505     + target_get_sess_cmd(&cmd->se_cmd, true);
10506    
10507     /*
10508     * TASK_REASSIGN for ERL=2 / connection stays inside of
10509     @@ -4162,6 +4158,9 @@ int iscsit_close_connection(
10510     iscsit_stop_nopin_response_timer(conn);
10511     iscsit_stop_nopin_timer(conn);
10512    
10513     + if (conn->conn_transport->iscsit_wait_conn)
10514     + conn->conn_transport->iscsit_wait_conn(conn);
10515     +
10516     /*
10517     * During Connection recovery drop unacknowledged out of order
10518     * commands for this connection, and prepare the other commands
10519     @@ -4244,11 +4243,6 @@ int iscsit_close_connection(
10520     * must wait until they have completed.
10521     */
10522     iscsit_check_conn_usage_count(conn);
10523     - target_sess_cmd_list_set_waiting(sess->se_sess);
10524     - target_wait_for_sess_cmds(sess->se_sess);
10525     -
10526     - if (conn->conn_transport->iscsit_wait_conn)
10527     - conn->conn_transport->iscsit_wait_conn(conn);
10528    
10529     ahash_request_free(conn->conn_tx_hash);
10530     if (conn->conn_rx_hash) {
10531     diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
10532     index 325f9db2da86..4a7eb85f7c85 100644
10533     --- a/drivers/tty/serial/atmel_serial.c
10534     +++ b/drivers/tty/serial/atmel_serial.c
10535     @@ -501,7 +501,8 @@ static void atmel_stop_tx(struct uart_port *port)
10536     atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
10537    
10538     if (atmel_uart_is_half_duplex(port))
10539     - atmel_start_rx(port);
10540     + if (!atomic_read(&atmel_port->tasklet_shutdown))
10541     + atmel_start_rx(port);
10542    
10543     }
10544    
10545     diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
10546     index e75bd8d7e6f6..325c38c9b451 100644
10547     --- a/drivers/tty/serial/imx.c
10548     +++ b/drivers/tty/serial/imx.c
10549     @@ -532,7 +532,7 @@ static void imx_dma_tx(struct imx_port *sport)
10550    
10551     sport->tx_bytes = uart_circ_chars_pending(xmit);
10552    
10553     - if (xmit->tail < xmit->head) {
10554     + if (xmit->tail < xmit->head || xmit->head == 0) {
10555     sport->dma_tx_nents = 1;
10556     sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes);
10557     } else {
10558     diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
10559     index e645ee1cfd98..7446ce29f677 100644
10560     --- a/drivers/tty/synclink_gt.c
10561     +++ b/drivers/tty/synclink_gt.c
10562     @@ -1349,10 +1349,10 @@ static void throttle(struct tty_struct * tty)
10563     DBGINFO(("%s throttle\n", info->device_name));
10564     if (I_IXOFF(tty))
10565     send_xchar(tty, STOP_CHAR(tty));
10566     - if (C_CRTSCTS(tty)) {
10567     + if (C_CRTSCTS(tty)) {
10568     spin_lock_irqsave(&info->lock,flags);
10569     info->signals &= ~SerialSignal_RTS;
10570     - set_signals(info);
10571     + set_signals(info);
10572     spin_unlock_irqrestore(&info->lock,flags);
10573     }
10574     }
10575     @@ -1374,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty)
10576     else
10577     send_xchar(tty, START_CHAR(tty));
10578     }
10579     - if (C_CRTSCTS(tty)) {
10580     + if (C_CRTSCTS(tty)) {
10581     spin_lock_irqsave(&info->lock,flags);
10582     info->signals |= SerialSignal_RTS;
10583     - set_signals(info);
10584     + set_signals(info);
10585     spin_unlock_irqrestore(&info->lock,flags);
10586     }
10587     }
10588     @@ -2576,8 +2576,8 @@ static void change_params(struct slgt_info *info)
10589     info->read_status_mask = IRQ_RXOVER;
10590     if (I_INPCK(info->port.tty))
10591     info->read_status_mask |= MASK_PARITY | MASK_FRAMING;
10592     - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10593     - info->read_status_mask |= MASK_BREAK;
10594     + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10595     + info->read_status_mask |= MASK_BREAK;
10596     if (I_IGNPAR(info->port.tty))
10597     info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING;
10598     if (I_IGNBRK(info->port.tty)) {
10599     @@ -3208,7 +3208,7 @@ static int tiocmset(struct tty_struct *tty,
10600     info->signals &= ~SerialSignal_DTR;
10601    
10602     spin_lock_irqsave(&info->lock,flags);
10603     - set_signals(info);
10604     + set_signals(info);
10605     spin_unlock_irqrestore(&info->lock,flags);
10606     return 0;
10607     }
10608     @@ -3219,7 +3219,7 @@ static int carrier_raised(struct tty_port *port)
10609     struct slgt_info *info = container_of(port, struct slgt_info, port);
10610    
10611     spin_lock_irqsave(&info->lock,flags);
10612     - get_signals(info);
10613     + get_signals(info);
10614     spin_unlock_irqrestore(&info->lock,flags);
10615     return (info->signals & SerialSignal_DCD) ? 1 : 0;
10616     }
10617     @@ -3234,7 +3234,7 @@ static void dtr_rts(struct tty_port *port, int on)
10618     info->signals |= SerialSignal_RTS | SerialSignal_DTR;
10619     else
10620     info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
10621     - set_signals(info);
10622     + set_signals(info);
10623     spin_unlock_irqrestore(&info->lock,flags);
10624     }
10625    
10626     diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
10627     index dec156586de1..2f6df8d74b4a 100644
10628     --- a/drivers/tty/synclinkmp.c
10629     +++ b/drivers/tty/synclinkmp.c
10630     @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty)
10631     if (I_IXOFF(tty))
10632     send_xchar(tty, STOP_CHAR(tty));
10633    
10634     - if (C_CRTSCTS(tty)) {
10635     + if (C_CRTSCTS(tty)) {
10636     spin_lock_irqsave(&info->lock,flags);
10637     info->serial_signals &= ~SerialSignal_RTS;
10638     - set_signals(info);
10639     + set_signals(info);
10640     spin_unlock_irqrestore(&info->lock,flags);
10641     }
10642     }
10643     @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty)
10644     send_xchar(tty, START_CHAR(tty));
10645     }
10646    
10647     - if (C_CRTSCTS(tty)) {
10648     + if (C_CRTSCTS(tty)) {
10649     spin_lock_irqsave(&info->lock,flags);
10650     info->serial_signals |= SerialSignal_RTS;
10651     - set_signals(info);
10652     + set_signals(info);
10653     spin_unlock_irqrestore(&info->lock,flags);
10654     }
10655     }
10656     @@ -2485,7 +2485,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
10657     if (status & SerialSignal_CTS) {
10658     if ( debug_level >= DEBUG_LEVEL_ISR )
10659     printk("CTS tx start...");
10660     - info->port.tty->hw_stopped = 0;
10661     + info->port.tty->hw_stopped = 0;
10662     tx_start(info);
10663     info->pending_bh |= BH_TRANSMIT;
10664     return;
10665     @@ -2494,7 +2494,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status )
10666     if (!(status & SerialSignal_CTS)) {
10667     if ( debug_level >= DEBUG_LEVEL_ISR )
10668     printk("CTS tx stop...");
10669     - info->port.tty->hw_stopped = 1;
10670     + info->port.tty->hw_stopped = 1;
10671     tx_stop(info);
10672     }
10673     }
10674     @@ -2821,8 +2821,8 @@ static void change_params(SLMP_INFO *info)
10675     info->read_status_mask2 = OVRN;
10676     if (I_INPCK(info->port.tty))
10677     info->read_status_mask2 |= PE | FRME;
10678     - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10679     - info->read_status_mask1 |= BRKD;
10680     + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty))
10681     + info->read_status_mask1 |= BRKD;
10682     if (I_IGNPAR(info->port.tty))
10683     info->ignore_status_mask2 |= PE | FRME;
10684     if (I_IGNBRK(info->port.tty)) {
10685     @@ -3192,7 +3192,7 @@ static int tiocmget(struct tty_struct *tty)
10686     unsigned long flags;
10687    
10688     spin_lock_irqsave(&info->lock,flags);
10689     - get_signals(info);
10690     + get_signals(info);
10691     spin_unlock_irqrestore(&info->lock,flags);
10692    
10693     result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) |
10694     @@ -3230,7 +3230,7 @@ static int tiocmset(struct tty_struct *tty,
10695     info->serial_signals &= ~SerialSignal_DTR;
10696    
10697     spin_lock_irqsave(&info->lock,flags);
10698     - set_signals(info);
10699     + set_signals(info);
10700     spin_unlock_irqrestore(&info->lock,flags);
10701    
10702     return 0;
10703     @@ -3242,7 +3242,7 @@ static int carrier_raised(struct tty_port *port)
10704     unsigned long flags;
10705    
10706     spin_lock_irqsave(&info->lock,flags);
10707     - get_signals(info);
10708     + get_signals(info);
10709     spin_unlock_irqrestore(&info->lock,flags);
10710    
10711     return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
10712     @@ -3258,7 +3258,7 @@ static void dtr_rts(struct tty_port *port, int on)
10713     info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR;
10714     else
10715     info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
10716     - set_signals(info);
10717     + set_signals(info);
10718     spin_unlock_irqrestore(&info->lock,flags);
10719     }
10720    
10721     diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
10722     index 368ce1803e8f..6ac05021c4a7 100644
10723     --- a/drivers/tty/vt/selection.c
10724     +++ b/drivers/tty/vt/selection.c
10725     @@ -341,6 +341,7 @@ int paste_selection(struct tty_struct *tty)
10726     unsigned int count;
10727     struct tty_ldisc *ld;
10728     DECLARE_WAITQUEUE(wait, current);
10729     + int ret = 0;
10730    
10731     console_lock();
10732     poke_blanked_console();
10733     @@ -354,6 +355,10 @@ int paste_selection(struct tty_struct *tty)
10734     add_wait_queue(&vc->paste_wait, &wait);
10735     while (sel_buffer && sel_buffer_lth > pasted) {
10736     set_current_state(TASK_INTERRUPTIBLE);
10737     + if (signal_pending(current)) {
10738     + ret = -EINTR;
10739     + break;
10740     + }
10741     if (tty_throttled(tty)) {
10742     schedule();
10743     continue;
10744     @@ -369,5 +374,5 @@ int paste_selection(struct tty_struct *tty)
10745    
10746     tty_buffer_unlock_exclusive(&vc->port);
10747     tty_ldisc_deref(ld);
10748     - return 0;
10749     + return ret;
10750     }
10751     diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
10752     index 638eb9bbd59f..e8efb270dc8f 100644
10753     --- a/drivers/tty/vt/vt_ioctl.c
10754     +++ b/drivers/tty/vt/vt_ioctl.c
10755     @@ -850,58 +850,49 @@ int vt_ioctl(struct tty_struct *tty,
10756    
10757     case VT_RESIZEX:
10758     {
10759     - struct vt_consize __user *vtconsize = up;
10760     - ushort ll,cc,vlin,clin,vcol,ccol;
10761     + struct vt_consize v;
10762     if (!perm)
10763     return -EPERM;
10764     - if (!access_ok(VERIFY_READ, vtconsize,
10765     - sizeof(struct vt_consize))) {
10766     - ret = -EFAULT;
10767     - break;
10768     - }
10769     + if (copy_from_user(&v, up, sizeof(struct vt_consize)))
10770     + return -EFAULT;
10771     /* FIXME: Should check the copies properly */
10772     - __get_user(ll, &vtconsize->v_rows);
10773     - __get_user(cc, &vtconsize->v_cols);
10774     - __get_user(vlin, &vtconsize->v_vlin);
10775     - __get_user(clin, &vtconsize->v_clin);
10776     - __get_user(vcol, &vtconsize->v_vcol);
10777     - __get_user(ccol, &vtconsize->v_ccol);
10778     - vlin = vlin ? vlin : vc->vc_scan_lines;
10779     - if (clin) {
10780     - if (ll) {
10781     - if (ll != vlin/clin) {
10782     - /* Parameters don't add up */
10783     - ret = -EINVAL;
10784     - break;
10785     - }
10786     - } else
10787     - ll = vlin/clin;
10788     + if (!v.v_vlin)
10789     + v.v_vlin = vc->vc_scan_lines;
10790     + if (v.v_clin) {
10791     + int rows = v.v_vlin/v.v_clin;
10792     + if (v.v_rows != rows) {
10793     + if (v.v_rows) /* Parameters don't add up */
10794     + return -EINVAL;
10795     + v.v_rows = rows;
10796     + }
10797     }
10798     - if (vcol && ccol) {
10799     - if (cc) {
10800     - if (cc != vcol/ccol) {
10801     - ret = -EINVAL;
10802     - break;
10803     - }
10804     - } else
10805     - cc = vcol/ccol;
10806     + if (v.v_vcol && v.v_ccol) {
10807     + int cols = v.v_vcol/v.v_ccol;
10808     + if (v.v_cols != cols) {
10809     + if (v.v_cols)
10810     + return -EINVAL;
10811     + v.v_cols = cols;
10812     + }
10813     }
10814    
10815     - if (clin > 32) {
10816     - ret = -EINVAL;
10817     - break;
10818     - }
10819     -
10820     + if (v.v_clin > 32)
10821     + return -EINVAL;
10822     +
10823     for (i = 0; i < MAX_NR_CONSOLES; i++) {
10824     + struct vc_data *vcp;
10825     +
10826     if (!vc_cons[i].d)
10827     continue;
10828     console_lock();
10829     - if (vlin)
10830     - vc_cons[i].d->vc_scan_lines = vlin;
10831     - if (clin)
10832     - vc_cons[i].d->vc_font.height = clin;
10833     - vc_cons[i].d->vc_resize_user = 1;
10834     - vc_resize(vc_cons[i].d, cc, ll);
10835     + vcp = vc_cons[i].d;
10836     + if (vcp) {
10837     + if (v.v_vlin)
10838     + vcp->vc_scan_lines = v.v_vlin;
10839     + if (v.v_clin)
10840     + vcp->vc_font.height = v.v_clin;
10841     + vcp->vc_resize_user = 1;
10842     + vc_resize(vcp, v.v_cols, v.v_rows);
10843     + }
10844     console_unlock();
10845     }
10846     break;
10847     diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
10848     index e1134a4d97f3..a00b4aee6c79 100644
10849     --- a/drivers/uio/uio_dmem_genirq.c
10850     +++ b/drivers/uio/uio_dmem_genirq.c
10851     @@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
10852     if (irq_on) {
10853     if (test_and_clear_bit(0, &priv->flags))
10854     enable_irq(dev_info->irq);
10855     + spin_unlock_irqrestore(&priv->lock, flags);
10856     } else {
10857     - if (!test_and_set_bit(0, &priv->flags))
10858     + if (!test_and_set_bit(0, &priv->flags)) {
10859     + spin_unlock_irqrestore(&priv->lock, flags);
10860     disable_irq(dev_info->irq);
10861     + }
10862     }
10863     - spin_unlock_irqrestore(&priv->lock, flags);
10864    
10865     return 0;
10866     }
10867     diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
10868     index 9f05f9a81f69..3fcc3e74ae2e 100644
10869     --- a/drivers/usb/core/hub.c
10870     +++ b/drivers/usb/core/hub.c
10871     @@ -1187,11 +1187,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type)
10872     #ifdef CONFIG_PM
10873     udev->reset_resume = 1;
10874     #endif
10875     - /* Don't set the change_bits when the device
10876     - * was powered off.
10877     - */
10878     - if (test_bit(port1, hub->power_bits))
10879     - set_bit(port1, hub->change_bits);
10880    
10881     } else {
10882     /* The power session is gone; tell hub_wq */
10883     diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
10884     index 19e819aa2419..ad8307140df8 100644
10885     --- a/drivers/usb/core/quirks.c
10886     +++ b/drivers/usb/core/quirks.c
10887     @@ -291,6 +291,9 @@ static const struct usb_device_id usb_quirk_list[] = {
10888     /* INTEL VALUE SSD */
10889     { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
10890    
10891     + /* novation SoundControl XL */
10892     + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
10893     +
10894     { } /* terminating entry must be last */
10895     };
10896    
10897     diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
10898     index 854c4ec0af2c..4d7df2f6caf5 100644
10899     --- a/drivers/usb/gadget/composite.c
10900     +++ b/drivers/usb/gadget/composite.c
10901     @@ -437,12 +437,10 @@ static u8 encode_bMaxPower(enum usb_device_speed speed,
10902     val = CONFIG_USB_GADGET_VBUS_DRAW;
10903     if (!val)
10904     return 0;
10905     - switch (speed) {
10906     - case USB_SPEED_SUPER:
10907     - return DIV_ROUND_UP(val, 8);
10908     - default:
10909     + if (speed < USB_SPEED_SUPER)
10910     return DIV_ROUND_UP(val, 2);
10911     - }
10912     + else
10913     + return DIV_ROUND_UP(val, 8);
10914     }
10915    
10916     static int config_buf(struct usb_configuration *config,
10917     diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
10918     index 39b7136d31d9..9e246d2e55ca 100644
10919     --- a/drivers/usb/gadget/udc/gr_udc.c
10920     +++ b/drivers/usb/gadget/udc/gr_udc.c
10921     @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev)
10922     return -ENOMEM;
10923     }
10924    
10925     - spin_lock(&dev->lock);
10926     -
10927     /* Inside lock so that no gadget can use this udc until probe is done */
10928     retval = usb_add_gadget_udc(dev->dev, &dev->gadget);
10929     if (retval) {
10930     @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev)
10931     }
10932     dev->added = 1;
10933    
10934     + spin_lock(&dev->lock);
10935     +
10936     retval = gr_udc_init(dev);
10937     - if (retval)
10938     + if (retval) {
10939     + spin_unlock(&dev->lock);
10940     goto out;
10941     -
10942     - gr_dfs_create(dev);
10943     + }
10944    
10945     /* Clear all interrupt enables that might be left on since last boot */
10946     gr_disable_interrupts_and_pullup(dev);
10947    
10948     + spin_unlock(&dev->lock);
10949     +
10950     + gr_dfs_create(dev);
10951     +
10952     retval = gr_request_irq(dev, dev->irq);
10953     if (retval) {
10954     dev_err(dev->dev, "Failed to request irq %d\n", dev->irq);
10955     @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev)
10956     dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq);
10957    
10958     out:
10959     - spin_unlock(&dev->lock);
10960     -
10961     if (retval)
10962     gr_remove(pdev);
10963    
10964     diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
10965     index aad64a26a767..3cca60b845a8 100644
10966     --- a/drivers/usb/host/xhci-mem.c
10967     +++ b/drivers/usb/host/xhci-mem.c
10968     @@ -1532,9 +1532,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
10969     /* Allow 3 retries for everything but isoc, set CErr = 3 */
10970     if (!usb_endpoint_xfer_isoc(&ep->desc))
10971     err_count = 3;
10972     - /* Some devices get this wrong */
10973     - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH)
10974     - max_packet = 512;
10975     + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */
10976     + if (usb_endpoint_xfer_bulk(&ep->desc)) {
10977     + if (udev->speed == USB_SPEED_HIGH)
10978     + max_packet = 512;
10979     + if (udev->speed == USB_SPEED_FULL) {
10980     + max_packet = rounddown_pow_of_two(max_packet);
10981     + max_packet = clamp_val(max_packet, 8, 64);
10982     + }
10983     + }
10984     /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */
10985     if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100)
10986     avg_trb_len = 8;
10987     diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
10988     index aec6b20262e9..4355fbc36fce 100644
10989     --- a/drivers/usb/host/xhci-pci.c
10990     +++ b/drivers/usb/host/xhci-pci.c
10991     @@ -53,6 +53,7 @@
10992     #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8
10993     #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
10994     #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0
10995     +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af
10996    
10997     #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142
10998    
10999     @@ -170,7 +171,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
11000     pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
11001     pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI ||
11002     pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI ||
11003     - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) {
11004     + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI ||
11005     + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) {
11006     xhci->quirks |= XHCI_PME_STUCK_QUIRK;
11007     }
11008     if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
11009     diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
11010     index e8be8e39ab8f..457ad33f4caa 100644
11011     --- a/drivers/usb/musb/omap2430.c
11012     +++ b/drivers/usb/musb/omap2430.c
11013     @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = {
11014     .init = omap2430_musb_init,
11015     .exit = omap2430_musb_exit,
11016    
11017     - .set_vbus = omap2430_musb_set_vbus,
11018     -
11019     .enable = omap2430_musb_enable,
11020     .disable = omap2430_musb_disable,
11021    
11022     diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
11023     index a6999042e7ad..d022b5ff4cd0 100644
11024     --- a/drivers/usb/storage/uas.c
11025     +++ b/drivers/usb/storage/uas.c
11026     @@ -46,6 +46,7 @@ struct uas_dev_info {
11027     struct scsi_cmnd *cmnd[MAX_CMNDS];
11028     spinlock_t lock;
11029     struct work_struct work;
11030     + struct work_struct scan_work; /* for async scanning */
11031     };
11032    
11033     enum {
11034     @@ -115,6 +116,17 @@ out:
11035     spin_unlock_irqrestore(&devinfo->lock, flags);
11036     }
11037    
11038     +static void uas_scan_work(struct work_struct *work)
11039     +{
11040     + struct uas_dev_info *devinfo =
11041     + container_of(work, struct uas_dev_info, scan_work);
11042     + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf);
11043     +
11044     + dev_dbg(&devinfo->intf->dev, "starting scan\n");
11045     + scsi_scan_host(shost);
11046     + dev_dbg(&devinfo->intf->dev, "scan complete\n");
11047     +}
11048     +
11049     static void uas_add_work(struct uas_cmd_info *cmdinfo)
11050     {
11051     struct scsi_pointer *scp = (void *)cmdinfo;
11052     @@ -989,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
11053     init_usb_anchor(&devinfo->data_urbs);
11054     spin_lock_init(&devinfo->lock);
11055     INIT_WORK(&devinfo->work, uas_do_work);
11056     + INIT_WORK(&devinfo->scan_work, uas_scan_work);
11057    
11058     result = uas_configure_endpoints(devinfo);
11059     if (result)
11060     @@ -1005,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
11061     if (result)
11062     goto free_streams;
11063    
11064     - scsi_scan_host(shost);
11065     + /* Submit the scan_work for SCSI-device scanning */
11066     + schedule_work(&devinfo->scan_work);
11067     +
11068     return result;
11069    
11070     free_streams:
11071     @@ -1173,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf)
11072     usb_kill_anchored_urbs(&devinfo->data_urbs);
11073     uas_zap_pending(devinfo, DID_NO_CONNECT);
11074    
11075     + /*
11076     + * Prevent SCSI scanning (if it hasn't started yet)
11077     + * or wait for the SCSI-scanning routine to stop.
11078     + */
11079     + cancel_work_sync(&devinfo->scan_work);
11080     +
11081     scsi_remove_host(shost);
11082     uas_free_streams(devinfo);
11083     scsi_host_put(shost);
11084     diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c
11085     index d059d04c63ac..20195d3dbf08 100644
11086     --- a/drivers/video/fbdev/pxa168fb.c
11087     +++ b/drivers/video/fbdev/pxa168fb.c
11088     @@ -769,8 +769,8 @@ failed_free_cmap:
11089     failed_free_clk:
11090     clk_disable_unprepare(fbi->clk);
11091     failed_free_fbmem:
11092     - dma_free_coherent(fbi->dev, info->fix.smem_len,
11093     - info->screen_base, fbi->fb_start_dma);
11094     + dma_free_wc(fbi->dev, info->fix.smem_len,
11095     + info->screen_base, fbi->fb_start_dma);
11096     failed_free_info:
11097     kfree(info);
11098    
11099     @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev)
11100    
11101     irq = platform_get_irq(pdev, 0);
11102    
11103     - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
11104     + dma_free_wc(fbi->dev, info->fix.smem_len,
11105     info->screen_base, info->fix.smem_start);
11106    
11107     clk_disable_unprepare(fbi->clk);
11108     diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
11109     index 30b3acc93833..e81ec763b555 100644
11110     --- a/drivers/vme/bridges/vme_fake.c
11111     +++ b/drivers/vme/bridges/vme_fake.c
11112     @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
11113     }
11114     }
11115    
11116     -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
11117     - u32 aspace, u32 cycle)
11118     +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge,
11119     + unsigned long long addr,
11120     + u32 aspace, u32 cycle)
11121     {
11122     u8 retval = 0xff;
11123     int i;
11124     @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
11125     return retval;
11126     }
11127    
11128     -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
11129     - u32 aspace, u32 cycle)
11130     +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge,
11131     + unsigned long long addr,
11132     + u32 aspace, u32 cycle)
11133     {
11134     u16 retval = 0xffff;
11135     int i;
11136     @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
11137     return retval;
11138     }
11139    
11140     -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr,
11141     - u32 aspace, u32 cycle)
11142     +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge,
11143     + unsigned long long addr,
11144     + u32 aspace, u32 cycle)
11145     {
11146     u32 retval = 0xffffffff;
11147     int i;
11148     @@ -613,8 +616,9 @@ out:
11149     return retval;
11150     }
11151    
11152     -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
11153     - unsigned long long addr, u32 aspace, u32 cycle)
11154     +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge,
11155     + u8 *buf, unsigned long long addr,
11156     + u32 aspace, u32 cycle)
11157     {
11158     int i;
11159     unsigned long long start, end, offset;
11160     @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
11161    
11162     }
11163    
11164     -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
11165     - unsigned long long addr, u32 aspace, u32 cycle)
11166     +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge,
11167     + u16 *buf, unsigned long long addr,
11168     + u32 aspace, u32 cycle)
11169     {
11170     int i;
11171     unsigned long long start, end, offset;
11172     @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
11173    
11174     }
11175    
11176     -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
11177     - unsigned long long addr, u32 aspace, u32 cycle)
11178     +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge,
11179     + u32 *buf, unsigned long long addr,
11180     + u32 aspace, u32 cycle)
11181     {
11182     int i;
11183     unsigned long long start, end, offset;
11184     diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
11185     index 08cb419eb4e6..5f6b77ea34fb 100644
11186     --- a/drivers/xen/preempt.c
11187     +++ b/drivers/xen/preempt.c
11188     @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
11189     * cpu.
11190     */
11191     __this_cpu_write(xen_in_preemptible_hcall, false);
11192     - _cond_resched();
11193     + local_irq_enable();
11194     + cond_resched();
11195     + local_irq_disable();
11196     __this_cpu_write(xen_in_preemptible_hcall, true);
11197     }
11198     }
11199     diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
11200     index e3524ecce3d7..390053557d4d 100644
11201     --- a/fs/btrfs/disk-io.c
11202     +++ b/fs/btrfs/disk-io.c
11203     @@ -2979,6 +2979,7 @@ retry_root_backup:
11204     /* do not make disk changes in broken FS or nologreplay is given */
11205     if (btrfs_super_log_root(disk_super) != 0 &&
11206     !btrfs_test_opt(tree_root->fs_info, NOLOGREPLAY)) {
11207     + btrfs_info(fs_info, "start tree-log replay");
11208     ret = btrfs_replay_log(fs_info, fs_devices);
11209     if (ret) {
11210     err = ret;
11211     diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
11212     index 26f9ac719d20..4f59b4089eb0 100644
11213     --- a/fs/btrfs/extent_map.c
11214     +++ b/fs/btrfs/extent_map.c
11215     @@ -227,6 +227,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
11216     struct extent_map *merge = NULL;
11217     struct rb_node *rb;
11218    
11219     + /*
11220     + * We can't modify an extent map that is in the tree and that is being
11221     + * used by another task, as it can cause that other task to see it in
11222     + * inconsistent state during the merging. We always have 1 reference for
11223     + * the tree and 1 for this task (which is unpinning the extent map or
11224     + * clearing the logging flag), so anything > 2 means it's being used by
11225     + * other tasks too.
11226     + */
11227     + if (atomic_read(&em->refs) > 2)
11228     + return;
11229     +
11230     if (em->start != 0) {
11231     rb = rb_prev(&em->rb_node);
11232     if (rb)
11233     diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
11234     index b2d1e95de7be..7dc2284017fa 100644
11235     --- a/fs/btrfs/ordered-data.c
11236     +++ b/fs/btrfs/ordered-data.c
11237     @@ -837,10 +837,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
11238     }
11239     btrfs_start_ordered_extent(inode, ordered, 1);
11240     end = ordered->file_offset;
11241     + /*
11242     + * If the ordered extent had an error save the error but don't
11243     + * exit without waiting first for all other ordered extents in
11244     + * the range to complete.
11245     + */
11246     if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
11247     ret = -EIO;
11248     btrfs_put_ordered_extent(ordered);
11249     - if (ret || end == 0 || end == start)
11250     + if (end == 0 || end == start)
11251     break;
11252     end--;
11253     }
11254     diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
11255     index 0c71cdd3f98b..9286603a6a98 100644
11256     --- a/fs/btrfs/super.c
11257     +++ b/fs/btrfs/super.c
11258     @@ -1809,6 +1809,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
11259     }
11260    
11261     if (btrfs_super_log_root(fs_info->super_copy) != 0) {
11262     + btrfs_warn(fs_info,
11263     + "mount required to replay tree-log, cannot remount read-write");
11264     ret = -EINVAL;
11265     goto restore;
11266     }
11267     diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
11268     index 751bdde6515d..961fcb40183a 100644
11269     --- a/fs/cifs/connect.c
11270     +++ b/fs/cifs/connect.c
11271     @@ -2927,8 +2927,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
11272     {
11273     struct cifs_sb_info *old = CIFS_SB(sb);
11274     struct cifs_sb_info *new = mnt_data->cifs_sb;
11275     - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
11276     - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH;
11277     + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
11278     + old->prepath;
11279     + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) &&
11280     + new->prepath;
11281    
11282     if (old_set && new_set && !strcmp(new->prepath, old->prepath))
11283     return 1;
11284     diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
11285     index cb77e7ee2c9f..ff6cf23be8a2 100644
11286     --- a/fs/ecryptfs/crypto.c
11287     +++ b/fs/ecryptfs/crypto.c
11288     @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
11289     struct extent_crypt_result ecr;
11290     int rc = 0;
11291    
11292     - BUG_ON(!crypt_stat || !crypt_stat->tfm
11293     - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
11294     + if (!crypt_stat || !crypt_stat->tfm
11295     + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
11296     + return -EINVAL;
11297     +
11298     if (unlikely(ecryptfs_verbosity > 0)) {
11299     ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
11300     crypt_stat->key_size);
11301     diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
11302     index fa218cd64f74..3f3ec50bf773 100644
11303     --- a/fs/ecryptfs/keystore.c
11304     +++ b/fs/ecryptfs/keystore.c
11305     @@ -1285,7 +1285,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
11306     printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n",
11307     ECRYPTFS_TAG_1_PACKET_TYPE);
11308     rc = -EINVAL;
11309     - goto out;
11310     + goto out_free;
11311     }
11312     /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
11313     * at end of function upon failure */
11314     diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
11315     index 4f457d5c4933..26464f9d9b76 100644
11316     --- a/fs/ecryptfs/messaging.c
11317     +++ b/fs/ecryptfs/messaging.c
11318     @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void)
11319     * ecryptfs_message_buf_len),
11320     GFP_KERNEL);
11321     if (!ecryptfs_msg_ctx_arr) {
11322     + kfree(ecryptfs_daemon_hash);
11323     rc = -ENOMEM;
11324     printk(KERN_ERR "%s: Failed to allocate memory\n", __func__);
11325     goto out;
11326     diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
11327     index 7fb8df7b6a43..6b3a32f75dad 100644
11328     --- a/fs/ext4/dir.c
11329     +++ b/fs/ext4/dir.c
11330     @@ -124,12 +124,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
11331     if (err != ERR_BAD_DX_DIR) {
11332     return err;
11333     }
11334     - /*
11335     - * We don't set the inode dirty flag since it's not
11336     - * critical that it get flushed back to the disk.
11337     - */
11338     - ext4_clear_inode_flag(file_inode(file),
11339     - EXT4_INODE_INDEX);
11340     + /* Can we just clear INDEX flag to ignore htree information? */
11341     + if (!ext4_has_metadata_csum(sb)) {
11342     + /*
11343     + * We don't set the inode dirty flag since it's not
11344     + * critical that it gets flushed back to the disk.
11345     + */
11346     + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
11347     + }
11348     }
11349    
11350     if (ext4_has_inline_data(inode)) {
11351     diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
11352     index 567a6c7af677..9713d3d41412 100644
11353     --- a/fs/ext4/ext4.h
11354     +++ b/fs/ext4/ext4.h
11355     @@ -1514,8 +1514,11 @@ struct ext4_sb_info {
11356     struct ratelimit_state s_warning_ratelimit_state;
11357     struct ratelimit_state s_msg_ratelimit_state;
11358    
11359     - /* Barrier between changing inodes' journal flags and writepages ops. */
11360     - struct percpu_rw_semaphore s_journal_flag_rwsem;
11361     + /*
11362     + * Barrier between writepages ops and changing any inode's JOURNAL_DATA
11363     + * or EXTENTS flag.
11364     + */
11365     + struct percpu_rw_semaphore s_writepages_rwsem;
11366    
11367     /* Encryption support */
11368     #ifdef CONFIG_EXT4_FS_ENCRYPTION
11369     @@ -2375,8 +2378,11 @@ int ext4_insert_dentry(struct inode *dir,
11370     struct ext4_filename *fname);
11371     static inline void ext4_update_dx_flag(struct inode *inode)
11372     {
11373     - if (!ext4_has_feature_dir_index(inode->i_sb))
11374     + if (!ext4_has_feature_dir_index(inode->i_sb)) {
11375     + /* ext4_iget() should have caught this... */
11376     + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
11377     ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
11378     + }
11379     }
11380     static unsigned char ext4_filetype_table[] = {
11381     DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
11382     @@ -2848,7 +2854,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
11383     !inode_is_locked(inode));
11384     down_write(&EXT4_I(inode)->i_data_sem);
11385     if (newsize > EXT4_I(inode)->i_disksize)
11386     - EXT4_I(inode)->i_disksize = newsize;
11387     + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);
11388     up_write(&EXT4_I(inode)->i_data_sem);
11389     }
11390    
11391     diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
11392     index 8133e6529994..911a49e861d2 100644
11393     --- a/fs/ext4/inode.c
11394     +++ b/fs/ext4/inode.c
11395     @@ -2475,7 +2475,7 @@ update_disksize:
11396     * truncate are avoided by checking i_size under i_data_sem.
11397     */
11398     disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
11399     - if (disksize > EXT4_I(inode)->i_disksize) {
11400     + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
11401     int err2;
11402     loff_t i_size;
11403    
11404     @@ -2652,7 +2652,7 @@ static int ext4_writepages(struct address_space *mapping,
11405     struct blk_plug plug;
11406     bool give_up_on_write = false;
11407    
11408     - percpu_down_read(&sbi->s_journal_flag_rwsem);
11409     + percpu_down_read(&sbi->s_writepages_rwsem);
11410     trace_ext4_writepages(inode, wbc);
11411    
11412     if (dax_mapping(mapping)) {
11413     @@ -2853,7 +2853,7 @@ retry:
11414     out_writepages:
11415     trace_ext4_writepages_result(inode, wbc, ret,
11416     nr_to_write - wbc->nr_to_write);
11417     - percpu_up_read(&sbi->s_journal_flag_rwsem);
11418     + percpu_up_read(&sbi->s_writepages_rwsem);
11419     return ret;
11420     }
11421    
11422     @@ -4594,6 +4594,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
11423     ret = -EFSCORRUPTED;
11424     goto bad_inode;
11425     }
11426     + /*
11427     + * If dir_index is not enabled but there's dir with INDEX flag set,
11428     + * we'd normally treat htree data as empty space. But with metadata
11429     + * checksumming that corrupts checksums so forbid that.
11430     + */
11431     + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
11432     + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
11433     + EXT4_ERROR_INODE(inode,
11434     + "iget: Dir with htree data on filesystem without dir_index feature.");
11435     + ret = -EFSCORRUPTED;
11436     + goto bad_inode;
11437     + }
11438     ei->i_disksize = inode->i_size;
11439     #ifdef CONFIG_QUOTA
11440     ei->i_reserved_quota = 0;
11441     @@ -5676,7 +5688,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11442     }
11443     }
11444    
11445     - percpu_down_write(&sbi->s_journal_flag_rwsem);
11446     + percpu_down_write(&sbi->s_writepages_rwsem);
11447     jbd2_journal_lock_updates(journal);
11448    
11449     /*
11450     @@ -5693,7 +5705,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11451     err = jbd2_journal_flush(journal);
11452     if (err < 0) {
11453     jbd2_journal_unlock_updates(journal);
11454     - percpu_up_write(&sbi->s_journal_flag_rwsem);
11455     + percpu_up_write(&sbi->s_writepages_rwsem);
11456     ext4_inode_resume_unlocked_dio(inode);
11457     return err;
11458     }
11459     @@ -5702,7 +5714,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
11460     ext4_set_aops(inode);
11461    
11462     jbd2_journal_unlock_updates(journal);
11463     - percpu_up_write(&sbi->s_journal_flag_rwsem);
11464     + percpu_up_write(&sbi->s_writepages_rwsem);
11465    
11466     if (val)
11467     up_write(&EXT4_I(inode)->i_mmap_sem);
11468     diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
11469     index 364ea4d4a943..bce2d696d6b9 100644
11470     --- a/fs/ext4/migrate.c
11471     +++ b/fs/ext4/migrate.c
11472     @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
11473    
11474     int ext4_ext_migrate(struct inode *inode)
11475     {
11476     + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
11477     handle_t *handle;
11478     int retval = 0, i;
11479     __le32 *i_data;
11480     @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode)
11481     */
11482     return retval;
11483    
11484     + percpu_down_write(&sbi->s_writepages_rwsem);
11485     +
11486     /*
11487     * Worst case we can touch the allocation bitmaps, a bgd
11488     * block, and a block to link in the orphan list. We do need
11489     @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
11490    
11491     if (IS_ERR(handle)) {
11492     retval = PTR_ERR(handle);
11493     - return retval;
11494     + goto out_unlock;
11495     }
11496     goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
11497     EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
11498     @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode)
11499     if (IS_ERR(tmp_inode)) {
11500     retval = PTR_ERR(tmp_inode);
11501     ext4_journal_stop(handle);
11502     - return retval;
11503     + goto out_unlock;
11504     }
11505     i_size_write(tmp_inode, i_size_read(inode));
11506     /*
11507     @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode)
11508     */
11509     ext4_orphan_del(NULL, tmp_inode);
11510     retval = PTR_ERR(handle);
11511     - goto out;
11512     + goto out_tmp_inode;
11513     }
11514    
11515     ei = EXT4_I(inode);
11516     @@ -602,10 +605,11 @@ err_out:
11517     /* Reset the extent details */
11518     ext4_ext_tree_init(handle, tmp_inode);
11519     ext4_journal_stop(handle);
11520     -out:
11521     +out_tmp_inode:
11522     unlock_new_inode(tmp_inode);
11523     iput(tmp_inode);
11524     -
11525     +out_unlock:
11526     + percpu_up_write(&sbi->s_writepages_rwsem);
11527     return retval;
11528     }
11529    
11530     @@ -615,7 +619,8 @@ out:
11531     int ext4_ind_migrate(struct inode *inode)
11532     {
11533     struct ext4_extent_header *eh;
11534     - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
11535     + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
11536     + struct ext4_super_block *es = sbi->s_es;
11537     struct ext4_inode_info *ei = EXT4_I(inode);
11538     struct ext4_extent *ex;
11539     unsigned int i, len;
11540     @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode)
11541     if (test_opt(inode->i_sb, DELALLOC))
11542     ext4_alloc_da_blocks(inode);
11543    
11544     + percpu_down_write(&sbi->s_writepages_rwsem);
11545     +
11546     handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
11547     - if (IS_ERR(handle))
11548     - return PTR_ERR(handle);
11549     + if (IS_ERR(handle)) {
11550     + ret = PTR_ERR(handle);
11551     + goto out_unlock;
11552     + }
11553    
11554     down_write(&EXT4_I(inode)->i_data_sem);
11555     ret = ext4_ext_check_inode(inode);
11556     @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode)
11557     errout:
11558     ext4_journal_stop(handle);
11559     up_write(&EXT4_I(inode)->i_data_sem);
11560     +out_unlock:
11561     + percpu_up_write(&sbi->s_writepages_rwsem);
11562     return ret;
11563     }
11564     diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
11565     index c2e830a6206d..fb1ad9510c5f 100644
11566     --- a/fs/ext4/mmp.c
11567     +++ b/fs/ext4/mmp.c
11568     @@ -119,10 +119,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
11569     {
11570     __ext4_warning(sb, function, line, "%s", msg);
11571     __ext4_warning(sb, function, line,
11572     - "MMP failure info: last update time: %llu, last update "
11573     - "node: %s, last update device: %s",
11574     - (long long unsigned int) le64_to_cpu(mmp->mmp_time),
11575     - mmp->mmp_nodename, mmp->mmp_bdevname);
11576     + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
11577     + (unsigned long long)le64_to_cpu(mmp->mmp_time),
11578     + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
11579     + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
11580     }
11581    
11582     /*
11583     @@ -153,6 +153,7 @@ static int kmmpd(void *data)
11584     mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
11585     EXT4_MMP_MIN_CHECK_INTERVAL);
11586     mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
11587     + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
11588     bdevname(bh->b_bdev, mmp->mmp_bdevname);
11589    
11590     memcpy(mmp->mmp_nodename, init_utsname()->nodename,
11591     @@ -377,7 +378,8 @@ skip:
11592     /*
11593     * Start a kernel thread to update the MMP block periodically.
11594     */
11595     - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
11596     + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s",
11597     + (int)sizeof(mmp->mmp_bdevname),
11598     bdevname(bh->b_bdev,
11599     mmp->mmp_bdevname));
11600     if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
11601     diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
11602     index f0ce535d514c..339ede11896a 100644
11603     --- a/fs/ext4/namei.c
11604     +++ b/fs/ext4/namei.c
11605     @@ -1445,6 +1445,7 @@ restart:
11606     /*
11607     * We deal with the read-ahead logic here.
11608     */
11609     + cond_resched();
11610     if (ra_ptr >= ra_max) {
11611     /* Refill the readahead buffer */
11612     ra_ptr = 0;
11613     @@ -2148,6 +2149,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
11614     retval = ext4_dx_add_entry(handle, &fname, dir, inode);
11615     if (!retval || (retval != ERR_BAD_DX_DIR))
11616     goto out;
11617     + /* Can we just ignore htree data? */
11618     + if (ext4_has_metadata_csum(sb)) {
11619     + EXT4_ERROR_INODE(dir,
11620     + "Directory has corrupted htree index.");
11621     + retval = -EFSCORRUPTED;
11622     + goto out;
11623     + }
11624     ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
11625     dx_fallback++;
11626     ext4_mark_inode_dirty(handle, dir);
11627     diff --git a/fs/ext4/super.c b/fs/ext4/super.c
11628     index 391ab55808c9..b69a78c061cb 100644
11629     --- a/fs/ext4/super.c
11630     +++ b/fs/ext4/super.c
11631     @@ -865,7 +865,7 @@ static void ext4_put_super(struct super_block *sb)
11632     percpu_counter_destroy(&sbi->s_freeinodes_counter);
11633     percpu_counter_destroy(&sbi->s_dirs_counter);
11634     percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
11635     - percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
11636     + percpu_free_rwsem(&sbi->s_writepages_rwsem);
11637     brelse(sbi->s_sbh);
11638     #ifdef CONFIG_QUOTA
11639     for (i = 0; i < EXT4_MAXQUOTAS; i++)
11640     @@ -2743,17 +2743,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
11641     return 0;
11642     }
11643    
11644     -#ifndef CONFIG_QUOTA
11645     - if (ext4_has_feature_quota(sb) && !readonly) {
11646     +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
11647     + if (!readonly && (ext4_has_feature_quota(sb) ||
11648     + ext4_has_feature_project(sb))) {
11649     ext4_msg(sb, KERN_ERR,
11650     - "Filesystem with quota feature cannot be mounted RDWR "
11651     - "without CONFIG_QUOTA");
11652     - return 0;
11653     - }
11654     - if (ext4_has_feature_project(sb) && !readonly) {
11655     - ext4_msg(sb, KERN_ERR,
11656     - "Filesystem with project quota feature cannot be mounted RDWR "
11657     - "without CONFIG_QUOTA");
11658     + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
11659     return 0;
11660     }
11661     #endif /* CONFIG_QUOTA */
11662     @@ -4229,7 +4223,7 @@ no_journal:
11663     err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
11664     GFP_KERNEL);
11665     if (!err)
11666     - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem);
11667     + err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
11668    
11669     if (err) {
11670     ext4_msg(sb, KERN_ERR, "insufficient memory");
11671     @@ -4328,7 +4322,7 @@ failed_mount6:
11672     percpu_counter_destroy(&sbi->s_freeinodes_counter);
11673     percpu_counter_destroy(&sbi->s_dirs_counter);
11674     percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
11675     - percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
11676     + percpu_free_rwsem(&sbi->s_writepages_rwsem);
11677     failed_mount5:
11678     ext4_ext_release(sb);
11679     ext4_release_system_zone(sb);
11680     diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
11681     index 4d5a5a4cc017..addb0784dd1c 100644
11682     --- a/fs/jbd2/checkpoint.c
11683     +++ b/fs/jbd2/checkpoint.c
11684     @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
11685     "journal space in %s\n", __func__,
11686     journal->j_devname);
11687     WARN_ON(1);
11688     - jbd2_journal_abort(journal, 0);
11689     + jbd2_journal_abort(journal, -EIO);
11690     }
11691     write_lock(&journal->j_state_lock);
11692     } else {
11693     diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
11694     index d002b2b6895f..1d06f81ee8b4 100644
11695     --- a/fs/jbd2/commit.c
11696     +++ b/fs/jbd2/commit.c
11697     @@ -779,7 +779,7 @@ start_journal_io:
11698     err = journal_submit_commit_record(journal, commit_transaction,
11699     &cbh, crc32_sum);
11700     if (err)
11701     - __jbd2_journal_abort_hard(journal);
11702     + jbd2_journal_abort(journal, err);
11703     }
11704    
11705     blk_finish_plug(&plug);
11706     @@ -872,7 +872,7 @@ start_journal_io:
11707     err = journal_submit_commit_record(journal, commit_transaction,
11708     &cbh, crc32_sum);
11709     if (err)
11710     - __jbd2_journal_abort_hard(journal);
11711     + jbd2_journal_abort(journal, err);
11712     }
11713     if (cbh)
11714     err = journal_wait_on_commit_record(journal, cbh);
11715     @@ -969,29 +969,33 @@ restart_loop:
11716     * it. */
11717    
11718     /*
11719     - * A buffer which has been freed while still being journaled by
11720     - * a previous transaction.
11721     - */
11722     - if (buffer_freed(bh)) {
11723     + * A buffer which has been freed while still being journaled
11724     + * by a previous transaction, refile the buffer to BJ_Forget of
11725     + * the running transaction. If the just committed transaction
11726     + * contains "add to orphan" operation, we can completely
11727     + * invalidate the buffer now. We are rather thorough in that
11728     + * since the buffer may still be accessible when blocksize <
11729     + * pagesize and it is attached to the last partial page.
11730     + */
11731     + if (buffer_freed(bh) && !jh->b_next_transaction) {
11732     + struct address_space *mapping;
11733     +
11734     + clear_buffer_freed(bh);
11735     + clear_buffer_jbddirty(bh);
11736     +
11737     /*
11738     - * If the running transaction is the one containing
11739     - * "add to orphan" operation (b_next_transaction !=
11740     - * NULL), we have to wait for that transaction to
11741     - * commit before we can really get rid of the buffer.
11742     - * So just clear b_modified to not confuse transaction
11743     - * credit accounting and refile the buffer to
11744     - * BJ_Forget of the running transaction. If the just
11745     - * committed transaction contains "add to orphan"
11746     - * operation, we can completely invalidate the buffer
11747     - * now. We are rather through in that since the
11748     - * buffer may be still accessible when blocksize <
11749     - * pagesize and it is attached to the last partial
11750     - * page.
11751     + * Block device buffers need to stay mapped all the
11752     + * time, so it is enough to clear buffer_jbddirty and
11753     + * buffer_freed bits. For the file mapping buffers (i.e.
11754     + * journalled data) we need to unmap buffer and clear
11755     + * more bits. We also need to be careful about the check
11756     + * because the data page mapping can get cleared under
11757     + * our hands, in which case we need not clear more bits
11758     + * because the page and buffers will be freed and can
11759     + * never be reused once we are done with them.
11760     */
11761     - jh->b_modified = 0;
11762     - if (!jh->b_next_transaction) {
11763     - clear_buffer_freed(bh);
11764     - clear_buffer_jbddirty(bh);
11765     + mapping = READ_ONCE(bh->b_page->mapping);
11766     + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
11767     clear_buffer_mapped(bh);
11768     clear_buffer_new(bh);
11769     clear_buffer_req(bh);
11770     diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
11771     index 3cbcf649ac66..efc8cfd06073 100644
11772     --- a/fs/jbd2/journal.c
11773     +++ b/fs/jbd2/journal.c
11774     @@ -1670,6 +1670,11 @@ int jbd2_journal_load(journal_t *journal)
11775     journal->j_devname);
11776     return -EFSCORRUPTED;
11777     }
11778     + /*
11779     + * clear JBD2_ABORT flag initialized in journal_init_common
11780     + * here to update log tail information with the newest seq.
11781     + */
11782     + journal->j_flags &= ~JBD2_ABORT;
11783    
11784     /* OK, we've finished with the dynamic journal bits:
11785     * reinitialise the dynamic contents of the superblock in memory
11786     @@ -1677,7 +1682,6 @@ int jbd2_journal_load(journal_t *journal)
11787     if (journal_reset(journal))
11788     goto recovery_error;
11789    
11790     - journal->j_flags &= ~JBD2_ABORT;
11791     journal->j_flags |= JBD2_LOADED;
11792     return 0;
11793    
11794     @@ -2096,12 +2100,10 @@ static void __journal_abort_soft (journal_t *journal, int errno)
11795    
11796     __jbd2_journal_abort_hard(journal);
11797    
11798     - if (errno) {
11799     - jbd2_journal_update_sb_errno(journal);
11800     - write_lock(&journal->j_state_lock);
11801     - journal->j_flags |= JBD2_REC_ERR;
11802     - write_unlock(&journal->j_state_lock);
11803     - }
11804     + jbd2_journal_update_sb_errno(journal);
11805     + write_lock(&journal->j_state_lock);
11806     + journal->j_flags |= JBD2_REC_ERR;
11807     + write_unlock(&journal->j_state_lock);
11808     }
11809    
11810     /**
11811     @@ -2143,11 +2145,6 @@ static void __journal_abort_soft (journal_t *journal, int errno)
11812     * failure to disk. ext3_error, for example, now uses this
11813     * functionality.
11814     *
11815     - * Errors which originate from within the journaling layer will NOT
11816     - * supply an errno; a null errno implies that absolutely no further
11817     - * writes are done to the journal (unless there are any already in
11818     - * progress).
11819     - *
11820     */
11821    
11822     void jbd2_journal_abort(journal_t *journal, int errno)
11823     diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
11824     index 799f96c67211..04dd0652bb5c 100644
11825     --- a/fs/jbd2/transaction.c
11826     +++ b/fs/jbd2/transaction.c
11827     @@ -2213,14 +2213,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
11828     return -EBUSY;
11829     }
11830     /*
11831     - * OK, buffer won't be reachable after truncate. We just set
11832     - * j_next_transaction to the running transaction (if there is
11833     - * one) and mark buffer as freed so that commit code knows it
11834     - * should clear dirty bits when it is done with the buffer.
11835     + * OK, buffer won't be reachable after truncate. We just clear
11836     + * b_modified to not confuse transaction credit accounting, and
11837     + * set j_next_transaction to the running transaction (if there
11838     + * is one) and mark buffer as freed so that commit code knows
11839     + * it should clear dirty bits when it is done with the buffer.
11840     */
11841     set_buffer_freed(bh);
11842     if (journal->j_running_transaction && buffer_jbddirty(bh))
11843     jh->b_next_transaction = journal->j_running_transaction;
11844     + jh->b_modified = 0;
11845     jbd2_journal_put_journal_head(jh);
11846     spin_unlock(&journal->j_list_lock);
11847     jbd_unlock_bh_state(bh);
11848     diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
11849     index 497a4171ef61..bfb50fc51528 100644
11850     --- a/fs/ocfs2/journal.h
11851     +++ b/fs/ocfs2/journal.h
11852     @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle,
11853     {
11854     struct ocfs2_inode_info *oi = OCFS2_I(inode);
11855    
11856     - oi->i_sync_tid = handle->h_transaction->t_tid;
11857     - if (datasync)
11858     - oi->i_datasync_tid = handle->h_transaction->t_tid;
11859     + if (!is_handle_aborted(handle)) {
11860     + oi->i_sync_tid = handle->h_transaction->t_tid;
11861     + if (datasync)
11862     + oi->i_datasync_tid = handle->h_transaction->t_tid;
11863     + }
11864     }
11865    
11866     #endif /* OCFS2_JOURNAL_H */
11867     diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
11868     index 0748a26598fc..7d7df003f9d8 100644
11869     --- a/fs/orangefs/orangefs-debugfs.c
11870     +++ b/fs/orangefs/orangefs-debugfs.c
11871     @@ -304,6 +304,7 @@ static void *help_start(struct seq_file *m, loff_t *pos)
11872    
11873     static void *help_next(struct seq_file *m, void *v, loff_t *pos)
11874     {
11875     + (*pos)++;
11876     gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
11877    
11878     return NULL;
11879     diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
11880     index a97e352d05d3..5f5fff068877 100644
11881     --- a/fs/reiserfs/stree.c
11882     +++ b/fs/reiserfs/stree.c
11883     @@ -2249,7 +2249,8 @@ error_out:
11884     /* also releases the path */
11885     unfix_nodes(&s_ins_balance);
11886     #ifdef REISERQUOTA_DEBUG
11887     - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
11888     + if (inode)
11889     + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
11890     "reiserquota insert_item(): freeing %u id=%u type=%c",
11891     quota_bytes, inode->i_uid, head2type(ih));
11892     #endif
11893     diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
11894     index bfed2a700015..677608a89b08 100644
11895     --- a/fs/reiserfs/super.c
11896     +++ b/fs/reiserfs/super.c
11897     @@ -1928,7 +1928,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
11898     if (!sbi->s_jdev) {
11899     SWARN(silent, s, "", "Cannot allocate memory for "
11900     "journal device name");
11901     - goto error;
11902     + goto error_unlocked;
11903     }
11904     }
11905     #ifdef CONFIG_QUOTA
11906     diff --git a/fs/udf/super.c b/fs/udf/super.c
11907     index 03369a89600e..4abdba453885 100644
11908     --- a/fs/udf/super.c
11909     +++ b/fs/udf/super.c
11910     @@ -2460,17 +2460,29 @@ static unsigned int udf_count_free_table(struct super_block *sb,
11911     static unsigned int udf_count_free(struct super_block *sb)
11912     {
11913     unsigned int accum = 0;
11914     - struct udf_sb_info *sbi;
11915     + struct udf_sb_info *sbi = UDF_SB(sb);
11916     struct udf_part_map *map;
11917     + unsigned int part = sbi->s_partition;
11918     + int ptype = sbi->s_partmaps[part].s_partition_type;
11919     +
11920     + if (ptype == UDF_METADATA_MAP25) {
11921     + part = sbi->s_partmaps[part].s_type_specific.s_metadata.
11922     + s_phys_partition_ref;
11923     + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) {
11924     + /*
11925     + * Filesystems with VAT are append-only and we cannot write to
11926     + * them. Let's just report 0 here.
11927     + */
11928     + return 0;
11929     + }
11930    
11931     - sbi = UDF_SB(sb);
11932     if (sbi->s_lvid_bh) {
11933     struct logicalVolIntegrityDesc *lvid =
11934     (struct logicalVolIntegrityDesc *)
11935     sbi->s_lvid_bh->b_data;
11936     - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) {
11937     + if (le32_to_cpu(lvid->numOfPartitions) > part) {
11938     accum = le32_to_cpu(
11939     - lvid->freeSpaceTable[sbi->s_partition]);
11940     + lvid->freeSpaceTable[part]);
11941     if (accum == 0xFFFFFFFF)
11942     accum = 0;
11943     }
11944     @@ -2479,7 +2491,7 @@ static unsigned int udf_count_free(struct super_block *sb)
11945     if (accum)
11946     return accum;
11947    
11948     - map = &sbi->s_partmaps[sbi->s_partition];
11949     + map = &sbi->s_partmaps[part];
11950     if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) {
11951     accum += udf_count_free_bitmap(sb,
11952     map->s_uspace.s_bitmap);
11953     diff --git a/include/linux/libata.h b/include/linux/libata.h
11954     index df58b01e6962..cdfb67b22317 100644
11955     --- a/include/linux/libata.h
11956     +++ b/include/linux/libata.h
11957     @@ -1222,6 +1222,7 @@ struct pci_bits {
11958     };
11959    
11960     extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits);
11961     +extern void ata_pci_shutdown_one(struct pci_dev *pdev);
11962     extern void ata_pci_remove_one(struct pci_dev *pdev);
11963    
11964     #ifdef CONFIG_PM
11965     diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
11966     index 87ff4f58a2f0..9e20bf7f46a2 100644
11967     --- a/include/linux/list_nulls.h
11968     +++ b/include/linux/list_nulls.h
11969     @@ -71,10 +71,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
11970     struct hlist_nulls_node *first = h->first;
11971    
11972     n->next = first;
11973     - n->pprev = &h->first;
11974     + WRITE_ONCE(n->pprev, &h->first);
11975     h->first = n;
11976     if (!is_a_nulls(first))
11977     - first->pprev = &n->next;
11978     + WRITE_ONCE(first->pprev, &n->next);
11979     }
11980    
11981     static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
11982     @@ -84,13 +84,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
11983    
11984     WRITE_ONCE(*pprev, next);
11985     if (!is_a_nulls(next))
11986     - next->pprev = pprev;
11987     + WRITE_ONCE(next->pprev, pprev);
11988     }
11989    
11990     static inline void hlist_nulls_del(struct hlist_nulls_node *n)
11991     {
11992     __hlist_nulls_del(n);
11993     - n->pprev = LIST_POISON2;
11994     + WRITE_ONCE(n->pprev, LIST_POISON2);
11995     }
11996    
11997     /**
11998     diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
11999     index 106f4e0d7bd3..4d71e3687d1e 100644
12000     --- a/include/linux/rculist_nulls.h
12001     +++ b/include/linux/rculist_nulls.h
12002     @@ -33,7 +33,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
12003     {
12004     if (!hlist_nulls_unhashed(n)) {
12005     __hlist_nulls_del(n);
12006     - n->pprev = NULL;
12007     + WRITE_ONCE(n->pprev, NULL);
12008     }
12009     }
12010    
12011     @@ -65,7 +65,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
12012     static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
12013     {
12014     __hlist_nulls_del(n);
12015     - n->pprev = LIST_POISON2;
12016     + WRITE_ONCE(n->pprev, LIST_POISON2);
12017     }
12018    
12019     /**
12020     @@ -93,10 +93,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
12021     struct hlist_nulls_node *first = h->first;
12022    
12023     n->next = first;
12024     - n->pprev = &h->first;
12025     + WRITE_ONCE(n->pprev, &h->first);
12026     rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
12027     if (!is_a_nulls(first))
12028     - first->pprev = &n->next;
12029     + WRITE_ONCE(first->pprev, &n->next);
12030     }
12031    
12032     /**
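A minimal sketch of the annotation pattern used in the hlist_nulls hunks above: every store to a ->pprev link that a lockless RCU reader may observe goes through WRITE_ONCE() so the compiler can neither tear nor re-order it. The READ_ONCE/WRITE_ONCE macros and the node/head types below are simplified stand-ins for the kernel ones, shown only to illustrate the idea.

#include <stdio.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct node {
	struct node *next;
	struct node **pprev;   /* address of the pointer that points at us */
};

struct head {
	struct node *first;
};

static void add_head(struct node *n, struct head *h)
{
	struct node *first = h->first;

	n->next = first;
	WRITE_ONCE(n->pprev, &h->first);   /* marked store: lockless readers may race */
	h->first = n;
	if (first)
		WRITE_ONCE(first->pprev, &n->next);
}

int main(void)
{
	struct head h = { .first = NULL };
	struct node a = { 0 }, b = { 0 };

	add_head(&a, &h);
	add_head(&b, &h);
	printf("first=%p, first->next=%p\n", (void *)h.first, (void *)READ_ONCE(h.first)->next);
	return 0;
}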
12033     diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h
12034     index 8ffa94009d1a..76002416cead 100644
12035     --- a/include/media/v4l2-device.h
12036     +++ b/include/media/v4l2-device.h
12037     @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12038     struct v4l2_subdev *__sd; \
12039     \
12040     __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
12041     - !(grpid) || __sd->grp_id == (grpid), o, f , \
12042     + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
12043     ##args); \
12044     } while (0)
12045    
12046     @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12047     ({ \
12048     struct v4l2_subdev *__sd; \
12049     __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
12050     - !(grpid) || __sd->grp_id == (grpid), o, f , \
12051     + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \
12052     ##args); \
12053     })
12054    
12055     @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12056     struct v4l2_subdev *__sd; \
12057     \
12058     __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \
12059     - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
12060     - ##args); \
12061     + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
12062     + f , ##args); \
12063     } while (0)
12064    
12065     /*
12066     @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd,
12067     ({ \
12068     struct v4l2_subdev *__sd; \
12069     __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \
12070     - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \
12071     - ##args); \
12072     + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \
12073     + f , ##args); \
12074     })
12075    
12076     /*
12077     diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h
12078     index 1a2ae0862e23..c1260d80ef30 100644
12079     --- a/include/scsi/iscsi_proto.h
12080     +++ b/include/scsi/iscsi_proto.h
12081     @@ -638,7 +638,6 @@ struct iscsi_reject {
12082     #define ISCSI_REASON_BOOKMARK_INVALID 9
12083     #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10
12084     #define ISCSI_REASON_NEGOTIATION_RESET 11
12085     -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12
12086    
12087     /* Max. number of Key=Value pairs in a text message */
12088     #define MAX_KEY_VALUE_PAIRS 8192
12089     diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
12090     index f730b91e472f..5432111c8761 100644
12091     --- a/include/sound/rawmidi.h
12092     +++ b/include/sound/rawmidi.h
12093     @@ -92,9 +92,9 @@ struct snd_rawmidi_substream {
12094     struct list_head list; /* list of all substream for given stream */
12095     int stream; /* direction */
12096     int number; /* substream number */
12097     - unsigned int opened: 1, /* open flag */
12098     - append: 1, /* append flag (merge more streams) */
12099     - active_sensing: 1; /* send active sensing when close */
12100     + bool opened; /* open flag */
12101     + bool append; /* append flag (merge more streams) */
12102     + bool active_sensing; /* send active sensing when close */
12103     int use_count; /* use counter (for output) */
12104     size_t bytes;
12105     struct snd_rawmidi *rmidi;
12106     diff --git a/ipc/sem.c b/ipc/sem.c
12107     index 10b94bc59d4a..5cd9d802592f 100644
12108     --- a/ipc/sem.c
12109     +++ b/ipc/sem.c
12110     @@ -2159,11 +2159,9 @@ void exit_sem(struct task_struct *tsk)
12111     ipc_assert_locked_object(&sma->sem_perm);
12112     list_del(&un->list_id);
12113    
12114     - /* we are the last process using this ulp, acquiring ulp->lock
12115     - * isn't required. Besides that, we are also protected against
12116     - * IPC_RMID as we hold sma->sem_perm lock now
12117     - */
12118     + spin_lock(&ulp->lock);
12119     list_del_rcu(&un->list_proc);
12120     + spin_unlock(&ulp->lock);
12121    
12122     /* perform adjustments registered in un */
12123     for (i = 0; i < sma->sem_nsems; i++) {
12124     diff --git a/kernel/cpu.c b/kernel/cpu.c
12125     index c2573e858009..1fbe93fefc1f 100644
12126     --- a/kernel/cpu.c
12127     +++ b/kernel/cpu.c
12128     @@ -515,8 +515,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
12129     if (WARN_ON_ONCE((!cpu_online(cpu))))
12130     return -ECANCELED;
12131    
12132     - /* Unpark the stopper thread and the hotplug thread of the target cpu */
12133     - stop_machine_unpark(cpu);
12134     + /* Unpark the hotplug thread of the target cpu */
12135     kthread_unpark(st->thread);
12136    
12137     /*
12138     @@ -1115,8 +1114,8 @@ void notify_cpu_starting(unsigned int cpu)
12139    
12140     /*
12141     * Called from the idle task. Wake up the controlling task which brings the
12142     - * stopper and the hotplug thread of the upcoming CPU up and then delegates
12143     - * the rest of the online bringup to the hotplug thread.
12144     + * hotplug thread of the upcoming CPU up and then delegates the rest of the
12145     + * online bringup to the hotplug thread.
12146     */
12147     void cpuhp_online_idle(enum cpuhp_state state)
12148     {
12149     @@ -1126,6 +1125,12 @@ void cpuhp_online_idle(enum cpuhp_state state)
12150     if (state != CPUHP_AP_ONLINE_IDLE)
12151     return;
12152    
12153     + /*
12154     + * Unpark the stopper thread before we start the idle loop (and start
12155     + * scheduling); this ensures the stopper task is always available.
12156     + */
12157     + stop_machine_unpark(smp_processor_id());
12158     +
12159     st->state = CPUHP_AP_ONLINE_IDLE;
12160     complete(&st->done);
12161     }
12162     diff --git a/kernel/padata.c b/kernel/padata.c
12163     index 63449fc584da..286c5142a0f7 100644
12164     --- a/kernel/padata.c
12165     +++ b/kernel/padata.c
12166     @@ -34,6 +34,8 @@
12167    
12168     #define MAX_OBJ_NUM 1000
12169    
12170     +static void padata_free_pd(struct parallel_data *pd);
12171     +
12172     static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
12173     {
12174     int cpu, target_cpu;
12175     @@ -301,6 +303,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
12176     struct padata_serial_queue *squeue;
12177     struct parallel_data *pd;
12178     LIST_HEAD(local_list);
12179     + int cnt;
12180    
12181     local_bh_disable();
12182     squeue = container_of(serial_work, struct padata_serial_queue, work);
12183     @@ -310,6 +313,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
12184     list_replace_init(&squeue->serial.list, &local_list);
12185     spin_unlock(&squeue->serial.lock);
12186    
12187     + cnt = 0;
12188     +
12189     while (!list_empty(&local_list)) {
12190     struct padata_priv *padata;
12191    
12192     @@ -319,9 +324,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
12193     list_del_init(&padata->list);
12194    
12195     padata->serial(padata);
12196     - atomic_dec(&pd->refcnt);
12197     + cnt++;
12198     }
12199     local_bh_enable();
12200     +
12201     + if (atomic_sub_and_test(cnt, &pd->refcnt))
12202     + padata_free_pd(pd);
12203     }
12204    
12205     /**
12206     @@ -444,7 +452,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
12207     setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
12208     atomic_set(&pd->seq_nr, -1);
12209     atomic_set(&pd->reorder_objects, 0);
12210     - atomic_set(&pd->refcnt, 0);
12211     + atomic_set(&pd->refcnt, 1);
12212     pd->pinst = pinst;
12213     spin_lock_init(&pd->lock);
12214    
12215     @@ -469,31 +477,6 @@ static void padata_free_pd(struct parallel_data *pd)
12216     kfree(pd);
12217     }
12218    
12219     -/* Flush all objects out of the padata queues. */
12220     -static void padata_flush_queues(struct parallel_data *pd)
12221     -{
12222     - int cpu;
12223     - struct padata_parallel_queue *pqueue;
12224     - struct padata_serial_queue *squeue;
12225     -
12226     - for_each_cpu(cpu, pd->cpumask.pcpu) {
12227     - pqueue = per_cpu_ptr(pd->pqueue, cpu);
12228     - flush_work(&pqueue->work);
12229     - }
12230     -
12231     - del_timer_sync(&pd->timer);
12232     -
12233     - if (atomic_read(&pd->reorder_objects))
12234     - padata_reorder(pd);
12235     -
12236     - for_each_cpu(cpu, pd->cpumask.cbcpu) {
12237     - squeue = per_cpu_ptr(pd->squeue, cpu);
12238     - flush_work(&squeue->work);
12239     - }
12240     -
12241     - BUG_ON(atomic_read(&pd->refcnt) != 0);
12242     -}
12243     -
12244     static void __padata_start(struct padata_instance *pinst)
12245     {
12246     pinst->flags |= PADATA_INIT;
12247     @@ -507,10 +490,6 @@ static void __padata_stop(struct padata_instance *pinst)
12248     pinst->flags &= ~PADATA_INIT;
12249    
12250     synchronize_rcu();
12251     -
12252     - get_online_cpus();
12253     - padata_flush_queues(pinst->pd);
12254     - put_online_cpus();
12255     }
12256    
12257     /* Replace the internal control structure with a new one. */
12258     @@ -531,8 +510,8 @@ static void padata_replace(struct padata_instance *pinst,
12259     if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
12260     notification_mask |= PADATA_CPU_SERIAL;
12261    
12262     - padata_flush_queues(pd_old);
12263     - padata_free_pd(pd_old);
12264     + if (atomic_dec_and_test(&pd_old->refcnt))
12265     + padata_free_pd(pd_old);
12266    
12267     if (notification_mask)
12268     blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
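A small plain-C sketch of the reference-counting scheme the padata hunks switch to (C11 atomics stand in for the kernel's atomic_t; the names are illustrative): the parallel_data is created holding one reference, the serial worker drops one reference per completed object, and whoever drops the count to zero frees the structure, replacing the old flush-based teardown.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct parallel_data {
	atomic_int refcnt;
};

static struct parallel_data *pd_alloc(void)
{
	struct parallel_data *pd = malloc(sizeof(*pd));

	if (pd)
		atomic_init(&pd->refcnt, 1);   /* the instance itself holds one reference */
	return pd;
}

static void pd_free(struct parallel_data *pd)
{
	printf("freeing pd %p\n", (void *)pd);
	free(pd);
}

/* drop 'cnt' references; free when the count reaches zero */
static void pd_put_many(struct parallel_data *pd, int cnt)
{
	if (atomic_fetch_sub(&pd->refcnt, cnt) == cnt)
		pd_free(pd);
}

int main(void)
{
	struct parallel_data *pd = pd_alloc();

	if (!pd)
		return 1;
	atomic_fetch_add(&pd->refcnt, 3);   /* three objects queued for serialization */
	pd_put_many(pd, 3);                 /* serial worker finished its batch */
	pd_put_many(pd, 1);                 /* teardown drops the instance's own reference */
	return 0;
}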
12269     diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
12270     index 71a40e5c3a9f..2ae98f8bce81 100644
12271     --- a/kernel/trace/ftrace.c
12272     +++ b/kernel/trace/ftrace.c
12273     @@ -5455,9 +5455,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
12274     struct trace_array *tr = m->private;
12275     struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids);
12276    
12277     - if (v == FTRACE_NO_PIDS)
12278     + if (v == FTRACE_NO_PIDS) {
12279     + (*pos)++;
12280     return NULL;
12281     -
12282     + }
12283     return trace_pid_next(pid_list, v, pos);
12284     }
12285    
12286     diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
12287     index 7e6971ba9541..8a88e85c8c61 100644
12288     --- a/kernel/trace/trace_events_trigger.c
12289     +++ b/kernel/trace/trace_events_trigger.c
12290     @@ -126,9 +126,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
12291     {
12292     struct trace_event_file *event_file = event_file_data(m->private);
12293    
12294     - if (t == SHOW_AVAILABLE_TRIGGERS)
12295     + if (t == SHOW_AVAILABLE_TRIGGERS) {
12296     + (*pos)++;
12297     return NULL;
12298     -
12299     + }
12300     return seq_list_next(t, &event_file->triggers, pos);
12301     }
12302    
12303     diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
12304     index 413ff108fbd0..d19f2191960e 100644
12305     --- a/kernel/trace/trace_stat.c
12306     +++ b/kernel/trace/trace_stat.c
12307     @@ -277,18 +277,22 @@ static int tracing_stat_init(void)
12308    
12309     d_tracing = tracing_init_dentry();
12310     if (IS_ERR(d_tracing))
12311     - return 0;
12312     + return -ENODEV;
12313    
12314     stat_dir = tracefs_create_dir("trace_stat", d_tracing);
12315     - if (!stat_dir)
12316     + if (!stat_dir) {
12317     pr_warn("Could not create tracefs 'trace_stat' entry\n");
12318     + return -ENOMEM;
12319     + }
12320     return 0;
12321     }
12322    
12323     static int init_stat_file(struct stat_session *session)
12324     {
12325     - if (!stat_dir && tracing_stat_init())
12326     - return -ENODEV;
12327     + int ret;
12328     +
12329     + if (!stat_dir && (ret = tracing_stat_init()))
12330     + return ret;
12331    
12332     session->file = tracefs_create_file(session->ts->name, 0644,
12333     stat_dir,
12334     @@ -301,7 +305,7 @@ static int init_stat_file(struct stat_session *session)
12335     int register_stat_tracer(struct tracer_stat *trace)
12336     {
12337     struct stat_session *session, *node;
12338     - int ret;
12339     + int ret = -EINVAL;
12340    
12341     if (!trace)
12342     return -EINVAL;
12343     @@ -312,17 +316,15 @@ int register_stat_tracer(struct tracer_stat *trace)
12344     /* Already registered? */
12345     mutex_lock(&all_stat_sessions_mutex);
12346     list_for_each_entry(node, &all_stat_sessions, session_list) {
12347     - if (node->ts == trace) {
12348     - mutex_unlock(&all_stat_sessions_mutex);
12349     - return -EINVAL;
12350     - }
12351     + if (node->ts == trace)
12352     + goto out;
12353     }
12354     - mutex_unlock(&all_stat_sessions_mutex);
12355    
12356     + ret = -ENOMEM;
12357     /* Init the session */
12358     session = kzalloc(sizeof(*session), GFP_KERNEL);
12359     if (!session)
12360     - return -ENOMEM;
12361     + goto out;
12362    
12363     session->ts = trace;
12364     INIT_LIST_HEAD(&session->session_list);
12365     @@ -331,15 +333,16 @@ int register_stat_tracer(struct tracer_stat *trace)
12366     ret = init_stat_file(session);
12367     if (ret) {
12368     destroy_session(session);
12369     - return ret;
12370     + goto out;
12371     }
12372    
12373     + ret = 0;
12374     /* Register */
12375     - mutex_lock(&all_stat_sessions_mutex);
12376     list_add_tail(&session->session_list, &all_stat_sessions);
12377     + out:
12378     mutex_unlock(&all_stat_sessions_mutex);
12379    
12380     - return 0;
12381     + return ret;
12382     }
12383    
12384     void unregister_stat_tracer(struct tracer_stat *trace)
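The trace_stat rework above funnels every exit path through a single "out:" label so all_stat_sessions_mutex is taken once and released exactly once while the error code propagates from wherever the failure happened. A minimal pthread-based sketch of that single-exit pattern (the registry itself is elided; the names are illustrative):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

static int register_entry(int id)
{
	int ret = -1;          /* assume failure, refined along the way */
	int *slot = NULL;

	pthread_mutex_lock(&registry_lock);

	if (id < 0)
		goto out;      /* invalid / already registered */

	slot = malloc(sizeof(*slot));
	if (!slot)
		goto out;      /* allocation failure */

	*slot = id;            /* a real registry would link this somewhere */
	free(slot);
	ret = 0;
out:
	pthread_mutex_unlock(&registry_lock);   /* every path unlocks exactly once */
	return ret;
}

int main(void)
{
	return register_entry(1) == 0 ? 0 : 1;
}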
12385     diff --git a/lib/scatterlist.c b/lib/scatterlist.c
12386     index a854cc39f084..ef8c14a56d0a 100644
12387     --- a/lib/scatterlist.c
12388     +++ b/lib/scatterlist.c
12389     @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
12390     if (prv)
12391     table->nents = ++table->orig_nents;
12392    
12393     - return -ENOMEM;
12394     + return -ENOMEM;
12395     }
12396    
12397     sg_init_table(sg, alloc_size);
12398     diff --git a/lib/stackdepot.c b/lib/stackdepot.c
12399     index f87d138e9672..759ff419fe61 100644
12400     --- a/lib/stackdepot.c
12401     +++ b/lib/stackdepot.c
12402     @@ -92,15 +92,19 @@ static bool init_stack_slab(void **prealloc)
12403     return true;
12404     if (stack_slabs[depot_index] == NULL) {
12405     stack_slabs[depot_index] = *prealloc;
12406     + *prealloc = NULL;
12407     } else {
12408     - stack_slabs[depot_index + 1] = *prealloc;
12409     + /* If this is the last depot slab, do not touch the next one. */
12410     + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
12411     + stack_slabs[depot_index + 1] = *prealloc;
12412     + *prealloc = NULL;
12413     + }
12414     /*
12415     * This smp_store_release pairs with smp_load_acquire() from
12416     * |next_slab_inited| above and in depot_save_stack().
12417     */
12418     smp_store_release(&next_slab_inited, 1);
12419     }
12420     - *prealloc = NULL;
12421     return true;
12422     }
12423    
12424     diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
12425     index dffee9d47ec4..7b993f25aab9 100644
12426     --- a/net/netfilter/xt_bpf.c
12427     +++ b/net/netfilter/xt_bpf.c
12428     @@ -25,6 +25,9 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
12429     struct xt_bpf_info *info = par->matchinfo;
12430     struct sock_fprog_kern program;
12431    
12432     + if (info->bpf_program_num_elem > XT_BPF_MAX_NUM_INSTR)
12433     + return -EINVAL;
12434     +
12435     program.len = info->bpf_program_num_elem;
12436     program.filter = info->bpf_program;
12437    
12438     diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
12439     index a1a29cdc58fc..140a9ae262ef 100644
12440     --- a/net/netfilter/xt_hashlimit.c
12441     +++ b/net/netfilter/xt_hashlimit.c
12442     @@ -735,6 +735,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
12443     return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
12444     }
12445    
12446     +#define HASHLIMIT_MAX_SIZE 1048576
12447     +
12448     static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
12449     struct xt_hashlimit_htable **hinfo,
12450     struct hashlimit_cfg2 *cfg,
12451     @@ -745,6 +747,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
12452    
12453     if (cfg->gc_interval == 0 || cfg->expire == 0)
12454     return -EINVAL;
12455     + if (cfg->size > HASHLIMIT_MAX_SIZE) {
12456     + cfg->size = HASHLIMIT_MAX_SIZE;
12457     + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
12458     + }
12459     + if (cfg->max > HASHLIMIT_MAX_SIZE) {
12460     + cfg->max = HASHLIMIT_MAX_SIZE;
12461     + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
12462     + }
12463     if (par->family == NFPROTO_IPV4) {
12464     if (cfg->srcmask > 32 || cfg->dstmask > 32)
12465     return -EINVAL;
12466     diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
12467     index eee299bb6bcf..de03b7b49e05 100644
12468     --- a/net/sched/cls_flower.c
12469     +++ b/net/sched/cls_flower.c
12470     @@ -364,6 +364,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
12471     [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 },
12472     [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 },
12473     [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 },
12474     + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 },
12475     };
12476    
12477     static void fl_set_key_val(struct nlattr **tb,
12478     diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
12479     index 61ddfbad2aae..fe29c576e494 100644
12480     --- a/net/sched/cls_matchall.c
12481     +++ b/net/sched/cls_matchall.c
12482     @@ -111,6 +111,7 @@ static unsigned long mall_get(struct tcf_proto *tp, u32 handle)
12483     static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
12484     [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC },
12485     [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 },
12486     + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
12487     };
12488    
12489     static int mall_set_parms(struct net *net, struct tcf_proto *tp,
12490     diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
12491     index 27aac273205b..fa423fcd1a92 100644
12492     --- a/scripts/kconfig/confdata.c
12493     +++ b/scripts/kconfig/confdata.c
12494     @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode)
12495    
12496     sym_calc_value(csym);
12497     if (mode == def_random)
12498     - has_changed = randomize_choice_values(csym);
12499     + has_changed |= randomize_choice_values(csym);
12500     else {
12501     set_all_choice_values(csym);
12502     has_changed = true;
12503     diff --git a/security/selinux/avc.c b/security/selinux/avc.c
12504     index 52f3c550abcc..f3c473791b69 100644
12505     --- a/security/selinux/avc.c
12506     +++ b/security/selinux/avc.c
12507     @@ -865,7 +865,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid,
12508     if (orig->ae.xp_node) {
12509     rc = avc_xperms_populate(node, orig->ae.xp_node);
12510     if (rc) {
12511     - kmem_cache_free(avc_node_cachep, node);
12512     + avc_node_kill(node);
12513     goto out_unlock;
12514     }
12515     }
12516     diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
12517     index eee4ea17a8f5..198eea5c8c2f 100644
12518     --- a/sound/core/seq/seq_clientmgr.c
12519     +++ b/sound/core/seq/seq_clientmgr.c
12520     @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event,
12521     event->queue = queue;
12522     event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK;
12523     if (real_time) {
12524     - event->time.time = snd_seq_timer_get_cur_time(q->timer);
12525     + event->time.time = snd_seq_timer_get_cur_time(q->timer, true);
12526     event->flags |= SNDRV_SEQ_TIME_STAMP_REAL;
12527     } else {
12528     event->time.tick = snd_seq_timer_get_cur_tick(q->timer);
12529     @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client,
12530     tmr = queue->timer;
12531     status->events = queue->tickq->cells + queue->timeq->cells;
12532    
12533     - status->time = snd_seq_timer_get_cur_time(tmr);
12534     + status->time = snd_seq_timer_get_cur_time(tmr, true);
12535     status->tick = snd_seq_timer_get_cur_tick(tmr);
12536    
12537     status->running = tmr->running;
12538     diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c
12539     index 1a6dc4ff44a6..ea1aa0796276 100644
12540     --- a/sound/core/seq/seq_queue.c
12541     +++ b/sound/core/seq/seq_queue.c
12542     @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
12543     {
12544     unsigned long flags;
12545     struct snd_seq_event_cell *cell;
12546     + snd_seq_tick_time_t cur_tick;
12547     + snd_seq_real_time_t cur_time;
12548    
12549     if (q == NULL)
12550     return;
12551     @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop)
12552    
12553     __again:
12554     /* Process tick queue... */
12555     + cur_tick = snd_seq_timer_get_cur_tick(q->timer);
12556     for (;;) {
12557     - cell = snd_seq_prioq_cell_out(q->tickq,
12558     - &q->timer->tick.cur_tick);
12559     + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick);
12560     if (!cell)
12561     break;
12562     snd_seq_dispatch_event(cell, atomic, hop);
12563     }
12564    
12565     /* Process time queue... */
12566     + cur_time = snd_seq_timer_get_cur_time(q->timer, false);
12567     for (;;) {
12568     - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time);
12569     + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time);
12570     if (!cell)
12571     break;
12572     snd_seq_dispatch_event(cell, atomic, hop);
12573     @@ -415,6 +418,7 @@ int snd_seq_queue_check_access(int queueid, int client)
12574     int snd_seq_queue_set_owner(int queueid, int client, int locked)
12575     {
12576     struct snd_seq_queue *q = queueptr(queueid);
12577     + unsigned long flags;
12578    
12579     if (q == NULL)
12580     return -EINVAL;
12581     @@ -424,8 +428,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked)
12582     return -EPERM;
12583     }
12584    
12585     + spin_lock_irqsave(&q->owner_lock, flags);
12586     q->locked = locked ? 1 : 0;
12587     q->owner = client;
12588     + spin_unlock_irqrestore(&q->owner_lock, flags);
12589     queue_access_unlock(q);
12590     queuefree(q);
12591    
12592     @@ -564,15 +570,17 @@ void snd_seq_queue_client_termination(int client)
12593     unsigned long flags;
12594     int i;
12595     struct snd_seq_queue *q;
12596     + bool matched;
12597    
12598     for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
12599     if ((q = queueptr(i)) == NULL)
12600     continue;
12601     spin_lock_irqsave(&q->owner_lock, flags);
12602     - if (q->owner == client)
12603     + matched = (q->owner == client);
12604     + if (matched)
12605     q->klocked = 1;
12606     spin_unlock_irqrestore(&q->owner_lock, flags);
12607     - if (q->owner == client) {
12608     + if (matched) {
12609     if (q->timer->running)
12610     snd_seq_timer_stop(q->timer);
12611     snd_seq_timer_reset(q->timer);
12612     @@ -764,6 +772,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
12613     int i, bpm;
12614     struct snd_seq_queue *q;
12615     struct snd_seq_timer *tmr;
12616     + bool locked;
12617     + int owner;
12618    
12619     for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) {
12620     if ((q = queueptr(i)) == NULL)
12621     @@ -775,9 +785,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry,
12622     else
12623     bpm = 0;
12624    
12625     + spin_lock_irq(&q->owner_lock);
12626     + locked = q->locked;
12627     + owner = q->owner;
12628     + spin_unlock_irq(&q->owner_lock);
12629     +
12630     snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name);
12631     - snd_iprintf(buffer, "owned by client : %d\n", q->owner);
12632     - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free");
12633     + snd_iprintf(buffer, "owned by client : %d\n", owner);
12634     + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free");
12635     snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq));
12636     snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq));
12637     snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped");
12638     diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
12639     index 0e1feb597586..bd5e5a5d52a8 100644
12640     --- a/sound/core/seq/seq_timer.c
12641     +++ b/sound/core/seq/seq_timer.c
12642     @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr)
12643     }
12644    
12645     /* return current 'real' time. use timeofday() to get better granularity. */
12646     -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
12647     +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
12648     + bool adjust_ktime)
12649     {
12650     snd_seq_real_time_t cur_time;
12651     unsigned long flags;
12652    
12653     spin_lock_irqsave(&tmr->lock, flags);
12654     cur_time = tmr->cur_time;
12655     - if (tmr->running) {
12656     + if (adjust_ktime && tmr->running) {
12657     struct timespec64 tm;
12658    
12659     ktime_get_ts64(&tm);
12660     @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
12661     high PPQ values) */
12662     snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr)
12663     {
12664     - return tmr->tick.cur_tick;
12665     + snd_seq_tick_time_t cur_tick;
12666     + unsigned long flags;
12667     +
12668     + spin_lock_irqsave(&tmr->lock, flags);
12669     + cur_tick = tmr->tick.cur_tick;
12670     + spin_unlock_irqrestore(&tmr->lock, flags);
12671     + return cur_tick;
12672     }
12673    
12674    
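The seq_timer hunk above makes snd_seq_timer_get_cur_tick() return a snapshot taken under tmr->lock instead of reading the tick counter racily. A minimal pthread sketch of the copy-under-lock pattern (the ticker type and names are illustrative, not the ALSA API):

#include <pthread.h>
#include <stdio.h>

struct ticker {
	pthread_mutex_t lock;
	unsigned long cur_tick;
};

/* take the lock, copy the value, drop the lock: callers get a consistent snapshot */
static unsigned long ticker_get_cur_tick(struct ticker *t)
{
	unsigned long snapshot;

	pthread_mutex_lock(&t->lock);
	snapshot = t->cur_tick;
	pthread_mutex_unlock(&t->lock);
	return snapshot;
}

int main(void)
{
	struct ticker t = { .lock = PTHREAD_MUTEX_INITIALIZER, .cur_tick = 960 };

	printf("tick = %lu\n", ticker_get_cur_tick(&t));
	return 0;
}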
12675     diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h
12676     index 9506b661fe5b..5d47d559465e 100644
12677     --- a/sound/core/seq/seq_timer.h
12678     +++ b/sound/core/seq/seq_timer.h
12679     @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq);
12680     int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position);
12681     int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position);
12682     int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base);
12683     -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr);
12684     +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr,
12685     + bool adjust_ktime);
12686     snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr);
12687    
12688     extern int seq_default_timer_class;
12689     diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c
12690     index f21633cd9b38..acbe61b8db7b 100644
12691     --- a/sound/hda/hdmi_chmap.c
12692     +++ b/sound/hda/hdmi_chmap.c
12693     @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen)
12694    
12695     for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) {
12696     if (spk_alloc & (1 << i))
12697     - j += snprintf(buf + j, buflen - j, " %s",
12698     + j += scnprintf(buf + j, buflen - j, " %s",
12699     cea_speaker_allocation_names[i]);
12700     }
12701     buf[j] = '\0'; /* necessary when j == 0 */
12702     diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
12703     index 1b5e217d1bb2..2ad28ce7ff49 100644
12704     --- a/sound/pci/hda/hda_codec.c
12705     +++ b/sound/pci/hda/hda_codec.c
12706     @@ -4104,7 +4104,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen)
12707    
12708     for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++)
12709     if (pcm & (AC_SUPPCM_BITS_8 << i))
12710     - j += snprintf(buf + j, buflen - j, " %d", bits[i]);
12711     + j += scnprintf(buf + j, buflen - j, " %d", bits[i]);
12712    
12713     buf[j] = '\0'; /* necessary when j == 0 */
12714     }
12715     diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c
12716     index ba7fe9b6655c..864cc8c9ada0 100644
12717     --- a/sound/pci/hda/hda_eld.c
12718     +++ b/sound/pci/hda/hda_eld.c
12719     @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen)
12720    
12721     for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++)
12722     if (pcm & (1 << i))
12723     - j += snprintf(buf + j, buflen - j, " %d",
12724     + j += scnprintf(buf + j, buflen - j, " %d",
12725     alsa_rates[i]);
12726    
12727     buf[j] = '\0'; /* necessary when j == 0 */
12728     diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
12729     index 9739fce9e032..f3ac19d33bd4 100644
12730     --- a/sound/pci/hda/hda_sysfs.c
12731     +++ b/sound/pci/hda/hda_sysfs.c
12732     @@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev,
12733     mutex_lock(&codec->user_mutex);
12734     for (i = 0; i < codec->init_verbs.used; i++) {
12735     struct hda_verb *v = snd_array_elem(&codec->init_verbs, i);
12736     - len += snprintf(buf + len, PAGE_SIZE - len,
12737     + len += scnprintf(buf + len, PAGE_SIZE - len,
12738     "0x%02x 0x%03x 0x%04x\n",
12739     v->nid, v->verb, v->param);
12740     }
12741     @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev,
12742     mutex_lock(&codec->user_mutex);
12743     for (i = 0; i < codec->hints.used; i++) {
12744     struct hda_hint *hint = snd_array_elem(&codec->hints, i);
12745     - len += snprintf(buf + len, PAGE_SIZE - len,
12746     + len += scnprintf(buf + len, PAGE_SIZE - len,
12747     "%s = %s\n", hint->key, hint->val);
12748     }
12749     mutex_unlock(&codec->user_mutex);
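The snprintf()-to-scnprintf() conversions in the sound/ hunks above matter because snprintf() returns the length the output would have had, so adding its return value to a running offset can walk past the end of the buffer once truncation starts; scnprintf() returns the number of characters actually stored. A userspace sketch of the safe accumulation, with my_scnprintf() as a hypothetical stand-in for the kernel helper:

#include <stdio.h>
#include <string.h>

/* stand-in for the kernel's scnprintf(): never reports more than it wrote */
static size_t my_scnprintf(char *buf, size_t size, const char *fmt, const char *s)
{
	int ret;

	if (size == 0)
		return 0;
	ret = snprintf(buf, size, fmt, s);
	if (ret < 0)
		return 0;
	return (size_t)ret >= size ? size - 1 : (size_t)ret;
}

int main(void)
{
	char buf[16];
	size_t j = 0;
	const char *names[] = { "front-left", "front-right", "rear-left" };

	for (size_t i = 0; i < 3 && j < sizeof(buf); i++)
		j += my_scnprintf(buf + j, sizeof(buf) - j, " %s", names[i]);

	buf[j] = '\0';      /* j can never exceed sizeof(buf) - 1 here */
	printf("'%s' (j=%zu)\n", buf, j);
	return 0;
}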
12750     diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
12751     index 8557b94e462c..1e99500dbb6c 100644
12752     --- a/sound/pci/hda/patch_conexant.c
12753     +++ b/sound/pci/hda/patch_conexant.c
12754     @@ -853,6 +853,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
12755     SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410),
12756     SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410),
12757     SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410),
12758     + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410),
12759     SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410),
12760     SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410),
12761     SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD),
12762     diff --git a/sound/sh/aica.c b/sound/sh/aica.c
12763     index fbbc25279559..2a127feb8e29 100644
12764     --- a/sound/sh/aica.c
12765     +++ b/sound/sh/aica.c
12766     @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length)
12767     }
12768    
12769     /* spu_memload - write to SPU address space */
12770     -static void spu_memload(u32 toi, void *from, int length)
12771     +static void spu_memload(u32 toi, const void *from, int length)
12772     {
12773     unsigned long flags;
12774     - u32 *froml = from;
12775     + const u32 *froml = from;
12776     u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi);
12777     int i;
12778     u32 val;
12779     diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
12780     index 22aec9a1e9a4..838d03a138ca 100644
12781     --- a/sound/soc/atmel/Kconfig
12782     +++ b/sound/soc/atmel/Kconfig
12783     @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA
12784    
12785     config SND_ATMEL_SOC_SSC_DMA
12786     tristate
12787     + select SND_ATMEL_SOC_DMA
12788     + select SND_ATMEL_SOC_PDC
12789    
12790     config SND_ATMEL_SOC_SSC
12791     tristate
12792     diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
12793     index a5299cbb09ba..064f3485a977 100644
12794     --- a/sound/usb/quirks.c
12795     +++ b/sound/usb/quirks.c
12796     @@ -1149,6 +1149,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
12797     case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
12798     case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
12799     case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
12800     + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */
12801     return true;
12802     }
12803     return false;
12804     diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
12805     index 0b34dbc8f302..7dcb33d3886b 100644
12806     --- a/sound/usb/usx2y/usX2Yhwdep.c
12807     +++ b/sound/usb/usx2y/usX2Yhwdep.c
12808     @@ -132,7 +132,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw,
12809     info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code
12810     if (us428->chip_status & USX2Y_STAT_CHIP_INIT)
12811     info->chip_ready = 1;
12812     - info->version = USX2Y_DRIVER_VERSION;
12813     + info->version = USX2Y_DRIVER_VERSION;
12814     return 0;
12815     }
12816    
12817     diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
12818     index f99f49e4a31e..21e714cf0126 100644
12819     --- a/tools/lib/api/fs/fs.c
12820     +++ b/tools/lib/api/fs/fs.c
12821     @@ -194,6 +194,7 @@ static bool fs__env_override(struct fs *fs)
12822     size_t name_len = strlen(fs->name);
12823     /* name + "_PATH" + '\0' */
12824     char upper_name[name_len + 5 + 1];
12825     +
12826     memcpy(upper_name, fs->name, name_len);
12827     mem_toupper(upper_name, name_len);
12828     strcpy(&upper_name[name_len], "_PATH");
12829     @@ -203,7 +204,8 @@ static bool fs__env_override(struct fs *fs)
12830     return false;
12831    
12832     fs->found = true;
12833     - strncpy(fs->path, override_path, sizeof(fs->path));
12834     + strncpy(fs->path, override_path, sizeof(fs->path) - 1);
12835     + fs->path[sizeof(fs->path) - 1] = '\0';
12836     return true;
12837     }
12838    
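The fs.c change above addresses the classic strncpy() pitfall: when the source fills the destination, strncpy() copies no terminating NUL. Copying at most sizeof(dst) - 1 bytes and terminating explicitly, as the hunk does, keeps the string valid. A short standalone sketch:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char path[8];
	const char *override_path = "/sys/kernel/debug";   /* longer than the buffer */

	/* strncpy() does not NUL-terminate on truncation, so leave room and terminate */
	strncpy(path, override_path, sizeof(path) - 1);
	path[sizeof(path) - 1] = '\0';

	printf("%s\n", path);   /* prints the truncated, but valid, string */
	return 0;
}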
12839     diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12840     index 0f7eb4f5bdb7..82e105b284e0 100644
12841     --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12842     +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
12843     @@ -909,7 +909,7 @@ EndTable
12844    
12845     GrpTable: Grp3_2
12846     0: TEST Ev,Iz
12847     -1:
12848     +1: TEST Ev,Iz
12849     2: NOT Ev
12850     3: NEG Ev
12851     4: MUL rAX,Ev
12852     diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c
12853     index b4c37e76a6e0..187dfaa67d0a 100644
12854     --- a/tools/usb/usbip/src/usbip_network.c
12855     +++ b/tools/usb/usbip/src/usbip_network.c
12856     @@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg)
12857     info("using port %d (\"%s\")", usbip_port, usbip_port_string);
12858     }
12859    
12860     -void usbip_net_pack_uint32_t(int pack, uint32_t *num)
12861     +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num)
12862     {
12863     uint32_t i;
12864    
12865     if (pack)
12866     - i = htonl(*num);
12867     + i = htonl(num);
12868     else
12869     - i = ntohl(*num);
12870     + i = ntohl(num);
12871    
12872     - *num = i;
12873     + return i;
12874     }
12875    
12876     -void usbip_net_pack_uint16_t(int pack, uint16_t *num)
12877     +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num)
12878     {
12879     uint16_t i;
12880    
12881     if (pack)
12882     - i = htons(*num);
12883     + i = htons(num);
12884     else
12885     - i = ntohs(*num);
12886     + i = ntohs(num);
12887    
12888     - *num = i;
12889     + return i;
12890     }
12891    
12892     void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev)
12893     {
12894     - usbip_net_pack_uint32_t(pack, &udev->busnum);
12895     - usbip_net_pack_uint32_t(pack, &udev->devnum);
12896     - usbip_net_pack_uint32_t(pack, &udev->speed);
12897     + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum);
12898     + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum);
12899     + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed);
12900    
12901     - usbip_net_pack_uint16_t(pack, &udev->idVendor);
12902     - usbip_net_pack_uint16_t(pack, &udev->idProduct);
12903     - usbip_net_pack_uint16_t(pack, &udev->bcdDevice);
12904     + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor);
12905     + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct);
12906     + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice);
12907     }
12908    
12909     void usbip_net_pack_usb_interface(int pack __attribute__((unused)),
12910     @@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen)
12911     return usbip_net_xmit(sockfd, buff, bufflen, 1);
12912     }
12913    
12914     +static inline void usbip_net_pack_op_common(int pack,
12915     + struct op_common *op_common)
12916     +{
12917     + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version);
12918     + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code);
12919     + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status);
12920     +}
12921     +
12922     int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
12923     {
12924     struct op_common op_common;
12925     @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
12926     op_common.code = code;
12927     op_common.status = status;
12928    
12929     - PACK_OP_COMMON(1, &op_common);
12930     + usbip_net_pack_op_common(1, &op_common);
12931    
12932     rc = usbip_net_send(sockfd, &op_common, sizeof(op_common));
12933     if (rc < 0) {
12934     @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code)
12935     goto err;
12936     }
12937    
12938     - PACK_OP_COMMON(0, &op_common);
12939     + usbip_net_pack_op_common(0, &op_common);
12940    
12941     if (op_common.version != USBIP_VERSION) {
12942     dbg("version mismatch: %d %d", op_common.version,
12943     diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h
12944     index c1e875cf1078..573fa839b66b 100644
12945     --- a/tools/usb/usbip/src/usbip_network.h
12946     +++ b/tools/usb/usbip/src/usbip_network.h
12947     @@ -33,12 +33,6 @@ struct op_common {
12948    
12949     } __attribute__((packed));
12950    
12951     -#define PACK_OP_COMMON(pack, op_common) do {\
12952     - usbip_net_pack_uint16_t(pack, &(op_common)->version);\
12953     - usbip_net_pack_uint16_t(pack, &(op_common)->code);\
12954     - usbip_net_pack_uint32_t(pack, &(op_common)->status);\
12955     -} while (0)
12956     -
12957     /* ---------------------------------------------------------------------- */
12958     /* Dummy Code */
12959     #define OP_UNSPEC 0x00
12960     @@ -164,11 +158,11 @@ struct op_devlist_reply_extra {
12961     } while (0)
12962    
12963     #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\
12964     - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\
12965     + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\
12966     } while (0)
12967    
12968     -void usbip_net_pack_uint32_t(int pack, uint32_t *num);
12969     -void usbip_net_pack_uint16_t(int pack, uint16_t *num);
12970     +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num);
12971     +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num);
12972     void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev);
12973     void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf);
12974
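The usbip change converts the byte-order helpers from modifying a value through a pointer to taking and returning the value, so members of packed structures are assigned directly instead of having their addresses passed around. A minimal sketch of the pattern, assuming only htonl()/ntohl() from <arpa/inet.h> (the struct layout here is illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* pack != 0: host to network order; pack == 0: network to host order */
static uint32_t pack_uint32(int pack, uint32_t num)
{
	return pack ? htonl(num) : ntohl(num);
}

struct __attribute__((packed)) op_common {
	uint16_t version;
	uint16_t code;
	uint32_t status;
};

int main(void)
{
	struct op_common op = { .version = 0x0111, .code = 0x8005, .status = 2 };

	/* assign the converted value; no pointer into the packed struct is needed */
	op.status = pack_uint32(1, op.status);
	printf("status on the wire: 0x%08x\n", op.status);
	return 0;
}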