Annotation of /trunk/kernel-magellan/patches-4.15/0108-4.15.9-all-fixes.patch
Parent Directory | Revision Log
Revision 3092 -
(hide annotations)
(download)
Wed Mar 21 14:52:46 2018 UTC (6 years, 6 months ago) by niro
File size: 24210 byte(s)
-linux-4.15.9
1 | niro | 3092 | diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt |
2 | index 3c65feb83010..a81c97a4b4a5 100644 | ||
3 | --- a/Documentation/virtual/kvm/cpuid.txt | ||
4 | +++ b/Documentation/virtual/kvm/cpuid.txt | ||
5 | @@ -54,6 +54,10 @@ KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit | ||
6 | || || before enabling paravirtualized | ||
7 | || || spinlock support. | ||
8 | ------------------------------------------------------------------------------ | ||
9 | +KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit | ||
10 | + || || can be enabled by setting bit 2 | ||
11 | + || || when writing to msr 0x4b564d02 | ||
12 | +------------------------------------------------------------------------------ | ||
13 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side | ||
14 | || || per-cpu warps are expected in | ||
15 | || || kvmclock. | ||
16 | diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt | ||
17 | index 1ebecc115dc6..f3f0d57ced8e 100644 | ||
18 | --- a/Documentation/virtual/kvm/msr.txt | ||
19 | +++ b/Documentation/virtual/kvm/msr.txt | ||
20 | @@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 | ||
21 | when asynchronous page faults are enabled on the vcpu 0 when | ||
22 | disabled. Bit 1 is 1 if asynchronous page faults can be injected | ||
23 | when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults | ||
24 | - are delivered to L1 as #PF vmexits. | ||
25 | + are delivered to L1 as #PF vmexits. Bit 2 can be set only if | ||
26 | + KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. | ||
27 | |||
28 | First 4 byte of 64 byte memory location will be written to by | ||
29 | the hypervisor at the time of asynchronous page fault (APF) | ||
30 | diff --git a/Makefile b/Makefile | ||
31 | index eb18d200a603..0420f9a0c70f 100644 | ||
32 | --- a/Makefile | ||
33 | +++ b/Makefile | ||
34 | @@ -1,7 +1,7 @@ | ||
35 | # SPDX-License-Identifier: GPL-2.0 | ||
36 | VERSION = 4 | ||
37 | PATCHLEVEL = 15 | ||
38 | -SUBLEVEL = 8 | ||
39 | +SUBLEVEL = 9 | ||
40 | EXTRAVERSION = | ||
41 | NAME = Fearless Coyote | ||
42 | |||
43 | diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c | ||
44 | index bb32f7f6dd0f..be155f70f108 100644 | ||
45 | --- a/arch/arm64/net/bpf_jit_comp.c | ||
46 | +++ b/arch/arm64/net/bpf_jit_comp.c | ||
47 | @@ -238,8 +238,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) | ||
48 | off = offsetof(struct bpf_array, map.max_entries); | ||
49 | emit_a64_mov_i64(tmp, off, ctx); | ||
50 | emit(A64_LDR32(tmp, r2, tmp), ctx); | ||
51 | + emit(A64_MOV(0, r3, r3), ctx); | ||
52 | emit(A64_CMP(0, r3, tmp), ctx); | ||
53 | - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); | ||
54 | + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); | ||
55 | |||
56 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) | ||
57 | * goto out; | ||
58 | @@ -247,7 +248,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) | ||
59 | */ | ||
60 | emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); | ||
61 | emit(A64_CMP(1, tcc, tmp), ctx); | ||
62 | - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); | ||
63 | + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); | ||
64 | emit(A64_ADD_I(1, tcc, tcc, 1), ctx); | ||
65 | |||
66 | /* prog = array->ptrs[index]; | ||
67 | diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c | ||
68 | index d183b4801bdb..35591fb09042 100644 | ||
69 | --- a/arch/powerpc/net/bpf_jit_comp64.c | ||
70 | +++ b/arch/powerpc/net/bpf_jit_comp64.c | ||
71 | @@ -242,6 +242,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 | ||
72 | * goto out; | ||
73 | */ | ||
74 | PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); | ||
75 | + PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); | ||
76 | PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); | ||
77 | PPC_BCC(COND_GE, out); | ||
78 | |||
79 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h | ||
80 | index 76b058533e47..81a1be326571 100644 | ||
81 | --- a/arch/x86/include/asm/nospec-branch.h | ||
82 | +++ b/arch/x86/include/asm/nospec-branch.h | ||
83 | @@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) | ||
84 | } | ||
85 | |||
86 | #endif /* __ASSEMBLY__ */ | ||
87 | + | ||
88 | +/* | ||
89 | + * Below is used in the eBPF JIT compiler and emits the byte sequence | ||
90 | + * for the following assembly: | ||
91 | + * | ||
92 | + * With retpolines configured: | ||
93 | + * | ||
94 | + * callq do_rop | ||
95 | + * spec_trap: | ||
96 | + * pause | ||
97 | + * lfence | ||
98 | + * jmp spec_trap | ||
99 | + * do_rop: | ||
100 | + * mov %rax,(%rsp) | ||
101 | + * retq | ||
102 | + * | ||
103 | + * Without retpolines configured: | ||
104 | + * | ||
105 | + * jmp *%rax | ||
106 | + */ | ||
107 | +#ifdef CONFIG_RETPOLINE | ||
108 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 | ||
109 | +# define RETPOLINE_RAX_BPF_JIT() \ | ||
110 | + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ | ||
111 | + /* spec_trap: */ \ | ||
112 | + EMIT2(0xF3, 0x90); /* pause */ \ | ||
113 | + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ | ||
114 | + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ | ||
115 | + /* do_rop: */ \ | ||
116 | + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ | ||
117 | + EMIT1(0xC3); /* retq */ | ||
118 | +#else | ||
119 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 | ||
120 | +# define RETPOLINE_RAX_BPF_JIT() \ | ||
121 | + EMIT2(0xFF, 0xE0); /* jmp *%rax */ | ||
122 | +#endif | ||
123 | + | ||
124 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ | ||
125 | diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h | ||
126 | index 09cc06483bed..989db885de97 100644 | ||
127 | --- a/arch/x86/include/uapi/asm/kvm_para.h | ||
128 | +++ b/arch/x86/include/uapi/asm/kvm_para.h | ||
129 | @@ -25,6 +25,7 @@ | ||
130 | #define KVM_FEATURE_STEAL_TIME 5 | ||
131 | #define KVM_FEATURE_PV_EOI 6 | ||
132 | #define KVM_FEATURE_PV_UNHALT 7 | ||
133 | +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 | ||
134 | |||
135 | /* The last 8 bits are used to indicate how to interpret the flags field | ||
136 | * in pvclock structure. If no bits are set, all flags are ignored. | ||
137 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c | ||
138 | index b40ffbf156c1..0a93e83b774a 100644 | ||
139 | --- a/arch/x86/kernel/kvm.c | ||
140 | +++ b/arch/x86/kernel/kvm.c | ||
141 | @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) | ||
142 | #endif | ||
143 | pa |= KVM_ASYNC_PF_ENABLED; | ||
144 | |||
145 | - /* Async page fault support for L1 hypervisor is optional */ | ||
146 | - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, | ||
147 | - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) | ||
148 | - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); | ||
149 | + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) | ||
150 | + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; | ||
151 | + | ||
152 | + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); | ||
153 | __this_cpu_write(apf_reason.enabled, 1); | ||
154 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", | ||
155 | smp_processor_id()); | ||
156 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c | ||
157 | index 13f5d4217e4f..4f544f2a7b06 100644 | ||
158 | --- a/arch/x86/kvm/cpuid.c | ||
159 | +++ b/arch/x86/kvm/cpuid.c | ||
160 | @@ -597,7 +597,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | ||
161 | (1 << KVM_FEATURE_ASYNC_PF) | | ||
162 | (1 << KVM_FEATURE_PV_EOI) | | ||
163 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | | ||
164 | - (1 << KVM_FEATURE_PV_UNHALT); | ||
165 | + (1 << KVM_FEATURE_PV_UNHALT) | | ||
166 | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); | ||
167 | |||
168 | if (sched_info_on()) | ||
169 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); | ||
170 | diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c | ||
171 | index 0554e8aef4d5..940aac70b4da 100644 | ||
172 | --- a/arch/x86/net/bpf_jit_comp.c | ||
173 | +++ b/arch/x86/net/bpf_jit_comp.c | ||
174 | @@ -13,6 +13,7 @@ | ||
175 | #include <linux/if_vlan.h> | ||
176 | #include <asm/cacheflush.h> | ||
177 | #include <asm/set_memory.h> | ||
178 | +#include <asm/nospec-branch.h> | ||
179 | #include <linux/bpf.h> | ||
180 | |||
181 | int bpf_jit_enable __read_mostly; | ||
182 | @@ -287,7 +288,7 @@ static void emit_bpf_tail_call(u8 **pprog) | ||
183 | EMIT2(0x89, 0xD2); /* mov edx, edx */ | ||
184 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ | ||
185 | offsetof(struct bpf_array, map.max_entries)); | ||
186 | -#define OFFSET1 43 /* number of bytes to jump */ | ||
187 | +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ | ||
188 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ | ||
189 | label1 = cnt; | ||
190 | |||
191 | @@ -296,7 +297,7 @@ static void emit_bpf_tail_call(u8 **pprog) | ||
192 | */ | ||
193 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ | ||
194 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ | ||
195 | -#define OFFSET2 32 | ||
196 | +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) | ||
197 | EMIT2(X86_JA, OFFSET2); /* ja out */ | ||
198 | label2 = cnt; | ||
199 | EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ | ||
200 | @@ -310,7 +311,7 @@ static void emit_bpf_tail_call(u8 **pprog) | ||
201 | * goto out; | ||
202 | */ | ||
203 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ | ||
204 | -#define OFFSET3 10 | ||
205 | +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) | ||
206 | EMIT2(X86_JE, OFFSET3); /* je out */ | ||
207 | label3 = cnt; | ||
208 | |||
209 | @@ -323,7 +324,7 @@ static void emit_bpf_tail_call(u8 **pprog) | ||
210 | * rdi == ctx (1st arg) | ||
211 | * rax == prog->bpf_func + prologue_size | ||
212 | */ | ||
213 | - EMIT2(0xFF, 0xE0); /* jmp rax */ | ||
214 | + RETPOLINE_RAX_BPF_JIT(); | ||
215 | |||
216 | /* out: */ | ||
217 | BUILD_BUG_ON(cnt - label1 != OFFSET1); | ||
218 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c | ||
219 | index 8027de465d47..f43b51452596 100644 | ||
220 | --- a/drivers/scsi/mpt3sas/mpt3sas_base.c | ||
221 | +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c | ||
222 | @@ -6289,14 +6289,14 @@ _base_reset_handler(struct MPT3SAS_ADAPTER *ioc, int reset_phase) | ||
223 | } | ||
224 | |||
225 | /** | ||
226 | - * _wait_for_commands_to_complete - reset controller | ||
227 | + * mpt3sas_wait_for_commands_to_complete - reset controller | ||
228 | * @ioc: Pointer to MPT_ADAPTER structure | ||
229 | * | ||
230 | * This function waiting(3s) for all pending commands to complete | ||
231 | * prior to putting controller in reset. | ||
232 | */ | ||
233 | -static void | ||
234 | -_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) | ||
235 | +void | ||
236 | +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) | ||
237 | { | ||
238 | u32 ioc_state; | ||
239 | unsigned long flags; | ||
240 | @@ -6375,7 +6375,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, | ||
241 | is_fault = 1; | ||
242 | } | ||
243 | _base_reset_handler(ioc, MPT3_IOC_PRE_RESET); | ||
244 | - _wait_for_commands_to_complete(ioc); | ||
245 | + mpt3sas_wait_for_commands_to_complete(ioc); | ||
246 | _base_mask_interrupts(ioc); | ||
247 | r = _base_make_ioc_ready(ioc, type); | ||
248 | if (r) | ||
249 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h | ||
250 | index 60f42ca3954f..69022b10a3d8 100644 | ||
251 | --- a/drivers/scsi/mpt3sas/mpt3sas_base.h | ||
252 | +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h | ||
253 | @@ -1435,6 +1435,9 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc, | ||
254 | |||
255 | int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc); | ||
256 | |||
257 | +void | ||
258 | +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc); | ||
259 | + | ||
260 | |||
261 | /* scsih shared API */ | ||
262 | u8 mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index, | ||
263 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
264 | index b258f210120a..741b0a28c2e3 100644 | ||
265 | --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
266 | +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c | ||
267 | @@ -2998,7 +2998,8 @@ scsih_abort(struct scsi_cmnd *scmd) | ||
268 | _scsih_tm_display_info(ioc, scmd); | ||
269 | |||
270 | sas_device_priv_data = scmd->device->hostdata; | ||
271 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { | ||
272 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || | ||
273 | + ioc->remove_host) { | ||
274 | sdev_printk(KERN_INFO, scmd->device, | ||
275 | "device been deleted! scmd(%p)\n", scmd); | ||
276 | scmd->result = DID_NO_CONNECT << 16; | ||
277 | @@ -3060,7 +3061,8 @@ scsih_dev_reset(struct scsi_cmnd *scmd) | ||
278 | _scsih_tm_display_info(ioc, scmd); | ||
279 | |||
280 | sas_device_priv_data = scmd->device->hostdata; | ||
281 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { | ||
282 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || | ||
283 | + ioc->remove_host) { | ||
284 | sdev_printk(KERN_INFO, scmd->device, | ||
285 | "device been deleted! scmd(%p)\n", scmd); | ||
286 | scmd->result = DID_NO_CONNECT << 16; | ||
287 | @@ -3122,7 +3124,8 @@ scsih_target_reset(struct scsi_cmnd *scmd) | ||
288 | _scsih_tm_display_info(ioc, scmd); | ||
289 | |||
290 | sas_device_priv_data = scmd->device->hostdata; | ||
291 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { | ||
292 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || | ||
293 | + ioc->remove_host) { | ||
294 | starget_printk(KERN_INFO, starget, "target been deleted! scmd(%p)\n", | ||
295 | scmd); | ||
296 | scmd->result = DID_NO_CONNECT << 16; | ||
297 | @@ -3179,7 +3182,7 @@ scsih_host_reset(struct scsi_cmnd *scmd) | ||
298 | ioc->name, scmd); | ||
299 | scsi_print_command(scmd); | ||
300 | |||
301 | - if (ioc->is_driver_loading) { | ||
302 | + if (ioc->is_driver_loading || ioc->remove_host) { | ||
303 | pr_info(MPT3SAS_FMT "Blocking the host reset\n", | ||
304 | ioc->name); | ||
305 | r = FAILED; | ||
306 | @@ -4611,7 +4614,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) | ||
307 | _scsih_set_satl_pending(scmd, false); | ||
308 | mpt3sas_base_free_smid(ioc, smid); | ||
309 | scsi_dma_unmap(scmd); | ||
310 | - if (ioc->pci_error_recovery) | ||
311 | + if (ioc->pci_error_recovery || ioc->remove_host) | ||
312 | scmd->result = DID_NO_CONNECT << 16; | ||
313 | else | ||
314 | scmd->result = DID_RESET << 16; | ||
315 | @@ -9901,6 +9904,10 @@ static void scsih_remove(struct pci_dev *pdev) | ||
316 | unsigned long flags; | ||
317 | |||
318 | ioc->remove_host = 1; | ||
319 | + | ||
320 | + mpt3sas_wait_for_commands_to_complete(ioc); | ||
321 | + _scsih_flush_running_cmds(ioc); | ||
322 | + | ||
323 | _scsih_fw_event_cleanup_queue(ioc); | ||
324 | |||
325 | spin_lock_irqsave(&ioc->fw_event_lock, flags); | ||
326 | @@ -9977,6 +9984,10 @@ scsih_shutdown(struct pci_dev *pdev) | ||
327 | unsigned long flags; | ||
328 | |||
329 | ioc->remove_host = 1; | ||
330 | + | ||
331 | + mpt3sas_wait_for_commands_to_complete(ioc); | ||
332 | + _scsih_flush_running_cmds(ioc); | ||
333 | + | ||
334 | _scsih_fw_event_cleanup_queue(ioc); | ||
335 | |||
336 | spin_lock_irqsave(&ioc->fw_event_lock, flags); | ||
337 | diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c | ||
338 | index ab94d304a634..8596aa31c75e 100644 | ||
339 | --- a/kernel/bpf/arraymap.c | ||
340 | +++ b/kernel/bpf/arraymap.c | ||
341 | @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array) | ||
342 | { | ||
343 | int i; | ||
344 | |||
345 | - for (i = 0; i < array->map.max_entries; i++) | ||
346 | + for (i = 0; i < array->map.max_entries; i++) { | ||
347 | free_percpu(array->pptrs[i]); | ||
348 | + cond_resched(); | ||
349 | + } | ||
350 | } | ||
351 | |||
352 | static int bpf_array_alloc_percpu(struct bpf_array *array) | ||
353 | @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) | ||
354 | return -ENOMEM; | ||
355 | } | ||
356 | array->pptrs[i] = ptr; | ||
357 | + cond_resched(); | ||
358 | } | ||
359 | |||
360 | return 0; | ||
361 | @@ -52,11 +55,11 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) | ||
362 | static struct bpf_map *array_map_alloc(union bpf_attr *attr) | ||
363 | { | ||
364 | bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; | ||
365 | - int numa_node = bpf_map_attr_numa_node(attr); | ||
366 | + int ret, numa_node = bpf_map_attr_numa_node(attr); | ||
367 | u32 elem_size, index_mask, max_entries; | ||
368 | bool unpriv = !capable(CAP_SYS_ADMIN); | ||
369 | + u64 cost, array_size, mask64; | ||
370 | struct bpf_array *array; | ||
371 | - u64 array_size, mask64; | ||
372 | |||
373 | /* check sanity of attributes */ | ||
374 | if (attr->max_entries == 0 || attr->key_size != 4 || | ||
375 | @@ -101,8 +104,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | ||
376 | array_size += (u64) max_entries * elem_size; | ||
377 | |||
378 | /* make sure there is no u32 overflow later in round_up() */ | ||
379 | - if (array_size >= U32_MAX - PAGE_SIZE) | ||
380 | + cost = array_size; | ||
381 | + if (cost >= U32_MAX - PAGE_SIZE) | ||
382 | return ERR_PTR(-ENOMEM); | ||
383 | + if (percpu) { | ||
384 | + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); | ||
385 | + if (cost >= U32_MAX - PAGE_SIZE) | ||
386 | + return ERR_PTR(-ENOMEM); | ||
387 | + } | ||
388 | + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | ||
389 | + | ||
390 | + ret = bpf_map_precharge_memlock(cost); | ||
391 | + if (ret < 0) | ||
392 | + return ERR_PTR(ret); | ||
393 | |||
394 | /* allocate all map elements and zero-initialize them */ | ||
395 | array = bpf_map_area_alloc(array_size, numa_node); | ||
396 | @@ -118,20 +132,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | ||
397 | array->map.max_entries = attr->max_entries; | ||
398 | array->map.map_flags = attr->map_flags; | ||
399 | array->map.numa_node = numa_node; | ||
400 | + array->map.pages = cost; | ||
401 | array->elem_size = elem_size; | ||
402 | |||
403 | - if (!percpu) | ||
404 | - goto out; | ||
405 | - | ||
406 | - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); | ||
407 | - | ||
408 | - if (array_size >= U32_MAX - PAGE_SIZE || | ||
409 | - bpf_array_alloc_percpu(array)) { | ||
410 | + if (percpu && bpf_array_alloc_percpu(array)) { | ||
411 | bpf_map_area_free(array); | ||
412 | return ERR_PTR(-ENOMEM); | ||
413 | } | ||
414 | -out: | ||
415 | - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; | ||
416 | |||
417 | return &array->map; | ||
418 | } | ||
419 | diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c | ||
420 | index 885e45479680..424f89ac4adc 100644 | ||
421 | --- a/kernel/bpf/lpm_trie.c | ||
422 | +++ b/kernel/bpf/lpm_trie.c | ||
423 | @@ -560,7 +560,10 @@ static void trie_free(struct bpf_map *map) | ||
424 | struct lpm_trie_node __rcu **slot; | ||
425 | struct lpm_trie_node *node; | ||
426 | |||
427 | - raw_spin_lock(&trie->lock); | ||
428 | + /* Wait for outstanding programs to complete | ||
429 | + * update/lookup/delete/get_next_key and free the trie. | ||
430 | + */ | ||
431 | + synchronize_rcu(); | ||
432 | |||
433 | /* Always start at the root and walk down to a node that has no | ||
434 | * children. Then free that node, nullify its reference in the parent | ||
435 | @@ -571,10 +574,9 @@ static void trie_free(struct bpf_map *map) | ||
436 | slot = &trie->root; | ||
437 | |||
438 | for (;;) { | ||
439 | - node = rcu_dereference_protected(*slot, | ||
440 | - lockdep_is_held(&trie->lock)); | ||
441 | + node = rcu_dereference_protected(*slot, 1); | ||
442 | if (!node) | ||
443 | - goto unlock; | ||
444 | + goto out; | ||
445 | |||
446 | if (rcu_access_pointer(node->child[0])) { | ||
447 | slot = &node->child[0]; | ||
448 | @@ -592,8 +594,8 @@ static void trie_free(struct bpf_map *map) | ||
449 | } | ||
450 | } | ||
451 | |||
452 | -unlock: | ||
453 | - raw_spin_unlock(&trie->lock); | ||
454 | +out: | ||
455 | + kfree(trie); | ||
456 | } | ||
457 | |||
458 | static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
459 | diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c | ||
460 | index 13551e623501..7125ddbb24df 100644 | ||
461 | --- a/kernel/bpf/verifier.c | ||
462 | +++ b/kernel/bpf/verifier.c | ||
463 | @@ -985,6 +985,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) | ||
464 | return reg->type == PTR_TO_CTX; | ||
465 | } | ||
466 | |||
467 | +static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) | ||
468 | +{ | ||
469 | + const struct bpf_reg_state *reg = cur_regs(env) + regno; | ||
470 | + | ||
471 | + return type_is_pkt_pointer(reg->type); | ||
472 | +} | ||
473 | + | ||
474 | static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, | ||
475 | const struct bpf_reg_state *reg, | ||
476 | int off, int size, bool strict) | ||
477 | @@ -1045,10 +1052,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env, | ||
478 | } | ||
479 | |||
480 | static int check_ptr_alignment(struct bpf_verifier_env *env, | ||
481 | - const struct bpf_reg_state *reg, | ||
482 | - int off, int size) | ||
483 | + const struct bpf_reg_state *reg, int off, | ||
484 | + int size, bool strict_alignment_once) | ||
485 | { | ||
486 | - bool strict = env->strict_alignment; | ||
487 | + bool strict = env->strict_alignment || strict_alignment_once; | ||
488 | const char *pointer_desc = ""; | ||
489 | |||
490 | switch (reg->type) { | ||
491 | @@ -1108,9 +1115,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) | ||
492 | * if t==write && value_regno==-1, some unknown value is stored into memory | ||
493 | * if t==read && value_regno==-1, don't care what we read from memory | ||
494 | */ | ||
495 | -static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, | ||
496 | - int bpf_size, enum bpf_access_type t, | ||
497 | - int value_regno) | ||
498 | +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, | ||
499 | + int off, int bpf_size, enum bpf_access_type t, | ||
500 | + int value_regno, bool strict_alignment_once) | ||
501 | { | ||
502 | struct bpf_verifier_state *state = env->cur_state; | ||
503 | struct bpf_reg_state *regs = cur_regs(env); | ||
504 | @@ -1122,7 +1129,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn | ||
505 | return size; | ||
506 | |||
507 | /* alignment checks will add in reg->off themselves */ | ||
508 | - err = check_ptr_alignment(env, reg, off, size); | ||
509 | + err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); | ||
510 | if (err) | ||
511 | return err; | ||
512 | |||
513 | @@ -1265,21 +1272,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins | ||
514 | return -EACCES; | ||
515 | } | ||
516 | |||
517 | - if (is_ctx_reg(env, insn->dst_reg)) { | ||
518 | - verbose(env, "BPF_XADD stores into R%d context is not allowed\n", | ||
519 | - insn->dst_reg); | ||
520 | + if (is_ctx_reg(env, insn->dst_reg) || | ||
521 | + is_pkt_reg(env, insn->dst_reg)) { | ||
522 | + verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", | ||
523 | + insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ? | ||
524 | + "context" : "packet"); | ||
525 | return -EACCES; | ||
526 | } | ||
527 | |||
528 | /* check whether atomic_add can read the memory */ | ||
529 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, | ||
530 | - BPF_SIZE(insn->code), BPF_READ, -1); | ||
531 | + BPF_SIZE(insn->code), BPF_READ, -1, true); | ||
532 | if (err) | ||
533 | return err; | ||
534 | |||
535 | /* check whether atomic_add can write into the same memory */ | ||
536 | return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, | ||
537 | - BPF_SIZE(insn->code), BPF_WRITE, -1); | ||
538 | + BPF_SIZE(insn->code), BPF_WRITE, -1, true); | ||
539 | } | ||
540 | |||
541 | /* Does this register contain a constant zero? */ | ||
542 | @@ -1763,7 +1772,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) | ||
543 | * is inferred from register state. | ||
544 | */ | ||
545 | for (i = 0; i < meta.access_size; i++) { | ||
546 | - err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); | ||
547 | + err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, | ||
548 | + BPF_WRITE, -1, false); | ||
549 | if (err) | ||
550 | return err; | ||
551 | } | ||
552 | @@ -3933,7 +3943,7 @@ static int do_check(struct bpf_verifier_env *env) | ||
553 | */ | ||
554 | err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, | ||
555 | BPF_SIZE(insn->code), BPF_READ, | ||
556 | - insn->dst_reg); | ||
557 | + insn->dst_reg, false); | ||
558 | if (err) | ||
559 | return err; | ||
560 | |||
561 | @@ -3985,7 +3995,7 @@ static int do_check(struct bpf_verifier_env *env) | ||
562 | /* check that memory (dst_reg + off) is writeable */ | ||
563 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, | ||
564 | BPF_SIZE(insn->code), BPF_WRITE, | ||
565 | - insn->src_reg); | ||
566 | + insn->src_reg, false); | ||
567 | if (err) | ||
568 | return err; | ||
569 | |||
570 | @@ -4020,7 +4030,7 @@ static int do_check(struct bpf_verifier_env *env) | ||
571 | /* check that memory (dst_reg + off) is writeable */ | ||
572 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, | ||
573 | BPF_SIZE(insn->code), BPF_WRITE, | ||
574 | - -1); | ||
575 | + -1, false); | ||
576 | if (err) | ||
577 | return err; | ||
578 | |||
579 | diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c | ||
580 | index 5ed4175c4ff8..0694527acaa0 100644 | ||
581 | --- a/tools/testing/selftests/bpf/test_verifier.c | ||
582 | +++ b/tools/testing/selftests/bpf/test_verifier.c | ||
583 | @@ -2254,6 +2254,32 @@ static struct bpf_test tests[] = { | ||
584 | .result_unpriv = REJECT, | ||
585 | .result = ACCEPT, | ||
586 | }, | ||
587 | + { | ||
588 | + "runtime/jit: pass negative index to tail_call", | ||
589 | + .insns = { | ||
590 | + BPF_MOV64_IMM(BPF_REG_3, -1), | ||
591 | + BPF_LD_MAP_FD(BPF_REG_2, 0), | ||
592 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
593 | + BPF_FUNC_tail_call), | ||
594 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
595 | + BPF_EXIT_INSN(), | ||
596 | + }, | ||
597 | + .fixup_prog = { 1 }, | ||
598 | + .result = ACCEPT, | ||
599 | + }, | ||
600 | + { | ||
601 | + "runtime/jit: pass > 32bit index to tail_call", | ||
602 | + .insns = { | ||
603 | + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), | ||
604 | + BPF_LD_MAP_FD(BPF_REG_2, 0), | ||
605 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
606 | + BPF_FUNC_tail_call), | ||
607 | + BPF_MOV64_IMM(BPF_REG_0, 0), | ||
608 | + BPF_EXIT_INSN(), | ||
609 | + }, | ||
610 | + .fixup_prog = { 2 }, | ||
611 | + .result = ACCEPT, | ||
612 | + }, | ||
613 | { | ||
614 | "stack pointer arithmetic", | ||
615 | .insns = { | ||
616 | @@ -8826,6 +8852,64 @@ static struct bpf_test tests[] = { | ||
617 | .result = REJECT, | ||
618 | .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, | ||
619 | }, | ||
620 | + { | ||
621 | + "xadd/w check unaligned stack", | ||
622 | + .insns = { | ||
623 | + BPF_MOV64_IMM(BPF_REG_0, 1), | ||
624 | + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), | ||
625 | + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), | ||
626 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), | ||
627 | + BPF_EXIT_INSN(), | ||
628 | + }, | ||
629 | + .result = REJECT, | ||
630 | + .errstr = "misaligned stack access off", | ||
631 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
632 | + }, | ||
633 | + { | ||
634 | + "xadd/w check unaligned map", | ||
635 | + .insns = { | ||
636 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), | ||
637 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), | ||
638 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), | ||
639 | + BPF_LD_MAP_FD(BPF_REG_1, 0), | ||
640 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, | ||
641 | + BPF_FUNC_map_lookup_elem), | ||
642 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), | ||
643 | + BPF_EXIT_INSN(), | ||
644 | + BPF_MOV64_IMM(BPF_REG_1, 1), | ||
645 | + BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), | ||
646 | + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), | ||
647 | + BPF_EXIT_INSN(), | ||
648 | + }, | ||
649 | + .fixup_map1 = { 3 }, | ||
650 | + .result = REJECT, | ||
651 | + .errstr = "misaligned value access off", | ||
652 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, | ||
653 | + }, | ||
654 | + { | ||
655 | + "xadd/w check unaligned pkt", | ||
656 | + .insns = { | ||
657 | + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, | ||
658 | + offsetof(struct xdp_md, data)), | ||
659 | + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, | ||
660 | + offsetof(struct xdp_md, data_end)), | ||
661 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), | ||
662 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), | ||
663 | + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), | ||
664 | + BPF_MOV64_IMM(BPF_REG_0, 99), | ||
665 | + BPF_JMP_IMM(BPF_JA, 0, 0, 6), | ||
666 | + BPF_MOV64_IMM(BPF_REG_0, 1), | ||
667 | + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), | ||
668 | + BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), | ||
669 | + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), | ||
670 | + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), | ||
671 | + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), | ||
672 | + BPF_EXIT_INSN(), | ||
673 | + }, | ||
674 | + .result = REJECT, | ||
675 | + .errstr = "BPF_XADD stores into R2 packet", | ||
676 | + .prog_type = BPF_PROG_TYPE_XDP, | ||
677 | + }, | ||
678 | }; | ||
679 | |||
680 | static int probe_filter_length(const struct bpf_insn *fp) |