Contents of /trunk/kernel-magellan/patches-4.15/0108-4.15.9-all-fixes.patch
Parent Directory | Revision Log
Revision 3092 -
(show annotations)
(download)
Wed Mar 21 14:52:46 2018 UTC (6 years, 7 months ago) by niro
File size: 24210 byte(s)
-linux-4.15.9
1 | diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt |
2 | index 3c65feb83010..a81c97a4b4a5 100644 |
3 | --- a/Documentation/virtual/kvm/cpuid.txt |
4 | +++ b/Documentation/virtual/kvm/cpuid.txt |
5 | @@ -54,6 +54,10 @@ KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit |
6 | || || before enabling paravirtualized |
7 | || || spinlock support. |
8 | ------------------------------------------------------------------------------ |
9 | +KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit |
10 | + || || can be enabled by setting bit 2 |
11 | + || || when writing to msr 0x4b564d02 |
12 | +------------------------------------------------------------------------------ |
13 | KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side |
14 | || || per-cpu warps are expected in |
15 | || || kvmclock. |
16 | diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt |
17 | index 1ebecc115dc6..f3f0d57ced8e 100644 |
18 | --- a/Documentation/virtual/kvm/msr.txt |
19 | +++ b/Documentation/virtual/kvm/msr.txt |
20 | @@ -170,7 +170,8 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02 |
21 | when asynchronous page faults are enabled on the vcpu 0 when |
22 | disabled. Bit 1 is 1 if asynchronous page faults can be injected |
23 | when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults |
24 | - are delivered to L1 as #PF vmexits. |
25 | + are delivered to L1 as #PF vmexits. Bit 2 can be set only if |
26 | + KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID. |
27 | |
28 | First 4 byte of 64 byte memory location will be written to by |
29 | the hypervisor at the time of asynchronous page fault (APF) |
30 | diff --git a/Makefile b/Makefile |
31 | index eb18d200a603..0420f9a0c70f 100644 |
32 | --- a/Makefile |
33 | +++ b/Makefile |
34 | @@ -1,7 +1,7 @@ |
35 | # SPDX-License-Identifier: GPL-2.0 |
36 | VERSION = 4 |
37 | PATCHLEVEL = 15 |
38 | -SUBLEVEL = 8 |
39 | +SUBLEVEL = 9 |
40 | EXTRAVERSION = |
41 | NAME = Fearless Coyote |
42 | |
43 | diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c |
44 | index bb32f7f6dd0f..be155f70f108 100644 |
45 | --- a/arch/arm64/net/bpf_jit_comp.c |
46 | +++ b/arch/arm64/net/bpf_jit_comp.c |
47 | @@ -238,8 +238,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) |
48 | off = offsetof(struct bpf_array, map.max_entries); |
49 | emit_a64_mov_i64(tmp, off, ctx); |
50 | emit(A64_LDR32(tmp, r2, tmp), ctx); |
51 | + emit(A64_MOV(0, r3, r3), ctx); |
52 | emit(A64_CMP(0, r3, tmp), ctx); |
53 | - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); |
54 | + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); |
55 | |
56 | /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) |
57 | * goto out; |
58 | @@ -247,7 +248,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) |
59 | */ |
60 | emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); |
61 | emit(A64_CMP(1, tcc, tmp), ctx); |
62 | - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); |
63 | + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); |
64 | emit(A64_ADD_I(1, tcc, tcc, 1), ctx); |
65 | |
66 | /* prog = array->ptrs[index]; |
67 | diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c |
68 | index d183b4801bdb..35591fb09042 100644 |
69 | --- a/arch/powerpc/net/bpf_jit_comp64.c |
70 | +++ b/arch/powerpc/net/bpf_jit_comp64.c |
71 | @@ -242,6 +242,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 |
72 | * goto out; |
73 | */ |
74 | PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); |
75 | + PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31); |
76 | PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); |
77 | PPC_BCC(COND_GE, out); |
78 | |
79 | diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h |
80 | index 76b058533e47..81a1be326571 100644 |
81 | --- a/arch/x86/include/asm/nospec-branch.h |
82 | +++ b/arch/x86/include/asm/nospec-branch.h |
83 | @@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) |
84 | } |
85 | |
86 | #endif /* __ASSEMBLY__ */ |
87 | + |
88 | +/* |
89 | + * Below is used in the eBPF JIT compiler and emits the byte sequence |
90 | + * for the following assembly: |
91 | + * |
92 | + * With retpolines configured: |
93 | + * |
94 | + * callq do_rop |
95 | + * spec_trap: |
96 | + * pause |
97 | + * lfence |
98 | + * jmp spec_trap |
99 | + * do_rop: |
100 | + * mov %rax,(%rsp) |
101 | + * retq |
102 | + * |
103 | + * Without retpolines configured: |
104 | + * |
105 | + * jmp *%rax |
106 | + */ |
107 | +#ifdef CONFIG_RETPOLINE |
108 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 |
109 | +# define RETPOLINE_RAX_BPF_JIT() \ |
110 | + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ |
111 | + /* spec_trap: */ \ |
112 | + EMIT2(0xF3, 0x90); /* pause */ \ |
113 | + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ |
114 | + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ |
115 | + /* do_rop: */ \ |
116 | + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ |
117 | + EMIT1(0xC3); /* retq */ |
118 | +#else |
119 | +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 |
120 | +# define RETPOLINE_RAX_BPF_JIT() \ |
121 | + EMIT2(0xFF, 0xE0); /* jmp *%rax */ |
122 | +#endif |
123 | + |
124 | #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ |
125 | diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h |
126 | index 09cc06483bed..989db885de97 100644 |
127 | --- a/arch/x86/include/uapi/asm/kvm_para.h |
128 | +++ b/arch/x86/include/uapi/asm/kvm_para.h |
129 | @@ -25,6 +25,7 @@ |
130 | #define KVM_FEATURE_STEAL_TIME 5 |
131 | #define KVM_FEATURE_PV_EOI 6 |
132 | #define KVM_FEATURE_PV_UNHALT 7 |
133 | +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 |
134 | |
135 | /* The last 8 bits are used to indicate how to interpret the flags field |
136 | * in pvclock structure. If no bits are set, all flags are ignored. |
137 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c |
138 | index b40ffbf156c1..0a93e83b774a 100644 |
139 | --- a/arch/x86/kernel/kvm.c |
140 | +++ b/arch/x86/kernel/kvm.c |
141 | @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) |
142 | #endif |
143 | pa |= KVM_ASYNC_PF_ENABLED; |
144 | |
145 | - /* Async page fault support for L1 hypervisor is optional */ |
146 | - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, |
147 | - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) |
148 | - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); |
149 | + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) |
150 | + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; |
151 | + |
152 | + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); |
153 | __this_cpu_write(apf_reason.enabled, 1); |
154 | printk(KERN_INFO"KVM setup async PF for cpu %d\n", |
155 | smp_processor_id()); |
156 | diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c |
157 | index 13f5d4217e4f..4f544f2a7b06 100644 |
158 | --- a/arch/x86/kvm/cpuid.c |
159 | +++ b/arch/x86/kvm/cpuid.c |
160 | @@ -597,7 +597,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
161 | (1 << KVM_FEATURE_ASYNC_PF) | |
162 | (1 << KVM_FEATURE_PV_EOI) | |
163 | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | |
164 | - (1 << KVM_FEATURE_PV_UNHALT); |
165 | + (1 << KVM_FEATURE_PV_UNHALT) | |
166 | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); |
167 | |
168 | if (sched_info_on()) |
169 | entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); |
170 | diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c |
171 | index 0554e8aef4d5..940aac70b4da 100644 |
172 | --- a/arch/x86/net/bpf_jit_comp.c |
173 | +++ b/arch/x86/net/bpf_jit_comp.c |
174 | @@ -13,6 +13,7 @@ |
175 | #include <linux/if_vlan.h> |
176 | #include <asm/cacheflush.h> |
177 | #include <asm/set_memory.h> |
178 | +#include <asm/nospec-branch.h> |
179 | #include <linux/bpf.h> |
180 | |
181 | int bpf_jit_enable __read_mostly; |
182 | @@ -287,7 +288,7 @@ static void emit_bpf_tail_call(u8 **pprog) |
183 | EMIT2(0x89, 0xD2); /* mov edx, edx */ |
184 | EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ |
185 | offsetof(struct bpf_array, map.max_entries)); |
186 | -#define OFFSET1 43 /* number of bytes to jump */ |
187 | +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ |
188 | EMIT2(X86_JBE, OFFSET1); /* jbe out */ |
189 | label1 = cnt; |
190 | |
191 | @@ -296,7 +297,7 @@ static void emit_bpf_tail_call(u8 **pprog) |
192 | */ |
193 | EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ |
194 | EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ |
195 | -#define OFFSET2 32 |
196 | +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) |
197 | EMIT2(X86_JA, OFFSET2); /* ja out */ |
198 | label2 = cnt; |
199 | EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ |
200 | @@ -310,7 +311,7 @@ static void emit_bpf_tail_call(u8 **pprog) |
201 | * goto out; |
202 | */ |
203 | EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ |
204 | -#define OFFSET3 10 |
205 | +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) |
206 | EMIT2(X86_JE, OFFSET3); /* je out */ |
207 | label3 = cnt; |
208 | |
209 | @@ -323,7 +324,7 @@ static void emit_bpf_tail_call(u8 **pprog) |
210 | * rdi == ctx (1st arg) |
211 | * rax == prog->bpf_func + prologue_size |
212 | */ |
213 | - EMIT2(0xFF, 0xE0); /* jmp rax */ |
214 | + RETPOLINE_RAX_BPF_JIT(); |
215 | |
216 | /* out: */ |
217 | BUILD_BUG_ON(cnt - label1 != OFFSET1); |
218 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c |
219 | index 8027de465d47..f43b51452596 100644 |
220 | --- a/drivers/scsi/mpt3sas/mpt3sas_base.c |
221 | +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c |
222 | @@ -6289,14 +6289,14 @@ _base_reset_handler(struct MPT3SAS_ADAPTER *ioc, int reset_phase) |
223 | } |
224 | |
225 | /** |
226 | - * _wait_for_commands_to_complete - reset controller |
227 | + * mpt3sas_wait_for_commands_to_complete - reset controller |
228 | * @ioc: Pointer to MPT_ADAPTER structure |
229 | * |
230 | * This function waiting(3s) for all pending commands to complete |
231 | * prior to putting controller in reset. |
232 | */ |
233 | -static void |
234 | -_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) |
235 | +void |
236 | +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc) |
237 | { |
238 | u32 ioc_state; |
239 | unsigned long flags; |
240 | @@ -6375,7 +6375,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, |
241 | is_fault = 1; |
242 | } |
243 | _base_reset_handler(ioc, MPT3_IOC_PRE_RESET); |
244 | - _wait_for_commands_to_complete(ioc); |
245 | + mpt3sas_wait_for_commands_to_complete(ioc); |
246 | _base_mask_interrupts(ioc); |
247 | r = _base_make_ioc_ready(ioc, type); |
248 | if (r) |
249 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h |
250 | index 60f42ca3954f..69022b10a3d8 100644 |
251 | --- a/drivers/scsi/mpt3sas/mpt3sas_base.h |
252 | +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h |
253 | @@ -1435,6 +1435,9 @@ void mpt3sas_base_update_missing_delay(struct MPT3SAS_ADAPTER *ioc, |
254 | |
255 | int mpt3sas_port_enable(struct MPT3SAS_ADAPTER *ioc); |
256 | |
257 | +void |
258 | +mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc); |
259 | + |
260 | |
261 | /* scsih shared API */ |
262 | u8 mpt3sas_scsih_event_callback(struct MPT3SAS_ADAPTER *ioc, u8 msix_index, |
263 | diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
264 | index b258f210120a..741b0a28c2e3 100644 |
265 | --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
266 | +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c |
267 | @@ -2998,7 +2998,8 @@ scsih_abort(struct scsi_cmnd *scmd) |
268 | _scsih_tm_display_info(ioc, scmd); |
269 | |
270 | sas_device_priv_data = scmd->device->hostdata; |
271 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { |
272 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || |
273 | + ioc->remove_host) { |
274 | sdev_printk(KERN_INFO, scmd->device, |
275 | "device been deleted! scmd(%p)\n", scmd); |
276 | scmd->result = DID_NO_CONNECT << 16; |
277 | @@ -3060,7 +3061,8 @@ scsih_dev_reset(struct scsi_cmnd *scmd) |
278 | _scsih_tm_display_info(ioc, scmd); |
279 | |
280 | sas_device_priv_data = scmd->device->hostdata; |
281 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { |
282 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || |
283 | + ioc->remove_host) { |
284 | sdev_printk(KERN_INFO, scmd->device, |
285 | "device been deleted! scmd(%p)\n", scmd); |
286 | scmd->result = DID_NO_CONNECT << 16; |
287 | @@ -3122,7 +3124,8 @@ scsih_target_reset(struct scsi_cmnd *scmd) |
288 | _scsih_tm_display_info(ioc, scmd); |
289 | |
290 | sas_device_priv_data = scmd->device->hostdata; |
291 | - if (!sas_device_priv_data || !sas_device_priv_data->sas_target) { |
292 | + if (!sas_device_priv_data || !sas_device_priv_data->sas_target || |
293 | + ioc->remove_host) { |
294 | starget_printk(KERN_INFO, starget, "target been deleted! scmd(%p)\n", |
295 | scmd); |
296 | scmd->result = DID_NO_CONNECT << 16; |
297 | @@ -3179,7 +3182,7 @@ scsih_host_reset(struct scsi_cmnd *scmd) |
298 | ioc->name, scmd); |
299 | scsi_print_command(scmd); |
300 | |
301 | - if (ioc->is_driver_loading) { |
302 | + if (ioc->is_driver_loading || ioc->remove_host) { |
303 | pr_info(MPT3SAS_FMT "Blocking the host reset\n", |
304 | ioc->name); |
305 | r = FAILED; |
306 | @@ -4611,7 +4614,7 @@ _scsih_flush_running_cmds(struct MPT3SAS_ADAPTER *ioc) |
307 | _scsih_set_satl_pending(scmd, false); |
308 | mpt3sas_base_free_smid(ioc, smid); |
309 | scsi_dma_unmap(scmd); |
310 | - if (ioc->pci_error_recovery) |
311 | + if (ioc->pci_error_recovery || ioc->remove_host) |
312 | scmd->result = DID_NO_CONNECT << 16; |
313 | else |
314 | scmd->result = DID_RESET << 16; |
315 | @@ -9901,6 +9904,10 @@ static void scsih_remove(struct pci_dev *pdev) |
316 | unsigned long flags; |
317 | |
318 | ioc->remove_host = 1; |
319 | + |
320 | + mpt3sas_wait_for_commands_to_complete(ioc); |
321 | + _scsih_flush_running_cmds(ioc); |
322 | + |
323 | _scsih_fw_event_cleanup_queue(ioc); |
324 | |
325 | spin_lock_irqsave(&ioc->fw_event_lock, flags); |
326 | @@ -9977,6 +9984,10 @@ scsih_shutdown(struct pci_dev *pdev) |
327 | unsigned long flags; |
328 | |
329 | ioc->remove_host = 1; |
330 | + |
331 | + mpt3sas_wait_for_commands_to_complete(ioc); |
332 | + _scsih_flush_running_cmds(ioc); |
333 | + |
334 | _scsih_fw_event_cleanup_queue(ioc); |
335 | |
336 | spin_lock_irqsave(&ioc->fw_event_lock, flags); |
337 | diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c |
338 | index ab94d304a634..8596aa31c75e 100644 |
339 | --- a/kernel/bpf/arraymap.c |
340 | +++ b/kernel/bpf/arraymap.c |
341 | @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array) |
342 | { |
343 | int i; |
344 | |
345 | - for (i = 0; i < array->map.max_entries; i++) |
346 | + for (i = 0; i < array->map.max_entries; i++) { |
347 | free_percpu(array->pptrs[i]); |
348 | + cond_resched(); |
349 | + } |
350 | } |
351 | |
352 | static int bpf_array_alloc_percpu(struct bpf_array *array) |
353 | @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) |
354 | return -ENOMEM; |
355 | } |
356 | array->pptrs[i] = ptr; |
357 | + cond_resched(); |
358 | } |
359 | |
360 | return 0; |
361 | @@ -52,11 +55,11 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) |
362 | static struct bpf_map *array_map_alloc(union bpf_attr *attr) |
363 | { |
364 | bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; |
365 | - int numa_node = bpf_map_attr_numa_node(attr); |
366 | + int ret, numa_node = bpf_map_attr_numa_node(attr); |
367 | u32 elem_size, index_mask, max_entries; |
368 | bool unpriv = !capable(CAP_SYS_ADMIN); |
369 | + u64 cost, array_size, mask64; |
370 | struct bpf_array *array; |
371 | - u64 array_size, mask64; |
372 | |
373 | /* check sanity of attributes */ |
374 | if (attr->max_entries == 0 || attr->key_size != 4 || |
375 | @@ -101,8 +104,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) |
376 | array_size += (u64) max_entries * elem_size; |
377 | |
378 | /* make sure there is no u32 overflow later in round_up() */ |
379 | - if (array_size >= U32_MAX - PAGE_SIZE) |
380 | + cost = array_size; |
381 | + if (cost >= U32_MAX - PAGE_SIZE) |
382 | return ERR_PTR(-ENOMEM); |
383 | + if (percpu) { |
384 | + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); |
385 | + if (cost >= U32_MAX - PAGE_SIZE) |
386 | + return ERR_PTR(-ENOMEM); |
387 | + } |
388 | + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; |
389 | + |
390 | + ret = bpf_map_precharge_memlock(cost); |
391 | + if (ret < 0) |
392 | + return ERR_PTR(ret); |
393 | |
394 | /* allocate all map elements and zero-initialize them */ |
395 | array = bpf_map_area_alloc(array_size, numa_node); |
396 | @@ -118,20 +132,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) |
397 | array->map.max_entries = attr->max_entries; |
398 | array->map.map_flags = attr->map_flags; |
399 | array->map.numa_node = numa_node; |
400 | + array->map.pages = cost; |
401 | array->elem_size = elem_size; |
402 | |
403 | - if (!percpu) |
404 | - goto out; |
405 | - |
406 | - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); |
407 | - |
408 | - if (array_size >= U32_MAX - PAGE_SIZE || |
409 | - bpf_array_alloc_percpu(array)) { |
410 | + if (percpu && bpf_array_alloc_percpu(array)) { |
411 | bpf_map_area_free(array); |
412 | return ERR_PTR(-ENOMEM); |
413 | } |
414 | -out: |
415 | - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; |
416 | |
417 | return &array->map; |
418 | } |
419 | diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c |
420 | index 885e45479680..424f89ac4adc 100644 |
421 | --- a/kernel/bpf/lpm_trie.c |
422 | +++ b/kernel/bpf/lpm_trie.c |
423 | @@ -560,7 +560,10 @@ static void trie_free(struct bpf_map *map) |
424 | struct lpm_trie_node __rcu **slot; |
425 | struct lpm_trie_node *node; |
426 | |
427 | - raw_spin_lock(&trie->lock); |
428 | + /* Wait for outstanding programs to complete |
429 | + * update/lookup/delete/get_next_key and free the trie. |
430 | + */ |
431 | + synchronize_rcu(); |
432 | |
433 | /* Always start at the root and walk down to a node that has no |
434 | * children. Then free that node, nullify its reference in the parent |
435 | @@ -571,10 +574,9 @@ static void trie_free(struct bpf_map *map) |
436 | slot = &trie->root; |
437 | |
438 | for (;;) { |
439 | - node = rcu_dereference_protected(*slot, |
440 | - lockdep_is_held(&trie->lock)); |
441 | + node = rcu_dereference_protected(*slot, 1); |
442 | if (!node) |
443 | - goto unlock; |
444 | + goto out; |
445 | |
446 | if (rcu_access_pointer(node->child[0])) { |
447 | slot = &node->child[0]; |
448 | @@ -592,8 +594,8 @@ static void trie_free(struct bpf_map *map) |
449 | } |
450 | } |
451 | |
452 | -unlock: |
453 | - raw_spin_unlock(&trie->lock); |
454 | +out: |
455 | + kfree(trie); |
456 | } |
457 | |
458 | static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) |
459 | diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c |
460 | index 13551e623501..7125ddbb24df 100644 |
461 | --- a/kernel/bpf/verifier.c |
462 | +++ b/kernel/bpf/verifier.c |
463 | @@ -985,6 +985,13 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) |
464 | return reg->type == PTR_TO_CTX; |
465 | } |
466 | |
467 | +static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) |
468 | +{ |
469 | + const struct bpf_reg_state *reg = cur_regs(env) + regno; |
470 | + |
471 | + return type_is_pkt_pointer(reg->type); |
472 | +} |
473 | + |
474 | static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, |
475 | const struct bpf_reg_state *reg, |
476 | int off, int size, bool strict) |
477 | @@ -1045,10 +1052,10 @@ static int check_generic_ptr_alignment(struct bpf_verifier_env *env, |
478 | } |
479 | |
480 | static int check_ptr_alignment(struct bpf_verifier_env *env, |
481 | - const struct bpf_reg_state *reg, |
482 | - int off, int size) |
483 | + const struct bpf_reg_state *reg, int off, |
484 | + int size, bool strict_alignment_once) |
485 | { |
486 | - bool strict = env->strict_alignment; |
487 | + bool strict = env->strict_alignment || strict_alignment_once; |
488 | const char *pointer_desc = ""; |
489 | |
490 | switch (reg->type) { |
491 | @@ -1108,9 +1115,9 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) |
492 | * if t==write && value_regno==-1, some unknown value is stored into memory |
493 | * if t==read && value_regno==-1, don't care what we read from memory |
494 | */ |
495 | -static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, |
496 | - int bpf_size, enum bpf_access_type t, |
497 | - int value_regno) |
498 | +static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, |
499 | + int off, int bpf_size, enum bpf_access_type t, |
500 | + int value_regno, bool strict_alignment_once) |
501 | { |
502 | struct bpf_verifier_state *state = env->cur_state; |
503 | struct bpf_reg_state *regs = cur_regs(env); |
504 | @@ -1122,7 +1129,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn |
505 | return size; |
506 | |
507 | /* alignment checks will add in reg->off themselves */ |
508 | - err = check_ptr_alignment(env, reg, off, size); |
509 | + err = check_ptr_alignment(env, reg, off, size, strict_alignment_once); |
510 | if (err) |
511 | return err; |
512 | |
513 | @@ -1265,21 +1272,23 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins |
514 | return -EACCES; |
515 | } |
516 | |
517 | - if (is_ctx_reg(env, insn->dst_reg)) { |
518 | - verbose(env, "BPF_XADD stores into R%d context is not allowed\n", |
519 | - insn->dst_reg); |
520 | + if (is_ctx_reg(env, insn->dst_reg) || |
521 | + is_pkt_reg(env, insn->dst_reg)) { |
522 | + verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", |
523 | + insn->dst_reg, is_ctx_reg(env, insn->dst_reg) ? |
524 | + "context" : "packet"); |
525 | return -EACCES; |
526 | } |
527 | |
528 | /* check whether atomic_add can read the memory */ |
529 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, |
530 | - BPF_SIZE(insn->code), BPF_READ, -1); |
531 | + BPF_SIZE(insn->code), BPF_READ, -1, true); |
532 | if (err) |
533 | return err; |
534 | |
535 | /* check whether atomic_add can write into the same memory */ |
536 | return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, |
537 | - BPF_SIZE(insn->code), BPF_WRITE, -1); |
538 | + BPF_SIZE(insn->code), BPF_WRITE, -1, true); |
539 | } |
540 | |
541 | /* Does this register contain a constant zero? */ |
542 | @@ -1763,7 +1772,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) |
543 | * is inferred from register state. |
544 | */ |
545 | for (i = 0; i < meta.access_size; i++) { |
546 | - err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1); |
547 | + err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, |
548 | + BPF_WRITE, -1, false); |
549 | if (err) |
550 | return err; |
551 | } |
552 | @@ -3933,7 +3943,7 @@ static int do_check(struct bpf_verifier_env *env) |
553 | */ |
554 | err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, |
555 | BPF_SIZE(insn->code), BPF_READ, |
556 | - insn->dst_reg); |
557 | + insn->dst_reg, false); |
558 | if (err) |
559 | return err; |
560 | |
561 | @@ -3985,7 +3995,7 @@ static int do_check(struct bpf_verifier_env *env) |
562 | /* check that memory (dst_reg + off) is writeable */ |
563 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, |
564 | BPF_SIZE(insn->code), BPF_WRITE, |
565 | - insn->src_reg); |
566 | + insn->src_reg, false); |
567 | if (err) |
568 | return err; |
569 | |
570 | @@ -4020,7 +4030,7 @@ static int do_check(struct bpf_verifier_env *env) |
571 | /* check that memory (dst_reg + off) is writeable */ |
572 | err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, |
573 | BPF_SIZE(insn->code), BPF_WRITE, |
574 | - -1); |
575 | + -1, false); |
576 | if (err) |
577 | return err; |
578 | |
579 | diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c |
580 | index 5ed4175c4ff8..0694527acaa0 100644 |
581 | --- a/tools/testing/selftests/bpf/test_verifier.c |
582 | +++ b/tools/testing/selftests/bpf/test_verifier.c |
583 | @@ -2254,6 +2254,32 @@ static struct bpf_test tests[] = { |
584 | .result_unpriv = REJECT, |
585 | .result = ACCEPT, |
586 | }, |
587 | + { |
588 | + "runtime/jit: pass negative index to tail_call", |
589 | + .insns = { |
590 | + BPF_MOV64_IMM(BPF_REG_3, -1), |
591 | + BPF_LD_MAP_FD(BPF_REG_2, 0), |
592 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
593 | + BPF_FUNC_tail_call), |
594 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
595 | + BPF_EXIT_INSN(), |
596 | + }, |
597 | + .fixup_prog = { 1 }, |
598 | + .result = ACCEPT, |
599 | + }, |
600 | + { |
601 | + "runtime/jit: pass > 32bit index to tail_call", |
602 | + .insns = { |
603 | + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), |
604 | + BPF_LD_MAP_FD(BPF_REG_2, 0), |
605 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
606 | + BPF_FUNC_tail_call), |
607 | + BPF_MOV64_IMM(BPF_REG_0, 0), |
608 | + BPF_EXIT_INSN(), |
609 | + }, |
610 | + .fixup_prog = { 2 }, |
611 | + .result = ACCEPT, |
612 | + }, |
613 | { |
614 | "stack pointer arithmetic", |
615 | .insns = { |
616 | @@ -8826,6 +8852,64 @@ static struct bpf_test tests[] = { |
617 | .result = REJECT, |
618 | .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, |
619 | }, |
620 | + { |
621 | + "xadd/w check unaligned stack", |
622 | + .insns = { |
623 | + BPF_MOV64_IMM(BPF_REG_0, 1), |
624 | + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), |
625 | + BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), |
626 | + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), |
627 | + BPF_EXIT_INSN(), |
628 | + }, |
629 | + .result = REJECT, |
630 | + .errstr = "misaligned stack access off", |
631 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, |
632 | + }, |
633 | + { |
634 | + "xadd/w check unaligned map", |
635 | + .insns = { |
636 | + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), |
637 | + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), |
638 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), |
639 | + BPF_LD_MAP_FD(BPF_REG_1, 0), |
640 | + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, |
641 | + BPF_FUNC_map_lookup_elem), |
642 | + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), |
643 | + BPF_EXIT_INSN(), |
644 | + BPF_MOV64_IMM(BPF_REG_1, 1), |
645 | + BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), |
646 | + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), |
647 | + BPF_EXIT_INSN(), |
648 | + }, |
649 | + .fixup_map1 = { 3 }, |
650 | + .result = REJECT, |
651 | + .errstr = "misaligned value access off", |
652 | + .prog_type = BPF_PROG_TYPE_SCHED_CLS, |
653 | + }, |
654 | + { |
655 | + "xadd/w check unaligned pkt", |
656 | + .insns = { |
657 | + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, |
658 | + offsetof(struct xdp_md, data)), |
659 | + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, |
660 | + offsetof(struct xdp_md, data_end)), |
661 | + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), |
662 | + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), |
663 | + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2), |
664 | + BPF_MOV64_IMM(BPF_REG_0, 99), |
665 | + BPF_JMP_IMM(BPF_JA, 0, 0, 6), |
666 | + BPF_MOV64_IMM(BPF_REG_0, 1), |
667 | + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), |
668 | + BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), |
669 | + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), |
670 | + BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), |
671 | + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), |
672 | + BPF_EXIT_INSN(), |
673 | + }, |
674 | + .result = REJECT, |
675 | + .errstr = "BPF_XADD stores into R2 packet", |
676 | + .prog_type = BPF_PROG_TYPE_XDP, |
677 | + }, |
678 | }; |
679 | |
680 | static int probe_filter_length(const struct bpf_insn *fp) |