Magellan Linux

Contents of /trunk/kernel-alx/patches-4.9/0178-4.9.79-all-fixes.patch



Revision 3198
Wed Aug 8 14:25:36 2018 UTC by niro
File size: 76658 bytes
-linux-4.9.79
1 diff --git a/Makefile b/Makefile
2 index 8a6f158a1176..4a7e6dff1c2e 100644
3 --- a/Makefile
4 +++ b/Makefile
5 @@ -1,6 +1,6 @@
6 VERSION = 4
7 PATCHLEVEL = 9
8 -SUBLEVEL = 78
9 +SUBLEVEL = 79
10 EXTRAVERSION =
11 NAME = Roaring Lionus
12
13 diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
14 index 2206e0e00934..2a35c1963f6d 100644
15 --- a/arch/arm/kvm/mmu.c
16 +++ b/arch/arm/kvm/mmu.c
17 @@ -1284,7 +1284,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
18 return -EFAULT;
19 }
20
21 - if (is_vm_hugetlb_page(vma) && !logging_active) {
22 + if (vma_kernel_pagesize(vma) && !logging_active) {
23 hugetlb = true;
24 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
25 } else {
26 diff --git a/arch/um/Makefile b/arch/um/Makefile
27 index 0ca46ededfc7..9c150ccb35d2 100644
28 --- a/arch/um/Makefile
29 +++ b/arch/um/Makefile
30 @@ -117,7 +117,7 @@ archheaders:
31 archprepare: include/generated/user_constants.h
32
33 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
34 -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
35 +LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie)
36
37 CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
38 $(call cc-option, -fno-stack-protector,) \
39 diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
40 index 6bb7e92c6d50..0174290b2857 100644
41 --- a/arch/x86/entry/vsyscall/vsyscall_64.c
42 +++ b/arch/x86/entry/vsyscall/vsyscall_64.c
43 @@ -46,6 +46,7 @@ static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
44 #else
45 EMULATE;
46 #endif
47 +unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
48
49 static int __init vsyscall_setup(char *str)
50 {
51 @@ -336,11 +337,11 @@ void __init map_vsyscall(void)
52 extern char __vsyscall_page;
53 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
54
55 + if (vsyscall_mode != NATIVE)
56 + vsyscall_pgprot = __PAGE_KERNEL_VVAR;
57 if (vsyscall_mode != NONE)
58 __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
59 - vsyscall_mode == NATIVE
60 - ? PAGE_KERNEL_VSYSCALL
61 - : PAGE_KERNEL_VVAR);
62 + __pgprot(vsyscall_pgprot));
63
64 BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
65 (unsigned long)VSYSCALL_ADDR);
66 diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c
67 index 9842270ed2f2..21a4e4127f43 100644
68 --- a/arch/x86/events/amd/power.c
69 +++ b/arch/x86/events/amd/power.c
70 @@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(void)
71 int ret;
72
73 if (!x86_match_cpu(cpu_match))
74 - return 0;
75 + return -ENODEV;
76
77 if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
78 return -ENODEV;
79 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
80 index e40b19ca486e..353f038ec645 100644
81 --- a/arch/x86/include/asm/processor.h
82 +++ b/arch/x86/include/asm/processor.h
83 @@ -596,7 +596,7 @@ static inline void sync_core(void)
84 {
85 int tmp;
86
87 -#ifdef CONFIG_M486
88 +#ifdef CONFIG_X86_32
89 /*
90 * Do a CPUID if available, otherwise do a jump. The jump
91 * can conveniently enough be the jump around CPUID.
92 diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
93 index 4865e10dbb55..9ee85066f407 100644
94 --- a/arch/x86/include/asm/vsyscall.h
95 +++ b/arch/x86/include/asm/vsyscall.h
96 @@ -13,6 +13,7 @@ extern void map_vsyscall(void);
97 */
98 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
99 extern bool vsyscall_enabled(void);
100 +extern unsigned long vsyscall_pgprot;
101 #else
102 static inline void map_vsyscall(void) {}
103 static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
104 diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
105 index de6626c18e42..be6337156502 100644
106 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c
107 +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
108 @@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu)
109 ci_leaf_init(this_leaf++, &id4_regs);
110 __cache_cpumap_setup(cpu, idx, &id4_regs);
111 }
112 + this_cpu_ci->cpu_map_populated = true;
113 +
114 return 0;
115 }
116
117 diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
118 index ac3e636ad586..f90f17610f62 100644
119 --- a/arch/x86/kernel/cpu/microcode/intel.c
120 +++ b/arch/x86/kernel/cpu/microcode/intel.c
121 @@ -40,6 +40,9 @@
122 #include <asm/setup.h>
123 #include <asm/msr.h>
124
125 +/* last level cache size per core */
126 +static int llc_size_per_core;
127 +
128 /*
129 * Temporary microcode blobs pointers storage. We note here during early load
130 * the pointers to microcode blobs we've got from whatever storage (detached
131 @@ -1053,12 +1056,14 @@ static bool is_blacklisted(unsigned int cpu)
132
133 /*
134 * Late loading on model 79 with microcode revision less than 0x0b000021
135 - * may result in a system hang. This behavior is documented in item
136 - * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
137 + * and LLC size per core bigger than 2.5MB may result in a system hang.
138 + * This behavior is documented in item BDF90, #334165 (Intel Xeon
139 + * Processor E7-8800/4800 v4 Product Family).
140 */
141 if (c->x86 == 6 &&
142 c->x86_model == INTEL_FAM6_BROADWELL_X &&
143 c->x86_mask == 0x01 &&
144 + llc_size_per_core > 2621440 &&
145 c->microcode < 0x0b000021) {
146 pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
147 pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
148 @@ -1125,6 +1130,15 @@ static struct microcode_ops microcode_intel_ops = {
149 .microcode_fini_cpu = microcode_fini_cpu,
150 };
151
152 +static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
153 +{
154 + u64 llc_size = c->x86_cache_size * 1024;
155 +
156 + do_div(llc_size, c->x86_max_cores);
157 +
158 + return (int)llc_size;
159 +}
160 +
161 struct microcode_ops * __init init_intel_microcode(void)
162 {
163 struct cpuinfo_x86 *c = &boot_cpu_data;
164 @@ -1135,6 +1149,8 @@ struct microcode_ops * __init init_intel_microcode(void)
165 return NULL;
166 }
167
168 + llc_size_per_core = calc_llc_size_per_core(c);
169 +
170 return &microcode_intel_ops;
171 }
172
173 diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
174 index 073d1f1a620b..9758524ee99f 100644
175 --- a/arch/x86/lib/delay.c
176 +++ b/arch/x86/lib/delay.c
177 @@ -93,6 +93,13 @@ static void delay_mwaitx(unsigned long __loops)
178 {
179 u64 start, end, delay, loops = __loops;
180
181 + /*
182 + * Timer value of 0 causes MWAITX to wait indefinitely, unless there
183 + * is a store on the memory monitored by MONITORX.
184 + */
185 + if (loops == 0)
186 + return;
187 +
188 start = rdtsc_ordered();
189
190 for (;;) {
191 diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
192 index a8ade08a9bf5..ec678aafa3f8 100644
193 --- a/arch/x86/mm/kaiser.c
194 +++ b/arch/x86/mm/kaiser.c
195 @@ -344,7 +344,7 @@ void __init kaiser_init(void)
196 if (vsyscall_enabled())
197 kaiser_add_user_map_early((void *)VSYSCALL_ADDR,
198 PAGE_SIZE,
199 - __PAGE_KERNEL_VSYSCALL);
200 + vsyscall_pgprot);
201
202 for_each_possible_cpu(cpu) {
203 void *percpu_vaddr = __per_cpu_user_mapped_start +
204 diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
205 index 15f743615923..7840331d3056 100644
206 --- a/arch/x86/net/bpf_jit_comp.c
207 +++ b/arch/x86/net/bpf_jit_comp.c
208 @@ -278,10 +278,10 @@ static void emit_bpf_tail_call(u8 **pprog)
209 /* if (index >= array->map.max_entries)
210 * goto out;
211 */
212 - EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
213 + EMIT2(0x89, 0xD2); /* mov edx, edx */
214 + EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
215 offsetof(struct bpf_array, map.max_entries));
216 - EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
217 -#define OFFSET1 47 /* number of bytes to jump */
218 +#define OFFSET1 43 /* number of bytes to jump */
219 EMIT2(X86_JBE, OFFSET1); /* jbe out */
220 label1 = cnt;
221
222 @@ -290,21 +290,20 @@ static void emit_bpf_tail_call(u8 **pprog)
223 */
224 EMIT2_off32(0x8B, 0x85, -STACKSIZE + 36); /* mov eax, dword ptr [rbp - 516] */
225 EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
226 -#define OFFSET2 36
227 +#define OFFSET2 32
228 EMIT2(X86_JA, OFFSET2); /* ja out */
229 label2 = cnt;
230 EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
231 EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */
232
233 /* prog = array->ptrs[index]; */
234 - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */
235 + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
236 offsetof(struct bpf_array, ptrs));
237 - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */
238
239 /* if (prog == NULL)
240 * goto out;
241 */
242 - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */
243 + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
244 #define OFFSET3 10
245 EMIT2(X86_JE, OFFSET3); /* je out */
246 label3 = cnt;
247 diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
248 index 691814dfed31..943702dd9517 100644
249 --- a/drivers/acpi/acpica/nsutils.c
250 +++ b/drivers/acpi/acpica/nsutils.c
251 @@ -594,25 +594,20 @@ struct acpi_namespace_node *acpi_ns_validate_handle(acpi_handle handle)
252 void acpi_ns_terminate(void)
253 {
254 acpi_status status;
255 + union acpi_operand_object *prev;
256 + union acpi_operand_object *next;
257
258 ACPI_FUNCTION_TRACE(ns_terminate);
259
260 -#ifdef ACPI_EXEC_APP
261 - {
262 - union acpi_operand_object *prev;
263 - union acpi_operand_object *next;
264 + /* Delete any module-level code blocks */
265
266 - /* Delete any module-level code blocks */
267 -
268 - next = acpi_gbl_module_code_list;
269 - while (next) {
270 - prev = next;
271 - next = next->method.mutex;
272 - prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
273 - acpi_ut_remove_reference(prev);
274 - }
275 + next = acpi_gbl_module_code_list;
276 + while (next) {
277 + prev = next;
278 + next = next->method.mutex;
279 + prev->method.mutex = NULL; /* Clear the Mutex (cheated) field */
280 + acpi_ut_remove_reference(prev);
281 }
282 -#endif
283
284 /*
285 * Free the entire namespace -- all nodes and all objects
286 diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
287 index 73c9c7fa9001..f06317d6fc38 100644
288 --- a/drivers/acpi/glue.c
289 +++ b/drivers/acpi/glue.c
290 @@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
291 return -ENODEV;
292
293 /*
294 - * If the device has a _HID (or _CID) returning a valid ACPI/PNP
295 - * device ID, it is better to make it look less attractive here, so that
296 - * the other device with the same _ADR value (that may not have a valid
297 - * device ID) can be matched going forward. [This means a second spec
298 - * violation in a row, so whatever we do here is best effort anyway.]
299 + * If the device has a _HID returning a valid ACPI/PNP device ID, it is
300 + * better to make it look less attractive here, so that the other device
301 + * with the same _ADR value (that may not have a valid device ID) can be
302 + * matched going forward. [This means a second spec violation in a row,
303 + * so whatever we do here is best effort anyway.]
304 */
305 - return sta_present && list_empty(&adev->pnp.ids) ?
306 + return sta_present && !adev->pnp.type.platform_id ?
307 FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
308 }
309
310 diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
311 index e9fd32e91668..70e13cf06ed0 100644
312 --- a/drivers/base/cacheinfo.c
313 +++ b/drivers/base/cacheinfo.c
314 @@ -16,6 +16,7 @@
315 * You should have received a copy of the GNU General Public License
316 * along with this program. If not, see <http://www.gnu.org/licenses/>.
317 */
318 +#include <linux/acpi.h>
319 #include <linux/bitops.h>
320 #include <linux/cacheinfo.h>
321 #include <linux/compiler.h>
322 @@ -104,9 +105,16 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
323 struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
324 struct cacheinfo *this_leaf, *sib_leaf;
325 unsigned int index;
326 - int ret;
327 + int ret = 0;
328 +
329 + if (this_cpu_ci->cpu_map_populated)
330 + return 0;
331
332 - ret = cache_setup_of_node(cpu);
333 + if (of_have_populated_dt())
334 + ret = cache_setup_of_node(cpu);
335 + else if (!acpi_disabled)
336 + /* No cache property/hierarchy support yet in ACPI */
337 + ret = -ENOTSUPP;
338 if (ret)
339 return ret;
340
341 @@ -203,8 +211,7 @@ static int detect_cache_attributes(unsigned int cpu)
342 */
343 ret = cache_shared_cpu_map_setup(cpu);
344 if (ret) {
345 - pr_warn("Unable to detect cache hierarchy from DT for CPU %d\n",
346 - cpu);
347 + pr_warn("Unable to detect cache hierarchy for CPU %d\n", cpu);
348 goto free_ci;
349 }
350 return 0;
351 diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
352 index 7e2dc5e56632..0b49f29bf0da 100644
353 --- a/drivers/input/mouse/trackpoint.c
354 +++ b/drivers/input/mouse/trackpoint.c
355 @@ -383,6 +383,9 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties)
356 if (trackpoint_read(&psmouse->ps2dev, TP_EXT_BTN, &button_info)) {
357 psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n");
358 button_info = 0x33;
359 + } else if (!button_info) {
360 + psmouse_warn(psmouse, "got 0 in extended button data, assuming 3 buttons\n");
361 + button_info = 0x33;
362 }
363
364 psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL);
365 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
366 index 1644896568c4..b2eeecb26939 100644
367 --- a/drivers/net/ethernet/emulex/benet/be_main.c
368 +++ b/drivers/net/ethernet/emulex/benet/be_main.c
369 @@ -4733,6 +4733,15 @@ int be_update_queues(struct be_adapter *adapter)
370
371 be_schedule_worker(adapter);
372
373 + /*
374 + * The IF was destroyed and re-created. We need to clear
375 + * all promiscuous flags valid for the destroyed IF.
376 + * Without this promisc mode is not restored during
377 + * be_open() because the driver thinks that it is
378 + * already enabled in HW.
379 + */
380 + adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
381 +
382 if (netif_running(netdev))
383 status = be_open(netdev);
384
385 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
386 index 8aa91ddff287..16556011d571 100644
387 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
388 +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
389 @@ -765,11 +765,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
390 dipn = htonl(dip);
391 dev = mlxsw_sp->rifs[rif]->dev;
392 n = neigh_lookup(&arp_tbl, &dipn, dev);
393 - if (!n) {
394 - netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
395 - &dip);
396 + if (!n)
397 return;
398 - }
399
400 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
401 neigh_event_send(n, NULL);
402 diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
403 index 2c4350a1c629..298b74ebc1e9 100644
404 --- a/drivers/net/ethernet/realtek/r8169.c
405 +++ b/drivers/net/ethernet/realtek/r8169.c
406 @@ -2222,19 +2222,14 @@ static bool rtl8169_do_counters(struct net_device *dev, u32 counter_cmd)
407 void __iomem *ioaddr = tp->mmio_addr;
408 dma_addr_t paddr = tp->counters_phys_addr;
409 u32 cmd;
410 - bool ret;
411
412 RTL_W32(CounterAddrHigh, (u64)paddr >> 32);
413 + RTL_R32(CounterAddrHigh);
414 cmd = (u64)paddr & DMA_BIT_MASK(32);
415 RTL_W32(CounterAddrLow, cmd);
416 RTL_W32(CounterAddrLow, cmd | counter_cmd);
417
418 - ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
419 -
420 - RTL_W32(CounterAddrLow, 0);
421 - RTL_W32(CounterAddrHigh, 0);
422 -
423 - return ret;
424 + return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
425 }
426
427 static bool rtl8169_reset_counters(struct net_device *dev)
428 diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
429 index b883af93929c..fc4c2ccc3d22 100644
430 --- a/drivers/net/ppp/ppp_generic.c
431 +++ b/drivers/net/ppp/ppp_generic.c
432 @@ -1002,17 +1002,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
433 if (!ifname_is_set)
434 snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index);
435
436 + mutex_unlock(&pn->all_ppp_mutex);
437 +
438 ret = register_netdevice(ppp->dev);
439 if (ret < 0)
440 goto err_unit;
441
442 atomic_inc(&ppp_unit_count);
443
444 - mutex_unlock(&pn->all_ppp_mutex);
445 -
446 return 0;
447
448 err_unit:
449 + mutex_lock(&pn->all_ppp_mutex);
450 unit_put(&pn->units_idr, ppp->file.index);
451 err:
452 mutex_unlock(&pn->all_ppp_mutex);
453 diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
454 index 4ddae8118c85..dc36c2ec1d10 100644
455 --- a/drivers/net/ppp/pppoe.c
456 +++ b/drivers/net/ppp/pppoe.c
457 @@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
458 struct pppoe_hdr *ph;
459 struct net_device *dev;
460 char *start;
461 + int hlen;
462
463 lock_sock(sk);
464 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
465 @@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
466 if (total_len > (dev->mtu + dev->hard_header_len))
467 goto end;
468
469 -
470 - skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
471 - 0, GFP_KERNEL);
472 + hlen = LL_RESERVED_SPACE(dev);
473 + skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
474 + dev->needed_tailroom, 0, GFP_KERNEL);
475 if (!skb) {
476 error = -ENOMEM;
477 goto end;
478 }
479
480 /* Reserve space for headers. */
481 - skb_reserve(skb, dev->hard_header_len);
482 + skb_reserve(skb, hlen);
483 skb_reset_network_header(skb);
484
485 skb->dev = dev;
486 @@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
487 /* Copy the data if there is no space for the header or if it's
488 * read-only.
489 */
490 - if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
491 + if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
492 goto abort;
493
494 __skb_push(skb, sizeof(*ph));
495 diff --git a/drivers/net/tun.c b/drivers/net/tun.c
496 index 518cbfbc8b65..eb6dc28e5e52 100644
497 --- a/drivers/net/tun.c
498 +++ b/drivers/net/tun.c
499 @@ -525,6 +525,14 @@ static void tun_queue_purge(struct tun_file *tfile)
500 skb_queue_purge(&tfile->sk.sk_error_queue);
501 }
502
503 +static void tun_cleanup_tx_array(struct tun_file *tfile)
504 +{
505 + if (tfile->tx_array.ring.queue) {
506 + skb_array_cleanup(&tfile->tx_array);
507 + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
508 + }
509 +}
510 +
511 static void __tun_detach(struct tun_file *tfile, bool clean)
512 {
513 struct tun_file *ntfile;
514 @@ -566,8 +574,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
515 tun->dev->reg_state == NETREG_REGISTERED)
516 unregister_netdevice(tun->dev);
517 }
518 - if (tun)
519 - skb_array_cleanup(&tfile->tx_array);
520 + tun_cleanup_tx_array(tfile);
521 sock_put(&tfile->sk);
522 }
523 }
524 @@ -606,11 +613,13 @@ static void tun_detach_all(struct net_device *dev)
525 /* Drop read queue */
526 tun_queue_purge(tfile);
527 sock_put(&tfile->sk);
528 + tun_cleanup_tx_array(tfile);
529 }
530 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
531 tun_enable_queue(tfile);
532 tun_queue_purge(tfile);
533 sock_put(&tfile->sk);
534 + tun_cleanup_tx_array(tfile);
535 }
536 BUG_ON(tun->numdisabled != 0);
537
538 @@ -2363,6 +2372,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
539
540 sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
541
542 + memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
543 +
544 return 0;
545 }
546
547 diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
548 index 9c257ffedb15..c53385a0052f 100644
549 --- a/drivers/net/usb/lan78xx.c
550 +++ b/drivers/net/usb/lan78xx.c
551 @@ -2197,6 +2197,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
552 buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
553 dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
554 dev->rx_qlen = 4;
555 + dev->tx_qlen = 4;
556 }
557
558 ret = lan78xx_write_reg(dev, BURST_CAP, buf);
559 diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
560 index ef83ae3b0a44..4afba17e2403 100644
561 --- a/drivers/net/vmxnet3/vmxnet3_drv.c
562 +++ b/drivers/net/vmxnet3/vmxnet3_drv.c
563 @@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
564 rq->rx_ring[i].basePA);
565 rq->rx_ring[i].base = NULL;
566 }
567 - rq->buf_info[i] = NULL;
568 }
569
570 if (rq->data_ring.base) {
571 @@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
572 (rq->rx_ring[0].size + rq->rx_ring[1].size);
573 dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
574 rq->buf_info_pa);
575 + rq->buf_info[0] = rq->buf_info[1] = NULL;
576 }
577 }
578
579 diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
580 index a530f08592cd..4abd3fce5ab6 100644
581 --- a/drivers/scsi/libiscsi.c
582 +++ b/drivers/scsi/libiscsi.c
583 @@ -1727,7 +1727,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
584
585 if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
586 reason = FAILURE_SESSION_IN_RECOVERY;
587 - sc->result = DID_REQUEUE;
588 + sc->result = DID_REQUEUE << 16;
589 goto fault;
590 }
591
592 diff --git a/drivers/usb/usbip/usbip_common.h b/drivers/usb/usbip/usbip_common.h
593 index 9f490375ac92..f0b955f8504e 100644
594 --- a/drivers/usb/usbip/usbip_common.h
595 +++ b/drivers/usb/usbip/usbip_common.h
596 @@ -271,6 +271,7 @@ struct usbip_device {
597 /* lock for status */
598 spinlock_t lock;
599
600 + int sockfd;
601 struct socket *tcp_socket;
602
603 struct task_struct *tcp_rx;
604 diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
605 index b96e5b189269..c287ccc78fde 100644
606 --- a/drivers/usb/usbip/vhci_sysfs.c
607 +++ b/drivers/usb/usbip/vhci_sysfs.c
608 @@ -49,13 +49,17 @@ static ssize_t status_show_vhci(int pdev_nr, char *out)
609
610 /*
611 * output example:
612 - * port sta spd dev socket local_busid
613 - * 0000 004 000 00000000 c5a7bb80 1-2.3
614 - * 0001 004 000 00000000 d8cee980 2-3.4
615 + * port sta spd dev sockfd local_busid
616 + * 0000 004 000 00000000 000003 1-2.3
617 + * 0001 004 000 00000000 000004 2-3.4
618 *
619 - * IP address can be retrieved from a socket pointer address by looking
620 - * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
621 - * port number and its peer IP address.
622 + * Output includes socket fd instead of socket pointer address to
623 + * avoid leaking kernel memory address in:
624 + * /sys/devices/platform/vhci_hcd.0/status and in debug output.
625 + * The socket pointer address is not used at the moment and it was
626 + * made visible as a convenient way to find IP address from socket
627 + * pointer address by looking up /proc/net/{tcp,tcp6}. As this opens
628 + * a security hole, the change is made to use sockfd instead.
629 */
630 for (i = 0; i < VHCI_HC_PORTS; i++) {
631 struct vhci_device *vdev = &vhci->vdev[i];
632 @@ -68,13 +72,13 @@ static ssize_t status_show_vhci(int pdev_nr, char *out)
633 if (vdev->ud.status == VDEV_ST_USED) {
634 out += sprintf(out, "%03u %08x ",
635 vdev->speed, vdev->devid);
636 - out += sprintf(out, "%16p %s",
637 - vdev->ud.tcp_socket,
638 + out += sprintf(out, "%06u %s",
639 + vdev->ud.sockfd,
640 dev_name(&vdev->udev->dev));
641
642 } else {
643 out += sprintf(out, "000 00000000 ");
644 - out += sprintf(out, "0000000000000000 0-0");
645 + out += sprintf(out, "000000 0-0");
646 }
647
648 out += sprintf(out, "\n");
649 @@ -125,7 +129,7 @@ static ssize_t status_show(struct device *dev,
650 int pdev_nr;
651
652 out += sprintf(out,
653 - "port sta spd dev socket local_busid\n");
654 + "port sta spd dev sockfd local_busid\n");
655
656 pdev_nr = status_name_to_id(attr->attr.name);
657 if (pdev_nr < 0)
658 @@ -324,6 +328,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
659
660 vdev->devid = devid;
661 vdev->speed = speed;
662 + vdev->ud.sockfd = sockfd;
663 vdev->ud.tcp_socket = socket;
664 vdev->ud.status = VDEV_ST_NOTASSIGNED;
665
666 diff --git a/fs/fcntl.c b/fs/fcntl.c
667 index 1493ceb0477d..ec03cf620fd7 100644
668 --- a/fs/fcntl.c
669 +++ b/fs/fcntl.c
670 @@ -114,6 +114,10 @@ void f_setown(struct file *filp, unsigned long arg, int force)
671 int who = arg;
672 type = PIDTYPE_PID;
673 if (who < 0) {
674 + /* avoid overflow below */
675 + if (who == INT_MIN)
676 + return;
677 +
678 type = PIDTYPE_PGID;
679 who = -who;
680 }
681 diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
682 index 75f942ae5176..81c018e5c31e 100644
683 --- a/fs/nfsd/auth.c
684 +++ b/fs/nfsd/auth.c
685 @@ -59,10 +59,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
686 gi->gid[i] = exp->ex_anon_gid;
687 else
688 gi->gid[i] = rqgi->gid[i];
689 -
690 - /* Each thread allocates its own gi, no race */
691 - groups_sort(gi);
692 }
693 +
694 + /* Each thread allocates its own gi, no race */
695 + groups_sort(gi);
696 } else {
697 gi = get_group_info(rqgi);
698 }
699 diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
700 index fe2cbeb90772..939aa066e1ca 100644
701 --- a/fs/orangefs/devorangefs-req.c
702 +++ b/fs/orangefs/devorangefs-req.c
703 @@ -161,7 +161,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
704 struct orangefs_kernel_op_s *op, *temp;
705 __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
706 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
707 - struct orangefs_kernel_op_s *cur_op = NULL;
708 + struct orangefs_kernel_op_s *cur_op;
709 unsigned long ret;
710
711 /* We do not support blocking IO. */
712 @@ -181,6 +181,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
713 }
714
715 restart:
716 + cur_op = NULL;
717 /* Get next op (if any) from top of list. */
718 spin_lock(&orangefs_request_list_lock);
719 list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
720 diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
721 index 02cc6139ec90..5b2cbe567365 100644
722 --- a/fs/orangefs/file.c
723 +++ b/fs/orangefs/file.c
724 @@ -446,7 +446,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
725 static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
726 {
727 struct file *file = iocb->ki_filp;
728 - loff_t pos = *(&iocb->ki_pos);
729 + loff_t pos = iocb->ki_pos;
730 ssize_t rc = 0;
731
732 BUG_ON(iocb->private);
733 @@ -485,9 +485,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
734 }
735 }
736
737 - if (file->f_pos > i_size_read(file->f_mapping->host))
738 - orangefs_i_size_write(file->f_mapping->host, file->f_pos);
739 -
740 rc = generic_write_checks(iocb, iter);
741
742 if (rc <= 0) {
743 @@ -501,7 +498,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
744 * pos to the end of the file, so we will wait till now to set
745 * pos...
746 */
747 - pos = *(&iocb->ki_pos);
748 + pos = iocb->ki_pos;
749
750 rc = do_readv_writev(ORANGEFS_IO_WRITE,
751 file,
752 diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
753 index 45dd8f27b2ac..f28381a7cd12 100644
754 --- a/fs/orangefs/orangefs-kernel.h
755 +++ b/fs/orangefs/orangefs-kernel.h
756 @@ -570,17 +570,6 @@ do { \
757 sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \
758 } while (0)
759
760 -static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
761 -{
762 -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
763 - inode_lock(inode);
764 -#endif
765 - i_size_write(inode, i_size);
766 -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
767 - inode_unlock(inode);
768 -#endif
769 -}
770 -
771 static inline void orangefs_set_timeout(struct dentry *dentry)
772 {
773 unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
774 diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
775 index abcfa3fa9992..f61b00887481 100644
776 --- a/fs/orangefs/waitqueue.c
777 +++ b/fs/orangefs/waitqueue.c
778 @@ -28,10 +28,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
779 */
780 void purge_waiting_ops(void)
781 {
782 - struct orangefs_kernel_op_s *op;
783 + struct orangefs_kernel_op_s *op, *tmp;
784
785 spin_lock(&orangefs_request_list_lock);
786 - list_for_each_entry(op, &orangefs_request_list, list) {
787 + list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
788 gossip_debug(GOSSIP_WAIT_DEBUG,
789 "pvfs2-client-core: purging op tag %llu %s\n",
790 llu(op->tag),
791 diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
792 index dc198bc64c61..edc8ef78b63f 100644
793 --- a/fs/reiserfs/bitmap.c
794 +++ b/fs/reiserfs/bitmap.c
795 @@ -513,9 +513,17 @@ static void __discard_prealloc(struct reiserfs_transaction_handle *th,
796 "inode has negative prealloc blocks count.");
797 #endif
798 while (ei->i_prealloc_count > 0) {
799 - reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
800 - ei->i_prealloc_block++;
801 + b_blocknr_t block_to_free;
802 +
803 + /*
804 + * reiserfs_free_prealloc_block can drop the write lock,
805 + * which could allow another caller to free the same block.
806 + * We can protect against it by modifying the prealloc
807 + * state before calling it.
808 + */
809 + block_to_free = ei->i_prealloc_block++;
810 ei->i_prealloc_count--;
811 + reiserfs_free_prealloc_block(th, inode, block_to_free);
812 dirty = 1;
813 }
814 if (dirty)
815 @@ -1128,7 +1136,7 @@ static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
816 hint->prealloc_size = 0;
817
818 if (!hint->formatted_node && hint->preallocate) {
819 - if (S_ISREG(hint->inode->i_mode)
820 + if (S_ISREG(hint->inode->i_mode) && !IS_PRIVATE(hint->inode)
821 && hint->inode->i_size >=
822 REISERFS_SB(hint->th->t_super)->s_alloc_options.
823 preallocmin * hint->inode->i_sb->s_blocksize)
824 diff --git a/include/linux/bpf.h b/include/linux/bpf.h
825 index 75ffd3b2149e..7995940d4187 100644
826 --- a/include/linux/bpf.h
827 +++ b/include/linux/bpf.h
828 @@ -36,7 +36,10 @@ struct bpf_map_ops {
829 };
830
831 struct bpf_map {
832 - atomic_t refcnt;
833 + /* 1st cacheline with read-mostly members of which some
834 + * are also accessed in fast-path (e.g. ops, max_entries).
835 + */
836 + const struct bpf_map_ops *ops ____cacheline_aligned;
837 enum bpf_map_type map_type;
838 u32 key_size;
839 u32 value_size;
840 @@ -44,10 +47,15 @@ struct bpf_map {
841 u32 map_flags;
842 u32 pages;
843 bool unpriv_array;
844 - struct user_struct *user;
845 - const struct bpf_map_ops *ops;
846 - struct work_struct work;
847 + /* 7 bytes hole */
848 +
849 + /* 2nd cacheline with misc members to avoid false sharing
850 + * particularly with refcounting.
851 + */
852 + struct user_struct *user ____cacheline_aligned;
853 + atomic_t refcnt;
854 atomic_t usercnt;
855 + struct work_struct work;
856 };
857
858 struct bpf_map_type_list {
859 diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
860 index 2189935075b4..a951fd10aaaa 100644
861 --- a/include/linux/cacheinfo.h
862 +++ b/include/linux/cacheinfo.h
863 @@ -71,6 +71,7 @@ struct cpu_cacheinfo {
864 struct cacheinfo *info_list;
865 unsigned int num_levels;
866 unsigned int num_leaves;
867 + bool cpu_map_populated;
868 };
869
870 /*
871 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
872 index 490f5a83f947..e3d7754f25f0 100644
873 --- a/include/linux/mmzone.h
874 +++ b/include/linux/mmzone.h
875 @@ -633,6 +633,8 @@ typedef struct pglist_data {
876 int kswapd_order;
877 enum zone_type kswapd_classzone_idx;
878
879 + int kswapd_failures; /* Number of 'reclaimed == 0' runs */
880 +
881 #ifdef CONFIG_COMPACTION
882 int kcompactd_max_order;
883 enum zone_type kcompactd_classzone_idx;
884 diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
885 index a3d04934aa96..6f8fbcf10dfb 100644
886 --- a/include/linux/vermagic.h
887 +++ b/include/linux/vermagic.h
888 @@ -24,16 +24,10 @@
889 #ifndef MODULE_ARCH_VERMAGIC
890 #define MODULE_ARCH_VERMAGIC ""
891 #endif
892 -#ifdef RETPOLINE
893 -#define MODULE_VERMAGIC_RETPOLINE "retpoline "
894 -#else
895 -#define MODULE_VERMAGIC_RETPOLINE ""
896 -#endif
897
898 #define VERMAGIC_STRING \
899 UTS_RELEASE " " \
900 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
901 MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
902 - MODULE_ARCH_VERMAGIC \
903 - MODULE_VERMAGIC_RETPOLINE
904 + MODULE_ARCH_VERMAGIC
905
906 diff --git a/include/net/arp.h b/include/net/arp.h
907 index 5e0f891d476c..1b3f86981757 100644
908 --- a/include/net/arp.h
909 +++ b/include/net/arp.h
910 @@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
911
912 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
913 {
914 + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
915 + key = INADDR_ANY;
916 +
917 return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
918 }
919
920 diff --git a/include/net/ipv6.h b/include/net/ipv6.h
921 index 615ce0abba9c..e64210c98c2b 100644
922 --- a/include/net/ipv6.h
923 +++ b/include/net/ipv6.h
924 @@ -290,6 +290,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
925 int flags);
926 int ip6_flowlabel_init(void);
927 void ip6_flowlabel_cleanup(void);
928 +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
929
930 static inline void fl6_sock_release(struct ip6_flowlabel *fl)
931 {
932 diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
933 index 0940598c002f..23102da24dd9 100644
934 --- a/include/net/net_namespace.h
935 +++ b/include/net/net_namespace.h
936 @@ -213,6 +213,11 @@ int net_eq(const struct net *net1, const struct net *net2)
937 return net1 == net2;
938 }
939
940 +static inline int check_net(const struct net *net)
941 +{
942 + return atomic_read(&net->count) != 0;
943 +}
944 +
945 void net_drop_ns(void *);
946
947 #else
948 @@ -237,6 +242,11 @@ int net_eq(const struct net *net1, const struct net *net2)
949 return 1;
950 }
951
952 +static inline int check_net(const struct net *net)
953 +{
954 + return 1;
955 +}
956 +
957 #define net_drop_ns NULL
958 #endif
959
960 diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
961 index 1c3154913a39..bc96b14dfb2c 100644
962 --- a/include/uapi/linux/eventpoll.h
963 +++ b/include/uapi/linux/eventpoll.h
964 @@ -26,6 +26,19 @@
965 #define EPOLL_CTL_DEL 2
966 #define EPOLL_CTL_MOD 3
967
968 +/* Epoll event masks */
969 +#define EPOLLIN 0x00000001
970 +#define EPOLLPRI 0x00000002
971 +#define EPOLLOUT 0x00000004
972 +#define EPOLLERR 0x00000008
973 +#define EPOLLHUP 0x00000010
974 +#define EPOLLRDNORM 0x00000040
975 +#define EPOLLRDBAND 0x00000080
976 +#define EPOLLWRNORM 0x00000100
977 +#define EPOLLWRBAND 0x00000200
978 +#define EPOLLMSG 0x00000400
979 +#define EPOLLRDHUP 0x00002000
980 +
981 /* Set exclusive wakeup mode for the target file descriptor */
982 #define EPOLLEXCLUSIVE (1 << 28)
983
984 diff --git a/init/Kconfig b/init/Kconfig
985 index 34407f15e6d3..b331feeabda4 100644
986 --- a/init/Kconfig
987 +++ b/init/Kconfig
988 @@ -1609,6 +1609,13 @@ config BPF_SYSCALL
989 Enable the bpf() system call that allows to manipulate eBPF
990 programs and maps via file descriptors.
991
992 +config BPF_JIT_ALWAYS_ON
993 + bool "Permanently enable BPF JIT and remove BPF interpreter"
994 + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
995 + help
996 + Enables BPF JIT and removes BPF interpreter to avoid
997 + speculative execution of BPF instructions by the interpreter
998 +
999 config SHMEM
1000 bool "Use full shmem filesystem" if EXPERT
1001 default y
1002 diff --git a/ipc/msg.c b/ipc/msg.c
1003 index e12307d0c920..ff10d43b5184 100644
1004 --- a/ipc/msg.c
1005 +++ b/ipc/msg.c
1006 @@ -763,7 +763,10 @@ static inline int convert_mode(long *msgtyp, int msgflg)
1007 if (*msgtyp == 0)
1008 return SEARCH_ANY;
1009 if (*msgtyp < 0) {
1010 - *msgtyp = -*msgtyp;
1011 + if (*msgtyp == LONG_MIN) /* -LONG_MIN is undefined */
1012 + *msgtyp = LONG_MAX;
1013 + else
1014 + *msgtyp = -*msgtyp;
1015 return SEARCH_LESSEQUAL;
1016 }
1017 if (msgflg & MSG_EXCEPT)
1018 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
1019 index aa6d98154106..879ca844ba1d 100644
1020 --- a/kernel/bpf/core.c
1021 +++ b/kernel/bpf/core.c
1022 @@ -458,6 +458,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
1023 }
1024 EXPORT_SYMBOL_GPL(__bpf_call_base);
1025
1026 +#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1027 /**
1028 * __bpf_prog_run - run eBPF program on a given context
1029 * @ctx: is the data we are operating on
1030 @@ -641,7 +642,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
1031 DST = tmp;
1032 CONT;
1033 ALU_MOD_X:
1034 - if (unlikely(SRC == 0))
1035 + if (unlikely((u32)SRC == 0))
1036 return 0;
1037 tmp = (u32) DST;
1038 DST = do_div(tmp, (u32) SRC);
1039 @@ -660,7 +661,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
1040 DST = div64_u64(DST, SRC);
1041 CONT;
1042 ALU_DIV_X:
1043 - if (unlikely(SRC == 0))
1044 + if (unlikely((u32)SRC == 0))
1045 return 0;
1046 tmp = (u32) DST;
1047 do_div(tmp, (u32) SRC);
1048 @@ -715,7 +716,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
1049 struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
1050 struct bpf_array *array = container_of(map, struct bpf_array, map);
1051 struct bpf_prog *prog;
1052 - u64 index = BPF_R3;
1053 + u32 index = BPF_R3;
1054
1055 if (unlikely(index >= array->map.max_entries))
1056 goto out;
1057 @@ -923,6 +924,13 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
1058 }
1059 STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */
1060
1061 +#else
1062 +static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn)
1063 +{
1064 + return 0;
1065 +}
1066 +#endif
1067 +
1068 bool bpf_prog_array_compatible(struct bpf_array *array,
1069 const struct bpf_prog *fp)
1070 {
1071 @@ -970,7 +978,11 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
1072 */
1073 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
1074 {
1075 +#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1076 fp->bpf_func = (void *) __bpf_prog_run;
1077 +#else
1078 + fp->bpf_func = (void *) __bpf_prog_ret0;
1079 +#endif
1080
1081 /* eBPF JITs can rewrite the program in case constant
1082 * blinding is active. However, in case of error during
1083 @@ -979,6 +991,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
1084 * be JITed, but falls back to the interpreter.
1085 */
1086 fp = bpf_int_jit_compile(fp);
1087 +#ifdef CONFIG_BPF_JIT_ALWAYS_ON
1088 + if (!fp->jited) {
1089 + *err = -ENOTSUPP;
1090 + return fp;
1091 + }
1092 +#endif
1093 bpf_prog_lock_ro(fp);
1094
1095 /* The tail call compatibility check can only be done at
1096 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
1097 index 19c44cf59bb2..076e4a0ff95e 100644
1098 --- a/kernel/bpf/verifier.c
1099 +++ b/kernel/bpf/verifier.c
1100 @@ -702,6 +702,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
1101 return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
1102 }
1103
1104 +static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
1105 +{
1106 + const struct bpf_reg_state *reg = &env->cur_state.regs[regno];
1107 +
1108 + return reg->type == PTR_TO_CTX;
1109 +}
1110 +
1111 static int check_ptr_alignment(struct bpf_verifier_env *env,
1112 struct bpf_reg_state *reg, int off, int size)
1113 {
1114 @@ -896,6 +903,12 @@ static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
1115 return -EACCES;
1116 }
1117
1118 + if (is_ctx_reg(env, insn->dst_reg)) {
1119 + verbose("BPF_XADD stores into R%d context is not allowed\n",
1120 + insn->dst_reg);
1121 + return -EACCES;
1122 + }
1123 +
1124 /* check whether atomic_add can read the memory */
1125 err = check_mem_access(env, insn->dst_reg, insn->off,
1126 BPF_SIZE(insn->code), BPF_READ, -1);
1127 @@ -1843,6 +1856,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
1128 return -EINVAL;
1129 }
1130
1131 + if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
1132 + verbose("BPF_ARSH not supported for 32 bit ALU\n");
1133 + return -EINVAL;
1134 + }
1135 +
1136 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
1137 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
1138 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
1139 @@ -3007,6 +3025,12 @@ static int do_check(struct bpf_verifier_env *env)
1140 if (err)
1141 return err;
1142
1143 + if (is_ctx_reg(env, insn->dst_reg)) {
1144 + verbose("BPF_ST stores into R%d context is not allowed\n",
1145 + insn->dst_reg);
1146 + return -EACCES;
1147 + }
1148 +
1149 /* check that memory (dst_reg + off) is writeable */
1150 err = check_mem_access(env, insn->dst_reg, insn->off,
1151 BPF_SIZE(insn->code), BPF_WRITE,
1152 @@ -3386,6 +3410,24 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
1153
1154
1155 for (i = 0; i < insn_cnt; i++, insn++) {
1156 + if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
1157 + insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
1158 + /* due to JIT bugs clear upper 32-bits of src register
1159 + * before div/mod operation
1160 + */
1161 + insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
1162 + insn_buf[1] = *insn;
1163 + cnt = 2;
1164 + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
1165 + if (!new_prog)
1166 + return -ENOMEM;
1167 +
1168 + delta += cnt - 1;
1169 + env->prog = prog = new_prog;
1170 + insn = new_prog->insnsi + i + delta;
1171 + continue;
1172 + }
1173 +
1174 if (insn->code != (BPF_JMP | BPF_CALL))
1175 continue;
1176
1177 diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
1178 index eeb7f2f5698d..54fd2fed36e9 100644
1179 --- a/kernel/time/hrtimer.c
1180 +++ b/kernel/time/hrtimer.c
1181 @@ -652,7 +652,9 @@ static void hrtimer_reprogram(struct hrtimer *timer,
1182 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
1183 {
1184 base->expires_next.tv64 = KTIME_MAX;
1185 + base->hang_detected = 0;
1186 base->hres_active = 0;
1187 + base->next_timer = NULL;
1188 }
1189
1190 /*
1191 @@ -1610,6 +1612,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
1192 timerqueue_init_head(&cpu_base->clock_base[i].active);
1193 }
1194
1195 + cpu_base->active_bases = 0;
1196 cpu_base->cpu = cpu;
1197 hrtimer_init_hres(cpu_base);
1198 return 0;
1199 diff --git a/lib/test_bpf.c b/lib/test_bpf.c
1200 index 2e385026915c..98da7520a6aa 100644
1201 --- a/lib/test_bpf.c
1202 +++ b/lib/test_bpf.c
1203 @@ -5646,9 +5646,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
1204 return NULL;
1205 }
1206 }
1207 - /* We don't expect to fail. */
1208 if (*err) {
1209 - pr_cont("FAIL to attach err=%d len=%d\n",
1210 + pr_cont("FAIL to prog_create err=%d len=%d\n",
1211 *err, fprog.len);
1212 return NULL;
1213 }
1214 @@ -5671,6 +5670,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
1215 * checks.
1216 */
1217 fp = bpf_prog_select_runtime(fp, err);
1218 + if (*err) {
1219 + pr_cont("FAIL to select_runtime err=%d\n", *err);
1220 + return NULL;
1221 + }
1222 break;
1223 }
1224
1225 @@ -5856,8 +5859,8 @@ static __init int test_bpf(void)
1226 pass_cnt++;
1227 continue;
1228 }
1229 -
1230 - return err;
1231 + err_cnt++;
1232 + continue;
1233 }
1234
1235 pr_cont("jited:%u ", fp->jited);
1236 diff --git a/mm/cma.c b/mm/cma.c
1237 index c960459eda7e..397687fc51f9 100644
1238 --- a/mm/cma.c
1239 +++ b/mm/cma.c
1240 @@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma)
1241 }
1242
1243 static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
1244 - int align_order)
1245 + unsigned int align_order)
1246 {
1247 if (align_order <= cma->order_per_bit)
1248 return 0;
1249 @@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
1250 }
1251
1252 /*
1253 - * Find a PFN aligned to the specified order and return an offset represented in
1254 - * order_per_bits.
1255 + * Find the offset of the base PFN from the specified align_order.
1256 + * The value returned is represented in order_per_bits.
1257 */
1258 static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
1259 - int align_order)
1260 + unsigned int align_order)
1261 {
1262 - if (align_order <= cma->order_per_bit)
1263 - return 0;
1264 -
1265 - return (ALIGN(cma->base_pfn, (1UL << align_order))
1266 - - cma->base_pfn) >> cma->order_per_bit;
1267 + return (cma->base_pfn & ((1UL << align_order) - 1))
1268 + >> cma->order_per_bit;
1269 }
1270
1271 static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
1272 diff --git a/mm/internal.h b/mm/internal.h
1273 index 34a5459e5989..3e2d01694747 100644
1274 --- a/mm/internal.h
1275 +++ b/mm/internal.h
1276 @@ -73,6 +73,12 @@ static inline void set_page_refcounted(struct page *page)
1277
1278 extern unsigned long highest_memmap_pfn;
1279
1280 +/*
1281 + * Maximum number of reclaim retries without progress before the OOM
1282 + * killer is consider the only way forward.
1283 + */
1284 +#define MAX_RECLAIM_RETRIES 16
1285 +
1286 /*
1287 * in mm/vmscan.c:
1288 */
1289 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1290 index 2a800c4a39bd..50088150fc17 100644
1291 --- a/mm/memcontrol.c
1292 +++ b/mm/memcontrol.c
1293 @@ -5531,7 +5531,7 @@ static void uncharge_list(struct list_head *page_list)
1294 next = page->lru.next;
1295
1296 VM_BUG_ON_PAGE(PageLRU(page), page);
1297 - VM_BUG_ON_PAGE(page_count(page), page);
1298 + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
1299
1300 if (!page->mem_cgroup)
1301 continue;
1302 diff --git a/mm/memory-failure.c b/mm/memory-failure.c
1303 index ce7d416edab7..5aa71a82ca73 100644
1304 --- a/mm/memory-failure.c
1305 +++ b/mm/memory-failure.c
1306 @@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p)
1307 */
1308 ClearPageActive(p);
1309 ClearPageUnevictable(p);
1310 +
1311 + /*
1312 + * Poisoned page might never drop its ref count to 0 so we have
1313 + * to uncharge it manually from its memcg.
1314 + */
1315 + mem_cgroup_uncharge(p);
1316 +
1317 /*
1318 * drop the page count elevated by isolate_lru_page()
1319 */
1320 diff --git a/mm/mmap.c b/mm/mmap.c
1321 index 5b48adb4aa56..45ac5b973459 100644
1322 --- a/mm/mmap.c
1323 +++ b/mm/mmap.c
1324 @@ -2240,7 +2240,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1325 gap_addr = TASK_SIZE;
1326
1327 next = vma->vm_next;
1328 - if (next && next->vm_start < gap_addr) {
1329 + if (next && next->vm_start < gap_addr &&
1330 + (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
1331 if (!(next->vm_flags & VM_GROWSUP))
1332 return -ENOMEM;
1333 /* Check that both stack segments have the same anon_vma? */
1334 @@ -2324,7 +2325,8 @@ int expand_downwards(struct vm_area_struct *vma,
1335 if (gap_addr > address)
1336 return -ENOMEM;
1337 prev = vma->vm_prev;
1338 - if (prev && prev->vm_end > gap_addr) {
1339 + if (prev && prev->vm_end > gap_addr &&
1340 + (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
1341 if (!(prev->vm_flags & VM_GROWSDOWN))
1342 return -ENOMEM;
1343 /* Check that both stack segments have the same anon_vma? */
1344 diff --git a/mm/page_alloc.c b/mm/page_alloc.c
1345 index fbc38888252b..94018ea5f935 100644
1346 --- a/mm/page_alloc.c
1347 +++ b/mm/page_alloc.c
1348 @@ -2821,9 +2821,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
1349 if (!area->nr_free)
1350 continue;
1351
1352 - if (alloc_harder)
1353 - return true;
1354 -
1355 for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
1356 if (!list_empty(&area->free_list[mt]))
1357 return true;
1358 @@ -2835,6 +2832,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
1359 return true;
1360 }
1361 #endif
1362 + if (alloc_harder &&
1363 + !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
1364 + return true;
1365 }
1366 return false;
1367 }
1368 @@ -3421,12 +3421,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
1369 return false;
1370 }
1371
1372 -/*
1373 - * Maximum number of reclaim retries without any progress before OOM killer
1374 - * is consider as the only way to move forward.
1375 - */
1376 -#define MAX_RECLAIM_RETRIES 16
1377 -
1378 /*
1379 * Checks whether it makes sense to retry the reclaim to make a forward progress
1380 * for the given allocation request.
1381 @@ -4385,7 +4379,8 @@ void show_free_areas(unsigned int filter)
1382 K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
1383 K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
1384 node_page_state(pgdat, NR_PAGES_SCANNED),
1385 - !pgdat_reclaimable(pgdat) ? "yes" : "no");
1386 + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
1387 + "yes" : "no");
1388 }
1389
1390 for_each_populated_zone(zone) {
1391 diff --git a/mm/vmscan.c b/mm/vmscan.c
1392 index 30a88b945a44..f118dc23f662 100644
1393 --- a/mm/vmscan.c
1394 +++ b/mm/vmscan.c
1395 @@ -2606,6 +2606,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
1396 } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
1397 sc->nr_scanned - nr_scanned, sc));
1398
1399 + /*
1400 + * Kswapd gives up on balancing particular nodes after too
1401 + * many failures to reclaim anything from them and goes to
1402 + * sleep. On reclaim progress, reset the failure counter. A
1403 + * successful direct reclaim run will revive a dormant kswapd.
1404 + */
1405 + if (reclaimable)
1406 + pgdat->kswapd_failures = 0;
1407 +
1408 return reclaimable;
1409 }
1410
1411 @@ -2680,10 +2689,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
1412 GFP_KERNEL | __GFP_HARDWALL))
1413 continue;
1414
1415 - if (sc->priority != DEF_PRIORITY &&
1416 - !pgdat_reclaimable(zone->zone_pgdat))
1417 - continue; /* Let kswapd poll it */
1418 -
1419 /*
1420 * If we already have plenty of memory free for
1421 * compaction in this zone, don't free any more.
1422 @@ -2820,7 +2825,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
1423 return 0;
1424 }
1425
1426 -static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
1427 +static bool allow_direct_reclaim(pg_data_t *pgdat)
1428 {
1429 struct zone *zone;
1430 unsigned long pfmemalloc_reserve = 0;
1431 @@ -2828,6 +2833,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
1432 int i;
1433 bool wmark_ok;
1434
1435 + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
1436 + return true;
1437 +
1438 for (i = 0; i <= ZONE_NORMAL; i++) {
1439 zone = &pgdat->node_zones[i];
1440 if (!managed_zone(zone) ||
1441 @@ -2908,7 +2916,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
1442
1443 /* Throttle based on the first usable node */
1444 pgdat = zone->zone_pgdat;
1445 - if (pfmemalloc_watermark_ok(pgdat))
1446 + if (allow_direct_reclaim(pgdat))
1447 goto out;
1448 break;
1449 }
1450 @@ -2930,14 +2938,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
1451 */
1452 if (!(gfp_mask & __GFP_FS)) {
1453 wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
1454 - pfmemalloc_watermark_ok(pgdat), HZ);
1455 + allow_direct_reclaim(pgdat), HZ);
1456
1457 goto check_pending;
1458 }
1459
1460 /* Throttle until kswapd wakes the process */
1461 wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
1462 - pfmemalloc_watermark_ok(pgdat));
1463 + allow_direct_reclaim(pgdat));
1464
1465 check_pending:
1466 if (fatal_signal_pending(current))
1467 @@ -3116,7 +3124,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
1468
1469 /*
1470 * The throttled processes are normally woken up in balance_pgdat() as
1471 - * soon as pfmemalloc_watermark_ok() is true. But there is a potential
1472 + * soon as allow_direct_reclaim() is true. But there is a potential
1473 * race between when kswapd checks the watermarks and a process gets
1474 * throttled. There is also a potential race if processes get
1475 * throttled, kswapd wakes, a large process exits thereby balancing the
1476 @@ -3130,6 +3138,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
1477 if (waitqueue_active(&pgdat->pfmemalloc_wait))
1478 wake_up_all(&pgdat->pfmemalloc_wait);
1479
1480 + /* Hopeless node, leave it to direct reclaim */
1481 + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
1482 + return true;
1483 +
1484 for (i = 0; i <= classzone_idx; i++) {
1485 struct zone *zone = pgdat->node_zones + i;
1486
1487 @@ -3216,9 +3228,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
1488 count_vm_event(PAGEOUTRUN);
1489
1490 do {
1491 + unsigned long nr_reclaimed = sc.nr_reclaimed;
1492 bool raise_priority = true;
1493
1494 - sc.nr_reclaimed = 0;
1495 sc.reclaim_idx = classzone_idx;
1496
1497 /*
1498 @@ -3297,7 +3309,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
1499 * able to safely make forward progress. Wake them
1500 */
1501 if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
1502 - pfmemalloc_watermark_ok(pgdat))
1503 + allow_direct_reclaim(pgdat))
1504 wake_up_all(&pgdat->pfmemalloc_wait);
1505
1506 /* Check if kswapd should be suspending */
1507 @@ -3308,10 +3320,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
1508 * Raise priority if scanning rate is too low or there was no
1509 * progress in reclaiming pages
1510 */
1511 - if (raise_priority || !sc.nr_reclaimed)
1512 + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
1513 + if (raise_priority || !nr_reclaimed)
1514 sc.priority--;
1515 } while (sc.priority >= 1);
1516
1517 + if (!sc.nr_reclaimed)
1518 + pgdat->kswapd_failures++;
1519 +
1520 out:
1521 /*
1522 * Return the order kswapd stopped reclaiming at as
1523 @@ -3511,6 +3527,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
1524 if (!waitqueue_active(&pgdat->kswapd_wait))
1525 return;
1526
1527 + /* Hopeless node, leave it to direct reclaim */
1528 + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
1529 + return;
1530 +
1531 /* Only wake kswapd if all zones are unbalanced */
1532 for (z = 0; z <= classzone_idx; z++) {
1533 zone = pgdat->node_zones + z;
1534 @@ -3781,9 +3801,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
1535 sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
1536 return NODE_RECLAIM_FULL;
1537
1538 - if (!pgdat_reclaimable(pgdat))
1539 - return NODE_RECLAIM_FULL;
1540 -
1541 /*
1542 * Do not scan if the allocation should not be delayed.
1543 */
1544 diff --git a/mm/vmstat.c b/mm/vmstat.c
1545 index 6a088df04b29..3863b5d6d598 100644
1546 --- a/mm/vmstat.c
1547 +++ b/mm/vmstat.c
1548 @@ -1421,7 +1421,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1549 "\n node_unreclaimable: %u"
1550 "\n start_pfn: %lu"
1551 "\n node_inactive_ratio: %u",
1552 - !pgdat_reclaimable(zone->zone_pgdat),
1553 + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
1554 zone->zone_start_pfn,
1555 zone->zone_pgdat->inactive_ratio);
1556 seq_putc(m, '\n');
1557 diff --git a/net/can/af_can.c b/net/can/af_can.c
1558 index 5488e4a6ccd0..ac1552d8b4ad 100644
1559 --- a/net/can/af_can.c
1560 +++ b/net/can/af_can.c
1561 @@ -722,13 +722,12 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
1562 if (unlikely(!net_eq(dev_net(dev), &init_net)))
1563 goto drop;
1564
1565 - if (WARN_ONCE(dev->type != ARPHRD_CAN ||
1566 - skb->len != CAN_MTU ||
1567 - cfd->len > CAN_MAX_DLEN,
1568 - "PF_CAN: dropped non conform CAN skbuf: "
1569 - "dev type %d, len %d, datalen %d\n",
1570 - dev->type, skb->len, cfd->len))
1571 + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
1572 + cfd->len > CAN_MAX_DLEN)) {
1573 + pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
1574 + dev->type, skb->len, cfd->len);
1575 goto drop;
1576 + }
1577
1578 can_receive(skb, dev);
1579 return NET_RX_SUCCESS;
1580 @@ -746,13 +745,12 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
1581 if (unlikely(!net_eq(dev_net(dev), &init_net)))
1582 goto drop;
1583
1584 - if (WARN_ONCE(dev->type != ARPHRD_CAN ||
1585 - skb->len != CANFD_MTU ||
1586 - cfd->len > CANFD_MAX_DLEN,
1587 - "PF_CAN: dropped non conform CAN FD skbuf: "
1588 - "dev type %d, len %d, datalen %d\n",
1589 - dev->type, skb->len, cfd->len))
1590 + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
1591 + cfd->len > CANFD_MAX_DLEN)) {
1592 + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
1593 + dev->type, skb->len, cfd->len);
1594 goto drop;
1595 + }
1596
1597 can_receive(skb, dev);
1598 return NET_RX_SUCCESS;
1599 diff --git a/net/core/dev.c b/net/core/dev.c
1600 index 09007a71c8dd..67b5d4d8acb1 100644
1601 --- a/net/core/dev.c
1602 +++ b/net/core/dev.c
1603 @@ -3083,10 +3083,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
1604 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
1605
1606 /* + transport layer */
1607 - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
1608 - hdr_len += tcp_hdrlen(skb);
1609 - else
1610 - hdr_len += sizeof(struct udphdr);
1611 + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
1612 + const struct tcphdr *th;
1613 + struct tcphdr _tcphdr;
1614 +
1615 + th = skb_header_pointer(skb, skb_transport_offset(skb),
1616 + sizeof(_tcphdr), &_tcphdr);
1617 + if (likely(th))
1618 + hdr_len += __tcp_hdrlen(th);
1619 + } else {
1620 + struct udphdr _udphdr;
1621 +
1622 + if (skb_header_pointer(skb, skb_transport_offset(skb),
1623 + sizeof(_udphdr), &_udphdr))
1624 + hdr_len += sizeof(struct udphdr);
1625 + }
1626
1627 if (shinfo->gso_type & SKB_GSO_DODGY)
1628 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
1629 diff --git a/net/core/filter.c b/net/core/filter.c
1630 index 4eb4ce0aeef4..e8c89d2d2bc0 100644
1631 --- a/net/core/filter.c
1632 +++ b/net/core/filter.c
1633 @@ -441,6 +441,10 @@ static int bpf_convert_filter(struct sock_filter *prog, int len,
1634 convert_bpf_extensions(fp, &insn))
1635 break;
1636
1637 + if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
1638 + fp->code == (BPF_ALU | BPF_MOD | BPF_X))
1639 + *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
1640 +
1641 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
1642 break;
1643
1644 @@ -1005,11 +1009,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1645 */
1646 goto out_err_free;
1647
1648 - /* We are guaranteed to never error here with cBPF to eBPF
1649 - * transitions, since there's no issue with type compatibility
1650 - * checks on program arrays.
1651 - */
1652 fp = bpf_prog_select_runtime(fp, &err);
1653 + if (err)
1654 + goto out_err_free;
1655
1656 kfree(old_prog);
1657 return fp;
1658 diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
1659 index 32e4e0158846..862d63ec56e4 100644
1660 --- a/net/core/flow_dissector.c
1661 +++ b/net/core/flow_dissector.c
1662 @@ -550,8 +550,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
1663 out_good:
1664 ret = true;
1665
1666 - key_control->thoff = (u16)nhoff;
1667 out:
1668 + key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
1669 key_basic->n_proto = proto;
1670 key_basic->ip_proto = ip_proto;
1671
1672 @@ -559,7 +559,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
1673
1674 out_bad:
1675 ret = false;
1676 - key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
1677 goto out;
1678 }
1679 EXPORT_SYMBOL(__skb_flow_dissect);
1680 diff --git a/net/core/neighbour.c b/net/core/neighbour.c
1681 index f45f6198851f..7b315663f840 100644
1682 --- a/net/core/neighbour.c
1683 +++ b/net/core/neighbour.c
1684 @@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
1685 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
1686 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
1687
1688 - hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
1689 + hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
1690
1691 if (n->parms->dead) {
1692 rc = ERR_PTR(-EINVAL);
1693 @@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
1694 n1 != NULL;
1695 n1 = rcu_dereference_protected(n1->next,
1696 lockdep_is_held(&tbl->lock))) {
1697 - if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
1698 + if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
1699 if (want_ref)
1700 neigh_hold(n1);
1701 rc = n1;
1702 diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
1703 index a7f05f0130e8..1b4619008c4e 100644
1704 --- a/net/core/sysctl_net_core.c
1705 +++ b/net/core/sysctl_net_core.c
1706 @@ -292,7 +292,13 @@ static struct ctl_table net_core_table[] = {
1707 .data = &bpf_jit_enable,
1708 .maxlen = sizeof(int),
1709 .mode = 0644,
1710 +#ifndef CONFIG_BPF_JIT_ALWAYS_ON
1711 .proc_handler = proc_dointvec
1712 +#else
1713 + .proc_handler = proc_dointvec_minmax,
1714 + .extra1 = &one,
1715 + .extra2 = &one,
1716 +#endif
1717 },
1718 # ifdef CONFIG_HAVE_EBPF_JIT
1719 {
1720 diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
1721 index 5e3a7302f774..7753681195c1 100644
1722 --- a/net/dccp/ccids/ccid2.c
1723 +++ b/net/dccp/ccids/ccid2.c
1724 @@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
1725
1726 ccid2_pr_debug("RTO_EXPIRE\n");
1727
1728 + if (sk->sk_state == DCCP_CLOSED)
1729 + goto out;
1730 +
1731 /* back-off timer */
1732 hc->tx_rto <<= 1;
1733 if (hc->tx_rto > DCCP_RTO_MAX)
1734 diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
1735 index 51b27ae09fbd..e60517eb1c3a 100644
1736 --- a/net/ipv4/arp.c
1737 +++ b/net/ipv4/arp.c
1738 @@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
1739
1740 static int arp_constructor(struct neighbour *neigh)
1741 {
1742 - __be32 addr = *(__be32 *)neigh->primary_key;
1743 + __be32 addr;
1744 struct net_device *dev = neigh->dev;
1745 struct in_device *in_dev;
1746 struct neigh_parms *parms;
1747 + u32 inaddr_any = INADDR_ANY;
1748
1749 + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
1750 + memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
1751 +
1752 + addr = *(__be32 *)neigh->primary_key;
1753 rcu_read_lock();
1754 in_dev = __in_dev_get_rcu(dev);
1755 if (!in_dev) {
1756 diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
1757 index 7bff0c65046f..9c7a4cea1628 100644
1758 --- a/net/ipv4/igmp.c
1759 +++ b/net/ipv4/igmp.c
1760 @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
1761 return htonl(INADDR_ANY);
1762
1763 for_ifa(in_dev) {
1764 - if (inet_ifa_match(fl4->saddr, ifa))
1765 + if (fl4->saddr == ifa->ifa_local)
1766 return fl4->saddr;
1767 } endfor_ifa(in_dev);
1768
1769 diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
1770 index 05d2bde00864..7efa6b062049 100644
1771 --- a/net/ipv4/tcp.c
1772 +++ b/net/ipv4/tcp.c
1773 @@ -2215,6 +2215,9 @@ void tcp_close(struct sock *sk, long timeout)
1774 tcp_send_active_reset(sk, GFP_ATOMIC);
1775 __NET_INC_STATS(sock_net(sk),
1776 LINUX_MIB_TCPABORTONMEMORY);
1777 + } else if (!check_net(sock_net(sk))) {
1778 + /* Not possible to send reset; just close */
1779 + tcp_set_state(sk, TCP_CLOSE);
1780 }
1781 }
1782
1783 diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
1784 index bc68da38ea86..366b1becff9d 100644
1785 --- a/net/ipv4/tcp_offload.c
1786 +++ b/net/ipv4/tcp_offload.c
1787 @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
1788 static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
1789 netdev_features_t features)
1790 {
1791 + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
1792 + return ERR_PTR(-EINVAL);
1793 +
1794 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1795 return ERR_PTR(-EINVAL);
1796
1797 diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
1798 index 74db43b47917..69523389f067 100644
1799 --- a/net/ipv4/tcp_timer.c
1800 +++ b/net/ipv4/tcp_timer.c
1801 @@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *sk)
1802 * to prevent DoS attacks. It is called when a retransmission timeout
1803 * or zero probe timeout occurs on orphaned socket.
1804 *
1805 + * Also close if our net namespace is exiting; in that case there is no
1806 + * hope of ever communicating again since all netns interfaces are already
1807 + * down (or about to be down), and we need to release our dst references,
1808 + * which have been moved to the netns loopback interface, so the namespace
1809 + * can finish exiting. This condition is only possible if we are a kernel
1810 + * socket, as those do not hold references to the namespace.
1811 + *
1812 * Criteria is still not confirmed experimentally and may change.
1813 * We kill the socket, if:
1814 * 1. If number of orphaned sockets exceeds an administratively configured
1815 * limit.
1816 * 2. If we have strong memory pressure.
1817 + * 3. If our net namespace is exiting.
1818 */
1819 static int tcp_out_of_resources(struct sock *sk, bool do_reset)
1820 {
1821 @@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
1822 __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
1823 return 1;
1824 }
1825 +
1826 + if (!check_net(sock_net(sk))) {
1827 + /* Not possible to send reset; just close */
1828 + tcp_done(sk);
1829 + return 1;
1830 + }
1831 +
1832 return 0;
1833 }
1834
1835 diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
1836 index 6401574cd638..f4f616eaaeb8 100644
1837 --- a/net/ipv4/udp_offload.c
1838 +++ b/net/ipv4/udp_offload.c
1839 @@ -205,6 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
1840 goto out;
1841 }
1842
1843 + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
1844 + goto out;
1845 +
1846 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1847 goto out;
1848
1849 diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
1850 index c46066c5dc27..db2613b4a049 100644
1851 --- a/net/ipv6/ip6_gre.c
1852 +++ b/net/ipv6/ip6_gre.c
1853 @@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
1854
1855 nt->dev = dev;
1856 nt->net = dev_net(dev);
1857 - ip6gre_tnl_link_config(nt, 1);
1858
1859 if (register_netdevice(dev) < 0)
1860 goto failed_free;
1861
1862 + ip6gre_tnl_link_config(nt, 1);
1863 +
1864 /* Can use a lockless transmit, unless we generate output sequences */
1865 if (!(nt->parms.o_flags & TUNNEL_SEQ))
1866 dev->features |= NETIF_F_LLTX;
1867 @@ -1263,7 +1264,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
1868
1869 static int ip6gre_tap_init(struct net_device *dev)
1870 {
1871 - struct ip6_tnl *tunnel;
1872 int ret;
1873
1874 ret = ip6gre_tunnel_init_common(dev);
1875 @@ -1272,10 +1272,6 @@ static int ip6gre_tap_init(struct net_device *dev)
1876
1877 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1878
1879 - tunnel = netdev_priv(dev);
1880 -
1881 - ip6gre_tnl_link_config(tunnel, 1);
1882 -
1883 return 0;
1884 }
1885
1886 @@ -1370,7 +1366,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1887
1888 nt->dev = dev;
1889 nt->net = dev_net(dev);
1890 - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1891
1892 dev->features |= GRE6_FEATURES;
1893 dev->hw_features |= GRE6_FEATURES;
1894 @@ -1396,6 +1391,11 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
1895 if (err)
1896 goto out;
1897
1898 + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
1899 +
1900 + if (tb[IFLA_MTU])
1901 + ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
1902 +
1903 dev_hold(dev);
1904 ip6gre_tunnel_link(ign, nt);
1905
1906 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
1907 index 388584b8ff31..2e3db3619858 100644
1908 --- a/net/ipv6/ip6_output.c
1909 +++ b/net/ipv6/ip6_output.c
1910 @@ -156,7 +156,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1911 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1912 }
1913
1914 -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
1915 +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
1916 {
1917 if (!np->autoflowlabel_set)
1918 return ip6_default_np_autolabel(net);
1919 @@ -1260,14 +1260,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
1920 v6_cork->tclass = ipc6->tclass;
1921 if (rt->dst.flags & DST_XFRM_TUNNEL)
1922 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1923 - rt->dst.dev->mtu : dst_mtu(&rt->dst);
1924 + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
1925 else
1926 mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
1927 - rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1928 + READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
1929 if (np->frag_size < mtu) {
1930 if (np->frag_size)
1931 mtu = np->frag_size;
1932 }
1933 + if (mtu < IPV6_MIN_MTU)
1934 + return -EINVAL;
1935 cork->base.fragsize = mtu;
1936 if (dst_allfrag(rt->dst.path))
1937 cork->base.flags |= IPCORK_ALLFRAG;
1938 @@ -1798,6 +1800,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
1939 cork.base.flags = 0;
1940 cork.base.addr = 0;
1941 cork.base.opt = NULL;
1942 + cork.base.dst = NULL;
1943 v6_cork.opt = NULL;
1944 err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
1945 if (err) {
1946 diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
1947 index 6e3871c7f8f7..bcea985dd76b 100644
1948 --- a/net/ipv6/ipv6_sockglue.c
1949 +++ b/net/ipv6/ipv6_sockglue.c
1950 @@ -1316,7 +1316,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1951 break;
1952
1953 case IPV6_AUTOFLOWLABEL:
1954 - val = np->autoflowlabel;
1955 + val = ip6_autoflowlabel(sock_net(sk), np);
1956 break;
1957
1958 default:
1959 diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
1960 index d883c9204c01..278e49cd67d4 100644
1961 --- a/net/ipv6/tcpv6_offload.c
1962 +++ b/net/ipv6/tcpv6_offload.c
1963 @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
1964 {
1965 struct tcphdr *th;
1966
1967 + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
1968 + return ERR_PTR(-EINVAL);
1969 +
1970 if (!pskb_may_pull(skb, sizeof(*th)))
1971 return ERR_PTR(-EINVAL);
1972
1973 diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
1974 index e7d378c032cb..2bd2087bd105 100644
1975 --- a/net/ipv6/udp_offload.c
1976 +++ b/net/ipv6/udp_offload.c
1977 @@ -55,6 +55,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
1978 const struct ipv6hdr *ipv6h;
1979 struct udphdr *uh;
1980
1981 + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
1982 + goto out;
1983 +
1984 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
1985 goto out;
1986
1987 diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
1988 index 28d065394c09..3f499126727c 100644
1989 --- a/net/netfilter/nfnetlink_cthelper.c
1990 +++ b/net/netfilter/nfnetlink_cthelper.c
1991 @@ -17,6 +17,7 @@
1992 #include <linux/types.h>
1993 #include <linux/list.h>
1994 #include <linux/errno.h>
1995 +#include <linux/capability.h>
1996 #include <net/netlink.h>
1997 #include <net/sock.h>
1998
1999 @@ -392,6 +393,9 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
2000 struct nfnl_cthelper *nlcth;
2001 int ret = 0;
2002
2003 + if (!capable(CAP_NET_ADMIN))
2004 + return -EPERM;
2005 +
2006 if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
2007 return -EINVAL;
2008
2009 @@ -595,6 +599,9 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
2010 struct nfnl_cthelper *nlcth;
2011 bool tuple_set = false;
2012
2013 + if (!capable(CAP_NET_ADMIN))
2014 + return -EPERM;
2015 +
2016 if (nlh->nlmsg_flags & NLM_F_DUMP) {
2017 struct netlink_dump_control c = {
2018 .dump = nfnl_cthelper_dump_table,
2019 @@ -661,6 +668,9 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
2020 struct nfnl_cthelper *nlcth, *n;
2021 int j = 0, ret;
2022
2023 + if (!capable(CAP_NET_ADMIN))
2024 + return -EPERM;
2025 +
2026 if (tb[NFCTH_NAME])
2027 helper_name = nla_data(tb[NFCTH_NAME]);
2028
2029 diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
2030 index 2455b69b5810..b589a62e68a2 100644
2031 --- a/net/netfilter/xt_osf.c
2032 +++ b/net/netfilter/xt_osf.c
2033 @@ -19,6 +19,7 @@
2034 #include <linux/module.h>
2035 #include <linux/kernel.h>
2036
2037 +#include <linux/capability.h>
2038 #include <linux/if.h>
2039 #include <linux/inetdevice.h>
2040 #include <linux/ip.h>
2041 @@ -69,6 +70,9 @@ static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
2042 struct xt_osf_finger *kf = NULL, *sf;
2043 int err = 0;
2044
2045 + if (!capable(CAP_NET_ADMIN))
2046 + return -EPERM;
2047 +
2048 if (!osf_attrs[OSF_ATTR_FINGER])
2049 return -EINVAL;
2050
2051 @@ -113,6 +117,9 @@ static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
2052 struct xt_osf_finger *sf;
2053 int err = -ENOENT;
2054
2055 + if (!capable(CAP_NET_ADMIN))
2056 + return -EPERM;
2057 +
2058 if (!osf_attrs[OSF_ATTR_FINGER])
2059 return -EINVAL;
2060
2061 diff --git a/net/sctp/offload.c b/net/sctp/offload.c
2062 index 4f5a2b580aa5..6300f28c9588 100644
2063 --- a/net/sctp/offload.c
2064 +++ b/net/sctp/offload.c
2065 @@ -44,6 +44,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
2066 struct sk_buff *segs = ERR_PTR(-EINVAL);
2067 struct sctphdr *sh;
2068
2069 + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
2070 + goto out;
2071 +
2072 sh = sctp_hdr(skb);
2073 if (!pskb_may_pull(skb, sizeof(*sh)))
2074 goto out;
2075 diff --git a/net/sctp/socket.c b/net/sctp/socket.c
2076 index 7181ce6c62bf..c472b8391dde 100644
2077 --- a/net/sctp/socket.c
2078 +++ b/net/sctp/socket.c
2079 @@ -83,7 +83,7 @@
2080 static int sctp_writeable(struct sock *sk);
2081 static void sctp_wfree(struct sk_buff *skb);
2082 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
2083 - size_t msg_len, struct sock **orig_sk);
2084 + size_t msg_len);
2085 static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
2086 static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
2087 static int sctp_wait_for_accept(struct sock *sk, long timeo);
2088 @@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
2089 if (len < sizeof (struct sockaddr))
2090 return NULL;
2091
2092 + if (!opt->pf->af_supported(addr->sa.sa_family, opt))
2093 + return NULL;
2094 +
2095 /* V4 mapped address are really of AF_INET family */
2096 if (addr->sa.sa_family == AF_INET6 &&
2097 - ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
2098 - if (!opt->pf->af_supported(AF_INET, opt))
2099 - return NULL;
2100 - } else {
2101 - /* Does this PF support this AF? */
2102 - if (!opt->pf->af_supported(addr->sa.sa_family, opt))
2103 - return NULL;
2104 - }
2105 + ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
2106 + !opt->pf->af_supported(AF_INET, opt))
2107 + return NULL;
2108
2109 /* If we get this far, af is valid. */
2110 af = sctp_get_af_specific(addr->sa.sa_family);
2111 @@ -1958,7 +1956,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
2112 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2113 if (!sctp_wspace(asoc)) {
2114 /* sk can be changed by peel off when waiting for buf. */
2115 - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
2116 + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
2117 if (err) {
2118 if (err == -ESRCH) {
2119 /* asoc is already dead. */
2120 @@ -7441,12 +7439,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
2121
2122 /* Helper function to wait for space in the sndbuf. */
2123 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
2124 - size_t msg_len, struct sock **orig_sk)
2125 + size_t msg_len)
2126 {
2127 struct sock *sk = asoc->base.sk;
2128 - int err = 0;
2129 long current_timeo = *timeo_p;
2130 DEFINE_WAIT(wait);
2131 + int err = 0;
2132
2133 pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
2134 *timeo_p, msg_len);
2135 @@ -7475,17 +7473,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
2136 release_sock(sk);
2137 current_timeo = schedule_timeout(current_timeo);
2138 lock_sock(sk);
2139 - if (sk != asoc->base.sk) {
2140 - release_sock(sk);
2141 - sk = asoc->base.sk;
2142 - lock_sock(sk);
2143 - }
2144 + if (sk != asoc->base.sk)
2145 + goto do_error;
2146
2147 *timeo_p = current_timeo;
2148 }
2149
2150 out:
2151 - *orig_sk = sk;
2152 finish_wait(&asoc->wait, &wait);
2153
2154 /* Release the association's refcnt. */
2155 diff --git a/net/socket.c b/net/socket.c
2156 index 05f13b24572c..bd3b33988ee0 100644
2157 --- a/net/socket.c
2158 +++ b/net/socket.c
2159 @@ -2548,6 +2548,15 @@ static int __init sock_init(void)
2160
2161 core_initcall(sock_init); /* early initcall */
2162
2163 +static int __init jit_init(void)
2164 +{
2165 +#ifdef CONFIG_BPF_JIT_ALWAYS_ON
2166 + bpf_jit_enable = 1;
2167 +#endif
2168 + return 0;
2169 +}
2170 +pure_initcall(jit_init);
2171 +
2172 #ifdef CONFIG_PROC_FS
2173 void socket_seq_show(struct seq_file *seq)
2174 {
2175 diff --git a/net/tipc/node.c b/net/tipc/node.c
2176 index 27753325e06e..5b3e1ea37b6d 100644
2177 --- a/net/tipc/node.c
2178 +++ b/net/tipc/node.c
2179 @@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
2180
2181 if (strcmp(name, tipc_bclink_name) == 0) {
2182 err = tipc_nl_add_bc_link(net, &msg);
2183 - if (err) {
2184 - nlmsg_free(msg.skb);
2185 - return err;
2186 - }
2187 + if (err)
2188 + goto err_free;
2189 } else {
2190 int bearer_id;
2191 struct tipc_node *node;
2192 struct tipc_link *link;
2193
2194 node = tipc_node_find_by_name(net, name, &bearer_id);
2195 - if (!node)
2196 - return -EINVAL;
2197 + if (!node) {
2198 + err = -EINVAL;
2199 + goto err_free;
2200 + }
2201
2202 tipc_node_read_lock(node);
2203 link = node->links[bearer_id].link;
2204 if (!link) {
2205 tipc_node_read_unlock(node);
2206 - nlmsg_free(msg.skb);
2207 - return -EINVAL;
2208 + err = -EINVAL;
2209 + goto err_free;
2210 }
2211
2212 err = __tipc_nl_add_link(net, &msg, link, 0);
2213 tipc_node_read_unlock(node);
2214 - if (err) {
2215 - nlmsg_free(msg.skb);
2216 - return err;
2217 - }
2218 + if (err)
2219 + goto err_free;
2220 }
2221
2222 return genlmsg_reply(msg.skb, info);
2223 +
2224 +err_free:
2225 + nlmsg_free(msg.skb);
2226 + return err;
2227 }
2228
2229 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
2230 diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c
2231 index ac73710473de..1517a232ab18 100644
2232 --- a/tools/usb/usbip/libsrc/usbip_common.c
2233 +++ b/tools/usb/usbip/libsrc/usbip_common.c
2234 @@ -215,9 +215,16 @@ int read_usb_interface(struct usbip_usb_device *udev, int i,
2235 struct usbip_usb_interface *uinf)
2236 {
2237 char busid[SYSFS_BUS_ID_SIZE];
2238 + int size;
2239 struct udev_device *sif;
2240
2241 - sprintf(busid, "%s:%d.%d", udev->busid, udev->bConfigurationValue, i);
2242 + size = snprintf(busid, sizeof(busid), "%s:%d.%d",
2243 + udev->busid, udev->bConfigurationValue, i);
2244 + if (size < 0 || (unsigned int)size >= sizeof(busid)) {
2245 + err("busid length %i >= %lu or < 0", size,
2246 + (long unsigned)sizeof(busid));
2247 + return -1;
2248 + }
2249
2250 sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid);
2251 if (!sif) {
2252 diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
2253 index 9d415228883d..6ff7b601f854 100644
2254 --- a/tools/usb/usbip/libsrc/usbip_host_common.c
2255 +++ b/tools/usb/usbip/libsrc/usbip_host_common.c
2256 @@ -40,13 +40,20 @@ struct udev *udev_context;
2257 static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
2258 {
2259 char status_attr_path[SYSFS_PATH_MAX];
2260 + int size;
2261 int fd;
2262 int length;
2263 char status;
2264 int value = 0;
2265
2266 - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status",
2267 - udev->path);
2268 + size = snprintf(status_attr_path, sizeof(status_attr_path),
2269 + "%s/usbip_status", udev->path);
2270 + if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) {
2271 + err("usbip_status path length %i >= %lu or < 0", size,
2272 + (long unsigned)sizeof(status_attr_path));
2273 + return -1;
2274 + }
2275 +
2276
2277 fd = open(status_attr_path, O_RDONLY);
2278 if (fd < 0) {
2279 @@ -218,6 +225,7 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd)
2280 {
2281 char attr_name[] = "usbip_sockfd";
2282 char sockfd_attr_path[SYSFS_PATH_MAX];
2283 + int size;
2284 char sockfd_buff[30];
2285 int ret;
2286
2287 @@ -237,10 +245,20 @@ int usbip_export_device(struct usbip_exported_device *edev, int sockfd)
2288 }
2289
2290 /* only the first interface is true */
2291 - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
2292 - edev->udev.path, attr_name);
2293 + size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
2294 + edev->udev.path, attr_name);
2295 + if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) {
2296 + err("exported device path length %i >= %lu or < 0", size,
2297 + (long unsigned)sizeof(sockfd_attr_path));
2298 + return -1;
2299 + }
2300
2301 - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
2302 + size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
2303 + if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) {
2304 + err("socket length %i >= %lu or < 0", size,
2305 + (long unsigned)sizeof(sockfd_buff));
2306 + return -1;
2307 + }
2308
2309 ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff,
2310 strlen(sockfd_buff));
2311 diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
2312 index ad9204773533..1274f326242c 100644
2313 --- a/tools/usb/usbip/libsrc/vhci_driver.c
2314 +++ b/tools/usb/usbip/libsrc/vhci_driver.c
2315 @@ -55,12 +55,12 @@ static int parse_status(const char *value)
2316
2317 while (*c != '\0') {
2318 int port, status, speed, devid;
2319 - unsigned long socket;
2320 + int sockfd;
2321 char lbusid[SYSFS_BUS_ID_SIZE];
2322
2323 - ret = sscanf(c, "%d %d %d %x %lx %31s\n",
2324 + ret = sscanf(c, "%d %d %d %x %u %31s\n",
2325 &port, &status, &speed,
2326 - &devid, &socket, lbusid);
2327 + &devid, &sockfd, lbusid);
2328
2329 if (ret < 5) {
2330 dbg("sscanf failed: %d", ret);
2331 @@ -69,7 +69,7 @@ static int parse_status(const char *value)
2332
2333 dbg("port %d status %d speed %d devid %x",
2334 port, status, speed, devid);
2335 - dbg("socket %lx lbusid %s", socket, lbusid);
2336 + dbg("sockfd %u lbusid %s", sockfd, lbusid);
2337
2338
2339 /* if a device is connected, look at it */
2340 diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c
2341 index d7599d943529..73d8eee8130b 100644
2342 --- a/tools/usb/usbip/src/usbip.c
2343 +++ b/tools/usb/usbip/src/usbip.c
2344 @@ -176,6 +176,8 @@ int main(int argc, char *argv[])
2345 break;
2346 case '?':
2347 printf("usbip: invalid option\n");
2348 + /* Terminate after printing error */
2349 + /* FALLTHRU */
2350 default:
2351 usbip_usage();
2352 goto out;