Magellan Linux

Contents of /trunk/kernel-alx/patches-4.4/0117-4.4.18-all-fixes.patch



Revision 2826
Tue Sep 13 07:18:21 2016 UTC by niro
File size: 64900 bytes
-linux-4.4.18
1 diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
2 index 54944c71b819..2a4ee6302122 100644
3 --- a/Documentation/x86/pat.txt
4 +++ b/Documentation/x86/pat.txt
5 @@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
6 "debugpat" boot parameter. With this parameter, various debug messages are
7 printed to dmesg log.
8
9 +PAT Initialization
10 +------------------
11 +
12 +The following table describes how PAT is initialized under various
13 +configurations. The PAT MSR must be updated by Linux in order to support WC
14 +and WT attributes. Otherwise, the PAT MSR has the value programmed in it
15 +by the firmware. Note that Xen enables the WC attribute in the PAT MSR for guests.
16 +
17 + MTRR PAT   Call Sequence               PAT State  PAT MSR
18 + =========================================================
19 + E    E     MTRR -> PAT init            Enabled    OS
20 + E    D     MTRR -> PAT init            Disabled   -
21 + D    E     MTRR -> PAT disable         Disabled   BIOS
22 + D    D     MTRR -> PAT disable         Disabled   -
23 + -    np/E  PAT  -> PAT disable         Disabled   BIOS
24 + -    np/D  PAT  -> PAT disable         Disabled   -
25 + E    !P/E  MTRR -> PAT init            Disabled   BIOS
26 + D    !P/E  MTRR -> PAT disable         Disabled   BIOS
27 + !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
28 +
29 + Legend
30 + ------------------------------------------------
31 + E         Feature enabled in CPU
32 + D         Feature disabled/unsupported in CPU
33 + np        "nopat" boot option specified
34 + !P        CONFIG_X86_PAT option unset
35 + !M        CONFIG_MTRR option unset
36 + Enabled   PAT state set to enabled
37 + Disabled  PAT state set to disabled
38 + OS        PAT initializes PAT MSR with OS setting
39 + BIOS      PAT keeps PAT MSR with BIOS setting
40 +
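[Editor's note] Read row-wise, the table collapses to one predicate: PAT only reaches the Enabled state when both MTRRs and PAT are fully usable; every other row leaves PAT disabled and the MSR holding whatever firmware programmed. A minimal user-space sketch of that predicate, with hypothetical names and the MSR column ignored, not kernel code:

#include <stdbool.h>
#include <stdio.h>

enum pat_state { PAT_ENABLED, PAT_DISABLED };

/* mtrr_on: MTRRs enabled in CPU and config ("E" rows).
 * pat_on:  PAT in CPU, CONFIG_X86_PAT set, and no "nopat" option. */
static enum pat_state pat_boot_state(bool mtrr_on, bool pat_on)
{
    return (mtrr_on && pat_on) ? PAT_ENABLED : PAT_DISABLED;
}

int main(void)
{
    printf("MTRR=E PAT=E -> %s\n",
           pat_boot_state(true, true) == PAT_ENABLED ? "Enabled" : "Disabled");
    printf("MTRR=D PAT=E -> %s\n",
           pat_boot_state(false, true) == PAT_ENABLED ? "Enabled" : "Disabled");
    return 0;
}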
41 diff --git a/Makefile b/Makefile
42 index 76d34f763a41..eaedea88a8a7 100644
43 --- a/Makefile
44 +++ b/Makefile
45 @@ -1,6 +1,6 @@
46 VERSION = 4
47 PATCHLEVEL = 4
48 -SUBLEVEL = 17
49 +SUBLEVEL = 18
50 EXTRAVERSION =
51 NAME = Blurry Fish Butt
52
53 diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
54 index 087acb569b63..5f221acd21ae 100644
55 --- a/arch/arm/kernel/sys_oabi-compat.c
56 +++ b/arch/arm/kernel/sys_oabi-compat.c
57 @@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
58 mm_segment_t fs;
59 long ret, err, i;
60
61 - if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
62 + if (maxevents <= 0 ||
63 + maxevents > (INT_MAX/sizeof(*kbuf)) ||
64 + maxevents > (INT_MAX/sizeof(*events)))
65 return -EINVAL;
66 + if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
67 + return -EFAULT;
68 kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
69 if (!kbuf)
70 return -ENOMEM;
71 @@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,
72
73 if (nsops < 1 || nsops > SEMOPM)
74 return -EINVAL;
75 + if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
76 + return -EFAULT;
77 sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
78 if (!sops)
79 return -ENOMEM;
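[Editor's note] The hunk above guards kmalloc() calls sized by user-supplied counts: the count is now checked against every element size involved before the multiplication, and the user buffer is validated with access_ok() before use. A minimal user-space sketch of the overflow check, with hypothetical names (access_ok() has no user-space analogue):

#include <limits.h>
#include <stdlib.h>

struct record { long a, b; };

/* Reject non-positive counts and any count whose product with the
 * element size would overflow, before the multiplication happens. */
static void *alloc_records(int maxevents)
{
    if (maxevents <= 0 || (size_t)maxevents > INT_MAX / sizeof(struct record))
        return NULL;
    return malloc(sizeof(struct record) * (size_t)maxevents);
}

int main(void)
{
    void *ok  = alloc_records(16);        /* accepted */
    void *bad = alloc_records(INT_MAX);   /* rejected: would overflow */

    free(ok);
    return bad != NULL;                   /* expect 0 */
}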
80 diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
81 index 5a69eb48d0a8..ee93d5fe61d7 100644
82 --- a/arch/mips/kernel/scall64-n32.S
83 +++ b/arch/mips/kernel/scall64-n32.S
84 @@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
85 PTR sys_ni_syscall /* available, was setaltroot */
86 PTR sys_add_key
87 PTR sys_request_key
88 - PTR sys_keyctl /* 6245 */
89 + PTR compat_sys_keyctl /* 6245 */
90 PTR sys_set_thread_area
91 PTR sys_inotify_init
92 PTR sys_inotify_add_watch
93 diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
94 index e4b6d7c97822..b77052ec6fb2 100644
95 --- a/arch/mips/kernel/scall64-o32.S
96 +++ b/arch/mips/kernel/scall64-o32.S
97 @@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
98 PTR sys_ni_syscall /* available, was setaltroot */
99 PTR sys_add_key /* 4280 */
100 PTR sys_request_key
101 - PTR sys_keyctl
102 + PTR compat_sys_keyctl
103 PTR sys_set_thread_area
104 PTR sys_inotify_init
105 PTR sys_inotify_add_watch /* 4285 */
106 diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
107 index b1f0a90f933b..42570d8fb265 100644
108 --- a/arch/s390/kernel/ipl.c
109 +++ b/arch/s390/kernel/ipl.c
110 @@ -2070,13 +2070,6 @@ void s390_reset_system(void (*fn_pre)(void),
111 S390_lowcore.program_new_psw.addr =
112 PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
113
114 - /*
115 - * Clear subchannel ID and number to signal new kernel that no CCW or
116 - * SCSI IPL has been done (for kexec and kdump)
117 - */
118 - S390_lowcore.subchannel_id = 0;
119 - S390_lowcore.subchannel_nr = 0;
120 -
121 /* Store status at absolute zero */
122 store_status();
123
124 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
125 index f17705e1332c..e62f4401e792 100644
126 --- a/arch/x86/entry/syscalls/syscall_32.tbl
127 +++ b/arch/x86/entry/syscalls/syscall_32.tbl
128 @@ -294,7 +294,7 @@
129 # 285 sys_setaltroot
130 286 i386 add_key sys_add_key
131 287 i386 request_key sys_request_key
132 -288 i386 keyctl sys_keyctl
133 +288 i386 keyctl sys_keyctl compat_sys_keyctl
134 289 i386 ioprio_set sys_ioprio_set
135 290 i386 ioprio_get sys_ioprio_get
136 291 i386 inotify_init sys_inotify_init
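[Editor's note] The three table changes above route 32-bit callers to compat_sys_keyctl. Several keyctl() commands pass structures containing pointers, for example an iovec array for KEYCTL_INSTANTIATE_IOV, so a 64-bit kernel must translate the 32-bit layout rather than read it as native. A small illustrative program showing the ABI size mismatch (struct names here are hypothetical mirrors, not the kernel's types):

#include <stdint.h>
#include <stdio.h>

struct iovec32 { uint32_t iov_base; uint32_t iov_len; };
struct iovec64 { uint64_t iov_base; uint64_t iov_len; };

int main(void)
{
    printf("32-bit iovec: %zu bytes, 64-bit iovec: %zu bytes\n",
           sizeof(struct iovec32), sizeof(struct iovec64));  /* 8 vs 16 */
    return 0;
}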
137 diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
138 index b94f6f64e23d..dbff1456d215 100644
139 --- a/arch/x86/include/asm/mtrr.h
140 +++ b/arch/x86/include/asm/mtrr.h
141 @@ -24,6 +24,7 @@
142 #define _ASM_X86_MTRR_H
143
144 #include <uapi/asm/mtrr.h>
145 +#include <asm/pat.h>
146
147
148 /*
149 @@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
150 static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
151 {
152 }
153 +static inline void mtrr_bp_init(void)
154 +{
155 + pat_disable("MTRRs disabled, skipping PAT initialization too.");
156 +}
157
158 #define mtrr_ap_init() do {} while (0)
159 -#define mtrr_bp_init() do {} while (0)
160 #define set_mtrr_aps_delayed_init() do {} while (0)
161 #define mtrr_aps_init() do {} while (0)
162 #define mtrr_bp_restore() do {} while (0)
163 diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
164 index ca6c228d5e62..0b1ff4c1c14e 100644
165 --- a/arch/x86/include/asm/pat.h
166 +++ b/arch/x86/include/asm/pat.h
167 @@ -5,8 +5,8 @@
168 #include <asm/pgtable_types.h>
169
170 bool pat_enabled(void);
171 +void pat_disable(const char *reason);
172 extern void pat_init(void);
173 -void pat_init_cache_modes(u64);
174
175 extern int reserve_memtype(u64 start, u64 end,
176 enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
177 diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
178 index 3b533cf37c74..b5624fafa44a 100644
179 --- a/arch/x86/kernel/cpu/mtrr/generic.c
180 +++ b/arch/x86/kernel/cpu/mtrr/generic.c
181 @@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
182 pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
183 }
184
185 +/* PAT setup for BP. We need to go through sync steps here */
186 +void __init mtrr_bp_pat_init(void)
187 +{
188 + unsigned long flags;
189 +
190 + local_irq_save(flags);
191 + prepare_set();
192 +
193 + pat_init();
194 +
195 + post_set();
196 + local_irq_restore(flags);
197 +}
198 +
199 /* Grab all of the MTRR state for this CPU into *state */
200 bool __init get_mtrr_state(void)
201 {
202 struct mtrr_var_range *vrs;
203 - unsigned long flags;
204 unsigned lo, dummy;
205 unsigned int i;
206
207 @@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
208
209 mtrr_state_set = 1;
210
211 - /* PAT setup for BP. We need to go through sync steps here */
212 - local_irq_save(flags);
213 - prepare_set();
214 -
215 - pat_init();
216 -
217 - post_set();
218 - local_irq_restore(flags);
219 -
220 return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
221 }
222
223 diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
224 index f891b4750f04..fa77ac8291f0 100644
225 --- a/arch/x86/kernel/cpu/mtrr/main.c
226 +++ b/arch/x86/kernel/cpu/mtrr/main.c
227 @@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
228 /* BIOS may override */
229 __mtrr_enabled = get_mtrr_state();
230
231 + if (mtrr_enabled())
232 + mtrr_bp_pat_init();
233 +
234 if (mtrr_cleanup(phys_addr)) {
235 changed_by_mtrr_cleanup = 1;
236 mtrr_if->set_all();
237 @@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
238 }
239 }
240
241 - if (!mtrr_enabled())
242 + if (!mtrr_enabled()) {
243 pr_info("MTRR: Disabled\n");
244 +
245 + /*
246 + * PAT initialization relies on MTRR's rendezvous handler.
247 + * Skip PAT init until the handler can initialize both
248 + * features independently.
249 + */
250 + pat_disable("MTRRs disabled, skipping PAT initialization too.");
251 + }
252 }
253
254 void mtrr_ap_init(void)
255 diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
256 index 951884dcc433..6c7ced07d16d 100644
257 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h
258 +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
259 @@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
260 void fill_mtrr_var_range(unsigned int index,
261 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
262 bool get_mtrr_state(void);
263 +void mtrr_bp_pat_init(void);
264
265 extern void set_mtrr_ops(const struct mtrr_ops *ops);
266
267 diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
268 index 844b06d67df4..307f60ecfc6d 100644
269 --- a/arch/x86/mm/mmap.c
270 +++ b/arch/x86/mm/mmap.c
271 @@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
272 }
273
274 /*
275 - * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
276 - * does, but not when emulating X86_32
277 - */
278 -static unsigned long mmap_legacy_base(unsigned long rnd)
279 -{
280 - if (mmap_is_ia32())
281 - return TASK_UNMAPPED_BASE;
282 - else
283 - return TASK_UNMAPPED_BASE + rnd;
284 -}
285 -
286 -/*
287 * This function, called very early during the creation of a new
288 * process VM image, sets up which VM layout function to use:
289 */
290 @@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
291 if (current->flags & PF_RANDOMIZE)
292 random_factor = arch_mmap_rnd();
293
294 - mm->mmap_legacy_base = mmap_legacy_base(random_factor);
295 + mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
296
297 if (mmap_is_legacy()) {
298 mm->mmap_base = mm->mmap_legacy_base;
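[Editor's note] With this change the legacy mmap base includes the random factor unconditionally, so 32-bit tasks get a randomized bottom-up base too and the helper collapses to one expression. A user-space sketch of the resulting computation (the TASK_UNMAPPED_BASE value is illustrative only):

#include <stdio.h>

#define TASK_UNMAPPED_BASE 0x10000UL    /* illustrative value only */

/* After the patch: the random factor is applied on 32-bit and 64-bit
 * alike, so no mmap_is_ia32() special case remains. */
static unsigned long mmap_legacy_base(unsigned long rnd)
{
    return TASK_UNMAPPED_BASE + rnd;
}

int main(void)
{
    printf("legacy base with rnd=0x2000: %#lx\n", mmap_legacy_base(0x2000));
    return 0;
}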
299 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
300 index 188e3e07eeeb..6ad687d104ca 100644
301 --- a/arch/x86/mm/pat.c
302 +++ b/arch/x86/mm/pat.c
303 @@ -39,11 +39,22 @@
304 static bool boot_cpu_done;
305
306 static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
307 +static void init_cache_modes(void);
308
309 -static inline void pat_disable(const char *reason)
310 +void pat_disable(const char *reason)
311 {
312 + if (!__pat_enabled)
313 + return;
314 +
315 + if (boot_cpu_done) {
316 + WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
317 + return;
318 + }
319 +
320 __pat_enabled = 0;
321 pr_info("x86/PAT: %s\n", reason);
322 +
323 + init_cache_modes();
324 }
325
326 static int __init nopat(char *str)
327 @@ -180,7 +191,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
328 * configuration.
329 * Using lower indices is preferred, so we start with highest index.
330 */
331 -void pat_init_cache_modes(u64 pat)
332 +static void __init_cache_modes(u64 pat)
333 {
334 enum page_cache_mode cache;
335 char pat_msg[33];
336 @@ -201,14 +212,11 @@ static void pat_bsp_init(u64 pat)
337 {
338 u64 tmp_pat;
339
340 - if (!cpu_has_pat) {
341 + if (!boot_cpu_has(X86_FEATURE_PAT)) {
342 pat_disable("PAT not supported by CPU.");
343 return;
344 }
345
346 - if (!pat_enabled())
347 - goto done;
348 -
349 rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
350 if (!tmp_pat) {
351 pat_disable("PAT MSR is 0, disabled.");
352 @@ -217,16 +225,12 @@ static void pat_bsp_init(u64 pat)
353
354 wrmsrl(MSR_IA32_CR_PAT, pat);
355
356 -done:
357 - pat_init_cache_modes(pat);
358 + __init_cache_modes(pat);
359 }
360
361 static void pat_ap_init(u64 pat)
362 {
363 - if (!pat_enabled())
364 - return;
365 -
366 - if (!cpu_has_pat) {
367 + if (!boot_cpu_has(X86_FEATURE_PAT)) {
368 /*
369 * If this happens we are on a secondary CPU, but switched to
370 * PAT on the boot CPU. We have no way to undo PAT.
371 @@ -237,18 +241,32 @@ static void pat_ap_init(u64 pat)
372 wrmsrl(MSR_IA32_CR_PAT, pat);
373 }
374
375 -void pat_init(void)
376 +static void init_cache_modes(void)
377 {
378 - u64 pat;
379 - struct cpuinfo_x86 *c = &boot_cpu_data;
380 + u64 pat = 0;
381 + static int init_cm_done;
382
383 - if (!pat_enabled()) {
384 + if (init_cm_done)
385 + return;
386 +
387 + if (boot_cpu_has(X86_FEATURE_PAT)) {
388 + /*
389 + * CPU supports PAT. Set PAT table to be consistent with
390 + * PAT MSR. This case supports "nopat" boot option, and
391 + * virtual machine environments which support PAT without
392 + * MTRRs. In particular, Xen has a unique setup of the PAT MSR.
393 + *
394 + * If the PAT MSR reads as 0, it is considered invalid and is
395 + * emulated as no PAT.
396 + */
397 + rdmsrl(MSR_IA32_CR_PAT, pat);
398 + }
399 +
400 + if (!pat) {
401 /*
402 * No PAT. Emulate the PAT table that corresponds to the two
403 - * cache bits, PWT (Write Through) and PCD (Cache Disable). This
404 - * setup is the same as the BIOS default setup when the system
405 - * has PAT but the "nopat" boot option has been specified. This
406 - * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
407 + * cache bits, PWT (Write Through) and PCD (Cache Disable).
408 + * This setup is also the same as the BIOS default setup.
409 *
410 * PTE encoding:
411 *
412 @@ -265,10 +283,36 @@ void pat_init(void)
413 */
414 pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
415 PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
416 + }
417 +
418 + __init_cache_modes(pat);
419 +
420 + init_cm_done = 1;
421 +}
422 +
423 +/**
424 + * pat_init - Initialize PAT MSR and PAT table
425 + *
426 + * This function initializes PAT MSR and PAT table with an OS-defined value
427 + * to enable additional cache attributes, WC and WT.
428 + *
429 + * This function must be called on all CPUs using the specific sequence of
430 + * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
431 + * procedure for PAT.
432 + */
433 +void pat_init(void)
434 +{
435 + u64 pat;
436 + struct cpuinfo_x86 *c = &boot_cpu_data;
437 +
438 + if (!pat_enabled()) {
439 + init_cache_modes();
440 + return;
441 + }
442
443 - } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
444 - (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
445 - ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
446 + if ((c->x86_vendor == X86_VENDOR_INTEL) &&
447 + (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
448 + ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
449 /*
450 * PAT support with the lower four entries. Intel Pentium 2,
451 * 3, M, and 4 are affected by PAT errata, which makes the
452 @@ -733,25 +777,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
453 if (file->f_flags & O_DSYNC)
454 pcm = _PAGE_CACHE_MODE_UC_MINUS;
455
456 -#ifdef CONFIG_X86_32
457 - /*
458 - * On the PPro and successors, the MTRRs are used to set
459 - * memory types for physical addresses outside main memory,
460 - * so blindly setting UC or PWT on those pages is wrong.
461 - * For Pentiums and earlier, the surround logic should disable
462 - * caching for the high addresses through the KEN pin, but
463 - * we maintain the tradition of paranoia in this code.
464 - */
465 - if (!pat_enabled() &&
466 - !(boot_cpu_has(X86_FEATURE_MTRR) ||
467 - boot_cpu_has(X86_FEATURE_K6_MTRR) ||
468 - boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
469 - boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
470 - (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
471 - pcm = _PAGE_CACHE_MODE_UC;
472 - }
473 -#endif
474 -
475 *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
476 cachemode2protval(pcm));
477 return 1;
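[Editor's note] The emulated "no PAT" table built in init_cache_modes() can be written down concretely. A small user-space program packing the same eight entries with the kernel's PAT(index, type) convention, one memory type per byte of the 64-bit MSR image (type encodings per the Intel SDM):

#include <stdint.h>
#include <stdio.h>

/* Memory-type encodings used in the PAT MSR. */
enum { UC = 0x0, WC = 0x1, WT = 0x4, WP = 0x5, WB = 0x6, UC_MINUS = 0x7 };

/* Place memory type y in byte x of the 64-bit MSR image. */
#define PAT(x, y) ((uint64_t)(y) << ((x) * 8))

int main(void)
{
    uint64_t pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
                   PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);

    printf("emulated PAT image: %#018llx\n", (unsigned long long)pat);
    /* prints 0x0007040600070406 */
    return 0;
}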
478 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
479 index beab8c706ac9..ffa41591bff9 100644
480 --- a/arch/x86/xen/enlighten.c
481 +++ b/arch/x86/xen/enlighten.c
482 @@ -74,7 +74,6 @@
483 #include <asm/mach_traps.h>
484 #include <asm/mwait.h>
485 #include <asm/pci_x86.h>
486 -#include <asm/pat.h>
487 #include <asm/cpu.h>
488
489 #ifdef CONFIG_ACPI
490 @@ -1519,7 +1518,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
491 {
492 struct physdev_set_iopl set_iopl;
493 unsigned long initrd_start = 0;
494 - u64 pat;
495 int rc;
496
497 if (!xen_start_info)
498 @@ -1627,13 +1625,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
499 xen_start_info->nr_pages);
500 xen_reserve_special_pages();
501
502 - /*
503 - * Modify the cache mode translation tables to match Xen's PAT
504 - * configuration.
505 - */
506 - rdmsrl(MSR_IA32_CR_PAT, pat);
507 - pat_init_cache_modes(pat);
508 -
509 /* keep using Xen gdt for now; no urgent need to change it */
510
511 #ifdef CONFIG_X86_32
512 diff --git a/block/genhd.c b/block/genhd.c
513 index e5cafa51567c..d2a1d43bf9fa 100644
514 --- a/block/genhd.c
515 +++ b/block/genhd.c
516 @@ -831,6 +831,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
517 if (iter) {
518 class_dev_iter_exit(iter);
519 kfree(iter);
520 + seqf->private = NULL;
521 }
522 }
523
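[Editor's note] disk_seqf_stop() can be invoked more than once for the same seq_file; clearing seqf->private after freeing the iterator turns the second call into a no-op instead of a double free. The general free-and-clear pattern, sketched in user-space C with hypothetical names:

#include <stdlib.h>

struct seq_private { void *iter; };

/* Free the iterator and clear the cached pointer so a repeated call
 * finds nothing to do instead of freeing twice. */
static void stop(struct seq_private *p)
{
    if (p->iter) {
        free(p->iter);
        p->iter = NULL;
    }
}

int main(void)
{
    struct seq_private p = { malloc(32) };

    stop(&p);
    stop(&p);   /* harmless second call */
    return 0;
}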
524 diff --git a/crypto/gcm.c b/crypto/gcm.c
525 index bec329b3de8d..d9ea5f9c0574 100644
526 --- a/crypto/gcm.c
527 +++ b/crypto/gcm.c
528 @@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
529
530 ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
531 CRYPTO_ALG_TYPE_HASH,
532 - CRYPTO_ALG_TYPE_AHASH_MASK);
533 + CRYPTO_ALG_TYPE_AHASH_MASK |
534 + crypto_requires_sync(algt->type,
535 + algt->mask));
536 if (IS_ERR(ghash_alg))
537 return PTR_ERR(ghash_alg);
538
539 diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
540 index ea5815c5e128..bc769c448d4a 100644
541 --- a/crypto/scatterwalk.c
542 +++ b/crypto/scatterwalk.c
543 @@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
544
545 void scatterwalk_done(struct scatter_walk *walk, int out, int more)
546 {
547 - if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
548 + if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
549 + !(walk->offset & (PAGE_SIZE - 1)))
550 scatterwalk_pagedone(walk, out, more);
551 }
552 EXPORT_SYMBOL_GPL(scatterwalk_done);
553 diff --git a/drivers/char/random.c b/drivers/char/random.c
554 index d0da5d852d41..0227b0465b40 100644
555 --- a/drivers/char/random.c
556 +++ b/drivers/char/random.c
557 @@ -722,15 +722,18 @@ retry:
558 }
559 }
560
561 -static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
562 +static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
563 {
564 const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));
565
566 + if (nbits < 0)
567 + return -EINVAL;
568 +
569 /* Cap the value to avoid overflows */
570 nbits = min(nbits, nbits_max);
571 - nbits = max(nbits, -nbits_max);
572
573 credit_entropy_bits(r, nbits);
574 + return 0;
575 }
576
577 /*********************************************************************
578 @@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
579 return -EPERM;
580 if (get_user(ent_count, p))
581 return -EFAULT;
582 - credit_entropy_bits_safe(&input_pool, ent_count);
583 - return 0;
584 + return credit_entropy_bits_safe(&input_pool, ent_count);
585 case RNDADDENTROPY:
586 if (!capable(CAP_SYS_ADMIN))
587 return -EPERM;
588 @@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
589 size);
590 if (retval < 0)
591 return retval;
592 - credit_entropy_bits_safe(&input_pool, ent_count);
593 - return 0;
594 + return credit_entropy_bits_safe(&input_pool, ent_count);
595 case RNDZAPENTCNT:
596 case RNDCLEARPOOL:
597 /*
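[Editor's note] User-visible effect of the random.c change: RNDADDTOENTCNT and RNDADDENTROPY now reject negative bit counts with EINVAL instead of silently debiting the entropy pool. An illustrative caller (requires CAP_SYS_ADMIN and a kernel with this fix):

#include <fcntl.h>
#include <linux/random.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    int fd = open("/dev/random", O_RDWR);
    int bits = -64;                 /* previously accepted, now rejected */

    if (fd < 0)
        return 1;
    if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0)
        perror("RNDADDTOENTCNT");   /* expect: Invalid argument */
    close(fd);
    return 0;
}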
598 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
599 index 62284e45d531..eb434881ddbc 100644
600 --- a/drivers/gpu/drm/i915/intel_pm.c
601 +++ b/drivers/gpu/drm/i915/intel_pm.c
602 @@ -1789,16 +1789,20 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
603 const struct intel_plane_state *pstate,
604 uint32_t mem_value)
605 {
606 - int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
607 + /*
608 + * We treat the cursor plane as always-on for the purposes of watermark
609 + * calculation. Until we have two-stage watermark programming merged,
610 + * this is necessary to avoid flickering.
611 + */
612 + int cpp = 4;
613 + int width = pstate->visible ? pstate->base.crtc_w : 64;
614
615 - if (!cstate->base.active || !pstate->visible)
616 + if (!cstate->base.active)
617 return 0;
618
619 return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
620 cstate->base.adjusted_mode.crtc_htotal,
621 - drm_rect_width(&pstate->dst),
622 - bpp,
623 - mem_value);
624 + width, cpp, mem_value);
625 }
626
627 /* Only for WM_LP. */
628 diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
629 index 774cd2210566..21febbb0d84e 100644
630 --- a/drivers/hid/hid-sony.c
631 +++ b/drivers/hid/hid-sony.c
632 @@ -1418,8 +1418,10 @@ static int sixaxis_set_operational_usb(struct hid_device *hdev)
633 }
634
635 ret = hid_hw_output_report(hdev, buf, 1);
636 - if (ret < 0)
637 - hid_err(hdev, "can't set operational mode: step 3\n");
638 + if (ret < 0) {
639 + hid_info(hdev, "can't set operational mode: step 3, ignoring\n");
640 + ret = 0;
641 + }
642
643 out:
644 kfree(buf);
645 diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
646 index 27fa0cb09538..85f39cc3e276 100644
647 --- a/drivers/i2c/busses/i2c-i801.c
648 +++ b/drivers/i2c/busses/i2c-i801.c
649 @@ -244,6 +244,13 @@ struct i801_priv {
650 struct platform_device *mux_pdev;
651 #endif
652 struct platform_device *tco_pdev;
653 +
654 + /*
655 + * If set to true the host controller registers are reserved for
656 + * ACPI AML use. Protected by acpi_lock.
657 + */
658 + bool acpi_reserved;
659 + struct mutex acpi_lock;
660 };
661
662 #define FEATURE_SMBUS_PEC (1 << 0)
663 @@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
664 {
665 int hwpec;
666 int block = 0;
667 - int ret, xact = 0;
668 + int ret = 0, xact = 0;
669 struct i801_priv *priv = i2c_get_adapdata(adap);
670
671 + mutex_lock(&priv->acpi_lock);
672 + if (priv->acpi_reserved) {
673 + mutex_unlock(&priv->acpi_lock);
674 + return -EBUSY;
675 + }
676 +
677 hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
678 && size != I2C_SMBUS_QUICK
679 && size != I2C_SMBUS_I2C_BLOCK_DATA;
680 @@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
681 default:
682 dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
683 size);
684 - return -EOPNOTSUPP;
685 + ret = -EOPNOTSUPP;
686 + goto out;
687 }
688
689 if (hwpec) /* enable/disable hardware PEC */
690 @@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
691 ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
692
693 if (block)
694 - return ret;
695 + goto out;
696 if (ret)
697 - return ret;
698 + goto out;
699 if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
700 - return 0;
701 + goto out;
702
703 switch (xact & 0x7f) {
704 case I801_BYTE: /* Result put in SMBHSTDAT0 */
705 @@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
706 (inb_p(SMBHSTDAT1(priv)) << 8);
707 break;
708 }
709 - return 0;
710 +
711 +out:
712 + mutex_unlock(&priv->acpi_lock);
713 + return ret;
714 }
715
716
717 @@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv)
718 priv->tco_pdev = pdev;
719 }
720
721 +#ifdef CONFIG_ACPI
722 +static acpi_status
723 +i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
724 + u64 *value, void *handler_context, void *region_context)
725 +{
726 + struct i801_priv *priv = handler_context;
727 + struct pci_dev *pdev = priv->pci_dev;
728 + acpi_status status;
729 +
730 + /*
731 + * Once BIOS AML code touches the OpRegion we warn and inhibit any
732 + * further access from the driver itself. This device is now owned
733 + * by the system firmware.
734 + */
735 + mutex_lock(&priv->acpi_lock);
736 +
737 + if (!priv->acpi_reserved) {
738 + priv->acpi_reserved = true;
739 +
740 + dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
741 + dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
742 + }
743 +
744 + if ((function & ACPI_IO_MASK) == ACPI_READ)
745 + status = acpi_os_read_port(address, (u32 *)value, bits);
746 + else
747 + status = acpi_os_write_port(address, (u32)*value, bits);
748 +
749 + mutex_unlock(&priv->acpi_lock);
750 +
751 + return status;
752 +}
753 +
754 +static int i801_acpi_probe(struct i801_priv *priv)
755 +{
756 + struct acpi_device *adev;
757 + acpi_status status;
758 +
759 + adev = ACPI_COMPANION(&priv->pci_dev->dev);
760 + if (adev) {
761 + status = acpi_install_address_space_handler(adev->handle,
762 + ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
763 + NULL, priv);
764 + if (ACPI_SUCCESS(status))
765 + return 0;
766 + }
767 +
768 + return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
769 +}
770 +
771 +static void i801_acpi_remove(struct i801_priv *priv)
772 +{
773 + struct acpi_device *adev;
774 +
775 + adev = ACPI_COMPANION(&priv->pci_dev->dev);
776 + if (!adev)
777 + return;
778 +
779 + acpi_remove_address_space_handler(adev->handle,
780 + ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
781 +}
782 +#else
783 +static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
784 +static inline void i801_acpi_remove(struct i801_priv *priv) { }
785 +#endif
786 +
787 static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
788 {
789 unsigned char temp;
790 @@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
791 priv->adapter.dev.parent = &dev->dev;
792 ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
793 priv->adapter.retries = 3;
794 + mutex_init(&priv->acpi_lock);
795
796 priv->pci_dev = dev;
797 switch (dev->device) {
798 @@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
799 return -ENODEV;
800 }
801
802 - err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
803 - if (err) {
804 + if (i801_acpi_probe(priv))
805 return -ENODEV;
806 - }
807
808 err = pcim_iomap_regions(dev, 1 << SMBBAR,
809 dev_driver_string(&dev->dev));
810 @@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
811 "Failed to request SMBus region 0x%lx-0x%Lx\n",
812 priv->smba,
813 (unsigned long long)pci_resource_end(dev, SMBBAR));
814 + i801_acpi_remove(priv);
815 return err;
816 }
817
818 @@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
819 err = i2c_add_adapter(&priv->adapter);
820 if (err) {
821 dev_err(&dev->dev, "Failed to add SMBus adapter\n");
822 + i801_acpi_remove(priv);
823 return err;
824 }
825
826 @@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev)
827
828 i801_del_mux(priv);
829 i2c_del_adapter(&priv->adapter);
830 + i801_acpi_remove(priv);
831 pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
832
833 platform_device_unregister(priv->tco_pdev);
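[Editor's note] The i2c-i801 change arbitrates the SMBus controller between the driver and firmware AML: the first AML access latches acpi_reserved under acpi_lock, and every later driver transaction backs off with -EBUSY. A user-space analogue of that latch-under-a-mutex pattern (hypothetical names; compile with -pthread):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool acpi_reserved;

/* Every driver transaction checks the latch under the lock. */
static int do_transfer(void)
{
    int ret = 0;

    pthread_mutex_lock(&lock);
    if (acpi_reserved)
        ret = -EBUSY;   /* firmware owns the controller now */
    /* else: perform the transaction while still holding the lock */
    pthread_mutex_unlock(&lock);
    return ret;
}

/* First firmware access sets the latch; it is never cleared. */
static void firmware_touched_region(void)
{
    pthread_mutex_lock(&lock);
    acpi_reserved = true;
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    firmware_touched_region();
    return do_transfer() == -EBUSY ? 0 : 1;
}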
834 diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
835 index db760e84119f..b8df0f5e8c25 100644
836 --- a/drivers/net/bonding/bond_netlink.c
837 +++ b/drivers/net/bonding/bond_netlink.c
838 @@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
839 if (err < 0)
840 return err;
841
842 - return register_netdevice(bond_dev);
843 + err = register_netdevice(bond_dev);
844 +
845 + netif_carrier_off(bond_dev);
846 +
847 + return err;
848 }
849
850 static size_t bond_get_size(const struct net_device *bond_dev)
851 diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
852 index 28f7610b03fe..c32f5d32f811 100644
853 --- a/drivers/net/ethernet/broadcom/bgmac.c
854 +++ b/drivers/net/ethernet/broadcom/bgmac.c
855 @@ -219,7 +219,7 @@ err_dma:
856 dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
857 DMA_TO_DEVICE);
858
859 - while (i > 0) {
860 + while (i-- > 0) {
861 int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
862 struct bgmac_slot_info *slot = &ring->slots[index];
863 u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
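[Editor's note] The bgmac fix is a one-character unwind bug: "while (i > 0)" never decremented i, so the DMA-unmap error path spun forever on the same slot. The post-decrement form visits each previously mapped slot exactly once. A standalone demonstration of the corrected loop shape:

#include <stdio.h>

int main(void)
{
    int i = 3;      /* slots 0..2 were mapped before the failure */

    /* The post-decrement form visits slots 2, 1, 0 exactly once
     * and then terminates. */
    while (i-- > 0)
        printf("unmap slot %d\n", i);
    return 0;
}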
864 diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
865 index 3dd548ab8df1..40365cb1abe6 100644
866 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
867 +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
868 @@ -794,13 +794,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
869 * in a bitmap and increasing the chain consumer only
870 * for the first successive completed entries.
871 */
872 - bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
873 + __set_bit(pos, p_spq->p_comp_bitmap);
874
875 while (test_bit(p_spq->comp_bitmap_idx,
876 p_spq->p_comp_bitmap)) {
877 - bitmap_clear(p_spq->p_comp_bitmap,
878 - p_spq->comp_bitmap_idx,
879 - SPQ_RING_SIZE);
880 + __clear_bit(p_spq->comp_bitmap_idx,
881 + p_spq->p_comp_bitmap);
882 p_spq->comp_bitmap_idx++;
883 qed_chain_return_produced(&p_spq->chain);
884 }
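[Editor's note] The qed change fixes a helper mix-up: bitmap_set(map, pos, n) marks n consecutive bits starting at pos, while __set_bit(pos, map) marks exactly one, and completing a single ring entry must mark one bit. A user-space sketch of the difference (toy single-word bitmap, hypothetical helpers):

#include <stdio.h>

static unsigned long map;

/* like bitmap_set(map, pos, n): marks n consecutive bits from pos */
static void set_run(int pos, int n) { map |= ((1UL << n) - 1) << pos; }

/* like __set_bit(pos, map): marks exactly one bit */
static void set_one(int pos) { map |= 1UL << pos; }

int main(void)
{
    set_run(2, 4);
    printf("run: %#lx\n", map);   /* 0x3c: bits 2..5 */
    map = 0;
    set_one(2);
    printf("one: %#lx\n", map);   /* 0x4: bit 2 only */
    return 0;
}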
885 diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
886 index a790d5f90b83..e0e94b855bbe 100644
887 --- a/drivers/net/usb/cdc_ncm.c
888 +++ b/drivers/net/usb/cdc_ncm.c
889 @@ -952,8 +952,6 @@ EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
890
891 static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
892 {
893 - int ret;
894 -
895 /* MBIM backwards compatible function? */
896 if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
897 return -ENODEV;
898 @@ -962,16 +960,7 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
899 * Additionally, generic NCM devices are assumed to accept arbitrarily
900 * placed NDP.
901 */
902 - ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
903 -
904 - /*
905 - * We should get an event when network connection is "connected" or
906 - * "disconnected". Set network connection in "disconnected" state
907 - * (carrier is OFF) during attach, so the IP network stack does not
908 - * start IPv6 negotiation and more.
909 - */
910 - usbnet_link_change(dev, 0, 0);
911 - return ret;
912 + return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
913 }
914
915 static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
916 @@ -1554,7 +1543,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
917
918 static const struct driver_info cdc_ncm_info = {
919 .description = "CDC NCM",
920 - .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
921 + .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
922 + | FLAG_LINK_INTR,
923 .bind = cdc_ncm_bind,
924 .unbind = cdc_ncm_unbind,
925 .manage_power = usbnet_manage_power,
926 @@ -1567,7 +1557,7 @@ static const struct driver_info cdc_ncm_info = {
927 static const struct driver_info wwan_info = {
928 .description = "Mobile Broadband Network Device",
929 .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
930 - | FLAG_WWAN,
931 + | FLAG_LINK_INTR | FLAG_WWAN,
932 .bind = cdc_ncm_bind,
933 .unbind = cdc_ncm_unbind,
934 .manage_power = usbnet_manage_power,
935 @@ -1580,7 +1570,7 @@ static const struct driver_info wwan_info = {
936 static const struct driver_info wwan_noarp_info = {
937 .description = "Mobile Broadband Network Device (NO ARP)",
938 .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
939 - | FLAG_WWAN | FLAG_NOARP,
940 + | FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
941 .bind = cdc_ncm_bind,
942 .unbind = cdc_ncm_unbind,
943 .manage_power = usbnet_manage_power,
944 diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
945 index 943c1cb9566c..d28e3ab9479c 100644
946 --- a/drivers/pnp/quirks.c
947 +++ b/drivers/pnp/quirks.c
948 @@ -342,7 +342,9 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
949 /* Device IDs of parts that have 32KB MCH space */
950 static const unsigned int mch_quirk_devices[] = {
951 0x0154, /* Ivy Bridge */
952 + 0x0a04, /* Haswell-ULT */
953 0x0c00, /* Haswell */
954 + 0x1604, /* Broadwell */
955 };
956
957 static struct pci_dev *get_intel_host(void)
958 diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
959 index f7ae898833dd..7232d43e2207 100644
960 --- a/drivers/scsi/scsi_sysfs.c
961 +++ b/drivers/scsi/scsi_sysfs.c
962 @@ -1058,11 +1058,12 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
963 }
964
965 error = scsi_dh_add_device(sdev);
966 - if (error) {
967 + if (error)
968 + /*
969 + * device_handler is optional, so any error can be ignored
970 + */
971 sdev_printk(KERN_INFO, sdev,
972 "failed to add device handler: %d\n", error);
973 - return error;
974 - }
975
976 device_enable_async_suspend(&sdev->sdev_dev);
977 error = device_add(&sdev->sdev_dev);
978 diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
979 index 13c3cd11ab92..05d30f433b19 100644
980 --- a/drivers/staging/rdma/ipath/ipath_file_ops.c
981 +++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
982 @@ -45,6 +45,8 @@
983 #include <linux/uio.h>
984 #include <asm/pgtable.h>
985
986 +#include <rdma/ib.h>
987 +
988 #include "ipath_kernel.h"
989 #include "ipath_common.h"
990 #include "ipath_user_sdma.h"
991 @@ -2243,6 +2245,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
992 ssize_t ret = 0;
993 void *dest;
994
995 + if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
996 + return -EACCES;
997 +
998 if (count < sizeof(cmd.type)) {
999 ret = -EINVAL;
1000 goto bail;
1001 diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
1002 index 7865228f664f..807d80145686 100644
1003 --- a/drivers/tty/pty.c
1004 +++ b/drivers/tty/pty.c
1005 @@ -679,14 +679,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
1006 /* this is called once with whichever end is closed last */
1007 static void pty_unix98_shutdown(struct tty_struct *tty)
1008 {
1009 - struct inode *ptmx_inode;
1010 + struct pts_fs_info *fsi;
1011
1012 if (tty->driver->subtype == PTY_TYPE_MASTER)
1013 - ptmx_inode = tty->driver_data;
1014 + fsi = tty->driver_data;
1015 else
1016 - ptmx_inode = tty->link->driver_data;
1017 - devpts_kill_index(ptmx_inode, tty->index);
1018 - devpts_del_ref(ptmx_inode);
1019 + fsi = tty->link->driver_data;
1020 + devpts_kill_index(fsi, tty->index);
1021 + devpts_put_ref(fsi);
1022 }
1023
1024 static const struct tty_operations ptm_unix98_ops = {
1025 @@ -738,6 +738,7 @@ static const struct tty_operations pty_unix98_ops = {
1026
1027 static int ptmx_open(struct inode *inode, struct file *filp)
1028 {
1029 + struct pts_fs_info *fsi;
1030 struct tty_struct *tty;
1031 struct inode *slave_inode;
1032 int retval;
1033 @@ -752,47 +753,41 @@ static int ptmx_open(struct inode *inode, struct file *filp)
1034 if (retval)
1035 return retval;
1036
1037 + fsi = devpts_get_ref(inode, filp);
1038 + retval = -ENODEV;
1039 + if (!fsi)
1040 + goto out_free_file;
1041 +
1042 /* find a device that is not in use. */
1043 mutex_lock(&devpts_mutex);
1044 - index = devpts_new_index(inode);
1045 - if (index < 0) {
1046 - retval = index;
1047 - mutex_unlock(&devpts_mutex);
1048 - goto err_file;
1049 - }
1050 -
1051 + index = devpts_new_index(fsi);
1052 mutex_unlock(&devpts_mutex);
1053
1054 - mutex_lock(&tty_mutex);
1055 - tty = tty_init_dev(ptm_driver, index);
1056 + retval = index;
1057 + if (index < 0)
1058 + goto out_put_ref;
1059
1060 - if (IS_ERR(tty)) {
1061 - retval = PTR_ERR(tty);
1062 - goto out;
1063 - }
1064
1065 + mutex_lock(&tty_mutex);
1066 + tty = tty_init_dev(ptm_driver, index);
1067 /* The tty returned here is locked so we can safely
1068 drop the mutex */
1069 mutex_unlock(&tty_mutex);
1070
1071 - set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
1072 - tty->driver_data = inode;
1073 + retval = PTR_ERR(tty);
1074 + if (IS_ERR(tty))
1075 + goto out;
1076
1077 /*
1078 - * In the case where all references to ptmx inode are dropped and we
1079 - * still have /dev/tty opened pointing to the master/slave pair (ptmx
1080 - * is closed/released before /dev/tty), we must make sure that the inode
1081 - * is still valid when we call the final pty_unix98_shutdown, thus we
1082 - * hold an additional reference to the ptmx inode. For the same /dev/tty
1083 - * last close case, we also need to make sure the super_block isn't
1084 - * destroyed (devpts instance unmounted), before /dev/tty is closed and
1085 - * on its release devpts_kill_index is called.
1086 + * From here on out, the tty is "live", and the index and
1087 + * fsi will be killed/put by the tty_release()
1088 */
1089 - devpts_add_ref(inode);
1090 + set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
1091 + tty->driver_data = fsi;
1092
1093 tty_add_file(tty, filp);
1094
1095 - slave_inode = devpts_pty_new(inode,
1096 + slave_inode = devpts_pty_new(fsi,
1097 MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index,
1098 tty->link);
1099 if (IS_ERR(slave_inode)) {
1100 @@ -811,12 +806,14 @@ static int ptmx_open(struct inode *inode, struct file *filp)
1101 return 0;
1102 err_release:
1103 tty_unlock(tty);
1104 + // This will also put-ref the fsi
1105 tty_release(inode, filp);
1106 return retval;
1107 out:
1108 - mutex_unlock(&tty_mutex);
1109 - devpts_kill_index(inode, index);
1110 -err_file:
1111 + devpts_kill_index(fsi, index);
1112 +out_put_ref:
1113 + devpts_put_ref(fsi);
1114 +out_free_file:
1115 tty_free_file(filp);
1116 return retval;
1117 }
1118 diff --git a/fs/dcache.c b/fs/dcache.c
1119 index 108d7d810be3..71b6056ad35d 100644
1120 --- a/fs/dcache.c
1121 +++ b/fs/dcache.c
1122 @@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)
1123
1124 failed:
1125 spin_unlock(&dentry->d_lock);
1126 - cpu_relax();
1127 return dentry; /* try again with same dentry */
1128 }
1129
1130 @@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
1131 return;
1132
1133 repeat:
1134 + might_sleep();
1135 +
1136 rcu_read_lock();
1137 if (likely(fast_dput(dentry))) {
1138 rcu_read_unlock();
1139 @@ -783,8 +784,10 @@ repeat:
1140
1141 kill_it:
1142 dentry = dentry_kill(dentry);
1143 - if (dentry)
1144 + if (dentry) {
1145 + cond_resched();
1146 goto repeat;
1147 + }
1148 }
1149 EXPORT_SYMBOL(dput);
1150
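[Editor's note] The dput() change swaps a cpu_relax() busy-wait for cond_resched() between retries, so a long chain of contended dentry kills can no longer monopolize the CPU. A loose user-space analogue of the retry-then-yield shape (try_kill() is a stand-in for one dentry_kill() attempt):

#include <sched.h>
#include <stdbool.h>

static int attempts_left = 3;

/* Stand-in: one kill attempt that can fail a few times under contention. */
static bool try_kill(void)
{
    return --attempts_left <= 0;
}

int main(void)
{
    while (!try_kill())
        sched_yield();   /* yield between retries instead of spinning */
    return 0;
}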
1151 diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
1152 index 706de324f2a6..c82edb049117 100644
1153 --- a/fs/devpts/inode.c
1154 +++ b/fs/devpts/inode.c
1155 @@ -128,6 +128,7 @@ static const match_table_t tokens = {
1156 struct pts_fs_info {
1157 struct ida allocated_ptys;
1158 struct pts_mount_opts mount_opts;
1159 + struct super_block *sb;
1160 struct dentry *ptmx_dentry;
1161 };
1162
1163 @@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = {
1164 .show_options = devpts_show_options,
1165 };
1166
1167 -static void *new_pts_fs_info(void)
1168 +static void *new_pts_fs_info(struct super_block *sb)
1169 {
1170 struct pts_fs_info *fsi;
1171
1172 @@ -369,6 +370,7 @@ static void *new_pts_fs_info(void)
1173 ida_init(&fsi->allocated_ptys);
1174 fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
1175 fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
1176 + fsi->sb = sb;
1177
1178 return fsi;
1179 }
1180 @@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
1181 s->s_op = &devpts_sops;
1182 s->s_time_gran = 1;
1183
1184 - s->s_fs_info = new_pts_fs_info();
1185 + s->s_fs_info = new_pts_fs_info(s);
1186 if (!s->s_fs_info)
1187 goto fail;
1188
1189 @@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = {
1190 * to the System V naming convention
1191 */
1192
1193 -int devpts_new_index(struct inode *ptmx_inode)
1194 +int devpts_new_index(struct pts_fs_info *fsi)
1195 {
1196 - struct super_block *sb = pts_sb_from_inode(ptmx_inode);
1197 - struct pts_fs_info *fsi;
1198 int index;
1199 int ida_ret;
1200
1201 - if (!sb)
1202 + if (!fsi)
1203 return -ENODEV;
1204
1205 - fsi = DEVPTS_SB(sb);
1206 retry:
1207 if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
1208 return -ENOMEM;
1209 @@ -564,11 +563,8 @@ retry:
1210 return index;
1211 }
1212
1213 -void devpts_kill_index(struct inode *ptmx_inode, int idx)
1214 +void devpts_kill_index(struct pts_fs_info *fsi, int idx)
1215 {
1216 - struct super_block *sb = pts_sb_from_inode(ptmx_inode);
1217 - struct pts_fs_info *fsi = DEVPTS_SB(sb);
1218 -
1219 mutex_lock(&allocated_ptys_lock);
1220 ida_remove(&fsi->allocated_ptys, idx);
1221 pty_count--;
1222 @@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
1223 /*
1224 * pty code needs to hold extra references in case of last /dev/tty close
1225 */
1226 -
1227 -void devpts_add_ref(struct inode *ptmx_inode)
1228 +struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
1229 {
1230 - struct super_block *sb = pts_sb_from_inode(ptmx_inode);
1231 + struct super_block *sb;
1232 + struct pts_fs_info *fsi;
1233 +
1234 + sb = pts_sb_from_inode(ptmx_inode);
1235 + if (!sb)
1236 + return NULL;
1237 + fsi = DEVPTS_SB(sb);
1238 + if (!fsi)
1239 + return NULL;
1240
1241 atomic_inc(&sb->s_active);
1242 - ihold(ptmx_inode);
1243 + return fsi;
1244 }
1245
1246 -void devpts_del_ref(struct inode *ptmx_inode)
1247 +void devpts_put_ref(struct pts_fs_info *fsi)
1248 {
1249 - struct super_block *sb = pts_sb_from_inode(ptmx_inode);
1250 -
1251 - iput(ptmx_inode);
1252 - deactivate_super(sb);
1253 + deactivate_super(fsi->sb);
1254 }
1255
1256 /**
1257 @@ -604,22 +604,21 @@ void devpts_del_ref(struct inode *ptmx_inode)
1258 *
1259 * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
1260 */
1261 -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
1262 +struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
1263 void *priv)
1264 {
1265 struct dentry *dentry;
1266 - struct super_block *sb = pts_sb_from_inode(ptmx_inode);
1267 + struct super_block *sb;
1268 struct inode *inode;
1269 struct dentry *root;
1270 - struct pts_fs_info *fsi;
1271 struct pts_mount_opts *opts;
1272 char s[12];
1273
1274 - if (!sb)
1275 + if (!fsi)
1276 return ERR_PTR(-ENODEV);
1277
1278 + sb = fsi->sb;
1279 root = sb->s_root;
1280 - fsi = DEVPTS_SB(sb);
1281 opts = &fsi->mount_opts;
1282
1283 inode = new_inode(sb);
1284 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
1285 index fe1f50fe764f..f97110461c19 100644
1286 --- a/fs/ext4/balloc.c
1287 +++ b/fs/ext4/balloc.c
1288 @@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
1289 memset(bh->b_data, 0, sb->s_blocksize);
1290
1291 bit_max = ext4_num_base_meta_clusters(sb, block_group);
1292 + if ((bit_max >> 3) >= bh->b_size)
1293 + return -EFSCORRUPTED;
1294 +
1295 for (bit = 0; bit < bit_max; bit++)
1296 ext4_set_bit(bit, bh->b_data);
1297
1298 diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
1299 index 62880586ed85..8eac7d586997 100644
1300 --- a/fs/ext4/extents.c
1301 +++ b/fs/ext4/extents.c
1302 @@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
1303 ext4_fsblk_t block = ext4_ext_pblock(ext);
1304 int len = ext4_ext_get_actual_len(ext);
1305 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
1306 - ext4_lblk_t last = lblock + len - 1;
1307
1308 - if (len == 0 || lblock > last)
1309 + /*
1310 + * We allow neither:
1311 + * - zero length
1312 + * - overflow/wrap-around
1313 + */
1314 + if (lblock + len <= lblock)
1315 return 0;
1316 return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
1317 }
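[Editor's note] The old extent check computed "lblock + len - 1" and missed sums that wrap the 32-bit logical-block space; "lblock + len <= lblock" rejects both zero length and wrap-around in one unsigned comparison. A standalone demonstration:

#include <stdint.h>
#include <stdio.h>

/* One unsigned comparison rejects both len == 0 and wrap-around. */
static int extent_ok(uint32_t lblock, uint16_t len)
{
    return lblock + len > lblock;
}

int main(void)
{
    printf("len=0  -> %d\n", extent_ok(100, 0));          /* 0 */
    printf("wraps  -> %d\n", extent_ok(0xffffffffu, 2));  /* 0 */
    printf("normal -> %d\n", extent_ok(100, 8));          /* 1 */
    return 0;
}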
1318 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
1319 index e31d762eedce..9a5ad0f0d3ed 100644
1320 --- a/fs/ext4/inode.c
1321 +++ b/fs/ext4/inode.c
1322 @@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
1323 * Note that directories do not have this problem because they
1324 * don't use page cache.
1325 */
1326 - if (ext4_should_journal_data(inode) &&
1327 - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
1328 - inode->i_ino != EXT4_JOURNAL_INO) {
1329 + if (inode->i_ino != EXT4_JOURNAL_INO &&
1330 + ext4_should_journal_data(inode) &&
1331 + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
1332 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
1333 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
1334
1335 @@ -2589,13 +2589,36 @@ retry:
1336 done = true;
1337 }
1338 }
1339 - ext4_journal_stop(handle);
1340 + /*
1341 + * Caution: If the handle is synchronous,
1342 + * ext4_journal_stop() can wait for transaction commit
1343 + * to finish which may depend on writeback of pages to
1344 + * complete or on page lock to be released. In that
1345 + * case, we have to wait until after we have
1346 + * submitted all the IO, released page locks we hold,
1347 + * and dropped io_end reference (for extent conversion
1348 + * to be able to complete) before stopping the handle.
1349 + */
1350 + if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
1351 + ext4_journal_stop(handle);
1352 + handle = NULL;
1353 + }
1354 /* Submit prepared bio */
1355 ext4_io_submit(&mpd.io_submit);
1356 /* Unlock pages we didn't use */
1357 mpage_release_unused_pages(&mpd, give_up_on_write);
1358 - /* Drop our io_end reference we got from init */
1359 - ext4_put_io_end(mpd.io_submit.io_end);
1360 + /*
1361 + * Drop our io_end reference we got from init. We have
1362 + * to be careful and use deferred io_end finishing if
1363 + * we are still holding the transaction as we can
1364 + * release the last reference to io_end which may end
1365 + * up doing unwritten extent conversion.
1366 + */
1367 + if (handle) {
1368 + ext4_put_io_end_defer(mpd.io_submit.io_end);
1369 + ext4_journal_stop(handle);
1370 + } else
1371 + ext4_put_io_end(mpd.io_submit.io_end);
1372
1373 if (ret == -ENOSPC && sbi->s_journal) {
1374 /*
1375 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
1376 index cf734170daa9..c4dcac8a018d 100644
1377 --- a/fs/ext4/mballoc.c
1378 +++ b/fs/ext4/mballoc.c
1379 @@ -2932,7 +2932,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
1380 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
1381 "fs metadata", block, block+len);
1382 /* File system mounted not to panic on error
1383 - * Fix the bitmap and repeat the block allocation
1384 + * Fix the bitmap and return EFSCORRUPTED
1385 * We leak some of the blocks here.
1386 */
1387 ext4_lock_group(sb, ac->ac_b_ex.fe_group);
1388 @@ -2941,7 +2941,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
1389 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
1390 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
1391 if (!err)
1392 - err = -EAGAIN;
1393 + err = -EFSCORRUPTED;
1394 goto out_err;
1395 }
1396
1397 @@ -4506,18 +4506,7 @@ repeat:
1398 }
1399 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
1400 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
1401 - if (*errp == -EAGAIN) {
1402 - /*
1403 - * drop the reference that we took
1404 - * in ext4_mb_use_best_found
1405 - */
1406 - ext4_mb_release_context(ac);
1407 - ac->ac_b_ex.fe_group = 0;
1408 - ac->ac_b_ex.fe_start = 0;
1409 - ac->ac_b_ex.fe_len = 0;
1410 - ac->ac_status = AC_STATUS_CONTINUE;
1411 - goto repeat;
1412 - } else if (*errp) {
1413 + if (*errp) {
1414 ext4_discard_allocated_blocks(ac);
1415 goto errout;
1416 } else {
1417 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
1418 index 852c26806af2..c542ebcf7a92 100644
1419 --- a/fs/ext4/super.c
1420 +++ b/fs/ext4/super.c
1421 @@ -2240,6 +2240,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1422 while (es->s_last_orphan) {
1423 struct inode *inode;
1424
1425 + /*
1426 + * We may have encountered an error during cleanup; if
1427 + * so, skip the rest.
1428 + */
1429 + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1430 + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1431 + es->s_last_orphan = 0;
1432 + break;
1433 + }
1434 +
1435 inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1436 if (IS_ERR(inode)) {
1437 es->s_last_orphan = 0;
1438 @@ -3372,6 +3382,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1439 goto failed_mount;
1440 }
1441
1442 + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
1443 + ext4_msg(sb, KERN_ERR,
1444 + "Number of reserved GDT blocks insanely large: %d",
1445 + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
1446 + goto failed_mount;
1447 + }
1448 +
1449 if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
1450 if (blocksize != PAGE_SIZE) {
1451 ext4_msg(sb, KERN_ERR,
1452 diff --git a/fs/fuse/file.c b/fs/fuse/file.c
1453 index c2e340d6ec6e..d58d4c0af0ce 100644
1454 --- a/fs/fuse/file.c
1455 +++ b/fs/fuse/file.c
1456 @@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
1457 fuse_sync_writes(inode);
1458 mutex_unlock(&inode->i_mutex);
1459
1460 + if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
1461 + test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
1462 + err = -ENOSPC;
1463 + if (test_bit(AS_EIO, &file->f_mapping->flags) &&
1464 + test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
1465 + err = -EIO;
1466 + if (err)
1467 + return err;
1468 +
1469 req = fuse_get_req_nofail_nopages(fc, file);
1470 memset(&inarg, 0, sizeof(inarg));
1471 inarg.fh = ff->fh;
1472 @@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
1473 goto out;
1474
1475 fuse_sync_writes(inode);
1476 +
1477 + /*
1478 + * Due to implementation of fuse writeback
1479 + * filemap_write_and_wait_range() does not catch errors.
1480 + * We have to do this directly after fuse_sync_writes()
1481 + */
1482 + if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
1483 + test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
1484 + err = -ENOSPC;
1485 + if (test_bit(AS_EIO, &file->f_mapping->flags) &&
1486 + test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
1487 + err = -EIO;
1488 + if (err)
1489 + goto out;
1490 +
1491 err = sync_inode_metadata(inode, 1);
1492 if (err)
1493 goto out;
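[Editor's note] The fuse hunks harvest writeback errors that were parked as AS_ENOSPC/AS_EIO flag bits on the mapping: a cheap test screens the common no-error case, then test_and_clear_bit() both claims and clears the bit so each error is reported exactly once. A user-space sketch of that claim-once pattern using C11 atomics (names are hypothetical):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

enum { AS_EIO = 0, AS_ENOSPC = 1 };
static atomic_uint mapping_flags = 1u << AS_ENOSPC;   /* one parked error */

/* Cheap load screens the common no-error case; fetch-and both clears
 * the bit and confirms this caller was the one that claimed it. */
static int claim_wb_error(void)
{
    int err = 0;

    if ((atomic_load(&mapping_flags) & (1u << AS_ENOSPC)) &&
        (atomic_fetch_and(&mapping_flags, ~(1u << AS_ENOSPC)) & (1u << AS_ENOSPC)))
        err = -ENOSPC;
    if ((atomic_load(&mapping_flags) & (1u << AS_EIO)) &&
        (atomic_fetch_and(&mapping_flags, ~(1u << AS_EIO)) & (1u << AS_EIO)))
        err = -EIO;
    return err;
}

int main(void)
{
    printf("first: %d, second: %d\n", claim_wb_error(), claim_wb_error());
    /* first: -28 (ENOSPC), second: 0 */
    return 0;
}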
1494 diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
1495 index 2913db2a5b99..0d5e8e59b390 100644
1496 --- a/fs/fuse/inode.c
1497 +++ b/fs/fuse/inode.c
1498 @@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
1499 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
1500 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
1501 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1502 - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1503 + FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1504 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1505 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
1506 req->in.h.opcode = FUSE_INIT;
1507 diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
1508 index e0ee0b3000b2..358a4db72a27 100644
1509 --- a/include/linux/devpts_fs.h
1510 +++ b/include/linux/devpts_fs.h
1511 @@ -15,38 +15,24 @@
1512
1513 #include <linux/errno.h>
1514
1515 +struct pts_fs_info;
1516 +
1517 #ifdef CONFIG_UNIX98_PTYS
1518
1519 -int devpts_new_index(struct inode *ptmx_inode);
1520 -void devpts_kill_index(struct inode *ptmx_inode, int idx);
1521 -void devpts_add_ref(struct inode *ptmx_inode);
1522 -void devpts_del_ref(struct inode *ptmx_inode);
1523 +/* Look up a pts fs info and get a ref to it */
1524 +struct pts_fs_info *devpts_get_ref(struct inode *, struct file *);
1525 +void devpts_put_ref(struct pts_fs_info *);
1526 +
1527 +int devpts_new_index(struct pts_fs_info *);
1528 +void devpts_kill_index(struct pts_fs_info *, int);
1529 +
1530 /* mknod in devpts */
1531 -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
1532 - void *priv);
1533 +struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
1534 /* get private structure */
1535 void *devpts_get_priv(struct inode *pts_inode);
1536 /* unlink */
1537 void devpts_pty_kill(struct inode *inode);
1538
1539 -#else
1540 -
1541 -/* Dummy stubs in the no-pty case */
1542 -static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
1543 -static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
1544 -static inline void devpts_add_ref(struct inode *ptmx_inode) { }
1545 -static inline void devpts_del_ref(struct inode *ptmx_inode) { }
1546 -static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
1547 - dev_t device, int index, void *priv)
1548 -{
1549 - return ERR_PTR(-EINVAL);
1550 -}
1551 -static inline void *devpts_get_priv(struct inode *pts_inode)
1552 -{
1553 - return NULL;
1554 -}
1555 -static inline void devpts_pty_kill(struct inode *inode) { }
1556 -
1557 #endif
1558
1559
1560 diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
1561 index cd0e2413c358..435fd8426b8a 100644
1562 --- a/include/linux/memcontrol.h
1563 +++ b/include/linux/memcontrol.h
1564 @@ -174,6 +174,11 @@ struct mem_cgroup_thresholds {
1565 struct mem_cgroup_threshold_ary *spare;
1566 };
1567
1568 +struct mem_cgroup_id {
1569 + int id;
1570 + atomic_t ref;
1571 +};
1572 +
1573 /*
1574 * The memory controller data structure. The memory controller controls both
1575 * page cache and RSS per cgroup. We would eventually like to provide
1576 @@ -183,6 +188,9 @@ struct mem_cgroup_thresholds {
1577 struct mem_cgroup {
1578 struct cgroup_subsys_state css;
1579
1580 + /* Private memcg ID. Used to ID objects that outlive the cgroup */
1581 + struct mem_cgroup_id id;
1582 +
1583 /* Accounted resources */
1584 struct page_counter memory;
1585 struct page_counter memsw;
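[Editor's note] struct mem_cgroup_id pairs the 16-bit ID with a reference count so the ID can be returned to the allocator when the last user drops it, instead of being pinned by a dead CSS. A hypothetical user-space analogue with a toy free-list allocator standing in for the kernel's IDR:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ID_MAX 65536            /* 16-bit ID space, as in MEM_CGROUP_ID_MAX */

static bool in_use[ID_MAX];

struct obj_id {
    int id;
    atomic_int ref;
};

/* Toy linear-scan allocator standing in for the kernel's IDR. */
static void id_alloc(struct obj_id *oid)
{
    for (int i = 1; i < ID_MAX; i++) {
        if (!in_use[i]) {
            in_use[i] = true;
            oid->id = i;
            atomic_store(&oid->ref, 1);   /* creation reference */
            return;
        }
    }
    oid->id = 0;                          /* space exhausted */
}

/* Dropping the last reference recycles the ID immediately, so dead
 * objects stop pinning the small ID space. */
static void id_put(struct obj_id *oid)
{
    if (atomic_fetch_sub(&oid->ref, 1) == 1) {
        in_use[oid->id] = false;
        oid->id = 0;
    }
}

int main(void)
{
    struct obj_id a;

    id_alloc(&a);
    printf("got id %d\n", a.id);
    id_put(&a);          /* id 1 is immediately reusable */
    return 0;
}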
1586 diff --git a/ipc/msg.c b/ipc/msg.c
1587 index 1471db9a7e61..c6521c205cb4 100644
1588 --- a/ipc/msg.c
1589 +++ b/ipc/msg.c
1590 @@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
1591 rcu_read_lock();
1592 ipc_lock_object(&msq->q_perm);
1593
1594 - ipc_rcu_putref(msq, ipc_rcu_free);
1595 + ipc_rcu_putref(msq, msg_rcu_free);
1596 /* raced with RMID? */
1597 if (!ipc_valid_object(&msq->q_perm)) {
1598 err = -EIDRM;
1599 diff --git a/ipc/sem.c b/ipc/sem.c
1600 index b471e5a3863d..20d07008ad5e 100644
1601 --- a/ipc/sem.c
1602 +++ b/ipc/sem.c
1603 @@ -442,7 +442,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
1604 static inline void sem_lock_and_putref(struct sem_array *sma)
1605 {
1606 sem_lock(sma, NULL, -1);
1607 - ipc_rcu_putref(sma, ipc_rcu_free);
1608 + ipc_rcu_putref(sma, sem_rcu_free);
1609 }
1610
1611 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
1612 @@ -1385,7 +1385,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1613 rcu_read_unlock();
1614 sem_io = ipc_alloc(sizeof(ushort)*nsems);
1615 if (sem_io == NULL) {
1616 - ipc_rcu_putref(sma, ipc_rcu_free);
1617 + ipc_rcu_putref(sma, sem_rcu_free);
1618 return -ENOMEM;
1619 }
1620
1621 @@ -1419,20 +1419,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1622 if (nsems > SEMMSL_FAST) {
1623 sem_io = ipc_alloc(sizeof(ushort)*nsems);
1624 if (sem_io == NULL) {
1625 - ipc_rcu_putref(sma, ipc_rcu_free);
1626 + ipc_rcu_putref(sma, sem_rcu_free);
1627 return -ENOMEM;
1628 }
1629 }
1630
1631 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1632 - ipc_rcu_putref(sma, ipc_rcu_free);
1633 + ipc_rcu_putref(sma, sem_rcu_free);
1634 err = -EFAULT;
1635 goto out_free;
1636 }
1637
1638 for (i = 0; i < nsems; i++) {
1639 if (sem_io[i] > SEMVMX) {
1640 - ipc_rcu_putref(sma, ipc_rcu_free);
1641 + ipc_rcu_putref(sma, sem_rcu_free);
1642 err = -ERANGE;
1643 goto out_free;
1644 }
1645 @@ -1722,7 +1722,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1646 /* step 2: allocate new undo structure */
1647 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1648 if (!new) {
1649 - ipc_rcu_putref(sma, ipc_rcu_free);
1650 + ipc_rcu_putref(sma, sem_rcu_free);
1651 return ERR_PTR(-ENOMEM);
1652 }
1653
1654 diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1655 index 67648e6b2ac8..6b90d184e9c0 100644
1656 --- a/mm/memcontrol.c
1657 +++ b/mm/memcontrol.c
1658 @@ -272,21 +272,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
1659
1660 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
1661 {
1662 - return memcg->css.id;
1663 -}
1664 -
1665 -/*
1666 - * A helper function to get mem_cgroup from ID. must be called under
1667 - * rcu_read_lock(). The caller is responsible for calling
1668 - * css_tryget_online() if the mem_cgroup is used for charging. (dropping
1669 - * refcnt from swap can be called against removed memcg.)
1670 - */
1671 -static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
1672 -{
1673 - struct cgroup_subsys_state *css;
1674 -
1675 - css = css_from_id(id, &memory_cgrp_subsys);
1676 - return mem_cgroup_from_css(css);
1677 + return memcg->id.id;
1678 }
1679
1680 /* Writing them here to avoid exposing memcg's inner layout */
1681 @@ -4124,6 +4110,88 @@ static struct cftype mem_cgroup_legacy_files[] = {
1682 { }, /* terminate */
1683 };
1684
1685 +/*
1686 + * Private memory cgroup IDR
1687 + *
1688 + * Swap-out records and page cache shadow entries need to store memcg
1689 + * references in constrained space, so we maintain an ID space that is
1690 + * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
1691 + * memory-controlled cgroups to 64k.
1692 + *
1693 + * However, there usually are many references to the offline CSS after
1694 + * the cgroup has been destroyed, such as page cache or reclaimable
1695 + * slab objects, that don't need to hang on to the ID. We want to keep
1696 + * those dead CSS from occupying IDs, or we might quickly exhaust the
1697 + * relatively small ID space and prevent the creation of new cgroups
1698 + * even when there are far fewer than 64k cgroups - possibly none.
1699 + *
1700 + * Maintain a private 16-bit ID space for memcg, and allow the ID to
1701 + * be freed and recycled when it's no longer needed, which is usually
1702 + * when the CSS is offlined.
1703 + *
1704 + * The only exceptions to that are records of swapped out tmpfs/shmem
1705 + * pages that need to be attributed to live ancestors on swapin. But
1706 + * those references are manageable from userspace.
1707 + */
1708 +
1709 +static DEFINE_IDR(mem_cgroup_idr);
1710 +
1711 +static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
1712 +{
1713 + atomic_add(n, &memcg->id.ref);
1714 +}
1715 +
1716 +static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
1717 +{
1718 + while (!atomic_inc_not_zero(&memcg->id.ref)) {
1719 + /*
1720 + * The root cgroup cannot be destroyed, so its refcount must
1721 + * always be >= 1.
1722 + */
1723 + if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
1724 + VM_BUG_ON(1);
1725 + break;
1726 + }
1727 + memcg = parent_mem_cgroup(memcg);
1728 + if (!memcg)
1729 + memcg = root_mem_cgroup;
1730 + }
1731 + return memcg;
1732 +}
1733 +
1734 +static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
1735 +{
1736 + if (atomic_sub_and_test(n, &memcg->id.ref)) {
1737 + idr_remove(&mem_cgroup_idr, memcg->id.id);
1738 + memcg->id.id = 0;
1739 +
1740 + /* Memcg ID pins CSS */
1741 + css_put(&memcg->css);
1742 + }
1743 +}
1744 +
1745 +static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
1746 +{
1747 + mem_cgroup_id_get_many(memcg, 1);
1748 +}
1749 +
1750 +static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
1751 +{
1752 + mem_cgroup_id_put_many(memcg, 1);
1753 +}
1754 +
1755 +/**
1756 + * mem_cgroup_from_id - look up a memcg from a memcg id
1757 + * @id: the memcg id to look up
1758 + *
1759 + * Caller must hold rcu_read_lock().
1760 + */
1761 +struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
1762 +{
1763 + WARN_ON_ONCE(!rcu_read_lock_held());
1764 + return idr_find(&mem_cgroup_idr, id);
1765 +}
1766 +
1767 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
1768 {
1769 struct mem_cgroup_per_node *pn;
1770 @@ -4178,6 +4246,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
1771 if (memcg_wb_domain_init(memcg, GFP_KERNEL))
1772 goto out_free_stat;
1773
1774 + memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
1775 + 1, MEM_CGROUP_ID_MAX,
1776 + GFP_KERNEL);
1777 + if (memcg->id.id < 0)
1778 + goto out_free_stat;
1779 +
1780 return memcg;
1781
1782 out_free_stat:
1783 @@ -4263,9 +4337,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
1784 #ifdef CONFIG_CGROUP_WRITEBACK
1785 INIT_LIST_HEAD(&memcg->cgwb_list);
1786 #endif
1787 + idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
1788 return &memcg->css;
1789
1790 free_out:
1791 + idr_remove(&mem_cgroup_idr, memcg->id.id);
1792 __mem_cgroup_free(memcg);
1793 return ERR_PTR(error);
1794 }
1795 @@ -4277,8 +4353,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
1796 struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
1797 int ret;
1798
1799 - if (css->id > MEM_CGROUP_ID_MAX)
1800 - return -ENOSPC;
1801 + /* Online state pins memcg ID, memcg ID pins CSS */
1802 + mem_cgroup_id_get(mem_cgroup_from_css(css));
1803 + css_get(css);
1804
1805 if (!parent)
1806 return 0;
1807 @@ -4352,6 +4429,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
1808 memcg_deactivate_kmem(memcg);
1809
1810 wb_memcg_offline(memcg);
1811 +
1812 + mem_cgroup_id_put(memcg);
1813 }
1814
1815 static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
1816 @@ -4785,6 +4864,8 @@ static void __mem_cgroup_clear_mc(void)
1817 if (!mem_cgroup_is_root(mc.from))
1818 page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
1819
1820 + mem_cgroup_id_put_many(mc.from, mc.moved_swap);
1821 +
1822 /*
1823 * we charged both to->memory and to->memsw, so we
1824 * should uncharge to->memory.
1825 @@ -4792,9 +4873,9 @@ static void __mem_cgroup_clear_mc(void)
1826 if (!mem_cgroup_is_root(mc.to))
1827 page_counter_uncharge(&mc.to->memory, mc.moved_swap);
1828
1829 - css_put_many(&mc.from->css, mc.moved_swap);
1830 + mem_cgroup_id_get_many(mc.to, mc.moved_swap);
1831 + css_put_many(&mc.to->css, mc.moved_swap);
1832
1833 - /* we've already done css_get(mc.to) */
1834 mc.moved_swap = 0;
1835 }
1836 memcg_oom_recover(from);
1837 @@ -5670,7 +5751,7 @@ subsys_initcall(mem_cgroup_init);
1838 */
1839 void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1840 {
1841 - struct mem_cgroup *memcg;
1842 + struct mem_cgroup *memcg, *swap_memcg;
1843 unsigned short oldid;
1844
1845 VM_BUG_ON_PAGE(PageLRU(page), page);
1846 @@ -5685,15 +5766,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1847 if (!memcg)
1848 return;
1849
1850 - oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
1851 + /*
1852 + * In case the memcg owning these pages has been offlined and doesn't
1853 + * have an ID allocated to it anymore, charge the closest online
1854 + * ancestor for the swap instead and transfer the memory+swap charge.
1855 + */
1856 + swap_memcg = mem_cgroup_id_get_online(memcg);
1857 + oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
1858 VM_BUG_ON_PAGE(oldid, page);
1859 - mem_cgroup_swap_statistics(memcg, true);
1860 + mem_cgroup_swap_statistics(swap_memcg, true);
1861
1862 page->mem_cgroup = NULL;
1863
1864 if (!mem_cgroup_is_root(memcg))
1865 page_counter_uncharge(&memcg->memory, 1);
1866
1867 + if (memcg != swap_memcg) {
1868 + if (!mem_cgroup_is_root(swap_memcg))
1869 + page_counter_charge(&swap_memcg->memsw, 1);
1870 + page_counter_uncharge(&memcg->memsw, 1);
1871 + }
1872 +
1873 /*
1874 * Interrupts should be disabled here because the caller holds the
1875 * mapping->tree_lock lock which is taken with interrupts-off. It is
1876 @@ -5703,6 +5796,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1877 VM_BUG_ON(!irqs_disabled());
1878 mem_cgroup_charge_statistics(memcg, page, -1);
1879 memcg_check_events(memcg, page);
1880 +
1881 + if (!mem_cgroup_is_root(memcg))
1882 + css_put(&memcg->css);
1883 }
1884
1885 /**
1886 @@ -5726,7 +5822,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
1887 if (!mem_cgroup_is_root(memcg))
1888 page_counter_uncharge(&memcg->memsw, 1);
1889 mem_cgroup_swap_statistics(memcg, false);
1890 - css_put(&memcg->css);
1891 + mem_cgroup_id_put(memcg);
1892 }
1893 rcu_read_unlock();
1894 }
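The mm/memcontrol.c hunks above carve out a private, recyclable 16-bit ID space: an ID is reserved with a NULL placeholder in mem_cgroup_alloc(), published with idr_replace() once the css is fully set up, pinned while swap records or task migration still reference it, and recycled in mem_cgroup_id_put_many() when the last reference drops. Below is a minimal user-space sketch of that reserve/publish/recycle lifecycle, with a plain array standing in for the kernel IDR; all names are illustrative.

    /* Sketch only: a plain array stands in for the kernel IDR. */
    #include <stdio.h>
    #include <stdlib.h>

    #define ID_MAX 8                        /* stand-in for MEM_CGROUP_ID_MAX */

    static void *slots[ID_MAX];             /* stand-in for mem_cgroup_idr */
    static int reserved;                    /* sentinel marking a reserved slot */

    static int id_alloc(void)               /* reserve now, publish later */
    {
        for (int id = 1; id < ID_MAX; id++) {
            if (!slots[id]) {
                slots[id] = &reserved;      /* like idr_alloc(..., NULL, ...) */
                return id;
            }
        }
        return -1;                          /* ID space exhausted */
    }

    struct memcg {
        int id;
        int ref;
    };

    int main(void)
    {
        struct memcg *m = calloc(1, sizeof(*m));

        if (!m)
            return 1;
        m->id = id_alloc();
        if (m->id < 0)
            return 1;
        m->ref = 1;                         /* online state pins the ID */
        slots[m->id] = m;                   /* like idr_replace(): publish */

        printf("id %d -> %p\n", m->id, slots[m->id]);

        if (--m->ref == 0) {                /* like mem_cgroup_id_put() */
            slots[m->id] = NULL;            /* like idr_remove(): recycle */
            free(m);
        }
        return 0;
    }

Because the slot is recycled on the last put rather than held until the css is finally released, long-lived dead cgroups no longer pin IDs and exhaust the 64k space.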
1895 diff --git a/mm/slab_common.c b/mm/slab_common.c
1896 index 3c6a86b4ec25..bec2fce9fafc 100644
1897 --- a/mm/slab_common.c
1898 +++ b/mm/slab_common.c
1899 @@ -521,8 +521,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
1900 goto out_unlock;
1901
1902 cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
1903 - cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
1904 - css->id, memcg_name_buf);
1905 + cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
1906 + css->serial_nr, memcg_name_buf);
1907 if (!cache_name)
1908 goto out_unlock;
1909
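With memcg IDs now recyclable, css->id no longer uniquely names a per-memcg kmem cache over time, so the hunk above switches the cache name to css->serial_nr, which increases monotonically and is never reused. A small sketch of why a monotonic serial keeps names distinct even when the same small ID is handed out twice; the names and numbers are made up.

    /* Sketch only: illustrative names and numbers. */
    #include <stdio.h>

    static unsigned long long next_serial = 1;    /* like css->serial_nr */

    int main(void)
    {
        char name[64];

        /* Two caches whose cgroups happened to recycle the same small ID: */
        for (int i = 0; i < 2; i++) {
            int recycled_id = 7;                  /* same css->id both times */
            snprintf(name, sizeof(name), "dcache(%llu:job)", next_serial++);
            printf("id=%d name=%s\n", recycled_id, name);
        }
        return 0;
    }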
1910 diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
1911 index 2b68418c7198..ffe95d954007 100644
1912 --- a/net/ipv4/fib_semantics.c
1913 +++ b/net/ipv4/fib_semantics.c
1914 @@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
1915 if (!rtnh_ok(rtnh, remaining))
1916 return -EINVAL;
1917
1918 + if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
1919 + return -EINVAL;
1920 +
1921 nexthop_nh->nh_flags =
1922 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
1923 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
1924 @@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
1925 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
1926 goto err_inval;
1927
1928 + if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
1929 + goto err_inval;
1930 +
1931 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1932 if (cfg->fc_mp) {
1933 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
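Both fib_semantics.c hunks enforce the same rule at the two places nexthop flags enter from userspace: RTNH_F_DEAD and RTNH_F_LINKDOWN are status bits the kernel computes itself, so a netlink request that tries to set them is rejected with -EINVAL instead of being merged into nh_flags. A minimal user-space sketch of that reserved-bit validation follows; the flag values are illustrative, not the kernel's.

    /* Sketch only: flag values are illustrative, not the kernel's. */
    #include <stdio.h>

    #define F_DEAD      0x01u   /* kernel-managed status bits */
    #define F_LINKDOWN  0x02u
    #define KERNEL_ONLY (F_DEAD | F_LINKDOWN)

    static int validate_nh_flags(unsigned int user_flags)
    {
        if (user_flags & KERNEL_ONLY)
            return -1;          /* -EINVAL in the patch */
        return 0;               /* safe to merge into nh_flags */
    }

    int main(void)
    {
        printf("plain flag: %d\n", validate_nh_flags(0x04u));      /* 0 */
        printf("linkdown:   %d\n", validate_nh_flags(F_LINKDOWN)); /* -1 */
        return 0;
    }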
1934 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
1935 index d4c51158470f..12b98e257c5f 100644
1936 --- a/net/ipv4/tcp_input.c
1937 +++ b/net/ipv4/tcp_input.c
1938 @@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
1939 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
1940
1941 /* rfc5961 challenge ack rate limiting */
1942 -int sysctl_tcp_challenge_ack_limit = 100;
1943 +int sysctl_tcp_challenge_ack_limit = 1000;
1944
1945 int sysctl_tcp_stdurg __read_mostly;
1946 int sysctl_tcp_rfc1337 __read_mostly;
1947 @@ -3390,6 +3390,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
1948 return flag;
1949 }
1950
1951 +static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
1952 + u32 *last_oow_ack_time)
1953 +{
1954 + if (*last_oow_ack_time) {
1955 + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1956 +
1957 + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1958 + NET_INC_STATS_BH(net, mib_idx);
1959 + return true; /* rate-limited: don't send yet! */
1960 + }
1961 + }
1962 +
1963 + *last_oow_ack_time = tcp_time_stamp;
1964 +
1965 + return false; /* not rate-limited: go ahead, send dupack now! */
1966 +}
1967 +
1968 /* Return true if we're currently rate-limiting out-of-window ACKs and
1969 * thus shouldn't send a dupack right now. We rate-limit dupacks in
1970 * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
1971 @@ -3403,21 +3420,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
1972 /* Data packets without SYNs are not likely part of an ACK loop. */
1973 if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
1974 !tcp_hdr(skb)->syn)
1975 - goto not_rate_limited;
1976 -
1977 - if (*last_oow_ack_time) {
1978 - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1979 -
1980 - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1981 - NET_INC_STATS_BH(net, mib_idx);
1982 - return true; /* rate-limited: don't send yet! */
1983 - }
1984 - }
1985 -
1986 - *last_oow_ack_time = tcp_time_stamp;
1987 + return false;
1988
1989 -not_rate_limited:
1990 - return false; /* not rate-limited: go ahead, send dupack now! */
1991 + return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
1992 }
1993
1994 /* RFC 5961 7 [ACK Throttling] */
1995 @@ -3427,21 +3432,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
1996 static u32 challenge_timestamp;
1997 static unsigned int challenge_count;
1998 struct tcp_sock *tp = tcp_sk(sk);
1999 - u32 now;
2000 + u32 count, now;
2001
2002 /* First check our per-socket dupack rate limit. */
2003 - if (tcp_oow_rate_limited(sock_net(sk), skb,
2004 - LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
2005 - &tp->last_oow_ack_time))
2006 + if (__tcp_oow_rate_limited(sock_net(sk),
2007 + LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
2008 + &tp->last_oow_ack_time))
2009 return;
2010
2011 - /* Then check the check host-wide RFC 5961 rate limit. */
2012 + /* Then check host-wide RFC 5961 rate limit. */
2013 now = jiffies / HZ;
2014 if (now != challenge_timestamp) {
2015 + u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
2016 +
2017 challenge_timestamp = now;
2018 - challenge_count = 0;
2019 + WRITE_ONCE(challenge_count, half +
2020 + prandom_u32_max(sysctl_tcp_challenge_ack_limit));
2021 }
2022 - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
2023 + count = READ_ONCE(challenge_count);
2024 + if (count > 0) {
2025 + WRITE_ONCE(challenge_count, count - 1);
2026 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
2027 tcp_send_ack(sk);
2028 }
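The tcp_input.c changes harden RFC 5961 challenge-ACK throttling: the shared per-second limit is raised from 100 to 1000, each second's actual budget is randomized to limit/2 + prandom_u32_max(limit) so an off-path attacker can no longer count challenge ACKs to infer the exact global counter state (the side channel behind CVE-2016-5696), and the counter is accessed with READ_ONCE/WRITE_ONCE. Below is a minimal user-space sketch of the randomized budget, with rand() standing in for prandom_u32_max() and no SMP annotations.

    /* Sketch only: rand() stands in for prandom_u32_max(); no locking. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static const unsigned int limit = 1000;  /* sysctl_tcp_challenge_ack_limit */
    static unsigned int budget;
    static time_t stamp;

    static int may_send_challenge_ack(time_t now)
    {
        if (now != stamp) {                  /* new one-second window */
            stamp = now;
            budget = (limit + 1) / 2 + (unsigned int)rand() % limit;
        }
        if (budget > 0) {
            budget--;                        /* consume one token */
            return 1;                        /* send the challenge ACK */
        }
        return 0;                            /* budget exhausted: stay silent */
    }

    int main(void)
    {
        unsigned int sent = 0;

        srand((unsigned int)time(NULL));
        for (int i = 0; i < 3000; i++)
            sent += (unsigned int)may_send_challenge_ack(time(NULL));
        printf("sent %u of 3000 challenge ACKs\n", sent);
        return 0;
    }

Since the per-window budget is drawn at random from [limit/2, limit/2 + limit), an observer who exhausts it learns only a noisy bound, not the precise count a fixed limit would leak.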
2029 diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
2030 index 7c9883ab56e5..660c967ba84a 100644
2031 --- a/net/ipv4/tcp_output.c
2032 +++ b/net/ipv4/tcp_output.c
2033 @@ -239,7 +239,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
2034 /* Set window scaling on max possible window
2035 * See RFC1323 for an explanation of the limit to 14
2036 */
2037 - space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
2038 + space = max_t(u32, space, sysctl_tcp_rmem[2]);
2039 + space = max_t(u32, space, sysctl_rmem_max);
2040 space = min_t(u32, space, *window_clamp);
2041 while (space > 65535 && (*rcv_wscale) < 14) {
2042 space >>= 1;
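The one-line bug fixed above had tcp_select_initial_window() compute the window-scaling search space from the two sysctls alone, discarding the connection's own buffer size; the fix takes the maximum of all three values before clamping, so a socket with a large receive buffer can still negotiate a sufficient rcv_wscale. A user-space sketch of the corrected arithmetic, using arbitrary example numbers:

    /* Sketch only: arbitrary example numbers. */
    #include <stdio.h>

    static unsigned int max_u32(unsigned int a, unsigned int b)
    {
        return a > b ? a : b;
    }

    int main(void)
    {
        unsigned int space = 4u << 20;         /* this socket's buffer: 4 MiB */
        unsigned int tcp_rmem_max = 212992;    /* sysctl_tcp_rmem[2] */
        unsigned int rmem_max = 212992;        /* sysctl_rmem_max */
        unsigned int window_clamp = 16u << 20;
        unsigned int rcv_wscale = 0;

        space = max_u32(space, tcp_rmem_max);  /* keep the largest of the three */
        space = max_u32(space, rmem_max);
        if (space > window_clamp)
            space = window_clamp;
        while (space > 65535 && rcv_wscale < 14) {
            space >>= 1;
            rcv_wscale++;
        }
        printf("rcv_wscale=%u\n", rcv_wscale); /* 7 here; 2 with the old code */
        return 0;
    }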
2043 diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
2044 index 923abd6b3064..8d2f7c9b491d 100644
2045 --- a/net/irda/af_irda.c
2046 +++ b/net/irda/af_irda.c
2047 @@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
2048 }
2049
2050 /* Check if we have opened a local TSAP */
2051 - if (!self->tsap)
2052 - irda_open_tsap(self, LSAP_ANY, addr->sir_name);
2053 + if (!self->tsap) {
2054 + err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
2055 + if (err)
2056 + goto out;
2057 + }
2058
2059 /* Move to connecting socket, start sending Connect Requests */
2060 sock->state = SS_CONNECTING;
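The irda_connect() hunk stops ignoring the return value of irda_open_tsap(): if opening the local TSAP fails (for example under memory pressure), the connect path now bails out instead of proceeding with a half-initialized socket. A trivial user-space sketch of the control-flow change; the helper names are made up.

    /* Sketch only: helper names are made up. */
    #include <stdio.h>

    static int open_tsap(int want_fail)
    {
        return want_fail ? -12 : 0;   /* -ENOMEM-style failure */
    }

    static int do_connect(int have_tsap, int want_fail)
    {
        int err = 0;

        if (!have_tsap) {
            err = open_tsap(want_fail);
            if (err)
                goto out;             /* bail out, as the patch now does */
        }
        printf("connecting...\n");
    out:
        return err;
    }

    int main(void)
    {
        printf("ok path:   %d\n", do_connect(0, 0));
        printf("fail path: %d\n", do_connect(0, 1));
        return 0;
    }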
2061 diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
2062 index ad4fa49ad1db..9068369f8a1b 100644
2063 --- a/security/apparmor/apparmorfs.c
2064 +++ b/security/apparmor/apparmorfs.c
2065 @@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
2066 seq_printf(seq, "%.2x", profile->hash[i]);
2067 seq_puts(seq, "\n");
2068 }
2069 + aa_put_profile(profile);
2070
2071 return 0;
2072 }
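The apparmorfs hunk balances a reference count: aa_fs_seq_hash_show() obtains a counted profile reference at the top of the function but previously returned without dropping it, leaking one reference per read of the hash file. A minimal user-space sketch of the get/put discipline the added aa_put_profile() restores; the names are illustrative, not AppArmor's API.

    /* Sketch only: illustrative names, not AppArmor's API. */
    #include <stdio.h>

    struct profile { int ref; };

    static void get_profile(struct profile *p) { p->ref++; }
    static void put_profile(struct profile *p) { p->ref--; }

    static int seq_hash_show(struct profile *p)
    {
        get_profile(p);               /* reference taken on entry */
        printf("hash: ...\n");
        put_profile(p);               /* the put the patch adds */
        return 0;
    }

    int main(void)
    {
        struct profile prof = { .ref = 1 };

        seq_hash_show(&prof);
        printf("ref=%d (a leak would leave it at 2)\n", prof.ref);
        return 0;
    }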