Magellan Linux

Contents of /trunk/kernel-alx/patches-4.4/0117-4.4.18-all-fixes.patch



Revision 2826 - committed Tue Sep 13 07:18:21 2016 UTC by niro
File size: 64900 bytes
-linux-4.4.18
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c71b819..2a4ee6302122 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
 "debugpat" boot parameter. With this parameter, various debug messages are
 printed to dmesg log.

+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT   Call Sequence               PAT State  PAT MSR
+ =========================================================
+ E    E     MTRR -> PAT init            Enabled    OS
+ E    D     MTRR -> PAT init            Disabled   -
+ D    E     MTRR -> PAT disable         Disabled   BIOS
+ D    D     MTRR -> PAT disable         Disabled   -
+ -    np/E  PAT  -> PAT disable         Disabled   BIOS
+ -    np/D  PAT  -> PAT disable         Disabled   -
+ E    !P/E  MTRR -> PAT init            Disabled   BIOS
+ D    !P/E  MTRR -> PAT disable         Disabled   BIOS
+ !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
+
+ Legend
+ ------------------------------------------------
+ E         Feature enabled in CPU
+ D         Feature disabled/unsupported in CPU
+ np        "nopat" boot option specified
+ !P        CONFIG_X86_PAT option unset
+ !M        CONFIG_MTRR option unset
+ Enabled   PAT state set to enabled
+ Disabled  PAT state set to disabled
+ OS        PAT initializes PAT MSR with OS setting
+ BIOS      PAT keeps PAT MSR with BIOS setting
+
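For readers skimming the table, the same decision logic can be condensed into a few lines of C. The sketch below is mine, not kernel code; the names are illustrative and only the first four (MTRR-driven) rows are modeled:

/* Hypothetical condensation of the table above; illustrative only. */
#include <stdbool.h>
#include <stdio.h>

enum pat_msr_owner { MSR_UNTOUCHED, MSR_OS, MSR_BIOS };

struct pat_state {
	bool enabled;
	enum pat_msr_owner msr;
};

/* mtrr_on: MTRRs enabled in the CPU; pat_on: PAT enabled in the CPU and
 * neither "nopat" nor !CONFIG_X86_PAT is in effect. */
static struct pat_state pat_after_boot(bool mtrr_on, bool pat_on)
{
	if (mtrr_on && pat_on)		/* row "E E": MTRR -> PAT init */
		return (struct pat_state){ true, MSR_OS };
	if (mtrr_on)			/* row "E D": no PAT MSR to touch */
		return (struct pat_state){ false, MSR_UNTOUCHED };
	if (pat_on)			/* row "D E": MTRR -> PAT disable */
		return (struct pat_state){ false, MSR_BIOS };
	return (struct pat_state){ false, MSR_UNTOUCHED };	/* row "D D" */
}

int main(void)
{
	struct pat_state s = pat_after_boot(true, true);
	printf("enabled=%d msr=%d\n", s.enabled, s.msr);
	return 0;
}
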
diff --git a/Makefile b/Makefile
index 76d34f763a41..eaedea88a8a7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 17
+SUBLEVEL = 18
 EXTRAVERSION =
 NAME = Blurry Fish Butt

diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 087acb569b63..5f221acd21ae 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@ asmlinkage long sys_oabi_epoll_wait(int epfd,
 	mm_segment_t fs;
 	long ret, err, i;

-	if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+	if (maxevents <= 0 ||
+	    maxevents > (INT_MAX/sizeof(*kbuf)) ||
+	    maxevents > (INT_MAX/sizeof(*events)))
 		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+		return -EFAULT;
 	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
@@ -317,6 +321,8 @@ asmlinkage long sys_oabi_semtimedop(int semid,

 	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
+	if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+		return -EFAULT;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
 		return -ENOMEM;
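The sys_oabi-compat.c fix above bounds the element count against INT_MAX divided by each element size before multiplying, so the kmalloc() size computation cannot overflow. A minimal standalone sketch of the same idiom (userspace C, hypothetical struct):

/* Sketch of the bounded-allocation idiom used above; illustrative only. */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

struct event { int fd; unsigned events; };

static void *alloc_events(int n)
{
	/* Reject non-positive counts and counts whose byte size would
	 * overflow the arithmetic used in the allocation below. */
	if (n <= 0 || (size_t)n > INT_MAX / sizeof(struct event))
		return NULL;
	return malloc(sizeof(struct event) * (size_t)n);
}

int main(void)
{
	printf("%p\n", alloc_events(16));	/* fine */
	printf("%p\n", alloc_events(INT_MAX));	/* rejected: would overflow */
	return 0;
}
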
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 5a69eb48d0a8..ee93d5fe61d7 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -344,7 +344,7 @@ EXPORT(sysn32_call_table)
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key
 	PTR	sys_request_key
-	PTR	sys_keyctl			/* 6245 */
+	PTR	compat_sys_keyctl		/* 6245 */
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index e4b6d7c97822..b77052ec6fb2 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -500,7 +500,7 @@ EXPORT(sys32_call_table)
 	PTR	sys_ni_syscall			/* available, was setaltroot */
 	PTR	sys_add_key			/* 4280 */
 	PTR	sys_request_key
-	PTR	sys_keyctl
+	PTR	compat_sys_keyctl
 	PTR	sys_set_thread_area
 	PTR	sys_inotify_init
 	PTR	sys_inotify_add_watch		/* 4285 */
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index b1f0a90f933b..42570d8fb265 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2070,13 +2070,6 @@ void s390_reset_system(void (*fn_pre)(void),
 	S390_lowcore.program_new_psw.addr =
 		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;

-	/*
-	 * Clear subchannel ID and number to signal new kernel that no CCW or
-	 * SCSI IPL has been done (for kexec and kdump)
-	 */
-	S390_lowcore.subchannel_id = 0;
-	S390_lowcore.subchannel_nr = 0;
-
 	/* Store status at absolute zero */
 	store_status();

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index f17705e1332c..e62f4401e792 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -294,7 +294,7 @@
 # 285 sys_setaltroot
 286	i386	add_key			sys_add_key
 287	i386	request_key		sys_request_key
-288	i386	keyctl			sys_keyctl
+288	i386	keyctl			sys_keyctl			compat_sys_keyctl
 289	i386	ioprio_set		sys_ioprio_set
 290	i386	ioprio_get		sys_ioprio_get
 291	i386	inotify_init		sys_inotify_init
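All four syscall-table edits above route 32-bit callers to compat_sys_keyctl. The motivation is the usual compat-ABI one: some keyctl operations pass structures whose field sizes differ between the 32-bit and 64-bit ABIs, so the native handler would misread a 32-bit caller's arguments. The thunk below is a schematic illustration of that pattern, not the kernel's actual compat_sys_keyctl:

/* Schematic compat thunk; types and names are illustrative only. */
#include <stdint.h>

struct iovec64 { uint64_t base; uint64_t len; };
struct iovec32 { uint32_t base; uint32_t len; };	/* 32-bit ABI layout */

/* Stub standing in for the native handler, which expects 64-bit iovecs. */
static long do_keyctl_instantiate(const struct iovec64 *iov, int n)
{
	(void)iov;
	return n;
}

/* A compat entry point widens the 32-bit layout before calling the native
 * code; without it, a 64-bit kernel would read garbage fields. */
static long compat_keyctl_instantiate(const struct iovec32 *iov32, int n)
{
	struct iovec64 iov[8];

	if (n < 0 || n > 8)
		return -22;	/* -EINVAL */
	for (int i = 0; i < n; i++) {
		iov[i].base = iov32[i].base;
		iov[i].len  = iov32[i].len;
	}
	return do_keyctl_instantiate(iov, n);
}

int main(void)
{
	struct iovec32 v = { 0x1000, 16 };
	return (int)compat_keyctl_instantiate(&v, 1) == 1 ? 0 : 1;
}
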
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index b94f6f64e23d..dbff1456d215 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,6 +24,7 @@
 #define _ASM_X86_MTRR_H

 #include <uapi/asm/mtrr.h>
+#include <asm/pat.h>


 /*
@@ -83,9 +84,12 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
 static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 {
 }
+static inline void mtrr_bp_init(void)
+{
+	pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}

 #define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
 #define set_mtrr_aps_delayed_init() do {} while (0)
 #define mtrr_aps_init() do {} while (0)
 #define mtrr_bp_restore() do {} while (0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index ca6c228d5e62..0b1ff4c1c14e 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,8 +5,8 @@
 #include <asm/pgtable_types.h>

 bool pat_enabled(void);
+void pat_disable(const char *reason);
 extern void pat_init(void);
-void pat_init_cache_modes(u64);

 extern int reserve_memtype(u64 start, u64 end,
 		enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 3b533cf37c74..b5624fafa44a 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
 		pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
 }

+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	pat_init();
+
+	post_set();
+	local_irq_restore(flags);
+}
+
 /* Grab all of the MTRR state for this CPU into *state */
 bool __init get_mtrr_state(void)
 {
 	struct mtrr_var_range *vrs;
-	unsigned long flags;
 	unsigned lo, dummy;
 	unsigned int i;

@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)

 	mtrr_state_set = 1;

-	/* PAT setup for BP. We need to go through sync steps here */
-	local_irq_save(flags);
-	prepare_set();
-
-	pat_init();
-
-	post_set();
-	local_irq_restore(flags);
-
 	return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
 }

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index f891b4750f04..fa77ac8291f0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
 			/* BIOS may override */
 			__mtrr_enabled = get_mtrr_state();

+			if (mtrr_enabled())
+				mtrr_bp_pat_init();
+
 			if (mtrr_cleanup(phys_addr)) {
 				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
 		}
 	}

-	if (!mtrr_enabled())
+	if (!mtrr_enabled()) {
 		pr_info("MTRR: Disabled\n");
+
+		/*
+		 * PAT initialization relies on MTRR's rendezvous handler.
+		 * Skip PAT init until the handler can initialize both
+		 * features independently.
+		 */
+		pat_disable("MTRRs disabled, skipping PAT initialization too.");
+	}
 }

 void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884dcc433..6c7ced07d16d 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 void fill_mtrr_var_range(unsigned int index,
 		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);

 extern void set_mtrr_ops(const struct mtrr_ops *ops);

diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 844b06d67df4..307f60ecfc6d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -94,18 +94,6 @@ static unsigned long mmap_base(unsigned long rnd)
 }

 /*
- * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
- * does, but not when emulating X86_32
- */
-static unsigned long mmap_legacy_base(unsigned long rnd)
-{
-	if (mmap_is_ia32())
-		return TASK_UNMAPPED_BASE;
-	else
-		return TASK_UNMAPPED_BASE + rnd;
-}
-
-/*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
@@ -116,7 +104,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();

-	mm->mmap_legacy_base = mmap_legacy_base(random_factor);
+	mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;

 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 188e3e07eeeb..6ad687d104ca 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -39,11 +39,22 @@
 static bool boot_cpu_done;

 static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);

-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
 {
+	if (!__pat_enabled)
+		return;
+
+	if (boot_cpu_done) {
+		WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+		return;
+	}
+
 	__pat_enabled = 0;
 	pr_info("x86/PAT: %s\n", reason);
+
+	init_cache_modes();
 }

 static int __init nopat(char *str)
@@ -180,7 +191,7 @@ static enum page_cache_mode pat_get_cache_mode(unsigned pat_val, char *msg)
  * configuration.
  * Using lower indices is preferred, so we start with highest index.
  */
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
 {
 	enum page_cache_mode cache;
 	char pat_msg[33];
@@ -201,14 +212,11 @@ static void pat_bsp_init(u64 pat)
 {
 	u64 tmp_pat;

-	if (!cpu_has_pat) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		pat_disable("PAT not supported by CPU.");
 		return;
 	}

-	if (!pat_enabled())
-		goto done;
-
 	rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
 	if (!tmp_pat) {
 		pat_disable("PAT MSR is 0, disabled.");
@@ -217,16 +225,12 @@ static void pat_bsp_init(u64 pat)

 	wrmsrl(MSR_IA32_CR_PAT, pat);

-done:
-	pat_init_cache_modes(pat);
+	__init_cache_modes(pat);
 }

 static void pat_ap_init(u64 pat)
 {
-	if (!pat_enabled())
-		return;
-
-	if (!cpu_has_pat) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		/*
 		 * If this happens we are on a secondary CPU, but switched to
 		 * PAT on the boot CPU. We have no way to undo PAT.
@@ -237,18 +241,32 @@ static void pat_ap_init(u64 pat)
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 }

-void pat_init(void)
+static void init_cache_modes(void)
 {
-	u64 pat;
-	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u64 pat = 0;
+	static int init_cm_done;

-	if (!pat_enabled()) {
+	if (init_cm_done)
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PAT)) {
+		/*
+		 * CPU supports PAT. Set PAT table to be consistent with
+		 * PAT MSR. This case supports "nopat" boot option, and
+		 * virtual machine environments which support PAT without
+		 * MTRRs. In specific, Xen has unique setup to PAT MSR.
+		 *
+		 * If PAT MSR returns 0, it is considered invalid and emulates
+		 * as No PAT.
+		 */
+		rdmsrl(MSR_IA32_CR_PAT, pat);
+	}
+
+	if (!pat) {
 		/*
 		 * No PAT. Emulate the PAT table that corresponds to the two
-		 * cache bits, PWT (Write Through) and PCD (Cache Disable). This
-		 * setup is the same as the BIOS default setup when the system
-		 * has PAT but the "nopat" boot option has been specified. This
-		 * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+		 * cache bits, PWT (Write Through) and PCD (Cache Disable).
+		 * This setup is also the same as the BIOS default setup.
 		 *
 		 * PTE encoding:
 		 *
@@ -265,10 +283,36 @@ void pat_init(void)
 		 */
 		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
 		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+	}
+
+	__init_cache_modes(pat);
+
+	init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+	u64 pat;
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (!pat_enabled()) {
+		init_cache_modes();
+		return;
+	}

-	} else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
-		   (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
-		    ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+	     ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
 		/*
 		 * PAT support with the lower four entries. Intel Pentium 2,
 		 * 3, M, and 4 are affected by PAT errata, which makes the
@@ -733,25 +777,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	if (file->f_flags & O_DSYNC)
 		pcm = _PAGE_CACHE_MODE_UC_MINUS;

-#ifdef CONFIG_X86_32
-	/*
-	 * On the PPro and successors, the MTRRs are used to set
-	 * memory types for physical addresses outside main memory,
-	 * so blindly setting UC or PWT on those pages is wrong.
-	 * For Pentiums and earlier, the surround logic should disable
-	 * caching for the high addresses through the KEN pin, but
-	 * we maintain the tradition of paranoia in this code.
-	 */
-	if (!pat_enabled() &&
-	    !(boot_cpu_has(X86_FEATURE_MTRR) ||
-	      boot_cpu_has(X86_FEATURE_K6_MTRR) ||
-	      boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
-	      boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
-	    (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
-		pcm = _PAGE_CACHE_MODE_UC;
-	}
-#endif
-
 	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
 			     cachemode2protval(pcm));
 	return 1;
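The init_cache_modes() path added above reads MSR_IA32_CR_PAT (0x277) and derives the cache-mode table from it. To inspect the same MSR from userspace, a sketch like the following works on a system with the msr module loaded and root privileges; it is illustrative and not part of the patch:

/* Illustrative only: dump the 8 PAT entries from MSR 0x277 via
 * /dev/cpu/0/msr. Assumes the "msr" module is loaded and root. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static const char *pat_type(uint8_t v)
{
	switch (v & 0x7) {
	case 0: return "UC";	/* Uncacheable */
	case 1: return "WC";	/* Write Combining */
	case 4: return "WT";	/* Write Through */
	case 5: return "WP";	/* Write Protected */
	case 6: return "WB";	/* Write Back */
	case 7: return "UC-";	/* Uncached, overridable */
	default: return "reserved";
	}
}

int main(void)
{
	uint64_t pat;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* The MSR device uses the MSR number as the file offset. */
	if (fd < 0 || pread(fd, &pat, 8, 0x277) != 8) {
		perror("read MSR_IA32_CR_PAT");
		return 1;
	}
	for (int i = 0; i < 8; i++)
		printf("PAT%d: %s\n", i, pat_type(pat >> (i * 8)));
	close(fd);
	return 0;
}
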
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index beab8c706ac9..ffa41591bff9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -74,7 +74,6 @@
 #include <asm/mach_traps.h>
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
-#include <asm/pat.h>
 #include <asm/cpu.h>

 #ifdef CONFIG_ACPI
@@ -1519,7 +1518,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	unsigned long initrd_start = 0;
-	u64 pat;
 	int rc;

 	if (!xen_start_info)
@@ -1627,13 +1625,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 					  xen_start_info->nr_pages);
 	xen_reserve_special_pages();

-	/*
-	 * Modify the cache mode translation tables to match Xen's PAT
-	 * configuration.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	pat_init_cache_modes(pat);
-
 	/* keep using Xen gdt for now; no urgent need to change it */

 #ifdef CONFIG_X86_32
diff --git a/block/genhd.c b/block/genhd.c
index e5cafa51567c..d2a1d43bf9fa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -831,6 +831,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v)
 	if (iter) {
 		class_dev_iter_exit(iter);
 		kfree(iter);
+		seqf->private = NULL;
 	}
 }

diff --git a/crypto/gcm.c b/crypto/gcm.c
index bec329b3de8d..d9ea5f9c0574 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -639,7 +639,9 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,

 	ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
 				    CRYPTO_ALG_TYPE_HASH,
-				    CRYPTO_ALG_TYPE_AHASH_MASK);
+				    CRYPTO_ALG_TYPE_AHASH_MASK |
+				    crypto_requires_sync(algt->type,
+							 algt->mask));
 	if (IS_ERR(ghash_alg))
 		return PTR_ERR(ghash_alg);

diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index ea5815c5e128..bc769c448d4a 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -72,7 +72,8 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,

 void scatterwalk_done(struct scatter_walk *walk, int out, int more)
 {
-	if (!(scatterwalk_pagelen(walk) & (PAGE_SIZE - 1)) || !more)
+	if (!more || walk->offset >= walk->sg->offset + walk->sg->length ||
+	    !(walk->offset & (PAGE_SIZE - 1)))
 		scatterwalk_pagedone(walk, out, more);
 }
 EXPORT_SYMBOL_GPL(scatterwalk_done);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d0da5d852d41..0227b0465b40 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -722,15 +722,18 @@ retry:
 	}
 }

-static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
+static int credit_entropy_bits_safe(struct entropy_store *r, int nbits)
 {
 	const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1));

+	if (nbits < 0)
+		return -EINVAL;
+
 	/* Cap the value to avoid overflows */
 	nbits = min(nbits, nbits_max);
-	nbits = max(nbits, -nbits_max);

 	credit_entropy_bits(r, nbits);
+	return 0;
 }

 /*********************************************************************
@@ -1542,8 +1545,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -EPERM;
 		if (get_user(ent_count, p))
 			return -EFAULT;
-		credit_entropy_bits_safe(&input_pool, ent_count);
-		return 0;
+		return credit_entropy_bits_safe(&input_pool, ent_count);
 	case RNDADDENTROPY:
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
@@ -1557,8 +1559,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 				    size);
 		if (retval < 0)
 			return retval;
-		credit_entropy_bits_safe(&input_pool, ent_count);
-		return 0;
+		return credit_entropy_bits_safe(&input_pool, ent_count);
 	case RNDZAPENTCNT:
 	case RNDCLEARPOOL:
 		/*
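With the random.c change above, passing a negative credit to RNDADDTOENTCNT now fails with EINVAL instead of being clamped and silently accepted. A minimal userspace probe of that ioctl (requires CAP_SYS_ADMIN; illustrative only):

/* Probe of the RNDADDTOENTCNT behavior changed above; needs root. */
#include <fcntl.h>
#include <linux/random.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/random", O_RDWR);
	int count = -64;	/* bogus negative credit */

	if (fd < 0)
		return 1;
	if (ioctl(fd, RNDADDTOENTCNT, &count) < 0)
		perror("RNDADDTOENTCNT");	/* EINVAL once patched */
	close(fd);
	return 0;
}
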
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 62284e45d531..eb434881ddbc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1789,16 +1789,20 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
 				   const struct intel_plane_state *pstate,
 				   uint32_t mem_value)
 {
-	int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+	/*
+	 * We treat the cursor plane as always-on for the purposes of watermark
+	 * calculation.  Until we have two-stage watermark programming merged,
+	 * this is necessary to avoid flickering.
+	 */
+	int cpp = 4;
+	int width = pstate->visible ? pstate->base.crtc_w : 64;

-	if (!cstate->base.active || !pstate->visible)
+	if (!cstate->base.active)
 		return 0;

 	return ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
 			      cstate->base.adjusted_mode.crtc_htotal,
-			      drm_rect_width(&pstate->dst),
-			      bpp,
-			      mem_value);
+			      width, cpp, mem_value);
 }

 /* Only for WM_LP. */
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 774cd2210566..21febbb0d84e 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -1418,8 +1418,10 @@ static int sixaxis_set_operational_usb(struct hid_device *hdev)
 	}

 	ret = hid_hw_output_report(hdev, buf, 1);
-	if (ret < 0)
-		hid_err(hdev, "can't set operational mode: step 3\n");
+	if (ret < 0) {
+		hid_info(hdev, "can't set operational mode: step 3, ignoring\n");
+		ret = 0;
+	}

 out:
 	kfree(buf);
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 27fa0cb09538..85f39cc3e276 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -244,6 +244,13 @@ struct i801_priv {
 	struct platform_device *mux_pdev;
 #endif
 	struct platform_device *tco_pdev;
+
+	/*
+	 * If set to true the host controller registers are reserved for
+	 * ACPI AML use.  Protected by acpi_lock.
+	 */
+	bool acpi_reserved;
+	struct mutex acpi_lock;
 };

 #define FEATURE_SMBUS_PEC	(1 << 0)
@@ -714,9 +721,15 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 {
 	int hwpec;
 	int block = 0;
-	int ret, xact = 0;
+	int ret = 0, xact = 0;
 	struct i801_priv *priv = i2c_get_adapdata(adap);

+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved) {
+		mutex_unlock(&priv->acpi_lock);
+		return -EBUSY;
+	}
+
 	hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
 		&& size != I2C_SMBUS_QUICK
 		&& size != I2C_SMBUS_I2C_BLOCK_DATA;
@@ -773,7 +786,8 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 	default:
 		dev_err(&priv->pci_dev->dev, "Unsupported transaction %d\n",
 			size);
-		return -EOPNOTSUPP;
+		ret = -EOPNOTSUPP;
+		goto out;
 	}

 	if (hwpec)	/* enable/disable hardware PEC */
@@ -796,11 +810,11 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 		       ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));

 	if (block)
-		return ret;
+		goto out;
 	if (ret)
-		return ret;
+		goto out;
 	if ((read_write == I2C_SMBUS_WRITE) || (xact == I801_QUICK))
-		return 0;
+		goto out;

 	switch (xact & 0x7f) {
 	case I801_BYTE:	/* Result put in SMBHSTDAT0 */
@@ -812,7 +826,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 			(inb_p(SMBHSTDAT1(priv)) << 8);
 		break;
 	}
-	return 0;
+
+out:
+	mutex_unlock(&priv->acpi_lock);
+	return ret;
 }


@@ -1249,6 +1266,72 @@ static void i801_add_tco(struct i801_priv *priv)
 	priv->tco_pdev = pdev;
 }

+#ifdef CONFIG_ACPI
+static acpi_status
+i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
+		     u64 *value, void *handler_context, void *region_context)
+{
+	struct i801_priv *priv = handler_context;
+	struct pci_dev *pdev = priv->pci_dev;
+	acpi_status status;
+
+	/*
+	 * Once BIOS AML code touches the OpRegion we warn and inhibit any
+	 * further access from the driver itself.  This device is now owned
+	 * by the system firmware.
+	 */
+	mutex_lock(&priv->acpi_lock);
+
+	if (!priv->acpi_reserved) {
+		priv->acpi_reserved = true;
+
+		dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
+		dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+	}
+
+	if ((function & ACPI_IO_MASK) == ACPI_READ)
+		status = acpi_os_read_port(address, (u32 *)value, bits);
+	else
+		status = acpi_os_write_port(address, (u32)*value, bits);
+
+	mutex_unlock(&priv->acpi_lock);
+
+	return status;
+}
+
+static int i801_acpi_probe(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+	acpi_status status;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (adev) {
+		status = acpi_install_address_space_handler(adev->handle,
+				ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
+				NULL, priv);
+		if (ACPI_SUCCESS(status))
+			return 0;
+	}
+
+	return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
+}
+
+static void i801_acpi_remove(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (!adev)
+		return;
+
+	acpi_remove_address_space_handler(adev->handle,
+		ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
+}
+#else
+static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
+static inline void i801_acpi_remove(struct i801_priv *priv) { }
+#endif
+
 static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	unsigned char temp;
@@ -1266,6 +1349,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	priv->adapter.dev.parent = &dev->dev;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
 	priv->adapter.retries = 3;
+	mutex_init(&priv->acpi_lock);

 	priv->pci_dev = dev;
 	switch (dev->device) {
@@ -1328,10 +1412,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 	}

-	err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
-	if (err) {
+	if (i801_acpi_probe(priv))
 		return -ENODEV;
-	}

 	err = pcim_iomap_regions(dev, 1 << SMBBAR,
 				 dev_driver_string(&dev->dev));
@@ -1340,6 +1422,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			"Failed to request SMBus region 0x%lx-0x%Lx\n",
 			priv->smba,
 			(unsigned long long)pci_resource_end(dev, SMBBAR));
+		i801_acpi_remove(priv);
 		return err;
 	}

@@ -1404,6 +1487,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = i2c_add_adapter(&priv->adapter);
 	if (err) {
 		dev_err(&dev->dev, "Failed to add SMBus adapter\n");
+		i801_acpi_remove(priv);
 		return err;
 	}

@@ -1422,6 +1506,7 @@ static void i801_remove(struct pci_dev *dev)

 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
+	i801_acpi_remove(priv);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);

 	platform_device_unregister(priv->tco_pdev);
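The acpi_reserved flag introduced above is a one-way latch: the first firmware access flips it under acpi_lock, and every subsequent driver transaction bails out with -EBUSY. The same pattern in generic terms (userspace C with pthreads; names are mine, not the driver's):

/* Generic one-way "reserved" latch modeled on the acpi_reserved logic
 * above; illustrative only. */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

struct device_ctx {
	pthread_mutex_t lock;
	bool reserved;		/* set once, never cleared */
};

/* Called when the other owner (firmware, in the driver) takes over. */
static void ctx_reserve(struct device_ctx *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	ctx->reserved = true;
	pthread_mutex_unlock(&ctx->lock);
}

/* Every normal transaction checks the latch under the same lock. */
static int ctx_transact(struct device_ctx *ctx)
{
	pthread_mutex_lock(&ctx->lock);
	if (ctx->reserved) {
		pthread_mutex_unlock(&ctx->lock);
		return -EBUSY;
	}
	/* ... perform the register access while holding the lock ... */
	pthread_mutex_unlock(&ctx->lock);
	return 0;
}

int main(void)
{
	struct device_ctx ctx = { PTHREAD_MUTEX_INITIALIZER, false };

	ctx_transact(&ctx);	/* succeeds */
	ctx_reserve(&ctx);	/* other owner takes over */
	return ctx_transact(&ctx) == -EBUSY ? 0 : 1;
}
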
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index db760e84119f..b8df0f5e8c25 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -446,7 +446,11 @@ static int bond_newlink(struct net *src_net, struct net_device *bond_dev,
 	if (err < 0)
 		return err;

-	return register_netdevice(bond_dev);
+	err = register_netdevice(bond_dev);
+
+	netif_carrier_off(bond_dev);
+
+	return err;
 }

 static size_t bond_get_size(const struct net_device *bond_dev)
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 28f7610b03fe..c32f5d32f811 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -219,7 +219,7 @@ err_dma:
 	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
 			 DMA_TO_DEVICE);

-	while (i > 0) {
+	while (i-- > 0) {
 		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[index];
 		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
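The bgmac change above fixes an unwind loop: with plain "while (i > 0)" and no decrement in the visible code, the error path could spin forever, whereas "while (i-- > 0)" visits slots i-1 down to 0 exactly once. A toy demonstration of the corrected form:

/* Minimal sketch of the unwind-loop fix above: i entries 0..i-1 were
 * set up and must be torn down in reverse order.  Illustrative only. */
#include <stdio.h>

static void unmap_slot(int index)
{
	printf("unmapping slot %d\n", index);
}

int main(void)
{
	int i = 3;	/* three slots were set up before the failure */

	/* "while (i > 0) { ... }" without a decrement never terminates;
	 * "while (i-- > 0)" yields i = 2, 1, 0 inside the body and stops. */
	while (i-- > 0)
		unmap_slot(i);
	return 0;
}
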
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 3dd548ab8df1..40365cb1abe6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -794,13 +794,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 			 * in a bitmap and increasing the chain consumer only
 			 * for the first successive completed entries.
 			 */
-			bitmap_set(p_spq->p_comp_bitmap, pos, SPQ_RING_SIZE);
+			__set_bit(pos, p_spq->p_comp_bitmap);

 			while (test_bit(p_spq->comp_bitmap_idx,
 					p_spq->p_comp_bitmap)) {
-				bitmap_clear(p_spq->p_comp_bitmap,
-					     p_spq->comp_bitmap_idx,
-					     SPQ_RING_SIZE);
+				__clear_bit(p_spq->comp_bitmap_idx,
+					    p_spq->p_comp_bitmap);
 				p_spq->comp_bitmap_idx++;
 				qed_chain_return_produced(&p_spq->chain);
 			}
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a790d5f90b83..e0e94b855bbe 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -952,8 +952,6 @@ EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);

 static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int ret;
-
 	/* MBIM backwards compatible function? */
 	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
@@ -962,16 +960,7 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 	 * Additionally, generic NCM devices are assumed to accept arbitrarily
 	 * placed NDP.
 	 */
-	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
-
-	/*
-	 * We should get an event when network connection is "connected" or
-	 * "disconnected". Set network connection in "disconnected" state
-	 * (carrier is OFF) during attach, so the IP network stack does not
-	 * start IPv6 negotiation and more.
-	 */
-	usbnet_link_change(dev, 0, 0);
-	return ret;
+	return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
 }

 static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
@@ -1554,7 +1543,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)

 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
-	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
+	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+			| FLAG_LINK_INTR,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1567,7 +1557,7 @@ static const struct driver_info cdc_ncm_info = {
 static const struct driver_info wwan_info = {
 	.description = "Mobile Broadband Network Device",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN,
+			| FLAG_LINK_INTR | FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
@@ -1580,7 +1570,7 @@ static const struct driver_info wwan_info = {
 static const struct driver_info wwan_noarp_info = {
 	.description = "Mobile Broadband Network Device (NO ARP)",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_WWAN | FLAG_NOARP,
+			| FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c
index 943c1cb9566c..d28e3ab9479c 100644
--- a/drivers/pnp/quirks.c
+++ b/drivers/pnp/quirks.c
@@ -342,7 +342,9 @@ static void quirk_amd_mmconfig_area(struct pnp_dev *dev)
 /* Device IDs of parts that have 32KB MCH space */
 static const unsigned int mch_quirk_devices[] = {
 	0x0154,	/* Ivy Bridge */
+	0x0a04,	/* Haswell-ULT */
 	0x0c00,	/* Haswell */
+	0x1604,	/* Broadwell */
 };

 static struct pci_dev *get_intel_host(void)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index f7ae898833dd..7232d43e2207 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1058,11 +1058,12 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 	}

 	error = scsi_dh_add_device(sdev);
-	if (error) {
+	if (error)
+		/*
+		 * device_handler is optional, so any error can be ignored
+		 */
 		sdev_printk(KERN_INFO, sdev,
 			    "failed to add device handler: %d\n", error);
-		return error;
-	}

 	device_enable_async_suspend(&sdev->sdev_dev);
 	error = device_add(&sdev->sdev_dev);
diff --git a/drivers/staging/rdma/ipath/ipath_file_ops.c b/drivers/staging/rdma/ipath/ipath_file_ops.c
index 13c3cd11ab92..05d30f433b19 100644
--- a/drivers/staging/rdma/ipath/ipath_file_ops.c
+++ b/drivers/staging/rdma/ipath/ipath_file_ops.c
@@ -45,6 +45,8 @@
 #include <linux/uio.h>
 #include <asm/pgtable.h>

+#include <rdma/ib.h>
+
 #include "ipath_kernel.h"
 #include "ipath_common.h"
 #include "ipath_user_sdma.h"
@@ -2243,6 +2245,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 	ssize_t ret = 0;
 	void *dest;

+	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+		return -EACCES;
+
 	if (count < sizeof(cmd.type)) {
 		ret = -EINVAL;
 		goto bail;
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 7865228f664f..807d80145686 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -679,14 +679,14 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 /* this is called once with whichever end is closed last */
 static void pty_unix98_shutdown(struct tty_struct *tty)
 {
-	struct inode *ptmx_inode;
+	struct pts_fs_info *fsi;

 	if (tty->driver->subtype == PTY_TYPE_MASTER)
-		ptmx_inode = tty->driver_data;
+		fsi = tty->driver_data;
 	else
-		ptmx_inode = tty->link->driver_data;
-	devpts_kill_index(ptmx_inode, tty->index);
-	devpts_del_ref(ptmx_inode);
+		fsi = tty->link->driver_data;
+	devpts_kill_index(fsi, tty->index);
+	devpts_put_ref(fsi);
 }

 static const struct tty_operations ptm_unix98_ops = {
@@ -738,6 +738,7 @@ static const struct tty_operations pty_unix98_ops = {

 static int ptmx_open(struct inode *inode, struct file *filp)
 {
+	struct pts_fs_info *fsi;
 	struct tty_struct *tty;
 	struct inode *slave_inode;
 	int retval;
@@ -752,47 +753,41 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	if (retval)
 		return retval;

+	fsi = devpts_get_ref(inode, filp);
+	retval = -ENODEV;
+	if (!fsi)
+		goto out_free_file;
+
 	/* find a device that is not in use. */
 	mutex_lock(&devpts_mutex);
-	index = devpts_new_index(inode);
-	if (index < 0) {
-		retval = index;
-		mutex_unlock(&devpts_mutex);
-		goto err_file;
-	}
-
+	index = devpts_new_index(fsi);
 	mutex_unlock(&devpts_mutex);

-	mutex_lock(&tty_mutex);
-	tty = tty_init_dev(ptm_driver, index);
+	retval = index;
+	if (index < 0)
+		goto out_put_ref;

-	if (IS_ERR(tty)) {
-		retval = PTR_ERR(tty);
-		goto out;
-	}

+	mutex_lock(&tty_mutex);
+	tty = tty_init_dev(ptm_driver, index);
 	/* The tty returned here is locked so we can safely
 	   drop the mutex */
 	mutex_unlock(&tty_mutex);

-	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
-	tty->driver_data = inode;
+	retval = PTR_ERR(tty);
+	if (IS_ERR(tty))
+		goto out;

 	/*
-	 * In the case where all references to ptmx inode are dropped and we
-	 * still have /dev/tty opened pointing to the master/slave pair (ptmx
-	 * is closed/released before /dev/tty), we must make sure that the inode
-	 * is still valid when we call the final pty_unix98_shutdown, thus we
-	 * hold an additional reference to the ptmx inode. For the same /dev/tty
-	 * last close case, we also need to make sure the super_block isn't
-	 * destroyed (devpts instance unmounted), before /dev/tty is closed and
-	 * on its release devpts_kill_index is called.
+	 * From here on out, the tty is "live", and the index and
+	 * fsi will be killed/put by the tty_release()
 	 */
-	devpts_add_ref(inode);
+	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
+	tty->driver_data = fsi;

 	tty_add_file(tty, filp);

-	slave_inode = devpts_pty_new(inode,
+	slave_inode = devpts_pty_new(fsi,
 			MKDEV(UNIX98_PTY_SLAVE_MAJOR, index), index,
 			tty->link);
 	if (IS_ERR(slave_inode)) {
@@ -811,12 +806,14 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	return 0;
 err_release:
 	tty_unlock(tty);
+	// This will also put-ref the fsi
 	tty_release(inode, filp);
 	return retval;
 out:
-	mutex_unlock(&tty_mutex);
-	devpts_kill_index(inode, index);
-err_file:
+	devpts_kill_index(fsi, index);
+out_put_ref:
+	devpts_put_ref(fsi);
+out_free_file:
 	tty_free_file(filp);
 	return retval;
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index 108d7d810be3..71b6056ad35d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -578,7 +578,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)

 failed:
 	spin_unlock(&dentry->d_lock);
-	cpu_relax();
 	return dentry; /* try again with same dentry */
 }

@@ -752,6 +751,8 @@ void dput(struct dentry *dentry)
 		return;

 repeat:
+	might_sleep();
+
 	rcu_read_lock();
 	if (likely(fast_dput(dentry))) {
 		rcu_read_unlock();
@@ -783,8 +784,10 @@ repeat:

 kill_it:
 	dentry = dentry_kill(dentry);
-	if (dentry)
+	if (dentry) {
+		cond_resched();
 		goto repeat;
+	}
 }
 EXPORT_SYMBOL(dput);

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 706de324f2a6..c82edb049117 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -128,6 +128,7 @@ static const match_table_t tokens = {
 struct pts_fs_info {
 	struct ida allocated_ptys;
 	struct pts_mount_opts mount_opts;
+	struct super_block *sb;
 	struct dentry *ptmx_dentry;
 };

@@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = {
 	.show_options	= devpts_show_options,
 };

-static void *new_pts_fs_info(void)
+static void *new_pts_fs_info(struct super_block *sb)
 {
 	struct pts_fs_info *fsi;

@@ -369,6 +370,7 @@ static void *new_pts_fs_info(void)
 	ida_init(&fsi->allocated_ptys);
 	fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
 	fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+	fsi->sb = sb;

 	return fsi;
 }
@@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
 	s->s_op = &devpts_sops;
 	s->s_time_gran = 1;

-	s->s_fs_info = new_pts_fs_info();
+	s->s_fs_info = new_pts_fs_info(s);
 	if (!s->s_fs_info)
 		goto fail;

@@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = {
  * to the System V naming convention
 */

-int devpts_new_index(struct inode *ptmx_inode)
+int devpts_new_index(struct pts_fs_info *fsi)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-	struct pts_fs_info *fsi;
 	int index;
 	int ida_ret;

-	if (!sb)
+	if (!fsi)
 		return -ENODEV;

-	fsi = DEVPTS_SB(sb);
 retry:
 	if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
 		return -ENOMEM;
@@ -564,11 +563,8 @@ retry:
 	return index;
 }

-void devpts_kill_index(struct inode *ptmx_inode, int idx)
+void devpts_kill_index(struct pts_fs_info *fsi, int idx)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-	struct pts_fs_info *fsi = DEVPTS_SB(sb);
-
 	mutex_lock(&allocated_ptys_lock);
 	ida_remove(&fsi->allocated_ptys, idx);
 	pty_count--;
@@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 /*
  * pty code needs to hold extra references in case of last /dev/tty close
 */
-
-void devpts_add_ref(struct inode *ptmx_inode)
+struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct super_block *sb;
+	struct pts_fs_info *fsi;
+
+	sb = pts_sb_from_inode(ptmx_inode);
+	if (!sb)
+		return NULL;
+	fsi = DEVPTS_SB(sb);
+	if (!fsi)
+		return NULL;

 	atomic_inc(&sb->s_active);
-	ihold(ptmx_inode);
+	return fsi;
 }

-void devpts_del_ref(struct inode *ptmx_inode)
+void devpts_put_ref(struct pts_fs_info *fsi)
 {
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-
-	iput(ptmx_inode);
-	deactivate_super(sb);
+	deactivate_super(fsi->sb);
 }

 /**
@@ -604,22 +604,21 @@ void devpts_del_ref(struct inode *ptmx_inode)
  *
  * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
 */
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
 		void *priv)
 {
 	struct dentry *dentry;
-	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+	struct super_block *sb;
 	struct inode *inode;
 	struct dentry *root;
-	struct pts_fs_info *fsi;
 	struct pts_mount_opts *opts;
 	char s[12];

-	if (!sb)
+	if (!fsi)
 		return ERR_PTR(-ENODEV);

+	sb = fsi->sb;
 	root = sb->s_root;
-	fsi = DEVPTS_SB(sb);
 	opts = &fsi->mount_opts;

 	inode = new_inode(sb);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fe1f50fe764f..f97110461c19 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	memset(bh->b_data, 0, sb->s_blocksize);

 	bit_max = ext4_num_base_meta_clusters(sb, block_group);
+	if ((bit_max >> 3) >= bh->b_size)
+		return -EFSCORRUPTED;
+
 	for (bit = 0; bit < bit_max; bit++)
 		ext4_set_bit(bit, bh->b_data);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 62880586ed85..8eac7d586997 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 	ext4_fsblk_t block = ext4_ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
 	ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
-	ext4_lblk_t last = lblock + len - 1;

-	if (len == 0 || lblock > last)
+	/*
+	 * We allow neither:
+	 *  - zero length
+	 *  - overflow/wrap-around
+	 */
+	if (lblock + len <= lblock)
 		return 0;
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
 }
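The new extents.c check relies on unsigned wrap-around: since ext4_lblk_t is a 32-bit unsigned type, "lblock + len <= lblock" holds exactly when len is zero or the range wraps past the type's maximum, replacing the two separate tests it deletes. A standalone demonstration (uint32_t standing in for ext4_lblk_t):

/* Demonstrates the unsigned wrap-around check used above; illustrative. */
#include <stdint.h>
#include <stdio.h>

static int range_valid(uint32_t start, uint32_t len)
{
	/* Rejects len == 0 (start + 0 == start) and ranges whose end
	 * wraps past UINT32_MAX (start + len wraps below start). */
	return start + len > start;
}

int main(void)
{
	printf("%d\n", range_valid(100, 0));		/* 0: zero length */
	printf("%d\n", range_valid(0xfffffff0, 0x20));	/* 0: wraps around */
	printf("%d\n", range_valid(100, 50));		/* 1: OK */
	return 0;
}
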
1318     diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
1319     index e31d762eedce..9a5ad0f0d3ed 100644
1320     --- a/fs/ext4/inode.c
1321     +++ b/fs/ext4/inode.c
1322     @@ -205,9 +205,9 @@ void ext4_evict_inode(struct inode *inode)
1323     * Note that directories do not have this problem because they
1324     * don't use page cache.
1325     */
1326     - if (ext4_should_journal_data(inode) &&
1327     - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
1328     - inode->i_ino != EXT4_JOURNAL_INO) {
1329     + if (inode->i_ino != EXT4_JOURNAL_INO &&
1330     + ext4_should_journal_data(inode) &&
1331     + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
1332     journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
1333     tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
1334    
1335     @@ -2589,13 +2589,36 @@ retry:
1336     done = true;
1337     }
1338     }
1339     - ext4_journal_stop(handle);
1340     + /*
1341     + * Caution: If the handle is synchronous,
1342     + * ext4_journal_stop() can wait for transaction commit
1343     + * to finish which may depend on writeback of pages to
1344     + * complete or on page lock to be released. In that
1345     + * case, we have to wait until after after we have
1346     + * submitted all the IO, released page locks we hold,
1347     + * and dropped io_end reference (for extent conversion
1348     + * to be able to complete) before stopping the handle.
1349     + */
1350     + if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
1351     + ext4_journal_stop(handle);
1352     + handle = NULL;
1353     + }
1354     /* Submit prepared bio */
1355     ext4_io_submit(&mpd.io_submit);
1356     /* Unlock pages we didn't use */
1357     mpage_release_unused_pages(&mpd, give_up_on_write);
1358     - /* Drop our io_end reference we got from init */
1359     - ext4_put_io_end(mpd.io_submit.io_end);
1360     + /*
1361     + * Drop our io_end reference we got from init. We have
1362     + * to be careful and use deferred io_end finishing if
1363     + * we are still holding the transaction as we can
1364     + * release the last reference to io_end which may end
1365     + * up doing unwritten extent conversion.
1366     + */
1367     + if (handle) {
1368     + ext4_put_io_end_defer(mpd.io_submit.io_end);
1369     + ext4_journal_stop(handle);
1370     + } else
1371     + ext4_put_io_end(mpd.io_submit.io_end);
1372    
1373     if (ret == -ENOSPC && sbi->s_journal) {
1374     /*
1375     diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
1376     index cf734170daa9..c4dcac8a018d 100644
1377     --- a/fs/ext4/mballoc.c
1378     +++ b/fs/ext4/mballoc.c
1379     @@ -2932,7 +2932,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
1380     ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
1381     "fs metadata", block, block+len);
1382     /* File system mounted not to panic on error
1383     - * Fix the bitmap and repeat the block allocation
1384     + * Fix the bitmap and return EFSCORRUPTED
1385     * We leak some of the blocks here.
1386     */
1387     ext4_lock_group(sb, ac->ac_b_ex.fe_group);
1388     @@ -2941,7 +2941,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
1389     ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
1390     err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
1391     if (!err)
1392     - err = -EAGAIN;
1393     + err = -EFSCORRUPTED;
1394     goto out_err;
1395     }
1396    
1397     @@ -4506,18 +4506,7 @@ repeat:
1398     }
1399     if (likely(ac->ac_status == AC_STATUS_FOUND)) {
1400     *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
1401     - if (*errp == -EAGAIN) {
1402     - /*
1403     - * drop the reference that we took
1404     - * in ext4_mb_use_best_found
1405     - */
1406     - ext4_mb_release_context(ac);
1407     - ac->ac_b_ex.fe_group = 0;
1408     - ac->ac_b_ex.fe_start = 0;
1409     - ac->ac_b_ex.fe_len = 0;
1410     - ac->ac_status = AC_STATUS_CONTINUE;
1411     - goto repeat;
1412     - } else if (*errp) {
1413     + if (*errp) {
1414     ext4_discard_allocated_blocks(ac);
1415     goto errout;
1416     } else {
1417     diff --git a/fs/ext4/super.c b/fs/ext4/super.c
1418     index 852c26806af2..c542ebcf7a92 100644
1419     --- a/fs/ext4/super.c
1420     +++ b/fs/ext4/super.c
1421     @@ -2240,6 +2240,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
1422     while (es->s_last_orphan) {
1423     struct inode *inode;
1424    
1425     + /*
1426     + * We may have encountered an error during cleanup; if
1427     + * so, skip the rest.
1428     + */
1429     + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1430     + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1431     + es->s_last_orphan = 0;
1432     + break;
1433     + }
1434     +
1435     inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1436     if (IS_ERR(inode)) {
1437     es->s_last_orphan = 0;
1438     @@ -3372,6 +3382,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1439     goto failed_mount;
1440     }
1441    
1442     + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
1443     + ext4_msg(sb, KERN_ERR,
1444     + "Number of reserved GDT blocks insanely large: %d",
1445     + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
1446     + goto failed_mount;
1447     + }
1448     +
1449     if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
1450     if (blocksize != PAGE_SIZE) {
1451     ext4_msg(sb, KERN_ERR,
1452     diff --git a/fs/fuse/file.c b/fs/fuse/file.c
1453     index c2e340d6ec6e..d58d4c0af0ce 100644
1454     --- a/fs/fuse/file.c
1455     +++ b/fs/fuse/file.c
1456     @@ -417,6 +417,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
1457     fuse_sync_writes(inode);
1458     mutex_unlock(&inode->i_mutex);
1459    
1460     + if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
1461     + test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
1462     + err = -ENOSPC;
1463     + if (test_bit(AS_EIO, &file->f_mapping->flags) &&
1464     + test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
1465     + err = -EIO;
1466     + if (err)
1467     + return err;
1468     +
1469     req = fuse_get_req_nofail_nopages(fc, file);
1470     memset(&inarg, 0, sizeof(inarg));
1471     inarg.fh = ff->fh;
1472     @@ -462,6 +471,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
1473     goto out;
1474    
1475     fuse_sync_writes(inode);
1476     +
1477     + /*
1478     + * Due to implementation of fuse writeback
1479     + * filemap_write_and_wait_range() does not catch errors.
1480     + * We have to do this directly after fuse_sync_writes()
1481     + */
1482     + if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
1483     + test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
1484     + err = -ENOSPC;
1485     + if (test_bit(AS_EIO, &file->f_mapping->flags) &&
1486     + test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
1487     + err = -EIO;
1488     + if (err)
1489     + goto out;
1490     +
1491     err = sync_inode_metadata(inode, 1);
1492     if (err)
1493     goto out;
1494     diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
1495     index 2913db2a5b99..0d5e8e59b390 100644
1496     --- a/fs/fuse/inode.c
1497     +++ b/fs/fuse/inode.c
1498     @@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
1499     arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
1500     FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
1501     FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1502     - FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1503     + FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
1504     FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1505     FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
1506     req->in.h.opcode = FUSE_INIT;
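
The fuse_send_init() one-liner swaps FUSE_IOCTL_DIR for
FUSE_HAS_IOCTL_DIR. The two constants belong to different flag
namespaces (per-ioctl request flags vs. INIT capability flags), so the
old code set an unrelated bit in the INIT mask and never actually
advertised directory-ioctl support. A small sketch of that kind of
namespace mix-up; the flag values below are made up for illustration:

    #include <stdio.h>

    /* per-request ioctl flags: one namespace */
    #define IOCTL_DIR          (1 << 4)
    /* INIT capability flags: a different namespace */
    #define INIT_EXPORT        (1 << 4)  /* same bit, unrelated meaning */
    #define INIT_HAS_IOCTL_DIR (1 << 11)

    int main(void)
    {
        unsigned int init_flags = IOCTL_DIR; /* the bug: wrong namespace */

        printf("export support bit: %d\n", !!(init_flags & INIT_EXPORT));
        printf("dir-ioctl bit:      %d\n",
               !!(init_flags & INIT_HAS_IOCTL_DIR));
        return 0;
    }
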
1507     diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h
1508     index e0ee0b3000b2..358a4db72a27 100644
1509     --- a/include/linux/devpts_fs.h
1510     +++ b/include/linux/devpts_fs.h
1511     @@ -15,38 +15,24 @@
1512    
1513     #include <linux/errno.h>
1514    
1515     +struct pts_fs_info;
1516     +
1517     #ifdef CONFIG_UNIX98_PTYS
1518    
1519     -int devpts_new_index(struct inode *ptmx_inode);
1520     -void devpts_kill_index(struct inode *ptmx_inode, int idx);
1521     -void devpts_add_ref(struct inode *ptmx_inode);
1522     -void devpts_del_ref(struct inode *ptmx_inode);
1523     +/* Look up a pts fs info and get a ref to it */
1524     +struct pts_fs_info *devpts_get_ref(struct inode *, struct file *);
1525     +void devpts_put_ref(struct pts_fs_info *);
1526     +
1527     +int devpts_new_index(struct pts_fs_info *);
1528     +void devpts_kill_index(struct pts_fs_info *, int);
1529     +
1530     /* mknod in devpts */
1531     -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
1532     - void *priv);
1533     +struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *);
1534     /* get private structure */
1535     void *devpts_get_priv(struct inode *pts_inode);
1536     /* unlink */
1537     void devpts_pty_kill(struct inode *inode);
1538    
1539     -#else
1540     -
1541     -/* Dummy stubs in the no-pty case */
1542     -static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; }
1543     -static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { }
1544     -static inline void devpts_add_ref(struct inode *ptmx_inode) { }
1545     -static inline void devpts_del_ref(struct inode *ptmx_inode) { }
1546     -static inline struct inode *devpts_pty_new(struct inode *ptmx_inode,
1547     - dev_t device, int index, void *priv)
1548     -{
1549     - return ERR_PTR(-EINVAL);
1550     -}
1551     -static inline void *devpts_get_priv(struct inode *pts_inode)
1552     -{
1553     - return NULL;
1554     -}
1555     -static inline void devpts_pty_kill(struct inode *inode) { }
1556     -
1557     #endif
1558    
1559    
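
The devpts interface above stops threading the ptmx inode through every
call and instead hands out an opaque, reference-counted struct
pts_fs_info: open pins the instance via devpts_get_ref(), everything
else takes the handle, and devpts_put_ref() releases it. A userspace
sketch of that opaque-handle get/put shape, with stand-in types rather
than the kernel implementation:

    #include <stdio.h>
    #include <stdlib.h>

    struct pts_fs_info {       /* stand-in, not the kernel struct */
        int refcount;
        int next_index;
    };

    static struct pts_fs_info *devpts_get_ref(struct pts_fs_info *fsi)
    {
        fsi->refcount++;
        return fsi;
    }

    static void devpts_put_ref(struct pts_fs_info *fsi)
    {
        if (--fsi->refcount == 0)
            free(fsi);
    }

    int main(void)
    {
        struct pts_fs_info *fsi = calloc(1, sizeof(*fsi));

        fsi->refcount = 1;           /* the mount holds the first ref */
        devpts_get_ref(fsi);         /* an open ptmx file pins it */
        printf("allocated index %d\n", fsi->next_index++);
        devpts_put_ref(fsi);         /* open file released */
        devpts_put_ref(fsi);         /* mount released, instance freed */
        return 0;
    }

Note also that the !CONFIG_UNIX98_PTYS stubs are gone: callers are now
expected to be compiled out entirely rather than silently receive dummy
-EINVAL results.
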
1560     diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
1561     index cd0e2413c358..435fd8426b8a 100644
1562     --- a/include/linux/memcontrol.h
1563     +++ b/include/linux/memcontrol.h
1564     @@ -174,6 +174,11 @@ struct mem_cgroup_thresholds {
1565     struct mem_cgroup_threshold_ary *spare;
1566     };
1567    
1568     +struct mem_cgroup_id {
1569     + int id;
1570     + atomic_t ref;
1571     +};
1572     +
1573     /*
1574     * The memory controller data structure. The memory controller controls both
1575     * page cache and RSS per cgroup. We would eventually like to provide
1576     @@ -183,6 +188,9 @@ struct mem_cgroup_thresholds {
1577     struct mem_cgroup {
1578     struct cgroup_subsys_state css;
1579    
1580     + /* Private memcg ID. Used to ID objects that outlive the cgroup */
1581     + struct mem_cgroup_id id;
1582     +
1583     /* Accounted resources */
1584     struct page_counter memory;
1585     struct page_counter memsw;
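
struct mem_cgroup_id splits the identifier from the CSS lifetime: the
numeric id names the memcg in space-constrained records (swap entries,
shadow entries), while the atomic ref counts how long the id slot must
stay reserved. A minimal model using C11 atomics; the type below is a
stand-in, not the kernel's:

    #include <stdatomic.h>
    #include <stdio.h>

    struct memcg_id_model {
        int id;          /* slot in the 16-bit ID space */
        atomic_int ref;  /* records still naming this slot */
    };

    int main(void)
    {
        struct memcg_id_model mid = { .id = 7 };

        atomic_init(&mid.ref, 1);        /* online state holds one ref */
        atomic_fetch_add(&mid.ref, 1);   /* a swap-out record pins it */
        atomic_fetch_sub(&mid.ref, 1);   /* swap entry freed */
        if (atomic_fetch_sub(&mid.ref, 1) == 1)
            mid.id = 0;                  /* last ref gone: recycle slot */
        printf("id now %d\n", mid.id);
        return 0;
    }
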
1586     diff --git a/ipc/msg.c b/ipc/msg.c
1587     index 1471db9a7e61..c6521c205cb4 100644
1588     --- a/ipc/msg.c
1589     +++ b/ipc/msg.c
1590     @@ -680,7 +680,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
1591     rcu_read_lock();
1592     ipc_lock_object(&msq->q_perm);
1593    
1594     - ipc_rcu_putref(msq, ipc_rcu_free);
1595     + ipc_rcu_putref(msq, msg_rcu_free);
1596     /* raced with RMID? */
1597     if (!ipc_valid_object(&msq->q_perm)) {
1598     err = -EIDRM;
1599     diff --git a/ipc/sem.c b/ipc/sem.c
1600     index b471e5a3863d..20d07008ad5e 100644
1601     --- a/ipc/sem.c
1602     +++ b/ipc/sem.c
1603     @@ -442,7 +442,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
1604     static inline void sem_lock_and_putref(struct sem_array *sma)
1605     {
1606     sem_lock(sma, NULL, -1);
1607     - ipc_rcu_putref(sma, ipc_rcu_free);
1608     + ipc_rcu_putref(sma, sem_rcu_free);
1609     }
1610    
1611     static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
1612     @@ -1385,7 +1385,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1613     rcu_read_unlock();
1614     sem_io = ipc_alloc(sizeof(ushort)*nsems);
1615     if (sem_io == NULL) {
1616     - ipc_rcu_putref(sma, ipc_rcu_free);
1617     + ipc_rcu_putref(sma, sem_rcu_free);
1618     return -ENOMEM;
1619     }
1620    
1621     @@ -1419,20 +1419,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1622     if (nsems > SEMMSL_FAST) {
1623     sem_io = ipc_alloc(sizeof(ushort)*nsems);
1624     if (sem_io == NULL) {
1625     - ipc_rcu_putref(sma, ipc_rcu_free);
1626     + ipc_rcu_putref(sma, sem_rcu_free);
1627     return -ENOMEM;
1628     }
1629     }
1630    
1631     if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1632     - ipc_rcu_putref(sma, ipc_rcu_free);
1633     + ipc_rcu_putref(sma, sem_rcu_free);
1634     err = -EFAULT;
1635     goto out_free;
1636     }
1637    
1638     for (i = 0; i < nsems; i++) {
1639     if (sem_io[i] > SEMVMX) {
1640     - ipc_rcu_putref(sma, ipc_rcu_free);
1641     + ipc_rcu_putref(sma, sem_rcu_free);
1642     err = -ERANGE;
1643     goto out_free;
1644     }
1645     @@ -1722,7 +1722,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1646     /* step 2: allocate new undo structure */
1647     new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1648     if (!new) {
1649     - ipc_rcu_putref(sma, ipc_rcu_free);
1650     + ipc_rcu_putref(sma, sem_rcu_free);
1651     return ERR_PTR(-ENOMEM);
1652     }
1653    
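
Every ipc/msg.c and ipc/sem.c change above is the same one-line fix
repeated across error paths: the code dropped its reference with the
generic ipc_rcu_free instead of msg_rcu_free/sem_rcu_free, so the
security-module teardown those callbacks perform was skipped and the
attached security blob leaked. A userspace model of that destructor
mismatch, with illustrative names and a plain pointer for the blob:

    #include <stdlib.h>

    struct sem_model {
        int refcount;
        char *security;  /* blob the generic destructor knows nothing of */
    };

    static void generic_free(struct sem_model *sma)
    {
        free(sma);                /* leaks sma->security */
    }

    static void sem_model_free(struct sem_model *sma)
    {
        free(sma->security);      /* type-specific teardown first */
        free(sma);
    }

    static void putref(struct sem_model *sma,
                       void (*freefn)(struct sem_model *))
    {
        if (--sma->refcount == 0)
            freefn(sma);
    }

    int main(void)
    {
        struct sem_model *sma = calloc(1, sizeof(*sma));

        sma->refcount = 1;
        sma->security = malloc(32);
        /* correct callback; putref(sma, generic_free) would leak */
        putref(sma, sem_model_free);
        return 0;
    }
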
1654     diff --git a/mm/memcontrol.c b/mm/memcontrol.c
1655     index 67648e6b2ac8..6b90d184e9c0 100644
1656     --- a/mm/memcontrol.c
1657     +++ b/mm/memcontrol.c
1658     @@ -272,21 +272,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
1659    
1660     static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
1661     {
1662     - return memcg->css.id;
1663     -}
1664     -
1665     -/*
1666     - * A helper function to get mem_cgroup from ID. must be called under
1667     - * rcu_read_lock(). The caller is responsible for calling
1668     - * css_tryget_online() if the mem_cgroup is used for charging. (dropping
1669     - * refcnt from swap can be called against removed memcg.)
1670     - */
1671     -static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
1672     -{
1673     - struct cgroup_subsys_state *css;
1674     -
1675     - css = css_from_id(id, &memory_cgrp_subsys);
1676     - return mem_cgroup_from_css(css);
1677     + return memcg->id.id;
1678     }
1679    
1680     /* Writing them here to avoid exposing memcg's inner layout */
1681     @@ -4124,6 +4110,88 @@ static struct cftype mem_cgroup_legacy_files[] = {
1682     { }, /* terminate */
1683     };
1684    
1685     +/*
1686     + * Private memory cgroup IDR
1687     + *
1688     + * Swap-out records and page cache shadow entries need to store memcg
1689     + * references in constrained space, so we maintain an ID space that is
1690     + * limited to 16 bit (MEM_CGROUP_ID_MAX), limiting the total number of
1691     + * memory-controlled cgroups to 64k.
1692     + *
1693     + * However, there usually are many references to the offline CSS after
1694     + * the cgroup has been destroyed, such as page cache or reclaimable
1695     + * slab objects, that don't need to hang on to the ID. We want to keep
1696     + * those dead CSS from occupying IDs, or we might quickly exhaust the
1697     + * relatively small ID space and prevent the creation of new cgroups
1698     + * even when there are far fewer than 64k cgroups - possibly none.
1699     + *
1700     + * Maintain a private 16-bit ID space for memcg, and allow the ID to
1701     + * be freed and recycled when it's no longer needed, which is usually
1702     + * when the CSS is offlined.
1703     + *
1704     + * The only exception to that are records of swapped out tmpfs/shmem
1705     + * pages that need to be attributed to live ancestors on swapin. But
1706     + * those references are manageable from userspace.
1707     + */
1708     +
1709     +static DEFINE_IDR(mem_cgroup_idr);
1710     +
1711     +static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
1712     +{
1713     + atomic_add(n, &memcg->id.ref);
1714     +}
1715     +
1716     +static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
1717     +{
1718     + while (!atomic_inc_not_zero(&memcg->id.ref)) {
1719     + /*
1720     + * The root cgroup cannot be destroyed, so its refcount must
1721     + * always be >= 1.
1722     + */
1723     + if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
1724     + VM_BUG_ON(1);
1725     + break;
1726     + }
1727     + memcg = parent_mem_cgroup(memcg);
1728     + if (!memcg)
1729     + memcg = root_mem_cgroup;
1730     + }
1731     + return memcg;
1732     +}
1733     +
1734     +static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
1735     +{
1736     + if (atomic_sub_and_test(n, &memcg->id.ref)) {
1737     + idr_remove(&mem_cgroup_idr, memcg->id.id);
1738     + memcg->id.id = 0;
1739     +
1740     + /* Memcg ID pins CSS */
1741     + css_put(&memcg->css);
1742     + }
1743     +}
1744     +
1745     +static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
1746     +{
1747     + mem_cgroup_id_get_many(memcg, 1);
1748     +}
1749     +
1750     +static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
1751     +{
1752     + mem_cgroup_id_put_many(memcg, 1);
1753     +}
1754     +
1755     +/**
1756     + * mem_cgroup_from_id - look up a memcg from a memcg id
1757     + * @id: the memcg id to look up
1758     + *
1759     + * Caller must hold rcu_read_lock().
1760     + */
1761     +struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
1762     +{
1763     + WARN_ON_ONCE(!rcu_read_lock_held());
1764     + return idr_find(&mem_cgroup_idr, id);
1765     +}
1766     +
1767     static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
1768     {
1769     struct mem_cgroup_per_node *pn;
1770     @@ -4178,6 +4246,12 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
1771     if (memcg_wb_domain_init(memcg, GFP_KERNEL))
1772     goto out_free_stat;
1773    
1774     + memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
1775     + 1, MEM_CGROUP_ID_MAX,
1776     + GFP_KERNEL);
1777     + if (memcg->id.id < 0)
1778     + goto out_free_stat;
1779     +
1780     return memcg;
1781    
1782     out_free_stat:
1783     @@ -4263,9 +4337,11 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
1784     #ifdef CONFIG_CGROUP_WRITEBACK
1785     INIT_LIST_HEAD(&memcg->cgwb_list);
1786     #endif
1787     + idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
1788     return &memcg->css;
1789    
1790     free_out:
1791     + idr_remove(&mem_cgroup_idr, memcg->id.id);
1792     __mem_cgroup_free(memcg);
1793     return ERR_PTR(error);
1794     }
1795     @@ -4277,8 +4353,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
1796     struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
1797     int ret;
1798    
1799     - if (css->id > MEM_CGROUP_ID_MAX)
1800     - return -ENOSPC;
1801     + /* Online state pins memcg ID, memcg ID pins CSS */
1802     + mem_cgroup_id_get(mem_cgroup_from_css(css));
1803     + css_get(css);
1804    
1805     if (!parent)
1806     return 0;
1807     @@ -4352,6 +4429,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
1808     memcg_deactivate_kmem(memcg);
1809    
1810     wb_memcg_offline(memcg);
1811     +
1812     + mem_cgroup_id_put(memcg);
1813     }
1814    
1815     static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
1816     @@ -4785,6 +4864,8 @@ static void __mem_cgroup_clear_mc(void)
1817     if (!mem_cgroup_is_root(mc.from))
1818     page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
1819    
1820     + mem_cgroup_id_put_many(mc.from, mc.moved_swap);
1821     +
1822     /*
1823     * we charged both to->memory and to->memsw, so we
1824     * should uncharge to->memory.
1825     @@ -4792,9 +4873,9 @@ static void __mem_cgroup_clear_mc(void)
1826     if (!mem_cgroup_is_root(mc.to))
1827     page_counter_uncharge(&mc.to->memory, mc.moved_swap);
1828    
1829     - css_put_many(&mc.from->css, mc.moved_swap);
1830     + mem_cgroup_id_get_many(mc.to, mc.moved_swap);
1831     + css_put_many(&mc.to->css, mc.moved_swap);
1832    
1833     - /* we've already done css_get(mc.to) */
1834     mc.moved_swap = 0;
1835     }
1836     memcg_oom_recover(from);
1837     @@ -5670,7 +5751,7 @@ subsys_initcall(mem_cgroup_init);
1838     */
1839     void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1840     {
1841     - struct mem_cgroup *memcg;
1842     + struct mem_cgroup *memcg, *swap_memcg;
1843     unsigned short oldid;
1844    
1845     VM_BUG_ON_PAGE(PageLRU(page), page);
1846     @@ -5685,15 +5766,27 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1847     if (!memcg)
1848     return;
1849    
1850     - oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
1851     + /*
1852     + * In case the memcg owning these pages has been offlined and doesn't
1853     + * have an ID allocated to it anymore, charge the closest online
1854     + * ancestor for the swap instead and transfer the memory+swap charge.
1855     + */
1856     + swap_memcg = mem_cgroup_id_get_online(memcg);
1857     + oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
1858     VM_BUG_ON_PAGE(oldid, page);
1859     - mem_cgroup_swap_statistics(memcg, true);
1860     + mem_cgroup_swap_statistics(swap_memcg, true);
1861    
1862     page->mem_cgroup = NULL;
1863    
1864     if (!mem_cgroup_is_root(memcg))
1865     page_counter_uncharge(&memcg->memory, 1);
1866    
1867     + if (memcg != swap_memcg) {
1868     + if (!mem_cgroup_is_root(swap_memcg))
1869     + page_counter_charge(&swap_memcg->memsw, 1);
1870     + page_counter_uncharge(&memcg->memsw, 1);
1871     + }
1872     +
1873     /*
1874     * Interrupts should be disabled here because the caller holds the
1875     * mapping->tree_lock lock which is taken with interrupts-off. It is
1876     @@ -5703,6 +5796,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
1877     VM_BUG_ON(!irqs_disabled());
1878     mem_cgroup_charge_statistics(memcg, page, -1);
1879     memcg_check_events(memcg, page);
1880     +
1881     + if (!mem_cgroup_is_root(memcg))
1882     + css_put(&memcg->css);
1883     }
1884    
1885     /**
1886     @@ -5726,7 +5822,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
1887     if (!mem_cgroup_is_root(memcg))
1888     page_counter_uncharge(&memcg->memsw, 1);
1889     mem_cgroup_swap_statistics(memcg, false);
1890     - css_put(&memcg->css);
1891     + mem_cgroup_id_put(memcg);
1892     }
1893     rcu_read_unlock();
1894     }
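
The mm/memcontrol.c changes implement the private ID space described in
the comment block: an ID is allocated in mem_cgroup_alloc(), published
in the IDR at css_alloc time, held while the cgroup is online or named
by swap records, and recycled in mem_cgroup_id_put_many() once the last
reference drops. The subtle piece is mem_cgroup_id_get_online(): at
swap-out time the owning memcg may already be offline with its ID gone,
so the code walks up to the nearest ancestor whose ID refcount can
still be raised and charges the swap there. A userspace sketch of that
walk, with stand-in types and a two-node tree instead of the kernel's
structures:

    #include <stdatomic.h>
    #include <stdio.h>

    struct memcg_model {
        const char *name;
        atomic_int id_ref;           /* 0 means the ID was recycled */
        struct memcg_model *parent;  /* NULL at the root */
    };

    /* atomic_inc_not_zero(): pin only if someone still holds a ref */
    static int inc_not_zero(atomic_int *ref)
    {
        int v = atomic_load(ref);

        while (v != 0)
            if (atomic_compare_exchange_weak(ref, &v, v + 1))
                return 1;
        return 0;
    }

    static struct memcg_model *id_get_online(struct memcg_model *memcg,
                                             struct memcg_model *root)
    {
        /* The root's ID ref is never dropped, so this terminates. */
        while (!inc_not_zero(&memcg->id_ref))
            memcg = memcg->parent ? memcg->parent : root;
        return memcg;
    }

    int main(void)
    {
        struct memcg_model root  = { "root", 1, NULL };
        struct memcg_model child = { "child", 0 /* offline */, &root };

        printf("swap charged to: %s\n",
               id_get_online(&child, &root)->name);
        return 0;
    }

The mem_cgroup_swapout() hunk mirrors this: when swap_memcg differs
from the page's memcg, the memsw charge is transferred to the ancestor
so the accounting stays consistent.
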
1895     diff --git a/mm/slab_common.c b/mm/slab_common.c
1896     index 3c6a86b4ec25..bec2fce9fafc 100644
1897     --- a/mm/slab_common.c
1898     +++ b/mm/slab_common.c
1899     @@ -521,8 +521,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
1900     goto out_unlock;
1901    
1902     cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
1903     - cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
1904     - css->id, memcg_name_buf);
1905     + cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
1906     + css->serial_nr, memcg_name_buf);
1907     if (!cache_name)
1908     goto out_unlock;
1909    
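
The mm/slab_common.c hunk follows from the ID recycling: css->id can
repeat over time, so embedding it in a kmem cache name could produce
duplicate names, while css->serial_nr is monotonically increasing and
never reused. A toy illustration with made-up values:

    #include <stdio.h>

    static void cache_name(char *buf, size_t len, unsigned long long key,
                           const char *cgroup)
    {
        snprintf(buf, len, "dentry(%llu:%s)", key, cgroup);
    }

    int main(void)
    {
        char a[64], b[64];

        cache_name(a, sizeof(a), 7, "old");  /* id 7, later recycled */
        cache_name(b, sizeof(b), 7, "new");  /* same id, new cgroup  */
        printf("recycled id collides: %s vs %s\n", a, b);

        cache_name(a, sizeof(a), 41, "old"); /* serials never repeat */
        cache_name(b, sizeof(b), 42, "new");
        printf("serials stay unique:  %s vs %s\n", a, b);
        return 0;
    }
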
1910     diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
1911     index 2b68418c7198..ffe95d954007 100644
1912     --- a/net/ipv4/fib_semantics.c
1913     +++ b/net/ipv4/fib_semantics.c
1914     @@ -479,6 +479,9 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
1915     if (!rtnh_ok(rtnh, remaining))
1916     return -EINVAL;
1917    
1918     + if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
1919     + return -EINVAL;
1920     +
1921     nexthop_nh->nh_flags =
1922     (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
1923     nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
1924     @@ -1003,6 +1006,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
1925     if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
1926     goto err_inval;
1927    
1928     + if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
1929     + goto err_inval;
1930     +
1931     #ifdef CONFIG_IP_ROUTE_MULTIPATH
1932     if (cfg->fc_mp) {
1933     nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
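
Both fib hunks reject user-supplied routes whose nexthop flags include
RTNH_F_DEAD or RTNH_F_LINKDOWN. Those are status bits the kernel
maintains itself as interfaces die or lose carrier, and accepting them
from netlink can confuse the nexthop bookkeeping that multipath routing
relies on. A sketch of the validation shape; treat the flag values as
illustrative even though they mirror the uapi header:

    #include <stdio.h>

    #define RTNH_F_DEAD     0x01  /* kernel-set: nexthop is dead */
    #define RTNH_F_LINKDOWN 0x10  /* kernel-set: carrier is down */

    /* Returns 0 if the flags are acceptable from userspace. */
    static int validate_nh_flags(unsigned int flags)
    {
        if (flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
            return -1;            /* -EINVAL in the kernel */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", validate_nh_flags(0x04));            /* accepted */
        printf("%d\n", validate_nh_flags(RTNH_F_LINKDOWN)); /* rejected */
        return 0;
    }
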
1934     diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
1935     index d4c51158470f..12b98e257c5f 100644
1936     --- a/net/ipv4/tcp_input.c
1937     +++ b/net/ipv4/tcp_input.c
1938     @@ -89,7 +89,7 @@ int sysctl_tcp_adv_win_scale __read_mostly = 1;
1939     EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
1940    
1941     /* rfc5961 challenge ack rate limiting */
1942     -int sysctl_tcp_challenge_ack_limit = 100;
1943     +int sysctl_tcp_challenge_ack_limit = 1000;
1944    
1945     int sysctl_tcp_stdurg __read_mostly;
1946     int sysctl_tcp_rfc1337 __read_mostly;
1947     @@ -3390,6 +3390,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
1948     return flag;
1949     }
1950    
1951     +static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
1952     + u32 *last_oow_ack_time)
1953     +{
1954     + if (*last_oow_ack_time) {
1955     + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1956     +
1957     + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1958     + NET_INC_STATS_BH(net, mib_idx);
1959     + return true; /* rate-limited: don't send yet! */
1960     + }
1961     + }
1962     +
1963     + *last_oow_ack_time = tcp_time_stamp;
1964     +
1965     + return false; /* not rate-limited: go ahead, send dupack now! */
1966     +}
1967     +
1968     /* Return true if we're currently rate-limiting out-of-window ACKs and
1969     * thus shouldn't send a dupack right now. We rate-limit dupacks in
1970     * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS
1971     @@ -3403,21 +3420,9 @@ bool tcp_oow_rate_limited(struct net *net, const struct sk_buff *skb,
1972     /* Data packets without SYNs are not likely part of an ACK loop. */
1973     if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) &&
1974     !tcp_hdr(skb)->syn)
1975     - goto not_rate_limited;
1976     -
1977     - if (*last_oow_ack_time) {
1978     - s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
1979     -
1980     - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
1981     - NET_INC_STATS_BH(net, mib_idx);
1982     - return true; /* rate-limited: don't send yet! */
1983     - }
1984     - }
1985     -
1986     - *last_oow_ack_time = tcp_time_stamp;
1987     + return false;
1988    
1989     -not_rate_limited:
1990     - return false; /* not rate-limited: go ahead, send dupack now! */
1991     + return __tcp_oow_rate_limited(net, mib_idx, last_oow_ack_time);
1992     }
1993    
1994     /* RFC 5961 7 [ACK Throttling] */
1995     @@ -3427,21 +3432,26 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
1996     static u32 challenge_timestamp;
1997     static unsigned int challenge_count;
1998     struct tcp_sock *tp = tcp_sk(sk);
1999     - u32 now;
2000     + u32 count, now;
2001    
2002     /* First check our per-socket dupack rate limit. */
2003     - if (tcp_oow_rate_limited(sock_net(sk), skb,
2004     - LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
2005     - &tp->last_oow_ack_time))
2006     + if (__tcp_oow_rate_limited(sock_net(sk),
2007     + LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
2008     + &tp->last_oow_ack_time))
2009     return;
2010    
2011     - /* Then check the check host-wide RFC 5961 rate limit. */
2012     + /* Then check host-wide RFC 5961 rate limit. */
2013     now = jiffies / HZ;
2014     if (now != challenge_timestamp) {
2015     + u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
2016     +
2017     challenge_timestamp = now;
2018     - challenge_count = 0;
2019     + WRITE_ONCE(challenge_count, half +
2020     + prandom_u32_max(sysctl_tcp_challenge_ack_limit));
2021     }
2022     - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
2023     + count = READ_ONCE(challenge_count);
2024     + if (count > 0) {
2025     + WRITE_ONCE(challenge_count, count - 1);
2026     NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
2027     tcp_send_ack(sk);
2028     }
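
The tcp_input.c changes harden RFC 5961 challenge ACKs against the
off-path attack published as CVE-2016-5696: a fixed, globally shared
budget of 100 ACKs per second was observable enough for a blind
attacker to count consumed ACKs and infer connection state. The fix
raises the default limit to 1000, factors the per-socket check out into
__tcp_oow_rate_limited(), and, crucially, makes each second's global
budget half the limit plus a random amount, read and written with
READ_ONCE/WRITE_ONCE. A userspace model of the randomized budget, with
rand() standing in for prandom_u32_max():

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define CHALLENGE_ACK_LIMIT 1000

    static unsigned int budget;
    static time_t stamp;

    static int challenge_ack_allowed(time_t now)
    {
        if (now != stamp) {           /* a new one-second window */
            stamp = now;
            budget = CHALLENGE_ACK_LIMIT / 2 +
                     (unsigned int)(rand() % CHALLENGE_ACK_LIMIT);
        }
        if (budget > 0) {
            budget--;
            return 1;                 /* send the challenge ACK */
        }
        return 0;                     /* over budget: drop silently */
    }

    int main(void)
    {
        time_t now = time(NULL);
        unsigned int sent = 0;
        int i;

        srand((unsigned int)now);
        for (i = 0; i < 2000; i++)
            sent += (unsigned int)challenge_ack_allowed(now);
        /* lands anywhere in [500, 1499]; the attacker can no longer
         * rely on a predictable per-second count */
        printf("challenge ACKs sent this second: %u\n", sent);
        return 0;
    }
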
2029     diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
2030     index 7c9883ab56e5..660c967ba84a 100644
2031     --- a/net/ipv4/tcp_output.c
2032     +++ b/net/ipv4/tcp_output.c
2033     @@ -239,7 +239,8 @@ void tcp_select_initial_window(int __space, __u32 mss,
2034     /* Set window scaling on max possible window
2035     * See RFC1323 for an explanation of the limit to 14
2036     */
2037     - space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
2038     + space = max_t(u32, space, sysctl_tcp_rmem[2]);
2039     + space = max_t(u32, space, sysctl_rmem_max);
2040     space = min_t(u32, space, *window_clamp);
2041     while (space > 65535 && (*rcv_wscale) < 14) {
2042     space >>= 1;
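
The tcp_select_initial_window() fix keeps the caller's buffer in the
max() chain: the old code overwrote space with
max(sysctl_tcp_rmem[2], sysctl_rmem_max), so a connection whose receive
buffer was larger than both sysctls got a window scale too small to
ever open the window that far. A standalone sketch of the corrected
selection, with illustrative sizes:

    #include <stdio.h>

    static unsigned int select_wscale(unsigned int space,
                                      unsigned int tcp_rmem_max,
                                      unsigned int rmem_max,
                                      unsigned int window_clamp)
    {
        unsigned int wscale = 0;

        if (space < tcp_rmem_max)     /* before the fix, these two     */
            space = tcp_rmem_max;     /* maxes replaced space outright */
        if (space < rmem_max)
            space = rmem_max;
        if (space > window_clamp)
            space = window_clamp;
        while (space > 65535 && wscale < 14) {
            space >>= 1;
            wscale++;
        }
        return wscale;
    }

    int main(void)
    {
        /* a 32 MiB buffer, sysctls at 6 MiB and 4 MiB */
        printf("wscale = %u\n",
               select_wscale(32u << 20, 6u << 20, 4u << 20, 64u << 20));
        return 0;
    }
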
2043     diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
2044     index 923abd6b3064..8d2f7c9b491d 100644
2045     --- a/net/irda/af_irda.c
2046     +++ b/net/irda/af_irda.c
2047     @@ -1024,8 +1024,11 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
2048     }
2049    
2050     /* Check if we have opened a local TSAP */
2051     - if (!self->tsap)
2052     - irda_open_tsap(self, LSAP_ANY, addr->sir_name);
2053     + if (!self->tsap) {
2054     + err = irda_open_tsap(self, LSAP_ANY, addr->sir_name);
2055     + if (err)
2056     + goto out;
2057     + }
2058    
2059     /* Move to connecting socket, start sending Connect Requests */
2060     sock->state = SS_CONNECTING;
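
irda_connect() previously ignored the return value of irda_open_tsap(),
so on failure the socket proceeded with self->tsap still NULL and could
dereference it later on the connect path. The shape of the fix, modeled
in plain C with hypothetical names:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct conn_model { void *tsap; };

    static int open_tsap(struct conn_model *c)
    {
        c->tsap = malloc(64);
        return c->tsap ? 0 : -ENOMEM;
    }

    static int do_connect(struct conn_model *c)
    {
        if (!c->tsap) {
            int err = open_tsap(c);

            if (err)            /* before the fix: error ignored,  */
                return err;     /* later code dereferenced c->tsap */
        }
        printf("connecting with tsap %p\n", c->tsap);
        return 0;
    }

    int main(void)
    {
        struct conn_model c = { 0 };

        return do_connect(&c) ? 1 : 0;
    }
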
2061     diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
2062     index ad4fa49ad1db..9068369f8a1b 100644
2063     --- a/security/apparmor/apparmorfs.c
2064     +++ b/security/apparmor/apparmorfs.c
2065     @@ -331,6 +331,7 @@ static int aa_fs_seq_hash_show(struct seq_file *seq, void *v)
2066     seq_printf(seq, "%.2x", profile->hash[i]);
2067     seq_puts(seq, "\n");
2068     }
2069     + aa_put_profile(profile);
2070    
2071     return 0;
2072     }
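
Finally, the apparmorfs hunk adds the aa_put_profile() that the
seq_file show path was missing: without it, every read of a profile's
hash file leaked one profile reference, pinning the profile in memory
indefinitely. A toy model of the get/put balance, with stand-in types
rather than the AppArmor API:

    #include <stdio.h>

    struct profile_model { int refcount; };

    static struct profile_model *get_profile(struct profile_model *p)
    {
        p->refcount++;
        return p;
    }

    static void put_profile(struct profile_model *p)
    {
        p->refcount--;
    }

    static void hash_show(struct profile_model *p)
    {
        struct profile_model *profile = get_profile(p);

        printf("...hash bytes...\n");
        put_profile(profile);  /* the line the patch adds */
    }

    int main(void)
    {
        struct profile_model p = { 1 };
        int i;

        for (i = 0; i < 3; i++)
            hash_show(&p);
        printf("refcount back to %d\n", p.refcount);
        return 0;
    }
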